/[escript]/branches/clazy/escriptcore/src/DataLazy.cpp

Diff of /branches/clazy/escriptcore/src/DataLazy.cpp


revision 2514 by jfenwick, Fri Jul 3 00:57:45 2009 UTC  →  revision 2779 by caltinay, Thu Nov 26 03:51:15 2009 UTC
# Line 1  Line 1 
1    
2  /*******************************************************  /*******************************************************
3  *  *
4  * Copyright (c) 2003-2008 by University of Queensland  * Copyright (c) 2003-2009 by University of Queensland
5  * Earth Systems Science Computational Center (ESSCC)  * Earth Systems Science Computational Center (ESSCC)
6  * http://www.uq.edu.au/esscc  * http://www.uq.edu.au/esscc
7  *  *
# Line 109  namespace escript Line 109  namespace escript
109  namespace  namespace
110  {  {
111    
112    
113    // enabling this will print out whenever the maximum stack size used by resolve increases
114    // it assumes _OPENMP is also in use
115    //#define LAZY_STACK_PROF
116    
117    
118    
119    #ifndef _OPENMP
120      #ifdef LAZY_STACK_PROF
121      #undef LAZY_STACK_PROF
122      #endif
123    #endif
124    
125    
126    #ifdef LAZY_STACK_PROF
127    std::vector<void*> stackstart(getNumberOfThreads());
128    std::vector<void*> stackend(getNumberOfThreads());
129    size_t maxstackuse=0;
130    #endif
131    
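The LAZY_STACK_PROF machinery declared above measures per-thread stack use by recording the address of a local variable at the start of a resolve and the lowest such address reached during the recursion; the difference approximates the stack consumed, assuming a downward-growing stack as the resolve code later in this file does. A minimal standalone sketch of that probe, with illustrative names only (this is not escript code):

#include <cstddef>
#include <iostream>
#include <vector>

// One slot per thread in the real code; a single slot suffices for this sketch.
static std::vector<void*> stack_top(1);      // address recorded at the top of the recursion
static std::vector<void*> stack_deepest(1);  // lowest address seen so far

void recurse(int depth)
{
    int marker;                                         // lives on this call's stack frame
    if (static_cast<void*>(&marker) < stack_deepest[0])
        stack_deepest[0] = &marker;                     // track the deepest point reached
    if (depth > 0)
        recurse(depth - 1);
}

int main()
{
    int anchor;
    stack_top[0] = &anchor;
    stack_deepest[0] = &anchor;
    recurse(100);
    std::size_t used = static_cast<char*>(stack_top[0]) - static_cast<char*>(stack_deepest[0]);
    std::cout << "approx stack used: " << used << " bytes\n";
    return 0;
}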
132  enum ES_opgroup  enum ES_opgroup
133  {  {
134     G_UNKNOWN,     G_UNKNOWN,
# Line 119  enum ES_opgroup Line 139  enum ES_opgroup
139     G_NP1OUT,        // non-pointwise op with one output     G_NP1OUT,        // non-pointwise op with one output
140     G_NP1OUT_P,      // non-pointwise op with one output requiring a parameter     G_NP1OUT_P,      // non-pointwise op with one output requiring a parameter
141     G_TENSORPROD,    // general tensor product     G_TENSORPROD,    // general tensor product
142     G_NP1OUT_2P      // non-pointwise op with one output requiring two params     G_NP1OUT_2P,     // non-pointwise op with one output requiring two params
143       G_REDUCTION      // non-pointwise unary op with a scalar output
144  };  };
145    
146    
# Line 134  string ES_opstrings[]={"UNKNOWN","IDENTI Line 155  string ES_opstrings[]={"UNKNOWN","IDENTI
155              "symmetric","nonsymmetric",              "symmetric","nonsymmetric",
156              "prod",              "prod",
157              "transpose", "trace",              "transpose", "trace",
158              "swapaxes"};              "swapaxes",
159  int ES_opcount=41;              "minval", "maxval"};
160    int ES_opcount=43;
161  ES_opgroup opgroups[]={G_UNKNOWN,G_IDENTITY,G_BINARY,G_BINARY,G_BINARY,G_BINARY, G_BINARY,  ES_opgroup opgroups[]={G_UNKNOWN,G_IDENTITY,G_BINARY,G_BINARY,G_BINARY,G_BINARY, G_BINARY,
162              G_UNARY,G_UNARY,G_UNARY, //10              G_UNARY,G_UNARY,G_UNARY, //10
163              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,    // 17              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,    // 17
# Line 145  ES_opgroup opgroups[]={G_UNKNOWN,G_IDENT Line 167  ES_opgroup opgroups[]={G_UNKNOWN,G_IDENT
167              G_NP1OUT,G_NP1OUT,              G_NP1OUT,G_NP1OUT,
168              G_TENSORPROD,              G_TENSORPROD,
169              G_NP1OUT_P, G_NP1OUT_P,              G_NP1OUT_P, G_NP1OUT_P,
170              G_NP1OUT_2P};              G_NP1OUT_2P,
171                G_REDUCTION, G_REDUCTION};
172  inline  inline
173  ES_opgroup  ES_opgroup
174  getOpgroup(ES_optype op)  getOpgroup(ES_optype op)
# Line 190  resultShape(DataAbstract_ptr left, DataA Line 213  resultShape(DataAbstract_ptr left, DataA
213        {        {
214          throw DataException("Shapes not the same - shapes must match for (point)binary operations.");          throw DataException("Shapes not the same - shapes must match for (point)binary operations.");
215        }        }
216    
217        if (left->getRank()==0)   // we need to allow scalar * anything        if (left->getRank()==0)   // we need to allow scalar * anything
218        {        {
219          return right->getShape();          return right->getShape();
# Line 217  resultShape(DataAbstract_ptr left, ES_op Line 241  resultShape(DataAbstract_ptr left, ES_op
241          int rank=left->getRank();          int rank=left->getRank();
242          if (axis_offset<0 || axis_offset>rank)          if (axis_offset<0 || axis_offset>rank)
243          {          {
244                 throw DataException("Error - Data::transpose must have 0 <= axis_offset <= rank=" + rank);              stringstream e;
245              }              e << "Error - Data::transpose must have 0 <= axis_offset <= rank=" << rank;
246              for (int i=0; i<rank; i++)              throw DataException(e.str());
247            }
248            for (int i=0; i<rank; i++)
249          {          {
250             int index = (axis_offset+i)%rank;             int index = (axis_offset+i)%rank;
251                 sh.push_back(s[index]); // Append to new shape             sh.push_back(s[index]); // Append to new shape
252              }          }
253          return sh;          return sh;
254         }         }
255      break;      break;
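The transpose branch of resultShape() shown above rotates the shape: output dimension i takes input dimension (axis_offset + i) % rank, so an input shape (2,3,4) with axis_offset=1 becomes (3,4,2). A small self-contained sketch of that computation (illustrative names, not the escript API):

#include <iostream>
#include <vector>

// Rotate a shape by axis_offset, as the transpose branch of resultShape does.
std::vector<int> transposedShape(const std::vector<int>& s, int axis_offset)
{
    const int rank = static_cast<int>(s.size());
    std::vector<int> sh;
    for (int i = 0; i < rank; ++i)
        sh.push_back(s[(axis_offset + i) % rank]);   // wrap around the rank
    return sh;
}

int main()
{
    for (int d : transposedShape({2, 3, 4}, 1))
        std::cout << d << ' ';                       // prints: 3 4 2
    std::cout << '\n';
    return 0;
}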
# Line 302  SwapShape(DataAbstract_ptr left, const i Line 328  SwapShape(DataAbstract_ptr left, const i
328          throw DataException("Error - Data::swapaxes argument must have at least rank 2.");          throw DataException("Error - Data::swapaxes argument must have at least rank 2.");
329       }       }
330       if (axis0<0 || axis0>rank-1) {       if (axis0<0 || axis0>rank-1) {
331          throw DataException("Error - Data::swapaxes: axis0 must be between 0 and rank-1=" + rank-1);          stringstream e;
332            e << "Error - Data::swapaxes: axis0 must be between 0 and rank-1=" << (rank-1);
333            throw DataException(e.str());
334       }       }
335       if (axis1<0 || axis1>rank-1) {       if (axis1<0 || axis1>rank-1) {
336           throw DataException("Error - Data::swapaxes: axis1 must be between 0 and rank-1=" + rank-1);          stringstream e; e << "Error - Data::swapaxes: axis1 must be between 0 and rank-1=" << (rank-1);
337            throw DataException(e.str());
338       }       }
339       if (axis0 == axis1) {       if (axis0 == axis1) {
340           throw DataException("Error - Data::swapaxes: axis indices must be different.");           throw DataException("Error - Data::swapaxes: axis indices must be different.");
# Line 394  GTPShape(DataAbstract_ptr left, DataAbst Line 423  GTPShape(DataAbstract_ptr left, DataAbst
423    return shape2;    return shape2;
424  }  }
425    
 // determine the number of sample buffers required to evaluate an expression combining left and right  
 // NP1OUT needs an extra buffer because we can't write the answers over the top of the input.  
 // The same goes for G_TENSORPROD  
 // It might seem that pointwise binary ops (G_BINARY) could be written over the top of the left operand.  
 // This would be true were it not for the possibility that the LHS could be a scalar which needs to be examined  
 // multiple times  
 int  
 calcBuffs(const DataLazy_ptr& left, const DataLazy_ptr& right, ES_optype op)  
 {  
    switch(getOpgroup(op))  
    {  
    case G_IDENTITY: return 1;  
    case G_BINARY: return 1+max(left->getBuffsRequired(),right->getBuffsRequired()+1);  
    case G_UNARY:  
    case G_UNARY_P: return max(left->getBuffsRequired(),1);  
    case G_NP1OUT: return 1+max(left->getBuffsRequired(),1);  
    case G_NP1OUT_P: return 1+max(left->getBuffsRequired(),1);  
    case G_TENSORPROD: return 1+max(left->getBuffsRequired(),right->getBuffsRequired()+1);  
    case G_NP1OUT_2P: return 1+max(left->getBuffsRequired(),1);  
    default:  
     throw DataException("Programmer Error - attempt to calcBuffs() for operator "+opToString(op)+".");  
    }  
 }  
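The calcBuffs() recursion removed above sized the per-thread scratch space for the old vector-based resolver: a leaf needs one sample buffer, a unary node can reuse its child's buffers, and a binary node needs one buffer for its own result plus enough to hold the left result while the right child is evaluated. A standalone sketch of that counting on a toy expression tree (illustrative names, not escript code):

#include <algorithm>
#include <iostream>

// Toy expression node; the real code walks DataLazy_ptr children instead.
struct Node {
    enum Kind { LEAF, UNARY, BINARY } kind;
    const Node* left;
    const Node* right;
};

int buffsRequired(const Node& n)
{
    switch (n.kind) {
    case Node::LEAF:   return 1;                              // data already exists
    case Node::UNARY:  return std::max(buffsRequired(*n.left), 1);
    case Node::BINARY: return 1 + std::max(buffsRequired(*n.left),
                                           buffsRequired(*n.right) + 1);
    }
    return 1;
}

int main()
{
    Node a{Node::LEAF, nullptr, nullptr};
    Node b{Node::LEAF, nullptr, nullptr};
    Node sinb{Node::UNARY, &b, nullptr};            // sin(b)
    Node sum{Node::BINARY, &a, &sinb};              // a + sin(b)
    std::cout << buffsRequired(sum) << " sample buffers\n";   // prints: 3
    return 0;
}

Even a small expression such as a + sin(b) therefore needed three sample-sized buffers per thread, which is part of what the node-storage scheme introduced in this revision avoids.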
   
   
426  }   // end anonymous namespace  }   // end anonymous namespace
427    
428    
# Line 434  opToString(ES_optype op) Line 438  opToString(ES_optype op)
438    return ES_opstrings[op];    return ES_opstrings[op];
439  }  }
440    
 #ifdef LAZY_NODE_STORAGE  
441  void DataLazy::LazyNodeSetup()  void DataLazy::LazyNodeSetup()
442  {  {
443  #ifdef _OPENMP  #ifdef _OPENMP
# Line 451  void DataLazy::LazyNodeSetup() Line 454  void DataLazy::LazyNodeSetup()
454      m_sampleids[0]=-1;      m_sampleids[0]=-1;
455  #endif  // _OPENMP  #endif  // _OPENMP
456  }  }
 #endif   // LAZY_NODE_STORAGE  
457    
458    
459  // Creates an identity node  // Creates an identity node
460  DataLazy::DataLazy(DataAbstract_ptr p)  DataLazy::DataLazy(DataAbstract_ptr p)
461      : parent(p->getFunctionSpace(),p->getShape())      : parent(p->getFunctionSpace(),p->getShape())
 #ifdef LAZY_NODE_STORAGE  
462      ,m_sampleids(0),      ,m_sampleids(0),
463      m_samples(1)      m_samples(1)
 #endif  
464  {  {
465     if (p->isLazy())     if (p->isLazy())
466     {     {
# Line 480  LAZYDEBUG(cout << "(1)Lazy created with Line 480  LAZYDEBUG(cout << "(1)Lazy created with
480  }  }
481    
482  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op)  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op)
483      : parent(left->getFunctionSpace(),left->getShape()),      : parent(left->getFunctionSpace(),(getOpgroup(op)!=G_REDUCTION)?left->getShape():DataTypes::scalarShape),
484      m_op(op),      m_op(op),
485      m_axis_offset(0),      m_axis_offset(0),
486      m_transpose(0),      m_transpose(0),
487      m_SL(0), m_SM(0), m_SR(0)      m_SL(0), m_SM(0), m_SR(0)
488  {  {
489     if ((getOpgroup(op)!=G_UNARY) && (getOpgroup(op)!=G_NP1OUT))     if ((getOpgroup(op)!=G_UNARY) && (getOpgroup(op)!=G_NP1OUT) && (getOpgroup(op)!=G_REDUCTION))
490     {     {
491      throw DataException("Programmer error - constructor DataLazy(left, op) will only process UNARY operations.");      throw DataException("Programmer error - constructor DataLazy(left, op) will only process UNARY operations.");
492     }     }
# Line 502  DataLazy::DataLazy(DataAbstract_ptr left Line 502  DataLazy::DataLazy(DataAbstract_ptr left
502     }     }
503     m_readytype=lleft->m_readytype;     m_readytype=lleft->m_readytype;
504     m_left=lleft;     m_left=lleft;
    m_buffsRequired=calcBuffs(m_left, m_right,m_op); // yeah m_right will be null at this point  
505     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
    m_maxsamplesize=max(m_samplesize,m_left->getMaxSampleSize());  
506     m_children=m_left->m_children+1;     m_children=m_left->m_children+1;
507     m_height=m_left->m_height+1;     m_height=m_left->m_height+1;
 #ifdef LAZY_NODE_STORAGE  
508     LazyNodeSetup();     LazyNodeSetup();
 #endif  
509     SIZELIMIT     SIZELIMIT
510  }  }
511    
# Line 576  LAZYDEBUG(cout << "Right " << right.get( Line 572  LAZYDEBUG(cout << "Right " << right.get(
572      m_readytype='C';      m_readytype='C';
573     }     }
574     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
    m_maxsamplesize=max(max(m_samplesize,m_right->getMaxSampleSize()),m_left->getMaxSampleSize());    
    m_buffsRequired=calcBuffs(m_left, m_right,m_op);  
575     m_children=m_left->m_children+m_right->m_children+2;     m_children=m_left->m_children+m_right->m_children+2;
576     m_height=max(m_left->m_height,m_right->m_height)+1;     m_height=max(m_left->m_height,m_right->m_height)+1;
 #ifdef LAZY_NODE_STORAGE  
577     LazyNodeSetup();     LazyNodeSetup();
 #endif  
578     SIZELIMIT     SIZELIMIT
579  LAZYDEBUG(cout << "(3)Lazy created with " << m_samplesize << endl;)  LAZYDEBUG(cout << "(3)Lazy created with " << m_samplesize << endl;)
580  }  }
# Line 646  DataLazy::DataLazy(DataAbstract_ptr left Line 638  DataLazy::DataLazy(DataAbstract_ptr left
638      m_readytype='C';      m_readytype='C';
639     }     }
640     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
    m_maxsamplesize=max(max(m_samplesize,m_right->getMaxSampleSize()),m_left->getMaxSampleSize());    
    m_buffsRequired=calcBuffs(m_left, m_right,m_op);  
641     m_children=m_left->m_children+m_right->m_children+2;     m_children=m_left->m_children+m_right->m_children+2;
642     m_height=max(m_left->m_height,m_right->m_height)+1;     m_height=max(m_left->m_height,m_right->m_height)+1;
 #ifdef LAZY_NODE_STORAGE  
643     LazyNodeSetup();     LazyNodeSetup();
 #endif  
644     SIZELIMIT     SIZELIMIT
645  LAZYDEBUG(cout << "(4)Lazy created with " << m_samplesize << endl;)  LAZYDEBUG(cout << "(4)Lazy created with " << m_samplesize << endl;)
646  }  }
# Line 680  DataLazy::DataLazy(DataAbstract_ptr left Line 668  DataLazy::DataLazy(DataAbstract_ptr left
668     }     }
669     m_readytype=lleft->m_readytype;     m_readytype=lleft->m_readytype;
670     m_left=lleft;     m_left=lleft;
    m_buffsRequired=calcBuffs(m_left, m_right,m_op); // yeah m_right will be null at this point  
671     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
    m_maxsamplesize=max(m_samplesize,m_left->getMaxSampleSize());  
672     m_children=m_left->m_children+1;     m_children=m_left->m_children+1;
673     m_height=m_left->m_height+1;     m_height=m_left->m_height+1;
 #ifdef LAZY_NODE_STORAGE  
674     LazyNodeSetup();     LazyNodeSetup();
 #endif  
675     SIZELIMIT     SIZELIMIT
676  LAZYDEBUG(cout << "(5)Lazy created with " << m_samplesize << endl;)  LAZYDEBUG(cout << "(5)Lazy created with " << m_samplesize << endl;)
677  }  }
# Line 714  DataLazy::DataLazy(DataAbstract_ptr left Line 698  DataLazy::DataLazy(DataAbstract_ptr left
698     }     }
699     m_readytype=lleft->m_readytype;     m_readytype=lleft->m_readytype;
700     m_left=lleft;     m_left=lleft;
    m_buffsRequired=calcBuffs(m_left, m_right,m_op); // yeah m_right will be null at this point  
701     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
    m_maxsamplesize=max(m_samplesize,m_left->getMaxSampleSize());  
702     m_children=m_left->m_children+1;     m_children=m_left->m_children+1;
703     m_height=m_left->m_height+1;     m_height=m_left->m_height+1;
 #ifdef LAZY_NODE_STORAGE  
704     LazyNodeSetup();     LazyNodeSetup();
 #endif  
705     SIZELIMIT     SIZELIMIT
706  LAZYDEBUG(cout << "(6)Lazy created with " << m_samplesize << endl;)  LAZYDEBUG(cout << "(6)Lazy created with " << m_samplesize << endl;)
707  }  }
# Line 749  DataLazy::DataLazy(DataAbstract_ptr left Line 729  DataLazy::DataLazy(DataAbstract_ptr left
729     }     }
730     m_readytype=lleft->m_readytype;     m_readytype=lleft->m_readytype;
731     m_left=lleft;     m_left=lleft;
    m_buffsRequired=calcBuffs(m_left, m_right,m_op); // yeah m_right will be null at this point  
732     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
    m_maxsamplesize=max(m_samplesize,m_left->getMaxSampleSize());  
733     m_children=m_left->m_children+1;     m_children=m_left->m_children+1;
734     m_height=m_left->m_height+1;     m_height=m_left->m_height+1;
 #ifdef LAZY_NODE_STORAGE  
735     LazyNodeSetup();     LazyNodeSetup();
 #endif  
736     SIZELIMIT     SIZELIMIT
737  LAZYDEBUG(cout << "(7)Lazy created with " << m_samplesize << endl;)  LAZYDEBUG(cout << "(7)Lazy created with " << m_samplesize << endl;)
738  }  }
739    
740  DataLazy::~DataLazy()  DataLazy::~DataLazy()
741  {  {
 #ifdef LAZY_NODE_SETUP  
742     delete[] m_sampleids;     delete[] m_sampleids;
    delete[] m_samples;  
 #endif  
 }  
   
   
 int  
 DataLazy::getBuffsRequired() const  
 {  
     return m_buffsRequired;  
743  }  }
744    
745    
 size_t  
 DataLazy::getMaxSampleSize() const  
 {  
     return m_maxsamplesize;  
 }  
   
   
   
 size_t  
 DataLazy::getSampleBufferSize() const  
 {  
     return m_maxsamplesize*(max(1,m_buffsRequired));  
 }  
   
746  /*  /*
747    \brief Evaluates the expression using methods on Data.    \brief Evaluates the expression using methods on Data.
748    This does the work for the collapse method.    This does the work for the collapse method.
# Line 933  DataLazy::collapseToReady() Line 885  DataLazy::collapseToReady()
885      case SWAP:      case SWAP:
886      result=left.swapaxes(m_axis_offset, m_transpose);      result=left.swapaxes(m_axis_offset, m_transpose);
887      break;      break;
888        case MINVAL:
889        result=left.minval();
890        break;
891        case MAXVAL:
892        result=left.maxval();
893        break;
894      default:      default:
895      throw DataException("Programmer error - collapseToReady does not know how to resolve operator "+opToString(m_op)+".");      throw DataException("Programmer error - collapseToReady does not know how to resolve operator "+opToString(m_op)+".");
896    }    }
# Line 960  DataLazy::collapse() Line 918  DataLazy::collapse()
918    m_op=IDENTITY;    m_op=IDENTITY;
919  }  }
920    
 /*  
   \brief Compute the value of the expression (unary operation) for the given sample.  
   \return Vector which stores the value of the subexpression for the given sample.  
   \param v A vector to store intermediate results.  
   \param offset Index in v to begin storing results.  
   \param sampleNo Sample number to evaluate.  
   \param roffset (output parameter) the offset in the return vector where the result begins.  
   
   The return value will be an existing vector so do not deallocate it.  
   If the result is stored in v it should be stored at the offset given.  
   Everything from offset to the end of v should be considered available for this method to use.  
 */  
 DataTypes::ValueType*  
 DataLazy::resolveUnary(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const  
 {  
     // we assume that any collapsing has been done before we get here  
     // since we only have one argument we don't need to think about only  
     // processing single points.  
   if (m_readytype!='E')  
   {  
     throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");  
   }  
   const ValueType* vleft=m_left->resolveSample(v,offset,sampleNo,roffset);  
   const double* left=&((*vleft)[roffset]);  
   double* result=&(v[offset]);  
   roffset=offset;  
   switch (m_op)  
   {  
     case SIN:    
     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sin);  
     break;  
     case COS:  
     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cos);  
     break;  
     case TAN:  
     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tan);  
     break;  
     case ASIN:  
     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::asin);  
     break;  
     case ACOS:  
     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::acos);  
     break;  
     case ATAN:  
     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::atan);  
     break;  
     case SINH:  
     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sinh);  
     break;  
     case COSH:  
     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cosh);  
     break;  
     case TANH:  
     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tanh);  
     break;  
     case ERF:  
 #if defined (_WIN32) && !defined(__INTEL_COMPILER)  
     throw DataException("Error - Data:: erf function is not supported on _WIN32 platforms.");  
 #else  
     tensor_unary_operation(m_samplesize, left, result, ::erf);  
     break;  
 #endif  
    case ASINH:  
 #if defined (_WIN32) && !defined(__INTEL_COMPILER)  
     tensor_unary_operation(m_samplesize, left, result, escript::asinh_substitute);  
 #else  
     tensor_unary_operation(m_samplesize, left, result, ::asinh);  
 #endif    
     break;  
    case ACOSH:  
 #if defined (_WIN32) && !defined(__INTEL_COMPILER)  
     tensor_unary_operation(m_samplesize, left, result, escript::acosh_substitute);  
 #else  
     tensor_unary_operation(m_samplesize, left, result, ::acosh);  
 #endif    
     break;  
    case ATANH:  
 #if defined (_WIN32) && !defined(__INTEL_COMPILER)  
     tensor_unary_operation(m_samplesize, left, result, escript::atanh_substitute);  
 #else  
     tensor_unary_operation(m_samplesize, left, result, ::atanh);  
 #endif    
     break;  
     case LOG10:  
     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log10);  
     break;  
     case LOG:  
     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log);  
     break;  
     case SIGN:  
     tensor_unary_operation(m_samplesize, left, result, escript::fsign);  
     break;  
     case ABS:  
     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::fabs);  
     break;  
     case NEG:  
     tensor_unary_operation(m_samplesize, left, result, negate<double>());  
     break;  
     case POS:  
     // it doesn't mean anything for delayed.  
     // it will just trigger a deep copy of the lazy object  
     throw DataException("Programmer error - POS not supported for lazy data.");  
     break;  
     case EXP:  
     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::exp);  
     break;  
     case SQRT:  
     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sqrt);  
     break;  
     case RECIP:  
     tensor_unary_operation(m_samplesize, left, result, bind1st(divides<double>(),1.));  
     break;  
     case GZ:  
     tensor_unary_operation(m_samplesize, left, result, bind2nd(greater<double>(),0.0));  
     break;  
     case LZ:  
     tensor_unary_operation(m_samplesize, left, result, bind2nd(less<double>(),0.0));  
     break;  
     case GEZ:  
     tensor_unary_operation(m_samplesize, left, result, bind2nd(greater_equal<double>(),0.0));  
     break;  
     case LEZ:  
     tensor_unary_operation(m_samplesize, left, result, bind2nd(less_equal<double>(),0.0));  
     break;  
 // These are actually G_UNARY_P ops but I don't see a compelling reason to treat them differently  
     case NEZ:  
     tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsGT(),m_tol));  
     break;  
     case EZ:  
     tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsLTE(),m_tol));  
     break;  
   
     default:  
     throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");  
   }  
   return &v;  
 }  
   
921    
922    
923    
924    
925    
 /*  
   \brief Compute the value of the expression (unary operation) for the given sample.  
   \return Vector which stores the value of the subexpression for the given sample.  
   \param v A vector to store intermediate results.  
   \param offset Index in v to begin storing results.  
   \param sampleNo Sample number to evaluate.  
   \param roffset (output parameter) the offset in the return vector where the result begins.  
   
   The return value will be an existing vector so do not deallocate it.  
   If the result is stored in v it should be stored at the offset given.  
   Everything from offset to the end of v should be considered available for this method to use.  
 */  
 DataTypes::ValueType*  
 DataLazy::resolveNP1OUT(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const  
 {  
     // we assume that any collapsing has been done before we get here  
     // since we only have one argument we don't need to think about only  
     // processing single points.  
   if (m_readytype!='E')  
   {  
     throw DataException("Programmer error - resolveNP1OUT should only be called on expanded Data.");  
   }  
     // since we can't write the result over the input, we need a result offset further along  
   size_t subroffset=roffset+m_samplesize;  
 LAZYDEBUG(cerr << "subroffset=" << subroffset << endl;)  
   const ValueType* vleft=m_left->resolveSample(v,offset+m_left->m_samplesize,sampleNo,subroffset);  
   roffset=offset;  
   size_t loop=0;  
   size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;  
   size_t step=getNoValues();  
   switch (m_op)  
   {  
     case SYM:  
     for (loop=0;loop<numsteps;++loop)  
     {  
         DataMaths::symmetric(*vleft,m_left->getShape(),subroffset, v, getShape(), offset);  
         subroffset+=step;  
         offset+=step;  
     }  
     break;  
     case NSYM:  
     for (loop=0;loop<numsteps;++loop)  
     {  
         DataMaths::nonsymmetric(*vleft,m_left->getShape(),subroffset, v, getShape(), offset);  
         subroffset+=step;  
         offset+=step;  
     }  
     break;  
     default:  
     throw DataException("Programmer error - resolveNP1OUT can not resolve operator "+opToString(m_op)+".");  
   }  
   return &v;  
 }  
   
 /*  
   \brief Compute the value of the expression (unary operation) for the given sample.  
   \return Vector which stores the value of the subexpression for the given sample.  
   \param v A vector to store intermediate results.  
   \param offset Index in v to begin storing results.  
   \param sampleNo Sample number to evaluate.  
   \param roffset (output parameter) the offset in the return vector where the result begins.  
   
   The return value will be an existing vector so do not deallocate it.  
   If the result is stored in v it should be stored at the offset given.  
   Everything from offset to the end of v should be considered available for this method to use.  
 */  
 DataTypes::ValueType*  
 DataLazy::resolveNP1OUT_P(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const  
 {  
     // we assume that any collapsing has been done before we get here  
     // since we only have one argument we don't need to think about only  
     // processing single points.  
   if (m_readytype!='E')  
   {  
     throw DataException("Programmer error - resolveNP1OUT_P should only be called on expanded Data.");  
   }  
     // since we can't write the result over the input, we need a result offset further along  
   size_t subroffset;  
   const ValueType* vleft=m_left->resolveSample(v,offset+m_left->m_samplesize,sampleNo,subroffset);  
 LAZYDEBUG(cerr << "srcsamplesize=" << offset+m_left->m_samplesize << " beg=" << subroffset << endl;)  
 LAZYDEBUG(cerr << "Offset for 5800=" << getPointOffset(5800/getNumDPPSample(),5800%getNumDPPSample()) << endl;)  
   roffset=offset;  
   size_t loop=0;  
   size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;  
   size_t outstep=getNoValues();  
   size_t instep=m_left->getNoValues();  
 LAZYDEBUG(cerr << "instep=" << instep << " outstep=" << outstep<< " numsteps=" << numsteps << endl;)  
   switch (m_op)  
   {  
     case TRACE:  
     for (loop=0;loop<numsteps;++loop)  
     {  
 size_t zz=sampleNo*getNumDPPSample()+loop;  
 if (zz==5800)  
 {  
 LAZYDEBUG(cerr << "point=" <<  zz<< endl;)  
 LAZYDEBUG(cerr << "Input to  trace=" << DataTypes::pointToString(*vleft,m_left->getShape(),subroffset,"") << endl;)  
 LAZYDEBUG(cerr << "Offset for point=" << getPointOffset(5800/getNumDPPSample(),5800%getNumDPPSample()) << " vs ";)  
 LAZYDEBUG(cerr << subroffset << endl;)  
 LAZYDEBUG(cerr << "output=" << offset << endl;)  
 }  
             DataMaths::trace(*vleft,m_left->getShape(),subroffset, v ,getShape(),offset,m_axis_offset);  
 if (zz==5800)  
 {  
 LAZYDEBUG(cerr << "Result of trace=" << DataTypes::pointToString(v,getShape(),offset,"") << endl;)  
 }  
         subroffset+=instep;  
         offset+=outstep;  
     }  
     break;  
     case TRANS:  
     for (loop=0;loop<numsteps;++loop)  
     {  
             DataMaths::transpose(*vleft,m_left->getShape(),subroffset, v,getShape(),offset,m_axis_offset);  
         subroffset+=instep;  
         offset+=outstep;  
     }  
     break;  
     default:  
     throw DataException("Programmer error - resolveNP1OUTP can not resolve operator "+opToString(m_op)+".");  
   }  
   return &v;  
 }  
   
   
 /*  
   \brief Compute the value of the expression (unary operation with int params) for the given sample.  
   \return Vector which stores the value of the subexpression for the given sample.  
   \param v A vector to store intermediate results.  
   \param offset Index in v to begin storing results.  
   \param sampleNo Sample number to evaluate.  
   \param roffset (output parameter) the offset in the return vector where the result begins.  
   
   The return value will be an existing vector so do not deallocate it.  
   If the result is stored in v it should be stored at the offset given.  
   Everything from offset to the end of v should be considered available for this method to use.  
 */  
 DataTypes::ValueType*  
 DataLazy::resolveNP1OUT_2P(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const  
 {  
     // we assume that any collapsing has been done before we get here  
     // since we only have one argument we don't need to think about only  
     // processing single points.  
   if (m_readytype!='E')  
   {  
     throw DataException("Programmer error - resolveNP1OUT_2P should only be called on expanded Data.");  
   }  
     // since we can't write the result over the input, we need a result offset further along  
   size_t subroffset;  
   const ValueType* vleft=m_left->resolveSample(v,offset+m_left->m_samplesize,sampleNo,subroffset);  
 LAZYDEBUG(cerr << "srcsamplesize=" << offset+m_left->m_samplesize << " beg=" << subroffset << endl;)  
 LAZYDEBUG(cerr << "Offset for 5800=" << getPointOffset(5800/getNumDPPSample(),5800%getNumDPPSample()) << endl;)  
   roffset=offset;  
   size_t loop=0;  
   size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;  
   size_t outstep=getNoValues();  
   size_t instep=m_left->getNoValues();  
 LAZYDEBUG(cerr << "instep=" << instep << " outstep=" << outstep<< " numsteps=" << numsteps << endl;)  
   switch (m_op)  
   {  
     case SWAP:  
     for (loop=0;loop<numsteps;++loop)  
     {  
             DataMaths::swapaxes(*vleft,m_left->getShape(),subroffset, v,getShape(),offset,m_axis_offset, m_transpose);  
         subroffset+=instep;  
         offset+=outstep;  
     }  
     break;  
     default:  
     throw DataException("Programmer error - resolveNP1OUT2P can not resolve operator "+opToString(m_op)+".");  
   }  
   return &v;  
 }  
   
   
   
926  #define PROC_OP(TYPE,X)                               \  #define PROC_OP(TYPE,X)                               \
927      for (int j=0;j<onumsteps;++j)\      for (int j=0;j<onumsteps;++j)\
928      {\      {\
# Line 1295  LAZYDEBUG(cout << " result=      " << re Line 939  LAZYDEBUG(cout << " result=      " << re
939        rroffset+=orightstep;\        rroffset+=orightstep;\
940      }      }
941    
 /*  
   \brief Compute the value of the expression (binary operation) for the given sample.  
   \return Vector which stores the value of the subexpression for the given sample.  
   \param v A vector to store intermediate results.  
   \param offset Index in v to begin storing results.  
   \param sampleNo Sample number to evaluate.  
   \param roffset (output parameter) the offset in the return vector where the result begins.  
   
   The return value will be an existing vector so do not deallocate it.  
   If the result is stored in v it should be stored at the offset given.  
   Everything from offset to the end of v should be considered available for this method to use.  
 */  
 // This method assumes that any subexpressions which evaluate to Constant or Tagged Data  
 // have already been collapsed to IDENTITY. So we must have at least one expanded child.  
 // If both children are expanded, then we can process them in a single operation (we treat  
 // the whole sample as one big datapoint).  
 // If one of the children is not expanded, then we need to treat each point in the sample  
 // individually.  
 // There is an additional complication when scalar operations are considered.  
 // For example, 2+Vector.  
 // In this case each double within the point is treated individually  
 DataTypes::ValueType*  
 DataLazy::resolveBinary(ValueType& v,  size_t offset, int sampleNo, size_t& roffset) const  
 {  
 LAZYDEBUG(cout << "Resolve binary: " << toString() << endl;)  
   
   size_t lroffset=0, rroffset=0;    // offsets in the left and right result vectors  
     // first work out which of the children are expanded  
   bool leftExp=(m_left->m_readytype=='E');  
   bool rightExp=(m_right->m_readytype=='E');  
   if (!leftExp && !rightExp)  
   {  
     throw DataException("Programmer Error - please use collapse if neither argument has type 'E'.");  
   }  
   bool leftScalar=(m_left->getRank()==0);  
   bool rightScalar=(m_right->getRank()==0);  
   if ((m_left->getRank()!=m_right->getRank()) && (!leftScalar && !rightScalar))  
   {  
     throw DataException("resolveBinary - ranks of arguments must match unless one of them is scalar.");  
   }  
   size_t leftsize=m_left->getNoValues();  
   size_t rightsize=m_right->getNoValues();  
   size_t chunksize=1;           // how many doubles will be processed in one go  
   int leftstep=0;       // how far should the left offset advance after each step  
   int rightstep=0;  
   int numsteps=0;       // total number of steps for the inner loop  
   int oleftstep=0;  // the o variables refer to the outer loop  
   int orightstep=0; // The outer loop is only required in cases where there is an extended scalar  
   int onumsteps=1;  
     
   bool LES=(leftExp && leftScalar); // Left is an expanded scalar  
   bool RES=(rightExp && rightScalar);  
   bool LS=(!leftExp && leftScalar); // left is a single scalar  
   bool RS=(!rightExp && rightScalar);  
   bool LN=(!leftExp && !leftScalar);    // left is a single non-scalar  
   bool RN=(!rightExp && !rightScalar);  
   bool LEN=(leftExp && !leftScalar);    // left is an expanded non-scalar  
   bool REN=(rightExp && !rightScalar);  
   
   if ((LES && RES) || (LEN && REN)) // both are Expanded scalars or both are expanded non-scalars  
   {  
     chunksize=m_left->getNumDPPSample()*leftsize;  
     leftstep=0;  
     rightstep=0;  
     numsteps=1;  
   }  
   else if (LES || RES)  
   {  
     chunksize=1;  
     if (LES)        // left is an expanded scalar  
     {  
         if (RS)  
         {  
            leftstep=1;  
            rightstep=0;  
            numsteps=m_left->getNumDPPSample();  
         }  
         else        // RN or REN  
         {  
            leftstep=0;  
            oleftstep=1;  
            rightstep=1;  
            orightstep=(RN ? -(int)rightsize : 0);  
            numsteps=rightsize;  
            onumsteps=m_left->getNumDPPSample();  
         }  
     }  
     else        // right is an expanded scalar  
     {  
         if (LS)  
         {  
            rightstep=1;  
            leftstep=0;  
            numsteps=m_right->getNumDPPSample();  
         }  
         else  
         {  
            rightstep=0;  
            orightstep=1;  
            leftstep=1;  
            oleftstep=(LN ? -(int)leftsize : 0);  
            numsteps=leftsize;  
            onumsteps=m_right->getNumDPPSample();  
         }  
     }  
   }  
   else  // this leaves (LEN, RS), (LEN, RN) and their transposes  
   {  
     if (LEN)    // and Right will be a single value  
     {  
         chunksize=rightsize;  
         leftstep=rightsize;  
         rightstep=0;  
         numsteps=m_left->getNumDPPSample();  
         if (RS)  
         {  
            numsteps*=leftsize;  
         }  
     }  
     else    // REN  
     {  
         chunksize=leftsize;  
         rightstep=leftsize;  
         leftstep=0;  
         numsteps=m_right->getNumDPPSample();  
         if (LS)  
         {  
            numsteps*=rightsize;  
         }  
     }  
   }  
   
   int resultStep=max(leftstep,rightstep);   // only one (at most) should be !=0  
     // Get the values of sub-expressions  
   const ValueType* left=m_left->resolveSample(v,offset+getMaxSampleSize(),sampleNo,lroffset);   // see note on  
     // calcBufss for why we can't put offset as the 2nd param above  
   const ValueType* right=m_right->resolveSample(v,offset+2*getMaxSampleSize(),sampleNo,rroffset); // Note  
     // the right child starts further along.  
 LAZYDEBUG(cout << "Post sub calls in " << toString() << endl;)  
 LAZYDEBUG(cout << "shapes=" << DataTypes::shapeToString(m_left->getShape()) << "," << DataTypes::shapeToString(m_right->getShape()) << endl;)  
 LAZYDEBUG(cout << "chunksize=" << chunksize << endl << "leftstep=" << leftstep << " rightstep=" << rightstep;)  
 LAZYDEBUG(cout << " numsteps=" << numsteps << endl << "oleftstep=" << oleftstep << " orightstep=" << orightstep;)  
 LAZYDEBUG(cout << "onumsteps=" << onumsteps << endl;)  
 LAZYDEBUG(cout << " DPPS=" << m_left->getNumDPPSample() << "," <<m_right->getNumDPPSample() << endl;)  
 LAZYDEBUG(cout << "" << LS << RS << LN << RN << LES << RES <<LEN << REN <<   endl;)  
   
   
  double* resultp=&(v[offset]);     // results are stored at the vector offset we received  
   switch(m_op)  
   {  
     case ADD:  
         PROC_OP(NO_ARG,plus<double>());  
     break;  
     case SUB:  
     PROC_OP(NO_ARG,minus<double>());  
     break;  
     case MUL:  
     PROC_OP(NO_ARG,multiplies<double>());  
     break;  
     case DIV:  
     PROC_OP(NO_ARG,divides<double>());  
     break;  
     case POW:  
        PROC_OP(double (double,double),::pow);  
     break;  
     default:  
     throw DataException("Programmer error - resolveBinary can not resolve operator "+opToString(m_op)+".");  
   }  
   roffset=offset;  
   return &v;  
 }  
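The removed resolveBinary() above (driven by the PROC_OP macro) handles, among other cases, a pointwise binary operation where one operand is a scalar: the single value must be combined with every double in the other operand's sample. A minimal standalone sketch of that scalar broadcast (names and sizes are illustrative, not the escript implementation):

#include <cstddef>
#include <functional>
#include <iostream>
#include <vector>

// Apply op(left[i], scalar) across a whole sample of n doubles.
template <typename BinOp>
void broadcastRightScalar(const double* left, double scalar,
                          double* result, std::size_t n, BinOp op)
{
    for (std::size_t i = 0; i < n; ++i)
        result[i] = op(left[i], scalar);
}

int main()
{
    std::vector<double> sample{1.0, 2.0, 3.0, 4.0};    // one rank-1 datapoint
    std::vector<double> out(sample.size());
    broadcastRightScalar(sample.data(), 2.0, out.data(), out.size(),
                         std::plus<double>());         // e.g. Vector + 2
    for (double v : out) std::cout << v << ' ';        // prints: 3 4 5 6
    std::cout << '\n';
    return 0;
}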
   
   
   
 /*  
   \brief Compute the value of the expression (tensor product) for the given sample.  
   \return Vector which stores the value of the subexpression for the given sample.  
   \param v A vector to store intermediate results.  
   \param offset Index in v to begin storing results.  
   \param sampleNo Sample number to evaluate.  
   \param roffset (output parameter) the offset in the return vector where the result begins.  
   
   The return value will be an existing vector so do not deallocate it.  
   If the result is stored in v it should be stored at the offset given.  
   Everything from offset to the end of v should be considered available for this method to use.  
 */  
 // This method assumes that any subexpressions which evaluate to Constant or Tagged Data  
 // have already been collapsed to IDENTITY. So we must have at least one expanded child.  
 // unlike the other resolve helpers, we must treat these datapoints separately.  
 DataTypes::ValueType*  
 DataLazy::resolveTProd(ValueType& v,  size_t offset, int sampleNo, size_t& roffset) const  
 {  
 LAZYDEBUG(cout << "Resolve TensorProduct: " << toString()  << " to offset " << offset<< endl;)  
   
   size_t lroffset=0, rroffset=0;    // offsets in the left and right result vectors  
     // first work out which of the children are expanded  
   bool leftExp=(m_left->m_readytype=='E');  
   bool rightExp=(m_right->m_readytype=='E');  
   int steps=getNumDPPSample();  
 /*  int leftStep=((leftExp && !rightExp)? m_right->getNoValues() : 0);  
   int rightStep=((rightExp && !leftExp)? m_left->getNoValues() : 0);*/  
   int leftStep=(leftExp? m_left->getNoValues() : 0);        // do not have scalars as input to this method  
   int rightStep=(rightExp?m_right->getNoValues() : 0);  
   
   int resultStep=getNoValues();  
     // Get the values of sub-expressions (leave a gap of one sample for the result).  
   int gap=offset+m_samplesize;    
   
 LAZYDEBUG(cout << "Query left with offset=" << gap << endl;)  
   
   const ValueType* left=m_left->resolveSample(v,gap,sampleNo,lroffset);  
   gap+=m_left->getMaxSampleSize();  
   
   
 LAZYDEBUG(cout << "Query right with offset=" << gap << endl;)  
   
   
   const ValueType* right=m_right->resolveSample(v,gap,sampleNo,rroffset);  
   
 LAZYDEBUG(cerr << "[Left shape]=" << DataTypes::shapeToString(m_left->getShape()) << "\n[Right shape]=" << DataTypes::shapeToString(m_right->getShape()) << " result=" <<DataTypes::shapeToString(getShape()) <<  endl;  
 cout << getNoValues() << endl;)  
 LAZYDEBUG(cerr << "Result of left=";)  
 LAZYDEBUG(cerr << "[" << lroffset << " .. " << lroffset+m_left->getNoValues() << "]" << endl;  
   
 for (int i=lroffset, limit=lroffset+(leftExp?m_left->getNoValues()*m_left->getNumDPPSample():m_left->getNoValues());i<limit;++i)  
 {  
 cout << "[" << setw(2) << i-lroffset << "] " << setw(10) << (*left)[i] << " ";  
 if (i%4==0) cout << endl;  
 })  
 LAZYDEBUG(cerr << "\nResult of right=" << endl;)  
 LAZYDEBUG(  
 for (int i=rroffset, limit=rroffset+(rightExp?m_right->getNoValues()*m_right->getNumDPPSample():m_right->getNoValues());i<limit;++i)  
 {  
 cerr << "[" <<  setw(2)<< i-rroffset << "] " << setw(10) << (*right)[i] << " ";  
 if (i%4==0) cout << endl;  
 }  
 cerr << endl;  
 )  
 LAZYDEBUG(cerr << "Post sub calls: " << toString() << endl;)  
 LAZYDEBUG(cout << "LeftExp=" << leftExp << " rightExp=" << rightExp << endl;)  
 LAZYDEBUG(cout << "LeftR=" << m_left->getRank() << " rightExp=" << m_right->getRank() << endl;)  
 LAZYDEBUG(cout << "LeftSize=" << m_left->getNoValues() << " RightSize=" << m_right->getNoValues() << endl;)  
 LAZYDEBUG(cout << "m_samplesize=" << m_samplesize << endl;)  
 LAZYDEBUG(cout << "outputshape=" << DataTypes::shapeToString(getShape()) << endl;)  
 LAZYDEBUG(cout << "DPPS=" << m_right->getNumDPPSample() <<"."<<endl;)  
   
  double* resultp=&(v[offset]);     // results are stored at the vector offset we received  
   switch(m_op)  
   {  
     case PROD:  
     for (int i=0;i<steps;++i,resultp+=resultStep)  
     {  
   
 LAZYDEBUG(cout << "lroffset=" << lroffset << "rroffset=" << rroffset << endl;)  
 LAZYDEBUG(cout << "l*=" << left << " r*=" << right << endl;)  
 LAZYDEBUG(cout << "m_SL=" << m_SL << " m_SM=" << m_SM << " m_SR=" << m_SR << endl;)  
   
           const double *ptr_0 = &((*left)[lroffset]);  
           const double *ptr_1 = &((*right)[rroffset]);  
   
 LAZYDEBUG(cout << DataTypes::pointToString(*left, m_left->getShape(),lroffset,"LEFT") << endl;)  
 LAZYDEBUG(cout << DataTypes::pointToString(*right,m_right->getShape(),rroffset, "RIGHT") << endl;)  
   
           matrix_matrix_product(m_SL, m_SM, m_SR, ptr_0, ptr_1, resultp, m_transpose);  
   
 LAZYDEBUG(cout << "Results--\n";  
 {  
   DataVector dv(getNoValues());  
 for (int z=0;z<getNoValues();++z)  
 {  
   cout << "[" << setw(2) << z<< "] " << setw(10) << resultp[z] << " ";  
   if (z%4==0) cout << endl;  
   dv[z]=resultp[z];  
 }  
 cout << endl << DataTypes::pointToString(dv,getShape(),0,"RESLT");  
 cout << "\nWritten to: " << resultp << " resultStep=" << resultStep << endl;  
 }  
 )  
       lroffset+=leftStep;  
       rroffset+=rightStep;  
     }  
     break;  
     default:  
     throw DataException("Programmer error - resolveTProduct can not resolve operator "+opToString(m_op)+".");  
   }  
   roffset=offset;  
   return &v;  
 }  
   
   
 #ifdef LAZY_NODE_STORAGE  
942    
943  // The result will be stored in m_samples  // The result will be stored in m_samples
944  // The return value is a pointer to the DataVector, offset is the offset within the return value  // The return value is a pointer to the DataVector, offset is the offset within the return value
# Line 1601  LAZYDEBUG(cout << "Resolve sample " << t Line 954  LAZYDEBUG(cout << "Resolve sample " << t
954    if (m_op==IDENTITY)      if (m_op==IDENTITY)  
955    {    {
956      const ValueType& vec=m_id->getVectorRO();      const ValueType& vec=m_id->getVectorRO();
 //     if (m_readytype=='C')  
 //     {  
 //  roffset=0;      // all samples read from the same position  
 //  return &(m_samples);  
 //     }  
957      roffset=m_id->getPointOffset(sampleNo, 0);      roffset=m_id->getPointOffset(sampleNo, 0);
958    #ifdef LAZY_STACK_PROF
959    int x;
960    if (&x<stackend[omp_get_thread_num()])
961    {
962           stackend[omp_get_thread_num()]=&x;
963    }
964    #endif
965      return &(vec);      return &(vec);
966    }    }
967    if (m_readytype!='E')    if (m_readytype!='E')
# Line 1628  LAZYDEBUG(cout << "Resolve sample " << t Line 983  LAZYDEBUG(cout << "Resolve sample " << t
983    case G_NP1OUT_P: return resolveNodeNP1OUT_P(tid, sampleNo, roffset);    case G_NP1OUT_P: return resolveNodeNP1OUT_P(tid, sampleNo, roffset);
984    case G_TENSORPROD: return resolveNodeTProd(tid, sampleNo, roffset);    case G_TENSORPROD: return resolveNodeTProd(tid, sampleNo, roffset);
985    case G_NP1OUT_2P: return resolveNodeNP1OUT_2P(tid, sampleNo, roffset);    case G_NP1OUT_2P: return resolveNodeNP1OUT_2P(tid, sampleNo, roffset);
986      case G_REDUCTION: return resolveNodeReduction(tid, sampleNo, roffset);
987    default:    default:
988      throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");      throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");
989    }    }
# Line 1766  DataLazy::resolveNodeUnary(int tid, int Line 1122  DataLazy::resolveNodeUnary(int tid, int
1122    
1123    
1124  const DataTypes::ValueType*  const DataTypes::ValueType*
1125    DataLazy::resolveNodeReduction(int tid, int sampleNo, size_t& roffset)
1126    {
1127        // we assume that any collapsing has been done before we get here
1128        // since we only have one argument we don't need to think about only
1129        // processing single points.
1130        // we will also know we won't get identity nodes
1131      if (m_readytype!='E')
1132      {
1133        throw DataException("Programmer error - resolveNodeReduction should only be called on expanded Data.");
1134      }
1135      if (m_op==IDENTITY)
1136      {
1137        throw DataException("Programmer error - resolveNodeReduction should not be called on identity nodes.");
1138      }
1139      size_t loffset=0;
1140      const DataTypes::ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, loffset);
1141    
1142      roffset=m_samplesize*tid;
1143      unsigned int ndpps=getNumDPPSample();
1144      unsigned int psize=DataTypes::noValues(getShape());
1145      double* result=&(m_samples[roffset]);
1146      switch (m_op)
1147      {
1148        case MINVAL:
1149        {
1150          for (unsigned int z=0;z<ndpps;++z)
1151          {
1152            FMin op;
1153            *result=DataMaths::reductionOp(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max());
1154            loffset+=psize;
1155            result++;
1156          }
1157        }
1158        break;
1159        case MAXVAL:
1160        {
1161          for (unsigned int z=0;z<ndpps;++z)
1162          {
1163          FMax op;
1164          *result=DataMaths::reductionOp(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max()*-1);
1165          loffset+=psize;
1166          result++;
1167          }
1168        }
1169        break;
1170        default:
1171      throw DataException("Programmer error - resolveNodeReduction can not resolve operator "+opToString(m_op)+".");
1172      }
1173      return &(m_samples);
1174    }
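resolveNodeReduction() above folds a comparison functor over the psize doubles of each datapoint, starting from an identity value (numeric_limits<double>::max() for minval and its negative for maxval) via DataMaths::reductionOp. A simplified standalone sketch of the same fold, without the DataMaths API (names are illustrative):

#include <algorithm>
#include <cstddef>
#include <iostream>
#include <limits>
#include <vector>

// Fold op over one datapoint of psize doubles, starting from identity.
template <typename Op>
double reducePoint(const double* point, std::size_t psize, Op op, double identity)
{
    double acc = identity;
    for (std::size_t i = 0; i < psize; ++i)
        acc = op(acc, point[i]);
    return acc;
}

int main()
{
    std::vector<double> point{3.5, -1.0, 7.25};        // one datapoint
    double mn = reducePoint(point.data(), point.size(),
                            [](double a, double b) { return std::min(a, b); },
                            std::numeric_limits<double>::max());
    double mx = reducePoint(point.data(), point.size(),
                            [](double a, double b) { return std::max(a, b); },
                            -std::numeric_limits<double>::max());
    std::cout << "minval=" << mn << " maxval=" << mx << '\n';   // minval=-1 maxval=7.25
    return 0;
}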
1175    
1176    const DataTypes::ValueType*
1177  DataLazy::resolveNodeNP1OUT(int tid, int sampleNo, size_t& roffset)  DataLazy::resolveNodeNP1OUT(int tid, int sampleNo, size_t& roffset)
1178  {  {
1179      // we assume that any collapsing has been done before we get here      // we assume that any collapsing has been done before we get here
# Line 2119  LAZYDEBUG(cout << DataTypes::pointToStri Line 1527  LAZYDEBUG(cout << DataTypes::pointToStri
1527    roffset=offset;    roffset=offset;
1528    return &m_samples;    return &m_samples;
1529  }  }
 #endif //LAZY_NODE_STORAGE  
   
 /*  
   \brief Compute the value of the expression for the given sample.  
   \return Vector which stores the value of the subexpression for the given sample.  
   \param v A vector to store intermediate results.  
   \param offset Index in v to begin storing results.  
   \param sampleNo Sample number to evaluate.  
   \param roffset (output parameter) the offset in the return vector where the result begins.  
   
   The return value will be an existing vector so do not deallocate it.  
 */  
 // the vector and the offset are a place where the method could write its data if it wishes  
 // it is not obligated to do so. For example, if it has its own storage already, it can use that.  
 // Hence the return value to indicate where the data is actually stored.  
 // Regardless, the storage should be assumed to be used, even if it isn't.  
   
 // the roffset is the offset within the returned vector where the data begins  
 const DataTypes::ValueType*  
 DataLazy::resolveSample(ValueType& v, size_t offset, int sampleNo, size_t& roffset)  
 {  
 LAZYDEBUG(cout << "Resolve sample " << toString() << endl;)  
     // collapse so we have a 'E' node or an IDENTITY for some other type  
   if (m_readytype!='E' && m_op!=IDENTITY)  
   {  
     collapse();  
   }  
   if (m_op==IDENTITY)    
   {  
     const ValueType& vec=m_id->getVectorRO();  
     if (m_readytype=='C')  
     {  
     roffset=0;  
 LAZYDEBUG(cout << "Finish  sample " << toString() << endl;)  
     return &(vec);  
     }  
     roffset=m_id->getPointOffset(sampleNo, 0);  
 LAZYDEBUG(cout << "Finish  sample " << toString() << endl;)  
     return &(vec);  
   }  
   if (m_readytype!='E')  
   {  
     throw DataException("Programmer Error - Collapse did not produce an expanded node.");  
   }  
   switch (getOpgroup(m_op))  
   {  
   case G_UNARY:  
   case G_UNARY_P: return resolveUnary(v, offset,sampleNo,roffset);  
   case G_BINARY: return resolveBinary(v, offset,sampleNo,roffset);  
   case G_NP1OUT: return resolveNP1OUT(v, offset, sampleNo,roffset);  
   case G_NP1OUT_P: return resolveNP1OUT_P(v, offset, sampleNo,roffset);  
   case G_TENSORPROD: return resolveTProd(v,offset, sampleNo,roffset);  
   case G_NP1OUT_2P: return resolveNP1OUT_2P(v, offset, sampleNo, roffset);  
   default:  
     throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");  
   }  
1530    
 }  
1531    
1532  const DataTypes::ValueType*  const DataTypes::ValueType*
1533  DataLazy::resolveSample(BufferGroup& bg, int sampleNo, size_t& roffset)  DataLazy::resolveSample(int sampleNo, size_t& roffset)
1534  {  {
1535  #ifdef _OPENMP  #ifdef _OPENMP
1536      int tid=omp_get_thread_num();      int tid=omp_get_thread_num();
1537  #else  #else
1538      int tid=0;      int tid=0;
1539  #endif  #endif
1540      return resolveSample(bg.getBuffer(tid),bg.getOffset(tid),sampleNo,roffset);  
1541    #ifdef LAZY_STACK_PROF
1542        stackstart[tid]=&tid;
1543        stackend[tid]=&tid;
1544        const DataTypes::ValueType* r=resolveNodeSample(tid, sampleNo, roffset);
1545        size_t d=(size_t)stackstart[tid]-(size_t)stackend[tid];
1546        #pragma omp critical
1547        if (d>maxstackuse)
1548        {
1549    cout << "Max resolve Stack use " << d << endl;
1550            maxstackuse=d;
1551        }
1552        return r;
1553    #else
1554        return resolveNodeSample(tid, sampleNo, roffset);
1555    #endif
1556  }  }
1557    
1558    
# Line 2196  DataLazy::resolveToIdentity() Line 1562  DataLazy::resolveToIdentity()
1562  {  {
1563     if (m_op==IDENTITY)     if (m_op==IDENTITY)
1564      return;      return;
 #ifndef LAZY_NODE_STORAGE  
    DataReady_ptr p=resolveVectorWorker();  
 #else  
1565     DataReady_ptr p=resolveNodeWorker();     DataReady_ptr p=resolveNodeWorker();
 #endif  
1566     makeIdentity(p);     makeIdentity(p);
1567  }  }
1568    
# Line 2216  void DataLazy::makeIdentity(const DataRe Line 1578  void DataLazy::makeIdentity(const DataRe
1578     else if(p->isExpanded()) {m_readytype='E';}     else if(p->isExpanded()) {m_readytype='E';}
1579     else if (p->isTagged()) {m_readytype='T';}     else if (p->isTagged()) {m_readytype='T';}
1580     else {throw DataException("Unknown DataReady instance in convertToIdentity constructor.");}     else {throw DataException("Unknown DataReady instance in convertToIdentity constructor.");}
    m_buffsRequired=1;  
1581     m_samplesize=p->getNumDPPSample()*p->getNoValues();     m_samplesize=p->getNumDPPSample()*p->getNoValues();
    m_maxsamplesize=m_samplesize;  
1582     m_left.reset();     m_left.reset();
1583     m_right.reset();     m_right.reset();
1584  }  }
# Line 2231  DataLazy::resolve() Line 1591  DataLazy::resolve()
1591      return m_id;      return m_id;
1592  }  }
1593    
 #ifdef LAZY_NODE_STORAGE  
   
1594  // This version of resolve uses storage in each node to hold results  // This version of resolve uses storage in each node to hold results
1595  DataReady_ptr  DataReady_ptr
1596  DataLazy::resolveNodeWorker()  DataLazy::resolveNodeWorker()
# Line 2254  DataLazy::resolveNodeWorker() Line 1612  DataLazy::resolveNodeWorker()
1612    int totalsamples=getNumSamples();    int totalsamples=getNumSamples();
1613    const ValueType* res=0;   // Storage for answer    const ValueType* res=0;   // Storage for answer
1614  LAZYDEBUG(cout << "Total number of samples=" <<totalsamples << endl;)  LAZYDEBUG(cout << "Total number of samples=" <<totalsamples << endl;)
1615    #pragma omp parallel for private(sample,res) schedule(static)    #pragma omp parallel private(sample,res)
   for (sample=0;sample<totalsamples;++sample)  
1616    {    {
1617      size_t roffset=0;      size_t roffset=0;
1618    #ifdef LAZY_STACK_PROF
1619        stackstart[omp_get_thread_num()]=&roffset;
1620        stackend[omp_get_thread_num()]=&roffset;
1621    #endif
1622        #pragma omp for schedule(static)
1623        for (sample=0;sample<totalsamples;++sample)
1624        {
1625            roffset=0;
1626  #ifdef _OPENMP  #ifdef _OPENMP
1627      res=resolveNodeSample(omp_get_thread_num(),sample,roffset);              res=resolveNodeSample(omp_get_thread_num(),sample,roffset);
1628  #else  #else
1629      res=resolveNodeSample(0,sample,roffset);              res=resolveNodeSample(0,sample,roffset);
1630  #endif  #endif
1631  LAZYDEBUG(cout << "Sample #" << sample << endl;)  LAZYDEBUG(cout << "Sample #" << sample << endl;)
1632  LAZYDEBUG(cout << "Final res[" << roffset<< "]=" << (*res)[roffset] << (*res)[roffset]<< endl; )  LAZYDEBUG(cout << "Final res[" << roffset<< "]=" << (*res)[roffset] << (*res)[roffset]<< endl; )
1633      DataVector::size_type outoffset=result->getPointOffset(sample,0);              DataVector::size_type outoffset=result->getPointOffset(sample,0);
1634      memcpy(&(resvec[outoffset]),&((*res)[roffset]),m_samplesize*sizeof(DataVector::ElementType));              memcpy(&(resvec[outoffset]),&((*res)[roffset]),m_samplesize*sizeof(DataVector::ElementType));
1635    }      }
   return resptr;  
 }  
   
 #endif // LAZY_NODE_STORAGE  
   
 // To simplify the memory management, all threads operate on one large vector, rather than one each.  
 // Each sample is evaluated independently and copied into the result DataExpanded.  
 DataReady_ptr  
 DataLazy::resolveVectorWorker()  
 {  
   
 LAZYDEBUG(cout << "Sample size=" << m_samplesize << endl;)  
 LAZYDEBUG(cout << "Buffers=" << m_buffsRequired << endl;)  
   if (m_readytype!='E')     // if the whole sub-expression is Constant or Tagged, then evaluate it normally  
   {  
     collapse();  
1636    }    }
1637    if (m_op==IDENTITY)       // So a lazy expression of Constant or Tagged data will be returned here.  #ifdef LAZY_STACK_PROF
1638      for (int i=0;i<getNumberOfThreads();++i)
1639    {    {
1640      return m_id;      size_t r=((size_t)stackstart[i] - (size_t)stackend[i]);
1641    //  cout << i << " " << stackstart[i] << " .. " << stackend[i] << " = " <<  r << endl;
1642        if (r>maxstackuse)
1643        {
1644            maxstackuse=r;
1645        }
1646    }    }
1647      // from this point on we must have m_op!=IDENTITY and m_readytype=='E'    cout << "Max resolve Stack use=" << maxstackuse << endl;
   size_t threadbuffersize=m_maxsamplesize*(max(1,m_buffsRequired)); // Each thread needs to have enough  
     // storage to evaluate its expression  
   int numthreads=1;  
 #ifdef _OPENMP  
   numthreads=omp_get_max_threads();  
 #endif  
   ValueType v(numthreads*threadbuffersize);  
 LAZYDEBUG(cout << "Buffer created with size=" << v.size() << endl;)  
   DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(),  ValueType(getNoValues()));  
   ValueType& resvec=result->getVectorRW();  
   DataReady_ptr resptr=DataReady_ptr(result);  
   int sample;  
   size_t outoffset;     // offset in the output data  
   int totalsamples=getNumSamples();  
   const ValueType* res=0;   // Vector storing the answer  
   size_t resoffset=0;       // where in the vector to find the answer  
 LAZYDEBUG(cout << "Total number of samples=" <<totalsamples << endl;)  
   #pragma omp parallel for private(sample,resoffset,outoffset,res) schedule(static)  
   for (sample=0;sample<totalsamples;++sample)  
   {  
 LAZYDEBUG(cout << "################################# " << sample << endl;)  
 #ifdef _OPENMP  
     res=resolveSample(v,threadbuffersize*omp_get_thread_num(),sample,resoffset);  
 #else  
     res=resolveSample(v,0,sample,resoffset);   // res would normally be v, but not if its a single IDENTITY op.  
1648  #endif  #endif
 LAZYDEBUG(cerr << "-------------------------------- " << endl;)  
 LAZYDEBUG(cerr<< "Copying sample#" << sample << endl;)  
     outoffset=result->getPointOffset(sample,0);  
 LAZYDEBUG(cerr << "offset=" << outoffset << " from offset=" << resoffset << " " << m_samplesize << " doubles" << endl;)  
     for (unsigned int i=0;i<m_samplesize;++i,++outoffset,++resoffset)   // copy values into the output vector  
     {  
 LAZYDEBUG(cerr << "outoffset=" << outoffset << " resoffset=" << resoffset << " " << (*res)[resoffset]<< endl;)  
     resvec[outoffset]=(*res)[resoffset];  
     }  
 LAZYDEBUG(cerr << DataTypes::pointToString(resvec,getShape(),outoffset-m_samplesize+DataTypes::noValues(getShape()),"Final result:") << endl;)  
 LAZYDEBUG(cerr << "*********************************" << endl;)  
   }  
1649    return resptr;    return resptr;
1650  }  }
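The change to resolveNodeWorker() above splits the old combined parallel-for pragma into an omp parallel region containing per-thread setup (the LAZY_STACK_PROF start/end pointers) followed by a work-shared omp for loop, so the setup runs exactly once per thread before any samples are processed. A small sketch of that OpenMP pattern, assuming compilation with OpenMP enabled (names are illustrative, not escript code):

#include <cstdio>
#ifdef _OPENMP
#include <omp.h>
#endif

int main()
{
    const int totalsamples = 16;
    #pragma omp parallel                  // one structured block per thread
    {
        int processed = 0;                // per-thread setup happens here, once
        #pragma omp for schedule(static)  // then the samples are shared out
        for (int sample = 0; sample < totalsamples; ++sample)
        {
            ++processed;                  // stand-in for resolving one sample
        }
#ifdef _OPENMP
        std::printf("thread %d handled %d samples\n", omp_get_thread_num(), processed);
#else
        std::printf("serial run handled %d samples\n", processed);
#endif
    }
    return 0;
}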
1651    
# Line 2336  DataLazy::toString() const Line 1654  DataLazy::toString() const
1654  {  {
1655    ostringstream oss;    ostringstream oss;
1656    oss << "Lazy Data:";    oss << "Lazy Data:";
1657    intoString(oss);    if (escriptParams.getPRINT_LAZY_TREE()==0)
1658      {
1659          intoString(oss);
1660      }
1661      else
1662      {
1663        oss << endl;
1664        intoTreeString(oss,"");
1665      }
1666    return oss.str();    return oss.str();
1667  }  }
1668    
# Line 2377  DataLazy::intoString(ostringstream& oss) Line 1703  DataLazy::intoString(ostringstream& oss)
1703    case G_UNARY_P:    case G_UNARY_P:
1704    case G_NP1OUT:    case G_NP1OUT:
1705    case G_NP1OUT_P:    case G_NP1OUT_P:
1706      case G_REDUCTION:
1707      oss << opToString(m_op) << '(';      oss << opToString(m_op) << '(';
1708      m_left->intoString(oss);      m_left->intoString(oss);
1709      oss << ')';      oss << ')';
# Line 2399  DataLazy::intoString(ostringstream& oss) Line 1726  DataLazy::intoString(ostringstream& oss)
1726    }    }
1727  }  }
1728    
1729    
1730    void
1731    DataLazy::intoTreeString(ostringstream& oss, string indent) const
1732    {
1733      oss << '[' << m_rank << ':' << setw(3) << m_samplesize << "] " << indent;
1734      switch (getOpgroup(m_op))
1735      {
1736      case G_IDENTITY:
1737        if (m_id->isExpanded())
1738        {
1739           oss << "E";
1740        }
1741        else if (m_id->isTagged())
1742        {
1743          oss << "T";
1744        }
1745        else if (m_id->isConstant())
1746        {
1747          oss << "C";
1748        }
1749        else
1750        {
1751          oss << "?";
1752        }
1753        oss << '@' << m_id.get() << endl;
1754        break;
1755      case G_BINARY:
1756        oss << opToString(m_op) << endl;
1757        indent+='.';
1758        m_left->intoTreeString(oss, indent);
1759        m_right->intoTreeString(oss, indent);
1760        break;
1761      case G_UNARY:
1762      case G_UNARY_P:
1763      case G_NP1OUT:
1764      case G_NP1OUT_P:
1765      case G_REDUCTION:
1766        oss << opToString(m_op) << endl;
1767        indent+='.';
1768        m_left->intoTreeString(oss, indent);
1769        break;
1770      case G_TENSORPROD:
1771        oss << opToString(m_op) << endl;
1772        indent+='.';
1773        m_left->intoTreeString(oss, indent);
1774        m_right->intoTreeString(oss, indent);
1775        break;
1776      case G_NP1OUT_2P:
1777        oss << opToString(m_op) << ", " << m_axis_offset << ", " << m_transpose<< endl;
1778        indent+='.';
1779        m_left->intoTreeString(oss, indent);
1780        break;
1781      default:
1782        oss << "UNKNOWN";
1783      }
1784    }
1785    
1786    
1787  DataAbstract*  DataAbstract*
1788  DataLazy::deepCopy()  DataLazy::deepCopy()
1789  {  {
1790    switch (getOpgroup(m_op))    switch (getOpgroup(m_op))
1791    {    {
1792    case G_IDENTITY:  return new DataLazy(m_id->deepCopy()->getPtr());    case G_IDENTITY:  return new DataLazy(m_id->deepCopy()->getPtr());
1793    case G_UNARY: return new DataLazy(m_left->deepCopy()->getPtr(),m_op);    case G_UNARY:
1794      case G_REDUCTION:      return new DataLazy(m_left->deepCopy()->getPtr(),m_op);
1795      case G_UNARY_P:   return new DataLazy(m_left->deepCopy()->getPtr(), m_op, m_tol);
1796    case G_BINARY:    return new DataLazy(m_left->deepCopy()->getPtr(),m_right->deepCopy()->getPtr(),m_op);    case G_BINARY:    return new DataLazy(m_left->deepCopy()->getPtr(),m_right->deepCopy()->getPtr(),m_op);
1797    case G_NP1OUT: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(),m_op);    case G_NP1OUT: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(),m_op);
1798    case G_TENSORPROD: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);    case G_TENSORPROD: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
1799      case G_NP1OUT_P:   return new DataLazy(m_left->deepCopy()->getPtr(),m_op,  m_axis_offset);
1800      case G_NP1OUT_2P:  return new DataLazy(m_left->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
1801    default:    default:
1802      throw DataException("Programmer error - do not know how to deepcopy operator "+opToString(m_op)+".");      throw DataException("Programmer error - do not know how to deepcopy operator "+opToString(m_op)+".");
1803    }    }
1804  }  }
1805    
1806    
1807    
1808  // There is no single, natural interpretation of getLength on DataLazy.  // There is no single, natural interpretation of getLength on DataLazy.
1809  // Instances of DataReady can look at the size of their vectors.  // Instances of DataReady can look at the size of their vectors.
1810  // For lazy though, it could be the size the data would be if it were resolved;  // For lazy though, it could be the size the data would be if it were resolved;

