
Diff of /branches/clazy/escriptcore/src/DataLazy.cpp


revision 2514 by jfenwick, Fri Jul 3 00:57:45 2009 UTC vs revision 2824 by jfenwick, Wed Dec 16 01:22:56 2009 UTC
# Line 1  Line 1 
1    
2  /*******************************************************  /*******************************************************
3  *  *
4  * Copyright (c) 2003-2008 by University of Queensland  * Copyright (c) 2003-2009 by University of Queensland
5  * Earth Systems Science Computational Center (ESSCC)  * Earth Systems Science Computational Center (ESSCC)
6  * http://www.uq.edu.au/esscc  * http://www.uq.edu.au/esscc
7  *  *
# Line 44  bool privdebug=false; Line 44  bool privdebug=false;
44    
45  // #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {cerr << "\n!!!!!!! SIZE LIMIT EXCEEDED " << m_children << ";" << m_height << endl << toString() << endl;resolveToIdentity();}  // #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {cerr << "\n!!!!!!! SIZE LIMIT EXCEEDED " << m_children << ";" << m_height << endl << toString() << endl;resolveToIdentity();}
46    
47  #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {resolveToIdentity();}  // #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {cerr << "SIZE LIMIT EXCEEDED " << m_height << endl;resolveToIdentity();}
48    
49    
50    #define SIZELIMIT if (m_height>escript::escriptParams.getTOO_MANY_LEVELS())  {if (escript::escriptParams.getLAZY_VERBOSE()){cerr << "SIZE LIMIT EXCEEDED height=" << m_height << endl;}resolveToIdentity();}
51    
52  /*  /*
53  How does DataLazy work?  How does DataLazy work?
54  ~~~~~~~~~~~~~~~~~~~~~~~  ~~~~~~~~~~~~~~~~~~~~~~~
# Line 109  namespace escript Line 111  namespace escript
111  namespace  namespace
112  {  {
113    
114    
115    // enabling this will print out whenever the maximum stacksize used by resolve increases
116    // it assumes _OPENMP is also in use
117    //#define LAZY_STACK_PROF
118    
119    
120    
121    #ifndef _OPENMP
122      #ifdef LAZY_STACK_PROF
123      #undef LAZY_STACK_PROF
124      #endif
125    #endif
126    
127    
128    #ifdef LAZY_STACK_PROF
129    std::vector<void*> stackstart(getNumberOfThreads());
130    std::vector<void*> stackend(getNumberOfThreads());
131    size_t maxstackuse=0;
132    #endif
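
The profiling above works by remembering the address of a stack variable when a resolve starts (stackstart) and the lowest local address seen while recursing (stackend); their difference approximates the stack consumed. A minimal single-threaded sketch of that idea (stand-alone, not part of DataLazy):

    #include <cstddef>
    #include <iostream>

    static char* stack_top = 0;      // address of a local at the start of the resolve
    static char* stack_low = 0;      // lowest local address observed while recursing

    void recurse(int depth)
    {
        char marker;                 // lives on this frame's stack
        if (&marker < stack_low)     // stacks grow downwards on common platforms
            stack_low = &marker;
        if (depth > 0)
            recurse(depth - 1);
    }

    int main()
    {
        char top;
        stack_top = &top;
        stack_low = &top;
        recurse(100);
        std::cout << "approx stack use: " << (size_t)(stack_top - stack_low)
                  << " bytes" << std::endl;
        return 0;
    }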
133    
134  enum ES_opgroup  enum ES_opgroup
135  {  {
136     G_UNKNOWN,     G_UNKNOWN,
# Line 119  enum ES_opgroup Line 141  enum ES_opgroup
141     G_NP1OUT,        // non-pointwise op with one output     G_NP1OUT,        // non-pointwise op with one output
142     G_NP1OUT_P,      // non-pointwise op with one output requiring a parameter     G_NP1OUT_P,      // non-pointwise op with one output requiring a parameter
143     G_TENSORPROD,    // general tensor product     G_TENSORPROD,    // general tensor product
144     G_NP1OUT_2P      // non-pointwise op with one output requiring two params     G_NP1OUT_2P,     // non-pointwise op with one output requiring two params
145       G_REDUCTION      // non-pointwise unary op with a scalar output
146  };  };
147    
148    
# Line 134  string ES_opstrings[]={"UNKNOWN","IDENTI Line 157  string ES_opstrings[]={"UNKNOWN","IDENTI
157              "symmetric","nonsymmetric",              "symmetric","nonsymmetric",
158              "prod",              "prod",
159              "transpose", "trace",              "transpose", "trace",
160              "swapaxes"};              "swapaxes",
161  int ES_opcount=41;              "minval", "maxval"};
162    int ES_opcount=43;
163  ES_opgroup opgroups[]={G_UNKNOWN,G_IDENTITY,G_BINARY,G_BINARY,G_BINARY,G_BINARY, G_BINARY,  ES_opgroup opgroups[]={G_UNKNOWN,G_IDENTITY,G_BINARY,G_BINARY,G_BINARY,G_BINARY, G_BINARY,
164              G_UNARY,G_UNARY,G_UNARY, //10              G_UNARY,G_UNARY,G_UNARY, //10
165              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,    // 17              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,    // 17
# Line 145  ES_opgroup opgroups[]={G_UNKNOWN,G_IDENT Line 169  ES_opgroup opgroups[]={G_UNKNOWN,G_IDENT
169              G_NP1OUT,G_NP1OUT,              G_NP1OUT,G_NP1OUT,
170              G_TENSORPROD,              G_TENSORPROD,
171              G_NP1OUT_P, G_NP1OUT_P,              G_NP1OUT_P, G_NP1OUT_P,
172              G_NP1OUT_2P};              G_NP1OUT_2P,
173                G_REDUCTION, G_REDUCTION};
174  inline  inline
175  ES_opgroup  ES_opgroup
176  getOpgroup(ES_optype op)  getOpgroup(ES_optype op)
# Line 190  resultShape(DataAbstract_ptr left, DataA Line 215  resultShape(DataAbstract_ptr left, DataA
215        {        {
216          throw DataException("Shapes not the same - shapes must match for (point)binary operations.");          throw DataException("Shapes not the same - shapes must match for (point)binary operations.");
217        }        }
218    
219        if (left->getRank()==0)   // we need to allow scalar * anything        if (left->getRank()==0)   // we need to allow scalar * anything
220        {        {
221          return right->getShape();          return right->getShape();
# Line 217  resultShape(DataAbstract_ptr left, ES_op Line 243  resultShape(DataAbstract_ptr left, ES_op
243          int rank=left->getRank();          int rank=left->getRank();
244          if (axis_offset<0 || axis_offset>rank)          if (axis_offset<0 || axis_offset>rank)
245          {          {
246                 throw DataException("Error - Data::transpose must have 0 <= axis_offset <= rank=" + rank);              stringstream e;
247              }              e << "Error - Data::transpose must have 0 <= axis_offset <= rank=" << rank;
248              for (int i=0; i<rank; i++)              throw DataException(e.str());
249            }
250            for (int i=0; i<rank; i++)
251          {          {
252             int index = (axis_offset+i)%rank;             int index = (axis_offset+i)%rank;
253                 sh.push_back(s[index]); // Append to new shape             sh.push_back(s[index]); // Append to new shape
254              }          }
255          return sh;          return sh;
256         }         }
257      break;      break;
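
The transpose rule above simply rotates the shape: entry i of the result is s[(axis_offset+i)%rank]. For example, with the same loop, a shape of (2,3,4) and axis_offset=1 becomes (3,4,2):

    #include <iostream>
    #include <vector>

    int main()
    {
        std::vector<int> s;                 // input shape (2,3,4)
        s.push_back(2); s.push_back(3); s.push_back(4);
        int rank = (int)s.size();
        int axis_offset = 1;
        std::vector<int> sh;
        for (int i = 0; i < rank; i++)
        {
            sh.push_back(s[(axis_offset + i) % rank]);   // same rule as resultShape above
        }
        for (int i = 0; i < rank; i++)
            std::cout << sh[i] << " ";                   // prints: 3 4 2
        std::cout << std::endl;
        return 0;
    }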
# Line 302  SwapShape(DataAbstract_ptr left, const i Line 330  SwapShape(DataAbstract_ptr left, const i
330          throw DataException("Error - Data::swapaxes argument must have at least rank 2.");          throw DataException("Error - Data::swapaxes argument must have at least rank 2.");
331       }       }
332       if (axis0<0 || axis0>rank-1) {       if (axis0<0 || axis0>rank-1) {
333          throw DataException("Error - Data::swapaxes: axis0 must be between 0 and rank-1=" + rank-1);          stringstream e;
334            e << "Error - Data::swapaxes: axis0 must be between 0 and rank-1=" << (rank-1);
335            throw DataException(e.str());
336       }       }
337       if (axis1<0 || axis1>rank-1) {       if (axis1<0 || axis1>rank-1) {
338           throw DataException("Error - Data::swapaxes: axis1 must be between 0 and rank-1=" + rank-1);          stringstream e;
339            e << "Error - Data::swapaxes: axis1 must be between 0 and rank-1=" << (rank-1);
340            throw DataException(e.str());
341       }       }
342       if (axis0 == axis1) {       if (axis0 == axis1) {
343           throw DataException("Error - Data::swapaxes: axis indices must be different.");           throw DataException("Error - Data::swapaxes: axis indices must be different.");
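
The messages replaced here were built with "...rank-1=" + rank-1, which C++ reads as pointer arithmetic on the string literal rather than appending the number; the revision switches to a stringstream. A small illustration of the difference:

    #include <iostream>
    #include <sstream>

    int main()
    {
        int rank = 3;
        // Old form: "literal" + rank - 1 just offsets the char* by two,
        // printing the literal with its first two characters chopped off.
        std::cout << ("axis0 must be between 0 and rank-1=" + rank - 1) << std::endl;
        // New form: stream the number into the message, as SwapShape now does.
        std::stringstream e;
        e << "axis0 must be between 0 and rank-1=" << (rank - 1);
        std::cout << e.str() << std::endl;
        return 0;
    }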
# Line 394  GTPShape(DataAbstract_ptr left, DataAbst Line 426  GTPShape(DataAbstract_ptr left, DataAbst
426    return shape2;    return shape2;
427  }  }
428    
  // determine the number of buffers required to evaluate an expression combining left and right  
 // NP1OUT needs an extra buffer because we can't write the answers over the top of the input.  
 // The same goes for G_TENSORPROD  
 // It might seem that pointwise binary ops (G_BINARY) could be written over the top of the lefts.  
 // This would be true were it not for the possibility that the LHS could be a scalar which needs to be examined  
 // multiple times  
 int  
 calcBuffs(const DataLazy_ptr& left, const DataLazy_ptr& right, ES_optype op)  
 {  
    switch(getOpgroup(op))  
    {  
    case G_IDENTITY: return 1;  
    case G_BINARY: return 1+max(left->getBuffsRequired(),right->getBuffsRequired()+1);  
    case G_UNARY:  
    case G_UNARY_P: return max(left->getBuffsRequired(),1);  
    case G_NP1OUT: return 1+max(left->getBuffsRequired(),1);  
    case G_NP1OUT_P: return 1+max(left->getBuffsRequired(),1);  
    case G_TENSORPROD: return 1+max(left->getBuffsRequired(),right->getBuffsRequired()+1);  
    case G_NP1OUT_2P: return 1+max(left->getBuffsRequired(),1);  
    default:  
     throw DataException("Programmer Error - attempt to calcBuffs() for operator "+opToString(op)+".");  
    }  
 }  
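
A worked example of the counting rule being removed here: for sin(A)+B with ready (IDENTITY) leaves, each leaf needs 1 buffer, sin(A) is G_UNARY so max(1,1)=1, and the top-level G_BINARY gives 1+max(1,1+1)=3 scratch buffers per thread (getSampleBufferSize() below multiplied that by m_maxsamplesize). The per-node sample storage used from this revision on makes that bookkeeping unnecessary.

    // sin(A) + B, with A and B already ready:
    //   A, B          G_IDENTITY -> 1
    //   sin(A)        G_UNARY    -> max(1, 1)         = 1
    //   sin(A) + B    G_BINARY   -> 1 + max(1, 1 + 1) = 3 buffers per thread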
   
   
429  }   // end anonymous namespace  }   // end anonymous namespace
430    
431    
# Line 434  opToString(ES_optype op) Line 441  opToString(ES_optype op)
441    return ES_opstrings[op];    return ES_opstrings[op];
442  }  }
443    
 #ifdef LAZY_NODE_STORAGE  
444  void DataLazy::LazyNodeSetup()  void DataLazy::LazyNodeSetup()
445  {  {
446  #ifdef _OPENMP  #ifdef _OPENMP
# Line 451  void DataLazy::LazyNodeSetup() Line 457  void DataLazy::LazyNodeSetup()
457      m_sampleids[0]=-1;      m_sampleids[0]=-1;
458  #endif  // _OPENMP  #endif  // _OPENMP
459  }  }
 #endif   // LAZY_NODE_STORAGE  
460    
461    
462  // Creates an identity node  // Creates an identity node
463  DataLazy::DataLazy(DataAbstract_ptr p)  DataLazy::DataLazy(DataAbstract_ptr p)
464      : parent(p->getFunctionSpace(),p->getShape())      : parent(p->getFunctionSpace(),p->getShape())
 #ifdef LAZY_NODE_STORAGE  
465      ,m_sampleids(0),      ,m_sampleids(0),
466      m_samples(1)      m_samples(1)
 #endif  
467  {  {
468     if (p->isLazy())     if (p->isLazy())
469     {     {
# Line 480  LAZYDEBUG(cout << "(1)Lazy created with Line 483  LAZYDEBUG(cout << "(1)Lazy created with
483  }  }
484    
485  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op)  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op)
486      : parent(left->getFunctionSpace(),left->getShape()),      : parent(left->getFunctionSpace(),(getOpgroup(op)!=G_REDUCTION)?left->getShape():DataTypes::scalarShape),
487      m_op(op),      m_op(op),
488      m_axis_offset(0),      m_axis_offset(0),
489      m_transpose(0),      m_transpose(0),
490      m_SL(0), m_SM(0), m_SR(0)      m_SL(0), m_SM(0), m_SR(0)
491  {  {
492     if ((getOpgroup(op)!=G_UNARY) && (getOpgroup(op)!=G_NP1OUT))     if ((getOpgroup(op)!=G_UNARY) && (getOpgroup(op)!=G_NP1OUT) && (getOpgroup(op)!=G_REDUCTION))
493     {     {
494      throw DataException("Programmer error - constructor DataLazy(left, op) will only process UNARY operations.");      throw DataException("Programmer error - constructor DataLazy(left, op) will only process UNARY operations.");
495     }     }
# Line 502  DataLazy::DataLazy(DataAbstract_ptr left Line 505  DataLazy::DataLazy(DataAbstract_ptr left
505     }     }
506     m_readytype=lleft->m_readytype;     m_readytype=lleft->m_readytype;
507     m_left=lleft;     m_left=lleft;
    m_buffsRequired=calcBuffs(m_left, m_right,m_op); // yeah m_right will be null at this point  
508     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
    m_maxsamplesize=max(m_samplesize,m_left->getMaxSampleSize());  
509     m_children=m_left->m_children+1;     m_children=m_left->m_children+1;
510     m_height=m_left->m_height+1;     m_height=m_left->m_height+1;
 #ifdef LAZY_NODE_STORAGE  
511     LazyNodeSetup();     LazyNodeSetup();
 #endif  
512     SIZELIMIT     SIZELIMIT
513  }  }
514    
# Line 576  LAZYDEBUG(cout << "Right " << right.get( Line 575  LAZYDEBUG(cout << "Right " << right.get(
575      m_readytype='C';      m_readytype='C';
576     }     }
577     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
    m_maxsamplesize=max(max(m_samplesize,m_right->getMaxSampleSize()),m_left->getMaxSampleSize());    
    m_buffsRequired=calcBuffs(m_left, m_right,m_op);  
578     m_children=m_left->m_children+m_right->m_children+2;     m_children=m_left->m_children+m_right->m_children+2;
579     m_height=max(m_left->m_height,m_right->m_height)+1;     m_height=max(m_left->m_height,m_right->m_height)+1;
 #ifdef LAZY_NODE_STORAGE  
580     LazyNodeSetup();     LazyNodeSetup();
 #endif  
581     SIZELIMIT     SIZELIMIT
582  LAZYDEBUG(cout << "(3)Lazy created with " << m_samplesize << endl;)  LAZYDEBUG(cout << "(3)Lazy created with " << m_samplesize << endl;)
583  }  }
# Line 646  DataLazy::DataLazy(DataAbstract_ptr left Line 641  DataLazy::DataLazy(DataAbstract_ptr left
641      m_readytype='C';      m_readytype='C';
642     }     }
643     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
    m_maxsamplesize=max(max(m_samplesize,m_right->getMaxSampleSize()),m_left->getMaxSampleSize());    
    m_buffsRequired=calcBuffs(m_left, m_right,m_op);  
644     m_children=m_left->m_children+m_right->m_children+2;     m_children=m_left->m_children+m_right->m_children+2;
645     m_height=max(m_left->m_height,m_right->m_height)+1;     m_height=max(m_left->m_height,m_right->m_height)+1;
 #ifdef LAZY_NODE_STORAGE  
646     LazyNodeSetup();     LazyNodeSetup();
 #endif  
647     SIZELIMIT     SIZELIMIT
648  LAZYDEBUG(cout << "(4)Lazy created with " << m_samplesize << endl;)  LAZYDEBUG(cout << "(4)Lazy created with " << m_samplesize << endl;)
649  }  }
# Line 680  DataLazy::DataLazy(DataAbstract_ptr left Line 671  DataLazy::DataLazy(DataAbstract_ptr left
671     }     }
672     m_readytype=lleft->m_readytype;     m_readytype=lleft->m_readytype;
673     m_left=lleft;     m_left=lleft;
    m_buffsRequired=calcBuffs(m_left, m_right,m_op); // yeah m_right will be null at this point  
674     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
    m_maxsamplesize=max(m_samplesize,m_left->getMaxSampleSize());  
675     m_children=m_left->m_children+1;     m_children=m_left->m_children+1;
676     m_height=m_left->m_height+1;     m_height=m_left->m_height+1;
 #ifdef LAZY_NODE_STORAGE  
677     LazyNodeSetup();     LazyNodeSetup();
 #endif  
678     SIZELIMIT     SIZELIMIT
679  LAZYDEBUG(cout << "(5)Lazy created with " << m_samplesize << endl;)  LAZYDEBUG(cout << "(5)Lazy created with " << m_samplesize << endl;)
680  }  }
# Line 714  DataLazy::DataLazy(DataAbstract_ptr left Line 701  DataLazy::DataLazy(DataAbstract_ptr left
701     }     }
702     m_readytype=lleft->m_readytype;     m_readytype=lleft->m_readytype;
703     m_left=lleft;     m_left=lleft;
    m_buffsRequired=calcBuffs(m_left, m_right,m_op); // yeah m_right will be null at this point  
704     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
    m_maxsamplesize=max(m_samplesize,m_left->getMaxSampleSize());  
705     m_children=m_left->m_children+1;     m_children=m_left->m_children+1;
706     m_height=m_left->m_height+1;     m_height=m_left->m_height+1;
 #ifdef LAZY_NODE_STORAGE  
707     LazyNodeSetup();     LazyNodeSetup();
 #endif  
708     SIZELIMIT     SIZELIMIT
709  LAZYDEBUG(cout << "(6)Lazy created with " << m_samplesize << endl;)  LAZYDEBUG(cout << "(6)Lazy created with " << m_samplesize << endl;)
710  }  }
# Line 749  DataLazy::DataLazy(DataAbstract_ptr left Line 732  DataLazy::DataLazy(DataAbstract_ptr left
732     }     }
733     m_readytype=lleft->m_readytype;     m_readytype=lleft->m_readytype;
734     m_left=lleft;     m_left=lleft;
    m_buffsRequired=calcBuffs(m_left, m_right,m_op); // yeah m_right will be null at this point  
735     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
    m_maxsamplesize=max(m_samplesize,m_left->getMaxSampleSize());  
736     m_children=m_left->m_children+1;     m_children=m_left->m_children+1;
737     m_height=m_left->m_height+1;     m_height=m_left->m_height+1;
 #ifdef LAZY_NODE_STORAGE  
738     LazyNodeSetup();     LazyNodeSetup();
 #endif  
739     SIZELIMIT     SIZELIMIT
740  LAZYDEBUG(cout << "(7)Lazy created with " << m_samplesize << endl;)  LAZYDEBUG(cout << "(7)Lazy created with " << m_samplesize << endl;)
741  }  }
742    
743  DataLazy::~DataLazy()  DataLazy::~DataLazy()
744  {  {
 #ifdef LAZY_NODE_SETUP  
745     delete[] m_sampleids;     delete[] m_sampleids;
    delete[] m_samples;  
 #endif  
 }  
   
   
 int  
 DataLazy::getBuffsRequired() const  
 {  
     return m_buffsRequired;  
746  }  }
747    
748    
 size_t  
 DataLazy::getMaxSampleSize() const  
 {  
     return m_maxsamplesize;  
 }  
   
   
   
 size_t  
 DataLazy::getSampleBufferSize() const  
 {  
     return m_maxsamplesize*(max(1,m_buffsRequired));  
 }  
   
749  /*  /*
750    \brief Evaluates the expression using methods on Data.    \brief Evaluates the expression using methods on Data.
751    This does the work for the collapse method.    This does the work for the collapse method.
# Line 933  DataLazy::collapseToReady() Line 888  DataLazy::collapseToReady()
888      case SWAP:      case SWAP:
889      result=left.swapaxes(m_axis_offset, m_transpose);      result=left.swapaxes(m_axis_offset, m_transpose);
890      break;      break;
891        case MINVAL:
892        result=left.minval();
893        break;
894        case MAXVAL:
895        result=left.maxval();
896        break;
897      default:      default:
898      throw DataException("Programmer error - collapseToReady does not know how to resolve operator "+opToString(m_op)+".");      throw DataException("Programmer error - collapseToReady does not know how to resolve operator "+opToString(m_op)+".");
899    }    }
# Line 960  DataLazy::collapse() Line 921  DataLazy::collapse()
921    m_op=IDENTITY;    m_op=IDENTITY;
922  }  }
923    
 /*  
   \brief Compute the value of the expression (unary operation) for the given sample.  
   \return Vector which stores the value of the subexpression for the given sample.  
   \param v A vector to store intermediate results.  
   \param offset Index in v to begin storing results.  
   \param sampleNo Sample number to evaluate.  
   \param roffset (output parameter) the offset in the return vector where the result begins.  
   
   The return value will be an existing vector so do not deallocate it.  
   If the result is stored in v it should be stored at the offset given.  
   Everything from offset to the end of v should be considered available for this method to use.  
 */  
 DataTypes::ValueType*  
 DataLazy::resolveUnary(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const  
 {  
     // we assume that any collapsing has been done before we get here  
     // since we only have one argument we don't need to think about only  
     // processing single points.  
   if (m_readytype!='E')  
   {  
     throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");  
   }  
   const ValueType* vleft=m_left->resolveSample(v,offset,sampleNo,roffset);  
   const double* left=&((*vleft)[roffset]);  
   double* result=&(v[offset]);  
   roffset=offset;  
   switch (m_op)  
   {  
     case SIN:    
     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sin);  
     break;  
     case COS:  
     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cos);  
     break;  
     case TAN:  
     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tan);  
     break;  
     case ASIN:  
     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::asin);  
     break;  
     case ACOS:  
     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::acos);  
     break;  
     case ATAN:  
     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::atan);  
     break;  
     case SINH:  
     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sinh);  
     break;  
     case COSH:  
     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cosh);  
     break;  
     case TANH:  
     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tanh);  
     break;  
     case ERF:  
 #if defined (_WIN32) && !defined(__INTEL_COMPILER)  
     throw DataException("Error - Data:: erf function is not supported on _WIN32 platforms.");  
 #else  
     tensor_unary_operation(m_samplesize, left, result, ::erf);  
     break;  
 #endif  
    case ASINH:  
 #if defined (_WIN32) && !defined(__INTEL_COMPILER)  
     tensor_unary_operation(m_samplesize, left, result, escript::asinh_substitute);  
 #else  
     tensor_unary_operation(m_samplesize, left, result, ::asinh);  
 #endif    
     break;  
    case ACOSH:  
 #if defined (_WIN32) && !defined(__INTEL_COMPILER)  
     tensor_unary_operation(m_samplesize, left, result, escript::acosh_substitute);  
 #else  
     tensor_unary_operation(m_samplesize, left, result, ::acosh);  
 #endif    
     break;  
    case ATANH:  
 #if defined (_WIN32) && !defined(__INTEL_COMPILER)  
     tensor_unary_operation(m_samplesize, left, result, escript::atanh_substitute);  
 #else  
     tensor_unary_operation(m_samplesize, left, result, ::atanh);  
 #endif    
     break;  
     case LOG10:  
     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log10);  
     break;  
     case LOG:  
     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log);  
     break;  
     case SIGN:  
     tensor_unary_operation(m_samplesize, left, result, escript::fsign);  
     break;  
     case ABS:  
     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::fabs);  
     break;  
     case NEG:  
     tensor_unary_operation(m_samplesize, left, result, negate<double>());  
     break;  
     case POS:  
     // it doesn't mean anything for delayed.  
     // it will just trigger a deep copy of the lazy object  
     throw DataException("Programmer error - POS not supported for lazy data.");  
     break;  
     case EXP:  
     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::exp);  
     break;  
     case SQRT:  
     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sqrt);  
     break;  
     case RECIP:  
     tensor_unary_operation(m_samplesize, left, result, bind1st(divides<double>(),1.));  
     break;  
     case GZ:  
     tensor_unary_operation(m_samplesize, left, result, bind2nd(greater<double>(),0.0));  
     break;  
     case LZ:  
     tensor_unary_operation(m_samplesize, left, result, bind2nd(less<double>(),0.0));  
     break;  
     case GEZ:  
     tensor_unary_operation(m_samplesize, left, result, bind2nd(greater_equal<double>(),0.0));  
     break;  
     case LEZ:  
     tensor_unary_operation(m_samplesize, left, result, bind2nd(less_equal<double>(),0.0));  
     break;  
 // There are actually G_UNARY_P but I don't see a compelling reason to treat them differently  
     case NEZ:  
     tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsGT(),m_tol));  
     break;  
     case EZ:  
     tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsLTE(),m_tol));  
     break;  
   
     default:  
     throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");  
   }  
   return &v;  
 }  
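
resolveUnary applies a scalar function across the m_samplesize doubles of the sample via tensor_unary_operation. Based only on how it is called here, that helper amounts to an elementwise map; a hypothetical stand-in would be:

    #include <cstddef>

    // Hypothetical stand-in for the elementwise helper used above (names assumed):
    template <typename UnaryOp>
    void tensor_unary_sketch(size_t n, const double* in, double* out, UnaryOp op)
    {
        for (size_t i = 0; i < n; ++i)
        {
            out[i] = op(in[i]);      // e.g. ::sin, escript::fsign, bind2nd(greater<double>(),0.0)
        }
    }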
   
   
   
   
   
   
 /*  
   \brief Compute the value of the expression (unary operation) for the given sample.  
   \return Vector which stores the value of the subexpression for the given sample.  
   \param v A vector to store intermediate results.  
   \param offset Index in v to begin storing results.  
   \param sampleNo Sample number to evaluate.  
   \param roffset (output parameter) the offset in the return vector where the result begins.  
   
   The return value will be an existing vector so do not deallocate it.  
   If the result is stored in v it should be stored at the offset given.  
   Everything from offset to the end of v should be considered available for this method to use.  
 */  
 DataTypes::ValueType*  
 DataLazy::resolveNP1OUT(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const  
 {  
     // we assume that any collapsing has been done before we get here  
     // since we only have one argument we don't need to think about only  
     // processing single points.  
   if (m_readytype!='E')  
   {  
     throw DataException("Programmer error - resolveNP1OUT should only be called on expanded Data.");  
   }  
     // since we can't write the result over the input, we need a result offset further along  
   size_t subroffset=roffset+m_samplesize;  
 LAZYDEBUG(cerr << "subroffset=" << subroffset << endl;)  
   const ValueType* vleft=m_left->resolveSample(v,offset+m_left->m_samplesize,sampleNo,subroffset);  
   roffset=offset;  
   size_t loop=0;  
   size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;  
   size_t step=getNoValues();  
   switch (m_op)  
   {  
     case SYM:  
     for (loop=0;loop<numsteps;++loop)  
     {  
         DataMaths::symmetric(*vleft,m_left->getShape(),subroffset, v, getShape(), offset);  
         subroffset+=step;  
         offset+=step;  
     }  
     break;  
     case NSYM:  
     for (loop=0;loop<numsteps;++loop)  
     {  
         DataMaths::nonsymmetric(*vleft,m_left->getShape(),subroffset, v, getShape(), offset);  
         subroffset+=step;  
         offset+=step;  
     }  
     break;  
     default:  
     throw DataException("Programmer error - resolveNP1OUT can not resolve operator "+opToString(m_op)+".");  
   }  
   return &v;  
 }  
   
 /*  
   \brief Compute the value of the expression (unary operation) for the given sample.  
   \return Vector which stores the value of the subexpression for the given sample.  
   \param v A vector to store intermediate results.  
   \param offset Index in v to begin storing results.  
   \param sampleNo Sample number to evaluate.  
   \param roffset (output parameter) the offset in the return vector where the result begins.  
   
   The return value will be an existing vector so do not deallocate it.  
   If the result is stored in v it should be stored at the offset given.  
   Everything from offset to the end of v should be considered available for this method to use.  
 */  
 DataTypes::ValueType*  
 DataLazy::resolveNP1OUT_P(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const  
 {  
     // we assume that any collapsing has been done before we get here  
     // since we only have one argument we don't need to think about only  
     // processing single points.  
   if (m_readytype!='E')  
   {  
     throw DataException("Programmer error - resolveNP1OUT_P should only be called on expanded Data.");  
   }  
     // since we can't write the result over the input, we need a result offset further along  
   size_t subroffset;  
   const ValueType* vleft=m_left->resolveSample(v,offset+m_left->m_samplesize,sampleNo,subroffset);  
 LAZYDEBUG(cerr << "srcsamplesize=" << offset+m_left->m_samplesize << " beg=" << subroffset << endl;)  
 LAZYDEBUG(cerr << "Offset for 5800=" << getPointOffset(5800/getNumDPPSample(),5800%getNumDPPSample()) << endl;)  
   roffset=offset;  
   size_t loop=0;  
   size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;  
   size_t outstep=getNoValues();  
   size_t instep=m_left->getNoValues();  
 LAZYDEBUG(cerr << "instep=" << instep << " outstep=" << outstep<< " numsteps=" << numsteps << endl;)  
   switch (m_op)  
   {  
     case TRACE:  
     for (loop=0;loop<numsteps;++loop)  
     {  
 size_t zz=sampleNo*getNumDPPSample()+loop;  
 if (zz==5800)  
 {  
 LAZYDEBUG(cerr << "point=" <<  zz<< endl;)  
 LAZYDEBUG(cerr << "Input to  trace=" << DataTypes::pointToString(*vleft,m_left->getShape(),subroffset,"") << endl;)  
 LAZYDEBUG(cerr << "Offset for point=" << getPointOffset(5800/getNumDPPSample(),5800%getNumDPPSample()) << " vs ";)  
 LAZYDEBUG(cerr << subroffset << endl;)  
 LAZYDEBUG(cerr << "output=" << offset << endl;)  
 }  
             DataMaths::trace(*vleft,m_left->getShape(),subroffset, v ,getShape(),offset,m_axis_offset);  
 if (zz==5800)  
 {  
 LAZYDEBUG(cerr << "Result of trace=" << DataTypes::pointToString(v,getShape(),offset,"") << endl;)  
 }  
         subroffset+=instep;  
         offset+=outstep;  
     }  
     break;  
     case TRANS:  
     for (loop=0;loop<numsteps;++loop)  
     {  
             DataMaths::transpose(*vleft,m_left->getShape(),subroffset, v,getShape(),offset,m_axis_offset);  
         subroffset+=instep;  
         offset+=outstep;  
     }  
     break;  
     default:  
     throw DataException("Programmer error - resolveNP1OUTP can not resolve operator "+opToString(m_op)+".");  
   }  
   return &v;  
 }  
924    
925    
 /*  
   \brief Compute the value of the expression (unary operation with int params) for the given sample.  
   \return Vector which stores the value of the subexpression for the given sample.  
   \param v A vector to store intermediate results.  
   \param offset Index in v to begin storing results.  
   \param sampleNo Sample number to evaluate.  
   \param roffset (output parameter) the offset in the return vector where the result begins.  
   
   The return value will be an existing vector so do not deallocate it.  
   If the result is stored in v it should be stored at the offset given.  
   Everything from offset to the end of v should be considered available for this method to use.  
 */  
 DataTypes::ValueType*  
 DataLazy::resolveNP1OUT_2P(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const  
 {  
     // we assume that any collapsing has been done before we get here  
     // since we only have one argument we don't need to think about only  
     // processing single points.  
   if (m_readytype!='E')  
   {  
     throw DataException("Programmer error - resolveNP1OUT_2P should only be called on expanded Data.");  
   }  
     // since we can't write the result over the input, we need a result offset further along  
   size_t subroffset;  
   const ValueType* vleft=m_left->resolveSample(v,offset+m_left->m_samplesize,sampleNo,subroffset);  
 LAZYDEBUG(cerr << "srcsamplesize=" << offset+m_left->m_samplesize << " beg=" << subroffset << endl;)  
 LAZYDEBUG(cerr << "Offset for 5800=" << getPointOffset(5800/getNumDPPSample(),5800%getNumDPPSample()) << endl;)  
   roffset=offset;  
   size_t loop=0;  
   size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;  
   size_t outstep=getNoValues();  
   size_t instep=m_left->getNoValues();  
 LAZYDEBUG(cerr << "instep=" << instep << " outstep=" << outstep<< " numsteps=" << numsteps << endl;)  
   switch (m_op)  
   {  
     case SWAP:  
     for (loop=0;loop<numsteps;++loop)  
     {  
             DataMaths::swapaxes(*vleft,m_left->getShape(),subroffset, v,getShape(),offset,m_axis_offset, m_transpose);  
         subroffset+=instep;  
         offset+=outstep;  
     }  
     break;  
     default:  
     throw DataException("Programmer error - resolveNP1OUT2P can not resolve operator "+opToString(m_op)+".");  
   }  
   return &v;  
 }  
926    
927    
928    
# Line 1295  LAZYDEBUG(cout << " result=      " << re Line 942  LAZYDEBUG(cout << " result=      " << re
942        rroffset+=orightstep;\        rroffset+=orightstep;\
943      }      }
944    
 /*  
   \brief Compute the value of the expression (binary operation) for the given sample.  
   \return Vector which stores the value of the subexpression for the given sample.  
   \param v A vector to store intermediate results.  
   \param offset Index in v to begin storing results.  
   \param sampleNo Sample number to evaluate.  
   \param roffset (output parameter) the offset in the return vector where the result begins.  
   
   The return value will be an existing vector so do not deallocate it.  
   If the result is stored in v it should be stored at the offset given.  
   Everything from offset to the end of v should be considered available for this method to use.  
 */  
 // This method assumes that any subexpressions which evaluate to Constant or Tagged Data  
 // have already been collapsed to IDENTITY. So we must have at least one expanded child.  
 // If both children are expanded, then we can process them in a single operation (we treat  
  // the whole sample as one big datapoint).  
 // If one of the children is not expanded, then we need to treat each point in the sample  
 // individually.  
 // There is an additional complication when scalar operations are considered.  
 // For example, 2+Vector.  
 // In this case each double within the point is treated individually  
 DataTypes::ValueType*  
 DataLazy::resolveBinary(ValueType& v,  size_t offset, int sampleNo, size_t& roffset) const  
 {  
 LAZYDEBUG(cout << "Resolve binary: " << toString() << endl;)  
   
   size_t lroffset=0, rroffset=0;    // offsets in the left and right result vectors  
     // first work out which of the children are expanded  
   bool leftExp=(m_left->m_readytype=='E');  
   bool rightExp=(m_right->m_readytype=='E');  
   if (!leftExp && !rightExp)  
   {  
     throw DataException("Programmer Error - please use collapse if neither argument has type 'E'.");  
   }  
   bool leftScalar=(m_left->getRank()==0);  
   bool rightScalar=(m_right->getRank()==0);  
   if ((m_left->getRank()!=m_right->getRank()) && (!leftScalar && !rightScalar))  
   {  
     throw DataException("resolveBinary - ranks of arguments must match unless one of them is scalar.");  
   }  
   size_t leftsize=m_left->getNoValues();  
   size_t rightsize=m_right->getNoValues();  
   size_t chunksize=1;           // how many doubles will be processed in one go  
   int leftstep=0;       // how far should the left offset advance after each step  
   int rightstep=0;  
   int numsteps=0;       // total number of steps for the inner loop  
   int oleftstep=0;  // the o variables refer to the outer loop  
   int orightstep=0; // The outer loop is only required in cases where there is an extended scalar  
   int onumsteps=1;  
     
   bool LES=(leftExp && leftScalar); // Left is an expanded scalar  
   bool RES=(rightExp && rightScalar);  
   bool LS=(!leftExp && leftScalar); // left is a single scalar  
   bool RS=(!rightExp && rightScalar);  
   bool LN=(!leftExp && !leftScalar);    // left is a single non-scalar  
   bool RN=(!rightExp && !rightScalar);  
   bool LEN=(leftExp && !leftScalar);    // left is an expanded non-scalar  
   bool REN=(rightExp && !rightScalar);  
   
   if ((LES && RES) || (LEN && REN)) // both are Expanded scalars or both are expanded non-scalars  
   {  
     chunksize=m_left->getNumDPPSample()*leftsize;  
     leftstep=0;  
     rightstep=0;  
     numsteps=1;  
   }  
   else if (LES || RES)  
   {  
     chunksize=1;  
     if (LES)        // left is an expanded scalar  
     {  
         if (RS)  
         {  
            leftstep=1;  
            rightstep=0;  
            numsteps=m_left->getNumDPPSample();  
         }  
         else        // RN or REN  
         {  
            leftstep=0;  
            oleftstep=1;  
            rightstep=1;  
            orightstep=(RN ? -(int)rightsize : 0);  
            numsteps=rightsize;  
            onumsteps=m_left->getNumDPPSample();  
         }  
     }  
     else        // right is an expanded scalar  
     {  
         if (LS)  
         {  
            rightstep=1;  
            leftstep=0;  
            numsteps=m_right->getNumDPPSample();  
         }  
         else  
         {  
            rightstep=0;  
            orightstep=1;  
            leftstep=1;  
            oleftstep=(LN ? -(int)leftsize : 0);  
            numsteps=leftsize;  
            onumsteps=m_right->getNumDPPSample();  
         }  
     }  
   }  
   else  // this leaves (LEN, RS), (LEN, RN) and their transposes  
   {  
     if (LEN)    // and Right will be a single value  
     {  
         chunksize=rightsize;  
         leftstep=rightsize;  
         rightstep=0;  
         numsteps=m_left->getNumDPPSample();  
         if (RS)  
         {  
            numsteps*=leftsize;  
         }  
     }  
     else    // REN  
     {  
         chunksize=leftsize;  
         rightstep=leftsize;  
         leftstep=0;  
         numsteps=m_right->getNumDPPSample();  
         if (LS)  
         {  
            numsteps*=rightsize;  
         }  
     }  
   }  
   
   int resultStep=max(leftstep,rightstep);   // only one (at most) should be !=0  
     // Get the values of sub-expressions  
   const ValueType* left=m_left->resolveSample(v,offset+getMaxSampleSize(),sampleNo,lroffset);   // see note on  
     // calcBufss for why we can't put offset as the 2nd param above  
   const ValueType* right=m_right->resolveSample(v,offset+2*getMaxSampleSize(),sampleNo,rroffset); // Note  
     // the right child starts further along.  
 LAZYDEBUG(cout << "Post sub calls in " << toString() << endl;)  
 LAZYDEBUG(cout << "shapes=" << DataTypes::shapeToString(m_left->getShape()) << "," << DataTypes::shapeToString(m_right->getShape()) << endl;)  
 LAZYDEBUG(cout << "chunksize=" << chunksize << endl << "leftstep=" << leftstep << " rightstep=" << rightstep;)  
 LAZYDEBUG(cout << " numsteps=" << numsteps << endl << "oleftstep=" << oleftstep << " orightstep=" << orightstep;)  
 LAZYDEBUG(cout << "onumsteps=" << onumsteps << endl;)  
 LAZYDEBUG(cout << " DPPS=" << m_left->getNumDPPSample() << "," <<m_right->getNumDPPSample() << endl;)  
 LAZYDEBUG(cout << "" << LS << RS << LN << RN << LES << RES <<LEN << REN <<   endl;)  
   
   
  double* resultp=&(v[offset]);     // results are stored at the vector offset we received  
   switch(m_op)  
   {  
     case ADD:  
         PROC_OP(NO_ARG,plus<double>());  
     break;  
     case SUB:  
     PROC_OP(NO_ARG,minus<double>());  
     break;  
     case MUL:  
     PROC_OP(NO_ARG,multiplies<double>());  
     break;  
     case DIV:  
     PROC_OP(NO_ARG,divides<double>());  
     break;  
     case POW:  
        PROC_OP(double (double,double),::pow);  
     break;  
     default:  
     throw DataException("Programmer error - resolveBinary can not resolve operator "+opToString(m_op)+".");  
   }  
   roffset=offset;  
   return &v;  
 }  
   
   
   
 /*  
   \brief Compute the value of the expression (tensor product) for the given sample.  
   \return Vector which stores the value of the subexpression for the given sample.  
   \param v A vector to store intermediate results.  
   \param offset Index in v to begin storing results.  
   \param sampleNo Sample number to evaluate.  
   \param roffset (output parameter) the offset in the return vector where the result begins.  
   
   The return value will be an existing vector so do not deallocate it.  
   If the result is stored in v it should be stored at the offset given.  
   Everything from offset to the end of v should be considered available for this method to use.  
 */  
 // This method assumes that any subexpressions which evaluate to Constant or Tagged Data  
 // have already been collapsed to IDENTITY. So we must have at least one expanded child.  
 // unlike the other resolve helpers, we must treat these datapoints separately.  
 DataTypes::ValueType*  
 DataLazy::resolveTProd(ValueType& v,  size_t offset, int sampleNo, size_t& roffset) const  
 {  
 LAZYDEBUG(cout << "Resolve TensorProduct: " << toString()  << " to offset " << offset<< endl;)  
   
   size_t lroffset=0, rroffset=0;    // offsets in the left and right result vectors  
     // first work out which of the children are expanded  
   bool leftExp=(m_left->m_readytype=='E');  
   bool rightExp=(m_right->m_readytype=='E');  
   int steps=getNumDPPSample();  
 /*  int leftStep=((leftExp && !rightExp)? m_right->getNoValues() : 0);  
   int rightStep=((rightExp && !leftExp)? m_left->getNoValues() : 0);*/  
   int leftStep=(leftExp? m_left->getNoValues() : 0);        // do not have scalars as input to this method  
   int rightStep=(rightExp?m_right->getNoValues() : 0);  
   
   int resultStep=getNoValues();  
     // Get the values of sub-expressions (leave a gap of one sample for the result).  
   int gap=offset+m_samplesize;    
   
 LAZYDEBUG(cout << "Query left with offset=" << gap << endl;)  
   
   const ValueType* left=m_left->resolveSample(v,gap,sampleNo,lroffset);  
   gap+=m_left->getMaxSampleSize();  
   
   
 LAZYDEBUG(cout << "Query right with offset=" << gap << endl;)  
   
   
   const ValueType* right=m_right->resolveSample(v,gap,sampleNo,rroffset);  
   
 LAZYDEBUG(cerr << "[Left shape]=" << DataTypes::shapeToString(m_left->getShape()) << "\n[Right shape]=" << DataTypes::shapeToString(m_right->getShape()) << " result=" <<DataTypes::shapeToString(getShape()) <<  endl;  
 cout << getNoValues() << endl;)  
 LAZYDEBUG(cerr << "Result of left=";)  
 LAZYDEBUG(cerr << "[" << lroffset << " .. " << lroffset+m_left->getNoValues() << "]" << endl;  
   
 for (int i=lroffset, limit=lroffset+(leftExp?m_left->getNoValues()*m_left->getNumDPPSample():m_left->getNoValues());i<limit;++i)  
 {  
 cout << "[" << setw(2) << i-lroffset << "] " << setw(10) << (*left)[i] << " ";  
 if (i%4==0) cout << endl;  
 })  
 LAZYDEBUG(cerr << "\nResult of right=" << endl;)  
 LAZYDEBUG(  
 for (int i=rroffset, limit=rroffset+(rightExp?m_right->getNoValues()*m_right->getNumDPPSample():m_right->getNoValues());i<limit;++i)  
 {  
 cerr << "[" <<  setw(2)<< i-rroffset << "] " << setw(10) << (*right)[i] << " ";  
 if (i%4==0) cout << endl;  
 }  
 cerr << endl;  
 )  
 LAZYDEBUG(cerr << "Post sub calls: " << toString() << endl;)  
 LAZYDEBUG(cout << "LeftExp=" << leftExp << " rightExp=" << rightExp << endl;)  
 LAZYDEBUG(cout << "LeftR=" << m_left->getRank() << " rightExp=" << m_right->getRank() << endl;)  
 LAZYDEBUG(cout << "LeftSize=" << m_left->getNoValues() << " RightSize=" << m_right->getNoValues() << endl;)  
 LAZYDEBUG(cout << "m_samplesize=" << m_samplesize << endl;)  
 LAZYDEBUG(cout << "outputshape=" << DataTypes::shapeToString(getShape()) << endl;)  
 LAZYDEBUG(cout << "DPPS=" << m_right->getNumDPPSample() <<"."<<endl;)  
   
  double* resultp=&(v[offset]);     // results are stored at the vector offset we received  
   switch(m_op)  
   {  
     case PROD:  
     for (int i=0;i<steps;++i,resultp+=resultStep)  
     {  
   
 LAZYDEBUG(cout << "lroffset=" << lroffset << "rroffset=" << rroffset << endl;)  
 LAZYDEBUG(cout << "l*=" << left << " r*=" << right << endl;)  
 LAZYDEBUG(cout << "m_SL=" << m_SL << " m_SM=" << m_SM << " m_SR=" << m_SR << endl;)  
   
           const double *ptr_0 = &((*left)[lroffset]);  
           const double *ptr_1 = &((*right)[rroffset]);  
   
 LAZYDEBUG(cout << DataTypes::pointToString(*left, m_left->getShape(),lroffset,"LEFT") << endl;)  
 LAZYDEBUG(cout << DataTypes::pointToString(*right,m_right->getShape(),rroffset, "RIGHT") << endl;)  
   
           matrix_matrix_product(m_SL, m_SM, m_SR, ptr_0, ptr_1, resultp, m_transpose);  
   
 LAZYDEBUG(cout << "Results--\n";  
 {  
   DataVector dv(getNoValues());  
 for (int z=0;z<getNoValues();++z)  
 {  
   cout << "[" << setw(2) << z<< "] " << setw(10) << resultp[z] << " ";  
   if (z%4==0) cout << endl;  
   dv[z]=resultp[z];  
 }  
 cout << endl << DataTypes::pointToString(dv,getShape(),0,"RESLT");  
 cout << "\nWritten to: " << resultp << " resultStep=" << resultStep << endl;  
 }  
 )  
       lroffset+=leftStep;  
       rroffset+=rightStep;  
     }  
     break;  
     default:  
     throw DataException("Programmer error - resolveTProduct can not resolve operator "+opToString(m_op)+".");  
   }  
   roffset=offset;  
   return &v;  
 }  
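
For PROD the sample is contracted point by point with matrix_matrix_product(m_SL, m_SM, m_SR, ...). A hedged sketch of the contraction it performs, ignoring the transpose flag and assuming a row-major layout purely for illustration (this is not the library's implementation):

    // Treat left as (SL x SM), right as (SM x SR) and produce an (SL x SR) block.
    void mat_mat_sketch(int SL, int SM, int SR,
                        const double* left, const double* right, double* out)
    {
        for (int i = 0; i < SL; ++i)
        {
            for (int k = 0; k < SR; ++k)
            {
                double sum = 0.0;
                for (int j = 0; j < SM; ++j)
                {
                    sum += left[i*SM + j] * right[j*SR + k];
                }
                out[i*SR + k] = sum;
            }
        }
    }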
   
   
 #ifdef LAZY_NODE_STORAGE  
945    
946  // The result will be stored in m_samples  // The result will be stored in m_samples
947  // The return value is a pointer to the DataVector, offset is the offset within the return value  // The return value is a pointer to the DataVector, offset is the offset within the return value
# Line 1601  LAZYDEBUG(cout << "Resolve sample " << t Line 957  LAZYDEBUG(cout << "Resolve sample " << t
957    if (m_op==IDENTITY)      if (m_op==IDENTITY)  
958    {    {
959      const ValueType& vec=m_id->getVectorRO();      const ValueType& vec=m_id->getVectorRO();
 //     if (m_readytype=='C')  
 //     {  
 //  roffset=0;      // all samples read from the same position  
 //  return &(m_samples);  
 //     }  
960      roffset=m_id->getPointOffset(sampleNo, 0);      roffset=m_id->getPointOffset(sampleNo, 0);
961    #ifdef LAZY_STACK_PROF
962    int x;
963    if (&x<stackend[omp_get_thread_num()])
964    {
965           stackend[omp_get_thread_num()]=&x;
966    }
967    #endif
968      return &(vec);      return &(vec);
969    }    }
970    if (m_readytype!='E')    if (m_readytype!='E')
# Line 1628  LAZYDEBUG(cout << "Resolve sample " << t Line 986  LAZYDEBUG(cout << "Resolve sample " << t
986    case G_NP1OUT_P: return resolveNodeNP1OUT_P(tid, sampleNo, roffset);    case G_NP1OUT_P: return resolveNodeNP1OUT_P(tid, sampleNo, roffset);
987    case G_TENSORPROD: return resolveNodeTProd(tid, sampleNo, roffset);    case G_TENSORPROD: return resolveNodeTProd(tid, sampleNo, roffset);
988    case G_NP1OUT_2P: return resolveNodeNP1OUT_2P(tid, sampleNo, roffset);    case G_NP1OUT_2P: return resolveNodeNP1OUT_2P(tid, sampleNo, roffset);
989      case G_REDUCTION: return resolveNodeReduction(tid, sampleNo, roffset);
990    default:    default:
991      throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");      throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");
992    }    }
# Line 1766  DataLazy::resolveNodeUnary(int tid, int Line 1125  DataLazy::resolveNodeUnary(int tid, int
1125    
1126    
1127  const DataTypes::ValueType*  const DataTypes::ValueType*
1128    DataLazy::resolveNodeReduction(int tid, int sampleNo, size_t& roffset)
1129    {
1130        // we assume that any collapsing has been done before we get here
1131        // since we only have one argument we don't need to think about only
1132        // processing single points.
1133        // we will also know we won't get identity nodes
1134      if (m_readytype!='E')
1135      {
1136      throw DataException("Programmer error - resolveNodeReduction should only be called on expanded Data.");
1137      }
1138      if (m_op==IDENTITY)
1139      {
1140      throw DataException("Programmer error - resolveNodeReduction should not be called on identity nodes.");
1141      }
1142      size_t loffset=0;
1143      const DataTypes::ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, loffset);
1144    
1145      roffset=m_samplesize*tid;
1146      unsigned int ndpps=getNumDPPSample();
1147      unsigned int psize=DataTypes::noValues(getShape());
1148      double* result=&(m_samples[roffset]);
1149      switch (m_op)
1150      {
1151        case MINVAL:
1152        {
1153          for (unsigned int z=0;z<ndpps;++z)
1154          {
1155            FMin op;
1156            *result=DataMaths::reductionOp(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max());
1157            loffset+=psize;
1158            result++;
1159          }
1160        }
1161        break;
1162        case MAXVAL:
1163        {
1164          for (unsigned int z=0;z<ndpps;++z)
1165          {
1166          FMax op;
1167          *result=DataMaths::reductionOp(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max()*-1);
1168          loffset+=psize;
1169          result++;
1170          }
1171        }
1172        break;
1173        default:
1174      throw DataException("Programmer error - resolveNodeReduction can not resolve operator "+opToString(m_op)+".");
1175      }
1176      return &(m_samples);
1177    }
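
MINVAL and MAXVAL fold each data point down to a scalar through DataMaths::reductionOp, seeded with +max and -max respectively so any real value replaces the seed. Judging only from the call pattern above, the fold behaves like the following sketch (an assumption about reductionOp, not its actual code):

    #include <vector>
    #include <cstddef>

    // Hypothetical fold over one data point (psize consecutive doubles at 'offset'):
    template <class BinaryOp>
    double reduction_sketch(const std::vector<double>& vec, std::size_t offset,
                            std::size_t psize, BinaryOp op, double initial)
    {
        double acc = initial;
        for (std::size_t i = 0; i < psize; ++i)
        {
            acc = op(acc, vec[offset + i]);   // FMin keeps the smaller value, FMax the larger
        }
        return acc;
    }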
1178    
1179    const DataTypes::ValueType*
1180  DataLazy::resolveNodeNP1OUT(int tid, int sampleNo, size_t& roffset)  DataLazy::resolveNodeNP1OUT(int tid, int sampleNo, size_t& roffset)
1181  {  {
1182      // we assume that any collapsing has been done before we get here      // we assume that any collapsing has been done before we get here
# Line 2119  LAZYDEBUG(cout << DataTypes::pointToStri Line 1530  LAZYDEBUG(cout << DataTypes::pointToStri
1530    roffset=offset;    roffset=offset;
1531    return &m_samples;    return &m_samples;
1532  }  }
 #endif //LAZY_NODE_STORAGE  
1533    
 /*  
   \brief Compute the value of the expression for the given sample.  
   \return Vector which stores the value of the subexpression for the given sample.  
   \param v A vector to store intermediate results.  
   \param offset Index in v to begin storing results.  
   \param sampleNo Sample number to evaluate.  
   \param roffset (output parameter) the offset in the return vector where the result begins.  
   
   The return value will be an existing vector so do not deallocate it.  
 */  
 // the vector and the offset are a place where the method could write its data if it wishes  
 // it is not obligated to do so. For example, if it has its own storage already, it can use that.  
 // Hence the return value to indicate where the data is actually stored.  
 // Regardless, the storage should be assumed to be used, even if it isn't.  
1534    
 // the roffset is the offset within the returned vector where the data begins  
1535  const DataTypes::ValueType*  const DataTypes::ValueType*
1536  DataLazy::resolveSample(ValueType& v, size_t offset, int sampleNo, size_t& roffset)  DataLazy::resolveSample(int sampleNo, size_t& roffset)
 {  
 LAZYDEBUG(cout << "Resolve sample " << toString() << endl;)  
     // collapse so we have a 'E' node or an IDENTITY for some other type  
   if (m_readytype!='E' && m_op!=IDENTITY)  
   {  
     collapse();  
   }  
   if (m_op==IDENTITY)    
   {  
     const ValueType& vec=m_id->getVectorRO();  
     if (m_readytype=='C')  
     {  
     roffset=0;  
 LAZYDEBUG(cout << "Finish  sample " << toString() << endl;)  
     return &(vec);  
     }  
     roffset=m_id->getPointOffset(sampleNo, 0);  
 LAZYDEBUG(cout << "Finish  sample " << toString() << endl;)  
     return &(vec);  
   }  
   if (m_readytype!='E')  
   {  
     throw DataException("Programmer Error - Collapse did not produce an expanded node.");  
   }  
   switch (getOpgroup(m_op))  
   {  
   case G_UNARY:  
   case G_UNARY_P: return resolveUnary(v, offset,sampleNo,roffset);  
   case G_BINARY: return resolveBinary(v, offset,sampleNo,roffset);  
   case G_NP1OUT: return resolveNP1OUT(v, offset, sampleNo,roffset);  
   case G_NP1OUT_P: return resolveNP1OUT_P(v, offset, sampleNo,roffset);  
   case G_TENSORPROD: return resolveTProd(v,offset, sampleNo,roffset);  
   case G_NP1OUT_2P: return resolveNP1OUT_2P(v, offset, sampleNo, roffset);  
   default:  
     throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");  
   }  
   
 }  
   
 const DataTypes::ValueType*  
 DataLazy::resolveSample(BufferGroup& bg, int sampleNo, size_t& roffset)  
1537  {  {
1538  #ifdef _OPENMP  #ifdef _OPENMP
1539      int tid=omp_get_thread_num();      int tid=omp_get_thread_num();
1540  #else  #else
1541      int tid=0;      int tid=0;
1542  #endif  #endif
1543      return resolveSample(bg.getBuffer(tid),bg.getOffset(tid),sampleNo,roffset);  
1544    #ifdef LAZY_STACK_PROF
1545        stackstart[tid]=&tid;
1546        stackend[tid]=&tid;
1547        const DataTypes::ValueType* r=resolveNodeSample(tid, sampleNo, roffset);
1548        size_t d=(size_t)stackstart[tid]-(size_t)stackend[tid];
1549        #pragma omp critical
1550        if (d>maxstackuse)
1551        {
1552    cout << "Max resolve Stack use " << d << endl;
1553            maxstackuse=d;
1554        }
1555        return r;
1556    #else
1557        return resolveNodeSample(tid, sampleNo, roffset);
1558    #endif
1559  }  }
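
The contract is unchanged: the caller gets back a pointer to some vector plus roffset, and reads m_samplesize doubles starting there without taking ownership of the storage. A sketch of the caller-side pattern, as resolveNodeWorker does below (lazy, outvec and outoffset are illustrative names, not part of this file):

    size_t roffset = 0;
    const DataTypes::ValueType* res = lazy->resolveSample(sampleNo, roffset);
    // the sample's values occupy (*res)[roffset] .. (*res)[roffset + m_samplesize - 1]
    memcpy(&outvec[outoffset], &((*res)[roffset]), m_samplesize * sizeof(double));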
1560    
1561    
# Line 2196  DataLazy::resolveToIdentity() Line 1565  DataLazy::resolveToIdentity()
1565  {  {
1566     if (m_op==IDENTITY)     if (m_op==IDENTITY)
1567      return;      return;
 #ifndef LAZY_NODE_STORAGE  
    DataReady_ptr p=resolveVectorWorker();  
 #else  
1568     DataReady_ptr p=resolveNodeWorker();     DataReady_ptr p=resolveNodeWorker();
 #endif  
1569     makeIdentity(p);     makeIdentity(p);
1570  }  }
1571    
# Line 2216  void DataLazy::makeIdentity(const DataRe Line 1581  void DataLazy::makeIdentity(const DataRe
1581     else if(p->isExpanded()) {m_readytype='E';}     else if(p->isExpanded()) {m_readytype='E';}
1582     else if (p->isTagged()) {m_readytype='T';}     else if (p->isTagged()) {m_readytype='T';}
1583     else {throw DataException("Unknown DataReady instance in convertToIdentity constructor.");}     else {throw DataException("Unknown DataReady instance in convertToIdentity constructor.");}
    m_buffsRequired=1;  
1584     m_samplesize=p->getNumDPPSample()*p->getNoValues();     m_samplesize=p->getNumDPPSample()*p->getNoValues();
    m_maxsamplesize=m_samplesize;  
1585     m_left.reset();     m_left.reset();
1586     m_right.reset();     m_right.reset();
1587  }  }
# Line 2231  DataLazy::resolve() Line 1594  DataLazy::resolve()
1594      return m_id;      return m_id;
1595  }  }
1596    
 #ifdef LAZY_NODE_STORAGE  
1597    
1598  // This version of resolve uses storage in each node to hold results  /* This is really a static method but I think that caused problems on Windows */
1599  DataReady_ptr  void
1600  DataLazy::resolveNodeWorker()  DataLazy::resolveGroupWorker(std::vector<DataLazy*>& dats)
1601  {  {
1602    if (m_readytype!='E')     // if the whole sub-expression is Constant or Tagged, then evaluate it normally    if (dats.empty())
1603    {    {
1604      collapse();      return;
1605    }    }
1606    if (m_op==IDENTITY)       // So a lazy expression of Constant or Tagged data will be returned here.    vector<DataLazy*> work;
1607      FunctionSpace fs=dats[0]->getFunctionSpace();
1608      bool match=true;
1609      for (int i=dats.size()-1;i>=0;--i)
1610    {    {
1611      return m_id;      if (dats[i]->m_readytype!='E')
1612        {
1613            dats[i]->collapse();
1614        }
1615        if (dats[i]->m_op!=IDENTITY)
1616        {
1617            work.push_back(dats[i]);
1618            if (fs!=dats[i]->getFunctionSpace())
1619            {
1620                match=false;
1621            }
1622        }
1623    }    }
1624      // from this point on we must have m_op!=IDENTITY and m_readytype=='E'    if (work.empty())
   DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(),  ValueType(getNoValues()));  
   ValueType& resvec=result->getVectorRW();  
   DataReady_ptr resptr=DataReady_ptr(result);  
   
   int sample;  
   int totalsamples=getNumSamples();  
   const ValueType* res=0;   // Storage for answer  
 LAZYDEBUG(cout << "Total number of samples=" <<totalsamples << endl;)  
   #pragma omp parallel for private(sample,res) schedule(static)  
   for (sample=0;sample<totalsamples;++sample)  
1625    {    {
1626      size_t roffset=0;      return;     // no work to do
1627      }
1628      if (match)    // all functionspaces match.  Yes I realise this is overly strict
1629      {     // it is possible that dats[0] is one of the objects which we discarded and
1630            // all the other functionspaces match.
1631        vector<DataExpanded*> dep;
1632        vector<ValueType*> vecs;
1633        for (int i=0;i<work.size();++i)
1634        {
1635            dep.push_back(new DataExpanded(fs,work[i]->getShape(), ValueType(work[i]->getNoValues())));
1636            vecs.push_back(&(dep[i]->getVectorRW()));
1637        }
1638        int totalsamples=work[0]->getNumSamples();
1639        const ValueType* res=0; // Storage for answer
1640        int sample;
1641        #pragma omp parallel private(sample, res)
1642        {
1643            size_t roffset=0;
1644            #pragma omp for schedule(static)
1645            for (sample=0;sample<totalsamples;++sample)
1646            {
1647            roffset=0;
1648            int j;
1649            for (j=work.size()-1;j>0;--j)
1650            {
1651  #ifdef _OPENMP  #ifdef _OPENMP
1652      res=resolveNodeSample(omp_get_thread_num(),sample,roffset);                  res=work[j]->resolveNodeSample(omp_get_thread_num(),sample,roffset);
1653  #else  #else
1654      res=resolveNodeSample(0,sample,roffset);                  res=work[j]->resolveNodeSample(0,sample,roffset);
1655  #endif  #endif
1656  LAZYDEBUG(cout << "Sample #" << sample << endl;)                  DataVector::size_type outoffset=dep[j]->getPointOffset(sample,0);
1657  LAZYDEBUG(cout << "Final res[" << roffset<< "]=" << (*res)[roffset] << (*res)[roffset]<< endl; )                  memcpy(&((*vecs[j])[outoffset]),&((*res)[roffset]),work[j]->m_samplesize*sizeof(DataVector::ElementType));
1658      DataVector::size_type outoffset=result->getPointOffset(sample,0);          }
1659      memcpy(&(resvec[outoffset]),&((*res)[roffset]),m_samplesize*sizeof(DataVector::ElementType));          }
1660        }
1661        // Now we need to load the new results as identity ops into the lazy nodes
1662        for (int i=work.size()-1;i>=0;--i)
1663        {
1664            work[i]->makeIdentity(boost::dynamic_pointer_cast<DataReady>(dep[i]->getPtr()));
1665        }
1666      }
1667      else  // functionspaces do not match
1668      {
1669        for (int i=0;i<work.size();++i)
1670        {
1671            work[i]->resolveToIdentity();
1672        }
1673    }    }
   return resptr;  
1674  }  }
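The caller that drives resolveGroupWorker is not part of this hunk, so the helper below is only a sketch of the batching contract and not escript API: the name resolveSeveral, the assumption that the method is reachable from outside, and the call through nodes[0] (it is declared as an ordinary member in this revision, per the comment above its definition) are all assumptions. What it illustrates is the postcondition: every node handed in comes back as an IDENTITY wrapping ready data, either via the shared parallel pass (when all function spaces match) or via individual resolveToIdentity calls.

    #include <vector>
    #include "DataLazy.h"   // assumes the escript build environment

    // Hypothetical helper, for illustration only.
    void resolveSeveral(std::vector<escript::DataLazy*>& nodes)
    {
        if (nodes.empty())
        {
            return;                           // resolveGroupWorker also tolerates an empty list
        }
        nodes[0]->resolveGroupWorker(nodes);  // evaluates all non-IDENTITY nodes in one pass
        // Each node now holds its result as ready data (m_op==IDENTITY),
        // so later resolve() calls simply return the stored DataReady.
    }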
1675    
 #endif // LAZY_NODE_STORAGE  
1676    
1677  // To simplify the memory management, all threads operate on one large vector, rather than one each.  
1678  // Each sample is evaluated independently and copied into the result DataExpanded.  // This version of resolve uses storage in each node to hold results
1679  DataReady_ptr  DataReady_ptr
1680  DataLazy::resolveVectorWorker()  DataLazy::resolveNodeWorker()
1681  {  {
   
 LAZYDEBUG(cout << "Sample size=" << m_samplesize << endl;)  
 LAZYDEBUG(cout << "Buffers=" << m_buffsRequired << endl;)  
1682    if (m_readytype!='E')     // if the whole sub-expression is Constant or Tagged, then evaluate it normally    if (m_readytype!='E')     // if the whole sub-expression is Constant or Tagged, then evaluate it normally
1683    {    {
1684      collapse();      collapse();
# Line 2290  LAZYDEBUG(cout << "Buffers=" << m_buffsR Line 1688  LAZYDEBUG(cout << "Buffers=" << m_buffsR
1688      return m_id;      return m_id;
1689    }    }
1690      // from this point on we must have m_op!=IDENTITY and m_readytype=='E'      // from this point on we must have m_op!=IDENTITY and m_readytype=='E'
   size_t threadbuffersize=m_maxsamplesize*(max(1,m_buffsRequired)); // Each thread needs to have enough  
     // storage to evaluate its expression  
   int numthreads=1;  
 #ifdef _OPENMP  
   numthreads=omp_get_max_threads();  
 #endif  
   ValueType v(numthreads*threadbuffersize);  
 LAZYDEBUG(cout << "Buffer created with size=" << v.size() << endl;)  
1691    DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(),  ValueType(getNoValues()));    DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(),  ValueType(getNoValues()));
1692    ValueType& resvec=result->getVectorRW();    ValueType& resvec=result->getVectorRW();
1693    DataReady_ptr resptr=DataReady_ptr(result);    DataReady_ptr resptr=DataReady_ptr(result);
1694    
1695    int sample;    int sample;
   size_t outoffset;     // offset in the output data  
1696    int totalsamples=getNumSamples();    int totalsamples=getNumSamples();
1697    const ValueType* res=0;   // Vector storing the answer    const ValueType* res=0;   // Storage for answer
   size_t resoffset=0;       // where in the vector to find the answer  
1698  LAZYDEBUG(cout << "Total number of samples=" <<totalsamples << endl;)  LAZYDEBUG(cout << "Total number of samples=" <<totalsamples << endl;)
1699    #pragma omp parallel for private(sample,resoffset,outoffset,res) schedule(static)    #pragma omp parallel private(sample,res)
   for (sample=0;sample<totalsamples;++sample)  
1700    {    {
1701  LAZYDEBUG(cout << "################################# " << sample << endl;)      size_t roffset=0;
1702    #ifdef LAZY_STACK_PROF
1703        stackstart[omp_get_thread_num()]=&roffset;
1704        stackend[omp_get_thread_num()]=&roffset;
1705    #endif
1706        #pragma omp for schedule(static)
1707        for (sample=0;sample<totalsamples;++sample)
1708        {
1709            roffset=0;
1710  #ifdef _OPENMP  #ifdef _OPENMP
1711      res=resolveSample(v,threadbuffersize*omp_get_thread_num(),sample,resoffset);              res=resolveNodeSample(omp_get_thread_num(),sample,roffset);
1712  #else  #else
1713      res=resolveSample(v,0,sample,resoffset);   // res would normally be v, but not if its a single IDENTITY op.              res=resolveNodeSample(0,sample,roffset);
1714  #endif  #endif
1715  LAZYDEBUG(cerr << "-------------------------------- " << endl;)  LAZYDEBUG(cout << "Sample #" << sample << endl;)
1716  LAZYDEBUG(cerr<< "Copying sample#" << sample << endl;)  LAZYDEBUG(cout << "Final res[" << roffset<< "]=" << (*res)[roffset] << (*res)[roffset]<< endl; )
1717      outoffset=result->getPointOffset(sample,0);              DataVector::size_type outoffset=result->getPointOffset(sample,0);
1718  LAZYDEBUG(cerr << "offset=" << outoffset << " from offset=" << resoffset << " " << m_samplesize << " doubles" << endl;)              memcpy(&(resvec[outoffset]),&((*res)[roffset]),m_samplesize*sizeof(DataVector::ElementType));
1719      for (unsigned int i=0;i<m_samplesize;++i,++outoffset,++resoffset)   // copy values into the output vector      }
     {  
 LAZYDEBUG(cerr << "outoffset=" << outoffset << " resoffset=" << resoffset << " " << (*res)[resoffset]<< endl;)  
     resvec[outoffset]=(*res)[resoffset];  
     }  
 LAZYDEBUG(cerr << DataTypes::pointToString(resvec,getShape(),outoffset-m_samplesize+DataTypes::noValues(getShape()),"Final result:") << endl;)  
 LAZYDEBUG(cerr << "*********************************" << endl;)  
1720    }    }
1721    #ifdef LAZY_STACK_PROF
1722      for (int i=0;i<getNumberOfThreads();++i)
1723      {
1724        size_t r=((size_t)stackstart[i] - (size_t)stackend[i]);
1725    //  cout << i << " " << stackstart[i] << " .. " << stackend[i] << " = " <<  r << endl;
1726        if (r>maxstackuse)
1727        {
1728            maxstackuse=r;
1729        }
1730      }
1731      cout << "Max resolve Stack use=" << maxstackuse << endl;
1732    #endif
1733    return resptr;    return resptr;
1734  }  }
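The LAZY_STACK_PROF bookkeeping above records, per thread, the address of a local variable at the top of the resolve (stackstart) and the deepest address reached while the recursive resolveNodeSample calls run (stackend, presumably updated inside resolveNodeSample, which is not part of this hunk); subtracting the two approximates how much stack the deepest expression evaluation consumed. The standalone sketch below is not escript code; it only demonstrates the same address-difference trick and, like the subtraction above, assumes the stack grows downwards.

    #include <cstddef>
    #include <iostream>

    static void* stack_top = 0;      // address of a local in the outermost frame
    static void* stack_deepest = 0;  // lowest (deepest) address seen so far

    void recurse(int depth)
    {
        char marker = static_cast<char>(depth);       // a local on this frame's stack
        if (static_cast<void*>(&marker) < stack_deepest)
        {
            stack_deepest = &marker;                  // a deeper frame has a lower address
        }
        if (depth > 0)
        {
            recurse(depth - 1);
        }
    }

    int main()
    {
        char top;
        stack_top = stack_deepest = &top;
        recurse(100);
        std::cout << "approx. stack use: "
                  << (std::size_t)stack_top - (std::size_t)stack_deepest
                  << " bytes" << std::endl;
        return 0;
    }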
1735    
# Line 2335  std::string Line 1737  std::string
1737  DataLazy::toString() const  DataLazy::toString() const
1738  {  {
1739    ostringstream oss;    ostringstream oss;
1740    oss << "Lazy Data:";    oss << "Lazy Data: [depth=" << m_height<< "] ";
1741    intoString(oss);    switch (escriptParams.getLAZY_STR_FMT())
1742      {
1743      case 1:   // tree format
1744        oss << endl;
1745        intoTreeString(oss,"");
1746        break;
1747      case 2:   // just the depth
1748        break;
1749      default:
1750        intoString(oss);
1751        break;
1752      }
1753    return oss.str();    return oss.str();
1754  }  }
1755    
# Line 2377  DataLazy::intoString(ostringstream& oss) Line 1790  DataLazy::intoString(ostringstream& oss)
1790    case G_UNARY_P:    case G_UNARY_P:
1791    case G_NP1OUT:    case G_NP1OUT:
1792    case G_NP1OUT_P:    case G_NP1OUT_P:
1793      case G_REDUCTION:
1794      oss << opToString(m_op) << '(';      oss << opToString(m_op) << '(';
1795      m_left->intoString(oss);      m_left->intoString(oss);
1796      oss << ')';      oss << ')';
# Line 2399  DataLazy::intoString(ostringstream& oss) Line 1813  DataLazy::intoString(ostringstream& oss)
1813    }    }
1814  }  }
1815    
1816    
1817    void
1818    DataLazy::intoTreeString(ostringstream& oss, string indent) const
1819    {
1820      oss << '[' << m_rank << ':' << setw(3) << m_samplesize << "] " << indent;
1821      switch (getOpgroup(m_op))
1822      {
1823      case G_IDENTITY:
1824        if (m_id->isExpanded())
1825        {
1826           oss << "E";
1827        }
1828        else if (m_id->isTagged())
1829        {
1830          oss << "T";
1831        }
1832        else if (m_id->isConstant())
1833        {
1834          oss << "C";
1835        }
1836        else
1837        {
1838          oss << "?";
1839        }
1840        oss << '@' << m_id.get() << endl;
1841        break;
1842      case G_BINARY:
1843        oss << opToString(m_op) << endl;
1844        indent+='.';
1845        m_left->intoTreeString(oss, indent);
1846        m_right->intoTreeString(oss, indent);
1847        break;
1848      case G_UNARY:
1849      case G_UNARY_P:
1850      case G_NP1OUT:
1851      case G_NP1OUT_P:
1852      case G_REDUCTION:
1853        oss << opToString(m_op) << endl;
1854        indent+='.';
1855        m_left->intoTreeString(oss, indent);
1856        break;
1857      case G_TENSORPROD:
1858        oss << opToString(m_op) << endl;
1859        indent+='.';
1860        m_left->intoTreeString(oss, indent);
1861        m_right->intoTreeString(oss, indent);
1862        break;
1863      case G_NP1OUT_2P:
1864        oss << opToString(m_op) << ", " << m_axis_offset << ", " << m_transpose<< endl;
1865        indent+='.';
1866        m_left->intoTreeString(oss, indent);
1867        break;
1868      default:
1869        oss << "UNKNOWN";
1870      }
1871    }
1872    
1873    
1874  DataAbstract*  DataAbstract*
1875  DataLazy::deepCopy()  DataLazy::deepCopy()
1876  {  {
1877    switch (getOpgroup(m_op))    switch (getOpgroup(m_op))
1878    {    {
1879    case G_IDENTITY:  return new DataLazy(m_id->deepCopy()->getPtr());    case G_IDENTITY:  return new DataLazy(m_id->deepCopy()->getPtr());
1880    case G_UNARY: return new DataLazy(m_left->deepCopy()->getPtr(),m_op);    case G_UNARY:
1881      case G_REDUCTION:      return new DataLazy(m_left->deepCopy()->getPtr(),m_op);
1882      case G_UNARY_P:   return new DataLazy(m_left->deepCopy()->getPtr(), m_op, m_tol);
1883    case G_BINARY:    return new DataLazy(m_left->deepCopy()->getPtr(),m_right->deepCopy()->getPtr(),m_op);    case G_BINARY:    return new DataLazy(m_left->deepCopy()->getPtr(),m_right->deepCopy()->getPtr(),m_op);
1884    case G_NP1OUT: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(),m_op);    case G_NP1OUT: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(),m_op);
1885    case G_TENSORPROD: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);    case G_TENSORPROD: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
1886      case G_NP1OUT_P:   return new DataLazy(m_left->deepCopy()->getPtr(),m_op,  m_axis_offset);
1887      case G_NP1OUT_2P:  return new DataLazy(m_left->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
1888    default:    default:
1889      throw DataException("Programmer error - do not know how to deepcopy operator "+opToString(m_op)+".");      throw DataException("Programmer error - do not know how to deepcopy operator "+opToString(m_op)+".");
1890    }    }
1891  }  }
1892    
1893    
1894    
1895  // There is no single, natural interpretation of getLength on DataLazy.  // There is no single, natural interpretation of getLength on DataLazy.
1896  // Instances of DataReady can look at the size of their vectors.  // Instances of DataReady can look at the size of their vectors.
1897  // For lazy though, it could be the size the data would be if it were resolved;  // For lazy though, it could be the size the data would be if it were resolved;
