/[escript]/branches/clazy/escriptcore/src/DataLazy.cpp
ViewVC logotype

Diff of /branches/clazy/escriptcore/src/DataLazy.cpp

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

branches/schroedinger/escript/src/DataLazy.cpp revision 1926 by jfenwick, Fri Oct 24 05:54:08 2008 UTC trunk/escript/src/DataLazy.cpp revision 2737 by jfenwick, Tue Nov 3 00:44:00 2009 UTC
# Line 1  Line 1 
1    
2  /*******************************************************  /*******************************************************
3  *  *
4  * Copyright (c) 2003-2008 by University of Queensland  * Copyright (c) 2003-2009 by University of Queensland
5  * Earth Systems Science Computational Center (ESSCC)  * Earth Systems Science Computational Center (ESSCC)
6  * http://www.uq.edu.au/esscc  * http://www.uq.edu.au/esscc
7  *  *
# Line 26  Line 26 
26  #include "DataTypes.h"  #include "DataTypes.h"
27  #include "Data.h"  #include "Data.h"
28  #include "UnaryFuncs.h"     // for escript::fsign  #include "UnaryFuncs.h"     // for escript::fsign
29    #include "Utils.h"
30    
31    #include "EscriptParams.h"
32    
33    #include <iomanip>      // for some fancy formatting in debug
34    
35    // #define LAZYDEBUG(X) if (privdebug){X;}
36    #define LAZYDEBUG(X)
37    namespace
38    {
39    bool privdebug=false;
40    
41    #define ENABLEDEBUG privdebug=true;
42    #define DISABLEDEBUG privdebug=false;
43    }
44    
45    // #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {cerr << "\n!!!!!!! SIZE LIMIT EXCEEDED " << m_children << ";" << m_height << endl << toString() << endl;resolveToIdentity();}
46    
47    #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {resolveToIdentity();}
48    
49    
50  /*  /*
51  How does DataLazy work?  How does DataLazy work?
# Line 47  I will refer to individual DataLazy obje Line 67  I will refer to individual DataLazy obje
67  Each node also stores:  Each node also stores:
68  - m_readytype \in {'E','T','C','?'} ~ indicates what sort of DataReady would be produced if the expression was  - m_readytype \in {'E','T','C','?'} ~ indicates what sort of DataReady would be produced if the expression was
69      evaluated.      evaluated.
 - m_length ~ how many values would be stored in the answer if the expression was evaluated.  
70  - m_buffsrequired ~ the larged number of samples which would need to be kept simultaneously in order to  - m_buffsrequired ~ the larged number of samples which would need to be kept simultaneously in order to
71      evaluate the expression.      evaluate the expression.
72  - m_samplesize ~ the number of doubles stored in a sample.  - m_samplesize ~ the number of doubles stored in a sample.
# Line 69  The convention that I use, is that the r Line 88  The convention that I use, is that the r
88    
89  For expressions which evaluate to Constant or Tagged, there is a different evaluation method.  For expressions which evaluate to Constant or Tagged, there is a different evaluation method.
90  The collapse method invokes the (non-lazy) operations on the Data class to evaluate the expression.  The collapse method invokes the (non-lazy) operations on the Data class to evaluate the expression.
91    
92    To add a new operator you need to do the following (plus anything I might have forgotten - adding a new group for example):
93    1) Add to the ES_optype.
94    2) determine what opgroup your operation belongs to (X)
95    3) add a string for the op to the end of ES_opstrings
96    4) increase ES_opcount
97    5) add an entry (X) to opgroups
98    6) add an entry to the switch in collapseToReady
99    7) add an entry to resolveX
100  */  */
101    
102    
# Line 78  using namespace boost; Line 106  using namespace boost;
106  namespace escript  namespace escript
107  {  {
108    
 const std::string&  
 opToString(ES_optype op);  
   
109  namespace  namespace
110  {  {
111    
# Line 89  enum ES_opgroup Line 114  enum ES_opgroup
114     G_UNKNOWN,     G_UNKNOWN,
115     G_IDENTITY,     G_IDENTITY,
116     G_BINARY,        // pointwise operations with two arguments     G_BINARY,        // pointwise operations with two arguments
117     G_UNARY      // pointwise operations with one argument     G_UNARY,     // pointwise operations with one argument
118       G_UNARY_P,       // pointwise operations with one argument, requiring a parameter
119       G_NP1OUT,        // non-pointwise op with one output
120       G_NP1OUT_P,      // non-pointwise op with one output requiring a parameter
121       G_TENSORPROD,    // general tensor product
122       G_NP1OUT_2P,     // non-pointwise op with one output requiring two params
123       G_REDUCTION      // non-pointwise unary op with a scalar output
124  };  };
125    
126    
# Line 100  string ES_opstrings[]={"UNKNOWN","IDENTI Line 131  string ES_opstrings[]={"UNKNOWN","IDENTI
131              "asin","acos","atan","sinh","cosh","tanh","erf",              "asin","acos","atan","sinh","cosh","tanh","erf",
132              "asinh","acosh","atanh",              "asinh","acosh","atanh",
133              "log10","log","sign","abs","neg","pos","exp","sqrt",              "log10","log","sign","abs","neg","pos","exp","sqrt",
134              "1/","where>0","where<0","where>=0","where<=0"};              "1/","where>0","where<0","where>=0","where<=0", "where<>0","where=0",
135  int ES_opcount=33;              "symmetric","nonsymmetric",
136                "prod",
137                "transpose", "trace",
138                "swapaxes",
139                "minval", "maxval"};
140    int ES_opcount=43;
141  ES_opgroup opgroups[]={G_UNKNOWN,G_IDENTITY,G_BINARY,G_BINARY,G_BINARY,G_BINARY, G_BINARY,  ES_opgroup opgroups[]={G_UNKNOWN,G_IDENTITY,G_BINARY,G_BINARY,G_BINARY,G_BINARY, G_BINARY,
142              G_UNARY,G_UNARY,G_UNARY, //10              G_UNARY,G_UNARY,G_UNARY, //10
143              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,    // 17              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,    // 17
144              G_UNARY,G_UNARY,G_UNARY,                    // 20              G_UNARY,G_UNARY,G_UNARY,                    // 20
145              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,        // 28              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,    // 28
146              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY};              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY, G_UNARY_P, G_UNARY_P,      // 35
147                G_NP1OUT,G_NP1OUT,
148                G_TENSORPROD,
149                G_NP1OUT_P, G_NP1OUT_P,
150                G_NP1OUT_2P,
151                G_REDUCTION, G_REDUCTION};
152  inline  inline
153  ES_opgroup  ES_opgroup
154  getOpgroup(ES_optype op)  getOpgroup(ES_optype op)
# Line 124  resultFS(DataAbstract_ptr left, DataAbst Line 165  resultFS(DataAbstract_ptr left, DataAbst
165      // that way, if interpolate is required in any other op we can just throw a      // that way, if interpolate is required in any other op we can just throw a
166      // programming error exception.      // programming error exception.
167    
168      FunctionSpace l=left->getFunctionSpace();
169      if (left->getFunctionSpace()!=right->getFunctionSpace())    FunctionSpace r=right->getFunctionSpace();
170      {    if (l!=r)
171          throw DataException("FunctionSpaces not equal - interpolation not supported on lazy data.");    {
172      }      if (r.probeInterpolation(l))
173      return left->getFunctionSpace();      {
174        return l;
175        }
176        if (l.probeInterpolation(r))
177        {
178        return r;
179        }
180        throw DataException("Cannot interpolate between the FunctionSpaces given for operation "+opToString(op)+".");
181      }
182      return l;
183  }  }
184    
185  // return the shape of the result of "left op right"  // return the shape of the result of "left op right"
186    // the shapes resulting from tensor product are more complex to compute so are worked out elsewhere
187  DataTypes::ShapeType  DataTypes::ShapeType
188  resultShape(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)  resultShape(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
189  {  {
190      if (left->getShape()!=right->getShape())      if (left->getShape()!=right->getShape())
191      {      {
192        if (getOpgroup(op)!=G_BINARY)        if ((getOpgroup(op)!=G_BINARY) && (getOpgroup(op)!=G_NP1OUT))
193        {        {
194          throw DataException("Shapes not the name - shapes must match for (point)binary operations.");          throw DataException("Shapes not the name - shapes must match for (point)binary operations.");
195        }        }
196    
197        if (left->getRank()==0)   // we need to allow scalar * anything        if (left->getRank()==0)   // we need to allow scalar * anything
198        {        {
199          return right->getShape();          return right->getShape();
# Line 155  resultShape(DataAbstract_ptr left, DataA Line 207  resultShape(DataAbstract_ptr left, DataA
207      return left->getShape();      return left->getShape();
208  }  }
209    
210  // determine the number of points in the result of "left op right"  // return the shape for "op left"
211  size_t  
212  resultLength(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)  DataTypes::ShapeType
213    resultShape(DataAbstract_ptr left, ES_optype op, int axis_offset)
214  {  {
215     switch (getOpgroup(op))      switch(op)
216     {      {
217     case G_BINARY: return left->getLength();          case TRANS:
218     case G_UNARY: return left->getLength();         {            // for the scoping of variables
219     default:          const DataTypes::ShapeType& s=left->getShape();
220      throw DataException("Programmer Error - attempt to getLength() for operator "+opToString(op)+".");          DataTypes::ShapeType sh;
221     }          int rank=left->getRank();
222            if (axis_offset<0 || axis_offset>rank)
223            {
224                   throw DataException("Error - Data::transpose must have 0 <= axis_offset <= rank=" + rank);
225                }
226                for (int i=0; i<rank; i++)
227            {
228               int index = (axis_offset+i)%rank;
229                   sh.push_back(s[index]); // Append to new shape
230                }
231            return sh;
232           }
233        break;
234        case TRACE:
235           {
236            int rank=left->getRank();
237            if (rank<2)
238            {
239               throw DataException("Trace can only be computed for objects with rank 2 or greater.");
240            }
241            if ((axis_offset>rank-2) || (axis_offset<0))
242            {
243               throw DataException("Trace: axis offset must lie between 0 and rank-2 inclusive.");
244            }
245            if (rank==2)
246            {
247               return DataTypes::scalarShape;
248            }
249            else if (rank==3)
250            {
251               DataTypes::ShapeType sh;
252                   if (axis_offset==0)
253               {
254                    sh.push_back(left->getShape()[2]);
255                   }
256                   else     // offset==1
257               {
258                sh.push_back(left->getShape()[0]);
259                   }
260               return sh;
261            }
262            else if (rank==4)
263            {
264               DataTypes::ShapeType sh;
265               const DataTypes::ShapeType& s=left->getShape();
266                   if (axis_offset==0)
267               {
268                    sh.push_back(s[2]);
269                    sh.push_back(s[3]);
270                   }
271                   else if (axis_offset==1)
272               {
273                    sh.push_back(s[0]);
274                    sh.push_back(s[3]);
275                   }
276               else     // offset==2
277               {
278                sh.push_back(s[0]);
279                sh.push_back(s[1]);
280               }
281               return sh;
282            }
283            else        // unknown rank
284            {
285               throw DataException("Error - Data::trace can only be calculated for rank 2, 3 or 4 object.");
286            }
287           }
288        break;
289            default:
290        throw DataException("Programmer error - resultShape(left,op) can't compute shapes for operator "+opToString(op)+".");
291        }
292    }
293    
294    DataTypes::ShapeType
295    SwapShape(DataAbstract_ptr left, const int axis0, const int axis1)
296    {
297         // This code taken from the Data.cpp swapaxes() method
298         // Some of the checks are probably redundant here
299         int axis0_tmp,axis1_tmp;
300         const DataTypes::ShapeType& s=left->getShape();
301         DataTypes::ShapeType out_shape;
302         // Here's the equivalent of python s_out=s[axis_offset:]+s[:axis_offset]
303         // which goes thru all shape vector elements starting with axis_offset (at index=rank wrap around to 0)
304         int rank=left->getRank();
305         if (rank<2) {
306            throw DataException("Error - Data::swapaxes argument must have at least rank 2.");
307         }
308         if (axis0<0 || axis0>rank-1) {
309            throw DataException("Error - Data::swapaxes: axis0 must be between 0 and rank-1=" + rank-1);
310         }
311         if (axis1<0 || axis1>rank-1) {
312             throw DataException("Error - Data::swapaxes: axis1 must be between 0 and rank-1=" + rank-1);
313         }
314         if (axis0 == axis1) {
315             throw DataException("Error - Data::swapaxes: axis indices must be different.");
316         }
317         if (axis0 > axis1) {
318             axis0_tmp=axis1;
319             axis1_tmp=axis0;
320         } else {
321             axis0_tmp=axis0;
322             axis1_tmp=axis1;
323         }
324         for (int i=0; i<rank; i++) {
325           if (i == axis0_tmp) {
326              out_shape.push_back(s[axis1_tmp]);
327           } else if (i == axis1_tmp) {
328              out_shape.push_back(s[axis0_tmp]);
329           } else {
330              out_shape.push_back(s[i]);
331           }
332         }
333        return out_shape;
334    }
335    
336    
337    // determine the output shape for the general tensor product operation
338    // the additional parameters return information required later for the product
339    // the majority of this code is copy pasted from C_General_Tensor_Product
340    DataTypes::ShapeType
341    GTPShape(DataAbstract_ptr left, DataAbstract_ptr right, int axis_offset, int transpose, int& SL, int& SM, int& SR)
342    {
343        
344      // Get rank and shape of inputs
345      int rank0 = left->getRank();
346      int rank1 = right->getRank();
347      const DataTypes::ShapeType& shape0 = left->getShape();
348      const DataTypes::ShapeType& shape1 = right->getShape();
349    
350      // Prepare for the loops of the product and verify compatibility of shapes
351      int start0=0, start1=0;
352      if (transpose == 0)       {}
353      else if (transpose == 1)  { start0 = axis_offset; }
354      else if (transpose == 2)  { start1 = rank1-axis_offset; }
355      else              { throw DataException("DataLazy GeneralTensorProduct Constructor: Error - transpose should be 0, 1 or 2"); }
356    
357      if (rank0<axis_offset)
358      {
359        throw DataException("DataLazy GeneralTensorProduct Constructor: Error - rank of left < axisoffset");
360      }
361    
362      // Adjust the shapes for transpose
363      DataTypes::ShapeType tmpShape0(rank0);    // pre-sizing the vectors rather
364      DataTypes::ShapeType tmpShape1(rank1);    // than using push_back
365      for (int i=0; i<rank0; i++)   { tmpShape0[i]=shape0[(i+start0)%rank0]; }
366      for (int i=0; i<rank1; i++)   { tmpShape1[i]=shape1[(i+start1)%rank1]; }
367    
368      // Prepare for the loops of the product
369      SL=1, SM=1, SR=1;
370      for (int i=0; i<rank0-axis_offset; i++)   {
371        SL *= tmpShape0[i];
372      }
373      for (int i=rank0-axis_offset; i<rank0; i++)   {
374        if (tmpShape0[i] != tmpShape1[i-(rank0-axis_offset)]) {
375          throw DataException("C_GeneralTensorProduct: Error - incompatible shapes");
376        }
377        SM *= tmpShape0[i];
378      }
379      for (int i=axis_offset; i<rank1; i++)     {
380        SR *= tmpShape1[i];
381      }
382    
383      // Define the shape of the output (rank of shape is the sum of the loop ranges below)
384      DataTypes::ShapeType shape2(rank0+rank1-2*axis_offset);  
385      {         // block to limit the scope of out_index
386         int out_index=0;
387         for (int i=0; i<rank0-axis_offset; i++, ++out_index) { shape2[out_index]=tmpShape0[i]; } // First part of arg_0_Z
388         for (int i=axis_offset; i<rank1; i++, ++out_index)   { shape2[out_index]=tmpShape1[i]; } // Last part of arg_1_Z
389      }
390    
391      if (shape2.size()>ESCRIPT_MAX_DATA_RANK)
392      {
393         ostringstream os;
394         os << "C_GeneralTensorProduct: Error - Attempt to create a rank " << shape2.size() << " object. The maximum rank is " << ESCRIPT_MAX_DATA_RANK << ".";
395         throw DataException(os.str());
396      }
397    
398      return shape2;
399  }  }
400    
401  // determine the number of samples requires to evaluate an expression combining left and right  // determine the number of samples requires to evaluate an expression combining left and right
402    // NP1OUT needs an extra buffer because we can't write the answers over the top of the input.
403    // The same goes for G_TENSORPROD
404    // It might seem that pointwise binary ops (G_BINARY) could be written over the top of the lefts.
405    // This would be true were it not for the possibility that the LHS could be a scalar which needs to be examined
406    // multiple times
407  int  int
408  calcBuffs(const DataLazy_ptr& left, const DataLazy_ptr& right, ES_optype op)  calcBuffs(const DataLazy_ptr& left, const DataLazy_ptr& right, ES_optype op)
409  {  {
410     switch(getOpgroup(op))     switch(getOpgroup(op))
411     {     {
412     case G_IDENTITY: return 1;     case G_IDENTITY: return 1;
413     case G_BINARY: return max(left->getBuffsRequired(),right->getBuffsRequired()+1);     case G_BINARY: return 1+max(left->getBuffsRequired(),right->getBuffsRequired()+1);
414     case G_UNARY: return max(left->getBuffsRequired(),1);     case G_REDUCTION:
415       case G_UNARY:
416       case G_UNARY_P: return max(left->getBuffsRequired(),1);
417       case G_NP1OUT: return 1+max(left->getBuffsRequired(),1);
418       case G_NP1OUT_P: return 1+max(left->getBuffsRequired(),1);
419       case G_TENSORPROD: return 1+max(left->getBuffsRequired(),right->getBuffsRequired()+1);
420       case G_NP1OUT_2P: return 1+max(left->getBuffsRequired(),1);
421     default:     default:
422      throw DataException("Programmer Error - attempt to calcBuffs() for operator "+opToString(op)+".");      throw DataException("Programmer Error - attempt to calcBuffs() for operator "+opToString(op)+".");
423     }     }
# Line 198  opToString(ES_optype op) Line 439  opToString(ES_optype op)
439    return ES_opstrings[op];    return ES_opstrings[op];
440  }  }
441    
442    #ifdef LAZY_NODE_STORAGE
443    void DataLazy::LazyNodeSetup()
444    {
445    #ifdef _OPENMP
446        int numthreads=omp_get_max_threads();
447        m_samples.resize(numthreads*m_samplesize);
448        m_sampleids=new int[numthreads];
449        for (int i=0;i<numthreads;++i)
450        {
451            m_sampleids[i]=-1;  
452        }
453    #else
454        m_samples.resize(m_samplesize);
455        m_sampleids=new int[1];
456        m_sampleids[0]=-1;
457    #endif  // _OPENMP
458    }
459    #endif   // LAZY_NODE_STORAGE
460    
461    
462    // Creates an identity node
463  DataLazy::DataLazy(DataAbstract_ptr p)  DataLazy::DataLazy(DataAbstract_ptr p)
464      : parent(p->getFunctionSpace(),p->getShape()),      : parent(p->getFunctionSpace(),p->getShape())
465      m_op(IDENTITY)  #ifdef LAZY_NODE_STORAGE
466        ,m_sampleids(0),
467        m_samples(1)
468    #endif
469  {  {
470     if (p->isLazy())     if (p->isLazy())
471     {     {
# Line 212  DataLazy::DataLazy(DataAbstract_ptr p) Line 476  DataLazy::DataLazy(DataAbstract_ptr p)
476     }     }
477     else     else
478     {     {
479      m_id=dynamic_pointer_cast<DataReady>(p);      p->makeLazyShared();
480      if(p->isConstant()) {m_readytype='C';}      DataReady_ptr dr=dynamic_pointer_cast<DataReady>(p);
481      else if(p->isExpanded()) {m_readytype='E';}      makeIdentity(dr);
482      else if (p->isTagged()) {m_readytype='T';}  LAZYDEBUG(cout << "Wrapping " << dr.get() << " id=" << m_id.get() << endl;)
     else {throw DataException("Unknown DataReady instance in DataLazy constructor.");}  
483     }     }
484     m_length=p->getLength();  LAZYDEBUG(cout << "(1)Lazy created with " << m_samplesize << endl;)
    m_buffsRequired=1;  
    m_samplesize=getNumDPPSample()*getNoValues();  
 cout << "(1)Lazy created with " << m_samplesize << endl;  
485  }  }
486    
   
   
   
487  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op)  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op)
488      : parent(left->getFunctionSpace(),left->getShape()),      : parent(left->getFunctionSpace(),(getOpgroup(op)!=G_REDUCTION)?left->getShape():DataTypes::scalarShape),
489      m_op(op)      m_op(op),
490        m_axis_offset(0),
491        m_transpose(0),
492        m_SL(0), m_SM(0), m_SR(0)
493  {  {
494     if (getOpgroup(op)!=G_UNARY)     if ((getOpgroup(op)!=G_UNARY) && (getOpgroup(op)!=G_NP1OUT) && (getOpgroup(op)!=G_REDUCTION))
495     {     {
496      throw DataException("Programmer error - constructor DataLazy(left, op) will only process UNARY operations.");      throw DataException("Programmer error - constructor DataLazy(left, op) will only process UNARY operations.");
497     }     }
498    
499     DataLazy_ptr lleft;     DataLazy_ptr lleft;
500     if (!left->isLazy())     if (!left->isLazy())
501     {     {
# Line 245  DataLazy::DataLazy(DataAbstract_ptr left Line 506  DataLazy::DataLazy(DataAbstract_ptr left
506      lleft=dynamic_pointer_cast<DataLazy>(left);      lleft=dynamic_pointer_cast<DataLazy>(left);
507     }     }
508     m_readytype=lleft->m_readytype;     m_readytype=lleft->m_readytype;
    m_length=left->getLength();  
509     m_left=lleft;     m_left=lleft;
510     m_buffsRequired=1;     m_buffsRequired=calcBuffs(m_left, m_right,m_op); // yeah m_right will be null at this point
511     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
512       m_maxsamplesize=max(m_samplesize,m_left->getMaxSampleSize());
513       m_children=m_left->m_children+1;
514       m_height=m_left->m_height+1;
515    #ifdef LAZY_NODE_STORAGE
516       LazyNodeSetup();
517    #endif
518       SIZELIMIT
519  }  }
520    
521    
522    // In this constructor we need to consider interpolation
523  DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)  DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
524      : parent(resultFS(left,right,op), resultShape(left,right,op)),      : parent(resultFS(left,right,op), resultShape(left,right,op)),
525      m_op(op)      m_op(op),
526        m_SL(0), m_SM(0), m_SR(0)
527  {  {
528     if (getOpgroup(op)!=G_BINARY)  LAZYDEBUG(cout << "Forming operator with " << left.get() << " " << right.get() << endl;)
529       if ((getOpgroup(op)!=G_BINARY))
530     {     {
531      throw DataException("Programmer error - constructor DataLazy(left, right, op) will only process BINARY operations.");      throw DataException("Programmer error - constructor DataLazy(left, right, op) will only process BINARY operations.");
532     }     }
533    
534       if (getFunctionSpace()!=left->getFunctionSpace())    // left needs to be interpolated
535       {
536        FunctionSpace fs=getFunctionSpace();
537        Data ltemp(left);
538        Data tmp(ltemp,fs);
539        left=tmp.borrowDataPtr();
540       }
541       if (getFunctionSpace()!=right->getFunctionSpace())   // right needs to be interpolated
542       {
543        Data tmp(Data(right),getFunctionSpace());
544        right=tmp.borrowDataPtr();
545    LAZYDEBUG(cout << "Right interpolation required " << right.get() << endl;)
546       }
547       left->operandCheck(*right);
548    
549     if (left->isLazy())          // the children need to be DataLazy. Wrap them in IDENTITY if required     if (left->isLazy())          // the children need to be DataLazy. Wrap them in IDENTITY if required
550     {     {
551      m_left=dynamic_pointer_cast<DataLazy>(left);      m_left=dynamic_pointer_cast<DataLazy>(left);
552    LAZYDEBUG(cout << "Left is " << m_left->toString() << endl;)
553     }     }
554     else     else
555     {     {
556      m_left=DataLazy_ptr(new DataLazy(left));      m_left=DataLazy_ptr(new DataLazy(left));
557    LAZYDEBUG(cout << "Left " << left.get() << " wrapped " << m_left->m_id.get() << endl;)
558     }     }
559     if (right->isLazy())     if (right->isLazy())
560     {     {
561      m_right=dynamic_pointer_cast<DataLazy>(right);      m_right=dynamic_pointer_cast<DataLazy>(right);
562    LAZYDEBUG(cout << "Right is " << m_right->toString() << endl;)
563     }     }
564     else     else
565     {     {
566      m_right=DataLazy_ptr(new DataLazy(right));      m_right=DataLazy_ptr(new DataLazy(right));
567    LAZYDEBUG(cout << "Right " << right.get() << " wrapped " << m_right->m_id.get() << endl;)
568     }     }
569     char lt=m_left->m_readytype;     char lt=m_left->m_readytype;
570     char rt=m_right->m_readytype;     char rt=m_right->m_readytype;
# Line 290  DataLazy::DataLazy(DataAbstract_ptr left Line 580  DataLazy::DataLazy(DataAbstract_ptr left
580     {     {
581      m_readytype='C';      m_readytype='C';
582     }     }
583     m_length=resultLength(m_left,m_right,m_op);     m_samplesize=getNumDPPSample()*getNoValues();
584     m_samplesize=getNumDPPSample()*getNoValues();         m_maxsamplesize=max(max(m_samplesize,m_right->getMaxSampleSize()),m_left->getMaxSampleSize());  
585     m_buffsRequired=calcBuffs(m_left, m_right,m_op);     m_buffsRequired=calcBuffs(m_left, m_right,m_op);
586  cout << "(3)Lazy created with " << m_samplesize << endl;     m_children=m_left->m_children+m_right->m_children+2;
587       m_height=max(m_left->m_height,m_right->m_height)+1;
588    #ifdef LAZY_NODE_STORAGE
589       LazyNodeSetup();
590    #endif
591       SIZELIMIT
592    LAZYDEBUG(cout << "(3)Lazy created with " << m_samplesize << endl;)
593  }  }
594    
595    DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op, int axis_offset, int transpose)
596        : parent(resultFS(left,right,op), GTPShape(left,right, axis_offset, transpose, m_SL,m_SM, m_SR)),
597        m_op(op),
598        m_axis_offset(axis_offset),
599        m_transpose(transpose)
600    {
601       if ((getOpgroup(op)!=G_TENSORPROD))
602       {
603        throw DataException("Programmer error - constructor DataLazy(left, right, op, ax, tr) will only process BINARY operations which require parameters.");
604       }
605       if ((transpose>2) || (transpose<0))
606       {
607        throw DataException("DataLazy GeneralTensorProduct constructor: Error - transpose should be 0, 1 or 2");
608       }
609       if (getFunctionSpace()!=left->getFunctionSpace())    // left needs to be interpolated
610       {
611        FunctionSpace fs=getFunctionSpace();
612        Data ltemp(left);
613        Data tmp(ltemp,fs);
614        left=tmp.borrowDataPtr();
615       }
616       if (getFunctionSpace()!=right->getFunctionSpace())   // right needs to be interpolated
617       {
618        Data tmp(Data(right),getFunctionSpace());
619        right=tmp.borrowDataPtr();
620       }
621    //    left->operandCheck(*right);
622    
623       if (left->isLazy())          // the children need to be DataLazy. Wrap them in IDENTITY if required
624       {
625        m_left=dynamic_pointer_cast<DataLazy>(left);
626       }
627       else
628       {
629        m_left=DataLazy_ptr(new DataLazy(left));
630       }
631       if (right->isLazy())
632       {
633        m_right=dynamic_pointer_cast<DataLazy>(right);
634       }
635       else
636       {
637        m_right=DataLazy_ptr(new DataLazy(right));
638       }
639       char lt=m_left->m_readytype;
640       char rt=m_right->m_readytype;
641       if (lt=='E' || rt=='E')
642       {
643        m_readytype='E';
644       }
645       else if (lt=='T' || rt=='T')
646       {
647        m_readytype='T';
648       }
649       else
650       {
651        m_readytype='C';
652       }
653       m_samplesize=getNumDPPSample()*getNoValues();
654       m_maxsamplesize=max(max(m_samplesize,m_right->getMaxSampleSize()),m_left->getMaxSampleSize());  
655       m_buffsRequired=calcBuffs(m_left, m_right,m_op);
656       m_children=m_left->m_children+m_right->m_children+2;
657       m_height=max(m_left->m_height,m_right->m_height)+1;
658    #ifdef LAZY_NODE_STORAGE
659       LazyNodeSetup();
660    #endif
661       SIZELIMIT
662    LAZYDEBUG(cout << "(4)Lazy created with " << m_samplesize << endl;)
663    }
664    
665    
666    DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, int axis_offset)
667        : parent(left->getFunctionSpace(), resultShape(left,op, axis_offset)),
668        m_op(op),
669        m_axis_offset(axis_offset),
670        m_transpose(0),
671        m_tol(0)
672    {
673       if ((getOpgroup(op)!=G_NP1OUT_P))
674       {
675        throw DataException("Programmer error - constructor DataLazy(left, op, ax) will only process UNARY operations which require parameters.");
676       }
677       DataLazy_ptr lleft;
678       if (!left->isLazy())
679       {
680        lleft=DataLazy_ptr(new DataLazy(left));
681       }
682       else
683       {
684        lleft=dynamic_pointer_cast<DataLazy>(left);
685       }
686       m_readytype=lleft->m_readytype;
687       m_left=lleft;
688       m_buffsRequired=calcBuffs(m_left, m_right,m_op); // yeah m_right will be null at this point
689       m_samplesize=getNumDPPSample()*getNoValues();
690       m_maxsamplesize=max(m_samplesize,m_left->getMaxSampleSize());
691       m_children=m_left->m_children+1;
692       m_height=m_left->m_height+1;
693    #ifdef LAZY_NODE_STORAGE
694       LazyNodeSetup();
695    #endif
696       SIZELIMIT
697    LAZYDEBUG(cout << "(5)Lazy created with " << m_samplesize << endl;)
698    }
699    
700    DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, double tol)
701        : parent(left->getFunctionSpace(), left->getShape()),
702        m_op(op),
703        m_axis_offset(0),
704        m_transpose(0),
705        m_tol(tol)
706    {
707       if ((getOpgroup(op)!=G_UNARY_P))
708       {
709        throw DataException("Programmer error - constructor DataLazy(left, op, tol) will only process UNARY operations which require parameters.");
710       }
711       DataLazy_ptr lleft;
712       if (!left->isLazy())
713       {
714        lleft=DataLazy_ptr(new DataLazy(left));
715       }
716       else
717       {
718        lleft=dynamic_pointer_cast<DataLazy>(left);
719       }
720       m_readytype=lleft->m_readytype;
721       m_left=lleft;
722       m_buffsRequired=calcBuffs(m_left, m_right,m_op); // yeah m_right will be null at this point
723       m_samplesize=getNumDPPSample()*getNoValues();
724       m_maxsamplesize=max(m_samplesize,m_left->getMaxSampleSize());
725       m_children=m_left->m_children+1;
726       m_height=m_left->m_height+1;
727    #ifdef LAZY_NODE_STORAGE
728       LazyNodeSetup();
729    #endif
730       SIZELIMIT
731    LAZYDEBUG(cout << "(6)Lazy created with " << m_samplesize << endl;)
732    }
733    
734    
735    DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, const int axis0, const int axis1)
736        : parent(left->getFunctionSpace(), SwapShape(left,axis0,axis1)),
737        m_op(op),
738        m_axis_offset(axis0),
739        m_transpose(axis1),
740        m_tol(0)
741    {
742       if ((getOpgroup(op)!=G_NP1OUT_2P))
743       {
744        throw DataException("Programmer error - constructor DataLazy(left, op, tol) will only process UNARY operations which require two integer parameters.");
745       }
746       DataLazy_ptr lleft;
747       if (!left->isLazy())
748       {
749        lleft=DataLazy_ptr(new DataLazy(left));
750       }
751       else
752       {
753        lleft=dynamic_pointer_cast<DataLazy>(left);
754       }
755       m_readytype=lleft->m_readytype;
756       m_left=lleft;
757       m_buffsRequired=calcBuffs(m_left, m_right,m_op); // yeah m_right will be null at this point
758       m_samplesize=getNumDPPSample()*getNoValues();
759       m_maxsamplesize=max(m_samplesize,m_left->getMaxSampleSize());
760       m_children=m_left->m_children+1;
761       m_height=m_left->m_height+1;
762    #ifdef LAZY_NODE_STORAGE
763       LazyNodeSetup();
764    #endif
765       SIZELIMIT
766    LAZYDEBUG(cout << "(7)Lazy created with " << m_samplesize << endl;)
767    }
768    
769  DataLazy::~DataLazy()  DataLazy::~DataLazy()
770  {  {
771    #ifdef LAZY_NODE_SETUP
772       delete[] m_sampleids;
773       delete[] m_samples;
774    #endif
775  }  }
776    
777    
# Line 309  DataLazy::getBuffsRequired() const Line 782  DataLazy::getBuffsRequired() const
782  }  }
783    
784    
785    size_t
786    DataLazy::getMaxSampleSize() const
787    {
788        return m_maxsamplesize;
789    }
790    
791    
792    
793    size_t
794    DataLazy::getSampleBufferSize() const
795    {
796        return m_maxsamplesize*(max(1,m_buffsRequired));
797    }
798    
799  /*  /*
800    \brief Evaluates the expression using methods on Data.    \brief Evaluates the expression using methods on Data.
801    This does the work for the collapse method.    This does the work for the collapse method.
# Line 328  DataLazy::collapseToReady() Line 815  DataLazy::collapseToReady()
815    DataReady_ptr pleft=m_left->collapseToReady();    DataReady_ptr pleft=m_left->collapseToReady();
816    Data left(pleft);    Data left(pleft);
817    Data right;    Data right;
818    if (getOpgroup(m_op)==G_BINARY)    if ((getOpgroup(m_op)==G_BINARY) || (getOpgroup(m_op)==G_TENSORPROD))
819    {    {
820      right=Data(m_right->collapseToReady());      right=Data(m_right->collapseToReady());
821    }    }
# Line 427  DataLazy::collapseToReady() Line 914  DataLazy::collapseToReady()
914      case LEZ:      case LEZ:
915      result=left.whereNonPositive();      result=left.whereNonPositive();
916      break;      break;
917        case NEZ:
918        result=left.whereNonZero(m_tol);
919        break;
920        case EZ:
921        result=left.whereZero(m_tol);
922        break;
923        case SYM:
924        result=left.symmetric();
925        break;
926        case NSYM:
927        result=left.nonsymmetric();
928        break;
929        case PROD:
930        result=C_GeneralTensorProduct(left,right,m_axis_offset, m_transpose);
931        break;
932        case TRANS:
933        result=left.transpose(m_axis_offset);
934        break;
935        case TRACE:
936        result=left.trace(m_axis_offset);
937        break;
938        case SWAP:
939        result=left.swapaxes(m_axis_offset, m_transpose);
940        break;
941        case MINVAL:
942        result=left.minval();
943        break;
944        case MAXVAL:
945        result=left.minval();
946        break;
947      default:      default:
948      throw DataException("Programmer error - do not know how to resolve operator "+opToString(m_op)+".");      throw DataException("Programmer error - collapseToReady does not know how to resolve operator "+opToString(m_op)+".");
949    }    }
950    return result.borrowReadyPtr();    return result.borrowReadyPtr();
951  }  }
# Line 455  DataLazy::collapse() Line 972  DataLazy::collapse()
972  }  }
973    
974  /*  /*
975    \brief Compute the value of the expression (binary operation) for the given sample.    \brief Compute the value of the expression (unary operation) for the given sample.
976    \return Vector which stores the value of the subexpression for the given sample.    \return Vector which stores the value of the subexpression for the given sample.
977    \param v A vector to store intermediate results.    \param v A vector to store intermediate results.
978    \param offset Index in v to begin storing results.    \param offset Index in v to begin storing results.
# Line 476  DataLazy::resolveUnary(ValueType& v, siz Line 993  DataLazy::resolveUnary(ValueType& v, siz
993    {    {
994      throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");      throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
995    }    }
996    const ValueType* vleft=m_left->resolveSample(v,offset,sampleNo,roffset);    const ValueType* vleft=m_left->resolveVectorSample(v,offset,sampleNo,roffset);
997    const double* left=&((*vleft)[roffset]);    const double* left=&((*vleft)[roffset]);
998    double* result=&(v[offset]);    double* result=&(v[offset]);
999    roffset=offset;    roffset=offset;
1000    switch (m_op)    switch (m_op)
1001    {    {
1002      case SIN:        case SIN:  
1003      tensor_unary_operation(m_samplesize, left, result, ::sin);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sin);
1004      break;      break;
1005      case COS:      case COS:
1006      tensor_unary_operation(m_samplesize, left, result, ::cos);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cos);
1007      break;      break;
1008      case TAN:      case TAN:
1009      tensor_unary_operation(m_samplesize, left, result, ::tan);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tan);
1010      break;      break;
1011      case ASIN:      case ASIN:
1012      tensor_unary_operation(m_samplesize, left, result, ::asin);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::asin);
1013      break;      break;
1014      case ACOS:      case ACOS:
1015      tensor_unary_operation(m_samplesize, left, result, ::acos);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::acos);
1016      break;      break;
1017      case ATAN:      case ATAN:
1018      tensor_unary_operation(m_samplesize, left, result, ::atan);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::atan);
1019      break;      break;
1020      case SINH:      case SINH:
1021      tensor_unary_operation(m_samplesize, left, result, ::sinh);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sinh);
1022      break;      break;
1023      case COSH:      case COSH:
1024      tensor_unary_operation(m_samplesize, left, result, ::cosh);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cosh);
1025      break;      break;
1026      case TANH:      case TANH:
1027      tensor_unary_operation(m_samplesize, left, result, ::tanh);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tanh);
1028      break;      break;
1029      case ERF:      case ERF:
1030  #ifdef _WIN32  #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1031      throw DataException("Error - Data:: erf function is not supported on _WIN32 platforms.");      throw DataException("Error - Data:: erf function is not supported on _WIN32 platforms.");
1032  #else  #else
1033      tensor_unary_operation(m_samplesize, left, result, ::erf);      tensor_unary_operation(m_samplesize, left, result, ::erf);
1034      break;      break;
1035  #endif  #endif
1036     case ASINH:     case ASINH:
1037  #ifdef _WIN32  #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1038      tensor_unary_operation(m_samplesize, left, result, escript::asinh_substitute);      tensor_unary_operation(m_samplesize, left, result, escript::asinh_substitute);
1039  #else  #else
1040      tensor_unary_operation(m_samplesize, left, result, ::asinh);      tensor_unary_operation(m_samplesize, left, result, ::asinh);
1041  #endif    #endif  
1042      break;      break;
1043     case ACOSH:     case ACOSH:
1044  #ifdef _WIN32  #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1045      tensor_unary_operation(m_samplesize, left, result, escript::acosh_substitute);      tensor_unary_operation(m_samplesize, left, result, escript::acosh_substitute);
1046  #else  #else
1047      tensor_unary_operation(m_samplesize, left, result, ::acosh);      tensor_unary_operation(m_samplesize, left, result, ::acosh);
1048  #endif    #endif  
1049      break;      break;
1050     case ATANH:     case ATANH:
1051  #ifdef _WIN32  #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1052      tensor_unary_operation(m_samplesize, left, result, escript::atanh_substitute);      tensor_unary_operation(m_samplesize, left, result, escript::atanh_substitute);
1053  #else  #else
1054      tensor_unary_operation(m_samplesize, left, result, ::atanh);      tensor_unary_operation(m_samplesize, left, result, ::atanh);
1055  #endif    #endif  
1056      break;      break;
1057      case LOG10:      case LOG10:
1058      tensor_unary_operation(m_samplesize, left, result, ::log10);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log10);
1059      break;      break;
1060      case LOG:      case LOG:
1061      tensor_unary_operation(m_samplesize, left, result, ::log);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log);
1062      break;      break;
1063      case SIGN:      case SIGN:
1064      tensor_unary_operation(m_samplesize, left, result, escript::fsign);      tensor_unary_operation(m_samplesize, left, result, escript::fsign);
1065      break;      break;
1066      case ABS:      case ABS:
1067      tensor_unary_operation(m_samplesize, left, result, ::fabs);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::fabs);
1068      break;      break;
1069      case NEG:      case NEG:
1070      tensor_unary_operation(m_samplesize, left, result, negate<double>());      tensor_unary_operation(m_samplesize, left, result, negate<double>());
# Line 558  DataLazy::resolveUnary(ValueType& v, siz Line 1075  DataLazy::resolveUnary(ValueType& v, siz
1075      throw DataException("Programmer error - POS not supported for lazy data.");      throw DataException("Programmer error - POS not supported for lazy data.");
1076      break;      break;
1077      case EXP:      case EXP:
1078      tensor_unary_operation(m_samplesize, left, result, ::exp);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::exp);
1079      break;      break;
1080      case SQRT:      case SQRT:
1081      tensor_unary_operation(m_samplesize, left, result, ::sqrt);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sqrt);
1082      break;      break;
1083      case RECIP:      case RECIP:
1084      tensor_unary_operation(m_samplesize, left, result, bind1st(divides<double>(),1.));      tensor_unary_operation(m_samplesize, left, result, bind1st(divides<double>(),1.));
# Line 578  DataLazy::resolveUnary(ValueType& v, siz Line 1095  DataLazy::resolveUnary(ValueType& v, siz
1095      case LEZ:      case LEZ:
1096      tensor_unary_operation(m_samplesize, left, result, bind2nd(less_equal<double>(),0.0));      tensor_unary_operation(m_samplesize, left, result, bind2nd(less_equal<double>(),0.0));
1097      break;      break;
1098    // There are actually G_UNARY_P but I don't see a compelling reason to treat them differently
1099        case NEZ:
1100        tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsGT(),m_tol));
1101        break;
1102        case EZ:
1103        tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsLTE(),m_tol));
1104        break;
1105    
1106      default:      default:
1107      throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");      throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");
# Line 586  DataLazy::resolveUnary(ValueType& v, siz Line 1110  DataLazy::resolveUnary(ValueType& v, siz
1110  }  }
1111    
1112    
1113    /*
1114      \brief Compute the value of the expression (reduction operation) for the given sample.
1115      \return Vector which stores the value of the subexpression for the given sample.
1116      \param v A vector to store intermediate results.
1117      \param offset Index in v to begin storing results.
1118      \param sampleNo Sample number to evaluate.
1119      \param roffset (output parameter) the offset in the return vector where the result begins.
1120    
1121      The return value will be an existing vector so do not deallocate it.
1122      If the result is stored in v it should be stored at the offset given.
1123      Everything from offset to the end of v should be considered available for this method to use.
1124    */
1125    DataTypes::ValueType*
1126    DataLazy::resolveReduction(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const
1127    {
1128        // we assume that any collapsing has been done before we get here
1129        // since we only have one argument we don't need to think about only
1130        // processing single points.
1131      if (m_readytype!='E')
1132      {
1133        throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
1134      }
1135      const ValueType* vleft=m_left->resolveVectorSample(v,offset,sampleNo,roffset);
1136      double* result=&(v[offset]);
1137      roffset=offset;
1138      unsigned int ndpps=getNumDPPSample();
1139      unsigned int psize=DataTypes::noValues(getShape());
1140      switch (m_op)
1141      {
1142        case MINVAL:
1143        {
1144          for (unsigned int z=0;z<ndpps;++z)
1145          {
1146             FMin op;
1147             *result=DataMaths::reductionOp(*vleft, m_left->getShape(), roffset, op, numeric_limits<double>::max());
1148             roffset+=psize;
1149             result++;
1150          }
1151        }
1152        break;
1153        case MAXVAL:
1154        {
1155          for (unsigned int z=0;z<ndpps;++z)
1156          {
1157             FMax op;
1158             *result=DataMaths::reductionOp(*vleft, m_left->getShape(), roffset, op, numeric_limits<double>::max()*-1);
1159             roffset+=psize;
1160             result++;
1161          }
1162        }
1163        break;
1164        default:
1165        throw DataException("Programmer error - resolveReduction can not resolve operator "+opToString(m_op)+".");
1166      }
1167      return &v;
1168    }
1169    
1170    
1171    
1172    /*
1173      \brief Compute the value of the expression (unary operation) for the given sample.
1174      \return Vector which stores the value of the subexpression for the given sample.
1175      \param v A vector to store intermediate results.
1176      \param offset Index in v to begin storing results.
1177      \param sampleNo Sample number to evaluate.
1178      \param roffset (output parameter) the offset in the return vector where the result begins.
1179    
1180      The return value will be an existing vector so do not deallocate it.
1181      If the result is stored in v it should be stored at the offset given.
1182      Everything from offset to the end of v should be considered available for this method to use.
1183    */
1184    DataTypes::ValueType*
1185    DataLazy::resolveNP1OUT(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const
1186    {
1187        // we assume that any collapsing has been done before we get here
1188        // since we only have one argument we don't need to think about only
1189        // processing single points.
1190      if (m_readytype!='E')
1191      {
1192        throw DataException("Programmer error - resolveNP1OUT should only be called on expanded Data.");
1193      }
1194        // since we can't write the result over the input, we need a result offset further along
1195      size_t subroffset=roffset+m_samplesize;
1196    LAZYDEBUG(cerr << "subroffset=" << subroffset << endl;)
1197      const ValueType* vleft=m_left->resolveVectorSample(v,offset+m_left->m_samplesize,sampleNo,subroffset);
1198      roffset=offset;
1199      size_t loop=0;
1200      size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1201      size_t step=getNoValues();
1202      switch (m_op)
1203      {
1204        case SYM:
1205        for (loop=0;loop<numsteps;++loop)
1206        {
1207            DataMaths::symmetric(*vleft,m_left->getShape(),subroffset, v, getShape(), offset);
1208            subroffset+=step;
1209            offset+=step;
1210        }
1211        break;
1212        case NSYM:
1213        for (loop=0;loop<numsteps;++loop)
1214        {
1215            DataMaths::nonsymmetric(*vleft,m_left->getShape(),subroffset, v, getShape(), offset);
1216            subroffset+=step;
1217            offset+=step;
1218        }
1219        break;
1220        default:
1221        throw DataException("Programmer error - resolveNP1OUT can not resolve operator "+opToString(m_op)+".");
1222      }
1223      return &v;
1224    }
1225    
1226    /*
1227      \brief Compute the value of the expression (unary operation) for the given sample.
1228      \return Vector which stores the value of the subexpression for the given sample.
1229      \param v A vector to store intermediate results.
1230      \param offset Index in v to begin storing results.
1231      \param sampleNo Sample number to evaluate.
1232      \param roffset (output parameter) the offset in the return vector where the result begins.
1233    
1234      The return value will be an existing vector so do not deallocate it.
1235      If the result is stored in v it should be stored at the offset given.
1236      Everything from offset to the end of v should be considered available for this method to use.
1237    */
1238    DataTypes::ValueType*
1239    DataLazy::resolveNP1OUT_P(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const
1240    {
1241        // we assume that any collapsing has been done before we get here
1242        // since we only have one argument we don't need to think about only
1243        // processing single points.
1244      if (m_readytype!='E')
1245      {
1246        throw DataException("Programmer error - resolveNP1OUT_P should only be called on expanded Data.");
1247      }
1248        // since we can't write the result over the input, we need a result offset further along
1249      size_t subroffset;
1250      const ValueType* vleft=m_left->resolveVectorSample(v,offset+m_left->m_samplesize,sampleNo,subroffset);
1251    LAZYDEBUG(cerr << "srcsamplesize=" << offset+m_left->m_samplesize << " beg=" << subroffset << endl;)
1252    LAZYDEBUG(cerr << "Offset for 5800=" << getPointOffset(5800/getNumDPPSample(),5800%getNumDPPSample()) << endl;)
1253      roffset=offset;
1254      size_t loop=0;
1255      size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1256      size_t outstep=getNoValues();
1257      size_t instep=m_left->getNoValues();
1258    LAZYDEBUG(cerr << "instep=" << instep << " outstep=" << outstep<< " numsteps=" << numsteps << endl;)
1259      switch (m_op)
1260      {
1261        case TRACE:
1262        for (loop=0;loop<numsteps;++loop)
1263        {
1264    size_t zz=sampleNo*getNumDPPSample()+loop;
1265    if (zz==5800)
1266    {
1267    LAZYDEBUG(cerr << "point=" <<  zz<< endl;)
1268    LAZYDEBUG(cerr << "Input to  trace=" << DataTypes::pointToString(*vleft,m_left->getShape(),subroffset,"") << endl;)
1269    LAZYDEBUG(cerr << "Offset for point=" << getPointOffset(5800/getNumDPPSample(),5800%getNumDPPSample()) << " vs ";)
1270    LAZYDEBUG(cerr << subroffset << endl;)
1271    LAZYDEBUG(cerr << "output=" << offset << endl;)
1272    }
1273                DataMaths::trace(*vleft,m_left->getShape(),subroffset, v ,getShape(),offset,m_axis_offset);
1274    if (zz==5800)
1275    {
1276    LAZYDEBUG(cerr << "Result of trace=" << DataTypes::pointToString(v,getShape(),offset,"") << endl;)
1277    }
1278            subroffset+=instep;
1279            offset+=outstep;
1280        }
1281        break;
1282        case TRANS:
1283        for (loop=0;loop<numsteps;++loop)
1284        {
1285                DataMaths::transpose(*vleft,m_left->getShape(),subroffset, v,getShape(),offset,m_axis_offset);
1286            subroffset+=instep;
1287            offset+=outstep;
1288        }
1289        break;
1290        default:
1291        throw DataException("Programmer error - resolveNP1OUTP can not resolve operator "+opToString(m_op)+".");
1292      }
1293      return &v;
1294    }
1295    
1296    
1297    /*
1298      \brief Compute the value of the expression (unary operation with int params) for the given sample.
1299      \return Vector which stores the value of the subexpression for the given sample.
1300      \param v A vector to store intermediate results.
1301      \param offset Index in v to begin storing results.
1302      \param sampleNo Sample number to evaluate.
1303      \param roffset (output parameter) the offset in the return vector where the result begins.
1304    
1305      The return value will be an existing vector so do not deallocate it.
1306      If the result is stored in v it should be stored at the offset given.
1307      Everything from offset to the end of v should be considered available for this method to use.
1308    */
1309    DataTypes::ValueType*
1310    DataLazy::resolveNP1OUT_2P(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const
1311    {
1312        // we assume that any collapsing has been done before we get here
1313        // since we only have one argument we don't need to think about only
1314        // processing single points.
1315      if (m_readytype!='E')
1316      {
1317        throw DataException("Programmer error - resolveNP1OUT_2P should only be called on expanded Data.");
1318      }
1319        // since we can't write the result over the input, we need a result offset further along
1320      size_t subroffset;
1321      const ValueType* vleft=m_left->resolveVectorSample(v,offset+m_left->m_samplesize,sampleNo,subroffset);
1322    LAZYDEBUG(cerr << "srcsamplesize=" << offset+m_left->m_samplesize << " beg=" << subroffset << endl;)
1323    LAZYDEBUG(cerr << "Offset for 5800=" << getPointOffset(5800/getNumDPPSample(),5800%getNumDPPSample()) << endl;)
1324      roffset=offset;
1325      size_t loop=0;
1326      size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1327      size_t outstep=getNoValues();
1328      size_t instep=m_left->getNoValues();
1329    LAZYDEBUG(cerr << "instep=" << instep << " outstep=" << outstep<< " numsteps=" << numsteps << endl;)
1330      switch (m_op)
1331      {
1332        case SWAP:
1333        for (loop=0;loop<numsteps;++loop)
1334        {
1335                DataMaths::swapaxes(*vleft,m_left->getShape(),subroffset, v,getShape(),offset,m_axis_offset, m_transpose);
1336            subroffset+=instep;
1337            offset+=outstep;
1338        }
1339        break;
1340        default:
1341        throw DataException("Programmer error - resolveNP1OUT2P can not resolve operator "+opToString(m_op)+".");
1342      }
1343      return &v;
1344    }
1345    
1346    
1347  #define PROC_OP(X) \  
1348      for (int i=0;i<steps;++i,resultp+=resultStep) \  #define PROC_OP(TYPE,X)                               \
1349      { \      for (int j=0;j<onumsteps;++j)\
1350         tensor_binary_operation(chunksize, &((*left)[lroffset]), &((*right)[rroffset]), resultp, X); \      {\
1351         lroffset+=leftStep; \        for (int i=0;i<numsteps;++i,resultp+=resultStep) \
1352         rroffset+=rightStep; \        { \
1353    LAZYDEBUG(cout << "[left,right]=[" << lroffset << "," << rroffset << "]" << endl;)\
1354    LAZYDEBUG(cout << "{left,right}={" << (*left)[lroffset] << "," << (*right)[rroffset] << "}\n";)\
1355             tensor_binary_operation< TYPE >(chunksize, &((*left)[lroffset]), &((*right)[rroffset]), resultp, X); \
1356    LAZYDEBUG(cout << " result=      " << resultp[0] << endl;) \
1357             lroffset+=leftstep; \
1358             rroffset+=rightstep; \
1359          }\
1360          lroffset+=oleftstep;\
1361          rroffset+=orightstep;\
1362      }      }
1363    
1364  /*  /*
# Line 621  DataLazy::resolveUnary(ValueType& v, siz Line 1385  DataLazy::resolveUnary(ValueType& v, siz
1385  DataTypes::ValueType*  DataTypes::ValueType*
1386  DataLazy::resolveBinary(ValueType& v,  size_t offset, int sampleNo, size_t& roffset) const  DataLazy::resolveBinary(ValueType& v,  size_t offset, int sampleNo, size_t& roffset) const
1387  {  {
1388  cout << "Resolve binary: " << toString() << endl;  LAZYDEBUG(cout << "Resolve binary: " << toString() << endl;)
1389    
1390    size_t lroffset=0, rroffset=0;    // offsets in the left and right result vectors    size_t lroffset=0, rroffset=0;    // offsets in the left and right result vectors
1391      // first work out which of the children are expanded      // first work out which of the children are expanded
1392    bool leftExp=(m_left->m_readytype=='E');    bool leftExp=(m_left->m_readytype=='E');
1393    bool rightExp=(m_right->m_readytype=='E');    bool rightExp=(m_right->m_readytype=='E');
1394    bool bigloops=((leftExp && rightExp) || (!leftExp && !rightExp)); // is processing in single step?    if (!leftExp && !rightExp)
1395    int steps=(bigloops?1:getNumDPPSample());    {
1396    size_t chunksize=(bigloops? m_samplesize : getNoValues());    // if bigloops, pretend the whole sample is a datapoint      throw DataException("Programmer Error - please use collapse if neither argument has type 'E'.");
1397    if (m_left->getRank()!=m_right->getRank())    // need to deal with scalar * ? ops    }
1398    {    bool leftScalar=(m_left->getRank()==0);
1399      EsysAssert((m_left->getRank()==0) || (m_right->getRank()==0), "Error - Ranks must match unless one is 0.");    bool rightScalar=(m_right->getRank()==0);
1400      steps=getNumDPPSample()*max(m_left->getNoValues(),m_right->getNoValues());    if ((m_left->getRank()!=m_right->getRank()) && (!leftScalar && !rightScalar))
1401      chunksize=1;    // for scalar    {
1402    }          throw DataException("resolveBinary - ranks of arguments must match unless one of them is scalar.");
1403    int leftStep=((leftExp && !rightExp)? m_right->getNoValues() : 0);    }
1404    int rightStep=((rightExp && !leftExp)? m_left->getNoValues() : 0);    size_t leftsize=m_left->getNoValues();
1405    int resultStep=max(leftStep,rightStep);   // only one (at most) should be !=0    size_t rightsize=m_right->getNoValues();
1406      size_t chunksize=1;           // how many doubles will be processed in one go
1407      int leftstep=0;       // how far should the left offset advance after each step
1408      int rightstep=0;
1409      int numsteps=0;       // total number of steps for the inner loop
1410      int oleftstep=0;  // the o variables refer to the outer loop
1411      int orightstep=0; // The outer loop is only required in cases where there is an extended scalar
1412      int onumsteps=1;
1413      
1414      bool LES=(leftExp && leftScalar); // Left is an expanded scalar
1415      bool RES=(rightExp && rightScalar);
1416      bool LS=(!leftExp && leftScalar); // left is a single scalar
1417      bool RS=(!rightExp && rightScalar);
1418      bool LN=(!leftExp && !leftScalar);    // left is a single non-scalar
1419      bool RN=(!rightExp && !rightScalar);
1420      bool LEN=(leftExp && !leftScalar);    // left is an expanded non-scalar
1421      bool REN=(rightExp && !rightScalar);
1422    
1423      if ((LES && RES) || (LEN && REN)) // both are Expanded scalars or both are expanded non-scalars
1424      {
1425        chunksize=m_left->getNumDPPSample()*leftsize;
1426        leftstep=0;
1427        rightstep=0;
1428        numsteps=1;
1429      }
1430      else if (LES || RES)
1431      {
1432        chunksize=1;
1433        if (LES)        // left is an expanded scalar
1434        {
1435            if (RS)
1436            {
1437               leftstep=1;
1438               rightstep=0;
1439               numsteps=m_left->getNumDPPSample();
1440            }
1441            else        // RN or REN
1442            {
1443               leftstep=0;
1444               oleftstep=1;
1445               rightstep=1;
1446               orightstep=(RN ? -(int)rightsize : 0);
1447               numsteps=rightsize;
1448               onumsteps=m_left->getNumDPPSample();
1449            }
1450        }
1451        else        // right is an expanded scalar
1452        {
1453            if (LS)
1454            {
1455               rightstep=1;
1456               leftstep=0;
1457               numsteps=m_right->getNumDPPSample();
1458            }
1459            else
1460            {
1461               rightstep=0;
1462               orightstep=1;
1463               leftstep=1;
1464               oleftstep=(LN ? -(int)leftsize : 0);
1465               numsteps=leftsize;
1466               onumsteps=m_right->getNumDPPSample();
1467            }
1468        }
1469      }
1470      else  // this leaves (LEN, RS), (LEN, RN) and their transposes
1471      {
1472        if (LEN)    // and Right will be a single value
1473        {
1474            chunksize=rightsize;
1475            leftstep=rightsize;
1476            rightstep=0;
1477            numsteps=m_left->getNumDPPSample();
1478            if (RS)
1479            {
1480               numsteps*=leftsize;
1481            }
1482        }
1483        else    // REN
1484        {
1485            chunksize=leftsize;
1486            rightstep=leftsize;
1487            leftstep=0;
1488            numsteps=m_right->getNumDPPSample();
1489            if (LS)
1490            {
1491               numsteps*=rightsize;
1492            }
1493        }
1494      }
1495    
1496      int resultStep=max(leftstep,rightstep);   // only one (at most) should be !=0
1497      // Get the values of sub-expressions      // Get the values of sub-expressions
1498    const ValueType* left=m_left->resolveSample(v,offset,sampleNo,lroffset);    const ValueType* left=m_left->resolveVectorSample(v,offset+getMaxSampleSize(),sampleNo,lroffset); // see note on
1499    const ValueType* right=m_right->resolveSample(v,offset+m_samplesize,sampleNo,rroffset); // Note      // calcBufss for why we can't put offset as the 2nd param above
1500      const ValueType* right=m_right->resolveVectorSample(v,offset+2*getMaxSampleSize(),sampleNo,rroffset); // Note
1501      // the right child starts further along.      // the right child starts further along.
1502    LAZYDEBUG(cout << "Post sub calls in " << toString() << endl;)
1503    LAZYDEBUG(cout << "shapes=" << DataTypes::shapeToString(m_left->getShape()) << "," << DataTypes::shapeToString(m_right->getShape()) << endl;)
1504    LAZYDEBUG(cout << "chunksize=" << chunksize << endl << "leftstep=" << leftstep << " rightstep=" << rightstep;)
1505    LAZYDEBUG(cout << " numsteps=" << numsteps << endl << "oleftstep=" << oleftstep << " orightstep=" << orightstep;)
1506    LAZYDEBUG(cout << "onumsteps=" << onumsteps << endl;)
1507    LAZYDEBUG(cout << " DPPS=" << m_left->getNumDPPSample() << "," <<m_right->getNumDPPSample() << endl;)
1508    LAZYDEBUG(cout << "" << LS << RS << LN << RN << LES << RES <<LEN << REN <<   endl;)
1509    
1510    
1511    double* resultp=&(v[offset]);     // results are stored at the vector offset we recieved    double* resultp=&(v[offset]);     // results are stored at the vector offset we recieved
1512    switch(m_op)    switch(m_op)
1513    {    {
1514      case ADD:      case ADD:
1515      PROC_OP(plus<double>());          PROC_OP(NO_ARG,plus<double>());
1516      break;      break;
1517      case SUB:      case SUB:
1518      PROC_OP(minus<double>());      PROC_OP(NO_ARG,minus<double>());
1519      break;      break;
1520      case MUL:      case MUL:
1521      PROC_OP(multiplies<double>());      PROC_OP(NO_ARG,multiplies<double>());
1522      break;      break;
1523      case DIV:      case DIV:
1524      PROC_OP(divides<double>());      PROC_OP(NO_ARG,divides<double>());
1525      break;      break;
1526      case POW:      case POW:
1527      PROC_OP(::pow);         PROC_OP(double (double,double),::pow);
1528      break;      break;
1529      default:      default:
1530      throw DataException("Programmer error - resolveBinary can not resolve operator "+opToString(m_op)+".");      throw DataException("Programmer error - resolveBinary can not resolve operator "+opToString(m_op)+".");
1531    }    }
1532    roffset=offset;      roffset=offset;
1533    return &v;    return &v;
1534  }  }
1535    
1536    
1537    
/*
  \brief Compute the value of the expression (tensor product) for the given sample.
  \return Vector which stores the value of the subexpression for the given sample.
  \param v A vector to store intermediate results.
  \param offset Index in v to begin storing results.
  \param sampleNo Sample number to evaluate.
  \param roffset (output parameter) the offset in the return vector where the result begins.

  The return value will be an existing vector so do not deallocate it.
  If the result is stored in v it should be stored at the offset given.
  Everything from offset to the end of v should be considered available for this method to use.
*/
// This method assumes that any subexpressions which evaluate to Constant or Tagged Data
// have already been collapsed to IDENTITY. So we must have at least one expanded child.
// unlike the other resolve helpers, we must treat these datapoints separately.
DataTypes::ValueType*
DataLazy::resolveTProd(ValueType& v,  size_t offset, int sampleNo, size_t& roffset) const
{
LAZYDEBUG(cout << "Resolve TensorProduct: " << toString()  << " to offset " << offset<< endl;)

  size_t lroffset=0, rroffset=0;	// offsets in the left and right result vectors
	// first work out which of the children are expanded
  bool leftExp=(m_left->m_readytype=='E');
  bool rightExp=(m_right->m_readytype=='E');
  int steps=getNumDPPSample();
/*  int leftStep=((leftExp && !rightExp)? m_right->getNoValues() : 0);
  int rightStep=((rightExp && !leftExp)? m_left->getNoValues() : 0);*/
  // only an expanded child advances between datapoints; a constant/tagged child
  // reuses the same point (step 0) for every iteration of the loop below
  int leftStep=(leftExp? m_left->getNoValues() : 0);		// do not have scalars as input to this method
  int rightStep=(rightExp?m_right->getNoValues() : 0);

  int resultStep=getNoValues();		// values per output datapoint
	// Get the values of sub-expressions (leave a gap of one sample for the result).
  int gap=offset+m_samplesize;	

LAZYDEBUG(cout << "Query left with offset=" << gap << endl;)

  const ValueType* left=m_left->resolveVectorSample(v,gap,sampleNo,lroffset);
  gap+=m_left->getMaxSampleSize();	// right child's scratch space begins after the left's


LAZYDEBUG(cout << "Query right with offset=" << gap << endl;)


  const ValueType* right=m_right->resolveVectorSample(v,gap,sampleNo,rroffset);

LAZYDEBUG(cerr << "[Left shape]=" << DataTypes::shapeToString(m_left->getShape()) << "\n[Right shape]=" << DataTypes::shapeToString(m_right->getShape()) << " result=" <<DataTypes::shapeToString(getShape()) <<  endl;
cout << getNoValues() << endl;)
LAZYDEBUG(cerr << "Result of left=";)
LAZYDEBUG(cerr << "[" << lroffset << " .. " << lroffset+m_left->getNoValues() << "]" << endl;

for (int i=lroffset, limit=lroffset+(leftExp?m_left->getNoValues()*m_left->getNumDPPSample():m_left->getNoValues());i<limit;++i)
{
cout << "[" << setw(2) << i-lroffset << "] " << setw(10) << (*left)[i] << " ";
if (i%4==0) cout << endl;
})
LAZYDEBUG(cerr << "\nResult of right=" << endl;)
LAZYDEBUG(
for (int i=rroffset, limit=rroffset+(rightExp?m_right->getNoValues()*m_right->getNumDPPSample():m_right->getNoValues());i<limit;++i)
{
cerr << "[" <<  setw(2)<< i-rroffset << "] " << setw(10) << (*right)[i] << " ";
if (i%4==0) cout << endl;
}
cerr << endl;
)
LAZYDEBUG(cerr << "Post sub calls: " << toString() << endl;)
LAZYDEBUG(cout << "LeftExp=" << leftExp << " rightExp=" << rightExp << endl;)
LAZYDEBUG(cout << "LeftR=" << m_left->getRank() << " rightExp=" << m_right->getRank() << endl;)
LAZYDEBUG(cout << "LeftSize=" << m_left->getNoValues() << " RightSize=" << m_right->getNoValues() << endl;)
LAZYDEBUG(cout << "m_samplesize=" << m_samplesize << endl;)
LAZYDEBUG(cout << "outputshape=" << DataTypes::shapeToString(getShape()) << endl;)
LAZYDEBUG(cout << "DPPS=" << m_right->getNumDPPSample() <<"."<<endl;)

  double* resultp=&(v[offset]);		// results are stored at the vector offset we recieved
  switch(m_op)
  {
    case PROD:
	// one matrix product per datapoint in the sample
	for (int i=0;i<steps;++i,resultp+=resultStep)
	{

LAZYDEBUG(cout << "lroffset=" << lroffset << "rroffset=" << rroffset << endl;)
LAZYDEBUG(cout << "l*=" << left << " r*=" << right << endl;)
LAZYDEBUG(cout << "m_SL=" << m_SL << " m_SM=" << m_SM << " m_SR=" << m_SR << endl;)

          const double *ptr_0 = &((*left)[lroffset]);
          const double *ptr_1 = &((*right)[rroffset]);

LAZYDEBUG(cout << DataTypes::pointToString(*left, m_left->getShape(),lroffset,"LEFT") << endl;)
LAZYDEBUG(cout << DataTypes::pointToString(*right,m_right->getShape(),rroffset, "RIGHT") << endl;)

          // m_SL/m_SM/m_SR were fixed when the node was built; m_transpose selects
          // which argument (if either) is transposed during the product
          matrix_matrix_product(m_SL, m_SM, m_SR, ptr_0, ptr_1, resultp, m_transpose);

LAZYDEBUG(cout << "Results--\n";
{
  DataVector dv(getNoValues());
for (int z=0;z<getNoValues();++z)
{
  cout << "[" << setw(2) << z<< "] " << setw(10) << resultp[z] << " ";
  if (z%4==0) cout << endl;
  dv[z]=resultp[z];
}
cout << endl << DataTypes::pointToString(dv,getShape(),0,"RESLT");
cout << "\nWritten to: " << resultp << " resultStep=" << resultStep << endl;
}
)
	  lroffset+=leftStep;
	  rroffset+=rightStep;
	}
	break;
    default:
	throw DataException("Programmer error - resolveTProduct can not resolve operator "+opToString(m_op)+".");
  }
  roffset=offset;
  return &v;
}
1652    
1653    
1654    #ifdef LAZY_NODE_STORAGE
1655    
// The result will be stored in m_samples
// The return value is a pointer to the DataVector, offset is the offset within the return value
// tid is this thread's index; each thread owns one sample-sized slot of m_samples
// and one entry of m_sampleids recording which sample that slot currently holds.
const DataTypes::ValueType*
DataLazy::resolveNodeSample(int tid, int sampleNo, size_t& roffset)
{
LAZYDEBUG(cout << "Resolve sample " << toString() << endl;)
	// collapse so we have a 'E' node or an IDENTITY for some other type
  if (m_readytype!='E' && m_op!=IDENTITY)
  {
	collapse();
  }
  if (m_op==IDENTITY)	
  {
    // leaf node: read straight out of the underlying DataReady's vector
    const ValueType& vec=m_id->getVectorRO();
//	if (m_readytype=='C')
//	{
//	    roffset=0;		// all samples read from the same position
//	    return &(m_samples);
//	}
    roffset=m_id->getPointOffset(sampleNo, 0);
    return &(vec);
  }
  if (m_readytype!='E')
  {
    throw DataException("Programmer Error - Collapse did not produce an expanded node.");
  }
  // cache hit: this thread has already computed this sample
  if (m_sampleids[tid]==sampleNo)
  {
	roffset=tid*m_samplesize;
	return &(m_samples);		// sample is already resolved
  }
  m_sampleids[tid]=sampleNo;
  // dispatch to the helper for this node's operation class
  switch (getOpgroup(m_op))
  {
  case G_UNARY:
  case G_UNARY_P: return resolveNodeUnary(tid, sampleNo, roffset);
  case G_BINARY: return resolveNodeBinary(tid, sampleNo, roffset);
  case G_NP1OUT: return resolveNodeNP1OUT(tid, sampleNo, roffset);
  case G_NP1OUT_P: return resolveNodeNP1OUT_P(tid, sampleNo, roffset);
  case G_TENSORPROD: return resolveNodeTProd(tid, sampleNo, roffset);
  case G_NP1OUT_2P: return resolveNodeNP1OUT_2P(tid, sampleNo, roffset);
  case G_REDUCTION: return resolveNodeReduction(tid, sampleNo, roffset);
  default:
    throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");
  }
}
1702    
// Apply this node's unary operation to the child's sample.
// The child is resolved first; the whole sample is then transformed pointwise
// into this thread's slot of m_samples, and roffset is set to that slot.
const DataTypes::ValueType*
DataLazy::resolveNodeUnary(int tid, int sampleNo, size_t& roffset)
{
	// we assume that any collapsing has been done before we get here
	// since we only have one argument we don't need to think about only
	// processing single points.
	// we will also know we won't get identity nodes
  if (m_readytype!='E')
  {
    throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
  }
  if (m_op==IDENTITY)
  {
    throw DataException("Programmer error - resolveNodeUnary should not be called on identity nodes.");
  }
  // roffset is reused: first it receives the child's sample offset, then it is
  // overwritten with this node's own output offset
  const DataTypes::ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, roffset);
  const double* left=&((*leftres)[roffset]);
  roffset=m_samplesize*tid;
  double* result=&(m_samples[roffset]);
  switch (m_op)
  {
    case SIN:	
	tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sin);
	break;
    case COS:
	tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cos);
	break;
    case TAN:
	tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tan);
	break;
    case ASIN:
	tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::asin);
	break;
    case ACOS:
	tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::acos);
	break;
    case ATAN:
	tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::atan);
	break;
    case SINH:
	tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sinh);
	break;
    case COSH:
	tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cosh);
	break;
    case TANH:
	tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tanh);
	break;
    case ERF:
#if defined (_WIN32) && !defined(__INTEL_COMPILER)
    throw DataException("Error - Data:: erf function is not supported on _WIN32 platforms.");
#else
	tensor_unary_operation(m_samplesize, left, result, ::erf);
	break;
#endif
   // the inverse hyperbolics fall back to hand-rolled substitutes where the
   // platform's C library does not supply them
   case ASINH:
#if defined (_WIN32) && !defined(__INTEL_COMPILER)
	tensor_unary_operation(m_samplesize, left, result, escript::asinh_substitute);
#else
	tensor_unary_operation(m_samplesize, left, result, ::asinh);
#endif  
	break;
   case ACOSH:
#if defined (_WIN32) && !defined(__INTEL_COMPILER)
	tensor_unary_operation(m_samplesize, left, result, escript::acosh_substitute);
#else
	tensor_unary_operation(m_samplesize, left, result, ::acosh);
#endif  
	break;
   case ATANH:
#if defined (_WIN32) && !defined(__INTEL_COMPILER)
	tensor_unary_operation(m_samplesize, left, result, escript::atanh_substitute);
#else
	tensor_unary_operation(m_samplesize, left, result, ::atanh);
#endif  
	break;
    case LOG10:
	tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log10);
	break;
    case LOG:
	tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log);
	break;
    case SIGN:
	tensor_unary_operation(m_samplesize, left, result, escript::fsign);
	break;
    case ABS:
	tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::fabs);
	break;
    case NEG:
	tensor_unary_operation(m_samplesize, left, result, negate<double>());
	break;
    case POS:
	// it doesn't mean anything for delayed.
	// it will just trigger a deep copy of the lazy object
	throw DataException("Programmer error - POS not supported for lazy data.");
	break;
    case EXP:
	tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::exp);
	break;
    case SQRT:
	tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sqrt);
	break;
    case RECIP:
	tensor_unary_operation(m_samplesize, left, result, bind1st(divides<double>(),1.));
	break;
    // the comparison ops produce 0.0/1.0 masks
    case GZ:
	tensor_unary_operation(m_samplesize, left, result, bind2nd(greater<double>(),0.0));
	break;
    case LZ:
	tensor_unary_operation(m_samplesize, left, result, bind2nd(less<double>(),0.0));
	break;
    case GEZ:
	tensor_unary_operation(m_samplesize, left, result, bind2nd(greater_equal<double>(),0.0));
	break;
    case LEZ:
	tensor_unary_operation(m_samplesize, left, result, bind2nd(less_equal<double>(),0.0));
	break;
// There are actually G_UNARY_P but I don't see a compelling reason to treat them differently
    case NEZ:
	tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsGT(),m_tol));
	break;
    case EZ:
	tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsLTE(),m_tol));
	break;

    default:
	throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");
  }
  return &(m_samples);
}
1833    
1834    
1835    const DataTypes::ValueType*
1836    DataLazy::resolveNodeReduction(int tid, int sampleNo, size_t& roffset)
1837    {
1838        // we assume that any collapsing has been done before we get here
1839        // since we only have one argument we don't need to think about only
1840        // processing single points.
1841        // we will also know we won't get identity nodes
1842      if (m_readytype!='E')
1843      {
1844        throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
1845      }
1846      if (m_op==IDENTITY)
1847      {
1848        throw DataException("Programmer error - resolveNodeUnary should not be called on identity nodes.");
1849      }
1850      size_t loffset=0;
1851      const DataTypes::ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, loffset);
1852    
1853      roffset=m_samplesize*tid;
1854      unsigned int ndpps=getNumDPPSample();
1855      unsigned int psize=DataTypes::noValues(getShape());
1856      double* result=&(m_samples[roffset]);
1857      switch (m_op)
1858      {
1859        case MINVAL:
1860        {
1861          for (unsigned int z=0;z<ndpps;++z)
1862          {
1863            FMin op;
1864            *result=DataMaths::reductionOp(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max());
1865            loffset+=psize;
1866            result++;
1867          }
1868        }
1869        break;
1870        case MAXVAL:
1871        {
1872          for (unsigned int z=0;z<ndpps;++z)
1873          {
1874          FMax op;
1875          *result=DataMaths::reductionOp(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max()*-1);
1876          loffset+=psize;
1877          result++;
1878          }
1879        }
1880        break;
1881        default:
1882        throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");
1883      }
1884      return &(m_samples);
1885    }
1886    
1887    const DataTypes::ValueType*
1888    DataLazy::resolveNodeNP1OUT(int tid, int sampleNo, size_t& roffset)
1889    {
1890        // we assume that any collapsing has been done before we get here
1891        // since we only have one argument we don't need to think about only
1892        // processing single points.
1893      if (m_readytype!='E')
1894      {
1895        throw DataException("Programmer error - resolveNodeNP1OUT should only be called on expanded Data.");
1896      }
1897      if (m_op==IDENTITY)
1898      {
1899        throw DataException("Programmer error - resolveNodeNP1OUT should not be called on identity nodes.");
1900      }
1901      size_t subroffset;
1902      const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1903      roffset=m_samplesize*tid;
1904      size_t loop=0;
1905      size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1906      size_t step=getNoValues();
1907      size_t offset=roffset;
1908      switch (m_op)
1909      {
1910        case SYM:
1911        for (loop=0;loop<numsteps;++loop)
1912        {
1913            DataMaths::symmetric(*leftres,m_left->getShape(),subroffset, m_samples, getShape(), offset);
1914            subroffset+=step;
1915            offset+=step;
1916        }
1917        break;
1918        case NSYM:
1919        for (loop=0;loop<numsteps;++loop)
1920        {
1921            DataMaths::nonsymmetric(*leftres,m_left->getShape(),subroffset, m_samples, getShape(), offset);
1922            subroffset+=step;
1923            offset+=step;
1924        }
1925        break;
1926        default:
1927        throw DataException("Programmer error - resolveNP1OUT can not resolve operator "+opToString(m_op)+".");
1928      }
1929      return &m_samples;
1930    }
1931    
1932    const DataTypes::ValueType*
1933    DataLazy::resolveNodeNP1OUT_P(int tid, int sampleNo, size_t& roffset)
1934    {
1935        // we assume that any collapsing has been done before we get here
1936        // since we only have one argument we don't need to think about only
1937        // processing single points.
1938      if (m_readytype!='E')
1939      {
1940        throw DataException("Programmer error - resolveNodeNP1OUT_P should only be called on expanded Data.");
1941      }
1942      if (m_op==IDENTITY)
1943      {
1944        throw DataException("Programmer error - resolveNodeNP1OUT_P should not be called on identity nodes.");
1945      }
1946      size_t subroffset;
1947      size_t offset;
1948      const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1949      roffset=m_samplesize*tid;
1950      offset=roffset;
1951      size_t loop=0;
1952      size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1953      size_t outstep=getNoValues();
1954      size_t instep=m_left->getNoValues();
1955      switch (m_op)
1956      {
1957        case TRACE:
1958        for (loop=0;loop<numsteps;++loop)
1959        {
1960                DataMaths::trace(*leftres,m_left->getShape(),subroffset, m_samples ,getShape(),offset,m_axis_offset);
1961            subroffset+=instep;
1962            offset+=outstep;
1963        }
1964        break;
1965        case TRANS:
1966        for (loop=0;loop<numsteps;++loop)
1967        {
1968                DataMaths::transpose(*leftres,m_left->getShape(),subroffset, m_samples, getShape(),offset,m_axis_offset);
1969            subroffset+=instep;
1970            offset+=outstep;
1971        }
1972        break;
1973        default:
1974        throw DataException("Programmer error - resolveNP1OUTP can not resolve operator "+opToString(m_op)+".");
1975      }
1976      return &m_samples;
1977    }
1978    
1979    
1980    const DataTypes::ValueType*
1981    DataLazy::resolveNodeNP1OUT_2P(int tid, int sampleNo, size_t& roffset)
1982    {
1983      if (m_readytype!='E')
1984      {
1985        throw DataException("Programmer error - resolveNodeNP1OUT_2P should only be called on expanded Data.");
1986      }
1987      if (m_op==IDENTITY)
1988      {
1989        throw DataException("Programmer error - resolveNodeNP1OUT_2P should not be called on identity nodes.");
1990      }
1991      size_t subroffset;
1992      size_t offset;
1993      const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1994      roffset=m_samplesize*tid;
1995      offset=roffset;
1996      size_t loop=0;
1997      size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1998      size_t outstep=getNoValues();
1999      size_t instep=m_left->getNoValues();
2000      switch (m_op)
2001      {
2002        case SWAP:
2003        for (loop=0;loop<numsteps;++loop)
2004        {
2005                DataMaths::swapaxes(*leftres,m_left->getShape(),subroffset, m_samples, getShape(),offset, m_axis_offset, m_transpose);
2006            subroffset+=instep;
2007            offset+=outstep;
2008        }
2009        break;
2010        default:
2011        throw DataException("Programmer error - resolveNodeNP1OUT2P can not resolve operator "+opToString(m_op)+".");
2012      }
2013      return &m_samples;
2014    }
2015    
2016    
2017    
// This method assumes that any subexpressions which evaluate to Constant or Tagged Data
// have already been collapsed to IDENTITY. So we must have at least one expanded child.
// If both children are expanded, then we can process them in a single operation (we treat
// the whole sample as one big datapoint.
// If one of the children is not expanded, then we need to treat each point in the sample
// individually.
// There is an additional complication when scalar operations are considered.
// For example, 2+Vector.
// In this case each double within the point is treated individually
const DataTypes::ValueType*
DataLazy::resolveNodeBinary(int tid, int sampleNo, size_t& roffset)
{
LAZYDEBUG(cout << "Resolve binary: " << toString() << endl;)

  size_t lroffset=0, rroffset=0;	// offsets in the left and right result vectors
	// first work out which of the children are expanded
  bool leftExp=(m_left->m_readytype=='E');
  bool rightExp=(m_right->m_readytype=='E');
  if (!leftExp && !rightExp)
  {
	throw DataException("Programmer Error - please use collapse if neither argument has type 'E'.");
  }
  bool leftScalar=(m_left->getRank()==0);
  bool rightScalar=(m_right->getRank()==0);
  if ((m_left->getRank()!=m_right->getRank()) && (!leftScalar && !rightScalar))
  {
	throw DataException("resolveBinary - ranks of arguments must match unless one of them is scalar.");
  }
  size_t leftsize=m_left->getNoValues();
  size_t rightsize=m_right->getNoValues();
  size_t chunksize=1;			// how many doubles will be processed in one go
  int leftstep=0;		// how far should the left offset advance after each step
  int rightstep=0;
  int numsteps=0;		// total number of steps for the inner loop
  int oleftstep=0;	// the o variables refer to the outer loop
  int orightstep=0;	// The outer loop is only required in cases where there is an extended scalar
  int onumsteps=1;
  
  // classify each child by (expanded?, scalar?) - the 8 flags below name every
  // combination and drive the step/chunk setup that PROC_OP consumes
  bool LES=(leftExp && leftScalar);	// Left is an expanded scalar
  bool RES=(rightExp && rightScalar);
  bool LS=(!leftExp && leftScalar);	// left is a single scalar
  bool RS=(!rightExp && rightScalar);
  bool LN=(!leftExp && !leftScalar);	// left is a single non-scalar
  bool RN=(!rightExp && !rightScalar);
  bool LEN=(leftExp && !leftScalar);	// left is an expanded non-scalar
  bool REN=(rightExp && !rightScalar);

  if ((LES && RES) || (LEN && REN))	// both are Expanded scalars or both are expanded non-scalars
  {
	// matching layouts: process the whole sample as one big chunk
	chunksize=m_left->getNumDPPSample()*leftsize;
	leftstep=0;
	rightstep=0;
	numsteps=1;
  }
  else if (LES || RES)
  {
	chunksize=1;
	if (LES)		// left is an expanded scalar
	{
	    if (RS)
	    {
	       leftstep=1;
	       rightstep=0;
	       numsteps=m_left->getNumDPPSample();
	    }
	    else		// RN or REN
	    {
	       // inner loop walks the right datapoint; outer loop advances the
	       // expanded scalar (and rewinds a non-expanded right via a negative step)
	       leftstep=0;
	       oleftstep=1;
	       rightstep=1;
	       orightstep=(RN ? -(int)rightsize : 0);
	       numsteps=rightsize;
	       onumsteps=m_left->getNumDPPSample();
	    }
	}
	else		// right is an expanded scalar
	{
	    if (LS)
	    {
	       rightstep=1;
	       leftstep=0;
	       numsteps=m_right->getNumDPPSample();
	    }
	    else
	    {
	       // mirror image of the case above
	       rightstep=0;
	       orightstep=1;
	       leftstep=1;
	       oleftstep=(LN ? -(int)leftsize : 0);
	       numsteps=leftsize;
	       onumsteps=m_right->getNumDPPSample();
	    }
	}
  }
  else	// this leaves (LEN, RS), (LEN, RN) and their transposes
  {
	if (LEN)	// and Right will be a single value
	{
	    // step the expanded left through its datapoints, reusing the single right
	    chunksize=rightsize;
	    leftstep=rightsize;
	    rightstep=0;
	    numsteps=m_left->getNumDPPSample();
	    if (RS)
	    {
	       numsteps*=leftsize;
	    }
	}
	else	// REN
	{
	    chunksize=leftsize;
	    rightstep=leftsize;
	    leftstep=0;
	    numsteps=m_right->getNumDPPSample();
	    if (LS)
	    {
	       numsteps*=rightsize;
	    }
	}
  }

  int resultStep=max(leftstep,rightstep);	// only one (at most) should be !=0
	// Get the values of sub-expressions
  const ValueType* left=m_left->resolveNodeSample(tid,sampleNo,lroffset);	
  const ValueType* right=m_right->resolveNodeSample(tid,sampleNo,rroffset);
LAZYDEBUG(cout << "Post sub calls in " << toString() << endl;)
LAZYDEBUG(cout << "shapes=" << DataTypes::shapeToString(m_left->getShape()) << "," << DataTypes::shapeToString(m_right->getShape()) << endl;)
LAZYDEBUG(cout << "chunksize=" << chunksize << endl << "leftstep=" << leftstep << " rightstep=" << rightstep;)
LAZYDEBUG(cout << " numsteps=" << numsteps << endl << "oleftstep=" << oleftstep << " orightstep=" << orightstep;)
LAZYDEBUG(cout << "onumsteps=" << onumsteps << endl;)
LAZYDEBUG(cout << " DPPS=" << m_left->getNumDPPSample() << "," <<m_right->getNumDPPSample() << endl;)
LAZYDEBUG(cout << "" << LS << RS << LN << RN << LES << RES <<LEN << REN <<   endl;)

LAZYDEBUG(cout << "Left res["<< lroffset<< "]=" << (*left)[lroffset] << endl;)
LAZYDEBUG(cout << "Right res["<< rroffset<< "]=" << (*right)[rroffset] << endl;)


  roffset=m_samplesize*tid;
  double* resultp=&(m_samples[roffset]);		// results are stored at the vector offset we recieved
  // PROC_OP runs the (outer x inner) loop described by the step variables above
  switch(m_op)
  {
    case ADD:
        PROC_OP(NO_ARG,plus<double>());
	break;
    case SUB:
	PROC_OP(NO_ARG,minus<double>());
	break;
    case MUL:
	PROC_OP(NO_ARG,multiplies<double>());
	break;
    case DIV:
	PROC_OP(NO_ARG,divides<double>());
	break;
    case POW:
       PROC_OP(double (double,double),::pow);
	break;
    default:
	throw DataException("Programmer error - resolveBinary can not resolve operator "+opToString(m_op)+".");
  }
LAZYDEBUG(cout << "Result res[" << roffset<< "]" << m_samples[roffset] << endl;)
  return &m_samples;
}
2179    
2180    
// This method assumes that any subexpressions which evaluate to Constant or Tagged Data
// have already been collapsed to IDENTITY. So we must have at least one expanded child.
// unlike the other resolve helpers, we must treat these datapoints separately.
// The result (one matrix product per datapoint) is written into this thread's
// slot of m_samples; roffset is set to the start of that slot.
const DataTypes::ValueType*
DataLazy::resolveNodeTProd(int tid, int sampleNo, size_t& roffset)
{
LAZYDEBUG(cout << "Resolve TensorProduct: " << toString() << endl;)

  size_t lroffset=0, rroffset=0;	// offsets in the left and right result vectors
	// first work out which of the children are expanded
  bool leftExp=(m_left->m_readytype=='E');
  bool rightExp=(m_right->m_readytype=='E');
  int steps=getNumDPPSample();
  // only an expanded child advances between datapoints; a constant/tagged child
  // reuses the same point (step 0) on every iteration
  int leftStep=(leftExp? m_left->getNoValues() : 0);		// do not have scalars as input to this method
  int rightStep=(rightExp?m_right->getNoValues() : 0);

  int resultStep=getNoValues();		// values per output datapoint
  roffset=m_samplesize*tid;		// this thread's slot in the result cache
  size_t offset=roffset;

  const ValueType* left=m_left->resolveNodeSample(tid, sampleNo, lroffset);

  const ValueType* right=m_right->resolveNodeSample(tid, sampleNo, rroffset);

LAZYDEBUG(cerr << "[Left shape]=" << DataTypes::shapeToString(m_left->getShape()) << "\n[Right shape]=" << DataTypes::shapeToString(m_right->getShape()) << " result=" <<DataTypes::shapeToString(getShape()) <<  endl;
cout << getNoValues() << endl;)


LAZYDEBUG(cerr << "Post sub calls: " << toString() << endl;)
LAZYDEBUG(cout << "LeftExp=" << leftExp << " rightExp=" << rightExp << endl;)
LAZYDEBUG(cout << "LeftR=" << m_left->getRank() << " rightExp=" << m_right->getRank() << endl;)
LAZYDEBUG(cout << "LeftSize=" << m_left->getNoValues() << " RightSize=" << m_right->getNoValues() << endl;)
LAZYDEBUG(cout << "m_samplesize=" << m_samplesize << endl;)
LAZYDEBUG(cout << "outputshape=" << DataTypes::shapeToString(getShape()) << endl;)
LAZYDEBUG(cout << "DPPS=" << m_right->getNumDPPSample() <<"."<<endl;)

  double* resultp=&(m_samples[offset]);		// results are stored at the vector offset we recieved
  switch(m_op)
  {
    case PROD:
	for (int i=0;i<steps;++i,resultp+=resultStep)
	{
          const double *ptr_0 = &((*left)[lroffset]);
          const double *ptr_1 = &((*right)[rroffset]);

LAZYDEBUG(cout << DataTypes::pointToString(*left, m_left->getShape(),lroffset,"LEFT") << endl;)
LAZYDEBUG(cout << DataTypes::pointToString(*right,m_right->getShape(),rroffset, "RIGHT") << endl;)

          // m_SL/m_SM/m_SR were fixed at node construction; m_transpose selects
          // which argument (if either) is transposed during the product
          matrix_matrix_product(m_SL, m_SM, m_SR, ptr_0, ptr_1, resultp, m_transpose);

	  lroffset+=leftStep;
	  rroffset+=rightStep;
	}
	break;
    default:
	throw DataException("Programmer error - resolveTProduct can not resolve operator "+opToString(m_op)+".");
  }
  roffset=offset;
  return &m_samples;
}
2241    #endif //LAZY_NODE_STORAGE
2242    
2243    /*
2244    \brief Compute the value of the expression for the given sample.    \brief Compute the value of the expression for the given sample.
2245    \return Vector which stores the value of the subexpression for the given sample.    \return Vector which stores the value of the subexpression for the given sample.
2246    \param v A vector to store intermediate results.    \param v A vector to store intermediate results.
# Line 687  cout << "Resolve binary: " << toString() Line 2257  cout << "Resolve binary: " << toString()
2257    
2258  // the roffset is the offset within the returned vector where the data begins  // the roffset is the offset within the returned vector where the data begins
2259  const DataTypes::ValueType*  const DataTypes::ValueType*
2260  DataLazy::resolveSample(ValueType& v, size_t offset, int sampleNo, size_t& roffset)  DataLazy::resolveVectorSample(ValueType& v, size_t offset, int sampleNo, size_t& roffset)
2261  {  {
2262  cout << "Resolve sample " << toString() << endl;  LAZYDEBUG(cout << "Resolve sample " << toString() << endl;)
2263      // collapse so we have a 'E' node or an IDENTITY for some other type      // collapse so we have a 'E' node or an IDENTITY for some other type
2264    if (m_readytype!='E' && m_op!=IDENTITY)    if (m_readytype!='E' && m_op!=IDENTITY)
2265    {    {
# Line 697  cout << "Resolve sample " << toString() Line 2267  cout << "Resolve sample " << toString()
2267    }    }
2268    if (m_op==IDENTITY)      if (m_op==IDENTITY)  
2269    {    {
2270      const ValueType& vec=m_id->getVector();      const ValueType& vec=m_id->getVectorRO();
2271      if (m_readytype=='C')      if (m_readytype=='C')
2272      {      {
2273      roffset=0;      roffset=0;
2274    LAZYDEBUG(cout << "Finish  sample " << toString() << endl;)
2275      return &(vec);      return &(vec);
2276      }      }
2277      roffset=m_id->getPointOffset(sampleNo, 0);      roffset=m_id->getPointOffset(sampleNo, 0);
2278    LAZYDEBUG(cout << "Finish  sample " << toString() << endl;)
2279      return &(vec);      return &(vec);
2280    }    }
2281    if (m_readytype!='E')    if (m_readytype!='E')
# Line 712  cout << "Resolve sample " << toString() Line 2284  cout << "Resolve sample " << toString()
2284    }    }
2285    switch (getOpgroup(m_op))    switch (getOpgroup(m_op))
2286    {    {
2287    case G_UNARY: return resolveUnary(v, offset,sampleNo,roffset);    case G_UNARY:
2288      case G_UNARY_P: return resolveUnary(v, offset,sampleNo,roffset);
2289    case G_BINARY: return resolveBinary(v, offset,sampleNo,roffset);    case G_BINARY: return resolveBinary(v, offset,sampleNo,roffset);
2290      case G_NP1OUT: return resolveNP1OUT(v, offset, sampleNo,roffset);
2291      case G_NP1OUT_P: return resolveNP1OUT_P(v, offset, sampleNo,roffset);
2292      case G_TENSORPROD: return resolveTProd(v,offset, sampleNo,roffset);
2293      case G_NP1OUT_2P: return resolveNP1OUT_2P(v, offset, sampleNo, roffset);
2294      case G_REDUCTION: return resolveReduction(v, offset, sampleNo, roffset);
2295    default:    default:
2296      throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");      throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");
2297    }    }
2298    
2299    }
2300    
2301    const DataTypes::ValueType*
2302    DataLazy::resolveSample(BufferGroup& bg, int sampleNo, size_t& roffset)
2303    {
2304    #ifdef _OPENMP
2305        int tid=omp_get_thread_num();
2306    #else
2307        int tid=0;
2308    #endif
2309    #ifdef LAZY_NODE_STORAGE
2310        return resolveNodeSample(tid, sampleNo, roffset);
2311    #else
2312        return resolveVectorSample(bg.getBuffer(tid),bg.getOffset(tid),sampleNo,roffset);
2313    #endif
2314    }
2315    
2316    
2317    // This needs to do the work of the identity constructor
2318    void
2319    DataLazy::resolveToIdentity()
2320    {
2321       if (m_op==IDENTITY)
2322        return;
2323    #ifndef LAZY_NODE_STORAGE
2324       DataReady_ptr p=resolveVectorWorker();
2325    #else
2326       DataReady_ptr p=resolveNodeWorker();
2327    #endif
2328       makeIdentity(p);
2329    }
2330    
2331    void DataLazy::makeIdentity(const DataReady_ptr& p)
2332    {
2333       m_op=IDENTITY;
2334       m_axis_offset=0;
2335       m_transpose=0;
2336       m_SL=m_SM=m_SR=0;
2337       m_children=m_height=0;
2338       m_id=p;
2339       if(p->isConstant()) {m_readytype='C';}
2340       else if(p->isExpanded()) {m_readytype='E';}
2341       else if (p->isTagged()) {m_readytype='T';}
2342       else {throw DataException("Unknown DataReady instance in convertToIdentity constructor.");}
2343       m_buffsRequired=1;
2344       m_samplesize=p->getNumDPPSample()*p->getNoValues();
2345       m_maxsamplesize=m_samplesize;
2346       m_left.reset();
2347       m_right.reset();
2348  }  }
2349    
2350    
 // To simplify the memory management, all threads operate on one large vector, rather than one each.  
 // Each sample is evaluated independently and copied into the result DataExpanded.  
2351  DataReady_ptr  DataReady_ptr
2352  DataLazy::resolve()  DataLazy::resolve()
2353  {  {
2354        resolveToIdentity();
2355        return m_id;
2356    }
2357    
2358    #ifdef LAZY_NODE_STORAGE
2359    
2360    // This version of resolve uses storage in each node to hold results
2361    DataReady_ptr
2362    DataLazy::resolveNodeWorker()
2363    {
2364      if (m_readytype!='E')     // if the whole sub-expression is Constant or Tagged, then evaluate it normally
2365      {
2366        collapse();
2367      }
2368      if (m_op==IDENTITY)       // So a lazy expression of Constant or Tagged data will be returned here.
2369      {
2370        return m_id;
2371      }
2372        // from this point on we must have m_op!=IDENTITY and m_readytype=='E'
2373      DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(),  ValueType(getNoValues()));
2374      ValueType& resvec=result->getVectorRW();
2375      DataReady_ptr resptr=DataReady_ptr(result);
2376    
2377      int sample;
2378      int totalsamples=getNumSamples();
2379      const ValueType* res=0;   // Storage for answer
2380    LAZYDEBUG(cout << "Total number of samples=" <<totalsamples << endl;)
2381      #pragma omp parallel for private(sample,res) schedule(static)
2382      for (sample=0;sample<totalsamples;++sample)
2383      {
2384        size_t roffset=0;
2385    #ifdef _OPENMP
2386        res=resolveNodeSample(omp_get_thread_num(),sample,roffset);
2387    #else
2388        res=resolveNodeSample(0,sample,roffset);
2389    #endif
2390    LAZYDEBUG(cout << "Sample #" << sample << endl;)
2391    LAZYDEBUG(cout << "Final res[" << roffset<< "]=" << (*res)[roffset] << (*res)[roffset]<< endl; )
2392        DataVector::size_type outoffset=result->getPointOffset(sample,0);
2393        memcpy(&(resvec[outoffset]),&((*res)[roffset]),m_samplesize*sizeof(DataVector::ElementType));
2394      }
2395      return resptr;
2396    }
2397    
2398    #endif // LAZY_NODE_STORAGE
2399    
2400  cout << "Sample size=" << m_samplesize << endl;  // To simplify the memory management, all threads operate on one large vector, rather than one each.
2401  cout << "Buffers=" << m_buffsRequired << endl;  // Each sample is evaluated independently and copied into the result DataExpanded.
2402    DataReady_ptr
2403    DataLazy::resolveVectorWorker()
2404    {
2405    
2406    LAZYDEBUG(cout << "Sample size=" << m_samplesize << endl;)
2407    LAZYDEBUG(cout << "Buffers=" << m_buffsRequired << endl;)
2408    if (m_readytype!='E')     // if the whole sub-expression is Constant or Tagged, then evaluate it normally    if (m_readytype!='E')     // if the whole sub-expression is Constant or Tagged, then evaluate it normally
2409    {    {
2410      collapse();      collapse();
# Line 738  cout << "Buffers=" << m_buffsRequired << Line 2414  cout << "Buffers=" << m_buffsRequired <<
2414      return m_id;      return m_id;
2415    }    }
2416      // from this point on we must have m_op!=IDENTITY and m_readytype=='E'      // from this point on we must have m_op!=IDENTITY and m_readytype=='E'
2417    size_t threadbuffersize=m_samplesize*(max(1,m_buffsRequired));    // Each thread needs to have enough    size_t threadbuffersize=m_maxsamplesize*(max(1,m_buffsRequired)); // Each thread needs to have enough
2418      // storage to evaluate its expression      // storage to evaluate its expression
2419    int numthreads=1;    int numthreads=1;
2420  #ifdef _OPENMP  #ifdef _OPENMP
2421    numthreads=getNumberOfThreads();    numthreads=omp_get_max_threads();
   int threadnum=0;  
2422  #endif  #endif
2423    ValueType v(numthreads*threadbuffersize);    ValueType v(numthreads*threadbuffersize);
2424  cout << "Buffer created with size=" << v.size() << endl;  LAZYDEBUG(cout << "Buffer created with size=" << v.size() << endl;)
2425    DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(),  ValueType(getNoValues()));    DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(),  ValueType(getNoValues()));
2426    ValueType& resvec=result->getVector();    ValueType& resvec=result->getVectorRW();
2427    DataReady_ptr resptr=DataReady_ptr(result);    DataReady_ptr resptr=DataReady_ptr(result);
2428    int sample;    int sample;
2429    size_t outoffset;     // offset in the output data    size_t outoffset;     // offset in the output data
2430    int totalsamples=getNumSamples();    int totalsamples=getNumSamples();
2431    const ValueType* res=0;   // Vector storing the answer    const ValueType* res=0;   // Vector storing the answer
2432    size_t resoffset=0;       // where in the vector to find the answer    size_t resoffset=0;       // where in the vector to find the answer
2433    #pragma omp parallel for private(sample,resoffset,outoffset,threadnum,res) schedule(static)  LAZYDEBUG(cout << "Total number of samples=" <<totalsamples << endl;)
2434      #pragma omp parallel for private(sample,resoffset,outoffset,res) schedule(static)
2435    for (sample=0;sample<totalsamples;++sample)    for (sample=0;sample<totalsamples;++sample)
2436    {    {
2437  cout << "################################# " << sample << endl;  LAZYDEBUG(cout << "################################# " << sample << endl;)
2438  #ifdef _OPENMP  #ifdef _OPENMP
2439      res=resolveSample(v,threadbuffersize*omp_get_thread_num(),sample,resoffset);      res=resolveVectorSample(v,threadbuffersize*omp_get_thread_num(),sample,resoffset);
2440  #else  #else
2441      res=resolveSample(v,0,sample,resoffset);   // res would normally be v, but not if its a single IDENTITY op.      res=resolveVectorSample(v,0,sample,resoffset);   // res would normally be v, but not if its a single IDENTITY op.
2442  #endif  #endif
2443  cerr << "-------------------------------- " << endl;  LAZYDEBUG(cerr << "-------------------------------- " << endl;)
2444    LAZYDEBUG(cerr<< "Copying sample#" << sample << endl;)
2445      outoffset=result->getPointOffset(sample,0);      outoffset=result->getPointOffset(sample,0);
2446  cerr << "offset=" << outoffset << endl;  LAZYDEBUG(cerr << "offset=" << outoffset << " from offset=" << resoffset << " " << m_samplesize << " doubles" << endl;)
2447      for (unsigned int i=0;i<m_samplesize;++i,++outoffset,++resoffset)   // copy values into the output vector      for (unsigned int i=0;i<m_samplesize;++i,++outoffset,++resoffset)   // copy values into the output vector
2448      {      {
2449    LAZYDEBUG(cerr << "outoffset=" << outoffset << " resoffset=" << resoffset << " " << (*res)[resoffset]<< endl;)
2450      resvec[outoffset]=(*res)[resoffset];      resvec[outoffset]=(*res)[resoffset];
2451      }      }
2452  cerr << "*********************************" << endl;  LAZYDEBUG(cerr << DataTypes::pointToString(resvec,getShape(),outoffset-m_samplesize+DataTypes::noValues(getShape()),"Final result:") << endl;)
2453    LAZYDEBUG(cerr << "*********************************" << endl;)
2454    }    }
2455    return resptr;    return resptr;
2456  }  }
# Line 781  DataLazy::toString() const Line 2460  DataLazy::toString() const
2460  {  {
2461    ostringstream oss;    ostringstream oss;
2462    oss << "Lazy Data:";    oss << "Lazy Data:";
2463    intoString(oss);    if (escriptParams.getPRINT_LAZY_TREE()==0)
2464      {
2465          intoString(oss);
2466      }
2467      else
2468      {
2469        oss << endl;
2470        intoTreeString(oss,"");
2471      }
2472    return oss.str();    return oss.str();
2473  }  }
2474    
# Line 789  DataLazy::toString() const Line 2476  DataLazy::toString() const
2476  void  void
2477  DataLazy::intoString(ostringstream& oss) const  DataLazy::intoString(ostringstream& oss) const
2478  {  {
2479    //    oss << "[" << m_children <<";"<<m_height <<"]";
2480    switch (getOpgroup(m_op))    switch (getOpgroup(m_op))
2481    {    {
2482    case G_IDENTITY:    case G_IDENTITY:
# Line 818  DataLazy::intoString(ostringstream& oss) Line 2506  DataLazy::intoString(ostringstream& oss)
2506      oss << ')';      oss << ')';
2507      break;      break;
2508    case G_UNARY:    case G_UNARY:
2509      case G_UNARY_P:
2510      case G_NP1OUT:
2511      case G_NP1OUT_P:
2512      case G_REDUCTION:
2513        oss << opToString(m_op) << '(';
2514        m_left->intoString(oss);
2515        oss << ')';
2516        break;
2517      case G_TENSORPROD:
2518        oss << opToString(m_op) << '(';
2519        m_left->intoString(oss);
2520        oss << ", ";
2521        m_right->intoString(oss);
2522        oss << ')';
2523        break;
2524      case G_NP1OUT_2P:
2525      oss << opToString(m_op) << '(';      oss << opToString(m_op) << '(';
2526      m_left->intoString(oss);      m_left->intoString(oss);
2527        oss << ", " << m_axis_offset << ", " << m_transpose;
2528      oss << ')';      oss << ')';
2529      break;      break;
2530    default:    default:
# Line 827  DataLazy::intoString(ostringstream& oss) Line 2532  DataLazy::intoString(ostringstream& oss)
2532    }    }
2533  }  }
2534    
2535  // Note that in this case, deepCopy does not make copies of the leaves.  
2536  // Hopefully copy on write (or whatever we end up using) will take care of this.  void
2537    DataLazy::intoTreeString(ostringstream& oss, string indent) const
2538    {
2539      oss << '[' << m_rank << ':' << setw(3) << m_samplesize << "] " << indent;
2540      switch (getOpgroup(m_op))
2541      {
2542      case G_IDENTITY:
2543        if (m_id->isExpanded())
2544        {
2545           oss << "E";
2546        }
2547        else if (m_id->isTagged())
2548        {
2549          oss << "T";
2550        }
2551        else if (m_id->isConstant())
2552        {
2553          oss << "C";
2554        }
2555        else
2556        {
2557          oss << "?";
2558        }
2559        oss << '@' << m_id.get() << endl;
2560        break;
2561      case G_BINARY:
2562        oss << opToString(m_op) << endl;
2563        indent+='.';
2564        m_left->intoTreeString(oss, indent);
2565        m_right->intoTreeString(oss, indent);
2566        break;
2567      case G_UNARY:
2568      case G_UNARY_P:
2569      case G_NP1OUT:
2570      case G_NP1OUT_P:
2571      case G_REDUCTION:
2572        oss << opToString(m_op) << endl;
2573        indent+='.';
2574        m_left->intoTreeString(oss, indent);
2575        break;
2576      case G_TENSORPROD:
2577        oss << opToString(m_op) << endl;
2578        indent+='.';
2579        m_left->intoTreeString(oss, indent);
2580        m_right->intoTreeString(oss, indent);
2581        break;
2582      case G_NP1OUT_2P:
2583        oss << opToString(m_op) << ", " << m_axis_offset << ", " << m_transpose<< endl;
2584        indent+='.';
2585        m_left->intoTreeString(oss, indent);
2586        break;
2587      default:
2588        oss << "UNKNOWN";
2589      }
2590    }
2591    
2592    
2593  DataAbstract*  DataAbstract*
2594  DataLazy::deepCopy()  DataLazy::deepCopy()
2595  {  {
2596    if (m_op==IDENTITY)    switch (getOpgroup(m_op))
2597    {    {
2598      return new DataLazy(m_left);    // we don't need to copy the child here    case G_IDENTITY:  return new DataLazy(m_id->deepCopy()->getPtr());
2599      case G_UNARY:
2600      case G_REDUCTION:      return new DataLazy(m_left->deepCopy()->getPtr(),m_op);
2601      case G_UNARY_P:   return new DataLazy(m_left->deepCopy()->getPtr(), m_op, m_tol);
2602      case G_BINARY:    return new DataLazy(m_left->deepCopy()->getPtr(),m_right->deepCopy()->getPtr(),m_op);
2603      case G_NP1OUT: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(),m_op);
2604      case G_TENSORPROD: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
2605      case G_NP1OUT_P:   return new DataLazy(m_left->deepCopy()->getPtr(),m_op,  m_axis_offset);
2606      case G_NP1OUT_2P:  return new DataLazy(m_left->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
2607      default:
2608        throw DataException("Programmer error - do not know how to deepcopy operator "+opToString(m_op)+".");
2609    }    }
   return new DataLazy(m_left->deepCopy()->getPtr(),m_right->deepCopy()->getPtr(),m_op);  
2610  }  }
2611    
2612    
2613    
2614    // There is no single, natural interpretation of getLength on DataLazy.
2615    // Instances of DataReady can look at the size of their vectors.
2616    // For lazy though, it could be the size the data would be if it were resolved;
2617    // or it could be some function of the lengths of the DataReady instances which
2618    // form part of the expression.
2619    // Rather than have people making assumptions, I have disabled the method.
2620  DataTypes::ValueType::size_type  DataTypes::ValueType::size_type
2621  DataLazy::getLength() const  DataLazy::getLength() const
2622  {  {
2623    return m_length;    throw DataException("getLength() does not make sense for lazy data.");
2624  }  }
2625    
2626    
# Line 853  DataLazy::getSlice(const DataTypes::Regi Line 2630  DataLazy::getSlice(const DataTypes::Regi
2630    throw DataException("getSlice - not implemented for Lazy objects.");    throw DataException("getSlice - not implemented for Lazy objects.");
2631  }  }
2632    
2633    
2634    // To do this we need to rely on our child nodes
2635    DataTypes::ValueType::size_type
2636    DataLazy::getPointOffset(int sampleNo,
2637                     int dataPointNo)
2638    {
2639      if (m_op==IDENTITY)
2640      {
2641        return m_id->getPointOffset(sampleNo,dataPointNo);
2642      }
2643      if (m_readytype!='E')
2644      {
2645        collapse();
2646        return m_id->getPointOffset(sampleNo,dataPointNo);
2647      }
2648      // at this point we do not have an identity node and the expression will be Expanded
2649      // so we only need to know which child to ask
2650      if (m_left->m_readytype=='E')
2651      {
2652        return m_left->getPointOffset(sampleNo,dataPointNo);
2653      }
2654      else
2655      {
2656        return m_right->getPointOffset(sampleNo,dataPointNo);
2657      }
2658    }
2659    
2660    // To do this we need to rely on our child nodes
2661  DataTypes::ValueType::size_type  DataTypes::ValueType::size_type
2662  DataLazy::getPointOffset(int sampleNo,  DataLazy::getPointOffset(int sampleNo,
2663                   int dataPointNo) const                   int dataPointNo) const
2664  {  {
2665    throw DataException("getPointOffset - not implemented for Lazy objects - yet.");    if (m_op==IDENTITY)
2666      {
2667        return m_id->getPointOffset(sampleNo,dataPointNo);
2668      }
2669      if (m_readytype=='E')
2670      {
2671        // at this point we do not have an identity node and the expression will be Expanded
2672        // so we only need to know which child to ask
2673        if (m_left->m_readytype=='E')
2674        {
2675        return m_left->getPointOffset(sampleNo,dataPointNo);
2676        }
2677        else
2678        {
2679        return m_right->getPointOffset(sampleNo,dataPointNo);
2680        }
2681      }
2682      if (m_readytype=='C')
2683      {
2684        return m_left->getPointOffset(sampleNo,dataPointNo); // which child doesn't matter
2685      }
2686      throw DataException("Programmer error - getPointOffset on lazy data may require collapsing (but this object is marked const).");
2687  }  }
2688    
2689  // It would seem that DataTagged will need to be treated differently since even after setting all tags  
2690  // to zero, all the tags from all the DataTags would be in the result.  // I have decided to let Data:: handle this issue.
 // However since they all have the same value (0) whether they are there or not should not matter.  
 // So I have decided that for all types this method will create a constant 0.  
 // It can be promoted up as required.  
 // A possible efficiency concern might be expanded->constant->expanded which has an extra memory management  
 // but we can deal with that if it arrises.  
2691  void  void
2692  DataLazy::setToZero()  DataLazy::setToZero()
2693  {  {
2694    DataTypes::ValueType v(getNoValues(),0);  //   DataTypes::ValueType v(getNoValues(),0);
2695    m_id=DataReady_ptr(new DataConstant(getFunctionSpace(),getShape(),v));  //   m_id=DataReady_ptr(new DataConstant(getFunctionSpace(),getShape(),v));
2696    m_op=IDENTITY;  //   m_op=IDENTITY;
2697    m_right.reset();    //   m_right.reset();  
2698    m_left.reset();  //   m_left.reset();
2699    m_readytype='C';  //   m_readytype='C';
2700    m_buffsRequired=1;  //   m_buffsRequired=1;
2701    
2702      privdebug=privdebug;  // to stop the compiler complaining about unused privdebug
2703      throw DataException("Programmer error - setToZero not supported for DataLazy (DataLazy objects should be read only).");
2704    }
2705    
2706    bool
2707    DataLazy::actsExpanded() const
2708    {
2709        return (m_readytype=='E');
2710  }  }
2711    
2712  }   // end namespace  }   // end namespace

Legend:
Removed from v.1926  
changed lines
  Added in v.2737

  ViewVC Help
Powered by ViewVC 1.1.26