/[escript]/branches/clazy/escriptcore/src/DataLazy.cpp
ViewVC logotype

Diff of /branches/clazy/escriptcore/src/DataLazy.cpp

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

branches/schroedinger/escript/src/DataLazy.cpp revision 1888 by jfenwick, Wed Oct 15 04:00:21 2008 UTC trunk/escript/src/DataLazy.cpp revision 2737 by jfenwick, Tue Nov 3 00:44:00 2009 UTC
# Line 1  Line 1 
1    
2  /*******************************************************  /*******************************************************
3  *  *
4  * Copyright (c) 2003-2008 by University of Queensland  * Copyright (c) 2003-2009 by University of Queensland
5  * Earth Systems Science Computational Center (ESSCC)  * Earth Systems Science Computational Center (ESSCC)
6  * http://www.uq.edu.au/esscc  * http://www.uq.edu.au/esscc
7  *  *
# Line 26  Line 26 
26  #include "DataTypes.h"  #include "DataTypes.h"
27  #include "Data.h"  #include "Data.h"
28  #include "UnaryFuncs.h"     // for escript::fsign  #include "UnaryFuncs.h"     // for escript::fsign
29    #include "Utils.h"
30    
31    #include "EscriptParams.h"
32    
33    #include <iomanip>      // for some fancy formatting in debug
34    
35    // #define LAZYDEBUG(X) if (privdebug){X;}
36    #define LAZYDEBUG(X)
37    namespace
38    {
39    bool privdebug=false;
40    
41    #define ENABLEDEBUG privdebug=true;
42    #define DISABLEDEBUG privdebug=false;
43    }
44    
45    // #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {cerr << "\n!!!!!!! SIZE LIMIT EXCEEDED " << m_children << ";" << m_height << endl << toString() << endl;resolveToIdentity();}
46    
47    #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {resolveToIdentity();}
48    
49    
50    /*
51    How does DataLazy work?
52    ~~~~~~~~~~~~~~~~~~~~~~~
53    
54    Each instance represents a single operation on one or two other DataLazy instances. These arguments are normally
55    denoted left and right.
56    
57    A special operation, IDENTITY, stores an instance of DataReady in the m_id member.
58    This means that all "internal" nodes in the structure are instances of DataLazy.
59    
60    Each operation has a string representation as well as an opgroup - eg G_IDENTITY, G_BINARY, ...
61    Note that IDENITY is not considered a unary operation.
62    
63    I am avoiding calling the structure formed a tree because it is not guaranteed to be one (eg c=a+a).
64    It must however form a DAG (directed acyclic graph).
65    I will refer to individual DataLazy objects with the structure as nodes.
66    
67    Each node also stores:
68    - m_readytype \in {'E','T','C','?'} ~ indicates what sort of DataReady would be produced if the expression was
69        evaluated.
70    - m_buffsrequired ~ the larged number of samples which would need to be kept simultaneously in order to
71        evaluate the expression.
72    - m_samplesize ~ the number of doubles stored in a sample.
73    
74    When a new node is created, the above values are computed based on the values in the child nodes.
75    Eg: if left requires 4 samples and right requires 6 then left+right requires 7 samples.
76    
77    The resolve method, which produces a DataReady from a DataLazy, does the following:
78    1) Create a DataReady to hold the new result.
79    2) Allocate a vector (v) big enough to hold m_buffsrequired samples.
80    3) For each sample, call resolveSample with v, to get its values and copy them into the result object.
81    
82    (In the case of OMP, multiple samples are resolved in parallel so the vector needs to be larger.)
83    
84    resolveSample returns a Vector* and an offset within that vector where the result is stored.
85    Normally, this would be v, but for identity nodes their internal vector is returned instead.
86    
87    The convention that I use, is that the resolve methods should store their results starting at the offset they are passed.
88    
89    For expressions which evaluate to Constant or Tagged, there is a different evaluation method.
90    The collapse method invokes the (non-lazy) operations on the Data class to evaluate the expression.
91    
92    To add a new operator you need to do the following (plus anything I might have forgotten - adding a new group for example):
93    1) Add to the ES_optype.
94    2) determine what opgroup your operation belongs to (X)
95    3) add a string for the op to the end of ES_opstrings
96    4) increase ES_opcount
97    5) add an entry (X) to opgroups
98    6) add an entry to the switch in collapseToReady
99    7) add an entry to resolveX
100    */
101    
102    
103  using namespace std;  using namespace std;
104  using namespace boost;  using namespace boost;
# Line 33  using namespace boost; Line 106  using namespace boost;
106  namespace escript  namespace escript
107  {  {
108    
 const std::string&  
 opToString(ES_optype op);  
   
109  namespace  namespace
110  {  {
111    
   
   
112  enum ES_opgroup  enum ES_opgroup
113  {  {
114     G_UNKNOWN,     G_UNKNOWN,
115     G_IDENTITY,     G_IDENTITY,
116     G_BINARY,     G_BINARY,        // pointwise operations with two arguments
117     G_UNARY     G_UNARY,     // pointwise operations with one argument
118       G_UNARY_P,       // pointwise operations with one argument, requiring a parameter
119       G_NP1OUT,        // non-pointwise op with one output
120       G_NP1OUT_P,      // non-pointwise op with one output requiring a parameter
121       G_TENSORPROD,    // general tensor product
122       G_NP1OUT_2P,     // non-pointwise op with one output requiring two params
123       G_REDUCTION      // non-pointwise unary op with a scalar output
124  };  };
125    
126    
127    
128    
129  string ES_opstrings[]={"UNKNOWN","IDENTITY","+","-","*","/","sin","cos","tan",  string ES_opstrings[]={"UNKNOWN","IDENTITY","+","-","*","/","^",
130                "sin","cos","tan",
131              "asin","acos","atan","sinh","cosh","tanh","erf",              "asin","acos","atan","sinh","cosh","tanh","erf",
132              "asinh","acosh","atanh",              "asinh","acosh","atanh",
133              "log10","log","sign","abs","neg","pos","exp","sqrt",              "log10","log","sign","abs","neg","pos","exp","sqrt",
134              "1/","where>0","where<0","where>=0","where<=0"};              "1/","where>0","where<0","where>=0","where<=0", "where<>0","where=0",
135  int ES_opcount=32;              "symmetric","nonsymmetric",
136  ES_opgroup opgroups[]={G_UNKNOWN,G_IDENTITY,G_BINARY,G_BINARY,G_BINARY,G_BINARY,G_UNARY,G_UNARY,G_UNARY, //9              "prod",
137              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,    // 16              "transpose", "trace",
138              G_UNARY,G_UNARY,G_UNARY,                    // 19              "swapaxes",
139              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,        // 27              "minval", "maxval"};
140              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY};  int ES_opcount=43;
141    ES_opgroup opgroups[]={G_UNKNOWN,G_IDENTITY,G_BINARY,G_BINARY,G_BINARY,G_BINARY, G_BINARY,
142                G_UNARY,G_UNARY,G_UNARY, //10
143                G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,    // 17
144                G_UNARY,G_UNARY,G_UNARY,                    // 20
145                G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,    // 28
146                G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY, G_UNARY_P, G_UNARY_P,      // 35
147                G_NP1OUT,G_NP1OUT,
148                G_TENSORPROD,
149                G_NP1OUT_P, G_NP1OUT_P,
150                G_NP1OUT_2P,
151                G_REDUCTION, G_REDUCTION};
152  inline  inline
153  ES_opgroup  ES_opgroup
154  getOpgroup(ES_optype op)  getOpgroup(ES_optype op)
# Line 79  resultFS(DataAbstract_ptr left, DataAbst Line 165  resultFS(DataAbstract_ptr left, DataAbst
165      // that way, if interpolate is required in any other op we can just throw a      // that way, if interpolate is required in any other op we can just throw a
166      // programming error exception.      // programming error exception.
167    
168      FunctionSpace l=left->getFunctionSpace();
169      if (left->getFunctionSpace()!=right->getFunctionSpace())    FunctionSpace r=right->getFunctionSpace();
170      {    if (l!=r)
171          throw DataException("FunctionSpaces not equal - interpolation not supported on lazy data.");    {
172      }      if (r.probeInterpolation(l))
173      return left->getFunctionSpace();      {
174        return l;
175        }
176        if (l.probeInterpolation(r))
177        {
178        return r;
179        }
180        throw DataException("Cannot interpolate between the FunctionSpaces given for operation "+opToString(op)+".");
181      }
182      return l;
183  }  }
184    
185  // return the shape of the result of "left op right"  // return the shape of the result of "left op right"
186    // the shapes resulting from tensor product are more complex to compute so are worked out elsewhere
187  DataTypes::ShapeType  DataTypes::ShapeType
188  resultShape(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)  resultShape(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
189  {  {
190      if (left->getShape()!=right->getShape())      if (left->getShape()!=right->getShape())
191      {      {
192          throw DataException("Shapes not the same - shapes must match for lazy data.");        if ((getOpgroup(op)!=G_BINARY) && (getOpgroup(op)!=G_NP1OUT))
193          {
194            throw DataException("Shapes not the name - shapes must match for (point)binary operations.");
195          }
196    
197          if (left->getRank()==0)   // we need to allow scalar * anything
198          {
199            return right->getShape();
200          }
201          if (right->getRank()==0)
202          {
203            return left->getShape();
204          }
205          throw DataException("Shapes not the same - arguments must have matching shapes (or be scalars) for (point)binary operations on lazy data.");
206      }      }
207      return left->getShape();      return left->getShape();
208  }  }
209    
210  size_t  // return the shape for "op left"
211  resultLength(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)  
212    DataTypes::ShapeType
213    resultShape(DataAbstract_ptr left, ES_optype op, int axis_offset)
214  {  {
215     switch (getOpgroup(op))      switch(op)
216     {      {
217     case G_BINARY: return left->getLength();          case TRANS:
218     case G_UNARY: return left->getLength();         {            // for the scoping of variables
219     default:          const DataTypes::ShapeType& s=left->getShape();
220      throw DataException("Programmer Error - attempt to getLength() for operator "+opToString(op)+".");          DataTypes::ShapeType sh;
221     }          int rank=left->getRank();
222            if (axis_offset<0 || axis_offset>rank)
223            {
224                   throw DataException("Error - Data::transpose must have 0 <= axis_offset <= rank=" + rank);
225                }
226                for (int i=0; i<rank; i++)
227            {
228               int index = (axis_offset+i)%rank;
229                   sh.push_back(s[index]); // Append to new shape
230                }
231            return sh;
232           }
233        break;
234        case TRACE:
235           {
236            int rank=left->getRank();
237            if (rank<2)
238            {
239               throw DataException("Trace can only be computed for objects with rank 2 or greater.");
240            }
241            if ((axis_offset>rank-2) || (axis_offset<0))
242            {
243               throw DataException("Trace: axis offset must lie between 0 and rank-2 inclusive.");
244            }
245            if (rank==2)
246            {
247               return DataTypes::scalarShape;
248            }
249            else if (rank==3)
250            {
251               DataTypes::ShapeType sh;
252                   if (axis_offset==0)
253               {
254                    sh.push_back(left->getShape()[2]);
255                   }
256                   else     // offset==1
257               {
258                sh.push_back(left->getShape()[0]);
259                   }
260               return sh;
261            }
262            else if (rank==4)
263            {
264               DataTypes::ShapeType sh;
265               const DataTypes::ShapeType& s=left->getShape();
266                   if (axis_offset==0)
267               {
268                    sh.push_back(s[2]);
269                    sh.push_back(s[3]);
270                   }
271                   else if (axis_offset==1)
272               {
273                    sh.push_back(s[0]);
274                    sh.push_back(s[3]);
275                   }
276               else     // offset==2
277               {
278                sh.push_back(s[0]);
279                sh.push_back(s[1]);
280               }
281               return sh;
282            }
283            else        // unknown rank
284            {
285               throw DataException("Error - Data::trace can only be calculated for rank 2, 3 or 4 object.");
286            }
287           }
288        break;
289            default:
290        throw DataException("Programmer error - resultShape(left,op) can't compute shapes for operator "+opToString(op)+".");
291        }
292    }
293    
294    DataTypes::ShapeType
295    SwapShape(DataAbstract_ptr left, const int axis0, const int axis1)
296    {
297         // This code taken from the Data.cpp swapaxes() method
298         // Some of the checks are probably redundant here
299         int axis0_tmp,axis1_tmp;
300         const DataTypes::ShapeType& s=left->getShape();
301         DataTypes::ShapeType out_shape;
302         // Here's the equivalent of python s_out=s[axis_offset:]+s[:axis_offset]
303         // which goes thru all shape vector elements starting with axis_offset (at index=rank wrap around to 0)
304         int rank=left->getRank();
305         if (rank<2) {
306            throw DataException("Error - Data::swapaxes argument must have at least rank 2.");
307         }
308         if (axis0<0 || axis0>rank-1) {
309            throw DataException("Error - Data::swapaxes: axis0 must be between 0 and rank-1=" + rank-1);
310         }
311         if (axis1<0 || axis1>rank-1) {
312             throw DataException("Error - Data::swapaxes: axis1 must be between 0 and rank-1=" + rank-1);
313         }
314         if (axis0 == axis1) {
315             throw DataException("Error - Data::swapaxes: axis indices must be different.");
316         }
317         if (axis0 > axis1) {
318             axis0_tmp=axis1;
319             axis1_tmp=axis0;
320         } else {
321             axis0_tmp=axis0;
322             axis1_tmp=axis1;
323         }
324         for (int i=0; i<rank; i++) {
325           if (i == axis0_tmp) {
326              out_shape.push_back(s[axis1_tmp]);
327           } else if (i == axis1_tmp) {
328              out_shape.push_back(s[axis0_tmp]);
329           } else {
330              out_shape.push_back(s[i]);
331           }
332         }
333        return out_shape;
334  }  }
335    
336    
337    // determine the output shape for the general tensor product operation
338    // the additional parameters return information required later for the product
339    // the majority of this code is copy pasted from C_General_Tensor_Product
340    DataTypes::ShapeType
341    GTPShape(DataAbstract_ptr left, DataAbstract_ptr right, int axis_offset, int transpose, int& SL, int& SM, int& SR)
342    {
343        
344      // Get rank and shape of inputs
345      int rank0 = left->getRank();
346      int rank1 = right->getRank();
347      const DataTypes::ShapeType& shape0 = left->getShape();
348      const DataTypes::ShapeType& shape1 = right->getShape();
349    
350      // Prepare for the loops of the product and verify compatibility of shapes
351      int start0=0, start1=0;
352      if (transpose == 0)       {}
353      else if (transpose == 1)  { start0 = axis_offset; }
354      else if (transpose == 2)  { start1 = rank1-axis_offset; }
355      else              { throw DataException("DataLazy GeneralTensorProduct Constructor: Error - transpose should be 0, 1 or 2"); }
356    
357      if (rank0<axis_offset)
358      {
359        throw DataException("DataLazy GeneralTensorProduct Constructor: Error - rank of left < axisoffset");
360      }
361    
362      // Adjust the shapes for transpose
363      DataTypes::ShapeType tmpShape0(rank0);    // pre-sizing the vectors rather
364      DataTypes::ShapeType tmpShape1(rank1);    // than using push_back
365      for (int i=0; i<rank0; i++)   { tmpShape0[i]=shape0[(i+start0)%rank0]; }
366      for (int i=0; i<rank1; i++)   { tmpShape1[i]=shape1[(i+start1)%rank1]; }
367    
368      // Prepare for the loops of the product
369      SL=1, SM=1, SR=1;
370      for (int i=0; i<rank0-axis_offset; i++)   {
371        SL *= tmpShape0[i];
372      }
373      for (int i=rank0-axis_offset; i<rank0; i++)   {
374        if (tmpShape0[i] != tmpShape1[i-(rank0-axis_offset)]) {
375          throw DataException("C_GeneralTensorProduct: Error - incompatible shapes");
376        }
377        SM *= tmpShape0[i];
378      }
379      for (int i=axis_offset; i<rank1; i++)     {
380        SR *= tmpShape1[i];
381      }
382    
383      // Define the shape of the output (rank of shape is the sum of the loop ranges below)
384      DataTypes::ShapeType shape2(rank0+rank1-2*axis_offset);  
385      {         // block to limit the scope of out_index
386         int out_index=0;
387         for (int i=0; i<rank0-axis_offset; i++, ++out_index) { shape2[out_index]=tmpShape0[i]; } // First part of arg_0_Z
388         for (int i=axis_offset; i<rank1; i++, ++out_index)   { shape2[out_index]=tmpShape1[i]; } // Last part of arg_1_Z
389      }
390    
391      if (shape2.size()>ESCRIPT_MAX_DATA_RANK)
392      {
393         ostringstream os;
394         os << "C_GeneralTensorProduct: Error - Attempt to create a rank " << shape2.size() << " object. The maximum rank is " << ESCRIPT_MAX_DATA_RANK << ".";
395         throw DataException(os.str());
396      }
397    
398      return shape2;
399    }
400    
401    // determine the number of samples requires to evaluate an expression combining left and right
402    // NP1OUT needs an extra buffer because we can't write the answers over the top of the input.
403    // The same goes for G_TENSORPROD
404    // It might seem that pointwise binary ops (G_BINARY) could be written over the top of the lefts.
405    // This would be true were it not for the possibility that the LHS could be a scalar which needs to be examined
406    // multiple times
407  int  int
408  calcBuffs(const DataLazy_ptr& left, const DataLazy_ptr& right, ES_optype op)  calcBuffs(const DataLazy_ptr& left, const DataLazy_ptr& right, ES_optype op)
409  {  {
410     switch(getOpgroup(op))     switch(getOpgroup(op))
411     {     {
412     case G_IDENTITY: return 1;     case G_IDENTITY: return 1;
413     case G_BINARY: return max(left->getBuffsRequired(),right->getBuffsRequired()+1);     case G_BINARY: return 1+max(left->getBuffsRequired(),right->getBuffsRequired()+1);
414     case G_UNARY: return max(left->getBuffsRequired(),1);     case G_REDUCTION:
415       case G_UNARY:
416       case G_UNARY_P: return max(left->getBuffsRequired(),1);
417       case G_NP1OUT: return 1+max(left->getBuffsRequired(),1);
418       case G_NP1OUT_P: return 1+max(left->getBuffsRequired(),1);
419       case G_TENSORPROD: return 1+max(left->getBuffsRequired(),right->getBuffsRequired()+1);
420       case G_NP1OUT_2P: return 1+max(left->getBuffsRequired(),1);
421     default:     default:
422      throw DataException("Programmer Error - attempt to calcBuffs() for operator "+opToString(op)+".");      throw DataException("Programmer Error - attempt to calcBuffs() for operator "+opToString(op)+".");
423     }     }
424  }  }
425    
426    
   
427  }   // end anonymous namespace  }   // end anonymous namespace
428    
429    
430    
431    // Return a string representing the operation
432  const std::string&  const std::string&
433  opToString(ES_optype op)  opToString(ES_optype op)
434  {  {
# Line 138  opToString(ES_optype op) Line 439  opToString(ES_optype op)
439    return ES_opstrings[op];    return ES_opstrings[op];
440  }  }
441    
442    #ifdef LAZY_NODE_STORAGE
443    void DataLazy::LazyNodeSetup()
444    {
445    #ifdef _OPENMP
446        int numthreads=omp_get_max_threads();
447        m_samples.resize(numthreads*m_samplesize);
448        m_sampleids=new int[numthreads];
449        for (int i=0;i<numthreads;++i)
450        {
451            m_sampleids[i]=-1;  
452        }
453    #else
454        m_samples.resize(m_samplesize);
455        m_sampleids=new int[1];
456        m_sampleids[0]=-1;
457    #endif  // _OPENMP
458    }
459    #endif   // LAZY_NODE_STORAGE
460    
461    
462    // Creates an identity node
463  DataLazy::DataLazy(DataAbstract_ptr p)  DataLazy::DataLazy(DataAbstract_ptr p)
464      : parent(p->getFunctionSpace(),p->getShape()),      : parent(p->getFunctionSpace(),p->getShape())
465      m_op(IDENTITY)  #ifdef LAZY_NODE_STORAGE
466        ,m_sampleids(0),
467        m_samples(1)
468    #endif
469  {  {
470     if (p->isLazy())     if (p->isLazy())
471     {     {
     // TODO: fix this.   We could make the new node a copy of p?  
472      // I don't want identity of Lazy.      // I don't want identity of Lazy.
473      // Question: Why would that be so bad?      // Question: Why would that be so bad?
474      // Answer: We assume that the child of ID is something we can call getVector on      // Answer: We assume that the child of ID is something we can call getVector on
# Line 153  DataLazy::DataLazy(DataAbstract_ptr p) Line 476  DataLazy::DataLazy(DataAbstract_ptr p)
476     }     }
477     else     else
478     {     {
479      m_id=dynamic_pointer_cast<DataReady>(p);      p->makeLazyShared();
480        DataReady_ptr dr=dynamic_pointer_cast<DataReady>(p);
481        makeIdentity(dr);
482    LAZYDEBUG(cout << "Wrapping " << dr.get() << " id=" << m_id.get() << endl;)
483     }     }
484     m_length=p->getLength();  LAZYDEBUG(cout << "(1)Lazy created with " << m_samplesize << endl;)
    m_buffsRequired=1;  
    m_samplesize=getNumDPPSample()*getNoValues();  
 cout << "(1)Lazy created with " << m_samplesize << endl;  
485  }  }
486    
487  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op)  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op)
488      : parent(left->getFunctionSpace(),left->getShape()),      : parent(left->getFunctionSpace(),(getOpgroup(op)!=G_REDUCTION)?left->getShape():DataTypes::scalarShape),
489      m_op(op)      m_op(op),
490        m_axis_offset(0),
491        m_transpose(0),
492        m_SL(0), m_SM(0), m_SR(0)
493  {  {
494     if (getOpgroup(op)!=G_UNARY)     if ((getOpgroup(op)!=G_UNARY) && (getOpgroup(op)!=G_NP1OUT) && (getOpgroup(op)!=G_REDUCTION))
495     {     {
496      throw DataException("Programmer error - constructor DataLazy(left, op) will only process UNARY operations.");      throw DataException("Programmer error - constructor DataLazy(left, op) will only process UNARY operations.");
497     }     }
498    
499     DataLazy_ptr lleft;     DataLazy_ptr lleft;
500     if (!left->isLazy())     if (!left->isLazy())
501     {     {
# Line 178  DataLazy::DataLazy(DataAbstract_ptr left Line 505  DataLazy::DataLazy(DataAbstract_ptr left
505     {     {
506      lleft=dynamic_pointer_cast<DataLazy>(left);      lleft=dynamic_pointer_cast<DataLazy>(left);
507     }     }
508     m_length=left->getLength();     m_readytype=lleft->m_readytype;
509     m_left=lleft;     m_left=lleft;
510     m_buffsRequired=1;     m_buffsRequired=calcBuffs(m_left, m_right,m_op); // yeah m_right will be null at this point
511     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
512       m_maxsamplesize=max(m_samplesize,m_left->getMaxSampleSize());
513       m_children=m_left->m_children+1;
514       m_height=m_left->m_height+1;
515    #ifdef LAZY_NODE_STORAGE
516       LazyNodeSetup();
517    #endif
518       SIZELIMIT
519  }  }
520    
521    
522  DataLazy::DataLazy(DataLazy_ptr left, DataLazy_ptr right, ES_optype op)  // In this constructor we need to consider interpolation
523    DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
524      : parent(resultFS(left,right,op), resultShape(left,right,op)),      : parent(resultFS(left,right,op), resultShape(left,right,op)),
525      m_left(left),      m_op(op),
526      m_right(right),      m_SL(0), m_SM(0), m_SR(0)
     m_op(op)  
527  {  {
528     if (getOpgroup(op)!=G_BINARY)  LAZYDEBUG(cout << "Forming operator with " << left.get() << " " << right.get() << endl;)
529       if ((getOpgroup(op)!=G_BINARY))
530     {     {
531      throw DataException("Programmer error - constructor DataLazy(left, right, op) will only process BINARY operations.");      throw DataException("Programmer error - constructor DataLazy(left, right, op) will only process BINARY operations.");
532     }     }
533     m_length=resultLength(m_left,m_right,m_op);  
534       if (getFunctionSpace()!=left->getFunctionSpace())    // left needs to be interpolated
535       {
536        FunctionSpace fs=getFunctionSpace();
537        Data ltemp(left);
538        Data tmp(ltemp,fs);
539        left=tmp.borrowDataPtr();
540       }
541       if (getFunctionSpace()!=right->getFunctionSpace())   // right needs to be interpolated
542       {
543        Data tmp(Data(right),getFunctionSpace());
544        right=tmp.borrowDataPtr();
545    LAZYDEBUG(cout << "Right interpolation required " << right.get() << endl;)
546       }
547       left->operandCheck(*right);
548    
549       if (left->isLazy())          // the children need to be DataLazy. Wrap them in IDENTITY if required
550       {
551        m_left=dynamic_pointer_cast<DataLazy>(left);
552    LAZYDEBUG(cout << "Left is " << m_left->toString() << endl;)
553       }
554       else
555       {
556        m_left=DataLazy_ptr(new DataLazy(left));
557    LAZYDEBUG(cout << "Left " << left.get() << " wrapped " << m_left->m_id.get() << endl;)
558       }
559       if (right->isLazy())
560       {
561        m_right=dynamic_pointer_cast<DataLazy>(right);
562    LAZYDEBUG(cout << "Right is " << m_right->toString() << endl;)
563       }
564       else
565       {
566        m_right=DataLazy_ptr(new DataLazy(right));
567    LAZYDEBUG(cout << "Right " << right.get() << " wrapped " << m_right->m_id.get() << endl;)
568       }
569       char lt=m_left->m_readytype;
570       char rt=m_right->m_readytype;
571       if (lt=='E' || rt=='E')
572       {
573        m_readytype='E';
574       }
575       else if (lt=='T' || rt=='T')
576       {
577        m_readytype='T';
578       }
579       else
580       {
581        m_readytype='C';
582       }
583     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
584     m_buffsRequired=calcBuffs(m_left, m_right, m_op);     m_maxsamplesize=max(max(m_samplesize,m_right->getMaxSampleSize()),m_left->getMaxSampleSize());  
585  cout << "(2)Lazy created with " << m_samplesize << endl;     m_buffsRequired=calcBuffs(m_left, m_right,m_op);
586       m_children=m_left->m_children+m_right->m_children+2;
587       m_height=max(m_left->m_height,m_right->m_height)+1;
588    #ifdef LAZY_NODE_STORAGE
589       LazyNodeSetup();
590    #endif
591       SIZELIMIT
592    LAZYDEBUG(cout << "(3)Lazy created with " << m_samplesize << endl;)
593  }  }
594    
595  DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)  DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op, int axis_offset, int transpose)
596      : parent(resultFS(left,right,op), resultShape(left,right,op)),      : parent(resultFS(left,right,op), GTPShape(left,right, axis_offset, transpose, m_SL,m_SM, m_SR)),
597      m_op(op)      m_op(op),
598        m_axis_offset(axis_offset),
599        m_transpose(transpose)
600  {  {
601     if (getOpgroup(op)!=G_BINARY)     if ((getOpgroup(op)!=G_TENSORPROD))
602       {
603        throw DataException("Programmer error - constructor DataLazy(left, right, op, ax, tr) will only process BINARY operations which require parameters.");
604       }
605       if ((transpose>2) || (transpose<0))
606       {
607        throw DataException("DataLazy GeneralTensorProduct constructor: Error - transpose should be 0, 1 or 2");
608       }
609       if (getFunctionSpace()!=left->getFunctionSpace())    // left needs to be interpolated
610     {     {
611      throw DataException("Programmer error - constructor DataLazy(left, op) will only process BINARY operations.");      FunctionSpace fs=getFunctionSpace();
612        Data ltemp(left);
613        Data tmp(ltemp,fs);
614        left=tmp.borrowDataPtr();
615     }     }
616     if (left->isLazy())     if (getFunctionSpace()!=right->getFunctionSpace())   // right needs to be interpolated
617       {
618        Data tmp(Data(right),getFunctionSpace());
619        right=tmp.borrowDataPtr();
620       }
621    //    left->operandCheck(*right);
622    
623       if (left->isLazy())          // the children need to be DataLazy. Wrap them in IDENTITY if required
624     {     {
625      m_left=dynamic_pointer_cast<DataLazy>(left);      m_left=dynamic_pointer_cast<DataLazy>(left);
626     }     }
# Line 225  DataLazy::DataLazy(DataAbstract_ptr left Line 636  DataLazy::DataLazy(DataAbstract_ptr left
636     {     {
637      m_right=DataLazy_ptr(new DataLazy(right));      m_right=DataLazy_ptr(new DataLazy(right));
638     }     }
639       char lt=m_left->m_readytype;
640     m_length=resultLength(m_left,m_right,m_op);     char rt=m_right->m_readytype;
641       if (lt=='E' || rt=='E')
642       {
643        m_readytype='E';
644       }
645       else if (lt=='T' || rt=='T')
646       {
647        m_readytype='T';
648       }
649       else
650       {
651        m_readytype='C';
652       }
653     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
654       m_maxsamplesize=max(max(m_samplesize,m_right->getMaxSampleSize()),m_left->getMaxSampleSize());  
655     m_buffsRequired=calcBuffs(m_left, m_right,m_op);     m_buffsRequired=calcBuffs(m_left, m_right,m_op);
656  cout << "(3)Lazy created with " << m_samplesize << endl;     m_children=m_left->m_children+m_right->m_children+2;
657       m_height=max(m_left->m_height,m_right->m_height)+1;
658    #ifdef LAZY_NODE_STORAGE
659       LazyNodeSetup();
660    #endif
661       SIZELIMIT
662    LAZYDEBUG(cout << "(4)Lazy created with " << m_samplesize << endl;)
663  }  }
664    
665    
666    DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, int axis_offset)
667        : parent(left->getFunctionSpace(), resultShape(left,op, axis_offset)),
668        m_op(op),
669        m_axis_offset(axis_offset),
670        m_transpose(0),
671        m_tol(0)
672    {
673       if ((getOpgroup(op)!=G_NP1OUT_P))
674       {
675        throw DataException("Programmer error - constructor DataLazy(left, op, ax) will only process UNARY operations which require parameters.");
676       }
677       DataLazy_ptr lleft;
678       if (!left->isLazy())
679       {
680        lleft=DataLazy_ptr(new DataLazy(left));
681       }
682       else
683       {
684        lleft=dynamic_pointer_cast<DataLazy>(left);
685       }
686       m_readytype=lleft->m_readytype;
687       m_left=lleft;
688       m_buffsRequired=calcBuffs(m_left, m_right,m_op); // yeah m_right will be null at this point
689       m_samplesize=getNumDPPSample()*getNoValues();
690       m_maxsamplesize=max(m_samplesize,m_left->getMaxSampleSize());
691       m_children=m_left->m_children+1;
692       m_height=m_left->m_height+1;
693    #ifdef LAZY_NODE_STORAGE
694       LazyNodeSetup();
695    #endif
696       SIZELIMIT
697    LAZYDEBUG(cout << "(5)Lazy created with " << m_samplesize << endl;)
698    }
699    
700    DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, double tol)
701        : parent(left->getFunctionSpace(), left->getShape()),
702        m_op(op),
703        m_axis_offset(0),
704        m_transpose(0),
705        m_tol(tol)
706    {
707       if ((getOpgroup(op)!=G_UNARY_P))
708       {
709        throw DataException("Programmer error - constructor DataLazy(left, op, tol) will only process UNARY operations which require parameters.");
710       }
711       DataLazy_ptr lleft;
712       if (!left->isLazy())
713       {
714        lleft=DataLazy_ptr(new DataLazy(left));
715       }
716       else
717       {
718        lleft=dynamic_pointer_cast<DataLazy>(left);
719       }
720       m_readytype=lleft->m_readytype;
721       m_left=lleft;
722       m_buffsRequired=calcBuffs(m_left, m_right,m_op); // yeah m_right will be null at this point
723       m_samplesize=getNumDPPSample()*getNoValues();
724       m_maxsamplesize=max(m_samplesize,m_left->getMaxSampleSize());
725       m_children=m_left->m_children+1;
726       m_height=m_left->m_height+1;
727    #ifdef LAZY_NODE_STORAGE
728       LazyNodeSetup();
729    #endif
730       SIZELIMIT
731    LAZYDEBUG(cout << "(6)Lazy created with " << m_samplesize << endl;)
732    }
733    
734    
735    DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, const int axis0, const int axis1)
736        : parent(left->getFunctionSpace(), SwapShape(left,axis0,axis1)),
737        m_op(op),
738        m_axis_offset(axis0),
739        m_transpose(axis1),
740        m_tol(0)
741    {
742       if ((getOpgroup(op)!=G_NP1OUT_2P))
743       {
744        throw DataException("Programmer error - constructor DataLazy(left, op, tol) will only process UNARY operations which require two integer parameters.");
745       }
746       DataLazy_ptr lleft;
747       if (!left->isLazy())
748       {
749        lleft=DataLazy_ptr(new DataLazy(left));
750       }
751       else
752       {
753        lleft=dynamic_pointer_cast<DataLazy>(left);
754       }
755       m_readytype=lleft->m_readytype;
756       m_left=lleft;
757       m_buffsRequired=calcBuffs(m_left, m_right,m_op); // yeah m_right will be null at this point
758       m_samplesize=getNumDPPSample()*getNoValues();
759       m_maxsamplesize=max(m_samplesize,m_left->getMaxSampleSize());
760       m_children=m_left->m_children+1;
761       m_height=m_left->m_height+1;
762    #ifdef LAZY_NODE_STORAGE
763       LazyNodeSetup();
764    #endif
765       SIZELIMIT
766    LAZYDEBUG(cout << "(7)Lazy created with " << m_samplesize << endl;)
767    }
768    
769  DataLazy::~DataLazy()  DataLazy::~DataLazy()
770  {  {
771    #ifdef LAZY_NODE_SETUP
772       delete[] m_sampleids;
773       delete[] m_samples;
774    #endif
775  }  }
776    
777    
# Line 245  DataLazy::getBuffsRequired() const Line 782  DataLazy::getBuffsRequired() const
782  }  }
783    
784    
785  // the vector and the offset are a place where the method could write its data if it wishes  size_t
786  // it is not obligated to do so. For example, if it has its own storage already, it can use that.  DataLazy::getMaxSampleSize() const
 // Hence the return value to indicate where the data is actually stored.  
 // Regardless, the storage should be assumed to be used, even if it isn't.  
 const double*  
 DataLazy::resolveSample(ValueType& v,int sampleNo,  size_t offset ) const  
787  {  {
788    if (m_op==IDENTITY)        return m_maxsamplesize;
789    }
790    
791    
792    
793    size_t
794    DataLazy::getSampleBufferSize() const
795    {
796        return m_maxsamplesize*(max(1,m_buffsRequired));
797    }
798    
799    /*
800      \brief Evaluates the expression using methods on Data.
801      This does the work for the collapse method.
802      For reasons of efficiency do not call this method on DataExpanded nodes.
803    */
804    DataReady_ptr
805    DataLazy::collapseToReady()
806    {
807      if (m_readytype=='E')
808      { // this is more an efficiency concern than anything else
809        throw DataException("Programmer Error - do not use collapse on Expanded data.");
810      }
811      if (m_op==IDENTITY)
812    {    {
813      const ValueType& vec=m_id->getVector();      return m_id;
     return &(vec[m_id->getPointOffset(sampleNo, 0)]);  
814    }    }
815    size_t rightoffset=offset+m_samplesize;    DataReady_ptr pleft=m_left->collapseToReady();
816    const double* left=m_left->resolveSample(v,sampleNo,offset);    Data left(pleft);
817    const double* right=0;    Data right;
818    if (getOpgroup(m_op)==G_BINARY)    if ((getOpgroup(m_op)==G_BINARY) || (getOpgroup(m_op)==G_TENSORPROD))
819    {    {
820      right=m_right->resolveSample(v,sampleNo,rightoffset);      right=Data(m_right->collapseToReady());
821    }    }
822    double* result=&(v[offset]);    Data result;
823      switch(m_op)
824    {    {
825      switch(m_op)      case ADD:
826      {      result=left+right;
     case ADD:       // since these are pointwise ops, pretend each sample is one point  
     tensor_binary_operation(m_samplesize, left, right, result, plus<double>());  
827      break;      break;
828      case SUB:            case SUB:      
829      tensor_binary_operation(m_samplesize, left, right, result, minus<double>());      result=left-right;
830      break;      break;
831      case MUL:            case MUL:      
832      tensor_binary_operation(m_samplesize, left, right, result, multiplies<double>());      result=left*right;
833      break;      break;
834      case DIV:            case DIV:      
835      tensor_binary_operation(m_samplesize, left, right, result, divides<double>());      result=left/right;
836      break;      break;
 // unary ops  
837      case SIN:      case SIN:
838      tensor_unary_operation(m_samplesize, left, result, ::sin);      result=left.sin();  
839        break;
840        case COS:
841        result=left.cos();
842        break;
843        case TAN:
844        result=left.tan();
845        break;
846        case ASIN:
847        result=left.asin();
848        break;
849        case ACOS:
850        result=left.acos();
851        break;
852        case ATAN:
853        result=left.atan();
854        break;
855        case SINH:
856        result=left.sinh();
857        break;
858        case COSH:
859        result=left.cosh();
860        break;
861        case TANH:
862        result=left.tanh();
863        break;
864        case ERF:
865        result=left.erf();
866        break;
867       case ASINH:
868        result=left.asinh();
869        break;
870       case ACOSH:
871        result=left.acosh();
872        break;
873       case ATANH:
874        result=left.atanh();
875        break;
876        case LOG10:
877        result=left.log10();
878        break;
879        case LOG:
880        result=left.log();
881        break;
882        case SIGN:
883        result=left.sign();
884        break;
885        case ABS:
886        result=left.abs();
887        break;
888        case NEG:
889        result=left.neg();
890        break;
891        case POS:
892        // it doesn't mean anything for delayed.
893        // it will just trigger a deep copy of the lazy object
894        throw DataException("Programmer error - POS not supported for lazy data.");
895        break;
896        case EXP:
897        result=left.exp();
898        break;
899        case SQRT:
900        result=left.sqrt();
901        break;
902        case RECIP:
903        result=left.oneOver();
904        break;
905        case GZ:
906        result=left.wherePositive();
907        break;
908        case LZ:
909        result=left.whereNegative();
910        break;
911        case GEZ:
912        result=left.whereNonNegative();
913        break;
914        case LEZ:
915        result=left.whereNonPositive();
916        break;
917        case NEZ:
918        result=left.whereNonZero(m_tol);
919        break;
920        case EZ:
921        result=left.whereZero(m_tol);
922        break;
923        case SYM:
924        result=left.symmetric();
925        break;
926        case NSYM:
927        result=left.nonsymmetric();
928        break;
929        case PROD:
930        result=C_GeneralTensorProduct(left,right,m_axis_offset, m_transpose);
931        break;
932        case TRANS:
933        result=left.transpose(m_axis_offset);
934        break;
935        case TRACE:
936        result=left.trace(m_axis_offset);
937        break;
938        case SWAP:
939        result=left.swapaxes(m_axis_offset, m_transpose);
940        break;
941        case MINVAL:
942        result=left.minval();
943        break;
944        case MAXVAL:
945        result=left.minval();
946        break;
947        default:
948        throw DataException("Programmer error - collapseToReady does not know how to resolve operator "+opToString(m_op)+".");
949      }
950      return result.borrowReadyPtr();
951    }
952    
953    /*
954       \brief Converts the DataLazy into an IDENTITY storing the value of the expression.
955       This method uses the original methods on the Data class to evaluate the expressions.
956       For this reason, it should not be used on DataExpanded instances. (To do so would defeat
957       the purpose of using DataLazy in the first place).
958    */
959    void
960    DataLazy::collapse()
961    {
962      if (m_op==IDENTITY)
963      {
964        return;
965      }
966      if (m_readytype=='E')
967      { // this is more an efficiency concern than anything else
968        throw DataException("Programmer Error - do not use collapse on Expanded data.");
969      }
970      m_id=collapseToReady();
971      m_op=IDENTITY;
972    }
973    
974    /*
975      \brief Compute the value of the expression (unary operation) for the given sample.
976      \return Vector which stores the value of the subexpression for the given sample.
977      \param v A vector to store intermediate results.
978      \param offset Index in v to begin storing results.
979      \param sampleNo Sample number to evaluate.
980      \param roffset (output parameter) the offset in the return vector where the result begins.
981    
982      The return value will be an existing vector so do not deallocate it.
983      If the result is stored in v it should be stored at the offset given.
984      Everything from offset to the end of v should be considered available for this method to use.
985    */
986    DataTypes::ValueType*
987    DataLazy::resolveUnary(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const
988    {
989        // we assume that any collapsing has been done before we get here
990        // since we only have one argument we don't need to think about only
991        // processing single points.
992      if (m_readytype!='E')
993      {
994        throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
995      }
996      const ValueType* vleft=m_left->resolveVectorSample(v,offset,sampleNo,roffset);
997      const double* left=&((*vleft)[roffset]);
998      double* result=&(v[offset]);
999      roffset=offset;
1000      switch (m_op)
1001      {
1002        case SIN:  
1003        tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sin);
1004      break;      break;
1005      case COS:      case COS:
1006      tensor_unary_operation(m_samplesize, left, result, ::cos);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cos);
1007      break;      break;
1008      case TAN:      case TAN:
1009      tensor_unary_operation(m_samplesize, left, result, ::tan);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tan);
1010      break;      break;
1011      case ASIN:      case ASIN:
1012      tensor_unary_operation(m_samplesize, left, result, ::asin);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::asin);
1013      break;      break;
1014      case ACOS:      case ACOS:
1015      tensor_unary_operation(m_samplesize, left, result, ::acos);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::acos);
1016      break;      break;
1017      case ATAN:      case ATAN:
1018      tensor_unary_operation(m_samplesize, left, result, ::atan);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::atan);
1019      break;      break;
1020      case SINH:      case SINH:
1021      tensor_unary_operation(m_samplesize, left, result, ::sinh);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sinh);
1022      break;      break;
1023      case COSH:      case COSH:
1024      tensor_unary_operation(m_samplesize, left, result, ::cosh);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cosh);
1025      break;      break;
1026      case TANH:      case TANH:
1027      tensor_unary_operation(m_samplesize, left, result, ::tanh);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tanh);
1028      break;      break;
1029      case ERF:      case ERF:
1030  #ifdef _WIN32  #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1031      throw DataException("Error - Data:: erf function is not supported on _WIN32 platforms.");      throw DataException("Error - Data:: erf function is not supported on _WIN32 platforms.");
1032  #else  #else
1033      tensor_unary_operation(m_samplesize, left, result, ::erf);      tensor_unary_operation(m_samplesize, left, result, ::erf);
1034      break;      break;
1035  #endif  #endif
1036     case ASINH:     case ASINH:
1037  #ifdef _WIN32  #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1038      tensor_unary_operation(m_samplesize, left, result, escript::asinh_substitute);      tensor_unary_operation(m_samplesize, left, result, escript::asinh_substitute);
1039  #else  #else
1040      tensor_unary_operation(m_samplesize, left, result, ::asinh);      tensor_unary_operation(m_samplesize, left, result, ::asinh);
1041  #endif    #endif  
1042      break;      break;
1043     case ACOSH:     case ACOSH:
1044  #ifdef _WIN32  #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1045      tensor_unary_operation(m_samplesize, left, result, escript::acosh_substitute);      tensor_unary_operation(m_samplesize, left, result, escript::acosh_substitute);
1046  #else  #else
1047      tensor_unary_operation(m_samplesize, left, result, ::acosh);      tensor_unary_operation(m_samplesize, left, result, ::acosh);
1048  #endif    #endif  
1049      break;      break;
1050     case ATANH:     case ATANH:
1051  #ifdef _WIN32  #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1052      tensor_unary_operation(m_samplesize, left, result, escript::atanh_substitute);      tensor_unary_operation(m_samplesize, left, result, escript::atanh_substitute);
1053  #else  #else
1054      tensor_unary_operation(m_samplesize, left, result, ::atanh);      tensor_unary_operation(m_samplesize, left, result, ::atanh);
1055  #endif    #endif  
1056      break;      break;
1057      case LOG10:      case LOG10:
1058      tensor_unary_operation(m_samplesize, left, result, ::log10);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log10);
1059      break;      break;
1060      case LOG:      case LOG:
1061      tensor_unary_operation(m_samplesize, left, result, ::log);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log);
1062      break;      break;
1063      case SIGN:      case SIGN:
1064      tensor_unary_operation(m_samplesize, left, result, escript::fsign);      tensor_unary_operation(m_samplesize, left, result, escript::fsign);
1065      break;      break;
1066      case ABS:      case ABS:
1067      tensor_unary_operation(m_samplesize, left, result, ::fabs);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::fabs);
1068      break;      break;
1069      case NEG:      case NEG:
1070      tensor_unary_operation(m_samplesize, left, result, negate<double>());      tensor_unary_operation(m_samplesize, left, result, negate<double>());
# Line 357  DataLazy::resolveSample(ValueType& v,int Line 1075  DataLazy::resolveSample(ValueType& v,int
1075      throw DataException("Programmer error - POS not supported for lazy data.");      throw DataException("Programmer error - POS not supported for lazy data.");
1076      break;      break;
1077      case EXP:      case EXP:
1078      tensor_unary_operation(m_samplesize, left, result, ::exp);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::exp);
1079      break;      break;
1080      case SQRT:      case SQRT:
1081      tensor_unary_operation(m_samplesize, left, result, ::sqrt);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sqrt);
1082      break;      break;
1083      case RECIP:      case RECIP:
1084      tensor_unary_operation(m_samplesize, left, result, bind1st(divides<double>(),1.));      tensor_unary_operation(m_samplesize, left, result, bind1st(divides<double>(),1.));
# Line 377  DataLazy::resolveSample(ValueType& v,int Line 1095  DataLazy::resolveSample(ValueType& v,int
1095      case LEZ:      case LEZ:
1096      tensor_unary_operation(m_samplesize, left, result, bind2nd(less_equal<double>(),0.0));      tensor_unary_operation(m_samplesize, left, result, bind2nd(less_equal<double>(),0.0));
1097      break;      break;
1098    // There are actually G_UNARY_P but I don't see a compelling reason to treat them differently
1099        case NEZ:
1100        tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsGT(),m_tol));
1101        break;
1102        case EZ:
1103        tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsLTE(),m_tol));
1104        break;
1105    
1106      default:      default:
1107      throw DataException("Programmer error - do not know how to resolve operator "+opToString(m_op)+".");      throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");
1108      }
1109      return &v;
1110    }
1111    
1112    
1113    /*
1114      \brief Compute the value of the expression (reduction operation) for the given sample.
1115      \return Vector which stores the value of the subexpression for the given sample.
1116      \param v A vector to store intermediate results.
1117      \param offset Index in v to begin storing results.
1118      \param sampleNo Sample number to evaluate.
1119      \param roffset (output parameter) the offset in the return vector where the result begins.
1120    
1121      The return value will be an existing vector so do not deallocate it.
1122      If the result is stored in v it should be stored at the offset given.
1123      Everything from offset to the end of v should be considered available for this method to use.
1124    */
1125    DataTypes::ValueType*
1126    DataLazy::resolveReduction(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const
1127    {
1128        // we assume that any collapsing has been done before we get here
1129        // since we only have one argument we don't need to think about only
1130        // processing single points.
1131      if (m_readytype!='E')
1132      {
1133        throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
1134      }
1135      const ValueType* vleft=m_left->resolveVectorSample(v,offset,sampleNo,roffset);
1136      double* result=&(v[offset]);
1137      roffset=offset;
1138      unsigned int ndpps=getNumDPPSample();
1139      unsigned int psize=DataTypes::noValues(getShape());
1140      switch (m_op)
1141      {
1142        case MINVAL:
1143        {
1144          for (unsigned int z=0;z<ndpps;++z)
1145          {
1146             FMin op;
1147             *result=DataMaths::reductionOp(*vleft, m_left->getShape(), roffset, op, numeric_limits<double>::max());
1148             roffset+=psize;
1149             result++;
1150          }
1151        }
1152        break;
1153        case MAXVAL:
1154        {
1155          for (unsigned int z=0;z<ndpps;++z)
1156          {
1157             FMax op;
1158             *result=DataMaths::reductionOp(*vleft, m_left->getShape(), roffset, op, numeric_limits<double>::max()*-1);
1159             roffset+=psize;
1160             result++;
1161          }
1162        }
1163        break;
1164        default:
1165        throw DataException("Programmer error - resolveReduction can not resolve operator "+opToString(m_op)+".");
1166      }
1167      return &v;
1168    }
1169    
1170    
1171    
1172    /*
1173      \brief Compute the value of the expression (unary operation) for the given sample.
1174      \return Vector which stores the value of the subexpression for the given sample.
1175      \param v A vector to store intermediate results.
1176      \param offset Index in v to begin storing results.
1177      \param sampleNo Sample number to evaluate.
1178      \param roffset (output parameter) the offset in the return vector where the result begins.
1179    
1180      The return value will be an existing vector so do not deallocate it.
1181      If the result is stored in v it should be stored at the offset given.
1182      Everything from offset to the end of v should be considered available for this method to use.
1183    */
1184    DataTypes::ValueType*
1185    DataLazy::resolveNP1OUT(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const
1186    {
1187        // we assume that any collapsing has been done before we get here
1188        // since we only have one argument we don't need to think about only
1189        // processing single points.
1190      if (m_readytype!='E')
1191      {
1192        throw DataException("Programmer error - resolveNP1OUT should only be called on expanded Data.");
1193      }
1194        // since we can't write the result over the input, we need a result offset further along
1195      size_t subroffset=roffset+m_samplesize;
1196    LAZYDEBUG(cerr << "subroffset=" << subroffset << endl;)
1197      const ValueType* vleft=m_left->resolveVectorSample(v,offset+m_left->m_samplesize,sampleNo,subroffset);
1198      roffset=offset;
1199      size_t loop=0;
1200      size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1201      size_t step=getNoValues();
1202      switch (m_op)
1203      {
1204        case SYM:
1205        for (loop=0;loop<numsteps;++loop)
1206        {
1207            DataMaths::symmetric(*vleft,m_left->getShape(),subroffset, v, getShape(), offset);
1208            subroffset+=step;
1209            offset+=step;
1210        }
1211        break;
1212        case NSYM:
1213        for (loop=0;loop<numsteps;++loop)
1214        {
1215            DataMaths::nonsymmetric(*vleft,m_left->getShape(),subroffset, v, getShape(), offset);
1216            subroffset+=step;
1217            offset+=step;
1218        }
1219        break;
1220        default:
1221        throw DataException("Programmer error - resolveNP1OUT can not resolve operator "+opToString(m_op)+".");
1222      }
1223      return &v;
1224    }
1225    
1226    /*
1227      \brief Compute the value of the expression (unary operation) for the given sample.
1228      \return Vector which stores the value of the subexpression for the given sample.
1229      \param v A vector to store intermediate results.
1230      \param offset Index in v to begin storing results.
1231      \param sampleNo Sample number to evaluate.
1232      \param roffset (output parameter) the offset in the return vector where the result begins.
1233    
1234      The return value will be an existing vector so do not deallocate it.
1235      If the result is stored in v it should be stored at the offset given.
1236      Everything from offset to the end of v should be considered available for this method to use.
1237    */
1238    DataTypes::ValueType*
1239    DataLazy::resolveNP1OUT_P(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const
1240    {
1241        // we assume that any collapsing has been done before we get here
1242        // since we only have one argument we don't need to think about only
1243        // processing single points.
1244      if (m_readytype!='E')
1245      {
1246        throw DataException("Programmer error - resolveNP1OUT_P should only be called on expanded Data.");
1247      }
1248        // since we can't write the result over the input, we need a result offset further along
1249      size_t subroffset;
1250      const ValueType* vleft=m_left->resolveVectorSample(v,offset+m_left->m_samplesize,sampleNo,subroffset);
1251    LAZYDEBUG(cerr << "srcsamplesize=" << offset+m_left->m_samplesize << " beg=" << subroffset << endl;)
1252    LAZYDEBUG(cerr << "Offset for 5800=" << getPointOffset(5800/getNumDPPSample(),5800%getNumDPPSample()) << endl;)
1253      roffset=offset;
1254      size_t loop=0;
1255      size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1256      size_t outstep=getNoValues();
1257      size_t instep=m_left->getNoValues();
1258    LAZYDEBUG(cerr << "instep=" << instep << " outstep=" << outstep<< " numsteps=" << numsteps << endl;)
1259      switch (m_op)
1260      {
1261        case TRACE:
1262        for (loop=0;loop<numsteps;++loop)
1263        {
1264    size_t zz=sampleNo*getNumDPPSample()+loop;
1265    if (zz==5800)
1266    {
1267    LAZYDEBUG(cerr << "point=" <<  zz<< endl;)
1268    LAZYDEBUG(cerr << "Input to  trace=" << DataTypes::pointToString(*vleft,m_left->getShape(),subroffset,"") << endl;)
1269    LAZYDEBUG(cerr << "Offset for point=" << getPointOffset(5800/getNumDPPSample(),5800%getNumDPPSample()) << " vs ";)
1270    LAZYDEBUG(cerr << subroffset << endl;)
1271    LAZYDEBUG(cerr << "output=" << offset << endl;)
1272    }
1273                DataMaths::trace(*vleft,m_left->getShape(),subroffset, v ,getShape(),offset,m_axis_offset);
1274    if (zz==5800)
1275    {
1276    LAZYDEBUG(cerr << "Result of trace=" << DataTypes::pointToString(v,getShape(),offset,"") << endl;)
1277    }
1278            subroffset+=instep;
1279            offset+=outstep;
1280        }
1281        break;
1282        case TRANS:
1283        for (loop=0;loop<numsteps;++loop)
1284        {
1285                DataMaths::transpose(*vleft,m_left->getShape(),subroffset, v,getShape(),offset,m_axis_offset);
1286            subroffset+=instep;
1287            offset+=outstep;
1288        }
1289        break;
1290        default:
1291        throw DataException("Programmer error - resolveNP1OUTP can not resolve operator "+opToString(m_op)+".");
1292      }
1293      return &v;
1294    }
1295    
1296    
1297    /*
1298      \brief Compute the value of the expression (unary operation with int params) for the given sample.
1299      \return Vector which stores the value of the subexpression for the given sample.
1300      \param v A vector to store intermediate results.
1301      \param offset Index in v to begin storing results.
1302      \param sampleNo Sample number to evaluate.
1303      \param roffset (output parameter) the offset in the return vector where the result begins.
1304    
1305      The return value will be an existing vector so do not deallocate it.
1306      If the result is stored in v it should be stored at the offset given.
1307      Everything from offset to the end of v should be considered available for this method to use.
1308    */
1309    DataTypes::ValueType*
1310    DataLazy::resolveNP1OUT_2P(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const
1311    {
1312        // we assume that any collapsing has been done before we get here
1313        // since we only have one argument we don't need to think about only
1314        // processing single points.
1315      if (m_readytype!='E')
1316      {
1317        throw DataException("Programmer error - resolveNP1OUT_2P should only be called on expanded Data.");
1318      }
1319        // since we can't write the result over the input, we need a result offset further along
1320      size_t subroffset;
1321      const ValueType* vleft=m_left->resolveVectorSample(v,offset+m_left->m_samplesize,sampleNo,subroffset);
1322    LAZYDEBUG(cerr << "srcsamplesize=" << offset+m_left->m_samplesize << " beg=" << subroffset << endl;)
1323    LAZYDEBUG(cerr << "Offset for 5800=" << getPointOffset(5800/getNumDPPSample(),5800%getNumDPPSample()) << endl;)
1324      roffset=offset;
1325      size_t loop=0;
1326      size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1327      size_t outstep=getNoValues();
1328      size_t instep=m_left->getNoValues();
1329    LAZYDEBUG(cerr << "instep=" << instep << " outstep=" << outstep<< " numsteps=" << numsteps << endl;)
1330      switch (m_op)
1331      {
1332        case SWAP:
1333        for (loop=0;loop<numsteps;++loop)
1334        {
1335                DataMaths::swapaxes(*vleft,m_left->getShape(),subroffset, v,getShape(),offset,m_axis_offset, m_transpose);
1336            subroffset+=instep;
1337            offset+=outstep;
1338        }
1339        break;
1340        default:
1341        throw DataException("Programmer error - resolveNP1OUT2P can not resolve operator "+opToString(m_op)+".");
1342      }
1343      return &v;
1344    }
1345    
1346    
1347    
1348    #define PROC_OP(TYPE,X)                               \
1349        for (int j=0;j<onumsteps;++j)\
1350        {\
1351          for (int i=0;i<numsteps;++i,resultp+=resultStep) \
1352          { \
1353    LAZYDEBUG(cout << "[left,right]=[" << lroffset << "," << rroffset << "]" << endl;)\
1354    LAZYDEBUG(cout << "{left,right}={" << (*left)[lroffset] << "," << (*right)[rroffset] << "}\n";)\
1355             tensor_binary_operation< TYPE >(chunksize, &((*left)[lroffset]), &((*right)[rroffset]), resultp, X); \
1356    LAZYDEBUG(cout << " result=      " << resultp[0] << endl;) \
1357             lroffset+=leftstep; \
1358             rroffset+=rightstep; \
1359          }\
1360          lroffset+=oleftstep;\
1361          rroffset+=orightstep;\
1362        }
1363    
1364    /*
1365      \brief Compute the value of the expression (binary operation) for the given sample.
1366      \return Vector which stores the value of the subexpression for the given sample.
1367      \param v A vector to store intermediate results.
1368      \param offset Index in v to begin storing results.
1369      \param sampleNo Sample number to evaluate.
1370      \param roffset (output parameter) the offset in the return vector where the result begins.
1371    
1372      The return value will be an existing vector so do not deallocate it.
1373      If the result is stored in v it should be stored at the offset given.
1374      Everything from offset to the end of v should be considered available for this method to use.
1375    */
1376    // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1377    // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1378    // If both children are expanded, then we can process them in a single operation (we treat
1379    // the whole sample as one big datapoint.
1380    // If one of the children is not expanded, then we need to treat each point in the sample
1381    // individually.
1382    // There is an additional complication when scalar operations are considered.
1383    // For example, 2+Vector.
1384    // In this case each double within the point is treated individually
DataTypes::ValueType*
DataLazy::resolveBinary(ValueType& v,  size_t offset, int sampleNo, size_t& roffset) const
{
LAZYDEBUG(cout << "Resolve binary: " << toString() << endl;)

  size_t lroffset=0, rroffset=0;    // offsets in the left and right result vectors
    // first work out which of the children are expanded
  bool leftExp=(m_left->m_readytype=='E');
  bool rightExp=(m_right->m_readytype=='E');
  if (!leftExp && !rightExp)
  {
    throw DataException("Programmer Error - please use collapse if neither argument has type 'E'.");
  }
  bool leftScalar=(m_left->getRank()==0);
  bool rightScalar=(m_right->getRank()==0);
  if ((m_left->getRank()!=m_right->getRank()) && (!leftScalar && !rightScalar))
  {
    throw DataException("resolveBinary - ranks of arguments must match unless one of them is scalar.");
  }
  size_t leftsize=m_left->getNoValues();
  size_t rightsize=m_right->getNoValues();
  size_t chunksize=1;           // how many doubles will be processed in one go
  int leftstep=0;       // how far should the left offset advance after each step
  int rightstep=0;
  int numsteps=0;       // total number of steps for the inner loop
  int oleftstep=0;  // the o variables refer to the outer loop
  int orightstep=0; // The outer loop is only required in cases where there is an extended scalar
  int onumsteps=1;

    // Classify each child as (Expanded|single) x (Scalar|Non-scalar).
    // Exactly one flag per side is true; the combination selects the
    // chunking/stepping pattern consumed by the PROC_OP macro below.
  bool LES=(leftExp && leftScalar); // Left is an expanded scalar
  bool RES=(rightExp && rightScalar);
  bool LS=(!leftExp && leftScalar); // left is a single scalar
  bool RS=(!rightExp && rightScalar);
  bool LN=(!leftExp && !leftScalar);    // left is a single non-scalar
  bool RN=(!rightExp && !rightScalar);
  bool LEN=(leftExp && !leftScalar);    // left is an expanded non-scalar
  bool REN=(rightExp && !rightScalar);

  if ((LES && RES) || (LEN && REN)) // both are Expanded scalars or both are expanded non-scalars
  {
        // layouts match exactly, so the whole sample can be treated as one big chunk
    chunksize=m_left->getNumDPPSample()*leftsize;
    leftstep=0;
    rightstep=0;
    numsteps=1;
  }
  else if (LES || RES)
  {
        // one side is an expanded scalar: pair each of its values with
        // the other operand, one double at a time
    chunksize=1;
    if (LES)        // left is an expanded scalar
    {
        if (RS)
        {
           leftstep=1;
           rightstep=0;
           numsteps=m_left->getNumDPPSample();
        }
        else        // RN or REN
        {
           leftstep=0;
           oleftstep=1;
           rightstep=1;
             // for a single non-scalar right (RN) rewind to the start of its
             // only point after each outer step; for REN keep advancing
           orightstep=(RN ? -(int)rightsize : 0);
           numsteps=rightsize;
           onumsteps=m_left->getNumDPPSample();
        }
    }
    else        // right is an expanded scalar
    {
        if (LS)
        {
           rightstep=1;
           leftstep=0;
           numsteps=m_right->getNumDPPSample();
        }
        else
        {
             // mirror image of the LES/(RN or REN) case above
           rightstep=0;
           orightstep=1;
           leftstep=1;
           oleftstep=(LN ? -(int)leftsize : 0);
           numsteps=leftsize;
           onumsteps=m_right->getNumDPPSample();
        }
    }
  }
  else  // this leaves (LEN, RS), (LEN, RN) and their transposes
  {
    if (LEN)    // and Right will be a single value
    {
        chunksize=rightsize;
        leftstep=rightsize;
        rightstep=0;
        numsteps=m_left->getNumDPPSample();
        if (RS)
        {
             // right is a lone scalar, so each double of each left point steps against it
           numsteps*=leftsize;
        }
    }
    else    // REN
    {
        chunksize=leftsize;
        rightstep=leftsize;
        leftstep=0;
        numsteps=m_right->getNumDPPSample();
        if (LS)
        {
           numsteps*=rightsize;
        }
    }
  }

  int resultStep=max(leftstep,rightstep);   // only one (at most) should be !=0
    // Get the values of sub-expressions
  const ValueType* left=m_left->resolveVectorSample(v,offset+getMaxSampleSize(),sampleNo,lroffset); // see note on
    // calcBuffs for why we can't put offset as the 2nd param above
  const ValueType* right=m_right->resolveVectorSample(v,offset+2*getMaxSampleSize(),sampleNo,rroffset); // Note
    // the right child starts further along.
LAZYDEBUG(cout << "Post sub calls in " << toString() << endl;)
LAZYDEBUG(cout << "shapes=" << DataTypes::shapeToString(m_left->getShape()) << "," << DataTypes::shapeToString(m_right->getShape()) << endl;)
LAZYDEBUG(cout << "chunksize=" << chunksize << endl << "leftstep=" << leftstep << " rightstep=" << rightstep;)
LAZYDEBUG(cout << " numsteps=" << numsteps << endl << "oleftstep=" << oleftstep << " orightstep=" << orightstep;)
LAZYDEBUG(cout << "onumsteps=" << onumsteps << endl;)
LAZYDEBUG(cout << " DPPS=" << m_left->getNumDPPSample() << "," <<m_right->getNumDPPSample() << endl;)
LAZYDEBUG(cout << "" << LS << RS << LN << RN << LES << RES <<LEN << REN <<   endl;)


  double* resultp=&(v[offset]);     // results are stored at the vector offset we received
  switch(m_op)
  {
    case ADD:
        PROC_OP(NO_ARG,plus<double>());
    break;
    case SUB:
    PROC_OP(NO_ARG,minus<double>());
    break;
    case MUL:
    PROC_OP(NO_ARG,multiplies<double>());
    break;
    case DIV:
    PROC_OP(NO_ARG,divides<double>());
    break;
    case POW:
       PROC_OP(double (double,double),::pow);
    break;
    default:
    throw DataException("Programmer error - resolveBinary can not resolve operator "+opToString(m_op)+".");
  }
  roffset=offset;
  return &v;
}
1535    
1536    
1537    
1538    /*
1539      \brief Compute the value of the expression (tensor product) for the given sample.
1540      \return Vector which stores the value of the subexpression for the given sample.
1541      \param v A vector to store intermediate results.
1542      \param offset Index in v to begin storing results.
1543      \param sampleNo Sample number to evaluate.
1544      \param roffset (output parameter) the offset in the return vector where the result begins.
1545    
1546      The return value will be an existing vector so do not deallocate it.
1547      If the result is stored in v it should be stored at the offset given.
1548      Everything from offset to the end of v should be considered available for this method to use.
1549    */
1550    // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1551    // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1552    // unlike the other resolve helpers, we must treat these datapoints separately.
DataTypes::ValueType*
DataLazy::resolveTProd(ValueType& v,  size_t offset, int sampleNo, size_t& roffset) const
{
LAZYDEBUG(cout << "Resolve TensorProduct: " << toString()  << " to offset " << offset<< endl;)

  size_t lroffset=0, rroffset=0;    // offsets in the left and right result vectors
    // first work out which of the children are expanded
  bool leftExp=(m_left->m_readytype=='E');
  bool rightExp=(m_right->m_readytype=='E');
  int steps=getNumDPPSample();
/*  int leftStep=((leftExp && !rightExp)? m_right->getNoValues() : 0);
  int rightStep=((rightExp && !leftExp)? m_left->getNoValues() : 0);*/
    // a non-expanded child contributes the same single point to every step,
    // so its offset never advances
  int leftStep=(leftExp? m_left->getNoValues() : 0);        // do not have scalars as input to this method
  int rightStep=(rightExp?m_right->getNoValues() : 0);

  int resultStep=getNoValues();
    // Get the values of sub-expressions (leave a gap of one sample for the result).
  int gap=offset+m_samplesize;

LAZYDEBUG(cout << "Query left with offset=" << gap << endl;)

  const ValueType* left=m_left->resolveVectorSample(v,gap,sampleNo,lroffset);
  gap+=m_left->getMaxSampleSize();


LAZYDEBUG(cout << "Query right with offset=" << gap << endl;)


  const ValueType* right=m_right->resolveVectorSample(v,gap,sampleNo,rroffset);

LAZYDEBUG(cerr << "[Left shape]=" << DataTypes::shapeToString(m_left->getShape()) << "\n[Right shape]=" << DataTypes::shapeToString(m_right->getShape()) << " result=" <<DataTypes::shapeToString(getShape()) <<  endl;
cout << getNoValues() << endl;)
LAZYDEBUG(cerr << "Result of left=";)
LAZYDEBUG(cerr << "[" << lroffset << " .. " << lroffset+m_left->getNoValues() << "]" << endl;

for (int i=lroffset, limit=lroffset+(leftExp?m_left->getNoValues()*m_left->getNumDPPSample():m_left->getNoValues());i<limit;++i)
{
cout << "[" << setw(2) << i-lroffset << "] " << setw(10) << (*left)[i] << " ";
if (i%4==0) cout << endl;
})
LAZYDEBUG(cerr << "\nResult of right=" << endl;)
LAZYDEBUG(
for (int i=rroffset, limit=rroffset+(rightExp?m_right->getNoValues()*m_right->getNumDPPSample():m_right->getNoValues());i<limit;++i)
{
cerr << "[" <<  setw(2)<< i-rroffset << "] " << setw(10) << (*right)[i] << " ";
if (i%4==0) cout << endl;
}
cerr << endl;
)
LAZYDEBUG(cerr << "Post sub calls: " << toString() << endl;)
LAZYDEBUG(cout << "LeftExp=" << leftExp << " rightExp=" << rightExp << endl;)
LAZYDEBUG(cout << "LeftR=" << m_left->getRank() << " rightExp=" << m_right->getRank() << endl;)
LAZYDEBUG(cout << "LeftSize=" << m_left->getNoValues() << " RightSize=" << m_right->getNoValues() << endl;)
LAZYDEBUG(cout << "m_samplesize=" << m_samplesize << endl;)
LAZYDEBUG(cout << "outputshape=" << DataTypes::shapeToString(getShape()) << endl;)
LAZYDEBUG(cout << "DPPS=" << m_right->getNumDPPSample() <<"."<<endl;)

  double* resultp=&(v[offset]);     // results are stored at the vector offset we received
  switch(m_op)
  {
    case PROD:
        // each datapoint of the sample gets its own matrix product
    for (int i=0;i<steps;++i,resultp+=resultStep)
    {

LAZYDEBUG(cout << "lroffset=" << lroffset << "rroffset=" << rroffset << endl;)
LAZYDEBUG(cout << "l*=" << left << " r*=" << right << endl;)
LAZYDEBUG(cout << "m_SL=" << m_SL << " m_SM=" << m_SM << " m_SR=" << m_SR << endl;)

          const double *ptr_0 = &((*left)[lroffset]);
          const double *ptr_1 = &((*right)[rroffset]);

LAZYDEBUG(cout << DataTypes::pointToString(*left, m_left->getShape(),lroffset,"LEFT") << endl;)
LAZYDEBUG(cout << DataTypes::pointToString(*right,m_right->getShape(),rroffset, "RIGHT") << endl;)

          matrix_matrix_product(m_SL, m_SM, m_SR, ptr_0, ptr_1, resultp, m_transpose);

LAZYDEBUG(cout << "Results--\n";
{
  DataVector dv(getNoValues());
for (int z=0;z<getNoValues();++z)
{
  cout << "[" << setw(2) << z<< "] " << setw(10) << resultp[z] << " ";
  if (z%4==0) cout << endl;
  dv[z]=resultp[z];
}
cout << endl << DataTypes::pointToString(dv,getShape(),0,"RESLT");
cout << "\nWritten to: " << resultp << " resultStep=" << resultStep << endl;
}
)
      lroffset+=leftStep;
      rroffset+=rightStep;
    }
    break;
    default:
    throw DataException("Programmer error - resolveTProduct can not resolve operator "+opToString(m_op)+".");
  }
  roffset=offset;
  return &v;
}
1652    
1653    
1654    #ifdef LAZY_NODE_STORAGE
1655    
1656    // The result will be stored in m_samples
1657    // The return value is a pointer to the DataVector, offset is the offset within the return value
1658    const DataTypes::ValueType*
1659    DataLazy::resolveNodeSample(int tid, int sampleNo, size_t& roffset)
1660    {
1661    LAZYDEBUG(cout << "Resolve sample " << toString() << endl;)
1662        // collapse so we have a 'E' node or an IDENTITY for some other type
1663      if (m_readytype!='E' && m_op!=IDENTITY)
1664      {
1665        collapse();
1666      }
1667      if (m_op==IDENTITY)  
1668      {
1669        const ValueType& vec=m_id->getVectorRO();
1670    //     if (m_readytype=='C')
1671    //     {
1672    //  roffset=0;      // all samples read from the same position
1673    //  return &(m_samples);
1674    //     }
1675        roffset=m_id->getPointOffset(sampleNo, 0);
1676        return &(vec);
1677      }
1678      if (m_readytype!='E')
1679      {
1680        throw DataException("Programmer Error - Collapse did not produce an expanded node.");
1681      }
1682      if (m_sampleids[tid]==sampleNo)
1683      {
1684        roffset=tid*m_samplesize;
1685        return &(m_samples);        // sample is already resolved
1686      }
1687      m_sampleids[tid]=sampleNo;
1688      switch (getOpgroup(m_op))
1689      {
1690      case G_UNARY:
1691      case G_UNARY_P: return resolveNodeUnary(tid, sampleNo, roffset);
1692      case G_BINARY: return resolveNodeBinary(tid, sampleNo, roffset);
1693      case G_NP1OUT: return resolveNodeNP1OUT(tid, sampleNo, roffset);
1694      case G_NP1OUT_P: return resolveNodeNP1OUT_P(tid, sampleNo, roffset);
1695      case G_TENSORPROD: return resolveNodeTProd(tid, sampleNo, roffset);
1696      case G_NP1OUT_2P: return resolveNodeNP1OUT_2P(tid, sampleNo, roffset);
1697      case G_REDUCTION: return resolveNodeReduction(tid, sampleNo, roffset);
1698      default:
1699        throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");
1700      }
1701    }
1702    
// Resolve one sample of a unary operation into this thread's slot of m_samples.
const DataTypes::ValueType*
DataLazy::resolveNodeUnary(int tid, int sampleNo, size_t& roffset)
{
    // we assume that any collapsing has been done before we get here
    // since we only have one argument we don't need to think about only
    // processing single points.
    // we will also know we won't get identity nodes
  if (m_readytype!='E')
  {
    throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
  }
  if (m_op==IDENTITY)
  {
    throw DataException("Programmer error - resolveNodeUnary should not be called on identity nodes.");
  }
    // resolve the child first; roffset temporarily holds the child's offset
  const DataTypes::ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, roffset);
  const double* left=&((*leftres)[roffset]);
  roffset=m_samplesize*tid;     // our result goes into this thread's slot
  double* result=&(m_samples[roffset]);
    // apply the operation to every double in the sample in one pass
  switch (m_op)
  {
    case SIN:
    tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sin);
    break;
    case COS:
    tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cos);
    break;
    case TAN:
    tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tan);
    break;
    case ASIN:
    tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::asin);
    break;
    case ACOS:
    tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::acos);
    break;
    case ATAN:
    tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::atan);
    break;
    case SINH:
    tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sinh);
    break;
    case COSH:
    tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cosh);
    break;
    case TANH:
    tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tanh);
    break;
    case ERF:
    // ::erf is not available in the MSVC maths library
#if defined (_WIN32) && !defined(__INTEL_COMPILER)
    throw DataException("Error - Data:: erf function is not supported on _WIN32 platforms.");
#else
    tensor_unary_operation(m_samplesize, left, result, ::erf);
    break;
#endif
   case ASINH:
    // MSVC also lacks the inverse hyperbolic functions; use substitutes there
#if defined (_WIN32) && !defined(__INTEL_COMPILER)
    tensor_unary_operation(m_samplesize, left, result, escript::asinh_substitute);
#else
    tensor_unary_operation(m_samplesize, left, result, ::asinh);
#endif
    break;
   case ACOSH:
#if defined (_WIN32) && !defined(__INTEL_COMPILER)
    tensor_unary_operation(m_samplesize, left, result, escript::acosh_substitute);
#else
    tensor_unary_operation(m_samplesize, left, result, ::acosh);
#endif
    break;
   case ATANH:
#if defined (_WIN32) && !defined(__INTEL_COMPILER)
    tensor_unary_operation(m_samplesize, left, result, escript::atanh_substitute);
#else
    tensor_unary_operation(m_samplesize, left, result, ::atanh);
#endif
    break;
    case LOG10:
    tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log10);
    break;
    case LOG:
    tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log);
    break;
    case SIGN:
    tensor_unary_operation(m_samplesize, left, result, escript::fsign);
    break;
    case ABS:
    tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::fabs);
    break;
    case NEG:
    tensor_unary_operation(m_samplesize, left, result, negate<double>());
    break;
    case POS:
    // it doesn't mean anything for delayed.
    // it will just trigger a deep copy of the lazy object
    throw DataException("Programmer error - POS not supported for lazy data.");
    break;
    case EXP:
    tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::exp);
    break;
    case SQRT:
    tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sqrt);
    break;
    case RECIP:
    // reciprocal computed as 1/x via a bound divides functor
    tensor_unary_operation(m_samplesize, left, result, bind1st(divides<double>(),1.));
    break;
    case GZ:
    tensor_unary_operation(m_samplesize, left, result, bind2nd(greater<double>(),0.0));
    break;
    case LZ:
    tensor_unary_operation(m_samplesize, left, result, bind2nd(less<double>(),0.0));
    break;
    case GEZ:
    tensor_unary_operation(m_samplesize, left, result, bind2nd(greater_equal<double>(),0.0));
    break;
    case LEZ:
    tensor_unary_operation(m_samplesize, left, result, bind2nd(less_equal<double>(),0.0));
    break;
// There are actually G_UNARY_P but I don't see a compelling reason to treat them differently
    case NEZ:
    tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsGT(),m_tol));
    break;
    case EZ:
    tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsLTE(),m_tol));
    break;

    default:
    throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");
  }
  return &(m_samples);
}
1833    
1834    
1835    const DataTypes::ValueType*
1836    DataLazy::resolveNodeReduction(int tid, int sampleNo, size_t& roffset)
1837    {
1838        // we assume that any collapsing has been done before we get here
1839        // since we only have one argument we don't need to think about only
1840        // processing single points.
1841        // we will also know we won't get identity nodes
1842      if (m_readytype!='E')
1843      {
1844        throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
1845      }
1846      if (m_op==IDENTITY)
1847      {
1848        throw DataException("Programmer error - resolveNodeUnary should not be called on identity nodes.");
1849      }
1850      size_t loffset=0;
1851      const DataTypes::ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, loffset);
1852    
1853      roffset=m_samplesize*tid;
1854      unsigned int ndpps=getNumDPPSample();
1855      unsigned int psize=DataTypes::noValues(getShape());
1856      double* result=&(m_samples[roffset]);
1857      switch (m_op)
1858      {
1859        case MINVAL:
1860        {
1861          for (unsigned int z=0;z<ndpps;++z)
1862          {
1863            FMin op;
1864            *result=DataMaths::reductionOp(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max());
1865            loffset+=psize;
1866            result++;
1867          }
1868        }
1869        break;
1870        case MAXVAL:
1871        {
1872          for (unsigned int z=0;z<ndpps;++z)
1873          {
1874          FMax op;
1875          *result=DataMaths::reductionOp(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max()*-1);
1876          loffset+=psize;
1877          result++;
1878          }
1879        }
1880        break;
1881        default:
1882        throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");
1883      }
1884      return &(m_samples);
1885    }
1886    
1887    const DataTypes::ValueType*
1888    DataLazy::resolveNodeNP1OUT(int tid, int sampleNo, size_t& roffset)
1889    {
1890        // we assume that any collapsing has been done before we get here
1891        // since we only have one argument we don't need to think about only
1892        // processing single points.
1893      if (m_readytype!='E')
1894      {
1895        throw DataException("Programmer error - resolveNodeNP1OUT should only be called on expanded Data.");
1896      }
1897      if (m_op==IDENTITY)
1898      {
1899        throw DataException("Programmer error - resolveNodeNP1OUT should not be called on identity nodes.");
1900      }
1901      size_t subroffset;
1902      const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1903      roffset=m_samplesize*tid;
1904      size_t loop=0;
1905      size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1906      size_t step=getNoValues();
1907      size_t offset=roffset;
1908      switch (m_op)
1909      {
1910        case SYM:
1911        for (loop=0;loop<numsteps;++loop)
1912        {
1913            DataMaths::symmetric(*leftres,m_left->getShape(),subroffset, m_samples, getShape(), offset);
1914            subroffset+=step;
1915            offset+=step;
1916        }
1917        break;
1918        case NSYM:
1919        for (loop=0;loop<numsteps;++loop)
1920        {
1921            DataMaths::nonsymmetric(*leftres,m_left->getShape(),subroffset, m_samples, getShape(), offset);
1922            subroffset+=step;
1923            offset+=step;
1924        }
1925        break;
1926        default:
1927        throw DataException("Programmer error - resolveNP1OUT can not resolve operator "+opToString(m_op)+".");
1928      }
1929      return &m_samples;
1930    }
1931    
1932    const DataTypes::ValueType*
1933    DataLazy::resolveNodeNP1OUT_P(int tid, int sampleNo, size_t& roffset)
1934    {
1935        // we assume that any collapsing has been done before we get here
1936        // since we only have one argument we don't need to think about only
1937        // processing single points.
1938      if (m_readytype!='E')
1939      {
1940        throw DataException("Programmer error - resolveNodeNP1OUT_P should only be called on expanded Data.");
1941      }
1942      if (m_op==IDENTITY)
1943      {
1944        throw DataException("Programmer error - resolveNodeNP1OUT_P should not be called on identity nodes.");
1945      }
1946      size_t subroffset;
1947      size_t offset;
1948      const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1949      roffset=m_samplesize*tid;
1950      offset=roffset;
1951      size_t loop=0;
1952      size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1953      size_t outstep=getNoValues();
1954      size_t instep=m_left->getNoValues();
1955      switch (m_op)
1956      {
1957        case TRACE:
1958        for (loop=0;loop<numsteps;++loop)
1959        {
1960                DataMaths::trace(*leftres,m_left->getShape(),subroffset, m_samples ,getShape(),offset,m_axis_offset);
1961            subroffset+=instep;
1962            offset+=outstep;
1963        }
1964        break;
1965        case TRANS:
1966        for (loop=0;loop<numsteps;++loop)
1967        {
1968                DataMaths::transpose(*leftres,m_left->getShape(),subroffset, m_samples, getShape(),offset,m_axis_offset);
1969            subroffset+=instep;
1970            offset+=outstep;
1971        }
1972        break;
1973        default:
1974        throw DataException("Programmer error - resolveNP1OUTP can not resolve operator "+opToString(m_op)+".");
1975      }
1976      return &m_samples;
1977    }
1978    
1979    
1980    const DataTypes::ValueType*
1981    DataLazy::resolveNodeNP1OUT_2P(int tid, int sampleNo, size_t& roffset)
1982    {
1983      if (m_readytype!='E')
1984      {
1985        throw DataException("Programmer error - resolveNodeNP1OUT_2P should only be called on expanded Data.");
1986      }
1987      if (m_op==IDENTITY)
1988      {
1989        throw DataException("Programmer error - resolveNodeNP1OUT_2P should not be called on identity nodes.");
1990      }
1991      size_t subroffset;
1992      size_t offset;
1993      const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1994      roffset=m_samplesize*tid;
1995      offset=roffset;
1996      size_t loop=0;
1997      size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1998      size_t outstep=getNoValues();
1999      size_t instep=m_left->getNoValues();
2000      switch (m_op)
2001      {
2002        case SWAP:
2003        for (loop=0;loop<numsteps;++loop)
2004        {
2005                DataMaths::swapaxes(*leftres,m_left->getShape(),subroffset, m_samples, getShape(),offset, m_axis_offset, m_transpose);
2006            subroffset+=instep;
2007            offset+=outstep;
2008        }
2009        break;
2010        default:
2011        throw DataException("Programmer error - resolveNodeNP1OUT2P can not resolve operator "+opToString(m_op)+".");
2012      }
2013      return &m_samples;
2014    }
2015    
2016    
2017    
2018    // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
2019    // have already been collapsed to IDENTITY. So we must have at least one expanded child.
2020    // If both children are expanded, then we can process them in a single operation (we treat
// the whole sample as one big datapoint).
2022    // If one of the children is not expanded, then we need to treat each point in the sample
2023    // individually.
2024    // There is an additional complication when scalar operations are considered.
2025    // For example, 2+Vector.
2026    // In this case each double within the point is treated individually
2027    const DataTypes::ValueType*
2028    DataLazy::resolveNodeBinary(int tid, int sampleNo, size_t& roffset)
2029    {
2030    LAZYDEBUG(cout << "Resolve binary: " << toString() << endl;)
2031    
2032      size_t lroffset=0, rroffset=0;    // offsets in the left and right result vectors
2033        // first work out which of the children are expanded
2034      bool leftExp=(m_left->m_readytype=='E');
2035      bool rightExp=(m_right->m_readytype=='E');
2036      if (!leftExp && !rightExp)
2037      {
2038        throw DataException("Programmer Error - please use collapse if neither argument has type 'E'.");
2039      }
2040      bool leftScalar=(m_left->getRank()==0);
2041      bool rightScalar=(m_right->getRank()==0);
2042      if ((m_left->getRank()!=m_right->getRank()) && (!leftScalar && !rightScalar))
2043      {
2044        throw DataException("resolveBinary - ranks of arguments must match unless one of them is scalar.");
2045      }
2046      size_t leftsize=m_left->getNoValues();
2047      size_t rightsize=m_right->getNoValues();
2048      size_t chunksize=1;           // how many doubles will be processed in one go
2049      int leftstep=0;       // how far should the left offset advance after each step
2050      int rightstep=0;
2051      int numsteps=0;       // total number of steps for the inner loop
2052      int oleftstep=0;  // the o variables refer to the outer loop
2053      int orightstep=0; // The outer loop is only required in cases where there is an extended scalar
2054      int onumsteps=1;
2055      
2056      bool LES=(leftExp && leftScalar); // Left is an expanded scalar
2057      bool RES=(rightExp && rightScalar);
2058      bool LS=(!leftExp && leftScalar); // left is a single scalar
2059      bool RS=(!rightExp && rightScalar);
2060      bool LN=(!leftExp && !leftScalar);    // left is a single non-scalar
2061      bool RN=(!rightExp && !rightScalar);
2062      bool LEN=(leftExp && !leftScalar);    // left is an expanded non-scalar
2063      bool REN=(rightExp && !rightScalar);
2064    
2065      if ((LES && RES) || (LEN && REN)) // both are Expanded scalars or both are expanded non-scalars
2066      {
2067        chunksize=m_left->getNumDPPSample()*leftsize;
2068        leftstep=0;
2069        rightstep=0;
2070        numsteps=1;
2071      }
2072      else if (LES || RES)
2073      {
2074        chunksize=1;
2075        if (LES)        // left is an expanded scalar
2076        {
2077            if (RS)
2078            {
2079               leftstep=1;
2080               rightstep=0;
2081               numsteps=m_left->getNumDPPSample();
2082            }
2083            else        // RN or REN
2084            {
2085               leftstep=0;
2086               oleftstep=1;
2087               rightstep=1;
2088               orightstep=(RN ? -(int)rightsize : 0);
2089               numsteps=rightsize;
2090               onumsteps=m_left->getNumDPPSample();
2091            }
2092        }
2093        else        // right is an expanded scalar
2094        {
2095            if (LS)
2096            {
2097               rightstep=1;
2098               leftstep=0;
2099               numsteps=m_right->getNumDPPSample();
2100            }
2101            else
2102            {
2103               rightstep=0;
2104               orightstep=1;
2105               leftstep=1;
2106               oleftstep=(LN ? -(int)leftsize : 0);
2107               numsteps=leftsize;
2108               onumsteps=m_right->getNumDPPSample();
2109            }
2110        }
2111      }
2112      else  // this leaves (LEN, RS), (LEN, RN) and their transposes
2113      {
2114        if (LEN)    // and Right will be a single value
2115        {
2116            chunksize=rightsize;
2117            leftstep=rightsize;
2118            rightstep=0;
2119            numsteps=m_left->getNumDPPSample();
2120            if (RS)
2121            {
2122               numsteps*=leftsize;
2123            }
2124        }
2125        else    // REN
2126        {
2127            chunksize=leftsize;
2128            rightstep=leftsize;
2129            leftstep=0;
2130            numsteps=m_right->getNumDPPSample();
2131            if (LS)
2132            {
2133               numsteps*=rightsize;
2134            }
2135        }
2136      }
2137    
2138      int resultStep=max(leftstep,rightstep);   // only one (at most) should be !=0
2139        // Get the values of sub-expressions
2140      const ValueType* left=m_left->resolveNodeSample(tid,sampleNo,lroffset);  
2141      const ValueType* right=m_right->resolveNodeSample(tid,sampleNo,rroffset);
2142    LAZYDEBUG(cout << "Post sub calls in " << toString() << endl;)
2143    LAZYDEBUG(cout << "shapes=" << DataTypes::shapeToString(m_left->getShape()) << "," << DataTypes::shapeToString(m_right->getShape()) << endl;)
2144    LAZYDEBUG(cout << "chunksize=" << chunksize << endl << "leftstep=" << leftstep << " rightstep=" << rightstep;)
2145    LAZYDEBUG(cout << " numsteps=" << numsteps << endl << "oleftstep=" << oleftstep << " orightstep=" << orightstep;)
2146    LAZYDEBUG(cout << "onumsteps=" << onumsteps << endl;)
2147    LAZYDEBUG(cout << " DPPS=" << m_left->getNumDPPSample() << "," <<m_right->getNumDPPSample() << endl;)
2148    LAZYDEBUG(cout << "" << LS << RS << LN << RN << LES << RES <<LEN << REN <<   endl;)
2149    
2150    LAZYDEBUG(cout << "Left res["<< lroffset<< "]=" << (*left)[lroffset] << endl;)
2151    LAZYDEBUG(cout << "Right res["<< rroffset<< "]=" << (*right)[rroffset] << endl;)
2152    
2153    
2154      roffset=m_samplesize*tid;
2155    double* resultp=&(m_samples[roffset]);        // results are stored at the vector offset we received
2156      switch(m_op)
2157      {
2158        case ADD:
2159            PROC_OP(NO_ARG,plus<double>());
2160        break;
2161        case SUB:
2162        PROC_OP(NO_ARG,minus<double>());
2163        break;
2164        case MUL:
2165        PROC_OP(NO_ARG,multiplies<double>());
2166        break;
2167        case DIV:
2168        PROC_OP(NO_ARG,divides<double>());
2169        break;
2170        case POW:
2171           PROC_OP(double (double,double),::pow);
2172        break;
2173        default:
2174        throw DataException("Programmer error - resolveBinary can not resolve operator "+opToString(m_op)+".");
2175      }
2176    LAZYDEBUG(cout << "Result res[" << roffset<< "]" << m_samples[roffset] << endl;)
2177      return &m_samples;
2178    }
2179    
2180    
2181    // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
2182    // have already been collapsed to IDENTITY. So we must have at least one expanded child.
2183    // unlike the other resolve helpers, we must treat these datapoints separately.
const DataTypes::ValueType*
DataLazy::resolveNodeTProd(int tid, int sampleNo, size_t& roffset)
{
LAZYDEBUG(cout << "Resolve TensorProduct: " << toString() << endl;)

  size_t lroffset=0, rroffset=0;    // offsets in the left and right result vectors
    // first work out which of the children are expanded
  bool leftExp=(m_left->m_readytype=='E');
  bool rightExp=(m_right->m_readytype=='E');
  int steps=getNumDPPSample();
  // A non-expanded child contributes the same datapoint every step, so its
  // offset does not advance (step of 0).
  int leftStep=(leftExp? m_left->getNoValues() : 0);        // do not have scalars as input to this method
  int rightStep=(rightExp?m_right->getNoValues() : 0);

  int resultStep=getNoValues();     // one output datapoint written per step
  roffset=m_samplesize*tid;         // this thread's slot in the node's sample buffer
  size_t offset=roffset;

  const ValueType* left=m_left->resolveNodeSample(tid, sampleNo, lroffset);

  const ValueType* right=m_right->resolveNodeSample(tid, sampleNo, rroffset);

LAZYDEBUG(cerr << "[Left shape]=" << DataTypes::shapeToString(m_left->getShape()) << "\n[Right shape]=" << DataTypes::shapeToString(m_right->getShape()) << " result=" <<DataTypes::shapeToString(getShape()) <<  endl;
cout << getNoValues() << endl;)

LAZYDEBUG(cerr << "Post sub calls: " << toString() << endl;)
LAZYDEBUG(cout << "LeftExp=" << leftExp << " rightExp=" << rightExp << endl;)
LAZYDEBUG(cout << "LeftR=" << m_left->getRank() << " rightExp=" << m_right->getRank() << endl;)
LAZYDEBUG(cout << "LeftSize=" << m_left->getNoValues() << " RightSize=" << m_right->getNoValues() << endl;)
LAZYDEBUG(cout << "m_samplesize=" << m_samplesize << endl;)
LAZYDEBUG(cout << "outputshape=" << DataTypes::shapeToString(getShape()) << endl;)
LAZYDEBUG(cout << "DPPS=" << m_right->getNumDPPSample() <<"."<<endl;)

  double* resultp=&(m_samples[offset]);     // results are stored at the vector offset we received
  switch(m_op)
  {
    case PROD:
    for (int i=0;i<steps;++i,resultp+=resultStep)
    {
          const double *ptr_0 = &((*left)[lroffset]);
          const double *ptr_1 = &((*right)[rroffset]);

LAZYDEBUG(cout << DataTypes::pointToString(*left, m_left->getShape(),lroffset,"LEFT") << endl;)
LAZYDEBUG(cout << DataTypes::pointToString(*right,m_right->getShape(),rroffset, "RIGHT") << endl;)

          // m_SL/m_SM/m_SR are the product dimensions recorded when this node
          // was built; m_transpose selects which operand is transposed.
          matrix_matrix_product(m_SL, m_SM, m_SR, ptr_0, ptr_1, resultp, m_transpose);

      lroffset+=leftStep;
      rroffset+=rightStep;
    }
    break;
    default:
    throw DataException("Programmer error - resolveTProduct can not resolve operator "+opToString(m_op)+".");
  }
  roffset=offset;
  return &m_samples;
}
2241    #endif //LAZY_NODE_STORAGE
2242    
2243    /*
2244      \brief Compute the value of the expression for the given sample.
2245      \return Vector which stores the value of the subexpression for the given sample.
2246      \param v A vector to store intermediate results.
2247      \param offset Index in v to begin storing results.
2248      \param sampleNo Sample number to evaluate.
2249      \param roffset (output parameter) the offset in the return vector where the result begins.
2250    
2251      The return value will be an existing vector so do not deallocate it.
2252    */
2253    // the vector and the offset are a place where the method could write its data if it wishes
2254    // it is not obligated to do so. For example, if it has its own storage already, it can use that.
2255    // Hence the return value to indicate where the data is actually stored.
2256    // Regardless, the storage should be assumed to be used, even if it isn't.
2257    
2258    // the roffset is the offset within the returned vector where the data begins
const DataTypes::ValueType*
DataLazy::resolveVectorSample(ValueType& v, size_t offset, int sampleNo, size_t& roffset)
{
LAZYDEBUG(cout << "Resolve sample " << toString() << endl;)
    // collapse so we have a 'E' node or an IDENTITY for some other type
  if (m_readytype!='E' && m_op!=IDENTITY)
  {
    collapse();
  }
  if (m_op==IDENTITY)
  {
    const ValueType& vec=m_id->getVectorRO();
    if (m_readytype=='C')
    {
    // constant data: the single datapoint is shared by every sample
    roffset=0;
LAZYDEBUG(cout << "Finish  sample " << toString() << endl;)
    return &(vec);
    }
    roffset=m_id->getPointOffset(sampleNo, 0);
LAZYDEBUG(cout << "Finish  sample " << toString() << endl;)
    return &(vec);
  }
  if (m_readytype!='E')
  {
    throw DataException("Programmer Error - Collapse did not produce an expanded node.");
  }
  // Dispatch on the operation group; each helper may write into v at offset
  // (or return its own storage) and sets roffset to where the result begins.
  switch (getOpgroup(m_op))
  {
  case G_UNARY:
  case G_UNARY_P: return resolveUnary(v, offset,sampleNo,roffset);
  case G_BINARY: return resolveBinary(v, offset,sampleNo,roffset);
  case G_NP1OUT: return resolveNP1OUT(v, offset, sampleNo,roffset);
  case G_NP1OUT_P: return resolveNP1OUT_P(v, offset, sampleNo,roffset);
  case G_TENSORPROD: return resolveTProd(v,offset, sampleNo,roffset);
  case G_NP1OUT_2P: return resolveNP1OUT_2P(v, offset, sampleNo, roffset);
  case G_REDUCTION: return resolveReduction(v, offset, sampleNo, roffset);
  default:
    throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");
  }
}
2300    
2301    const DataTypes::ValueType*
2302    DataLazy::resolveSample(BufferGroup& bg, int sampleNo, size_t& roffset)
2303    {
2304    #ifdef _OPENMP
2305        int tid=omp_get_thread_num();
2306    #else
2307        int tid=0;
2308    #endif
2309    #ifdef LAZY_NODE_STORAGE
2310        return resolveNodeSample(tid, sampleNo, roffset);
2311    #else
2312        return resolveVectorSample(bg.getBuffer(tid),bg.getOffset(tid),sampleNo,roffset);
2313    #endif
2314    }
2315    
2316    
2317    // This needs to do the work of the identity constructor
2318    void
2319    DataLazy::resolveToIdentity()
2320    {
2321       if (m_op==IDENTITY)
2322        return;
2323    #ifndef LAZY_NODE_STORAGE
2324       DataReady_ptr p=resolveVectorWorker();
2325    #else
2326       DataReady_ptr p=resolveNodeWorker();
2327    #endif
2328       makeIdentity(p);
2329    }
2330    
2331    void DataLazy::makeIdentity(const DataReady_ptr& p)
2332    {
2333       m_op=IDENTITY;
2334       m_axis_offset=0;
2335       m_transpose=0;
2336       m_SL=m_SM=m_SR=0;
2337       m_children=m_height=0;
2338       m_id=p;
2339       if(p->isConstant()) {m_readytype='C';}
2340       else if(p->isExpanded()) {m_readytype='E';}
2341       else if (p->isTagged()) {m_readytype='T';}
2342       else {throw DataException("Unknown DataReady instance in convertToIdentity constructor.");}
2343       m_buffsRequired=1;
2344       m_samplesize=p->getNumDPPSample()*p->getNoValues();
2345       m_maxsamplesize=m_samplesize;
2346       m_left.reset();
2347       m_right.reset();
2348  }  }
2349    
2350    
DataReady_ptr
DataLazy::resolve()
{
    // Force evaluation of the whole expression tree (this node becomes an
    // IDENTITY wrapper) and return the concrete data.
    resolveToIdentity();
    return m_id;
}
2357    
2358    #ifdef LAZY_NODE_STORAGE
2359    
2360    // This version of resolve uses storage in each node to hold results
DataReady_ptr
DataLazy::resolveNodeWorker()
{
  // If the whole sub-expression is Constant or Tagged, evaluate it normally.
  if (m_readytype!='E')
  {
    collapse();
  }
  if (m_op==IDENTITY)       // so a lazy expression of Constant or Tagged data is returned here
  {
    return m_id;
  }
    // from this point on we must have m_op!=IDENTITY and m_readytype=='E'
  DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(),  ValueType(getNoValues()));
  ValueType& resvec=result->getVectorRW();
  DataReady_ptr resptr=DataReady_ptr(result);   // takes ownership of result

  int sample;
  int totalsamples=getNumSamples();
  const ValueType* res=0;   // storage for answer
LAZYDEBUG(cout << "Total number of samples=" <<totalsamples << endl;)
  // Samples are independent, so evaluate them in parallel; each thread works
  // in its own slot of each node's sample buffer (selected by thread id).
  #pragma omp parallel for private(sample,res) schedule(static)
  for (sample=0;sample<totalsamples;++sample)
  {
    size_t roffset=0;
#ifdef _OPENMP
    res=resolveNodeSample(omp_get_thread_num(),sample,roffset);
#else
    res=resolveNodeSample(0,sample,roffset);
#endif
LAZYDEBUG(cout << "Sample #" << sample << endl;)
LAZYDEBUG(cout << "Final res[" << roffset<< "]=" << (*res)[roffset] << (*res)[roffset]<< endl; )
    DataVector::size_type outoffset=result->getPointOffset(sample,0);
    // copy this sample's values into the output DataExpanded
    memcpy(&(resvec[outoffset]),&((*res)[roffset]),m_samplesize*sizeof(DataVector::ElementType));
  }
  return resptr;
}
2397    
2398  cout << "Sample size=" << m_samplesize << endl;  #endif // LAZY_NODE_STORAGE
 cout << "Buffers=" << m_buffsRequired << endl;  
2399    
2400    size_t threadbuffersize=m_samplesize*(max(1,m_buffsRequired)+1);  // To simplify the memory management, all threads operate on one large vector, rather than one each.
2401    // Each sample is evaluated independently and copied into the result DataExpanded.
DataReady_ptr
DataLazy::resolveVectorWorker()
{

LAZYDEBUG(cout << "Sample size=" << m_samplesize << endl;)
LAZYDEBUG(cout << "Buffers=" << m_buffsRequired << endl;)
  // If the whole sub-expression is Constant or Tagged, evaluate it normally.
  if (m_readytype!='E')
  {
    collapse();
  }
  if (m_op==IDENTITY)       // so a lazy expression of Constant or Tagged data is returned here
  {
    return m_id;
  }
    // from this point on we must have m_op!=IDENTITY and m_readytype=='E'
  size_t threadbuffersize=m_maxsamplesize*(max(1,m_buffsRequired)); // Each thread needs to have enough
    // storage to evaluate its expression
  int numthreads=1;
#ifdef _OPENMP
  numthreads=omp_get_max_threads();
#endif
  // To simplify memory management, all threads share one large vector
  // (one slab of threadbuffersize per thread) rather than one each.
  ValueType v(numthreads*threadbuffersize);
LAZYDEBUG(cout << "Buffer created with size=" << v.size() << endl;)
  DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(),  ValueType(getNoValues()));
  ValueType& resvec=result->getVectorRW();
  DataReady_ptr resptr=DataReady_ptr(result);   // takes ownership of result
  int sample;
  size_t outoffset;     // offset in the output data
  int totalsamples=getNumSamples();
  const ValueType* res=0;   // vector storing the answer
  size_t resoffset=0;       // where in the vector to find the answer
LAZYDEBUG(cout << "Total number of samples=" <<totalsamples << endl;)
  // Each sample is evaluated independently and copied into the result.
  #pragma omp parallel for private(sample,resoffset,outoffset,res) schedule(static)
  for (sample=0;sample<totalsamples;++sample)
  {
LAZYDEBUG(cout << "################################# " << sample << endl;)
#ifdef _OPENMP
    res=resolveVectorSample(v,threadbuffersize*omp_get_thread_num(),sample,resoffset);
#else
    res=resolveVectorSample(v,0,sample,resoffset);   // res would normally be v, but not if its a single IDENTITY op.
#endif
LAZYDEBUG(cerr << "-------------------------------- " << endl;)
LAZYDEBUG(cerr<< "Copying sample#" << sample << endl;)
    outoffset=result->getPointOffset(sample,0);
LAZYDEBUG(cerr << "offset=" << outoffset << " from offset=" << resoffset << " " << m_samplesize << " doubles" << endl;)
    for (unsigned int i=0;i<m_samplesize;++i,++outoffset,++resoffset)   // copy values into the output vector
    {
LAZYDEBUG(cerr << "outoffset=" << outoffset << " resoffset=" << resoffset << " " << (*res)[resoffset]<< endl;)
      resvec[outoffset]=(*res)[resoffset];
    }
LAZYDEBUG(cerr << DataTypes::pointToString(resvec,getShape(),outoffset-m_samplesize+DataTypes::noValues(getShape()),"Final result:") << endl;)
LAZYDEBUG(cerr << "*********************************" << endl;)
  }
  return resptr;
}
# Line 431  DataLazy::toString() const Line 2460  DataLazy::toString() const
2460  {  {
2461    ostringstream oss;    ostringstream oss;
2462    oss << "Lazy Data:";    oss << "Lazy Data:";
2463    intoString(oss);    if (escriptParams.getPRINT_LAZY_TREE()==0)
2464      {
2465          intoString(oss);
2466      }
2467      else
2468      {
2469        oss << endl;
2470        intoTreeString(oss,"");
2471      }
2472    return oss.str();    return oss.str();
2473  }  }
2474    
2475    
2476  void  void
2477  DataLazy::intoString(ostringstream& oss) const  DataLazy::intoString(ostringstream& oss) const
2478  {  {
2479    //    oss << "[" << m_children <<";"<<m_height <<"]";
2480    switch (getOpgroup(m_op))    switch (getOpgroup(m_op))
2481    {    {
2482    case G_IDENTITY:    case G_IDENTITY:
2483        if (m_id->isExpanded())
2484        {
2485           oss << "E";
2486        }
2487        else if (m_id->isTagged())
2488        {
2489          oss << "T";
2490        }
2491        else if (m_id->isConstant())
2492        {
2493          oss << "C";
2494        }
2495        else
2496        {
2497          oss << "?";
2498        }
2499      oss << '@' << m_id.get();      oss << '@' << m_id.get();
2500      break;      break;
2501    case G_BINARY:    case G_BINARY:
# Line 451  DataLazy::intoString(ostringstream& oss) Line 2506  DataLazy::intoString(ostringstream& oss)
2506      oss << ')';      oss << ')';
2507      break;      break;
2508    case G_UNARY:    case G_UNARY:
2509      case G_UNARY_P:
2510      case G_NP1OUT:
2511      case G_NP1OUT_P:
2512      case G_REDUCTION:
2513        oss << opToString(m_op) << '(';
2514        m_left->intoString(oss);
2515        oss << ')';
2516        break;
2517      case G_TENSORPROD:
2518        oss << opToString(m_op) << '(';
2519        m_left->intoString(oss);
2520        oss << ", ";
2521        m_right->intoString(oss);
2522        oss << ')';
2523        break;
2524      case G_NP1OUT_2P:
2525      oss << opToString(m_op) << '(';      oss << opToString(m_op) << '(';
2526      m_left->intoString(oss);      m_left->intoString(oss);
2527        oss << ", " << m_axis_offset << ", " << m_transpose;
2528      oss << ')';      oss << ')';
2529      break;      break;
2530    default:    default:
# Line 460  DataLazy::intoString(ostringstream& oss) Line 2532  DataLazy::intoString(ostringstream& oss)
2532    }    }
2533  }  }
2534    
2535  // Note that in this case, deepCopy does not make copies of the leaves.  
// Render this node (and its subtree) one node per line into oss:
// "[rank:samplesize] <indent> description"; each level adds a '.' to indent.
void
DataLazy::intoTreeString(ostringstream& oss, string indent) const
{
  oss << '[' << m_rank << ':' << setw(3) << m_samplesize << "] " << indent;
  switch (getOpgroup(m_op))
  {
  case G_IDENTITY:
    // Leaf: show the storage class of the wrapped DataReady and its address.
    if (m_id->isExpanded())
    {
       oss << "E";
    }
    else if (m_id->isTagged())
    {
      oss << "T";
    }
    else if (m_id->isConstant())
    {
      oss << "C";
    }
    else
    {
      oss << "?";
    }
    oss << '@' << m_id.get() << endl;
    break;
  case G_BINARY:
    oss << opToString(m_op) << endl;
    indent+='.';    // children are drawn one level deeper
    m_left->intoTreeString(oss, indent);
    m_right->intoTreeString(oss, indent);
    break;
  case G_UNARY:
  case G_UNARY_P:
  case G_NP1OUT:
  case G_NP1OUT_P:
  case G_REDUCTION:
    oss << opToString(m_op) << endl;
    indent+='.';
    m_left->intoTreeString(oss, indent);
    break;
  case G_TENSORPROD:
    oss << opToString(m_op) << endl;
    indent+='.';
    m_left->intoTreeString(oss, indent);
    m_right->intoTreeString(oss, indent);
    break;
  case G_NP1OUT_2P:
    oss << opToString(m_op) << ", " << m_axis_offset << ", " << m_transpose<< endl;
    indent+='.';
    m_left->intoTreeString(oss, indent);
    break;
  default:
    oss << "UNKNOWN";
  }
}
2591    
2592    
// Deep-copy this node: each child is deep-copied and wrapped in a new
// DataLazy node with the same operation and parameters.
DataAbstract*
DataLazy::deepCopy()
{
  switch (getOpgroup(m_op))
  {
  case G_IDENTITY:  return new DataLazy(m_id->deepCopy()->getPtr());
  case G_UNARY:
  case G_REDUCTION: return new DataLazy(m_left->deepCopy()->getPtr(),m_op);
  case G_UNARY_P:   return new DataLazy(m_left->deepCopy()->getPtr(), m_op, m_tol);
  case G_BINARY:    return new DataLazy(m_left->deepCopy()->getPtr(),m_right->deepCopy()->getPtr(),m_op);
  // NOTE(review): G_NP1OUT copies m_right as well — confirm m_right is
  // always set for this group before relying on it.
  case G_NP1OUT: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(),m_op);
  case G_TENSORPROD: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
  case G_NP1OUT_P:   return new DataLazy(m_left->deepCopy()->getPtr(),m_op,  m_axis_offset);
  case G_NP1OUT_2P:  return new DataLazy(m_left->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
  default:
    throw DataException("Programmer error - do not know how to deepcopy operator "+opToString(m_op)+".");
  }
}
2611    
2612    
2613    
2614    // There is no single, natural interpretation of getLength on DataLazy.
2615    // Instances of DataReady can look at the size of their vectors.
2616    // For lazy though, it could be the size the data would be if it were resolved;
2617    // or it could be some function of the lengths of the DataReady instances which
2618    // form part of the expression.
2619    // Rather than have people making assumptions, I have disabled the method.
DataTypes::ValueType::size_type
DataLazy::getLength() const
{
  // There is no single natural meaning for the length of an unresolved
  // expression, so this method is deliberately disabled for lazy data.
  throw DataException("getLength() does not make sense for lazy data.");
}
2625    
2626    
# Line 486  DataLazy::getSlice(const DataTypes::Regi Line 2630  DataLazy::getSlice(const DataTypes::Regi
2630    throw DataException("getSlice - not implemented for Lazy objects.");    throw DataException("getSlice - not implemented for Lazy objects.");
2631  }  }
2632    
2633    
2634    // To do this we need to rely on our child nodes
2635    DataTypes::ValueType::size_type
2636    DataLazy::getPointOffset(int sampleNo,
2637                     int dataPointNo)
2638    {
2639      if (m_op==IDENTITY)
2640      {
2641        return m_id->getPointOffset(sampleNo,dataPointNo);
2642      }
2643      if (m_readytype!='E')
2644      {
2645        collapse();
2646        return m_id->getPointOffset(sampleNo,dataPointNo);
2647      }
2648      // at this point we do not have an identity node and the expression will be Expanded
2649      // so we only need to know which child to ask
2650      if (m_left->m_readytype=='E')
2651      {
2652        return m_left->getPointOffset(sampleNo,dataPointNo);
2653      }
2654      else
2655      {
2656        return m_right->getPointOffset(sampleNo,dataPointNo);
2657      }
2658    }
2659    
2660    // To do this we need to rely on our child nodes
// Const variant: cannot collapse, so only the cases that need no mutation
// are supported; anything that would require collapsing throws.
DataTypes::ValueType::size_type
DataLazy::getPointOffset(int sampleNo,
                 int dataPointNo) const
{
  if (m_op==IDENTITY)
  {
    return m_id->getPointOffset(sampleNo,dataPointNo);
  }
  if (m_readytype=='E')
  {
    // at this point we do not have an identity node and the expression will be Expanded
    // so we only need to know which child to ask
    if (m_left->m_readytype=='E')
    {
    return m_left->getPointOffset(sampleNo,dataPointNo);
    }
    else
    {
    return m_right->getPointOffset(sampleNo,dataPointNo);
    }
  }
  if (m_readytype=='C')
  {
    return m_left->getPointOffset(sampleNo,dataPointNo); // which child doesn't matter
  }
  throw DataException("Programmer error - getPointOffset on lazy data may require collapsing (but this object is marked const).");
}
2688    
2689    
2690    // I have decided to let Data:: handle this issue.
2691    void
2692    DataLazy::setToZero()
2693    {
2694    //   DataTypes::ValueType v(getNoValues(),0);
2695    //   m_id=DataReady_ptr(new DataConstant(getFunctionSpace(),getShape(),v));
2696    //   m_op=IDENTITY;
2697    //   m_right.reset();  
2698    //   m_left.reset();
2699    //   m_readytype='C';
2700    //   m_buffsRequired=1;
2701    
2702      privdebug=privdebug;  // to stop the compiler complaining about unused privdebug
2703      throw DataException("Programmer error - setToZero not supported for DataLazy (DataLazy objects should be read only).");
2704    }
2705    
2706    bool
2707    DataLazy::actsExpanded() const
2708    {
2709        return (m_readytype=='E');
2710  }  }
2711    
2712  }   // end namespace  }   // end namespace

Legend:
Removed from v.1888  
changed lines
  Added in v.2737

  ViewVC Help
Powered by ViewVC 1.1.26