/[escript]/branches/clazy/escriptcore/src/DataLazy.cpp
ViewVC logotype

Diff of /branches/clazy/escriptcore/src/DataLazy.cpp

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

branches/schroedinger/escript/src/DataLazy.cpp revision 1888 by jfenwick, Wed Oct 15 04:00:21 2008 UTC trunk/escript/src/DataLazy.cpp revision 2496 by jfenwick, Fri Jun 26 06:09:47 2009 UTC
# Line 26  Line 26 
26  #include "DataTypes.h"  #include "DataTypes.h"
27  #include "Data.h"  #include "Data.h"
28  #include "UnaryFuncs.h"     // for escript::fsign  #include "UnaryFuncs.h"     // for escript::fsign
29    #include "Utils.h"
30    
31    #include "EscriptParams.h"
32    
33    #include <iomanip>      // for some fancy formatting in debug
34    
35    // #define LAZYDEBUG(X) if (privdebug){X;}
36    #define LAZYDEBUG(X)
37    namespace
38    {
39    bool privdebug=false;
40    
41    #define ENABLEDEBUG privdebug=true;
42    #define DISABLEDEBUG privdebug=false;
43    }
44    
45    // #define SIZELIMIT
46    // #define SIZELIMIT if ((m_height>7) || (m_children>127)) {cerr << "\n!!!!!!! SIZE LIMIT EXCEEDED " << m_children << ";" << m_height << endl << toString() << endl; resolveToIdentity();}
47    //#define SIZELIMIT if ((m_height>7) || (m_children>127)) {resolveToIdentity();}
48    
49    #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {resolveToIdentity();}
50    
51    /*
52    How does DataLazy work?
53    ~~~~~~~~~~~~~~~~~~~~~~~
54    
55    Each instance represents a single operation on one or two other DataLazy instances. These arguments are normally
56    denoted left and right.
57    
58    A special operation, IDENTITY, stores an instance of DataReady in the m_id member.
59    This means that all "internal" nodes in the structure are instances of DataLazy.
60    
61    Each operation has a string representation as well as an opgroup - eg G_IDENTITY, G_BINARY, ...
62    Note that IDENITY is not considered a unary operation.
63    
64    I am avoiding calling the structure formed a tree because it is not guaranteed to be one (eg c=a+a).
65    It must however form a DAG (directed acyclic graph).
66    I will refer to individual DataLazy objects with the structure as nodes.
67    
68    Each node also stores:
69    - m_readytype \in {'E','T','C','?'} ~ indicates what sort of DataReady would be produced if the expression was
70        evaluated.
71    - m_buffsrequired ~ the larged number of samples which would need to be kept simultaneously in order to
72        evaluate the expression.
73    - m_samplesize ~ the number of doubles stored in a sample.
74    
75    When a new node is created, the above values are computed based on the values in the child nodes.
76    Eg: if left requires 4 samples and right requires 6 then left+right requires 7 samples.
77    
78    The resolve method, which produces a DataReady from a DataLazy, does the following:
79    1) Create a DataReady to hold the new result.
80    2) Allocate a vector (v) big enough to hold m_buffsrequired samples.
81    3) For each sample, call resolveSample with v, to get its values and copy them into the result object.
82    
83    (In the case of OMP, multiple samples are resolved in parallel so the vector needs to be larger.)
84    
85    resolveSample returns a Vector* and an offset within that vector where the result is stored.
86    Normally, this would be v, but for identity nodes their internal vector is returned instead.
87    
88    The convention that I use, is that the resolve methods should store their results starting at the offset they are passed.
89    
90    For expressions which evaluate to Constant or Tagged, there is a different evaluation method.
91    The collapse method invokes the (non-lazy) operations on the Data class to evaluate the expression.
92    
93    To add a new operator you need to do the following (plus anything I might have forgotten - adding a new group for example):
94    1) Add to the ES_optype.
95    2) determine what opgroup your operation belongs to (X)
96    3) add a string for the op to the end of ES_opstrings
97    4) increase ES_opcount
98    5) add an entry (X) to opgroups
99    6) add an entry to the switch in collapseToReady
100    7) add an entry to resolveX
101    */
102    
103    
104  using namespace std;  using namespace std;
105  using namespace boost;  using namespace boost;
# Line 33  using namespace boost; Line 107  using namespace boost;
107  namespace escript  namespace escript
108  {  {
109    
 const std::string&  
 opToString(ES_optype op);  
   
110  namespace  namespace
111  {  {
112    
   
   
113  enum ES_opgroup  enum ES_opgroup
114  {  {
115     G_UNKNOWN,     G_UNKNOWN,
116     G_IDENTITY,     G_IDENTITY,
117     G_BINARY,     G_BINARY,        // pointwise operations with two arguments
118     G_UNARY     G_UNARY,     // pointwise operations with one argument
119       G_UNARY_P,       // pointwise operations with one argument, requiring a parameter
120       G_NP1OUT,        // non-pointwise op with one output
121       G_NP1OUT_P,      // non-pointwise op with one output requiring a parameter
122       G_TENSORPROD,    // general tensor product
123       G_NP1OUT_2P      // non-pointwise op with one output requiring two params
124  };  };
125    
126    
127    
128    
129  string ES_opstrings[]={"UNKNOWN","IDENTITY","+","-","*","/","sin","cos","tan",  string ES_opstrings[]={"UNKNOWN","IDENTITY","+","-","*","/","^",
130                "sin","cos","tan",
131              "asin","acos","atan","sinh","cosh","tanh","erf",              "asin","acos","atan","sinh","cosh","tanh","erf",
132              "asinh","acosh","atanh",              "asinh","acosh","atanh",
133              "log10","log","sign","abs","neg","pos","exp","sqrt",              "log10","log","sign","abs","neg","pos","exp","sqrt",
134              "1/","where>0","where<0","where>=0","where<=0"};              "1/","where>0","where<0","where>=0","where<=0", "where<>0","where=0",
135  int ES_opcount=32;              "symmetric","nonsymmetric",
136  ES_opgroup opgroups[]={G_UNKNOWN,G_IDENTITY,G_BINARY,G_BINARY,G_BINARY,G_BINARY,G_UNARY,G_UNARY,G_UNARY, //9              "prod",
137              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,    // 16              "transpose", "trace",
138              G_UNARY,G_UNARY,G_UNARY,                    // 19              "swapaxes"};
139              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,        // 27  int ES_opcount=41;
140              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY};  ES_opgroup opgroups[]={G_UNKNOWN,G_IDENTITY,G_BINARY,G_BINARY,G_BINARY,G_BINARY, G_BINARY,
141                G_UNARY,G_UNARY,G_UNARY, //10
142                G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,    // 17
143                G_UNARY,G_UNARY,G_UNARY,                    // 20
144                G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,    // 28
145                G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY, G_UNARY_P, G_UNARY_P,      // 35
146                G_NP1OUT,G_NP1OUT,
147                G_TENSORPROD,
148                G_NP1OUT_P, G_NP1OUT_P,
149                G_NP1OUT_2P};
150  inline  inline
151  ES_opgroup  ES_opgroup
152  getOpgroup(ES_optype op)  getOpgroup(ES_optype op)
# Line 79  resultFS(DataAbstract_ptr left, DataAbst Line 163  resultFS(DataAbstract_ptr left, DataAbst
163      // that way, if interpolate is required in any other op we can just throw a      // that way, if interpolate is required in any other op we can just throw a
164      // programming error exception.      // programming error exception.
165    
166      FunctionSpace l=left->getFunctionSpace();
167      if (left->getFunctionSpace()!=right->getFunctionSpace())    FunctionSpace r=right->getFunctionSpace();
168      {    if (l!=r)
169          throw DataException("FunctionSpaces not equal - interpolation not supported on lazy data.");    {
170      }      if (r.probeInterpolation(l))
171      return left->getFunctionSpace();      {
172        return l;
173        }
174        if (l.probeInterpolation(r))
175        {
176        return r;
177        }
178        throw DataException("Cannot interpolate between the FunctionSpaces given for operation "+opToString(op)+".");
179      }
180      return l;
181  }  }
182    
183  // return the shape of the result of "left op right"  // return the shape of the result of "left op right"
184    // the shapes resulting from tensor product are more complex to compute so are worked out elsewhere
185  DataTypes::ShapeType  DataTypes::ShapeType
186  resultShape(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)  resultShape(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
187  {  {
188      if (left->getShape()!=right->getShape())      if (left->getShape()!=right->getShape())
189      {      {
190          throw DataException("Shapes not the same - shapes must match for lazy data.");        if ((getOpgroup(op)!=G_BINARY) && (getOpgroup(op)!=G_NP1OUT))
191          {
192            throw DataException("Shapes not the name - shapes must match for (point)binary operations.");
193          }
194          if (left->getRank()==0)   // we need to allow scalar * anything
195          {
196            return right->getShape();
197          }
198          if (right->getRank()==0)
199          {
200            return left->getShape();
201          }
202          throw DataException("Shapes not the same - arguments must have matching shapes (or be scalars) for (point)binary operations on lazy data.");
203      }      }
204      return left->getShape();      return left->getShape();
205  }  }
206    
207  size_t  // return the shape for "op left"
208  resultLength(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)  
209    DataTypes::ShapeType
210    resultShape(DataAbstract_ptr left, ES_optype op, int axis_offset)
211  {  {
212     switch (getOpgroup(op))      switch(op)
213     {      {
214     case G_BINARY: return left->getLength();          case TRANS:
215     case G_UNARY: return left->getLength();         {            // for the scoping of variables
216     default:          const DataTypes::ShapeType& s=left->getShape();
217      throw DataException("Programmer Error - attempt to getLength() for operator "+opToString(op)+".");          DataTypes::ShapeType sh;
218     }          int rank=left->getRank();
219            if (axis_offset<0 || axis_offset>rank)
220            {
221                   throw DataException("Error - Data::transpose must have 0 <= axis_offset <= rank=" + rank);
222                }
223                for (int i=0; i<rank; i++)
224            {
225               int index = (axis_offset+i)%rank;
226                   sh.push_back(s[index]); // Append to new shape
227                }
228            return sh;
229           }
230        break;
231        case TRACE:
232           {
233            int rank=left->getRank();
234            if (rank<2)
235            {
236               throw DataException("Trace can only be computed for objects with rank 2 or greater.");
237            }
238            if ((axis_offset>rank-2) || (axis_offset<0))
239            {
240               throw DataException("Trace: axis offset must lie between 0 and rank-2 inclusive.");
241            }
242            if (rank==2)
243            {
244               return DataTypes::scalarShape;
245            }
246            else if (rank==3)
247            {
248               DataTypes::ShapeType sh;
249                   if (axis_offset==0)
250               {
251                    sh.push_back(left->getShape()[2]);
252                   }
253                   else     // offset==1
254               {
255                sh.push_back(left->getShape()[0]);
256                   }
257               return sh;
258            }
259            else if (rank==4)
260            {
261               DataTypes::ShapeType sh;
262               const DataTypes::ShapeType& s=left->getShape();
263                   if (axis_offset==0)
264               {
265                    sh.push_back(s[2]);
266                    sh.push_back(s[3]);
267                   }
268                   else if (axis_offset==1)
269               {
270                    sh.push_back(s[0]);
271                    sh.push_back(s[3]);
272                   }
273               else     // offset==2
274               {
275                sh.push_back(s[0]);
276                sh.push_back(s[1]);
277               }
278               return sh;
279            }
280            else        // unknown rank
281            {
282               throw DataException("Error - Data::trace can only be calculated for rank 2, 3 or 4 object.");
283            }
284           }
285        break;
286            default:
287        throw DataException("Programmer error - resultShape(left,op) can't compute shapes for operator "+opToString(op)+".");
288        }
289    }
290    
291    DataTypes::ShapeType
292    SwapShape(DataAbstract_ptr left, const int axis0, const int axis1)
293    {
294         // This code taken from the Data.cpp swapaxes() method
295         // Some of the checks are probably redundant here
296         int axis0_tmp,axis1_tmp;
297         const DataTypes::ShapeType& s=left->getShape();
298         DataTypes::ShapeType out_shape;
299         // Here's the equivalent of python s_out=s[axis_offset:]+s[:axis_offset]
300         // which goes thru all shape vector elements starting with axis_offset (at index=rank wrap around to 0)
301         int rank=left->getRank();
302         if (rank<2) {
303            throw DataException("Error - Data::swapaxes argument must have at least rank 2.");
304         }
305         if (axis0<0 || axis0>rank-1) {
306            throw DataException("Error - Data::swapaxes: axis0 must be between 0 and rank-1=" + rank-1);
307         }
308         if (axis1<0 || axis1>rank-1) {
309             throw DataException("Error - Data::swapaxes: axis1 must be between 0 and rank-1=" + rank-1);
310         }
311         if (axis0 == axis1) {
312             throw DataException("Error - Data::swapaxes: axis indices must be different.");
313         }
314         if (axis0 > axis1) {
315             axis0_tmp=axis1;
316             axis1_tmp=axis0;
317         } else {
318             axis0_tmp=axis0;
319             axis1_tmp=axis1;
320         }
321         for (int i=0; i<rank; i++) {
322           if (i == axis0_tmp) {
323              out_shape.push_back(s[axis1_tmp]);
324           } else if (i == axis1_tmp) {
325              out_shape.push_back(s[axis0_tmp]);
326           } else {
327              out_shape.push_back(s[i]);
328           }
329         }
330        return out_shape;
331  }  }
332    
333    
334    // determine the output shape for the general tensor product operation
335    // the additional parameters return information required later for the product
336    // the majority of this code is copy pasted from C_General_Tensor_Product
337    DataTypes::ShapeType
338    GTPShape(DataAbstract_ptr left, DataAbstract_ptr right, int axis_offset, int transpose, int& SL, int& SM, int& SR)
339    {
340        
341      // Get rank and shape of inputs
342      int rank0 = left->getRank();
343      int rank1 = right->getRank();
344      const DataTypes::ShapeType& shape0 = left->getShape();
345      const DataTypes::ShapeType& shape1 = right->getShape();
346    
347      // Prepare for the loops of the product and verify compatibility of shapes
348      int start0=0, start1=0;
349      if (transpose == 0)       {}
350      else if (transpose == 1)  { start0 = axis_offset; }
351      else if (transpose == 2)  { start1 = rank1-axis_offset; }
352      else              { throw DataException("DataLazy GeneralTensorProduct Constructor: Error - transpose should be 0, 1 or 2"); }
353    
354      if (rank0<axis_offset)
355      {
356        throw DataException("DataLazy GeneralTensorProduct Constructor: Error - rank of left < axisoffset");
357      }
358    
359      // Adjust the shapes for transpose
360      DataTypes::ShapeType tmpShape0(rank0);    // pre-sizing the vectors rather
361      DataTypes::ShapeType tmpShape1(rank1);    // than using push_back
362      for (int i=0; i<rank0; i++)   { tmpShape0[i]=shape0[(i+start0)%rank0]; }
363      for (int i=0; i<rank1; i++)   { tmpShape1[i]=shape1[(i+start1)%rank1]; }
364    
365      // Prepare for the loops of the product
366      SL=1, SM=1, SR=1;
367      for (int i=0; i<rank0-axis_offset; i++)   {
368        SL *= tmpShape0[i];
369      }
370      for (int i=rank0-axis_offset; i<rank0; i++)   {
371        if (tmpShape0[i] != tmpShape1[i-(rank0-axis_offset)]) {
372          throw DataException("C_GeneralTensorProduct: Error - incompatible shapes");
373        }
374        SM *= tmpShape0[i];
375      }
376      for (int i=axis_offset; i<rank1; i++)     {
377        SR *= tmpShape1[i];
378      }
379    
380      // Define the shape of the output (rank of shape is the sum of the loop ranges below)
381      DataTypes::ShapeType shape2(rank0+rank1-2*axis_offset);  
382      {         // block to limit the scope of out_index
383         int out_index=0;
384         for (int i=0; i<rank0-axis_offset; i++, ++out_index) { shape2[out_index]=tmpShape0[i]; } // First part of arg_0_Z
385         for (int i=axis_offset; i<rank1; i++, ++out_index)   { shape2[out_index]=tmpShape1[i]; } // Last part of arg_1_Z
386      }
387    
388      if (shape2.size()>ESCRIPT_MAX_DATA_RANK)
389      {
390         ostringstream os;
391         os << "C_GeneralTensorProduct: Error - Attempt to create a rank " << shape2.size() << " object. The maximum rank is " << ESCRIPT_MAX_DATA_RANK << ".";
392         throw DataException(os.str());
393      }
394    
395      return shape2;
396    }
397    
398    // determine the number of samples requires to evaluate an expression combining left and right
399    // NP1OUT needs an extra buffer because we can't write the answers over the top of the input.
400    // The same goes for G_TENSORPROD
401    // It might seem that pointwise binary ops (G_BINARY) could be written over the top of the lefts.
402    // This would be true were it not for the possibility that the LHS could be a scalar which needs to be examined
403    // multiple times
404  int  int
405  calcBuffs(const DataLazy_ptr& left, const DataLazy_ptr& right, ES_optype op)  calcBuffs(const DataLazy_ptr& left, const DataLazy_ptr& right, ES_optype op)
406  {  {
407     switch(getOpgroup(op))     switch(getOpgroup(op))
408     {     {
409     case G_IDENTITY: return 1;     case G_IDENTITY: return 1;
410     case G_BINARY: return max(left->getBuffsRequired(),right->getBuffsRequired()+1);     case G_BINARY: return 1+max(left->getBuffsRequired(),right->getBuffsRequired()+1);
411     case G_UNARY: return max(left->getBuffsRequired(),1);     case G_UNARY:
412       case G_UNARY_P: return max(left->getBuffsRequired(),1);
413       case G_NP1OUT: return 1+max(left->getBuffsRequired(),1);
414       case G_NP1OUT_P: return 1+max(left->getBuffsRequired(),1);
415       case G_TENSORPROD: return 1+max(left->getBuffsRequired(),right->getBuffsRequired()+1);
416       case G_NP1OUT_2P: return 1+max(left->getBuffsRequired(),1);
417     default:     default:
418      throw DataException("Programmer Error - attempt to calcBuffs() for operator "+opToString(op)+".");      throw DataException("Programmer Error - attempt to calcBuffs() for operator "+opToString(op)+".");
419     }     }
420  }  }
421    
422    
   
423  }   // end anonymous namespace  }   // end anonymous namespace
424    
425    
426    
427    // Return a string representing the operation
428  const std::string&  const std::string&
429  opToString(ES_optype op)  opToString(ES_optype op)
430  {  {
# Line 140  opToString(ES_optype op) Line 437  opToString(ES_optype op)
437    
438    
439  DataLazy::DataLazy(DataAbstract_ptr p)  DataLazy::DataLazy(DataAbstract_ptr p)
440      : parent(p->getFunctionSpace(),p->getShape()),      : parent(p->getFunctionSpace(),p->getShape())
     m_op(IDENTITY)  
441  {  {
442     if (p->isLazy())     if (p->isLazy())
443     {     {
     // TODO: fix this.   We could make the new node a copy of p?  
444      // I don't want identity of Lazy.      // I don't want identity of Lazy.
445      // Question: Why would that be so bad?      // Question: Why would that be so bad?
446      // Answer: We assume that the child of ID is something we can call getVector on      // Answer: We assume that the child of ID is something we can call getVector on
# Line 153  DataLazy::DataLazy(DataAbstract_ptr p) Line 448  DataLazy::DataLazy(DataAbstract_ptr p)
448     }     }
449     else     else
450     {     {
451      m_id=dynamic_pointer_cast<DataReady>(p);      p->makeLazyShared();
452        DataReady_ptr dr=dynamic_pointer_cast<DataReady>(p);
453        makeIdentity(dr);
454    LAZYDEBUG(cout << "Wrapping " << dr.get() << " id=" << m_id.get() << endl;)
455     }     }
456     m_length=p->getLength();  LAZYDEBUG(cout << "(1)Lazy created with " << m_samplesize << endl;)
    m_buffsRequired=1;  
    m_samplesize=getNumDPPSample()*getNoValues();  
 cout << "(1)Lazy created with " << m_samplesize << endl;  
457  }  }
458    
459    
460    
461    
462  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op)  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op)
463      : parent(left->getFunctionSpace(),left->getShape()),      : parent(left->getFunctionSpace(),left->getShape()),
464      m_op(op)      m_op(op),
465        m_axis_offset(0),
466        m_transpose(0),
467        m_SL(0), m_SM(0), m_SR(0)
468  {  {
469     if (getOpgroup(op)!=G_UNARY)     if ((getOpgroup(op)!=G_UNARY) && (getOpgroup(op)!=G_NP1OUT))
470     {     {
471      throw DataException("Programmer error - constructor DataLazy(left, op) will only process UNARY operations.");      throw DataException("Programmer error - constructor DataLazy(left, op) will only process UNARY operations.");
472     }     }
473    
474     DataLazy_ptr lleft;     DataLazy_ptr lleft;
475     if (!left->isLazy())     if (!left->isLazy())
476     {     {
# Line 178  DataLazy::DataLazy(DataAbstract_ptr left Line 480  DataLazy::DataLazy(DataAbstract_ptr left
480     {     {
481      lleft=dynamic_pointer_cast<DataLazy>(left);      lleft=dynamic_pointer_cast<DataLazy>(left);
482     }     }
483     m_length=left->getLength();     m_readytype=lleft->m_readytype;
484     m_left=lleft;     m_left=lleft;
485     m_buffsRequired=1;     m_buffsRequired=calcBuffs(m_left, m_right,m_op); // yeah m_right will be null at this point
486     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
487       m_maxsamplesize=max(m_samplesize,m_left->getMaxSampleSize());
488       m_children=m_left->m_children+1;
489       m_height=m_left->m_height+1;
490       SIZELIMIT
491  }  }
492    
493    
494  DataLazy::DataLazy(DataLazy_ptr left, DataLazy_ptr right, ES_optype op)  // In this constructor we need to consider interpolation
495    DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
496      : parent(resultFS(left,right,op), resultShape(left,right,op)),      : parent(resultFS(left,right,op), resultShape(left,right,op)),
497      m_left(left),      m_op(op),
498      m_right(right),      m_SL(0), m_SM(0), m_SR(0)
     m_op(op)  
499  {  {
500     if (getOpgroup(op)!=G_BINARY)  LAZYDEBUG(cout << "Forming operator with " << left.get() << " " << right.get() << endl;)
501       if ((getOpgroup(op)!=G_BINARY))
502     {     {
503      throw DataException("Programmer error - constructor DataLazy(left, right, op) will only process BINARY operations.");      throw DataException("Programmer error - constructor DataLazy(left, right, op) will only process BINARY operations.");
504     }     }
505     m_length=resultLength(m_left,m_right,m_op);  
506       if (getFunctionSpace()!=left->getFunctionSpace())    // left needs to be interpolated
507       {
508        FunctionSpace fs=getFunctionSpace();
509        Data ltemp(left);
510        Data tmp(ltemp,fs);
511        left=tmp.borrowDataPtr();
512       }
513       if (getFunctionSpace()!=right->getFunctionSpace())   // right needs to be interpolated
514       {
515        Data tmp(Data(right),getFunctionSpace());
516        right=tmp.borrowDataPtr();
517    LAZYDEBUG(cout << "Right interpolation required " << right.get() << endl;)
518       }
519       left->operandCheck(*right);
520    
521       if (left->isLazy())          // the children need to be DataLazy. Wrap them in IDENTITY if required
522       {
523        m_left=dynamic_pointer_cast<DataLazy>(left);
524    LAZYDEBUG(cout << "Left is " << m_left->toString() << endl;)
525       }
526       else
527       {
528        m_left=DataLazy_ptr(new DataLazy(left));
529    LAZYDEBUG(cout << "Left " << left.get() << " wrapped " << m_left->m_id.get() << endl;)
530       }
531       if (right->isLazy())
532       {
533        m_right=dynamic_pointer_cast<DataLazy>(right);
534    LAZYDEBUG(cout << "Right is " << m_right->toString() << endl;)
535       }
536       else
537       {
538        m_right=DataLazy_ptr(new DataLazy(right));
539    LAZYDEBUG(cout << "Right " << right.get() << " wrapped " << m_right->m_id.get() << endl;)
540       }
541       char lt=m_left->m_readytype;
542       char rt=m_right->m_readytype;
543       if (lt=='E' || rt=='E')
544       {
545        m_readytype='E';
546       }
547       else if (lt=='T' || rt=='T')
548       {
549        m_readytype='T';
550       }
551       else
552       {
553        m_readytype='C';
554       }
555     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
556     m_buffsRequired=calcBuffs(m_left, m_right, m_op);     m_maxsamplesize=max(max(m_samplesize,m_right->getMaxSampleSize()),m_left->getMaxSampleSize());  
557  cout << "(2)Lazy created with " << m_samplesize << endl;     m_buffsRequired=calcBuffs(m_left, m_right,m_op);
558       m_children=m_left->m_children+m_right->m_children+2;
559       m_height=max(m_left->m_height,m_right->m_height)+1;
560       SIZELIMIT
561    LAZYDEBUG(cout << "(3)Lazy created with " << m_samplesize << endl;)
562  }  }
563    
564  DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)  DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op, int axis_offset, int transpose)
565      : parent(resultFS(left,right,op), resultShape(left,right,op)),      : parent(resultFS(left,right,op), GTPShape(left,right, axis_offset, transpose, m_SL,m_SM, m_SR)),
566      m_op(op)      m_op(op),
567        m_axis_offset(axis_offset),
568        m_transpose(transpose)
569  {  {
570     if (getOpgroup(op)!=G_BINARY)     if ((getOpgroup(op)!=G_TENSORPROD))
571     {     {
572      throw DataException("Programmer error - constructor DataLazy(left, op) will only process BINARY operations.");      throw DataException("Programmer error - constructor DataLazy(left, right, op, ax, tr) will only process BINARY operations which require parameters.");
573     }     }
574     if (left->isLazy())     if ((transpose>2) || (transpose<0))
575       {
576        throw DataException("DataLazy GeneralTensorProduct constructor: Error - transpose should be 0, 1 or 2");
577       }
578       if (getFunctionSpace()!=left->getFunctionSpace())    // left needs to be interpolated
579       {
580        FunctionSpace fs=getFunctionSpace();
581        Data ltemp(left);
582        Data tmp(ltemp,fs);
583        left=tmp.borrowDataPtr();
584       }
585       if (getFunctionSpace()!=right->getFunctionSpace())   // right needs to be interpolated
586       {
587        Data tmp(Data(right),getFunctionSpace());
588        right=tmp.borrowDataPtr();
589       }
590    //    left->operandCheck(*right);
591    
592       if (left->isLazy())          // the children need to be DataLazy. Wrap them in IDENTITY if required
593     {     {
594      m_left=dynamic_pointer_cast<DataLazy>(left);      m_left=dynamic_pointer_cast<DataLazy>(left);
595     }     }
# Line 225  DataLazy::DataLazy(DataAbstract_ptr left Line 605  DataLazy::DataLazy(DataAbstract_ptr left
605     {     {
606      m_right=DataLazy_ptr(new DataLazy(right));      m_right=DataLazy_ptr(new DataLazy(right));
607     }     }
608       char lt=m_left->m_readytype;
609     m_length=resultLength(m_left,m_right,m_op);     char rt=m_right->m_readytype;
610       if (lt=='E' || rt=='E')
611       {
612        m_readytype='E';
613       }
614       else if (lt=='T' || rt=='T')
615       {
616        m_readytype='T';
617       }
618       else
619       {
620        m_readytype='C';
621       }
622     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
623       m_maxsamplesize=max(max(m_samplesize,m_right->getMaxSampleSize()),m_left->getMaxSampleSize());  
624     m_buffsRequired=calcBuffs(m_left, m_right,m_op);     m_buffsRequired=calcBuffs(m_left, m_right,m_op);
625  cout << "(3)Lazy created with " << m_samplesize << endl;     m_children=m_left->m_children+m_right->m_children+2;
626       m_height=max(m_left->m_height,m_right->m_height)+1;
627       SIZELIMIT
628    LAZYDEBUG(cout << "(4)Lazy created with " << m_samplesize << endl;)
629    }
630    
631    
632    DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, int axis_offset)
633        : parent(left->getFunctionSpace(), resultShape(left,op, axis_offset)),
634        m_op(op),
635        m_axis_offset(axis_offset),
636        m_transpose(0),
637        m_tol(0)
638    {
639       if ((getOpgroup(op)!=G_NP1OUT_P))
640       {
641        throw DataException("Programmer error - constructor DataLazy(left, op, ax) will only process UNARY operations which require parameters.");
642       }
643       DataLazy_ptr lleft;
644       if (!left->isLazy())
645       {
646        lleft=DataLazy_ptr(new DataLazy(left));
647       }
648       else
649       {
650        lleft=dynamic_pointer_cast<DataLazy>(left);
651       }
652       m_readytype=lleft->m_readytype;
653       m_left=lleft;
654       m_buffsRequired=calcBuffs(m_left, m_right,m_op); // yeah m_right will be null at this point
655       m_samplesize=getNumDPPSample()*getNoValues();
656       m_maxsamplesize=max(m_samplesize,m_left->getMaxSampleSize());
657       m_children=m_left->m_children+1;
658       m_height=m_left->m_height+1;
659       SIZELIMIT
660    LAZYDEBUG(cout << "(5)Lazy created with " << m_samplesize << endl;)
661  }  }
662    
663    DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, double tol)
664        : parent(left->getFunctionSpace(), left->getShape()),
665        m_op(op),
666        m_axis_offset(0),
667        m_transpose(0),
668        m_tol(tol)
669    {
670       if ((getOpgroup(op)!=G_UNARY_P))
671       {
672        throw DataException("Programmer error - constructor DataLazy(left, op, tol) will only process UNARY operations which require parameters.");
673       }
674       DataLazy_ptr lleft;
675       if (!left->isLazy())
676       {
677        lleft=DataLazy_ptr(new DataLazy(left));
678       }
679       else
680       {
681        lleft=dynamic_pointer_cast<DataLazy>(left);
682       }
683       m_readytype=lleft->m_readytype;
684       m_left=lleft;
685       m_buffsRequired=calcBuffs(m_left, m_right,m_op); // yeah m_right will be null at this point
686       m_samplesize=getNumDPPSample()*getNoValues();
687       m_maxsamplesize=max(m_samplesize,m_left->getMaxSampleSize());
688       m_children=m_left->m_children+1;
689       m_height=m_left->m_height+1;
690       SIZELIMIT
691    LAZYDEBUG(cout << "(6)Lazy created with " << m_samplesize << endl;)
692    }
693    
694    
695    DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, const int axis0, const int axis1)
696        : parent(left->getFunctionSpace(), SwapShape(left,axis0,axis1)),
697        m_op(op),
698        m_axis_offset(axis0),
699        m_transpose(axis1),
700        m_tol(0)
701    {
702       if ((getOpgroup(op)!=G_NP1OUT_2P))
703       {
704        throw DataException("Programmer error - constructor DataLazy(left, op, tol) will only process UNARY operations which require two integer parameters.");
705       }
706       DataLazy_ptr lleft;
707       if (!left->isLazy())
708       {
709        lleft=DataLazy_ptr(new DataLazy(left));
710       }
711       else
712       {
713        lleft=dynamic_pointer_cast<DataLazy>(left);
714       }
715       m_readytype=lleft->m_readytype;
716       m_left=lleft;
717       m_buffsRequired=calcBuffs(m_left, m_right,m_op); // yeah m_right will be null at this point
718       m_samplesize=getNumDPPSample()*getNoValues();
719       m_maxsamplesize=max(m_samplesize,m_left->getMaxSampleSize());
720       m_children=m_left->m_children+1;
721       m_height=m_left->m_height+1;
722       SIZELIMIT
723    LAZYDEBUG(cout << "(7)Lazy created with " << m_samplesize << endl;)
724    }
725    
726  DataLazy::~DataLazy()  DataLazy::~DataLazy()
727  {  {
# Line 245  DataLazy::getBuffsRequired() const Line 735  DataLazy::getBuffsRequired() const
735  }  }
736    
737    
738  // the vector and the offset are a place where the method could write its data if it wishes  size_t
739  // it is not obligated to do so. For example, if it has its own storage already, it can use that.  DataLazy::getMaxSampleSize() const
 // Hence the return value to indicate where the data is actually stored.  
 // Regardless, the storage should be assumed to be used, even if it isn't.  
 const double*  
 DataLazy::resolveSample(ValueType& v,int sampleNo,  size_t offset ) const  
740  {  {
741    if (m_op==IDENTITY)        return m_maxsamplesize;
742    }
743    
744    
745    
746    size_t
747    DataLazy::getSampleBufferSize() const
748    {
749        return m_maxsamplesize*(max(1,m_buffsRequired));
750    }
751    
752    /*
753      \brief Evaluates the expression using methods on Data.
754      This does the work for the collapse method.
755      For reasons of efficiency do not call this method on DataExpanded nodes.
756    */
757    DataReady_ptr
758    DataLazy::collapseToReady()
759    {
760      if (m_readytype=='E')
761      { // this is more an efficiency concern than anything else
762        throw DataException("Programmer Error - do not use collapse on Expanded data.");
763      }
764      if (m_op==IDENTITY)
765    {    {
766      const ValueType& vec=m_id->getVector();      return m_id;
     return &(vec[m_id->getPointOffset(sampleNo, 0)]);  
767    }    }
768    size_t rightoffset=offset+m_samplesize;    DataReady_ptr pleft=m_left->collapseToReady();
769    const double* left=m_left->resolveSample(v,sampleNo,offset);    Data left(pleft);
770    const double* right=0;    Data right;
771    if (getOpgroup(m_op)==G_BINARY)    if ((getOpgroup(m_op)==G_BINARY) || (getOpgroup(m_op)==G_TENSORPROD))
772    {    {
773      right=m_right->resolveSample(v,sampleNo,rightoffset);      right=Data(m_right->collapseToReady());
774    }    }
775    double* result=&(v[offset]);    Data result;
776      switch(m_op)
777    {    {
778      switch(m_op)      case ADD:
779      {      result=left+right;
     case ADD:       // since these are pointwise ops, pretend each sample is one point  
     tensor_binary_operation(m_samplesize, left, right, result, plus<double>());  
780      break;      break;
781      case SUB:            case SUB:      
782      tensor_binary_operation(m_samplesize, left, right, result, minus<double>());      result=left-right;
783      break;      break;
784      case MUL:            case MUL:      
785      tensor_binary_operation(m_samplesize, left, right, result, multiplies<double>());      result=left*right;
786      break;      break;
787      case DIV:            case DIV:      
788      tensor_binary_operation(m_samplesize, left, right, result, divides<double>());      result=left/right;
789      break;      break;
 // unary ops  
790      case SIN:      case SIN:
791      tensor_unary_operation(m_samplesize, left, result, ::sin);      result=left.sin();  
792        break;
793        case COS:
794        result=left.cos();
795        break;
796        case TAN:
797        result=left.tan();
798        break;
799        case ASIN:
800        result=left.asin();
801        break;
802        case ACOS:
803        result=left.acos();
804        break;
805        case ATAN:
806        result=left.atan();
807        break;
808        case SINH:
809        result=left.sinh();
810        break;
811        case COSH:
812        result=left.cosh();
813        break;
814        case TANH:
815        result=left.tanh();
816        break;
817        case ERF:
818        result=left.erf();
819        break;
820       case ASINH:
821        result=left.asinh();
822        break;
823       case ACOSH:
824        result=left.acosh();
825        break;
826       case ATANH:
827        result=left.atanh();
828        break;
829        case LOG10:
830        result=left.log10();
831        break;
832        case LOG:
833        result=left.log();
834        break;
835        case SIGN:
836        result=left.sign();
837        break;
838        case ABS:
839        result=left.abs();
840        break;
841        case NEG:
842        result=left.neg();
843        break;
844        case POS:
845        // it doesn't mean anything for delayed.
846        // it will just trigger a deep copy of the lazy object
847        throw DataException("Programmer error - POS not supported for lazy data.");
848        break;
849        case EXP:
850        result=left.exp();
851        break;
852        case SQRT:
853        result=left.sqrt();
854        break;
855        case RECIP:
856        result=left.oneOver();
857        break;
858        case GZ:
859        result=left.wherePositive();
860        break;
861        case LZ:
862        result=left.whereNegative();
863        break;
864        case GEZ:
865        result=left.whereNonNegative();
866        break;
867        case LEZ:
868        result=left.whereNonPositive();
869        break;
870        case NEZ:
871        result=left.whereNonZero(m_tol);
872        break;
873        case EZ:
874        result=left.whereZero(m_tol);
875        break;
876        case SYM:
877        result=left.symmetric();
878        break;
879        case NSYM:
880        result=left.nonsymmetric();
881        break;
882        case PROD:
883        result=C_GeneralTensorProduct(left,right,m_axis_offset, m_transpose);
884        break;
885        case TRANS:
886        result=left.transpose(m_axis_offset);
887        break;
888        case TRACE:
889        result=left.trace(m_axis_offset);
890        break;
891        case SWAP:
892        result=left.swapaxes(m_axis_offset, m_transpose);
893        break;
894        default:
895        throw DataException("Programmer error - collapseToReady does not know how to resolve operator "+opToString(m_op)+".");
896      }
897      return result.borrowReadyPtr();
898    }
899    
900    /*
901       \brief Converts the DataLazy into an IDENTITY storing the value of the expression.
902       This method uses the original methods on the Data class to evaluate the expressions.
903       For this reason, it should not be used on DataExpanded instances. (To do so would defeat
904       the purpose of using DataLazy in the first place).
905    */
906    void
907    DataLazy::collapse()
908    {
909      if (m_op==IDENTITY)
910      {
911        return;
912      }
913      if (m_readytype=='E')
914      { // this is more an efficiency concern than anything else
915        throw DataException("Programmer Error - do not use collapse on Expanded data.");
916      }
917      m_id=collapseToReady();
918      m_op=IDENTITY;
919    }
920    
921    /*
922      \brief Compute the value of the expression (unary operation) for the given sample.
923      \return Vector which stores the value of the subexpression for the given sample.
924      \param v A vector to store intermediate results.
925      \param offset Index in v to begin storing results.
926      \param sampleNo Sample number to evaluate.
927      \param roffset (output parameter) the offset in the return vector where the result begins.
928    
929      The return value will be an existing vector so do not deallocate it.
930      If the result is stored in v it should be stored at the offset given.
931      Everything from offset to the end of v should be considered available for this method to use.
932    */
933    DataTypes::ValueType*
934    DataLazy::resolveUnary(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const
935    {
936        // we assume that any collapsing has been done before we get here
937        // since we only have one argument we don't need to think about only
938        // processing single points.
939      if (m_readytype!='E')
940      {
941        throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
942      }
943      const ValueType* vleft=m_left->resolveSample(v,offset,sampleNo,roffset);
944      const double* left=&((*vleft)[roffset]);
945      double* result=&(v[offset]);
946      roffset=offset;
947      switch (m_op)
948      {
949        case SIN:  
950        tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sin);
951      break;      break;
952      case COS:      case COS:
953      tensor_unary_operation(m_samplesize, left, result, ::cos);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cos);
954      break;      break;
955      case TAN:      case TAN:
956      tensor_unary_operation(m_samplesize, left, result, ::tan);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tan);
957      break;      break;
958      case ASIN:      case ASIN:
959      tensor_unary_operation(m_samplesize, left, result, ::asin);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::asin);
960      break;      break;
961      case ACOS:      case ACOS:
962      tensor_unary_operation(m_samplesize, left, result, ::acos);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::acos);
963      break;      break;
964      case ATAN:      case ATAN:
965      tensor_unary_operation(m_samplesize, left, result, ::atan);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::atan);
966      break;      break;
967      case SINH:      case SINH:
968      tensor_unary_operation(m_samplesize, left, result, ::sinh);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sinh);
969      break;      break;
970      case COSH:      case COSH:
971      tensor_unary_operation(m_samplesize, left, result, ::cosh);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cosh);
972      break;      break;
973      case TANH:      case TANH:
974      tensor_unary_operation(m_samplesize, left, result, ::tanh);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tanh);
975      break;      break;
976      case ERF:      case ERF:
977  #ifdef _WIN32  #if defined (_WIN32) && !defined(__INTEL_COMPILER)
978      throw DataException("Error - Data:: erf function is not supported on _WIN32 platforms.");      throw DataException("Error - Data:: erf function is not supported on _WIN32 platforms.");
979  #else  #else
980      tensor_unary_operation(m_samplesize, left, result, ::erf);      tensor_unary_operation(m_samplesize, left, result, ::erf);
981      break;      break;
982  #endif  #endif
983     case ASINH:     case ASINH:
984  #ifdef _WIN32  #if defined (_WIN32) && !defined(__INTEL_COMPILER)
985      tensor_unary_operation(m_samplesize, left, result, escript::asinh_substitute);      tensor_unary_operation(m_samplesize, left, result, escript::asinh_substitute);
986  #else  #else
987      tensor_unary_operation(m_samplesize, left, result, ::asinh);      tensor_unary_operation(m_samplesize, left, result, ::asinh);
988  #endif    #endif  
989      break;      break;
990     case ACOSH:     case ACOSH:
991  #ifdef _WIN32  #if defined (_WIN32) && !defined(__INTEL_COMPILER)
992      tensor_unary_operation(m_samplesize, left, result, escript::acosh_substitute);      tensor_unary_operation(m_samplesize, left, result, escript::acosh_substitute);
993  #else  #else
994      tensor_unary_operation(m_samplesize, left, result, ::acosh);      tensor_unary_operation(m_samplesize, left, result, ::acosh);
995  #endif    #endif  
996      break;      break;
997     case ATANH:     case ATANH:
998  #ifdef _WIN32  #if defined (_WIN32) && !defined(__INTEL_COMPILER)
999      tensor_unary_operation(m_samplesize, left, result, escript::atanh_substitute);      tensor_unary_operation(m_samplesize, left, result, escript::atanh_substitute);
1000  #else  #else
1001      tensor_unary_operation(m_samplesize, left, result, ::atanh);      tensor_unary_operation(m_samplesize, left, result, ::atanh);
1002  #endif    #endif  
1003      break;      break;
1004      case LOG10:      case LOG10:
1005      tensor_unary_operation(m_samplesize, left, result, ::log10);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log10);
1006      break;      break;
1007      case LOG:      case LOG:
1008      tensor_unary_operation(m_samplesize, left, result, ::log);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log);
1009      break;      break;
1010      case SIGN:      case SIGN:
1011      tensor_unary_operation(m_samplesize, left, result, escript::fsign);      tensor_unary_operation(m_samplesize, left, result, escript::fsign);
1012      break;      break;
1013      case ABS:      case ABS:
1014      tensor_unary_operation(m_samplesize, left, result, ::fabs);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::fabs);
1015      break;      break;
1016      case NEG:      case NEG:
1017      tensor_unary_operation(m_samplesize, left, result, negate<double>());      tensor_unary_operation(m_samplesize, left, result, negate<double>());
# Line 357  DataLazy::resolveSample(ValueType& v,int Line 1022  DataLazy::resolveSample(ValueType& v,int
1022      throw DataException("Programmer error - POS not supported for lazy data.");      throw DataException("Programmer error - POS not supported for lazy data.");
1023      break;      break;
1024      case EXP:      case EXP:
1025      tensor_unary_operation(m_samplesize, left, result, ::exp);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::exp);
1026      break;      break;
1027      case SQRT:      case SQRT:
1028      tensor_unary_operation(m_samplesize, left, result, ::sqrt);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sqrt);
1029      break;      break;
1030      case RECIP:      case RECIP:
1031      tensor_unary_operation(m_samplesize, left, result, bind1st(divides<double>(),1.));      tensor_unary_operation(m_samplesize, left, result, bind1st(divides<double>(),1.));
# Line 377  DataLazy::resolveSample(ValueType& v,int Line 1042  DataLazy::resolveSample(ValueType& v,int
1042      case LEZ:      case LEZ:
1043      tensor_unary_operation(m_samplesize, left, result, bind2nd(less_equal<double>(),0.0));      tensor_unary_operation(m_samplesize, left, result, bind2nd(less_equal<double>(),0.0));
1044      break;      break;
1045    // There are actually G_UNARY_P but I don't see a compelling reason to treat them differently
1046        case NEZ:
1047        tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsGT(),m_tol));
1048        break;
1049        case EZ:
1050        tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsLTE(),m_tol));
1051        break;
1052    
1053        default:
1054        throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");
1055      }
1056      return &v;
1057    }
1058    
1059    
1060    
1061    
1062    
1063    
1064    /*
1065      \brief Compute the value of the expression (unary operation) for the given sample.
1066      \return Vector which stores the value of the subexpression for the given sample.
1067      \param v A vector to store intermediate results.
1068      \param offset Index in v to begin storing results.
1069      \param sampleNo Sample number to evaluate.
1070      \param roffset (output parameter) the offset in the return vector where the result begins.
1071    
1072      The return value will be an existing vector so do not deallocate it.
1073      If the result is stored in v it should be stored at the offset given.
1074      Everything from offset to the end of v should be considered available for this method to use.
1075    */
DataTypes::ValueType*
DataLazy::resolveNP1OUT(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const
{
	// we assume that any collapsing has been done before we get here
	// since we only have one argument we don't need to think about only
	// processing single points.
  if (m_readytype!='E')
  {
    throw DataException("Programmer error - resolveNP1OUT should only be called on expanded Data.");
  }
	// since we can't write the result over the input, we need a result offset further along
  size_t subroffset=roffset+m_samplesize;
LAZYDEBUG(cerr << "subroffset=" << subroffset << endl;)
	// ask the child to place its sample one of our samplesizes further along;
	// subroffset is overwritten with where the child's data actually starts
  const ValueType* vleft=m_left->resolveSample(v,offset+m_left->m_samplesize,sampleNo,subroffset);
  roffset=offset;		// our result goes at the offset we were given
  size_t loop=0;
	// NOTE(review): m_readytype is always 'E' here (guarded above), so
	// numsteps is always getNumDPPSample() - the ternary looks vestigial
  size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
	// SYM/NSYM preserve the point shape, so one step advances both the
	// input and output cursors by the same number of values
  size_t step=getNoValues();
  switch (m_op)
  {
    case SYM:
	// apply the symmetric part operation point by point across the sample
	for (loop=0;loop<numsteps;++loop)
	{
	    DataMaths::symmetric(*vleft,m_left->getShape(),subroffset, v, getShape(), offset);
	    subroffset+=step;
	    offset+=step;
	}
	break;
    case NSYM:
	// apply the nonsymmetric part operation point by point across the sample
	for (loop=0;loop<numsteps;++loop)
	{
	    DataMaths::nonsymmetric(*vleft,m_left->getShape(),subroffset, v, getShape(), offset);
	    subroffset+=step;
	    offset+=step;
	}
	break;
    default:
	throw DataException("Programmer error - resolveNP1OUT can not resolve operator "+opToString(m_op)+".");
  }
  return &v;
}
1117    
1118    /*
1119      \brief Compute the value of the expression (unary operation) for the given sample.
1120      \return Vector which stores the value of the subexpression for the given sample.
1121      \param v A vector to store intermediate results.
1122      \param offset Index in v to begin storing results.
1123      \param sampleNo Sample number to evaluate.
1124      \param roffset (output parameter) the offset in the return vector where the result begins.
1125    
1126      The return value will be an existing vector so do not deallocate it.
1127      If the result is stored in v it should be stored at the offset given.
1128      Everything from offset to the end of v should be considered available for this method to use.
1129    */
DataTypes::ValueType*
DataLazy::resolveNP1OUT_P(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const
{
	// we assume that any collapsing has been done before we get here
	// since we only have one argument we don't need to think about only
	// processing single points.
  if (m_readytype!='E')
  {
    throw DataException("Programmer error - resolveNP1OUT_P should only be called on expanded Data.");
  }
	// since we can't write the result over the input, we need a result offset further along
  size_t subroffset;
	// resolveSample sets subroffset to where the child's data actually begins
  const ValueType* vleft=m_left->resolveSample(v,offset+m_left->m_samplesize,sampleNo,subroffset);
LAZYDEBUG(cerr << "srcsamplesize=" << offset+m_left->m_samplesize << " beg=" << subroffset << endl;)
LAZYDEBUG(cerr << "Offset for 5800=" << getPointOffset(5800/getNumDPPSample(),5800%getNumDPPSample()) << endl;)
  roffset=offset;
  size_t loop=0;
	// NOTE(review): the guard above means m_readytype is always 'E' here
  size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
	// TRACE/TRANS change the point shape, so input and output cursors
	// advance by different amounts per point
  size_t outstep=getNoValues();
  size_t instep=m_left->getNoValues();
LAZYDEBUG(cerr << "instep=" << instep << " outstep=" << outstep<< " numsteps=" << numsteps << endl;)
  switch (m_op)
  {
    case TRACE:
	for (loop=0;loop<numsteps;++loop)
	{
// NOTE(review): leftover debugging scaffold - zz and the two zz==5800 branches
// execute every iteration even when LAZYDEBUG is compiled out; candidate for removal
size_t zz=sampleNo*getNumDPPSample()+loop;
if (zz==5800)
{
LAZYDEBUG(cerr << "point=" <<  zz<< endl;)
LAZYDEBUG(cerr << "Input to  trace=" << DataTypes::pointToString(*vleft,m_left->getShape(),subroffset,"") << endl;)
LAZYDEBUG(cerr << "Offset for point=" << getPointOffset(5800/getNumDPPSample(),5800%getNumDPPSample()) << " vs ";)
LAZYDEBUG(cerr << subroffset << endl;)
LAZYDEBUG(cerr << "output=" << offset << endl;)
}
            DataMaths::trace(*vleft,m_left->getShape(),subroffset, v ,getShape(),offset,m_axis_offset);
if (zz==5800)
{
LAZYDEBUG(cerr << "Result of trace=" << DataTypes::pointToString(v,getShape(),offset,"") << endl;)
}
	    subroffset+=instep;
	    offset+=outstep;
	}
	break;
    case TRANS:
	// transpose each point of the sample in place in the output buffer
	for (loop=0;loop<numsteps;++loop)
	{
            DataMaths::transpose(*vleft,m_left->getShape(),subroffset, v,getShape(),offset,m_axis_offset);
	    subroffset+=instep;
	    offset+=outstep;
	}
	break;
    default:
	throw DataException("Programmer error - resolveNP1OUTP can not resolve operator "+opToString(m_op)+".");
  }
  return &v;
}
1187    
1188    
1189    /*
1190      \brief Compute the value of the expression (unary operation with int params) for the given sample.
1191      \return Vector which stores the value of the subexpression for the given sample.
1192      \param v A vector to store intermediate results.
1193      \param offset Index in v to begin storing results.
1194      \param sampleNo Sample number to evaluate.
1195      \param roffset (output parameter) the offset in the return vector where the result begins.
1196    
1197      The return value will be an existing vector so do not deallocate it.
1198      If the result is stored in v it should be stored at the offset given.
1199      Everything from offset to the end of v should be considered available for this method to use.
1200    */
1201    DataTypes::ValueType*
1202    DataLazy::resolveNP1OUT_2P(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const
1203    {
1204        // we assume that any collapsing has been done before we get here
1205        // since we only have one argument we don't need to think about only
1206        // processing single points.
1207      if (m_readytype!='E')
1208      {
1209        throw DataException("Programmer error - resolveNP1OUT_2P should only be called on expanded Data.");
1210      }
1211        // since we can't write the result over the input, we need a result offset further along
1212      size_t subroffset;
1213      const ValueType* vleft=m_left->resolveSample(v,offset+m_left->m_samplesize,sampleNo,subroffset);
1214    LAZYDEBUG(cerr << "srcsamplesize=" << offset+m_left->m_samplesize << " beg=" << subroffset << endl;)
1215    LAZYDEBUG(cerr << "Offset for 5800=" << getPointOffset(5800/getNumDPPSample(),5800%getNumDPPSample()) << endl;)
1216      roffset=offset;
1217      size_t loop=0;
1218      size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1219      size_t outstep=getNoValues();
1220      size_t instep=m_left->getNoValues();
1221    LAZYDEBUG(cerr << "instep=" << instep << " outstep=" << outstep<< " numsteps=" << numsteps << endl;)
1222      switch (m_op)
1223      {
1224        case SWAP:
1225        for (loop=0;loop<numsteps;++loop)
1226        {
1227                DataMaths::swapaxes(*vleft,m_left->getShape(),subroffset, v,getShape(),offset,m_axis_offset, m_transpose);
1228            subroffset+=instep;
1229            offset+=outstep;
1230        }
1231        break;
1232        default:
1233        throw DataException("Programmer error - resolveNP1OUT2P can not resolve operator "+opToString(m_op)+".");
1234      }
1235      return &v;
1236    }
1237    
1238    
1239    
// Apply the binary element-wise operation X (a two-double functor/function,
// with TYPE as the explicit template argument for tensor_binary_operation)
// across one sample, using the stepping scheme computed in resolveBinary:
// the inner loop performs 'numsteps' applications over 'chunksize' doubles,
// advancing the read cursors by leftstep/rightstep and the write cursor by
// resultStep each time; the outer loop ('onumsteps' iterations) adds
// oleftstep/orightstep, which may be negative to rewind a non-expanded
// operand. This reconciles scalar/non-scalar and expanded/non-expanded
// operand combinations.
// NOTE: keep comments out of the macro body - a '//' before a trailing
// line-continuation backslash would comment out the following line.
#define PROC_OP(TYPE,X)                               \
	for (int j=0;j<onumsteps;++j)\
	{\
	  for (int i=0;i<numsteps;++i,resultp+=resultStep) \
	  { \
LAZYDEBUG(cout << "[left,right]=[" << lroffset << "," << rroffset << "]" << endl;)\
LAZYDEBUG(cout << "{left,right}={" << (*left)[lroffset] << "," << (*right)[rroffset] << "}\n";)\
         tensor_binary_operation< TYPE >(chunksize, &((*left)[lroffset]), &((*right)[rroffset]), resultp, X); \
LAZYDEBUG(cout << " result=      " << resultp[0] << endl;) \
         lroffset+=leftstep; \
         rroffset+=rightstep; \
	  }\
	  lroffset+=oleftstep;\
	  rroffset+=orightstep;\
	}
1255    
1256    /*
1257      \brief Compute the value of the expression (binary operation) for the given sample.
1258      \return Vector which stores the value of the subexpression for the given sample.
1259      \param v A vector to store intermediate results.
1260      \param offset Index in v to begin storing results.
1261      \param sampleNo Sample number to evaluate.
1262      \param roffset (output parameter) the offset in the return vector where the result begins.
1263    
1264      The return value will be an existing vector so do not deallocate it.
1265      If the result is stored in v it should be stored at the offset given.
1266      Everything from offset to the end of v should be considered available for this method to use.
1267    */
1268    // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1269    // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1270    // If both children are expanded, then we can process them in a single operation (we treat
1271    // the whole sample as one big datapoint.
1272    // If one of the children is not expanded, then we need to treat each point in the sample
1273    // individually.
1274    // There is an additional complication when scalar operations are considered.
1275    // For example, 2+Vector.
1276    // In this case each double within the point is treated individually
DataTypes::ValueType*
DataLazy::resolveBinary(ValueType& v,  size_t offset, int sampleNo, size_t& roffset) const
{
LAZYDEBUG(cout << "Resolve binary: " << toString() << endl;)

  size_t lroffset=0, rroffset=0;	// offsets in the left and right result vectors
	// first work out which of the children are expanded
  bool leftExp=(m_left->m_readytype=='E');
  bool rightExp=(m_right->m_readytype=='E');
  if (!leftExp && !rightExp)
  {
	throw DataException("Programmer Error - please use collapse if neither argument has type 'E'.");
  }
  bool leftScalar=(m_left->getRank()==0);
  bool rightScalar=(m_right->getRank()==0);
  if ((m_left->getRank()!=m_right->getRank()) && (!leftScalar && !rightScalar))
  {
	throw DataException("resolveBinary - ranks of arguments must match unless one of them is scalar.");
  }
  size_t leftsize=m_left->getNoValues();	// doubles per data point on each side
  size_t rightsize=m_right->getNoValues();
  size_t chunksize=1;			// how many doubles will be processed in one go
  int leftstep=0;		// how far should the left offset advance after each step
  int rightstep=0;
  int numsteps=0;		// total number of steps for the inner loop
  int oleftstep=0;	// the o variables refer to the outer loop
  int orightstep=0;	// The outer loop is only required in cases where there is an extended scalar
  int onumsteps=1;
  
	// classify each operand: Expanded/Single crossed with Scalar/Non-scalar;
	// exactly one of the four flags per side is true
  bool LES=(leftExp && leftScalar);	// Left is an expanded scalar
  bool RES=(rightExp && rightScalar);
  bool LS=(!leftExp && leftScalar);	// left is a single scalar
  bool RS=(!rightExp && rightScalar);
  bool LN=(!leftExp && !leftScalar);	// left is a single non-scalar
  bool RN=(!rightExp && !rightScalar);
  bool LEN=(leftExp && !leftScalar);	// left is an expanded non-scalar
  bool REN=(rightExp && !rightScalar);

  if ((LES && RES) || (LEN && REN))	// both are Expanded scalars or both are expanded non-scalars
  {
	// shapes line up point for point: treat the whole sample as one big chunk
	chunksize=m_left->getNumDPPSample()*leftsize;
	leftstep=0;
	rightstep=0;
	numsteps=1;
  }
  else if (LES || RES)
  {
	// an expanded scalar paired with something else: operate one double at
	// a time, replicating the scalar across the other operand's values
	chunksize=1;
	if (LES)		// left is an expanded scalar
	{
		if (RS)
		{
		   leftstep=1;
		   rightstep=0;
		   numsteps=m_left->getNumDPPSample();
		}
		else		// RN or REN
		{
		   leftstep=0;
		   oleftstep=1;
		   rightstep=1;
		   // a negative outer step rewinds a non-expanded right operand
		   // so the same point is reused for the next outer iteration
		   orightstep=(RN ? -(int)rightsize : 0);
		   numsteps=rightsize;
		   onumsteps=m_left->getNumDPPSample();
		}
	}
	else		// right is an expanded scalar
	{
		if (LS)
		{
		   rightstep=1;
		   leftstep=0;
		   numsteps=m_right->getNumDPPSample();
		}
		else
		{
		   rightstep=0;
		   orightstep=1;
		   leftstep=1;
		   // mirror image of the case above: rewind a single non-scalar left
		   oleftstep=(LN ? -(int)leftsize : 0);
		   numsteps=leftsize;
		   onumsteps=m_right->getNumDPPSample();
		}
	}
  }
  else	// this leaves (LEN, RS), (LEN, RN) and their transposes
  {
	if (LEN)	// and Right will be a single value
	{
		chunksize=rightsize;
		leftstep=rightsize;
		rightstep=0;
		numsteps=m_left->getNumDPPSample();
		if (RS)
		{
		   numsteps*=leftsize;	// scalar right: pair it with every double of the left
		}
	}
	else	// REN
	{
		chunksize=leftsize;
		rightstep=leftsize;
		leftstep=0;
		numsteps=m_right->getNumDPPSample();
		if (LS)
		{
		   numsteps*=rightsize;
		}
	}
  }

  int resultStep=max(leftstep,rightstep);	// only one (at most) should be !=0
	// Get the values of sub-expressions
  const ValueType* left=m_left->resolveSample(v,offset+getMaxSampleSize(),sampleNo,lroffset);	// see note on
	// calcBuffs for why we can't put offset as the 2nd param above
  const ValueType* right=m_right->resolveSample(v,offset+2*getMaxSampleSize(),sampleNo,rroffset); // Note
	// the right child starts further along.
LAZYDEBUG(cout << "Post sub calls in " << toString() << endl;)
LAZYDEBUG(cout << "shapes=" << DataTypes::shapeToString(m_left->getShape()) << "," << DataTypes::shapeToString(m_right->getShape()) << endl;)
LAZYDEBUG(cout << "chunksize=" << chunksize << endl << "leftstep=" << leftstep << " rightstep=" << rightstep;)
LAZYDEBUG(cout << " numsteps=" << numsteps << endl << "oleftstep=" << oleftstep << " orightstep=" << orightstep;)
LAZYDEBUG(cout << "onumsteps=" << onumsteps << endl;)
LAZYDEBUG(cout << " DPPS=" << m_left->getNumDPPSample() << "," <<m_right->getNumDPPSample() << endl;)
LAZYDEBUG(cout << "" << LS << RS << LN << RN << LES << RES <<LEN << REN <<   endl;)


  double* resultp=&(v[offset]);		// results are stored at the vector offset we received
	// PROC_OP drives the loops set up above, applying the operation chunkwise
  switch(m_op)
  {
    case ADD:
        PROC_OP(NO_ARG,plus<double>());
	break;
    case SUB:
	PROC_OP(NO_ARG,minus<double>());
	break;
    case MUL:
	PROC_OP(NO_ARG,multiplies<double>());
	break;
    case DIV:
	PROC_OP(NO_ARG,divides<double>());
	break;
    case POW:
       PROC_OP(double (double,double),::pow);
	break;
    default:
	throw DataException("Programmer error - resolveBinary can not resolve operator "+opToString(m_op)+".");
  }
  roffset=offset;	// result was written at the offset we were handed
  return &v;
}
1427    
1428    
1429    
1430    /*
1431      \brief Compute the value of the expression (tensor product) for the given sample.
1432      \return Vector which stores the value of the subexpression for the given sample.
1433      \param v A vector to store intermediate results.
1434      \param offset Index in v to begin storing results.
1435      \param sampleNo Sample number to evaluate.
1436      \param roffset (output parameter) the offset in the return vector where the result begins.
1437    
1438      The return value will be an existing vector so do not deallocate it.
1439      If the result is stored in v it should be stored at the offset given.
1440      Everything from offset to the end of v should be considered available for this method to use.
1441    */
1442    // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1443    // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1444    // unlike the other resolve helpers, we must treat these datapoints separately.
1445    DataTypes::ValueType*
1446    DataLazy::resolveTProd(ValueType& v,  size_t offset, int sampleNo, size_t& roffset) const
1447    {
1448    LAZYDEBUG(cout << "Resolve TensorProduct: " << toString()  << " to offset " << offset<< endl;)
1449    
1450      size_t lroffset=0, rroffset=0;    // offsets in the left and right result vectors
1451        // first work out which of the children are expanded
1452      bool leftExp=(m_left->m_readytype=='E');
1453      bool rightExp=(m_right->m_readytype=='E');
1454      int steps=getNumDPPSample();
1455    /*  int leftStep=((leftExp && !rightExp)? m_right->getNoValues() : 0);
1456      int rightStep=((rightExp && !leftExp)? m_left->getNoValues() : 0);*/
1457      int leftStep=(leftExp? m_left->getNoValues() : 0);        // do not have scalars as input to this method
1458      int rightStep=(rightExp?m_right->getNoValues() : 0);
1459    
1460      int resultStep=getNoValues();
1461        // Get the values of sub-expressions (leave a gap of one sample for the result).
1462      int gap=offset+m_samplesize;  
1463    
1464    LAZYDEBUG(cout << "Query left with offset=" << gap << endl;)
1465    
1466      const ValueType* left=m_left->resolveSample(v,gap,sampleNo,lroffset);
1467      gap+=m_left->getMaxSampleSize();
1468    
1469    
1470    LAZYDEBUG(cout << "Query right with offset=" << gap << endl;)
1471    
1472    
1473      const ValueType* right=m_right->resolveSample(v,gap,sampleNo,rroffset);
1474    
1475    LAZYDEBUG(cerr << "[Left shape]=" << DataTypes::shapeToString(m_left->getShape()) << "\n[Right shape]=" << DataTypes::shapeToString(m_right->getShape()) << " result=" <<DataTypes::shapeToString(getShape()) <<  endl;
1476    cout << getNoValues() << endl;)
1477    LAZYDEBUG(cerr << "Result of left=";)
1478    LAZYDEBUG(cerr << "[" << lroffset << " .. " << lroffset+m_left->getNoValues() << "]" << endl;
1479    
1480    for (int i=lroffset, limit=lroffset+(leftExp?m_left->getNoValues()*m_left->getNumDPPSample():m_left->getNoValues());i<limit;++i)
1481    {
1482    cout << "[" << setw(2) << i-lroffset << "] " << setw(10) << (*left)[i] << " ";
1483    if (i%4==0) cout << endl;
1484    })
1485    LAZYDEBUG(cerr << "\nResult of right=" << endl;)
1486    LAZYDEBUG(
1487    for (int i=rroffset, limit=rroffset+(rightExp?m_right->getNoValues()*m_right->getNumDPPSample():m_right->getNoValues());i<limit;++i)
1488    {
1489    cerr << "[" <<  setw(2)<< i-rroffset << "] " << setw(10) << (*right)[i] << " ";
1490    if (i%4==0) cout << endl;
1491    }
1492    cerr << endl;
1493    )
1494    LAZYDEBUG(cerr << "Post sub calls: " << toString() << endl;)
1495    LAZYDEBUG(cout << "LeftExp=" << leftExp << " rightExp=" << rightExp << endl;)
1496    LAZYDEBUG(cout << "LeftR=" << m_left->getRank() << " rightExp=" << m_right->getRank() << endl;)
1497    LAZYDEBUG(cout << "LeftSize=" << m_left->getNoValues() << " RightSize=" << m_right->getNoValues() << endl;)
1498    LAZYDEBUG(cout << "m_samplesize=" << m_samplesize << endl;)
1499    LAZYDEBUG(cout << "outputshape=" << DataTypes::shapeToString(getShape()) << endl;)
1500    LAZYDEBUG(cout << "DPPS=" << m_right->getNumDPPSample() <<"."<<endl;)
1501    
1502      double* resultp=&(v[offset]);     // results are stored at the vector offset we recieved
1503      switch(m_op)
1504      {
1505        case PROD:
1506        for (int i=0;i<steps;++i,resultp+=resultStep)
1507        {
1508    
1509    LAZYDEBUG(cout << "lroffset=" << lroffset << "rroffset=" << rroffset << endl;)
1510    LAZYDEBUG(cout << "l*=" << left << " r*=" << right << endl;)
1511    LAZYDEBUG(cout << "m_SL=" << m_SL << " m_SM=" << m_SM << " m_SR=" << m_SR << endl;)
1512    
1513              const double *ptr_0 = &((*left)[lroffset]);
1514              const double *ptr_1 = &((*right)[rroffset]);
1515    
1516    LAZYDEBUG(cout << DataTypes::pointToString(*left, m_left->getShape(),lroffset,"LEFT") << endl;)
1517    LAZYDEBUG(cout << DataTypes::pointToString(*right,m_right->getShape(),rroffset, "RIGHT") << endl;)
1518    
1519              matrix_matrix_product(m_SL, m_SM, m_SR, ptr_0, ptr_1, resultp, m_transpose);
1520    
1521    LAZYDEBUG(cout << "Results--\n";
1522    {
1523      DataVector dv(getNoValues());
1524    for (int z=0;z<getNoValues();++z)
1525    {
1526      cout << "[" << setw(2) << z<< "] " << setw(10) << resultp[z] << " ";
1527      if (z%4==0) cout << endl;
1528      dv[z]=resultp[z];
1529    }
1530    cout << endl << DataTypes::pointToString(dv,getShape(),0,"RESLT");
1531    cout << "\nWritten to: " << resultp << " resultStep=" << resultStep << endl;
1532    }
1533    )
1534          lroffset+=leftStep;
1535          rroffset+=rightStep;
1536        }
1537        break;
1538      default:      default:
1539      throw DataException("Programmer error - do not know how to resolve operator "+opToString(m_op)+".");      throw DataException("Programmer error - resolveTProduct can not resolve operator "+opToString(m_op)+".");
1540      }
1541      roffset=offset;
1542      return &v;
1543    }
1544    
1545    
1546    
1547    /*
1548      \brief Compute the value of the expression for the given sample.
1549      \return Vector which stores the value of the subexpression for the given sample.
1550      \param v A vector to store intermediate results.
1551      \param offset Index in v to begin storing results.
1552      \param sampleNo Sample number to evaluate.
1553      \param roffset (output parameter) the offset in the return vector where the result begins.
1554    
1555      The return value will be an existing vector so do not deallocate it.
1556    */
1557    // the vector and the offset are a place where the method could write its data if it wishes
1558    // it is not obligated to do so. For example, if it has its own storage already, it can use that.
1559    // Hence the return value to indicate where the data is actually stored.
1560    // Regardless, the storage should be assumed to be used, even if it isn't.
1561    
1562    // the roffset is the offset within the returned vector where the data begins
1563    const DataTypes::ValueType*
1564    DataLazy::resolveSample(ValueType& v, size_t offset, int sampleNo, size_t& roffset)
1565    {
1566    LAZYDEBUG(cout << "Resolve sample " << toString() << endl;)
1567        // collapse so we have a 'E' node or an IDENTITY for some other type
1568      if (m_readytype!='E' && m_op!=IDENTITY)
1569      {
1570        collapse();
1571      }
1572      if (m_op==IDENTITY)  
1573      {
1574        const ValueType& vec=m_id->getVectorRO();
1575        if (m_readytype=='C')
1576        {
1577        roffset=0;
1578    LAZYDEBUG(cout << "Finish  sample " << toString() << endl;)
1579        return &(vec);
1580      }      }
1581        roffset=m_id->getPointOffset(sampleNo, 0);
1582    LAZYDEBUG(cout << "Finish  sample " << toString() << endl;)
1583        return &(vec);
1584      }
1585      if (m_readytype!='E')
1586      {
1587        throw DataException("Programmer Error - Collapse did not produce an expanded node.");
1588    }    }
1589    return result;    switch (getOpgroup(m_op))
1590      {
1591      case G_UNARY:
1592      case G_UNARY_P: return resolveUnary(v, offset,sampleNo,roffset);
1593      case G_BINARY: return resolveBinary(v, offset,sampleNo,roffset);
1594      case G_NP1OUT: return resolveNP1OUT(v, offset, sampleNo,roffset);
1595      case G_NP1OUT_P: return resolveNP1OUT_P(v, offset, sampleNo,roffset);
1596      case G_TENSORPROD: return resolveTProd(v,offset, sampleNo,roffset);
1597      case G_NP1OUT_2P: return resolveNP1OUT_2P(v, offset, sampleNo, roffset);
1598      default:
1599        throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");
1600      }
1601    
1602  }  }
1603    
1604    const DataTypes::ValueType*
1605    DataLazy::resolveSample(BufferGroup& bg, int sampleNo, size_t& roffset)
1606    {
1607    #ifdef _OPENMP
1608        int tid=omp_get_thread_num();
1609    #else
1610        int tid=0;
1611    #endif
1612        return resolveSample(bg.getBuffer(tid),bg.getOffset(tid),sampleNo,roffset);
1613    }
1614    
1615    
1616    // This needs to do the work of the idenity constructor
1617    void
1618    DataLazy::resolveToIdentity()
1619    {
1620       if (m_op==IDENTITY)
1621        return;
1622       DataReady_ptr p=resolve();
1623       makeIdentity(p);
1624    }
1625    
1626    void DataLazy::makeIdentity(const DataReady_ptr& p)
1627    {
1628       m_op=IDENTITY;
1629       m_axis_offset=0;
1630       m_transpose=0;
1631       m_SL=m_SM=m_SR=0;
1632       m_children=m_height=0;
1633       m_id=p;
1634       if(p->isConstant()) {m_readytype='C';}
1635       else if(p->isExpanded()) {m_readytype='E';}
1636       else if (p->isTagged()) {m_readytype='T';}
1637       else {throw DataException("Unknown DataReady instance in convertToIdentity constructor.");}
1638       m_buffsRequired=1;
1639       m_samplesize=p->getNumDPPSample()*p->getNoValues();
1640       m_maxsamplesize=m_samplesize;
1641       m_left.reset();
1642       m_right.reset();
1643    }
1644    
1645    // To simplify the memory management, all threads operate on one large vector, rather than one each.
1646    // Each sample is evaluated independently and copied into the result DataExpanded.
1647  DataReady_ptr  DataReady_ptr
1648  DataLazy::resolve()  DataLazy::resolve()
1649  {  {
   // This is broken!     We need to have a buffer per thread!  
   // so the allocation of v needs to move inside the loop somehow  
   
 cout << "Sample size=" << m_samplesize << endl;  
 cout << "Buffers=" << m_buffsRequired << endl;  
1650    
1651    size_t threadbuffersize=m_samplesize*(max(1,m_buffsRequired)+1);  LAZYDEBUG(cout << "Sample size=" << m_samplesize << endl;)
1652    LAZYDEBUG(cout << "Buffers=" << m_buffsRequired << endl;)
1653      if (m_readytype!='E')     // if the whole sub-expression is Constant or Tagged, then evaluate it normally
1654      {
1655        collapse();
1656      }
1657      if (m_op==IDENTITY)       // So a lazy expression of Constant or Tagged data will be returned here.
1658      {
1659        return m_id;
1660      }
1661        // from this point on we must have m_op!=IDENTITY and m_readytype=='E'
1662      size_t threadbuffersize=m_maxsamplesize*(max(1,m_buffsRequired)); // Each thread needs to have enough
1663        // storage to evaluate its expression
1664    int numthreads=1;    int numthreads=1;
1665  #ifdef _OPENMP  #ifdef _OPENMP
1666    numthreads=omp_get_max_threads();    numthreads=omp_get_max_threads();
   int threadnum=0;  
1667  #endif  #endif
1668    ValueType v(numthreads*threadbuffersize);    ValueType v(numthreads*threadbuffersize);
1669  cout << "Buffer created with size=" << v.size() << endl;  LAZYDEBUG(cout << "Buffer created with size=" << v.size() << endl;)
1670    ValueType dummy(getNoValues());    DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(),  ValueType(getNoValues()));
1671    DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(),dummy);    ValueType& resvec=result->getVectorRW();
   ValueType& resvec=result->getVector();  
1672    DataReady_ptr resptr=DataReady_ptr(result);    DataReady_ptr resptr=DataReady_ptr(result);
1673    int sample;    int sample;
1674    int resoffset;    size_t outoffset;     // offset in the output data
1675    int totalsamples=getNumSamples();    int totalsamples=getNumSamples();
1676    #pragma omp parallel for private(sample,resoffset,threadnum) schedule(static)    const ValueType* res=0;   // Vector storing the answer
1677      size_t resoffset=0;       // where in the vector to find the answer
1678    LAZYDEBUG(cout << "Total number of samples=" <<totalsamples << endl;)
1679      #pragma omp parallel for private(sample,resoffset,outoffset,res) schedule(static)
1680    for (sample=0;sample<totalsamples;++sample)    for (sample=0;sample<totalsamples;++sample)
1681    {    {
1682          if (sample==0)  {ENABLEDEBUG}
1683    
1684    //       if (sample==5800/getNumDPPSample())  {ENABLEDEBUG}
1685    LAZYDEBUG(cout << "################################# " << sample << endl;)
1686  #ifdef _OPENMP  #ifdef _OPENMP
1687      const double* res=resolveSample(v,sample,threadbuffersize*omp_get_thread_num());      res=resolveSample(v,threadbuffersize*omp_get_thread_num(),sample,resoffset);
1688  #else  #else
1689      const double* res=resolveSample(v,sample,0);   // this would normally be v, but not if its a single IDENTITY op.      res=resolveSample(v,0,sample,resoffset);   // res would normally be v, but not if its a single IDENTITY op.
1690  #endif  #endif
1691      resoffset=result->getPointOffset(sample,0);  LAZYDEBUG(cerr << "-------------------------------- " << endl;)
1692      for (int i=0;i<m_samplesize;++i,++resoffset)    // copy values into the output vector  LAZYDEBUG(cerr<< "Copying sample#" << sample << endl;)
1693        outoffset=result->getPointOffset(sample,0);
1694    LAZYDEBUG(cerr << "offset=" << outoffset << " from offset=" << resoffset << " " << m_samplesize << " doubles" << endl;)
1695        for (unsigned int i=0;i<m_samplesize;++i,++outoffset,++resoffset)   // copy values into the output vector
1696      {      {
1697      resvec[resoffset]=res[i];  LAZYDEBUG(cerr << "outoffset=" << outoffset << " resoffset=" << resoffset << " " << (*res)[resoffset]<< endl;)
1698        resvec[outoffset]=(*res)[resoffset];
1699      }      }
1700    LAZYDEBUG(cerr << DataTypes::pointToString(resvec,getShape(),outoffset-m_samplesize+DataTypes::noValues(getShape()),"Final result:") << endl;)
1701    LAZYDEBUG(cerr << "*********************************" << endl;)
1702        DISABLEDEBUG
1703    }    }
1704    return resptr;    return resptr;
1705  }  }
# Line 435  DataLazy::toString() const Line 1713  DataLazy::toString() const
1713    return oss.str();    return oss.str();
1714  }  }
1715    
1716    
1717  void  void
1718  DataLazy::intoString(ostringstream& oss) const  DataLazy::intoString(ostringstream& oss) const
1719  {  {
1720    //    oss << "[" << m_children <<";"<<m_height <<"]";
1721    switch (getOpgroup(m_op))    switch (getOpgroup(m_op))
1722    {    {
1723    case G_IDENTITY:    case G_IDENTITY:
1724        if (m_id->isExpanded())
1725        {
1726           oss << "E";
1727        }
1728        else if (m_id->isTagged())
1729        {
1730          oss << "T";
1731        }
1732        else if (m_id->isConstant())
1733        {
1734          oss << "C";
1735        }
1736        else
1737        {
1738          oss << "?";
1739        }
1740      oss << '@' << m_id.get();      oss << '@' << m_id.get();
1741      break;      break;
1742    case G_BINARY:    case G_BINARY:
# Line 451  DataLazy::intoString(ostringstream& oss) Line 1747  DataLazy::intoString(ostringstream& oss)
1747      oss << ')';      oss << ')';
1748      break;      break;
1749    case G_UNARY:    case G_UNARY:
1750      case G_UNARY_P:
1751      case G_NP1OUT:
1752      case G_NP1OUT_P:
1753      oss << opToString(m_op) << '(';      oss << opToString(m_op) << '(';
1754      m_left->intoString(oss);      m_left->intoString(oss);
1755      oss << ')';      oss << ')';
1756      break;      break;
1757      case G_TENSORPROD:
1758        oss << opToString(m_op) << '(';
1759        m_left->intoString(oss);
1760        oss << ", ";
1761        m_right->intoString(oss);
1762        oss << ')';
1763        break;
1764      case G_NP1OUT_2P:
1765        oss << opToString(m_op) << '(';
1766        m_left->intoString(oss);
1767        oss << ", " << m_axis_offset << ", " << m_transpose;
1768        oss << ')';
1769        break;
1770    default:    default:
1771      oss << "UNKNOWN";      oss << "UNKNOWN";
1772    }    }
1773  }  }
1774    
 // Note that in this case, deepCopy does not make copies of the leaves.  
 // Hopefully copy on write (or whatever we end up using) will take care of this.  
1775  DataAbstract*  DataAbstract*
1776  DataLazy::deepCopy()  DataLazy::deepCopy()
1777  {  {
1778    if (m_op==IDENTITY)    switch (getOpgroup(m_op))
1779    {    {
1780      return new DataLazy(m_left);    // we don't need to copy the child here    case G_IDENTITY:  return new DataLazy(m_id->deepCopy()->getPtr());
1781      case G_UNARY: return new DataLazy(m_left->deepCopy()->getPtr(),m_op);
1782      case G_BINARY:    return new DataLazy(m_left->deepCopy()->getPtr(),m_right->deepCopy()->getPtr(),m_op);
1783      case G_NP1OUT: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(),m_op);
1784      case G_TENSORPROD: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
1785      default:
1786        throw DataException("Programmer error - do not know how to deepcopy operator "+opToString(m_op)+".");
1787    }    }
   return new DataLazy(m_left->deepCopy()->getPtr(),m_right->deepCopy()->getPtr(),m_op);  
1788  }  }
1789    
1790    
1791    // There is no single, natural interpretation of getLength on DataLazy.
1792    // Instances of DataReady can look at the size of their vectors.
1793    // For lazy though, it could be the size the data would be if it were resolved;
1794    // or it could be some function of the lengths of the DataReady instances which
1795    // form part of the expression.
1796    // Rather than have people making assumptions, I have disabled the method.
1797  DataTypes::ValueType::size_type  DataTypes::ValueType::size_type
1798  DataLazy::getLength() const  DataLazy::getLength() const
1799  {  {
1800    return m_length;    throw DataException("getLength() does not make sense for lazy data.");
1801  }  }
1802    
1803    
# Line 486  DataLazy::getSlice(const DataTypes::Regi Line 1807  DataLazy::getSlice(const DataTypes::Regi
1807    throw DataException("getSlice - not implemented for Lazy objects.");    throw DataException("getSlice - not implemented for Lazy objects.");
1808  }  }
1809    
1810    
1811    // To do this we need to rely on our child nodes
1812    DataTypes::ValueType::size_type
1813    DataLazy::getPointOffset(int sampleNo,
1814                     int dataPointNo)
1815    {
1816      if (m_op==IDENTITY)
1817      {
1818        return m_id->getPointOffset(sampleNo,dataPointNo);
1819      }
1820      if (m_readytype!='E')
1821      {
1822        collapse();
1823        return m_id->getPointOffset(sampleNo,dataPointNo);
1824      }
1825      // at this point we do not have an identity node and the expression will be Expanded
1826      // so we only need to know which child to ask
1827      if (m_left->m_readytype=='E')
1828      {
1829        return m_left->getPointOffset(sampleNo,dataPointNo);
1830      }
1831      else
1832      {
1833        return m_right->getPointOffset(sampleNo,dataPointNo);
1834      }
1835    }
1836    
1837    // To do this we need to rely on our child nodes
1838  DataTypes::ValueType::size_type  DataTypes::ValueType::size_type
1839  DataLazy::getPointOffset(int sampleNo,  DataLazy::getPointOffset(int sampleNo,
1840                   int dataPointNo) const                   int dataPointNo) const
1841  {  {
1842    throw DataException("getPointOffset - not implemented for Lazy objects - yet.");    if (m_op==IDENTITY)
1843      {
1844        return m_id->getPointOffset(sampleNo,dataPointNo);
1845      }
1846      if (m_readytype=='E')
1847      {
1848        // at this point we do not have an identity node and the expression will be Expanded
1849        // so we only need to know which child to ask
1850        if (m_left->m_readytype=='E')
1851        {
1852        return m_left->getPointOffset(sampleNo,dataPointNo);
1853        }
1854        else
1855        {
1856        return m_right->getPointOffset(sampleNo,dataPointNo);
1857        }
1858      }
1859      if (m_readytype=='C')
1860      {
1861        return m_left->getPointOffset(sampleNo,dataPointNo); // which child doesn't matter
1862      }
1863      throw DataException("Programmer error - getPointOffset on lazy data may require collapsing (but this object is marked const).");
1864    }
1865    
1866    
1867    // I have decided to let Data:: handle this issue.
1868    void
1869    DataLazy::setToZero()
1870    {
1871    //   DataTypes::ValueType v(getNoValues(),0);
1872    //   m_id=DataReady_ptr(new DataConstant(getFunctionSpace(),getShape(),v));
1873    //   m_op=IDENTITY;
1874    //   m_right.reset();  
1875    //   m_left.reset();
1876    //   m_readytype='C';
1877    //   m_buffsRequired=1;
1878    
1879      throw DataException("Programmer error - setToZero not supported for DataLazy (DataLazy objects should be read only).");
1880    
1881    }
1882    
1883    bool
1884    DataLazy::actsExpanded() const
1885    {
1886        return (m_readytype=='E');
1887  }  }
1888    
1889  }   // end namespace  }   // end namespace

Legend:
Removed from v.1888  
changed lines
  Added in v.2496

  ViewVC Help
Powered by ViewVC 1.1.26