/[escript]/branches/clazy/escriptcore/src/DataLazy.cpp
ViewVC logotype

Diff of /branches/clazy/escriptcore/src/DataLazy.cpp

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

branches/schroedinger/escript/src/DataLazy.cpp revision 1888 by jfenwick, Wed Oct 15 04:00:21 2008 UTC trunk/escript/src/DataLazy.cpp revision 2721 by jfenwick, Fri Oct 16 05:40:12 2009 UTC
# Line 1  Line 1 
1    
2  /*******************************************************  /*******************************************************
3  *  *
4  * Copyright (c) 2003-2008 by University of Queensland  * Copyright (c) 2003-2009 by University of Queensland
5  * Earth Systems Science Computational Center (ESSCC)  * Earth Systems Science Computational Center (ESSCC)
6  * http://www.uq.edu.au/esscc  * http://www.uq.edu.au/esscc
7  *  *
# Line 26  Line 26 
26  #include "DataTypes.h"  #include "DataTypes.h"
27  #include "Data.h"  #include "Data.h"
28  #include "UnaryFuncs.h"     // for escript::fsign  #include "UnaryFuncs.h"     // for escript::fsign
29    #include "Utils.h"
30    
31    #include "EscriptParams.h"
32    
33    #include <iomanip>      // for some fancy formatting in debug
34    
35    // #define LAZYDEBUG(X) if (privdebug){X;}
36    #define LAZYDEBUG(X)
37    namespace
38    {
39    bool privdebug=false;
40    
41    #define ENABLEDEBUG privdebug=true;
42    #define DISABLEDEBUG privdebug=false;
43    }
44    
45    // #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {cerr << "\n!!!!!!! SIZE LIMIT EXCEEDED " << m_children << ";" << m_height << endl << toString() << endl;resolveToIdentity();}
46    
47    #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {resolveToIdentity();}
48    
49    
50    /*
51    How does DataLazy work?
52    ~~~~~~~~~~~~~~~~~~~~~~~
53    
54    Each instance represents a single operation on one or two other DataLazy instances. These arguments are normally
55    denoted left and right.
56    
57    A special operation, IDENTITY, stores an instance of DataReady in the m_id member.
58    This means that all "internal" nodes in the structure are instances of DataLazy.
59    
60    Each operation has a string representation as well as an opgroup - eg G_IDENTITY, G_BINARY, ...
61    Note that IDENTITY is not considered a unary operation.
62    
63    I am avoiding calling the structure formed a tree because it is not guaranteed to be one (eg c=a+a).
64    It must however form a DAG (directed acyclic graph).
65    I will refer to individual DataLazy objects with the structure as nodes.
66    
67    Each node also stores:
68    - m_readytype \in {'E','T','C','?'} ~ indicates what sort of DataReady would be produced if the expression was
69        evaluated.
70    - m_buffsrequired ~ the largest number of samples which would need to be kept simultaneously in order to
71        evaluate the expression.
72    - m_samplesize ~ the number of doubles stored in a sample.
73    
74    When a new node is created, the above values are computed based on the values in the child nodes.
75    Eg: if left requires 4 samples and right requires 6 then left+right requires 7 samples.
76    
77    The resolve method, which produces a DataReady from a DataLazy, does the following:
78    1) Create a DataReady to hold the new result.
79    2) Allocate a vector (v) big enough to hold m_buffsrequired samples.
80    3) For each sample, call resolveSample with v, to get its values and copy them into the result object.
81    
82    (In the case of OMP, multiple samples are resolved in parallel so the vector needs to be larger.)
83    
84    resolveSample returns a Vector* and an offset within that vector where the result is stored.
85    Normally, this would be v, but for identity nodes their internal vector is returned instead.
86    
87    The convention that I use, is that the resolve methods should store their results starting at the offset they are passed.
88    
89    For expressions which evaluate to Constant or Tagged, there is a different evaluation method.
90    The collapse method invokes the (non-lazy) operations on the Data class to evaluate the expression.
91    
92    To add a new operator you need to do the following (plus anything I might have forgotten - adding a new group for example):
93    1) Add to the ES_optype.
94    2) determine what opgroup your operation belongs to (X)
95    3) add a string for the op to the end of ES_opstrings
96    4) increase ES_opcount
97    5) add an entry (X) to opgroups
98    6) add an entry to the switch in collapseToReady
99    7) add an entry to resolveX
100    */
101    
102    
103  using namespace std;  using namespace std;
104  using namespace boost;  using namespace boost;
# Line 33  using namespace boost; Line 106  using namespace boost;
106  namespace escript  namespace escript
107  {  {
108    
 const std::string&  
 opToString(ES_optype op);  
   
109  namespace  namespace
110  {  {
111    
   
   
112  enum ES_opgroup  enum ES_opgroup
113  {  {
114     G_UNKNOWN,     G_UNKNOWN,
115     G_IDENTITY,     G_IDENTITY,
116     G_BINARY,     G_BINARY,        // pointwise operations with two arguments
117     G_UNARY     G_UNARY,     // pointwise operations with one argument
118       G_UNARY_P,       // pointwise operations with one argument, requiring a parameter
119       G_NP1OUT,        // non-pointwise op with one output
120       G_NP1OUT_P,      // non-pointwise op with one output requiring a parameter
121       G_TENSORPROD,    // general tensor product
122       G_NP1OUT_2P,     // non-pointwise op with one output requiring two params
123       G_REDUCTION      // non-pointwise unary op with a scalar output
124  };  };
125    
126    
127    
128    
129  string ES_opstrings[]={"UNKNOWN","IDENTITY","+","-","*","/","sin","cos","tan",  string ES_opstrings[]={"UNKNOWN","IDENTITY","+","-","*","/","^",
130                "sin","cos","tan",
131              "asin","acos","atan","sinh","cosh","tanh","erf",              "asin","acos","atan","sinh","cosh","tanh","erf",
132              "asinh","acosh","atanh",              "asinh","acosh","atanh",
133              "log10","log","sign","abs","neg","pos","exp","sqrt",              "log10","log","sign","abs","neg","pos","exp","sqrt",
134              "1/","where>0","where<0","where>=0","where<=0"};              "1/","where>0","where<0","where>=0","where<=0", "where<>0","where=0",
135  int ES_opcount=32;              "symmetric","nonsymmetric",
136  ES_opgroup opgroups[]={G_UNKNOWN,G_IDENTITY,G_BINARY,G_BINARY,G_BINARY,G_BINARY,G_UNARY,G_UNARY,G_UNARY, //9              "prod",
137              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,    // 16              "transpose", "trace",
138              G_UNARY,G_UNARY,G_UNARY,                    // 19              "swapaxes",
139              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,        // 27              "minval", "maxval"};
140              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY};  int ES_opcount=43;
141    ES_opgroup opgroups[]={G_UNKNOWN,G_IDENTITY,G_BINARY,G_BINARY,G_BINARY,G_BINARY, G_BINARY,
142                G_UNARY,G_UNARY,G_UNARY, //10
143                G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,    // 17
144                G_UNARY,G_UNARY,G_UNARY,                    // 20
145                G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,    // 28
146                G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY, G_UNARY_P, G_UNARY_P,      // 35
147                G_NP1OUT,G_NP1OUT,
148                G_TENSORPROD,
149                G_NP1OUT_P, G_NP1OUT_P,
150                G_NP1OUT_2P,
151                G_REDUCTION, G_REDUCTION};
152  inline  inline
153  ES_opgroup  ES_opgroup
154  getOpgroup(ES_optype op)  getOpgroup(ES_optype op)
# Line 79  resultFS(DataAbstract_ptr left, DataAbst Line 165  resultFS(DataAbstract_ptr left, DataAbst
165      // that way, if interpolate is required in any other op we can just throw a      // that way, if interpolate is required in any other op we can just throw a
166      // programming error exception.      // programming error exception.
167    
168      FunctionSpace l=left->getFunctionSpace();
169      if (left->getFunctionSpace()!=right->getFunctionSpace())    FunctionSpace r=right->getFunctionSpace();
170      {    if (l!=r)
171          throw DataException("FunctionSpaces not equal - interpolation not supported on lazy data.");    {
172      }      if (r.probeInterpolation(l))
173      return left->getFunctionSpace();      {
174        return l;
175        }
176        if (l.probeInterpolation(r))
177        {
178        return r;
179        }
180        throw DataException("Cannot interpolate between the FunctionSpaces given for operation "+opToString(op)+".");
181      }
182      return l;
183  }  }
184    
185  // return the shape of the result of "left op right"  // return the shape of the result of "left op right"
186    // the shapes resulting from tensor product are more complex to compute so are worked out elsewhere
187  DataTypes::ShapeType  DataTypes::ShapeType
188  resultShape(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)  resultShape(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
189  {  {
190      if (left->getShape()!=right->getShape())      if (left->getShape()!=right->getShape())
191      {      {
192          throw DataException("Shapes not the same - shapes must match for lazy data.");        if ((getOpgroup(op)!=G_BINARY) && (getOpgroup(op)!=G_NP1OUT))
193          {
194          throw DataException("Shapes not the same - shapes must match for (point)binary operations.");
195          }
196    
197          if (left->getRank()==0)   // we need to allow scalar * anything
198          {
199            return right->getShape();
200          }
201          if (right->getRank()==0)
202          {
203            return left->getShape();
204          }
205          throw DataException("Shapes not the same - arguments must have matching shapes (or be scalars) for (point)binary operations on lazy data.");
206      }      }
207      return left->getShape();      return left->getShape();
208  }  }
209    
210  size_t  // return the shape for "op left"
211  resultLength(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)  
212    DataTypes::ShapeType
213    resultShape(DataAbstract_ptr left, ES_optype op, int axis_offset)
214  {  {
215     switch (getOpgroup(op))      switch(op)
216     {      {
217     case G_BINARY: return left->getLength();          case TRANS:
218     case G_UNARY: return left->getLength();         {            // for the scoping of variables
219     default:          const DataTypes::ShapeType& s=left->getShape();
220      throw DataException("Programmer Error - attempt to getLength() for operator "+opToString(op)+".");          DataTypes::ShapeType sh;
221     }          int rank=left->getRank();
222            if (axis_offset<0 || axis_offset>rank)
223            {
224                   throw DataException("Error - Data::transpose must have 0 <= axis_offset <= rank=" + rank);
225                }
226                for (int i=0; i<rank; i++)
227            {
228               int index = (axis_offset+i)%rank;
229                   sh.push_back(s[index]); // Append to new shape
230                }
231            return sh;
232           }
233        break;
234        case TRACE:
235           {
236            int rank=left->getRank();
237            if (rank<2)
238            {
239               throw DataException("Trace can only be computed for objects with rank 2 or greater.");
240            }
241            if ((axis_offset>rank-2) || (axis_offset<0))
242            {
243               throw DataException("Trace: axis offset must lie between 0 and rank-2 inclusive.");
244            }
245            if (rank==2)
246            {
247               return DataTypes::scalarShape;
248            }
249            else if (rank==3)
250            {
251               DataTypes::ShapeType sh;
252                   if (axis_offset==0)
253               {
254                    sh.push_back(left->getShape()[2]);
255                   }
256                   else     // offset==1
257               {
258                sh.push_back(left->getShape()[0]);
259                   }
260               return sh;
261            }
262            else if (rank==4)
263            {
264               DataTypes::ShapeType sh;
265               const DataTypes::ShapeType& s=left->getShape();
266                   if (axis_offset==0)
267               {
268                    sh.push_back(s[2]);
269                    sh.push_back(s[3]);
270                   }
271                   else if (axis_offset==1)
272               {
273                    sh.push_back(s[0]);
274                    sh.push_back(s[3]);
275                   }
276               else     // offset==2
277               {
278                sh.push_back(s[0]);
279                sh.push_back(s[1]);
280               }
281               return sh;
282            }
283            else        // unknown rank
284            {
285               throw DataException("Error - Data::trace can only be calculated for rank 2, 3 or 4 object.");
286            }
287           }
288        break;
289            default:
290        throw DataException("Programmer error - resultShape(left,op) can't compute shapes for operator "+opToString(op)+".");
291        }
292    }
293    
294    DataTypes::ShapeType
295    SwapShape(DataAbstract_ptr left, const int axis0, const int axis1)
296    {
297         // This code taken from the Data.cpp swapaxes() method
298         // Some of the checks are probably redundant here
299         int axis0_tmp,axis1_tmp;
300         const DataTypes::ShapeType& s=left->getShape();
301         DataTypes::ShapeType out_shape;
302         // Here's the equivalent of python s_out=s[axis_offset:]+s[:axis_offset]
303         // which goes thru all shape vector elements starting with axis_offset (at index=rank wrap around to 0)
304         int rank=left->getRank();
305         if (rank<2) {
306            throw DataException("Error - Data::swapaxes argument must have at least rank 2.");
307         }
308         if (axis0<0 || axis0>rank-1) {
309            throw DataException("Error - Data::swapaxes: axis0 must be between 0 and rank-1=" + rank-1);
310         }
311         if (axis1<0 || axis1>rank-1) {
312             throw DataException("Error - Data::swapaxes: axis1 must be between 0 and rank-1=" + rank-1);
313         }
314         if (axis0 == axis1) {
315             throw DataException("Error - Data::swapaxes: axis indices must be different.");
316         }
317         if (axis0 > axis1) {
318             axis0_tmp=axis1;
319             axis1_tmp=axis0;
320         } else {
321             axis0_tmp=axis0;
322             axis1_tmp=axis1;
323         }
324         for (int i=0; i<rank; i++) {
325           if (i == axis0_tmp) {
326              out_shape.push_back(s[axis1_tmp]);
327           } else if (i == axis1_tmp) {
328              out_shape.push_back(s[axis0_tmp]);
329           } else {
330              out_shape.push_back(s[i]);
331           }
332         }
333        return out_shape;
334    }
335    
336    
337    // determine the output shape for the general tensor product operation
338    // the additional parameters return information required later for the product
339    // the majority of this code is copy pasted from C_General_Tensor_Product
340    DataTypes::ShapeType
341    GTPShape(DataAbstract_ptr left, DataAbstract_ptr right, int axis_offset, int transpose, int& SL, int& SM, int& SR)
342    {
343        
344      // Get rank and shape of inputs
345      int rank0 = left->getRank();
346      int rank1 = right->getRank();
347      const DataTypes::ShapeType& shape0 = left->getShape();
348      const DataTypes::ShapeType& shape1 = right->getShape();
349    
350      // Prepare for the loops of the product and verify compatibility of shapes
351      int start0=0, start1=0;
352      if (transpose == 0)       {}
353      else if (transpose == 1)  { start0 = axis_offset; }
354      else if (transpose == 2)  { start1 = rank1-axis_offset; }
355      else              { throw DataException("DataLazy GeneralTensorProduct Constructor: Error - transpose should be 0, 1 or 2"); }
356    
357      if (rank0<axis_offset)
358      {
359        throw DataException("DataLazy GeneralTensorProduct Constructor: Error - rank of left < axisoffset");
360      }
361    
362      // Adjust the shapes for transpose
363      DataTypes::ShapeType tmpShape0(rank0);    // pre-sizing the vectors rather
364      DataTypes::ShapeType tmpShape1(rank1);    // than using push_back
365      for (int i=0; i<rank0; i++)   { tmpShape0[i]=shape0[(i+start0)%rank0]; }
366      for (int i=0; i<rank1; i++)   { tmpShape1[i]=shape1[(i+start1)%rank1]; }
367    
368      // Prepare for the loops of the product
369      SL=1, SM=1, SR=1;
370      for (int i=0; i<rank0-axis_offset; i++)   {
371        SL *= tmpShape0[i];
372      }
373      for (int i=rank0-axis_offset; i<rank0; i++)   {
374        if (tmpShape0[i] != tmpShape1[i-(rank0-axis_offset)]) {
375          throw DataException("C_GeneralTensorProduct: Error - incompatible shapes");
376        }
377        SM *= tmpShape0[i];
378      }
379      for (int i=axis_offset; i<rank1; i++)     {
380        SR *= tmpShape1[i];
381      }
382    
383      // Define the shape of the output (rank of shape is the sum of the loop ranges below)
384      DataTypes::ShapeType shape2(rank0+rank1-2*axis_offset);  
385      {         // block to limit the scope of out_index
386         int out_index=0;
387         for (int i=0; i<rank0-axis_offset; i++, ++out_index) { shape2[out_index]=tmpShape0[i]; } // First part of arg_0_Z
388         for (int i=axis_offset; i<rank1; i++, ++out_index)   { shape2[out_index]=tmpShape1[i]; } // Last part of arg_1_Z
389      }
390    
391      if (shape2.size()>ESCRIPT_MAX_DATA_RANK)
392      {
393         ostringstream os;
394         os << "C_GeneralTensorProduct: Error - Attempt to create a rank " << shape2.size() << " object. The maximum rank is " << ESCRIPT_MAX_DATA_RANK << ".";
395         throw DataException(os.str());
396      }
397    
398      return shape2;
399  }  }
400    
401    // determine the number of samples requires to evaluate an expression combining left and right
402    // NP1OUT needs an extra buffer because we can't write the answers over the top of the input.
403    // The same goes for G_TENSORPROD
404    // It might seem that pointwise binary ops (G_BINARY) could be written over the top of the lefts.
405    // This would be true were it not for the possibility that the LHS could be a scalar which needs to be examined
406    // multiple times
407  int  int
408  calcBuffs(const DataLazy_ptr& left, const DataLazy_ptr& right, ES_optype op)  calcBuffs(const DataLazy_ptr& left, const DataLazy_ptr& right, ES_optype op)
409  {  {
410     switch(getOpgroup(op))     switch(getOpgroup(op))
411     {     {
412     case G_IDENTITY: return 1;     case G_IDENTITY: return 1;
413     case G_BINARY: return max(left->getBuffsRequired(),right->getBuffsRequired()+1);     case G_BINARY: return 1+max(left->getBuffsRequired(),right->getBuffsRequired()+1);
414     case G_UNARY: return max(left->getBuffsRequired(),1);     case G_REDUCTION:
415       case G_UNARY:
416       case G_UNARY_P: return max(left->getBuffsRequired(),1);
417       case G_NP1OUT: return 1+max(left->getBuffsRequired(),1);
418       case G_NP1OUT_P: return 1+max(left->getBuffsRequired(),1);
419       case G_TENSORPROD: return 1+max(left->getBuffsRequired(),right->getBuffsRequired()+1);
420       case G_NP1OUT_2P: return 1+max(left->getBuffsRequired(),1);
421     default:     default:
422      throw DataException("Programmer Error - attempt to calcBuffs() for operator "+opToString(op)+".");      throw DataException("Programmer Error - attempt to calcBuffs() for operator "+opToString(op)+".");
423     }     }
424  }  }
425    
426    
   
427  }   // end anonymous namespace  }   // end anonymous namespace
428    
429    
430    
431    // Return a string representing the operation
432  const std::string&  const std::string&
433  opToString(ES_optype op)  opToString(ES_optype op)
434  {  {
# Line 138  opToString(ES_optype op) Line 439  opToString(ES_optype op)
439    return ES_opstrings[op];    return ES_opstrings[op];
440  }  }
441    
442    #ifdef LAZY_NODE_STORAGE
443    void DataLazy::LazyNodeSetup()
444    {
445    #ifdef _OPENMP
446        int numthreads=omp_get_max_threads();
447        m_samples.resize(numthreads*m_samplesize);
448        m_sampleids=new int[numthreads];
449        for (int i=0;i<numthreads;++i)
450        {
451            m_sampleids[i]=-1;  
452        }
453    #else
454        m_samples.resize(m_samplesize);
455        m_sampleids=new int[1];
456        m_sampleids[0]=-1;
457    #endif  // _OPENMP
458    }
459    #endif   // LAZY_NODE_STORAGE
460    
461    
462    // Creates an identity node
463  DataLazy::DataLazy(DataAbstract_ptr p)  DataLazy::DataLazy(DataAbstract_ptr p)
464      : parent(p->getFunctionSpace(),p->getShape()),      : parent(p->getFunctionSpace(),p->getShape())
465      m_op(IDENTITY)  #ifdef LAZY_NODE_STORAGE
466        ,m_sampleids(0),
467        m_samples(1)
468    #endif
469  {  {
470     if (p->isLazy())     if (p->isLazy())
471     {     {
     // TODO: fix this.   We could make the new node a copy of p?  
472      // I don't want identity of Lazy.      // I don't want identity of Lazy.
473      // Question: Why would that be so bad?      // Question: Why would that be so bad?
474      // Answer: We assume that the child of ID is something we can call getVector on      // Answer: We assume that the child of ID is something we can call getVector on
# Line 153  DataLazy::DataLazy(DataAbstract_ptr p) Line 476  DataLazy::DataLazy(DataAbstract_ptr p)
476     }     }
477     else     else
478     {     {
479      m_id=dynamic_pointer_cast<DataReady>(p);      p->makeLazyShared();
480        DataReady_ptr dr=dynamic_pointer_cast<DataReady>(p);
481        makeIdentity(dr);
482    LAZYDEBUG(cout << "Wrapping " << dr.get() << " id=" << m_id.get() << endl;)
483     }     }
484     m_length=p->getLength();  LAZYDEBUG(cout << "(1)Lazy created with " << m_samplesize << endl;)
    m_buffsRequired=1;  
    m_samplesize=getNumDPPSample()*getNoValues();  
 cout << "(1)Lazy created with " << m_samplesize << endl;  
485  }  }
486    
487  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op)  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op)
488      : parent(left->getFunctionSpace(),left->getShape()),      : parent(left->getFunctionSpace(),(getOpgroup(op)!=G_REDUCTION)?left->getShape():DataTypes::scalarShape),
489      m_op(op)      m_op(op),
490        m_axis_offset(0),
491        m_transpose(0),
492        m_SL(0), m_SM(0), m_SR(0)
493  {  {
494     if (getOpgroup(op)!=G_UNARY)     if ((getOpgroup(op)!=G_UNARY) && (getOpgroup(op)!=G_NP1OUT) && (getOpgroup(op)!=G_REDUCTION))
495     {     {
496      throw DataException("Programmer error - constructor DataLazy(left, op) will only process UNARY operations.");      throw DataException("Programmer error - constructor DataLazy(left, op) will only process UNARY operations.");
497     }     }
498    
499     DataLazy_ptr lleft;     DataLazy_ptr lleft;
500     if (!left->isLazy())     if (!left->isLazy())
501     {     {
# Line 178  DataLazy::DataLazy(DataAbstract_ptr left Line 505  DataLazy::DataLazy(DataAbstract_ptr left
505     {     {
506      lleft=dynamic_pointer_cast<DataLazy>(left);      lleft=dynamic_pointer_cast<DataLazy>(left);
507     }     }
508     m_length=left->getLength();     m_readytype=lleft->m_readytype;
509     m_left=lleft;     m_left=lleft;
510     m_buffsRequired=1;     m_buffsRequired=calcBuffs(m_left, m_right,m_op); // yeah m_right will be null at this point
511     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
512       m_maxsamplesize=max(m_samplesize,m_left->getMaxSampleSize());
513       m_children=m_left->m_children+1;
514       m_height=m_left->m_height+1;
515    #ifdef LAZY_NODE_STORAGE
516       LazyNodeSetup();
517    #endif
518       SIZELIMIT
519  }  }
520    
521    
522  DataLazy::DataLazy(DataLazy_ptr left, DataLazy_ptr right, ES_optype op)  // In this constructor we need to consider interpolation
523    DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
524      : parent(resultFS(left,right,op), resultShape(left,right,op)),      : parent(resultFS(left,right,op), resultShape(left,right,op)),
525      m_left(left),      m_op(op),
526      m_right(right),      m_SL(0), m_SM(0), m_SR(0)
     m_op(op)  
527  {  {
528     if (getOpgroup(op)!=G_BINARY)  LAZYDEBUG(cout << "Forming operator with " << left.get() << " " << right.get() << endl;)
529       if ((getOpgroup(op)!=G_BINARY))
530     {     {
531      throw DataException("Programmer error - constructor DataLazy(left, right, op) will only process BINARY operations.");      throw DataException("Programmer error - constructor DataLazy(left, right, op) will only process BINARY operations.");
532     }     }
533     m_length=resultLength(m_left,m_right,m_op);  
534       if (getFunctionSpace()!=left->getFunctionSpace())    // left needs to be interpolated
535       {
536        FunctionSpace fs=getFunctionSpace();
537        Data ltemp(left);
538        Data tmp(ltemp,fs);
539        left=tmp.borrowDataPtr();
540       }
541       if (getFunctionSpace()!=right->getFunctionSpace())   // right needs to be interpolated
542       {
543        Data tmp(Data(right),getFunctionSpace());
544        right=tmp.borrowDataPtr();
545    LAZYDEBUG(cout << "Right interpolation required " << right.get() << endl;)
546       }
547       left->operandCheck(*right);
548    
549       if (left->isLazy())          // the children need to be DataLazy. Wrap them in IDENTITY if required
550       {
551        m_left=dynamic_pointer_cast<DataLazy>(left);
552    LAZYDEBUG(cout << "Left is " << m_left->toString() << endl;)
553       }
554       else
555       {
556        m_left=DataLazy_ptr(new DataLazy(left));
557    LAZYDEBUG(cout << "Left " << left.get() << " wrapped " << m_left->m_id.get() << endl;)
558       }
559       if (right->isLazy())
560       {
561        m_right=dynamic_pointer_cast<DataLazy>(right);
562    LAZYDEBUG(cout << "Right is " << m_right->toString() << endl;)
563       }
564       else
565       {
566        m_right=DataLazy_ptr(new DataLazy(right));
567    LAZYDEBUG(cout << "Right " << right.get() << " wrapped " << m_right->m_id.get() << endl;)
568       }
569       char lt=m_left->m_readytype;
570       char rt=m_right->m_readytype;
571       if (lt=='E' || rt=='E')
572       {
573        m_readytype='E';
574       }
575       else if (lt=='T' || rt=='T')
576       {
577        m_readytype='T';
578       }
579       else
580       {
581        m_readytype='C';
582       }
583     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
584     m_buffsRequired=calcBuffs(m_left, m_right, m_op);     m_maxsamplesize=max(max(m_samplesize,m_right->getMaxSampleSize()),m_left->getMaxSampleSize());  
585  cout << "(2)Lazy created with " << m_samplesize << endl;     m_buffsRequired=calcBuffs(m_left, m_right,m_op);
586       m_children=m_left->m_children+m_right->m_children+2;
587       m_height=max(m_left->m_height,m_right->m_height)+1;
588    #ifdef LAZY_NODE_STORAGE
589       LazyNodeSetup();
590    #endif
591       SIZELIMIT
592    LAZYDEBUG(cout << "(3)Lazy created with " << m_samplesize << endl;)
593  }  }
594    
595  DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)  DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op, int axis_offset, int transpose)
596      : parent(resultFS(left,right,op), resultShape(left,right,op)),      : parent(resultFS(left,right,op), GTPShape(left,right, axis_offset, transpose, m_SL,m_SM, m_SR)),
597      m_op(op)      m_op(op),
598        m_axis_offset(axis_offset),
599        m_transpose(transpose)
600  {  {
601     if (getOpgroup(op)!=G_BINARY)     if ((getOpgroup(op)!=G_TENSORPROD))
602       {
603        throw DataException("Programmer error - constructor DataLazy(left, right, op, ax, tr) will only process BINARY operations which require parameters.");
604       }
605       if ((transpose>2) || (transpose<0))
606     {     {
607      throw DataException("Programmer error - constructor DataLazy(left, op) will only process BINARY operations.");      throw DataException("DataLazy GeneralTensorProduct constructor: Error - transpose should be 0, 1 or 2");
608     }     }
609     if (left->isLazy())     if (getFunctionSpace()!=left->getFunctionSpace())    // left needs to be interpolated
610       {
611        FunctionSpace fs=getFunctionSpace();
612        Data ltemp(left);
613        Data tmp(ltemp,fs);
614        left=tmp.borrowDataPtr();
615       }
616       if (getFunctionSpace()!=right->getFunctionSpace())   // right needs to be interpolated
617       {
618        Data tmp(Data(right),getFunctionSpace());
619        right=tmp.borrowDataPtr();
620       }
621    //    left->operandCheck(*right);
622    
623       if (left->isLazy())          // the children need to be DataLazy. Wrap them in IDENTITY if required
624     {     {
625      m_left=dynamic_pointer_cast<DataLazy>(left);      m_left=dynamic_pointer_cast<DataLazy>(left);
626     }     }
# Line 225  DataLazy::DataLazy(DataAbstract_ptr left Line 636  DataLazy::DataLazy(DataAbstract_ptr left
636     {     {
637      m_right=DataLazy_ptr(new DataLazy(right));      m_right=DataLazy_ptr(new DataLazy(right));
638     }     }
639       char lt=m_left->m_readytype;
640     m_length=resultLength(m_left,m_right,m_op);     char rt=m_right->m_readytype;
641       if (lt=='E' || rt=='E')
642       {
643        m_readytype='E';
644       }
645       else if (lt=='T' || rt=='T')
646       {
647        m_readytype='T';
648       }
649       else
650       {
651        m_readytype='C';
652       }
653     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
654       m_maxsamplesize=max(max(m_samplesize,m_right->getMaxSampleSize()),m_left->getMaxSampleSize());  
655     m_buffsRequired=calcBuffs(m_left, m_right,m_op);     m_buffsRequired=calcBuffs(m_left, m_right,m_op);
656  cout << "(3)Lazy created with " << m_samplesize << endl;     m_children=m_left->m_children+m_right->m_children+2;
657       m_height=max(m_left->m_height,m_right->m_height)+1;
658    #ifdef LAZY_NODE_STORAGE
659       LazyNodeSetup();
660    #endif
661       SIZELIMIT
662    LAZYDEBUG(cout << "(4)Lazy created with " << m_samplesize << endl;)
663    }
664    
665    
666    DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, int axis_offset)
667        : parent(left->getFunctionSpace(), resultShape(left,op, axis_offset)),
668        m_op(op),
669        m_axis_offset(axis_offset),
670        m_transpose(0),
671        m_tol(0)
672    {
673       if ((getOpgroup(op)!=G_NP1OUT_P))
674       {
675        throw DataException("Programmer error - constructor DataLazy(left, op, ax) will only process UNARY operations which require parameters.");
676       }
677       DataLazy_ptr lleft;
678       if (!left->isLazy())
679       {
680        lleft=DataLazy_ptr(new DataLazy(left));
681       }
682       else
683       {
684        lleft=dynamic_pointer_cast<DataLazy>(left);
685       }
686       m_readytype=lleft->m_readytype;
687       m_left=lleft;
688       m_buffsRequired=calcBuffs(m_left, m_right,m_op); // yeah m_right will be null at this point
689       m_samplesize=getNumDPPSample()*getNoValues();
690       m_maxsamplesize=max(m_samplesize,m_left->getMaxSampleSize());
691       m_children=m_left->m_children+1;
692       m_height=m_left->m_height+1;
693    #ifdef LAZY_NODE_STORAGE
694       LazyNodeSetup();
695    #endif
696       SIZELIMIT
697    LAZYDEBUG(cout << "(5)Lazy created with " << m_samplesize << endl;)
698    }
699    
700    DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, double tol)
701        : parent(left->getFunctionSpace(), left->getShape()),
702        m_op(op),
703        m_axis_offset(0),
704        m_transpose(0),
705        m_tol(tol)
706    {
707       if ((getOpgroup(op)!=G_UNARY_P))
708       {
709        throw DataException("Programmer error - constructor DataLazy(left, op, tol) will only process UNARY operations which require parameters.");
710       }
711       DataLazy_ptr lleft;
712       if (!left->isLazy())
713       {
714        lleft=DataLazy_ptr(new DataLazy(left));
715       }
716       else
717       {
718        lleft=dynamic_pointer_cast<DataLazy>(left);
719       }
720       m_readytype=lleft->m_readytype;
721       m_left=lleft;
722       m_buffsRequired=calcBuffs(m_left, m_right,m_op); // yeah m_right will be null at this point
723       m_samplesize=getNumDPPSample()*getNoValues();
724       m_maxsamplesize=max(m_samplesize,m_left->getMaxSampleSize());
725       m_children=m_left->m_children+1;
726       m_height=m_left->m_height+1;
727    #ifdef LAZY_NODE_STORAGE
728       LazyNodeSetup();
729    #endif
730       SIZELIMIT
731    LAZYDEBUG(cout << "(6)Lazy created with " << m_samplesize << endl;)
732  }  }
733    
734    
735    DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, const int axis0, const int axis1)
736        : parent(left->getFunctionSpace(), SwapShape(left,axis0,axis1)),
737        m_op(op),
738        m_axis_offset(axis0),
739        m_transpose(axis1),
740        m_tol(0)
741    {
742       if ((getOpgroup(op)!=G_NP1OUT_2P))
743       {
744        throw DataException("Programmer error - constructor DataLazy(left, op, tol) will only process UNARY operations which require two integer parameters.");
745       }
746       DataLazy_ptr lleft;
747       if (!left->isLazy())
748       {
749        lleft=DataLazy_ptr(new DataLazy(left));
750       }
751       else
752       {
753        lleft=dynamic_pointer_cast<DataLazy>(left);
754       }
755       m_readytype=lleft->m_readytype;
756       m_left=lleft;
757       m_buffsRequired=calcBuffs(m_left, m_right,m_op); // yeah m_right will be null at this point
758       m_samplesize=getNumDPPSample()*getNoValues();
759       m_maxsamplesize=max(m_samplesize,m_left->getMaxSampleSize());
760       m_children=m_left->m_children+1;
761       m_height=m_left->m_height+1;
762    #ifdef LAZY_NODE_STORAGE
763       LazyNodeSetup();
764    #endif
765       SIZELIMIT
766    LAZYDEBUG(cout << "(7)Lazy created with " << m_samplesize << endl;)
767    }
768    
769  DataLazy::~DataLazy()  DataLazy::~DataLazy()
770  {  {
771    #ifdef LAZY_NODE_SETUP
772       delete[] m_sampleids;
773       delete[] m_samples;
774    #endif
775  }  }
776    
777    
# Line 245  DataLazy::getBuffsRequired() const Line 782  DataLazy::getBuffsRequired() const
782  }  }
783    
784    
785  // the vector and the offset are a place where the method could write its data if it wishes  size_t
786  // it is not obligated to do so. For example, if it has its own storage already, it can use that.  DataLazy::getMaxSampleSize() const
 // Hence the return value to indicate where the data is actually stored.  
 // Regardless, the storage should be assumed to be used, even if it isn't.  
 const double*  
 DataLazy::resolveSample(ValueType& v,int sampleNo,  size_t offset ) const  
787  {  {
788    if (m_op==IDENTITY)        return m_maxsamplesize;
789    }
790    
791    
792    
793    size_t
794    DataLazy::getSampleBufferSize() const
795    {
796        return m_maxsamplesize*(max(1,m_buffsRequired));
797    }
798    
799    /*
800      \brief Evaluates the expression using methods on Data.
801      This does the work for the collapse method.
802      For reasons of efficiency do not call this method on DataExpanded nodes.
803    */
804    DataReady_ptr
805    DataLazy::collapseToReady()
806    {
807      if (m_readytype=='E')
808      { // this is more an efficiency concern than anything else
809        throw DataException("Programmer Error - do not use collapse on Expanded data.");
810      }
811      if (m_op==IDENTITY)
812    {    {
813      const ValueType& vec=m_id->getVector();      return m_id;
     return &(vec[m_id->getPointOffset(sampleNo, 0)]);  
814    }    }
815    size_t rightoffset=offset+m_samplesize;    DataReady_ptr pleft=m_left->collapseToReady();
816    const double* left=m_left->resolveSample(v,sampleNo,offset);    Data left(pleft);
817    const double* right=0;    Data right;
818    if (getOpgroup(m_op)==G_BINARY)    if ((getOpgroup(m_op)==G_BINARY) || (getOpgroup(m_op)==G_TENSORPROD))
819    {    {
820      right=m_right->resolveSample(v,sampleNo,rightoffset);      right=Data(m_right->collapseToReady());
821    }    }
822    double* result=&(v[offset]);    Data result;
823      switch(m_op)
824    {    {
825      switch(m_op)      case ADD:
826      {      result=left+right;
     case ADD:       // since these are pointwise ops, pretend each sample is one point  
     tensor_binary_operation(m_samplesize, left, right, result, plus<double>());  
827      break;      break;
828      case SUB:            case SUB:      
829      tensor_binary_operation(m_samplesize, left, right, result, minus<double>());      result=left-right;
830      break;      break;
831      case MUL:            case MUL:      
832      tensor_binary_operation(m_samplesize, left, right, result, multiplies<double>());      result=left*right;
833      break;      break;
834      case DIV:            case DIV:      
835      tensor_binary_operation(m_samplesize, left, right, result, divides<double>());      result=left/right;
836      break;      break;
 // unary ops  
837      case SIN:      case SIN:
838      tensor_unary_operation(m_samplesize, left, result, ::sin);      result=left.sin();  
839        break;
840        case COS:
841        result=left.cos();
842        break;
843        case TAN:
844        result=left.tan();
845        break;
846        case ASIN:
847        result=left.asin();
848        break;
849        case ACOS:
850        result=left.acos();
851        break;
852        case ATAN:
853        result=left.atan();
854        break;
855        case SINH:
856        result=left.sinh();
857        break;
858        case COSH:
859        result=left.cosh();
860        break;
861        case TANH:
862        result=left.tanh();
863        break;
864        case ERF:
865        result=left.erf();
866        break;
867       case ASINH:
868        result=left.asinh();
869        break;
870       case ACOSH:
871        result=left.acosh();
872        break;
873       case ATANH:
874        result=left.atanh();
875        break;
876        case LOG10:
877        result=left.log10();
878        break;
879        case LOG:
880        result=left.log();
881        break;
882        case SIGN:
883        result=left.sign();
884        break;
885        case ABS:
886        result=left.abs();
887        break;
888        case NEG:
889        result=left.neg();
890        break;
891        case POS:
892        // it doesn't mean anything for delayed.
893        // it will just trigger a deep copy of the lazy object
894        throw DataException("Programmer error - POS not supported for lazy data.");
895        break;
896        case EXP:
897        result=left.exp();
898        break;
899        case SQRT:
900        result=left.sqrt();
901        break;
902        case RECIP:
903        result=left.oneOver();
904        break;
905        case GZ:
906        result=left.wherePositive();
907        break;
908        case LZ:
909        result=left.whereNegative();
910        break;
911        case GEZ:
912        result=left.whereNonNegative();
913        break;
914        case LEZ:
915        result=left.whereNonPositive();
916        break;
917        case NEZ:
918        result=left.whereNonZero(m_tol);
919        break;
920        case EZ:
921        result=left.whereZero(m_tol);
922        break;
923        case SYM:
924        result=left.symmetric();
925        break;
926        case NSYM:
927        result=left.nonsymmetric();
928        break;
929        case PROD:
930        result=C_GeneralTensorProduct(left,right,m_axis_offset, m_transpose);
931        break;
932        case TRANS:
933        result=left.transpose(m_axis_offset);
934        break;
935        case TRACE:
936        result=left.trace(m_axis_offset);
937        break;
938        case SWAP:
939        result=left.swapaxes(m_axis_offset, m_transpose);
940        break;
941        case MINVAL:
942        result=left.minval();
943        break;
944        case MAXVAL:
945        result=left.minval();
946        break;
947        default:
948        throw DataException("Programmer error - collapseToReady does not know how to resolve operator "+opToString(m_op)+".");
949      }
950      return result.borrowReadyPtr();
951    }
952    
953    /*
954       \brief Converts the DataLazy into an IDENTITY storing the value of the expression.
955       This method uses the original methods on the Data class to evaluate the expressions.
956       For this reason, it should not be used on DataExpanded instances. (To do so would defeat
957       the purpose of using DataLazy in the first place).
958    */
959    void
960    DataLazy::collapse()
961    {
962      if (m_op==IDENTITY)
963      {
964        return;
965      }
966      if (m_readytype=='E')
967      { // this is more an efficiency concern than anything else
968        throw DataException("Programmer Error - do not use collapse on Expanded data.");
969      }
970      m_id=collapseToReady();
971      m_op=IDENTITY;
972    }
973    
974    /*
975      \brief Compute the value of the expression (unary operation) for the given sample.
976      \return Vector which stores the value of the subexpression for the given sample.
977      \param v A vector to store intermediate results.
978      \param offset Index in v to begin storing results.
979      \param sampleNo Sample number to evaluate.
980      \param roffset (output parameter) the offset in the return vector where the result begins.
981    
982      The return value will be an existing vector so do not deallocate it.
983      If the result is stored in v it should be stored at the offset given.
984      Everything from offset to the end of v should be considered available for this method to use.
985    */
986    DataTypes::ValueType*
987    DataLazy::resolveUnary(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const
988    {
989        // we assume that any collapsing has been done before we get here
990        // since we only have one argument we don't need to think about only
991        // processing single points.
992      if (m_readytype!='E')
993      {
994        throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
995      }
996      const ValueType* vleft=m_left->resolveVectorSample(v,offset,sampleNo,roffset);
997      const double* left=&((*vleft)[roffset]);
998      double* result=&(v[offset]);
999      roffset=offset;
1000      switch (m_op)
1001      {
1002        case SIN:  
1003        tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sin);
1004      break;      break;
1005      case COS:      case COS:
1006      tensor_unary_operation(m_samplesize, left, result, ::cos);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cos);
1007      break;      break;
1008      case TAN:      case TAN:
1009      tensor_unary_operation(m_samplesize, left, result, ::tan);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tan);
1010      break;      break;
1011      case ASIN:      case ASIN:
1012      tensor_unary_operation(m_samplesize, left, result, ::asin);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::asin);
1013      break;      break;
1014      case ACOS:      case ACOS:
1015      tensor_unary_operation(m_samplesize, left, result, ::acos);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::acos);
1016      break;      break;
1017      case ATAN:      case ATAN:
1018      tensor_unary_operation(m_samplesize, left, result, ::atan);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::atan);
1019      break;      break;
1020      case SINH:      case SINH:
1021      tensor_unary_operation(m_samplesize, left, result, ::sinh);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sinh);
1022      break;      break;
1023      case COSH:      case COSH:
1024      tensor_unary_operation(m_samplesize, left, result, ::cosh);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cosh);
1025      break;      break;
1026      case TANH:      case TANH:
1027      tensor_unary_operation(m_samplesize, left, result, ::tanh);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tanh);
1028      break;      break;
1029      case ERF:      case ERF:
1030  #ifdef _WIN32  #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1031      throw DataException("Error - Data:: erf function is not supported on _WIN32 platforms.");      throw DataException("Error - Data:: erf function is not supported on _WIN32 platforms.");
1032  #else  #else
1033      tensor_unary_operation(m_samplesize, left, result, ::erf);      tensor_unary_operation(m_samplesize, left, result, ::erf);
1034      break;      break;
1035  #endif  #endif
1036     case ASINH:     case ASINH:
1037  #ifdef _WIN32  #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1038      tensor_unary_operation(m_samplesize, left, result, escript::asinh_substitute);      tensor_unary_operation(m_samplesize, left, result, escript::asinh_substitute);
1039  #else  #else
1040      tensor_unary_operation(m_samplesize, left, result, ::asinh);      tensor_unary_operation(m_samplesize, left, result, ::asinh);
1041  #endif    #endif  
1042      break;      break;
1043     case ACOSH:     case ACOSH:
1044  #ifdef _WIN32  #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1045      tensor_unary_operation(m_samplesize, left, result, escript::acosh_substitute);      tensor_unary_operation(m_samplesize, left, result, escript::acosh_substitute);
1046  #else  #else
1047      tensor_unary_operation(m_samplesize, left, result, ::acosh);      tensor_unary_operation(m_samplesize, left, result, ::acosh);
1048  #endif    #endif  
1049      break;      break;
1050     case ATANH:     case ATANH:
1051  #ifdef _WIN32  #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1052      tensor_unary_operation(m_samplesize, left, result, escript::atanh_substitute);      tensor_unary_operation(m_samplesize, left, result, escript::atanh_substitute);
1053  #else  #else
1054      tensor_unary_operation(m_samplesize, left, result, ::atanh);      tensor_unary_operation(m_samplesize, left, result, ::atanh);
1055  #endif    #endif  
1056      break;      break;
1057      case LOG10:      case LOG10:
1058      tensor_unary_operation(m_samplesize, left, result, ::log10);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log10);
1059      break;      break;
1060      case LOG:      case LOG:
1061      tensor_unary_operation(m_samplesize, left, result, ::log);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log);
1062      break;      break;
1063      case SIGN:      case SIGN:
1064      tensor_unary_operation(m_samplesize, left, result, escript::fsign);      tensor_unary_operation(m_samplesize, left, result, escript::fsign);
1065      break;      break;
1066      case ABS:      case ABS:
1067      tensor_unary_operation(m_samplesize, left, result, ::fabs);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::fabs);
1068      break;      break;
1069      case NEG:      case NEG:
1070      tensor_unary_operation(m_samplesize, left, result, negate<double>());      tensor_unary_operation(m_samplesize, left, result, negate<double>());
# Line 357  DataLazy::resolveSample(ValueType& v,int Line 1075  DataLazy::resolveSample(ValueType& v,int
1075      throw DataException("Programmer error - POS not supported for lazy data.");      throw DataException("Programmer error - POS not supported for lazy data.");
1076      break;      break;
1077      case EXP:      case EXP:
1078      tensor_unary_operation(m_samplesize, left, result, ::exp);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::exp);
1079      break;      break;
1080      case SQRT:      case SQRT:
1081      tensor_unary_operation(m_samplesize, left, result, ::sqrt);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sqrt);
1082      break;      break;
1083      case RECIP:      case RECIP:
1084      tensor_unary_operation(m_samplesize, left, result, bind1st(divides<double>(),1.));      tensor_unary_operation(m_samplesize, left, result, bind1st(divides<double>(),1.));
# Line 377  DataLazy::resolveSample(ValueType& v,int Line 1095  DataLazy::resolveSample(ValueType& v,int
1095      case LEZ:      case LEZ:
1096      tensor_unary_operation(m_samplesize, left, result, bind2nd(less_equal<double>(),0.0));      tensor_unary_operation(m_samplesize, left, result, bind2nd(less_equal<double>(),0.0));
1097      break;      break;
1098    // There are actually G_UNARY_P but I don't see a compelling reason to treat them differently
1099        case NEZ:
1100        tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsGT(),m_tol));
1101        break;
1102        case EZ:
1103        tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsLTE(),m_tol));
1104        break;
1105    
1106        default:
1107        throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");
1108      }
1109      return &v;
1110    }
1111    
1112    
1113    /*
1114      \brief Compute the value of the expression (reduction operation) for the given sample.
1115      \return Vector which stores the value of the subexpression for the given sample.
1116      \param v A vector to store intermediate results.
1117      \param offset Index in v to begin storing results.
1118      \param sampleNo Sample number to evaluate.
1119      \param roffset (output parameter) the offset in the return vector where the result begins.
1120    
1121      The return value will be an existing vector so do not deallocate it.
1122      If the result is stored in v it should be stored at the offset given.
1123      Everything from offset to the end of v should be considered available for this method to use.
1124    */
1125    DataTypes::ValueType*
1126    DataLazy::resolveReduction(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const
1127    {
1128        // we assume that any collapsing has been done before we get here
1129        // since we only have one argument we don't need to think about only
1130        // processing single points.
1131      if (m_readytype!='E')
1132      {
1133        throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
1134      }
1135      const ValueType* vleft=m_left->resolveVectorSample(v,offset,sampleNo,roffset);
1136      double* result=&(v[offset]);
1137      roffset=offset;
1138      switch (m_op)
1139      {
1140        case MINVAL:
1141        {
1142          FMin op;
1143          *result=DataMaths::reductionOp(*vleft, m_left->getShape(), roffset, op, numeric_limits<double>::max());
1144        }
1145        break;
1146        case MAXVAL:
1147        {
1148          FMax op;
1149          *result=DataMaths::reductionOp(*vleft, m_left->getShape(), roffset, op, numeric_limits<double>::max()*-1);
1150        }
1151        break;
1152        default:
1153        throw DataException("Programmer error - resolveReduction can not resolve operator "+opToString(m_op)+".");
1154      }
1155      return &v;
1156    }
1157    
1158    
1159    
1160    /*
1161      \brief Compute the value of the expression (unary operation) for the given sample.
1162      \return Vector which stores the value of the subexpression for the given sample.
1163      \param v A vector to store intermediate results.
1164      \param offset Index in v to begin storing results.
1165      \param sampleNo Sample number to evaluate.
1166      \param roffset (output parameter) the offset in the return vector where the result begins.
1167    
1168      The return value will be an existing vector so do not deallocate it.
1169      If the result is stored in v it should be stored at the offset given.
1170      Everything from offset to the end of v should be considered available for this method to use.
1171    */
1172    DataTypes::ValueType*
1173    DataLazy::resolveNP1OUT(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const
1174    {
1175        // we assume that any collapsing has been done before we get here
1176        // since we only have one argument we don't need to think about only
1177        // processing single points.
1178      if (m_readytype!='E')
1179      {
1180        throw DataException("Programmer error - resolveNP1OUT should only be called on expanded Data.");
1181      }
1182        // since we can't write the result over the input, we need a result offset further along
1183      size_t subroffset=roffset+m_samplesize;
1184    LAZYDEBUG(cerr << "subroffset=" << subroffset << endl;)
1185      const ValueType* vleft=m_left->resolveVectorSample(v,offset+m_left->m_samplesize,sampleNo,subroffset);
1186      roffset=offset;
1187      size_t loop=0;
1188      size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1189      size_t step=getNoValues();
1190      switch (m_op)
1191      {
1192        case SYM:
1193        for (loop=0;loop<numsteps;++loop)
1194        {
1195            DataMaths::symmetric(*vleft,m_left->getShape(),subroffset, v, getShape(), offset);
1196            subroffset+=step;
1197            offset+=step;
1198        }
1199        break;
1200        case NSYM:
1201        for (loop=0;loop<numsteps;++loop)
1202        {
1203            DataMaths::nonsymmetric(*vleft,m_left->getShape(),subroffset, v, getShape(), offset);
1204            subroffset+=step;
1205            offset+=step;
1206        }
1207        break;
1208        default:
1209        throw DataException("Programmer error - resolveNP1OUT can not resolve operator "+opToString(m_op)+".");
1210      }
1211      return &v;
1212    }
1213    
1214    /*
1215      \brief Compute the value of the expression (unary operation) for the given sample.
1216      \return Vector which stores the value of the subexpression for the given sample.
1217      \param v A vector to store intermediate results.
1218      \param offset Index in v to begin storing results.
1219      \param sampleNo Sample number to evaluate.
1220      \param roffset (output parameter) the offset in the return vector where the result begins.
1221    
1222      The return value will be an existing vector so do not deallocate it.
1223      If the result is stored in v it should be stored at the offset given.
1224      Everything from offset to the end of v should be considered available for this method to use.
1225    */
1226    DataTypes::ValueType*
1227    DataLazy::resolveNP1OUT_P(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const
1228    {
1229        // we assume that any collapsing has been done before we get here
1230        // since we only have one argument we don't need to think about only
1231        // processing single points.
1232      if (m_readytype!='E')
1233      {
1234        throw DataException("Programmer error - resolveNP1OUT_P should only be called on expanded Data.");
1235      }
1236        // since we can't write the result over the input, we need a result offset further along
1237      size_t subroffset;
1238      const ValueType* vleft=m_left->resolveVectorSample(v,offset+m_left->m_samplesize,sampleNo,subroffset);
1239    LAZYDEBUG(cerr << "srcsamplesize=" << offset+m_left->m_samplesize << " beg=" << subroffset << endl;)
1240    LAZYDEBUG(cerr << "Offset for 5800=" << getPointOffset(5800/getNumDPPSample(),5800%getNumDPPSample()) << endl;)
1241      roffset=offset;
1242      size_t loop=0;
1243      size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1244      size_t outstep=getNoValues();
1245      size_t instep=m_left->getNoValues();
1246    LAZYDEBUG(cerr << "instep=" << instep << " outstep=" << outstep<< " numsteps=" << numsteps << endl;)
1247      switch (m_op)
1248      {
1249        case TRACE:
1250        for (loop=0;loop<numsteps;++loop)
1251        {
1252    size_t zz=sampleNo*getNumDPPSample()+loop;
1253    if (zz==5800)
1254    {
1255    LAZYDEBUG(cerr << "point=" <<  zz<< endl;)
1256    LAZYDEBUG(cerr << "Input to  trace=" << DataTypes::pointToString(*vleft,m_left->getShape(),subroffset,"") << endl;)
1257    LAZYDEBUG(cerr << "Offset for point=" << getPointOffset(5800/getNumDPPSample(),5800%getNumDPPSample()) << " vs ";)
1258    LAZYDEBUG(cerr << subroffset << endl;)
1259    LAZYDEBUG(cerr << "output=" << offset << endl;)
1260    }
1261                DataMaths::trace(*vleft,m_left->getShape(),subroffset, v ,getShape(),offset,m_axis_offset);
1262    if (zz==5800)
1263    {
1264    LAZYDEBUG(cerr << "Result of trace=" << DataTypes::pointToString(v,getShape(),offset,"") << endl;)
1265    }
1266            subroffset+=instep;
1267            offset+=outstep;
1268        }
1269        break;
1270        case TRANS:
1271        for (loop=0;loop<numsteps;++loop)
1272        {
1273                DataMaths::transpose(*vleft,m_left->getShape(),subroffset, v,getShape(),offset,m_axis_offset);
1274            subroffset+=instep;
1275            offset+=outstep;
1276        }
1277        break;
1278        default:
1279        throw DataException("Programmer error - resolveNP1OUTP can not resolve operator "+opToString(m_op)+".");
1280      }
1281      return &v;
1282    }
1283    
1284    
1285    /*
1286      \brief Compute the value of the expression (unary operation with int params) for the given sample.
1287      \return Vector which stores the value of the subexpression for the given sample.
1288      \param v A vector to store intermediate results.
1289      \param offset Index in v to begin storing results.
1290      \param sampleNo Sample number to evaluate.
1291      \param roffset (output parameter) the offset in the return vector where the result begins.
1292    
1293      The return value will be an existing vector so do not deallocate it.
1294      If the result is stored in v it should be stored at the offset given.
1295      Everything from offset to the end of v should be considered available for this method to use.
1296    */
1297    DataTypes::ValueType*
1298    DataLazy::resolveNP1OUT_2P(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const
1299    {
1300        // we assume that any collapsing has been done before we get here
1301        // since we only have one argument we don't need to think about only
1302        // processing single points.
1303      if (m_readytype!='E')
1304      {
1305        throw DataException("Programmer error - resolveNP1OUT_2P should only be called on expanded Data.");
1306      }
1307        // since we can't write the result over the input, we need a result offset further along
1308      size_t subroffset;
1309      const ValueType* vleft=m_left->resolveVectorSample(v,offset+m_left->m_samplesize,sampleNo,subroffset);
1310    LAZYDEBUG(cerr << "srcsamplesize=" << offset+m_left->m_samplesize << " beg=" << subroffset << endl;)
1311    LAZYDEBUG(cerr << "Offset for 5800=" << getPointOffset(5800/getNumDPPSample(),5800%getNumDPPSample()) << endl;)
1312      roffset=offset;
1313      size_t loop=0;
1314      size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1315      size_t outstep=getNoValues();
1316      size_t instep=m_left->getNoValues();
1317    LAZYDEBUG(cerr << "instep=" << instep << " outstep=" << outstep<< " numsteps=" << numsteps << endl;)
1318      switch (m_op)
1319      {
1320        case SWAP:
1321        for (loop=0;loop<numsteps;++loop)
1322        {
1323                DataMaths::swapaxes(*vleft,m_left->getShape(),subroffset, v,getShape(),offset,m_axis_offset, m_transpose);
1324            subroffset+=instep;
1325            offset+=outstep;
1326        }
1327        break;
1328      default:      default:
1329      throw DataException("Programmer error - do not know how to resolve operator "+opToString(m_op)+".");      throw DataException("Programmer error - resolveNP1OUT2P can not resolve operator "+opToString(m_op)+".");
1330      }
1331      return &v;
1332    }
1333    
1334    
1335    
1336    #define PROC_OP(TYPE,X)                               \
1337        for (int j=0;j<onumsteps;++j)\
1338        {\
1339          for (int i=0;i<numsteps;++i,resultp+=resultStep) \
1340          { \
1341    LAZYDEBUG(cout << "[left,right]=[" << lroffset << "," << rroffset << "]" << endl;)\
1342    LAZYDEBUG(cout << "{left,right}={" << (*left)[lroffset] << "," << (*right)[rroffset] << "}\n";)\
1343             tensor_binary_operation< TYPE >(chunksize, &((*left)[lroffset]), &((*right)[rroffset]), resultp, X); \
1344    LAZYDEBUG(cout << " result=      " << resultp[0] << endl;) \
1345             lroffset+=leftstep; \
1346             rroffset+=rightstep; \
1347          }\
1348          lroffset+=oleftstep;\
1349          rroffset+=orightstep;\
1350        }
1351    
1352    /*
1353      \brief Compute the value of the expression (binary operation) for the given sample.
1354      \return Vector which stores the value of the subexpression for the given sample.
1355      \param v A vector to store intermediate results.
1356      \param offset Index in v to begin storing results.
1357      \param sampleNo Sample number to evaluate.
1358      \param roffset (output parameter) the offset in the return vector where the result begins.
1359    
1360      The return value will be an existing vector so do not deallocate it.
1361      If the result is stored in v it should be stored at the offset given.
1362      Everything from offset to the end of v should be considered available for this method to use.
1363    */
// This method assumes that any subexpressions which evaluate to Constant or Tagged Data
// have already been collapsed to IDENTITY. So we must have at least one expanded child.
// If both children are expanded, then we can process them in a single operation (we treat
// the whole sample as one big datapoint).
// If one of the children is not expanded, then we need to treat each point in the sample
// individually.
// There is an additional complication when scalar operations are considered.
// For example, 2+Vector.
// In this case each double within the point is treated individually
DataTypes::ValueType*
DataLazy::resolveBinary(ValueType& v,  size_t offset, int sampleNo, size_t& roffset) const
{
LAZYDEBUG(cout << "Resolve binary: " << toString() << endl;)

  size_t lroffset=0, rroffset=0;    // offsets in the left and right result vectors
    // first work out which of the children are expanded
  bool leftExp=(m_left->m_readytype=='E');
  bool rightExp=(m_right->m_readytype=='E');
  if (!leftExp && !rightExp)
  {
    throw DataException("Programmer Error - please use collapse if neither argument has type 'E'.");
  }
  bool leftScalar=(m_left->getRank()==0);
  bool rightScalar=(m_right->getRank()==0);
  if ((m_left->getRank()!=m_right->getRank()) && (!leftScalar && !rightScalar))
  {
    throw DataException("resolveBinary - ranks of arguments must match unless one of them is scalar.");
  }
  size_t leftsize=m_left->getNoValues();    // doubles in one left data point
  size_t rightsize=m_right->getNoValues();  // doubles in one right data point
  size_t chunksize=1;           // how many doubles will be processed in one go
  int leftstep=0;       // how far should the left offset advance after each step
  int rightstep=0;
  int numsteps=0;       // total number of steps for the inner loop
  int oleftstep=0;  // the o variables refer to the outer loop
  int orightstep=0; // The outer loop is only required in cases where there is an extended scalar
  int onumsteps=1;
  
    // Classify the children by (expanded?, scalar?).  These eight flags select
    // the stepping pattern used by the PROC_OP macro below.
  bool LES=(leftExp && leftScalar); // Left is an expanded scalar
  bool RES=(rightExp && rightScalar);
  bool LS=(!leftExp && leftScalar); // left is a single scalar
  bool RS=(!rightExp && rightScalar);
  bool LN=(!leftExp && !leftScalar);    // left is a single non-scalar
  bool RN=(!rightExp && !rightScalar);
  bool LEN=(leftExp && !leftScalar);    // left is an expanded non-scalar
  bool REN=(rightExp && !rightScalar);

  if ((LES && RES) || (LEN && REN)) // both are Expanded scalars or both are expanded non-scalars
  {
        // layouts match exactly, so the whole sample can be processed as one chunk
    chunksize=m_left->getNumDPPSample()*leftsize;
    leftstep=0;
    rightstep=0;
    numsteps=1;
  }
  else if (LES || RES)
  {
    chunksize=1;    // an expanded scalar is mixed in, so work one double at a time
    if (LES)        // left is an expanded scalar
    {
        if (RS)
        {
           leftstep=1;
           rightstep=0;
           numsteps=m_left->getNumDPPSample();
        }
        else        // RN or REN
        {
           leftstep=0;
           oleftstep=1;
           rightstep=1;
               // negative outer step rewinds an unexpanded operand back to its
               // start for the next data point; 0 keeps advancing for expanded data
           orightstep=(RN ? -(int)rightsize : 0);
           numsteps=rightsize;
           onumsteps=m_left->getNumDPPSample();
        }
    }
    else        // right is an expanded scalar
    {
        if (LS)
        {
           rightstep=1;
           leftstep=0;
           numsteps=m_right->getNumDPPSample();
        }
        else
        {
           rightstep=0;
           orightstep=1;
           leftstep=1;
           oleftstep=(LN ? -(int)leftsize : 0);
           numsteps=leftsize;
           onumsteps=m_right->getNumDPPSample();
        }
    }
  }
  else  // this leaves (LEN, RS), (LEN, RN) and their transposes
  {
    if (LEN)    // and Right will be a single value
    {
        chunksize=rightsize;
        leftstep=rightsize;
        rightstep=0;
        numsteps=m_left->getNumDPPSample();
        if (RS)
        {
           numsteps*=leftsize;
        }
    }
    else    // REN
    {
        chunksize=leftsize;
        rightstep=leftsize;
        leftstep=0;
        numsteps=m_right->getNumDPPSample();
        if (LS)
        {
           numsteps*=rightsize;
        }
    }
  }

  int resultStep=max(leftstep,rightstep);   // only one (at most) should be !=0
    // Get the values of sub-expressions
  const ValueType* left=m_left->resolveVectorSample(v,offset+getMaxSampleSize(),sampleNo,lroffset); // see note on
    // calcBuffs for why we can't put offset as the 2nd param above
  const ValueType* right=m_right->resolveVectorSample(v,offset+2*getMaxSampleSize(),sampleNo,rroffset); // Note
    // the right child starts further along.
LAZYDEBUG(cout << "Post sub calls in " << toString() << endl;)
LAZYDEBUG(cout << "shapes=" << DataTypes::shapeToString(m_left->getShape()) << "," << DataTypes::shapeToString(m_right->getShape()) << endl;)
LAZYDEBUG(cout << "chunksize=" << chunksize << endl << "leftstep=" << leftstep << " rightstep=" << rightstep;)
LAZYDEBUG(cout << " numsteps=" << numsteps << endl << "oleftstep=" << oleftstep << " orightstep=" << orightstep;)
LAZYDEBUG(cout << "onumsteps=" << onumsteps << endl;)
LAZYDEBUG(cout << " DPPS=" << m_left->getNumDPPSample() << "," <<m_right->getNumDPPSample() << endl;)
LAZYDEBUG(cout << "" << LS << RS << LN << RN << LES << RES <<LEN << REN <<   endl;)


  double* resultp=&(v[offset]);     // results are stored at the vector offset we received
    // NOTE(review): PROC_OP is a project macro (defined earlier in this file);
    // it presumably expands to the stepping loops and reads chunksize, numsteps,
    // onumsteps, the *step variables, lroffset/rroffset, left/right and resultp
    // by name - confirm against its definition before renaming any of them.
  switch(m_op)
  {
    case ADD:
        PROC_OP(NO_ARG,plus<double>());
    break;
    case SUB:
    PROC_OP(NO_ARG,minus<double>());
    break;
    case MUL:
    PROC_OP(NO_ARG,multiplies<double>());
    break;
    case DIV:
    PROC_OP(NO_ARG,divides<double>());
    break;
    case POW:
       PROC_OP(double (double,double),::pow);
    break;
    default:
    throw DataException("Programmer error - resolveBinary can not resolve operator "+opToString(m_op)+".");
  }
  roffset=offset;   // the result lives at the offset the caller supplied
  return &v;
}
1523    
1524    
1525    
/*
  \brief Compute the value of the expression (tensor product) for the given sample.
  \return Vector which stores the value of the subexpression for the given sample.
  \param v A vector to store intermediate results.
  \param offset Index in v to begin storing results.
  \param sampleNo Sample number to evaluate.
  \param roffset (output parameter) the offset in the return vector where the result begins.

  The return value will be an existing vector so do not deallocate it.
  If the result is stored in v it should be stored at the offset given.
  Everything from offset to the end of v should be considered available for this method to use.
*/
// This method assumes that any subexpressions which evaluate to Constant or Tagged Data
// have already been collapsed to IDENTITY. So we must have at least one expanded child.
// Unlike the other resolve helpers, we must treat these datapoints separately.
DataTypes::ValueType*
DataLazy::resolveTProd(ValueType& v,  size_t offset, int sampleNo, size_t& roffset) const
{
LAZYDEBUG(cout << "Resolve TensorProduct: " << toString()  << " to offset " << offset<< endl;)

  size_t lroffset=0, rroffset=0;    // offsets in the left and right result vectors
    // first work out which of the children are expanded
  bool leftExp=(m_left->m_readytype=='E');
  bool rightExp=(m_right->m_readytype=='E');
  int steps=getNumDPPSample();
/*  int leftStep=((leftExp && !rightExp)? m_right->getNoValues() : 0);
  int rightStep=((rightExp && !leftExp)? m_left->getNoValues() : 0);*/
    // A step of 0 means an unexpanded (constant/tagged) child is reused for every data point.
  int leftStep=(leftExp? m_left->getNoValues() : 0);        // do not have scalars as input to this method
  int rightStep=(rightExp?m_right->getNoValues() : 0);

  int resultStep=getNoValues();     // each product writes one output point of this size
    // Get the values of sub-expressions (leave a gap of one sample for the result).
  int gap=offset+m_samplesize;  

LAZYDEBUG(cout << "Query left with offset=" << gap << endl;)

  const ValueType* left=m_left->resolveVectorSample(v,gap,sampleNo,lroffset);
  gap+=m_left->getMaxSampleSize();


LAZYDEBUG(cout << "Query right with offset=" << gap << endl;)


  const ValueType* right=m_right->resolveVectorSample(v,gap,sampleNo,rroffset);

LAZYDEBUG(cerr << "[Left shape]=" << DataTypes::shapeToString(m_left->getShape()) << "\n[Right shape]=" << DataTypes::shapeToString(m_right->getShape()) << " result=" <<DataTypes::shapeToString(getShape()) <<  endl;
cout << getNoValues() << endl;)
LAZYDEBUG(cerr << "Result of left=";)
LAZYDEBUG(cerr << "[" << lroffset << " .. " << lroffset+m_left->getNoValues() << "]" << endl;

for (int i=lroffset, limit=lroffset+(leftExp?m_left->getNoValues()*m_left->getNumDPPSample():m_left->getNoValues());i<limit;++i)
{
cout << "[" << setw(2) << i-lroffset << "] " << setw(10) << (*left)[i] << " ";
if (i%4==0) cout << endl;
})
LAZYDEBUG(cerr << "\nResult of right=" << endl;)
LAZYDEBUG(
for (int i=rroffset, limit=rroffset+(rightExp?m_right->getNoValues()*m_right->getNumDPPSample():m_right->getNoValues());i<limit;++i)
{
cerr << "[" <<  setw(2)<< i-rroffset << "] " << setw(10) << (*right)[i] << " ";
if (i%4==0) cout << endl;
}
cerr << endl;
)
LAZYDEBUG(cerr << "Post sub calls: " << toString() << endl;)
LAZYDEBUG(cout << "LeftExp=" << leftExp << " rightExp=" << rightExp << endl;)
LAZYDEBUG(cout << "LeftR=" << m_left->getRank() << " rightExp=" << m_right->getRank() << endl;)
LAZYDEBUG(cout << "LeftSize=" << m_left->getNoValues() << " RightSize=" << m_right->getNoValues() << endl;)
LAZYDEBUG(cout << "m_samplesize=" << m_samplesize << endl;)
LAZYDEBUG(cout << "outputshape=" << DataTypes::shapeToString(getShape()) << endl;)
LAZYDEBUG(cout << "DPPS=" << m_right->getNumDPPSample() <<"."<<endl;)

  double* resultp=&(v[offset]);     // results are stored at the vector offset we received
  switch(m_op)
  {
    case PROD:
        // One matrix product per data point; m_SL/m_SM/m_SR are the precomputed
        // dimensions and m_transpose selects which operand is transposed.
    for (int i=0;i<steps;++i,resultp+=resultStep)
    {

LAZYDEBUG(cout << "lroffset=" << lroffset << "rroffset=" << rroffset << endl;)
LAZYDEBUG(cout << "l*=" << left << " r*=" << right << endl;)
LAZYDEBUG(cout << "m_SL=" << m_SL << " m_SM=" << m_SM << " m_SR=" << m_SR << endl;)

          const double *ptr_0 = &((*left)[lroffset]);
          const double *ptr_1 = &((*right)[rroffset]);

LAZYDEBUG(cout << DataTypes::pointToString(*left, m_left->getShape(),lroffset,"LEFT") << endl;)
LAZYDEBUG(cout << DataTypes::pointToString(*right,m_right->getShape(),rroffset, "RIGHT") << endl;)

          matrix_matrix_product(m_SL, m_SM, m_SR, ptr_0, ptr_1, resultp, m_transpose);

LAZYDEBUG(cout << "Results--\n";
{
  DataVector dv(getNoValues());
for (int z=0;z<getNoValues();++z)
{
  cout << "[" << setw(2) << z<< "] " << setw(10) << resultp[z] << " ";
  if (z%4==0) cout << endl;
  dv[z]=resultp[z];
}
cout << endl << DataTypes::pointToString(dv,getShape(),0,"RESLT");
cout << "\nWritten to: " << resultp << " resultStep=" << resultStep << endl;
}
)
      lroffset+=leftStep;
      rroffset+=rightStep;
    }
    break;
    default:
    throw DataException("Programmer error - resolveTProduct can not resolve operator "+opToString(m_op)+".");
  }
  roffset=offset;   // the result lives at the offset the caller supplied
  return &v;
}
1640    
1641    
1642    #ifdef LAZY_NODE_STORAGE
1643    
1644    // The result will be stored in m_samples
1645    // The return value is a pointer to the DataVector, offset is the offset within the return value
1646    const DataTypes::ValueType*
1647    DataLazy::resolveNodeSample(int tid, int sampleNo, size_t& roffset)
1648    {
1649    LAZYDEBUG(cout << "Resolve sample " << toString() << endl;)
1650        // collapse so we have a 'E' node or an IDENTITY for some other type
1651      if (m_readytype!='E' && m_op!=IDENTITY)
1652      {
1653        collapse();
1654      }
1655      if (m_op==IDENTITY)  
1656      {
1657        const ValueType& vec=m_id->getVectorRO();
1658    //     if (m_readytype=='C')
1659    //     {
1660    //  roffset=0;      // all samples read from the same position
1661    //  return &(m_samples);
1662    //     }
1663        roffset=m_id->getPointOffset(sampleNo, 0);
1664        return &(vec);
1665      }
1666      if (m_readytype!='E')
1667      {
1668        throw DataException("Programmer Error - Collapse did not produce an expanded node.");
1669      }
1670      if (m_sampleids[tid]==sampleNo)
1671      {
1672        roffset=tid*m_samplesize;
1673        return &(m_samples);        // sample is already resolved
1674      }
1675      m_sampleids[tid]=sampleNo;
1676      switch (getOpgroup(m_op))
1677      {
1678      case G_UNARY:
1679      case G_UNARY_P: return resolveNodeUnary(tid, sampleNo, roffset);
1680      case G_BINARY: return resolveNodeBinary(tid, sampleNo, roffset);
1681      case G_NP1OUT: return resolveNodeNP1OUT(tid, sampleNo, roffset);
1682      case G_NP1OUT_P: return resolveNodeNP1OUT_P(tid, sampleNo, roffset);
1683      case G_TENSORPROD: return resolveNodeTProd(tid, sampleNo, roffset);
1684      case G_NP1OUT_2P: return resolveNodeNP1OUT_2P(tid, sampleNo, roffset);
1685      case G_REDUCTION: return resolveNodeReduction(tid, sampleNo, roffset);
1686      default:
1687        throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");
1688      }
1689    }
1690    
const DataTypes::ValueType*
DataLazy::resolveNodeUnary(int tid, int sampleNo, size_t& roffset)
{
    // Resolve a unary elementwise operation for one sample into this thread's
    // slice of m_samples, returning a pointer to m_samples and setting roffset.
    // we assume that any collapsing has been done before we get here
    // since we only have one argument we don't need to think about only
    // processing single points.
    // we will also know we won't get identity nodes
  if (m_readytype!='E')
  {
    throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
  }
  if (m_op==IDENTITY)
  {
    throw DataException("Programmer error - resolveNodeUnary should not be called on identity nodes.");
  }
    // roffset is reused here: first as the child's offset, then as ours.
  const DataTypes::ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, roffset);
  const double* left=&((*leftres)[roffset]);
  roffset=m_samplesize*tid;     // each thread writes to its own slice of m_samples
  double* result=&(m_samples[roffset]);
    // Apply the operation to every double in the sample.  The explicit
    // double(*)(double) template argument disambiguates overloaded libm functions.
  switch (m_op)
  {
    case SIN:  
    tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sin);
    break;
    case COS:
    tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cos);
    break;
    case TAN:
    tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tan);
    break;
    case ASIN:
    tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::asin);
    break;
    case ACOS:
    tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::acos);
    break;
    case ATAN:
    tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::atan);
    break;
    case SINH:
    tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sinh);
    break;
    case COSH:
    tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cosh);
    break;
    case TANH:
    tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tanh);
    break;
    case ERF:
        // erf is absent from the MSVC maths library (but the Intel compiler provides it)
#if defined (_WIN32) && !defined(__INTEL_COMPILER)
    throw DataException("Error - Data:: erf function is not supported on _WIN32 platforms.");
#else
    tensor_unary_operation(m_samplesize, left, result, ::erf);
    break;
#endif
        // the inverse hyperbolic functions also need substitutes on MSVC
   case ASINH:
#if defined (_WIN32) && !defined(__INTEL_COMPILER)
    tensor_unary_operation(m_samplesize, left, result, escript::asinh_substitute);
#else
    tensor_unary_operation(m_samplesize, left, result, ::asinh);
#endif  
    break;
   case ACOSH:
#if defined (_WIN32) && !defined(__INTEL_COMPILER)
    tensor_unary_operation(m_samplesize, left, result, escript::acosh_substitute);
#else
    tensor_unary_operation(m_samplesize, left, result, ::acosh);
#endif  
    break;
   case ATANH:
#if defined (_WIN32) && !defined(__INTEL_COMPILER)
    tensor_unary_operation(m_samplesize, left, result, escript::atanh_substitute);
#else
    tensor_unary_operation(m_samplesize, left, result, ::atanh);
#endif  
    break;
    case LOG10:
    tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log10);
    break;
    case LOG:
    tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log);
    break;
    case SIGN:
    tensor_unary_operation(m_samplesize, left, result, escript::fsign);
    break;
    case ABS:
    tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::fabs);
    break;
    case NEG:
    tensor_unary_operation(m_samplesize, left, result, negate<double>());
    break;
    case POS:
    // it doesn't mean anything for delayed.
    // it will just trigger a deep copy of the lazy object
    throw DataException("Programmer error - POS not supported for lazy data.");
    break;
    case EXP:
    tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::exp);
    break;
    case SQRT:
    tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sqrt);
    break;
    case RECIP:
    tensor_unary_operation(m_samplesize, left, result, bind1st(divides<double>(),1.));
    break;
        // comparisons against zero produce 1.0/0.0 "boolean" values
    case GZ:
    tensor_unary_operation(m_samplesize, left, result, bind2nd(greater<double>(),0.0));
    break;
    case LZ:
    tensor_unary_operation(m_samplesize, left, result, bind2nd(less<double>(),0.0));
    break;
    case GEZ:
    tensor_unary_operation(m_samplesize, left, result, bind2nd(greater_equal<double>(),0.0));
    break;
    case LEZ:
    tensor_unary_operation(m_samplesize, left, result, bind2nd(less_equal<double>(),0.0));
    break;
// There are actually G_UNARY_P but I don't see a compelling reason to treat them differently
    case NEZ:
    tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsGT(),m_tol));
    break;
    case EZ:
    tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsLTE(),m_tol));
    break;

    default:
    throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");
  }
  return &(m_samples);
}
1821    
1822    
1823    const DataTypes::ValueType*
1824    DataLazy::resolveNodeReduction(int tid, int sampleNo, size_t& roffset)
1825    {
1826        // we assume that any collapsing has been done before we get here
1827        // since we only have one argument we don't need to think about only
1828        // processing single points.
1829        // we will also know we won't get identity nodes
1830      if (m_readytype!='E')
1831      {
1832        throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
1833      }
1834      if (m_op==IDENTITY)
1835      {
1836        throw DataException("Programmer error - resolveNodeUnary should not be called on identity nodes.");
1837      }
1838      size_t loffset=0;
1839      const DataTypes::ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, loffset);
1840    
1841      roffset=m_samplesize*tid;
1842      double* result=&(m_samples[roffset]);
1843      switch (m_op)
1844      {
1845        case MINVAL:
1846        {
1847          FMin op;
1848          *result=DataMaths::reductionOp(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max());
1849        }
1850        break;
1851        case MAXVAL:
1852        {
1853          FMax op;
1854          *result=DataMaths::reductionOp(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max()*-1);
1855        }
1856        break;
1857        default:
1858        throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");
1859      }
1860      return &(m_samples);
1861    }
1862    
1863    const DataTypes::ValueType*
1864    DataLazy::resolveNodeNP1OUT(int tid, int sampleNo, size_t& roffset)
1865    {
1866        // we assume that any collapsing has been done before we get here
1867        // since we only have one argument we don't need to think about only
1868        // processing single points.
1869      if (m_readytype!='E')
1870      {
1871        throw DataException("Programmer error - resolveNodeNP1OUT should only be called on expanded Data.");
1872      }
1873      if (m_op==IDENTITY)
1874      {
1875        throw DataException("Programmer error - resolveNodeNP1OUT should not be called on identity nodes.");
1876      }
1877      size_t subroffset;
1878      const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1879      roffset=m_samplesize*tid;
1880      size_t loop=0;
1881      size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1882      size_t step=getNoValues();
1883      size_t offset=roffset;
1884      switch (m_op)
1885      {
1886        case SYM:
1887        for (loop=0;loop<numsteps;++loop)
1888        {
1889            DataMaths::symmetric(*leftres,m_left->getShape(),subroffset, m_samples, getShape(), offset);
1890            subroffset+=step;
1891            offset+=step;
1892        }
1893        break;
1894        case NSYM:
1895        for (loop=0;loop<numsteps;++loop)
1896        {
1897            DataMaths::nonsymmetric(*leftres,m_left->getShape(),subroffset, m_samples, getShape(), offset);
1898            subroffset+=step;
1899            offset+=step;
1900        }
1901        break;
1902        default:
1903        throw DataException("Programmer error - resolveNP1OUT can not resolve operator "+opToString(m_op)+".");
1904      }
1905      return &m_samples;
1906    }
1907    
1908    const DataTypes::ValueType*
1909    DataLazy::resolveNodeNP1OUT_P(int tid, int sampleNo, size_t& roffset)
1910    {
1911        // we assume that any collapsing has been done before we get here
1912        // since we only have one argument we don't need to think about only
1913        // processing single points.
1914      if (m_readytype!='E')
1915      {
1916        throw DataException("Programmer error - resolveNodeNP1OUT_P should only be called on expanded Data.");
1917      }
1918      if (m_op==IDENTITY)
1919      {
1920        throw DataException("Programmer error - resolveNodeNP1OUT_P should not be called on identity nodes.");
1921      }
1922      size_t subroffset;
1923      size_t offset;
1924      const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1925      roffset=m_samplesize*tid;
1926      offset=roffset;
1927      size_t loop=0;
1928      size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1929      size_t outstep=getNoValues();
1930      size_t instep=m_left->getNoValues();
1931      switch (m_op)
1932      {
1933        case TRACE:
1934        for (loop=0;loop<numsteps;++loop)
1935        {
1936                DataMaths::trace(*leftres,m_left->getShape(),subroffset, m_samples ,getShape(),offset,m_axis_offset);
1937            subroffset+=instep;
1938            offset+=outstep;
1939        }
1940        break;
1941        case TRANS:
1942        for (loop=0;loop<numsteps;++loop)
1943        {
1944                DataMaths::transpose(*leftres,m_left->getShape(),subroffset, m_samples, getShape(),offset,m_axis_offset);
1945            subroffset+=instep;
1946            offset+=outstep;
1947        }
1948        break;
1949        default:
1950        throw DataException("Programmer error - resolveNP1OUTP can not resolve operator "+opToString(m_op)+".");
1951      }
1952      return &m_samples;
1953    }
1954    
1955    
1956    const DataTypes::ValueType*
1957    DataLazy::resolveNodeNP1OUT_2P(int tid, int sampleNo, size_t& roffset)
1958    {
1959      if (m_readytype!='E')
1960      {
1961        throw DataException("Programmer error - resolveNodeNP1OUT_2P should only be called on expanded Data.");
1962      }
1963      if (m_op==IDENTITY)
1964      {
1965        throw DataException("Programmer error - resolveNodeNP1OUT_2P should not be called on identity nodes.");
1966      }
1967      size_t subroffset;
1968      size_t offset;
1969      const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1970      roffset=m_samplesize*tid;
1971      offset=roffset;
1972      size_t loop=0;
1973      size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1974      size_t outstep=getNoValues();
1975      size_t instep=m_left->getNoValues();
1976      switch (m_op)
1977      {
1978        case SWAP:
1979        for (loop=0;loop<numsteps;++loop)
1980        {
1981                DataMaths::swapaxes(*leftres,m_left->getShape(),subroffset, m_samples, getShape(),offset, m_axis_offset, m_transpose);
1982            subroffset+=instep;
1983            offset+=outstep;
1984        }
1985        break;
1986        default:
1987        throw DataException("Programmer error - resolveNodeNP1OUT2P can not resolve operator "+opToString(m_op)+".");
1988      }
1989      return &m_samples;
1990    }
1991    
1992    
1993    
// This method assumes that any subexpressions which evaluate to Constant or Tagged Data
// have already been collapsed to IDENTITY. So we must have at least one expanded child.
// If both children are expanded, then we can process them in a single operation (we treat
// the whole sample as one big datapoint).
// If one of the children is not expanded, then we need to treat each point in the sample
// individually.
// There is an additional complication when scalar operations are considered.
// For example, 2+Vector.
// In this case each double within the point is treated individually
const DataTypes::ValueType*
DataLazy::resolveNodeBinary(int tid, int sampleNo, size_t& roffset)
{
LAZYDEBUG(cout << "Resolve binary: " << toString() << endl;)

  size_t lroffset=0, rroffset=0;    // offsets in the left and right result vectors
    // first work out which of the children are expanded
  bool leftExp=(m_left->m_readytype=='E');
  bool rightExp=(m_right->m_readytype=='E');
  if (!leftExp && !rightExp)
  {
    throw DataException("Programmer Error - please use collapse if neither argument has type 'E'.");
  }
  bool leftScalar=(m_left->getRank()==0);
  bool rightScalar=(m_right->getRank()==0);
  if ((m_left->getRank()!=m_right->getRank()) && (!leftScalar && !rightScalar))
  {
    throw DataException("resolveBinary - ranks of arguments must match unless one of them is scalar.");
  }
  size_t leftsize=m_left->getNoValues();    // doubles in one left data point
  size_t rightsize=m_right->getNoValues();  // doubles in one right data point
  size_t chunksize=1;           // how many doubles will be processed in one go
  int leftstep=0;       // how far should the left offset advance after each step
  int rightstep=0;
  int numsteps=0;       // total number of steps for the inner loop
  int oleftstep=0;  // the o variables refer to the outer loop
  int orightstep=0; // The outer loop is only required in cases where there is an extended scalar
  int onumsteps=1;
  
    // Classify the children by (expanded?, scalar?).  These eight flags select
    // the stepping pattern used by the PROC_OP macro below.
  bool LES=(leftExp && leftScalar); // Left is an expanded scalar
  bool RES=(rightExp && rightScalar);
  bool LS=(!leftExp && leftScalar); // left is a single scalar
  bool RS=(!rightExp && rightScalar);
  bool LN=(!leftExp && !leftScalar);    // left is a single non-scalar
  bool RN=(!rightExp && !rightScalar);
  bool LEN=(leftExp && !leftScalar);    // left is an expanded non-scalar
  bool REN=(rightExp && !rightScalar);

  if ((LES && RES) || (LEN && REN)) // both are Expanded scalars or both are expanded non-scalars
  {
        // layouts match exactly, so the whole sample can be processed as one chunk
    chunksize=m_left->getNumDPPSample()*leftsize;
    leftstep=0;
    rightstep=0;
    numsteps=1;
  }
  else if (LES || RES)
  {
    chunksize=1;    // an expanded scalar is mixed in, so work one double at a time
    if (LES)        // left is an expanded scalar
    {
        if (RS)
        {
           leftstep=1;
           rightstep=0;
           numsteps=m_left->getNumDPPSample();
        }
        else        // RN or REN
        {
           leftstep=0;
           oleftstep=1;
           rightstep=1;
               // negative outer step rewinds an unexpanded operand back to its
               // start for the next data point; 0 keeps advancing for expanded data
           orightstep=(RN ? -(int)rightsize : 0);
           numsteps=rightsize;
           onumsteps=m_left->getNumDPPSample();
        }
    }
    else        // right is an expanded scalar
    {
        if (LS)
        {
           rightstep=1;
           leftstep=0;
           numsteps=m_right->getNumDPPSample();
        }
        else
        {
           rightstep=0;
           orightstep=1;
           leftstep=1;
           oleftstep=(LN ? -(int)leftsize : 0);
           numsteps=leftsize;
           onumsteps=m_right->getNumDPPSample();
        }
    }
  }
  else  // this leaves (LEN, RS), (LEN, RN) and their transposes
  {
    if (LEN)    // and Right will be a single value
    {
        chunksize=rightsize;
        leftstep=rightsize;
        rightstep=0;
        numsteps=m_left->getNumDPPSample();
        if (RS)
        {
           numsteps*=leftsize;
        }
    }
    else    // REN
    {
        chunksize=leftsize;
        rightstep=leftsize;
        leftstep=0;
        numsteps=m_right->getNumDPPSample();
        if (LS)
        {
           numsteps*=rightsize;
        }
    }
  }

  int resultStep=max(leftstep,rightstep);   // only one (at most) should be !=0
    // Get the values of sub-expressions
  const ValueType* left=m_left->resolveNodeSample(tid,sampleNo,lroffset);  
  const ValueType* right=m_right->resolveNodeSample(tid,sampleNo,rroffset);
LAZYDEBUG(cout << "Post sub calls in " << toString() << endl;)
LAZYDEBUG(cout << "shapes=" << DataTypes::shapeToString(m_left->getShape()) << "," << DataTypes::shapeToString(m_right->getShape()) << endl;)
LAZYDEBUG(cout << "chunksize=" << chunksize << endl << "leftstep=" << leftstep << " rightstep=" << rightstep;)
LAZYDEBUG(cout << " numsteps=" << numsteps << endl << "oleftstep=" << oleftstep << " orightstep=" << orightstep;)
LAZYDEBUG(cout << "onumsteps=" << onumsteps << endl;)
LAZYDEBUG(cout << " DPPS=" << m_left->getNumDPPSample() << "," <<m_right->getNumDPPSample() << endl;)
LAZYDEBUG(cout << "" << LS << RS << LN << RN << LES << RES <<LEN << REN <<   endl;)

LAZYDEBUG(cout << "Left res["<< lroffset<< "]=" << (*left)[lroffset] << endl;)
LAZYDEBUG(cout << "Right res["<< rroffset<< "]=" << (*right)[rroffset] << endl;)


  roffset=m_samplesize*tid;     // each thread writes to its own slice of m_samples
  double* resultp=&(m_samples[roffset]);        // results are stored at the vector offset we received
    // NOTE(review): PROC_OP is a project macro (defined earlier in this file);
    // it presumably expands to the stepping loops and reads chunksize, numsteps,
    // onumsteps, the *step variables, lroffset/rroffset, left/right and resultp
    // by name - confirm against its definition before renaming any of them.
  switch(m_op)
  {
    case ADD:
        PROC_OP(NO_ARG,plus<double>());
    break;
    case SUB:
    PROC_OP(NO_ARG,minus<double>());
    break;
    case MUL:
    PROC_OP(NO_ARG,multiplies<double>());
    break;
    case DIV:
    PROC_OP(NO_ARG,divides<double>());
    break;
    case POW:
       PROC_OP(double (double,double),::pow);
    break;
    default:
    throw DataException("Programmer error - resolveBinary can not resolve operator "+opToString(m_op)+".");
  }
LAZYDEBUG(cout << "Result res[" << roffset<< "]" << m_samples[roffset] << endl;)
  return &m_samples;
}
2155    
2156    
2157    // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
2158    // have already been collapsed to IDENTITY. So we must have at least one expanded child.
2159    // unlike the other resolve helpers, we must treat these datapoints separately.
2160    const DataTypes::ValueType*
2161    DataLazy::resolveNodeTProd(int tid, int sampleNo, size_t& roffset)
2162    {
2163    LAZYDEBUG(cout << "Resolve TensorProduct: " << toString() << endl;)
2164    
2165      size_t lroffset=0, rroffset=0;    // offsets in the left and right result vectors
2166        // first work out which of the children are expanded
2167      bool leftExp=(m_left->m_readytype=='E');
2168      bool rightExp=(m_right->m_readytype=='E');
2169      int steps=getNumDPPSample();
2170      int leftStep=(leftExp? m_left->getNoValues() : 0);        // do not have scalars as input to this method
2171      int rightStep=(rightExp?m_right->getNoValues() : 0);
2172    
2173      int resultStep=getNoValues();
2174      roffset=m_samplesize*tid;
2175      size_t offset=roffset;
2176    
2177      const ValueType* left=m_left->resolveNodeSample(tid, sampleNo, lroffset);
2178    
2179      const ValueType* right=m_right->resolveNodeSample(tid, sampleNo, rroffset);
2180    
2181    LAZYDEBUG(cerr << "[Left shape]=" << DataTypes::shapeToString(m_left->getShape()) << "\n[Right shape]=" << DataTypes::shapeToString(m_right->getShape()) << " result=" <<DataTypes::shapeToString(getShape()) <<  endl;
2182    cout << getNoValues() << endl;)
2183    
2184    
2185    LAZYDEBUG(cerr << "Post sub calls: " << toString() << endl;)
2186    LAZYDEBUG(cout << "LeftExp=" << leftExp << " rightExp=" << rightExp << endl;)
2187    LAZYDEBUG(cout << "LeftR=" << m_left->getRank() << " rightExp=" << m_right->getRank() << endl;)
2188    LAZYDEBUG(cout << "LeftSize=" << m_left->getNoValues() << " RightSize=" << m_right->getNoValues() << endl;)
2189    LAZYDEBUG(cout << "m_samplesize=" << m_samplesize << endl;)
2190    LAZYDEBUG(cout << "outputshape=" << DataTypes::shapeToString(getShape()) << endl;)
2191    LAZYDEBUG(cout << "DPPS=" << m_right->getNumDPPSample() <<"."<<endl;)
2192    
2193      double* resultp=&(m_samples[offset]);     // results are stored at the vector offset we recieved
2194      switch(m_op)
2195      {
2196        case PROD:
2197        for (int i=0;i<steps;++i,resultp+=resultStep)
2198        {
2199              const double *ptr_0 = &((*left)[lroffset]);
2200              const double *ptr_1 = &((*right)[rroffset]);
2201    
2202    LAZYDEBUG(cout << DataTypes::pointToString(*left, m_left->getShape(),lroffset,"LEFT") << endl;)
2203    LAZYDEBUG(cout << DataTypes::pointToString(*right,m_right->getShape(),rroffset, "RIGHT") << endl;)
2204    
2205              matrix_matrix_product(m_SL, m_SM, m_SR, ptr_0, ptr_1, resultp, m_transpose);
2206    
2207          lroffset+=leftStep;
2208          rroffset+=rightStep;
2209        }
2210        break;
2211        default:
2212        throw DataException("Programmer error - resolveTProduct can not resolve operator "+opToString(m_op)+".");
2213      }
2214      roffset=offset;
2215      return &m_samples;
2216    }
2217    #endif //LAZY_NODE_STORAGE
2218    
2219    /*
2220      \brief Compute the value of the expression for the given sample.
2221      \return Vector which stores the value of the subexpression for the given sample.
2222      \param v A vector to store intermediate results.
2223      \param offset Index in v to begin storing results.
2224      \param sampleNo Sample number to evaluate.
2225      \param roffset (output parameter) the offset in the return vector where the result begins.
2226    
2227      The return value will be an existing vector so do not deallocate it.
2228    */
2229    // the vector and the offset are a place where the method could write its data if it wishes
2230    // it is not obligated to do so. For example, if it has its own storage already, it can use that.
2231    // Hence the return value to indicate where the data is actually stored.
2232    // Regardless, the storage should be assumed to be used, even if it isn't.
2233    
2234    // the roffset is the offset within the returned vector where the data begins
2235    const DataTypes::ValueType*
2236    DataLazy::resolveVectorSample(ValueType& v, size_t offset, int sampleNo, size_t& roffset)
2237    {
2238    LAZYDEBUG(cout << "Resolve sample " << toString() << endl;)
2239        // collapse so we have a 'E' node or an IDENTITY for some other type
2240      if (m_readytype!='E' && m_op!=IDENTITY)
2241      {
2242        collapse();
2243      }
2244      if (m_op==IDENTITY)  
2245      {
2246        const ValueType& vec=m_id->getVectorRO();
2247        if (m_readytype=='C')
2248        {
2249        roffset=0;
2250    LAZYDEBUG(cout << "Finish  sample " << toString() << endl;)
2251        return &(vec);
2252      }      }
2253        roffset=m_id->getPointOffset(sampleNo, 0);
2254    LAZYDEBUG(cout << "Finish  sample " << toString() << endl;)
2255        return &(vec);
2256      }
2257      if (m_readytype!='E')
2258      {
2259        throw DataException("Programmer Error - Collapse did not produce an expanded node.");
2260    }    }
2261    return result;    switch (getOpgroup(m_op))
2262      {
2263      case G_UNARY:
2264      case G_UNARY_P: return resolveUnary(v, offset,sampleNo,roffset);
2265      case G_BINARY: return resolveBinary(v, offset,sampleNo,roffset);
2266      case G_NP1OUT: return resolveNP1OUT(v, offset, sampleNo,roffset);
2267      case G_NP1OUT_P: return resolveNP1OUT_P(v, offset, sampleNo,roffset);
2268      case G_TENSORPROD: return resolveTProd(v,offset, sampleNo,roffset);
2269      case G_NP1OUT_2P: return resolveNP1OUT_2P(v, offset, sampleNo, roffset);
2270      case G_REDUCTION: return resolveReduction(v, offset, sampleNo, roffset);
2271      default:
2272        throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");
2273      }
2274    
2275  }  }
2276    
2277    const DataTypes::ValueType*
2278    DataLazy::resolveSample(BufferGroup& bg, int sampleNo, size_t& roffset)
2279    {
2280    #ifdef _OPENMP
2281        int tid=omp_get_thread_num();
2282    #else
2283        int tid=0;
2284    #endif
2285    #ifdef LAZY_NODE_STORAGE
2286        return resolveNodeSample(tid, sampleNo, roffset);
2287    #else
2288        return resolveVectorSample(bg.getBuffer(tid),bg.getOffset(tid),sampleNo,roffset);
2289    #endif
2290    }
2291    
2292    
2293    // This needs to do the work of the identity constructor
2294    void
2295    DataLazy::resolveToIdentity()
2296    {
2297       if (m_op==IDENTITY)
2298        return;
2299    #ifndef LAZY_NODE_STORAGE
2300       DataReady_ptr p=resolveVectorWorker();
2301    #else
2302       DataReady_ptr p=resolveNodeWorker();
2303    #endif
2304       makeIdentity(p);
2305    }
2306    
2307    void DataLazy::makeIdentity(const DataReady_ptr& p)
2308    {
2309       m_op=IDENTITY;
2310       m_axis_offset=0;
2311       m_transpose=0;
2312       m_SL=m_SM=m_SR=0;
2313       m_children=m_height=0;
2314       m_id=p;
2315       if(p->isConstant()) {m_readytype='C';}
2316       else if(p->isExpanded()) {m_readytype='E';}
2317       else if (p->isTagged()) {m_readytype='T';}
2318       else {throw DataException("Unknown DataReady instance in convertToIdentity constructor.");}
2319       m_buffsRequired=1;
2320       m_samplesize=p->getNumDPPSample()*p->getNoValues();
2321       m_maxsamplesize=m_samplesize;
2322       m_left.reset();
2323       m_right.reset();
2324    }
2325    
2326    
2327  DataReady_ptr  DataReady_ptr
2328  DataLazy::resolve()  DataLazy::resolve()
2329  {  {
2330    // This is broken!     We need to have a buffer per thread!      resolveToIdentity();
2331    // so the allocation of v needs to move inside the loop somehow      return m_id;
2332    }
2333    
2334    #ifdef LAZY_NODE_STORAGE
2335    
2336  cout << "Sample size=" << m_samplesize << endl;  // This version of resolve uses storage in each node to hold results
2337  cout << "Buffers=" << m_buffsRequired << endl;  DataReady_ptr
2338    DataLazy::resolveNodeWorker()
2339    {
2340      if (m_readytype!='E')     // if the whole sub-expression is Constant or Tagged, then evaluate it normally
2341      {
2342        collapse();
2343      }
2344      if (m_op==IDENTITY)       // So a lazy expression of Constant or Tagged data will be returned here.
2345      {
2346        return m_id;
2347      }
2348        // from this point on we must have m_op!=IDENTITY and m_readytype=='E'
2349      DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(),  ValueType(getNoValues()));
2350      ValueType& resvec=result->getVectorRW();
2351      DataReady_ptr resptr=DataReady_ptr(result);
2352    
2353    size_t threadbuffersize=m_samplesize*(max(1,m_buffsRequired)+1);    int sample;
2354      int totalsamples=getNumSamples();
2355      const ValueType* res=0;   // Storage for answer
2356    LAZYDEBUG(cout << "Total number of samples=" <<totalsamples << endl;)
2357      #pragma omp parallel for private(sample,res) schedule(static)
2358      for (sample=0;sample<totalsamples;++sample)
2359      {
2360        size_t roffset=0;
2361    #ifdef _OPENMP
2362        res=resolveNodeSample(omp_get_thread_num(),sample,roffset);
2363    #else
2364        res=resolveNodeSample(0,sample,roffset);
2365    #endif
2366    LAZYDEBUG(cout << "Sample #" << sample << endl;)
2367    LAZYDEBUG(cout << "Final res[" << roffset<< "]=" << (*res)[roffset] << (*res)[roffset]<< endl; )
2368        DataVector::size_type outoffset=result->getPointOffset(sample,0);
2369        memcpy(&(resvec[outoffset]),&((*res)[roffset]),m_samplesize*sizeof(DataVector::ElementType));
2370      }
2371      return resptr;
2372    }
2373    
2374    #endif // LAZY_NODE_STORAGE
2375    
2376    // To simplify the memory management, all threads operate on one large vector, rather than one each.
2377    // Each sample is evaluated independently and copied into the result DataExpanded.
2378    DataReady_ptr
2379    DataLazy::resolveVectorWorker()
2380    {
2381    
2382    LAZYDEBUG(cout << "Sample size=" << m_samplesize << endl;)
2383    LAZYDEBUG(cout << "Buffers=" << m_buffsRequired << endl;)
2384      if (m_readytype!='E')     // if the whole sub-expression is Constant or Tagged, then evaluate it normally
2385      {
2386        collapse();
2387      }
2388      if (m_op==IDENTITY)       // So a lazy expression of Constant or Tagged data will be returned here.
2389      {
2390        return m_id;
2391      }
2392        // from this point on we must have m_op!=IDENTITY and m_readytype=='E'
2393      size_t threadbuffersize=m_maxsamplesize*(max(1,m_buffsRequired)); // Each thread needs to have enough
2394        // storage to evaluate its expression
2395    int numthreads=1;    int numthreads=1;
2396  #ifdef _OPENMP  #ifdef _OPENMP
2397    numthreads=omp_get_max_threads();    numthreads=omp_get_max_threads();
   int threadnum=0;  
2398  #endif  #endif
2399    ValueType v(numthreads*threadbuffersize);    ValueType v(numthreads*threadbuffersize);
2400  cout << "Buffer created with size=" << v.size() << endl;  LAZYDEBUG(cout << "Buffer created with size=" << v.size() << endl;)
2401    ValueType dummy(getNoValues());    DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(),  ValueType(getNoValues()));
2402    DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(),dummy);    ValueType& resvec=result->getVectorRW();
   ValueType& resvec=result->getVector();  
2403    DataReady_ptr resptr=DataReady_ptr(result);    DataReady_ptr resptr=DataReady_ptr(result);
2404    int sample;    int sample;
2405    int resoffset;    size_t outoffset;     // offset in the output data
2406    int totalsamples=getNumSamples();    int totalsamples=getNumSamples();
2407    #pragma omp parallel for private(sample,resoffset,threadnum) schedule(static)    const ValueType* res=0;   // Vector storing the answer
2408      size_t resoffset=0;       // where in the vector to find the answer
2409    LAZYDEBUG(cout << "Total number of samples=" <<totalsamples << endl;)
2410      #pragma omp parallel for private(sample,resoffset,outoffset,res) schedule(static)
2411    for (sample=0;sample<totalsamples;++sample)    for (sample=0;sample<totalsamples;++sample)
2412    {    {
2413    LAZYDEBUG(cout << "################################# " << sample << endl;)
2414  #ifdef _OPENMP  #ifdef _OPENMP
2415      const double* res=resolveSample(v,sample,threadbuffersize*omp_get_thread_num());      res=resolveVectorSample(v,threadbuffersize*omp_get_thread_num(),sample,resoffset);
2416  #else  #else
2417      const double* res=resolveSample(v,sample,0);   // this would normally be v, but not if its a single IDENTITY op.      res=resolveVectorSample(v,0,sample,resoffset);   // res would normally be v, but not if its a single IDENTITY op.
2418  #endif  #endif
2419      resoffset=result->getPointOffset(sample,0);  LAZYDEBUG(cerr << "-------------------------------- " << endl;)
2420      for (int i=0;i<m_samplesize;++i,++resoffset)    // copy values into the output vector  LAZYDEBUG(cerr<< "Copying sample#" << sample << endl;)
2421        outoffset=result->getPointOffset(sample,0);
2422    LAZYDEBUG(cerr << "offset=" << outoffset << " from offset=" << resoffset << " " << m_samplesize << " doubles" << endl;)
2423        for (unsigned int i=0;i<m_samplesize;++i,++outoffset,++resoffset)   // copy values into the output vector
2424      {      {
2425      resvec[resoffset]=res[i];  LAZYDEBUG(cerr << "outoffset=" << outoffset << " resoffset=" << resoffset << " " << (*res)[resoffset]<< endl;)
2426        resvec[outoffset]=(*res)[resoffset];
2427      }      }
2428    LAZYDEBUG(cerr << DataTypes::pointToString(resvec,getShape(),outoffset-m_samplesize+DataTypes::noValues(getShape()),"Final result:") << endl;)
2429    LAZYDEBUG(cerr << "*********************************" << endl;)
2430    }    }
2431    return resptr;    return resptr;
2432  }  }
# Line 435  DataLazy::toString() const Line 2440  DataLazy::toString() const
2440    return oss.str();    return oss.str();
2441  }  }
2442    
2443    
2444  void  void
2445  DataLazy::intoString(ostringstream& oss) const  DataLazy::intoString(ostringstream& oss) const
2446  {  {
2447    //    oss << "[" << m_children <<";"<<m_height <<"]";
2448    switch (getOpgroup(m_op))    switch (getOpgroup(m_op))
2449    {    {
2450    case G_IDENTITY:    case G_IDENTITY:
2451        if (m_id->isExpanded())
2452        {
2453           oss << "E";
2454        }
2455        else if (m_id->isTagged())
2456        {
2457          oss << "T";
2458        }
2459        else if (m_id->isConstant())
2460        {
2461          oss << "C";
2462        }
2463        else
2464        {
2465          oss << "?";
2466        }
2467      oss << '@' << m_id.get();      oss << '@' << m_id.get();
2468      break;      break;
2469    case G_BINARY:    case G_BINARY:
# Line 451  DataLazy::intoString(ostringstream& oss) Line 2474  DataLazy::intoString(ostringstream& oss)
2474      oss << ')';      oss << ')';
2475      break;      break;
2476    case G_UNARY:    case G_UNARY:
2477      case G_UNARY_P:
2478      case G_NP1OUT:
2479      case G_NP1OUT_P:
2480      case G_REDUCTION:
2481      oss << opToString(m_op) << '(';      oss << opToString(m_op) << '(';
2482      m_left->intoString(oss);      m_left->intoString(oss);
2483      oss << ')';      oss << ')';
2484      break;      break;
2485      case G_TENSORPROD:
2486        oss << opToString(m_op) << '(';
2487        m_left->intoString(oss);
2488        oss << ", ";
2489        m_right->intoString(oss);
2490        oss << ')';
2491        break;
2492      case G_NP1OUT_2P:
2493        oss << opToString(m_op) << '(';
2494        m_left->intoString(oss);
2495        oss << ", " << m_axis_offset << ", " << m_transpose;
2496        oss << ')';
2497        break;
2498    default:    default:
2499      oss << "UNKNOWN";      oss << "UNKNOWN";
2500    }    }
2501  }  }
2502    
 // Note that in this case, deepCopy does not make copies of the leaves.  
 // Hopefully copy on write (or whatever we end up using) will take care of this.  
2503  DataAbstract*  DataAbstract*
2504  DataLazy::deepCopy()  DataLazy::deepCopy()
2505  {  {
2506    if (m_op==IDENTITY)    switch (getOpgroup(m_op))
2507    {    {
2508      return new DataLazy(m_left);    // we don't need to copy the child here    case G_IDENTITY:  return new DataLazy(m_id->deepCopy()->getPtr());
2509      case G_UNARY:
2510      case G_REDUCTION:      return new DataLazy(m_left->deepCopy()->getPtr(),m_op);
2511      case G_UNARY_P:   return new DataLazy(m_left->deepCopy()->getPtr(), m_op, m_tol);
2512      case G_BINARY:    return new DataLazy(m_left->deepCopy()->getPtr(),m_right->deepCopy()->getPtr(),m_op);
2513      case G_NP1OUT: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(),m_op);
2514      case G_TENSORPROD: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
2515      case G_NP1OUT_P:   return new DataLazy(m_left->deepCopy()->getPtr(),m_op,  m_axis_offset);
2516      case G_NP1OUT_2P:  return new DataLazy(m_left->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
2517      default:
2518        throw DataException("Programmer error - do not know how to deepcopy operator "+opToString(m_op)+".");
2519    }    }
   return new DataLazy(m_left->deepCopy()->getPtr(),m_right->deepCopy()->getPtr(),m_op);  
2520  }  }
2521    
2522    
2523    
2524    // There is no single, natural interpretation of getLength on DataLazy.
2525    // Instances of DataReady can look at the size of their vectors.
2526    // For lazy though, it could be the size the data would be if it were resolved;
2527    // or it could be some function of the lengths of the DataReady instances which
2528    // form part of the expression.
2529    // Rather than have people making assumptions, I have disabled the method.
2530  DataTypes::ValueType::size_type  DataTypes::ValueType::size_type
2531  DataLazy::getLength() const  DataLazy::getLength() const
2532  {  {
2533    return m_length;    throw DataException("getLength() does not make sense for lazy data.");
2534  }  }
2535    
2536    
# Line 486  DataLazy::getSlice(const DataTypes::Regi Line 2540  DataLazy::getSlice(const DataTypes::Regi
2540    throw DataException("getSlice - not implemented for Lazy objects.");    throw DataException("getSlice - not implemented for Lazy objects.");
2541  }  }
2542    
2543    
2544    // To do this we need to rely on our child nodes
2545    DataTypes::ValueType::size_type
2546    DataLazy::getPointOffset(int sampleNo,
2547                     int dataPointNo)
2548    {
2549      if (m_op==IDENTITY)
2550      {
2551        return m_id->getPointOffset(sampleNo,dataPointNo);
2552      }
2553      if (m_readytype!='E')
2554      {
2555        collapse();
2556        return m_id->getPointOffset(sampleNo,dataPointNo);
2557      }
2558      // at this point we do not have an identity node and the expression will be Expanded
2559      // so we only need to know which child to ask
2560      if (m_left->m_readytype=='E')
2561      {
2562        return m_left->getPointOffset(sampleNo,dataPointNo);
2563      }
2564      else
2565      {
2566        return m_right->getPointOffset(sampleNo,dataPointNo);
2567      }
2568    }
2569    
2570    // To do this we need to rely on our child nodes
2571  DataTypes::ValueType::size_type  DataTypes::ValueType::size_type
2572  DataLazy::getPointOffset(int sampleNo,  DataLazy::getPointOffset(int sampleNo,
2573                   int dataPointNo) const                   int dataPointNo) const
2574  {  {
2575    throw DataException("getPointOffset - not implemented for Lazy objects - yet.");    if (m_op==IDENTITY)
2576      {
2577        return m_id->getPointOffset(sampleNo,dataPointNo);
2578      }
2579      if (m_readytype=='E')
2580      {
2581        // at this point we do not have an identity node and the expression will be Expanded
2582        // so we only need to know which child to ask
2583        if (m_left->m_readytype=='E')
2584        {
2585        return m_left->getPointOffset(sampleNo,dataPointNo);
2586        }
2587        else
2588        {
2589        return m_right->getPointOffset(sampleNo,dataPointNo);
2590        }
2591      }
2592      if (m_readytype=='C')
2593      {
2594        return m_left->getPointOffset(sampleNo,dataPointNo); // which child doesn't matter
2595      }
2596      throw DataException("Programmer error - getPointOffset on lazy data may require collapsing (but this object is marked const).");
2597    }
2598    
2599    
2600    // I have decided to let Data:: handle this issue.
2601    void
2602    DataLazy::setToZero()
2603    {
2604    //   DataTypes::ValueType v(getNoValues(),0);
2605    //   m_id=DataReady_ptr(new DataConstant(getFunctionSpace(),getShape(),v));
2606    //   m_op=IDENTITY;
2607    //   m_right.reset();  
2608    //   m_left.reset();
2609    //   m_readytype='C';
2610    //   m_buffsRequired=1;
2611    
2612      privdebug=privdebug;  // to stop the compiler complaining about unused privdebug
2613      throw DataException("Programmer error - setToZero not supported for DataLazy (DataLazy objects should be read only).");
2614    }
2615    
2616    bool
2617    DataLazy::actsExpanded() const
2618    {
2619        return (m_readytype=='E');
2620  }  }
2621    
2622  }   // end namespace  }   // end namespace

Legend:
Removed from v.1888  
changed lines
  Added in v.2721

  ViewVC Help
Powered by ViewVC 1.1.26