/[escript]/trunk/escript/src/DataLazy.cpp
ViewVC logotype

Diff of /trunk/escript/src/DataLazy.cpp

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

branches/schroedinger/escript/src/DataLazy.cpp revision 1898 by jfenwick, Mon Oct 20 01:20:18 2008 UTC trunk/escript/src/DataLazy.cpp revision 2092 by jfenwick, Tue Nov 25 04:18:17 2008 UTC
# Line 26  Line 26 
26  #include "DataTypes.h"  #include "DataTypes.h"
27  #include "Data.h"  #include "Data.h"
28  #include "UnaryFuncs.h"     // for escript::fsign  #include "UnaryFuncs.h"     // for escript::fsign
29    #include "Utils.h"
30    
31    // #define LAZYDEBUG(X) X;
32    #define LAZYDEBUG(X)
33    
34    /*
35    How does DataLazy work?
36    ~~~~~~~~~~~~~~~~~~~~~~~
37    
38    Each instance represents a single operation on one or two other DataLazy instances. These arguments are normally
39    denoted left and right.
40    
41    A special operation, IDENTITY, stores an instance of DataReady in the m_id member.
42    This means that all "internal" nodes in the structure are instances of DataLazy.
43    
44    Each operation has a string representation as well as an opgroup - eg G_IDENTITY, G_BINARY, ...
45    Note that IDENITY is not considered a unary operation.
46    
47    I am avoiding calling the structure formed a tree because it is not guaranteed to be one (eg c=a+a).
48    It must however form a DAG (directed acyclic graph).
49    I will refer to individual DataLazy objects with the structure as nodes.
50    
51    Each node also stores:
52    - m_readytype \in {'E','T','C','?'} ~ indicates what sort of DataReady would be produced if the expression was
53        evaluated.
54    - m_buffsrequired ~ the larged number of samples which would need to be kept simultaneously in order to
55        evaluate the expression.
56    - m_samplesize ~ the number of doubles stored in a sample.
57    
58    When a new node is created, the above values are computed based on the values in the child nodes.
59    Eg: if left requires 4 samples and right requires 6 then left+right requires 7 samples.
60    
61    The resolve method, which produces a DataReady from a DataLazy, does the following:
62    1) Create a DataReady to hold the new result.
63    2) Allocate a vector (v) big enough to hold m_buffsrequired samples.
64    3) For each sample, call resolveSample with v, to get its values and copy them into the result object.
65    
66    (In the case of OMP, multiple samples are resolved in parallel so the vector needs to be larger.)
67    
68    resolveSample returns a Vector* and an offset within that vector where the result is stored.
69    Normally, this would be v, but for identity nodes their internal vector is returned instead.
70    
71    The convention that I use, is that the resolve methods should store their results starting at the offset they are passed.
72    
73    For expressions which evaluate to Constant or Tagged, there is a different evaluation method.
74    The collapse method invokes the (non-lazy) operations on the Data class to evaluate the expression.
75    
76    To add a new operator you need to do the following (plus anything I might have forgotten):
77    1) Add to the ES_optype.
78    2) determine what opgroup your operation belongs to (X)
79    3) add a string for the op to the end of ES_opstrings
80    4) increase ES_opcount
81    5) add an entry (X) to opgroups
82    6) add an entry to the switch in collapseToReady
83    7) add an entry to resolveX
84    */
85    
86    
87  using namespace std;  using namespace std;
88  using namespace boost;  using namespace boost;
# Line 33  using namespace boost; Line 90  using namespace boost;
90  namespace escript  namespace escript
91  {  {
92    
 const std::string&  
 opToString(ES_optype op);  
   
93  namespace  namespace
94  {  {
95    
   
   
96  enum ES_opgroup  enum ES_opgroup
97  {  {
98     G_UNKNOWN,     G_UNKNOWN,
99     G_IDENTITY,     G_IDENTITY,
100     G_BINARY,     G_BINARY,        // pointwise operations with two arguments
101     G_UNARY     G_UNARY,     // pointwise operations with one argument
102       G_NP1OUT,        // non-pointwise op with one output
103       G_NP1OUT_P,      // non-pointwise op with one output requiring a parameter
104       G_TENSORPROD     // general tensor product
105  };  };
106    
107    
108    
109    
110  string ES_opstrings[]={"UNKNOWN","IDENTITY","+","-","*","/","sin","cos","tan",  string ES_opstrings[]={"UNKNOWN","IDENTITY","+","-","*","/","^",
111                "sin","cos","tan",
112              "asin","acos","atan","sinh","cosh","tanh","erf",              "asin","acos","atan","sinh","cosh","tanh","erf",
113              "asinh","acosh","atanh",              "asinh","acosh","atanh",
114              "log10","log","sign","abs","neg","pos","exp","sqrt",              "log10","log","sign","abs","neg","pos","exp","sqrt",
115              "1/","where>0","where<0","where>=0","where<=0"};              "1/","where>0","where<0","where>=0","where<=0",
116  int ES_opcount=32;              "symmetric","nonsymmetric",
117  ES_opgroup opgroups[]={G_UNKNOWN,G_IDENTITY,G_BINARY,G_BINARY,G_BINARY,G_BINARY,G_UNARY,G_UNARY,G_UNARY, //9              "prod",
118              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,    // 16              "transpose",
119              G_UNARY,G_UNARY,G_UNARY,                    // 19              "trace"};
120              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,        // 27  int ES_opcount=38;
121              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY};  ES_opgroup opgroups[]={G_UNKNOWN,G_IDENTITY,G_BINARY,G_BINARY,G_BINARY,G_BINARY, G_BINARY,
122                G_UNARY,G_UNARY,G_UNARY, //10
123                G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,    // 17
124                G_UNARY,G_UNARY,G_UNARY,                    // 20
125                G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,        // 28
126                G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,            // 33
127                G_NP1OUT,G_NP1OUT,
128                G_TENSORPROD,
129                G_NP1OUT_P, G_NP1OUT_P};
130  inline  inline
131  ES_opgroup  ES_opgroup
132  getOpgroup(ES_optype op)  getOpgroup(ES_optype op)
# Line 79  resultFS(DataAbstract_ptr left, DataAbst Line 143  resultFS(DataAbstract_ptr left, DataAbst
143      // that way, if interpolate is required in any other op we can just throw a      // that way, if interpolate is required in any other op we can just throw a
144      // programming error exception.      // programming error exception.
145    
146      FunctionSpace l=left->getFunctionSpace();
147      if (left->getFunctionSpace()!=right->getFunctionSpace())    FunctionSpace r=right->getFunctionSpace();
148      {    if (l!=r)
149          throw DataException("FunctionSpaces not equal - interpolation not supported on lazy data.");    {
150      }      if (r.probeInterpolation(l))
151      return left->getFunctionSpace();      {
152        return l;
153        }
154        if (l.probeInterpolation(r))
155        {
156        return r;
157        }
158        throw DataException("Cannot interpolate between the FunctionSpaces given for operation "+opToString(op)+".");
159      }
160      return l;
161  }  }
162    
163  // return the shape of the result of "left op right"  // return the shape of the result of "left op right"
164    // the shapes resulting from tensor product are more complex to compute so are worked out elsewhere
165  DataTypes::ShapeType  DataTypes::ShapeType
166  resultShape(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)  resultShape(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
167  {  {
168      if (left->getShape()!=right->getShape())      if (left->getShape()!=right->getShape())
169      {      {
170          throw DataException("Shapes not the same - shapes must match for lazy data.");        if ((getOpgroup(op)!=G_BINARY) && (getOpgroup(op)!=G_NP1OUT))
171          {
172            throw DataException("Shapes not the name - shapes must match for (point)binary operations.");
173          }
174          if (left->getRank()==0)   // we need to allow scalar * anything
175          {
176            return right->getShape();
177          }
178          if (right->getRank()==0)
179          {
180            return left->getShape();
181          }
182          throw DataException("Shapes not the same - arguments must have matching shapes (or be scalars) for (point)binary operations on lazy data.");
183      }      }
184      return left->getShape();      return left->getShape();
185  }  }
186    
187  size_t  // return the shape for "op left"
188  resultLength(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)  
189    DataTypes::ShapeType
190    resultShape(DataAbstract_ptr left, ES_optype op)
191  {  {
192     switch (getOpgroup(op))      switch(op)
193     {      {
194     case G_BINARY: return left->getLength();          case TRANS:
195     case G_UNARY: return left->getLength();          return left->getShape();
196     default:      break;
197      throw DataException("Programmer Error - attempt to getLength() for operator "+opToString(op)+".");      case TRACE:
198     }          return DataTypes::scalarShape;
199        break;
200            default:
201        throw DataException("Programmer error - resultShape(left,op) can't compute shapes for operator "+opToString(op)+".");
202        }
203  }  }
204    
205    // determine the output shape for the general tensor product operation
206    // the additional parameters return information required later for the product
207    // the majority of this code is copy pasted from C_General_Tensor_Product
208    DataTypes::ShapeType
209    GTPShape(DataAbstract_ptr left, DataAbstract_ptr right, int axis_offset, int transpose, int& SL, int& SM, int& SR)
210    {
211        
212      // Get rank and shape of inputs
213      int rank0 = left->getRank();
214      int rank1 = right->getRank();
215      const DataTypes::ShapeType& shape0 = left->getShape();
216      const DataTypes::ShapeType& shape1 = right->getShape();
217    
218      // Prepare for the loops of the product and verify compatibility of shapes
219      int start0=0, start1=0;
220      if (transpose == 0)       {}
221      else if (transpose == 1)  { start0 = axis_offset; }
222      else if (transpose == 2)  { start1 = rank1-axis_offset; }
223      else              { throw DataException("DataLazy GeneralTensorProduct Constructor: Error - transpose should be 0, 1 or 2"); }
224    
225      if (rank0<axis_offset)
226      {
227        throw DataException("DataLazy GeneralTensorProduct Constructor: Error - rank of left < axisoffset");
228      }
229    
230      // Adjust the shapes for transpose
231      DataTypes::ShapeType tmpShape0(rank0);    // pre-sizing the vectors rather
232      DataTypes::ShapeType tmpShape1(rank1);    // than using push_back
233      for (int i=0; i<rank0; i++)   { tmpShape0[i]=shape0[(i+start0)%rank0]; }
234      for (int i=0; i<rank1; i++)   { tmpShape1[i]=shape1[(i+start1)%rank1]; }
235    
236      // Prepare for the loops of the product
237      SL=1, SM=1, SR=1;
238      for (int i=0; i<rank0-axis_offset; i++)   {
239        SL *= tmpShape0[i];
240      }
241      for (int i=rank0-axis_offset; i<rank0; i++)   {
242        if (tmpShape0[i] != tmpShape1[i-(rank0-axis_offset)]) {
243          throw DataException("C_GeneralTensorProduct: Error - incompatible shapes");
244        }
245        SM *= tmpShape0[i];
246      }
247      for (int i=axis_offset; i<rank1; i++)     {
248        SR *= tmpShape1[i];
249      }
250    
251      // Define the shape of the output (rank of shape is the sum of the loop ranges below)
252      DataTypes::ShapeType shape2(rank0+rank1-2*axis_offset);  
253      {         // block to limit the scope of out_index
254         int out_index=0;
255         for (int i=0; i<rank0-axis_offset; i++, ++out_index) { shape2[out_index]=tmpShape0[i]; } // First part of arg_0_Z
256         for (int i=axis_offset; i<rank1; i++, ++out_index)   { shape2[out_index]=tmpShape1[i]; } // Last part of arg_1_Z
257      }
258    
259      if (shape2.size()>ESCRIPT_MAX_DATA_RANK)
260      {
261         ostringstream os;
262         os << "C_GeneralTensorProduct: Error - Attempt to create a rank " << shape2.size() << " object. The maximum rank is " << ESCRIPT_MAX_DATA_RANK << ".";
263         throw DataException(os.str());
264      }
265    
266      return shape2;
267    }
268    
269    
270    // determine the number of points in the result of "left op right"
271    // note that determining the resultLength for G_TENSORPROD is more complex and will not be processed here
272    // size_t
273    // resultLength(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
274    // {
275    //    switch (getOpgroup(op))
276    //    {
277    //    case G_BINARY: return left->getLength();
278    //    case G_UNARY: return left->getLength();
279    //    case G_NP1OUT: return left->getLength();
280    //    default:
281    //  throw DataException("Programmer Error - attempt to getLength() for operator "+opToString(op)+".");
282    //    }
283    // }
284    
285    // determine the number of samples requires to evaluate an expression combining left and right
286    // NP1OUT needs an extra buffer because we can't write the answers over the top of the input.
287    // The same goes for G_TENSORPROD
288  int  int
289  calcBuffs(const DataLazy_ptr& left, const DataLazy_ptr& right, ES_optype op)  calcBuffs(const DataLazy_ptr& left, const DataLazy_ptr& right, ES_optype op)
290  {  {
# Line 118  calcBuffs(const DataLazy_ptr& left, cons Line 293  calcBuffs(const DataLazy_ptr& left, cons
293     case G_IDENTITY: return 1;     case G_IDENTITY: return 1;
294     case G_BINARY: return max(left->getBuffsRequired(),right->getBuffsRequired()+1);     case G_BINARY: return max(left->getBuffsRequired(),right->getBuffsRequired()+1);
295     case G_UNARY: return max(left->getBuffsRequired(),1);     case G_UNARY: return max(left->getBuffsRequired(),1);
296       case G_NP1OUT: return 1+max(left->getBuffsRequired(),1);
297       case G_NP1OUT_P: return 1+max(left->getBuffsRequired(),1);
298       case G_TENSORPROD: return 1+max(left->getBuffsRequired(),right->getBuffsRequired()+1);
299     default:     default:
300      throw DataException("Programmer Error - attempt to calcBuffs() for operator "+opToString(op)+".");      throw DataException("Programmer Error - attempt to calcBuffs() for operator "+opToString(op)+".");
301     }     }
302  }  }
303    
304    
305  }   // end anonymous namespace  }   // end anonymous namespace
306    
307    
308    
309    // Return a string representing the operation
310  const std::string&  const std::string&
311  opToString(ES_optype op)  opToString(ES_optype op)
312  {  {
# Line 139  opToString(ES_optype op) Line 320  opToString(ES_optype op)
320    
321  DataLazy::DataLazy(DataAbstract_ptr p)  DataLazy::DataLazy(DataAbstract_ptr p)
322      : parent(p->getFunctionSpace(),p->getShape()),      : parent(p->getFunctionSpace(),p->getShape()),
323      m_op(IDENTITY)      m_op(IDENTITY),
324        m_axis_offset(0),
325        m_transpose(0),
326        m_SL(0), m_SM(0), m_SR(0)
327  {  {
328     if (p->isLazy())     if (p->isLazy())
329     {     {
     // TODO: fix this.   We could make the new node a copy of p?  
330      // I don't want identity of Lazy.      // I don't want identity of Lazy.
331      // Question: Why would that be so bad?      // Question: Why would that be so bad?
332      // Answer: We assume that the child of ID is something we can call getVector on      // Answer: We assume that the child of ID is something we can call getVector on
# Line 157  DataLazy::DataLazy(DataAbstract_ptr p) Line 340  DataLazy::DataLazy(DataAbstract_ptr p)
340      else if (p->isTagged()) {m_readytype='T';}      else if (p->isTagged()) {m_readytype='T';}
341      else {throw DataException("Unknown DataReady instance in DataLazy constructor.");}      else {throw DataException("Unknown DataReady instance in DataLazy constructor.");}
342     }     }
    m_length=p->getLength();  
343     m_buffsRequired=1;     m_buffsRequired=1;
344     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
345  cout << "(1)Lazy created with " << m_samplesize << endl;     m_maxsamplesize=m_samplesize;
346    LAZYDEBUG(cout << "(1)Lazy created with " << m_samplesize << endl;)
347  }  }
348    
349    
350    
351    
352  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op)  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op)
353      : parent(left->getFunctionSpace(),left->getShape()),      : parent(left->getFunctionSpace(),left->getShape()),
354      m_op(op)      m_op(op),
355        m_axis_offset(0),
356        m_transpose(0),
357        m_SL(0), m_SM(0), m_SR(0)
358  {  {
359     if (getOpgroup(op)!=G_UNARY)     if ((getOpgroup(op)!=G_UNARY) && (getOpgroup(op)!=G_NP1OUT))
360     {     {
361      throw DataException("Programmer error - constructor DataLazy(left, op) will only process UNARY operations.");      throw DataException("Programmer error - constructor DataLazy(left, op) will only process UNARY operations.");
362     }     }
363    
364     DataLazy_ptr lleft;     DataLazy_ptr lleft;
365     if (!left->isLazy())     if (!left->isLazy())
366     {     {
# Line 181  DataLazy::DataLazy(DataAbstract_ptr left Line 371  DataLazy::DataLazy(DataAbstract_ptr left
371      lleft=dynamic_pointer_cast<DataLazy>(left);      lleft=dynamic_pointer_cast<DataLazy>(left);
372     }     }
373     m_readytype=lleft->m_readytype;     m_readytype=lleft->m_readytype;
    m_length=left->getLength();  
374     m_left=lleft;     m_left=lleft;
375     m_buffsRequired=1;     m_buffsRequired=calcBuffs(m_left, m_right,m_op); // yeah m_right will be null at this point
376     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
377       m_maxsamplesize=max(m_samplesize,m_left->getMaxSampleSize());
378  }  }
379    
380    
381  // DataLazy::DataLazy(DataLazy_ptr left, DataLazy_ptr right, ES_optype op)  // In this constructor we need to consider interpolation
 //  : parent(resultFS(left,right,op), resultShape(left,right,op)),  
 //  m_left(left),  
 //  m_right(right),  
 //  m_op(op)  
 // {  
 //    if (getOpgroup(op)!=G_BINARY)  
 //    {  
 //  throw DataException("Programmer error - constructor DataLazy(left, right, op) will only process BINARY operations.");  
 //    }  
 //    m_length=resultLength(m_left,m_right,m_op);  
 //    m_samplesize=getNumDPPSample()*getNoValues();  
 //    m_buffsRequired=calcBuffs(m_left, m_right, m_op);  
 // cout << "(2)Lazy created with " << m_samplesize << endl;  
 // }  
   
382  DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)  DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
383      : parent(resultFS(left,right,op), resultShape(left,right,op)),      : parent(resultFS(left,right,op), resultShape(left,right,op)),
384      m_op(op)      m_op(op),
385        m_SL(0), m_SM(0), m_SR(0)
386  {  {
387     if (getOpgroup(op)!=G_BINARY)     if ((getOpgroup(op)!=G_BINARY))
388     {     {
389      throw DataException("Programmer error - constructor DataLazy(left, right, op) will only process BINARY operations.");      throw DataException("Programmer error - constructor DataLazy(left, right, op) will only process BINARY operations.");
390     }     }
391     if (left->isLazy())  
392       if (getFunctionSpace()!=left->getFunctionSpace())    // left needs to be interpolated
393       {
394        FunctionSpace fs=getFunctionSpace();
395        Data ltemp(left);
396        Data tmp(ltemp,fs);
397        left=tmp.borrowDataPtr();
398       }
399       if (getFunctionSpace()!=right->getFunctionSpace())   // right needs to be interpolated
400       {
401        Data tmp(Data(right),getFunctionSpace());
402        right=tmp.borrowDataPtr();
403       }
404       left->operandCheck(*right);
405    
406       if (left->isLazy())          // the children need to be DataLazy. Wrap them in IDENTITY if required
407       {
408        m_left=dynamic_pointer_cast<DataLazy>(left);
409       }
410       else
411       {
412        m_left=DataLazy_ptr(new DataLazy(left));
413       }
414       if (right->isLazy())
415       {
416        m_right=dynamic_pointer_cast<DataLazy>(right);
417       }
418       else
419       {
420        m_right=DataLazy_ptr(new DataLazy(right));
421       }
422       char lt=m_left->m_readytype;
423       char rt=m_right->m_readytype;
424       if (lt=='E' || rt=='E')
425       {
426        m_readytype='E';
427       }
428       else if (lt=='T' || rt=='T')
429       {
430        m_readytype='T';
431       }
432       else
433       {
434        m_readytype='C';
435       }
436       m_samplesize=getNumDPPSample()*getNoValues();
437       m_maxsamplesize=max(max(m_samplesize,m_right->getMaxSampleSize()),m_left->getMaxSampleSize());  
438       m_buffsRequired=calcBuffs(m_left, m_right,m_op);
439    LAZYDEBUG(cout << "(3)Lazy created with " << m_samplesize << endl;)
440    }
441    
442    DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op, int axis_offset, int transpose)
443        : parent(resultFS(left,right,op), GTPShape(left,right, axis_offset, transpose, m_SL,m_SM, m_SR)),
444        m_op(op),
445        m_axis_offset(axis_offset),
446        m_transpose(transpose)
447    {
448       if ((getOpgroup(op)!=G_TENSORPROD))
449       {
450        throw DataException("Programmer error - constructor DataLazy(left, right, op, ax, tr) will only process BINARY operations which require parameters.");
451       }
452       if ((transpose>2) || (transpose<0))
453       {
454        throw DataException("DataLazy GeneralTensorProduct constructor: Error - transpose should be 0, 1 or 2");
455       }
456       if (getFunctionSpace()!=left->getFunctionSpace())    // left needs to be interpolated
457       {
458        FunctionSpace fs=getFunctionSpace();
459        Data ltemp(left);
460        Data tmp(ltemp,fs);
461        left=tmp.borrowDataPtr();
462       }
463       if (getFunctionSpace()!=right->getFunctionSpace())   // right needs to be interpolated
464       {
465        Data tmp(Data(right),getFunctionSpace());
466        right=tmp.borrowDataPtr();
467       }
468       left->operandCheck(*right);
469    
470       if (left->isLazy())          // the children need to be DataLazy. Wrap them in IDENTITY if required
471     {     {
472      m_left=dynamic_pointer_cast<DataLazy>(left);      m_left=dynamic_pointer_cast<DataLazy>(left);
473     }     }
# Line 242  DataLazy::DataLazy(DataAbstract_ptr left Line 497  DataLazy::DataLazy(DataAbstract_ptr left
497     {     {
498      m_readytype='C';      m_readytype='C';
499     }     }
    m_length=resultLength(m_left,m_right,m_op);  
500     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
501       m_maxsamplesize=max(max(m_samplesize,m_right->getMaxSampleSize()),m_left->getMaxSampleSize());  
502     m_buffsRequired=calcBuffs(m_left, m_right,m_op);     m_buffsRequired=calcBuffs(m_left, m_right,m_op);
503  cout << "(3)Lazy created with " << m_samplesize << endl;  LAZYDEBUG(cout << "(4)Lazy created with " << m_samplesize << endl;)
504    }
505    
506    
507    DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, int axis_offset)
508        : parent(left->getFunctionSpace(), resultShape(left,op)),
509        m_op(op),
510        m_axis_offset(axis_offset),
511        m_transpose(0)
512    {
513       if ((getOpgroup(op)!=G_NP1OUT_P))
514       {
515        throw DataException("Programmer error - constructor DataLazy(left, op, ax) will only process UNARY operations which require parameters.");
516       }
517       DataLazy_ptr lleft;
518       if (!left->isLazy())
519       {
520        lleft=DataLazy_ptr(new DataLazy(left));
521       }
522       else
523       {
524        lleft=dynamic_pointer_cast<DataLazy>(left);
525       }
526       m_readytype=lleft->m_readytype;
527       m_left=lleft;
528       m_buffsRequired=calcBuffs(m_left, m_right,m_op); // yeah m_right will be null at this point
529       m_samplesize=getNumDPPSample()*getNoValues();
530       m_maxsamplesize=max(m_samplesize,m_left->getMaxSampleSize());
531    LAZYDEBUG(cout << "(5)Lazy created with " << m_samplesize << endl;)
532  }  }
533    
534    
# Line 261  DataLazy::getBuffsRequired() const Line 544  DataLazy::getBuffsRequired() const
544  }  }
545    
546    
547    size_t
548    DataLazy::getMaxSampleSize() const
549    {
550        return m_maxsamplesize;
551    }
552    
553    /*
554      \brief Evaluates the expression using methods on Data.
555      This does the work for the collapse method.
556      For reasons of efficiency do not call this method on DataExpanded nodes.
557    */
558  DataReady_ptr  DataReady_ptr
559  DataLazy::collapseToReady()  DataLazy::collapseToReady()
560  {  {
# Line 275  DataLazy::collapseToReady() Line 569  DataLazy::collapseToReady()
569    DataReady_ptr pleft=m_left->collapseToReady();    DataReady_ptr pleft=m_left->collapseToReady();
570    Data left(pleft);    Data left(pleft);
571    Data right;    Data right;
572    if (getOpgroup(m_op)==G_BINARY)    if ((getOpgroup(m_op)==G_BINARY) || (getOpgroup(m_op)==G_TENSORPROD))
573    {    {
574      right=Data(m_right->collapseToReady());      right=Data(m_right->collapseToReady());
575    }    }
# Line 374  DataLazy::collapseToReady() Line 668  DataLazy::collapseToReady()
668      case LEZ:      case LEZ:
669      result=left.whereNonPositive();      result=left.whereNonPositive();
670      break;      break;
671        case SYM:
672        result=left.symmetric();
673        break;
674        case NSYM:
675        result=left.nonsymmetric();
676        break;
677        case PROD:
678        result=C_GeneralTensorProduct(left,right,m_axis_offset, m_transpose);
679        break;
680        case TRANS:
681        result=left.transpose(m_axis_offset);
682        break;
683        case TRACE:
684        result=left.trace(m_axis_offset);
685        break;
686      default:      default:
687      throw DataException("Programmer error - do not know how to resolve operator "+opToString(m_op)+".");      throw DataException("Programmer error - collapseToReady does not know how to resolve operator "+opToString(m_op)+".");
688    }    }
689    return result.borrowReadyPtr();    return result.borrowReadyPtr();
690  }  }
691    
692    /*
693       \brief Converts the DataLazy into an IDENTITY storing the value of the expression.
694       This method uses the original methods on the Data class to evaluate the expressions.
695       For this reason, it should not be used on DataExpanded instances. (To do so would defeat
696       the purpose of using DataLazy in the first place).
697    */
698  void  void
699  DataLazy::collapse()  DataLazy::collapse()
700  {  {
# Line 395  DataLazy::collapse() Line 710  DataLazy::collapse()
710    m_op=IDENTITY;    m_op=IDENTITY;
711  }  }
712    
713    /*
714      \brief Compute the value of the expression (unary operation) for the given sample.
715      \return Vector which stores the value of the subexpression for the given sample.
716      \param v A vector to store intermediate results.
717      \param offset Index in v to begin storing results.
718      \param sampleNo Sample number to evaluate.
719      \param roffset (output parameter) the offset in the return vector where the result begins.
720    
721      The return value will be an existing vector so do not deallocate it.
722      If the result is stored in v it should be stored at the offset given.
723      Everything from offset to the end of v should be considered available for this method to use.
724    */
725  DataTypes::ValueType*  DataTypes::ValueType*
726  DataLazy::resolveUnary(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const  DataLazy::resolveUnary(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const
727  {  {
# Line 412  DataLazy::resolveUnary(ValueType& v, siz Line 739  DataLazy::resolveUnary(ValueType& v, siz
739    switch (m_op)    switch (m_op)
740    {    {
741      case SIN:        case SIN:  
742      tensor_unary_operation(m_samplesize, left, result, ::sin);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sin);
743      break;      break;
744      case COS:      case COS:
745      tensor_unary_operation(m_samplesize, left, result, ::cos);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cos);
746      break;      break;
747      case TAN:      case TAN:
748      tensor_unary_operation(m_samplesize, left, result, ::tan);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tan);
749      break;      break;
750      case ASIN:      case ASIN:
751      tensor_unary_operation(m_samplesize, left, result, ::asin);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::asin);
752      break;      break;
753      case ACOS:      case ACOS:
754      tensor_unary_operation(m_samplesize, left, result, ::acos);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::acos);
755      break;      break;
756      case ATAN:      case ATAN:
757      tensor_unary_operation(m_samplesize, left, result, ::atan);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::atan);
758      break;      break;
759      case SINH:      case SINH:
760      tensor_unary_operation(m_samplesize, left, result, ::sinh);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sinh);
761      break;      break;
762      case COSH:      case COSH:
763      tensor_unary_operation(m_samplesize, left, result, ::cosh);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cosh);
764      break;      break;
765      case TANH:      case TANH:
766      tensor_unary_operation(m_samplesize, left, result, ::tanh);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tanh);
767      break;      break;
768      case ERF:      case ERF:
769  #ifdef _WIN32  #if defined (_WIN32) && !defined(__INTEL_COMPILER)
770      throw DataException("Error - Data:: erf function is not supported on _WIN32 platforms.");      throw DataException("Error - Data:: erf function is not supported on _WIN32 platforms.");
771  #else  #else
772      tensor_unary_operation(m_samplesize, left, result, ::erf);      tensor_unary_operation(m_samplesize, left, result, ::erf);
773      break;      break;
774  #endif  #endif
775     case ASINH:     case ASINH:
776  #ifdef _WIN32  #if defined (_WIN32) && !defined(__INTEL_COMPILER)
777      tensor_unary_operation(m_samplesize, left, result, escript::asinh_substitute);      tensor_unary_operation(m_samplesize, left, result, escript::asinh_substitute);
778  #else  #else
779      tensor_unary_operation(m_samplesize, left, result, ::asinh);      tensor_unary_operation(m_samplesize, left, result, ::asinh);
780  #endif    #endif  
781      break;      break;
782     case ACOSH:     case ACOSH:
783  #ifdef _WIN32  #if defined (_WIN32) && !defined(__INTEL_COMPILER)
784      tensor_unary_operation(m_samplesize, left, result, escript::acosh_substitute);      tensor_unary_operation(m_samplesize, left, result, escript::acosh_substitute);
785  #else  #else
786      tensor_unary_operation(m_samplesize, left, result, ::acosh);      tensor_unary_operation(m_samplesize, left, result, ::acosh);
787  #endif    #endif  
788      break;      break;
789     case ATANH:     case ATANH:
790  #ifdef _WIN32  #if defined (_WIN32) && !defined(__INTEL_COMPILER)
791      tensor_unary_operation(m_samplesize, left, result, escript::atanh_substitute);      tensor_unary_operation(m_samplesize, left, result, escript::atanh_substitute);
792  #else  #else
793      tensor_unary_operation(m_samplesize, left, result, ::atanh);      tensor_unary_operation(m_samplesize, left, result, ::atanh);
794  #endif    #endif  
795      break;      break;
796      case LOG10:      case LOG10:
797      tensor_unary_operation(m_samplesize, left, result, ::log10);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log10);
798      break;      break;
799      case LOG:      case LOG:
800      tensor_unary_operation(m_samplesize, left, result, ::log);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log);
801      break;      break;
802      case SIGN:      case SIGN:
803      tensor_unary_operation(m_samplesize, left, result, escript::fsign);      tensor_unary_operation(m_samplesize, left, result, escript::fsign);
804      break;      break;
805      case ABS:      case ABS:
806      tensor_unary_operation(m_samplesize, left, result, ::fabs);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::fabs);
807      break;      break;
808      case NEG:      case NEG:
809      tensor_unary_operation(m_samplesize, left, result, negate<double>());      tensor_unary_operation(m_samplesize, left, result, negate<double>());
# Line 487  DataLazy::resolveUnary(ValueType& v, siz Line 814  DataLazy::resolveUnary(ValueType& v, siz
814      throw DataException("Programmer error - POS not supported for lazy data.");      throw DataException("Programmer error - POS not supported for lazy data.");
815      break;      break;
816      case EXP:      case EXP:
817      tensor_unary_operation(m_samplesize, left, result, ::exp);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::exp);
818      break;      break;
819      case SQRT:      case SQRT:
820      tensor_unary_operation(m_samplesize, left, result, ::sqrt);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sqrt);
821      break;      break;
822      case RECIP:      case RECIP:
823      tensor_unary_operation(m_samplesize, left, result, bind1st(divides<double>(),1.));      tensor_unary_operation(m_samplesize, left, result, bind1st(divides<double>(),1.));
# Line 515  DataLazy::resolveUnary(ValueType& v, siz Line 842  DataLazy::resolveUnary(ValueType& v, siz
842  }  }
843    
844    
845    /*
846      \brief Compute the value of the expression (unary operation) for the given sample.
847      \return Vector which stores the value of the subexpression for the given sample.
848      \param v A vector to store intermediate results.
849      \param offset Index in v to begin storing results.
850      \param sampleNo Sample number to evaluate.
851      \param roffset (output parameter) the offset in the return vector where the result begins.
852    
853      The return value will be an existing vector so do not deallocate it.
854      If the result is stored in v it should be stored at the offset given.
855      Everything from offset to the end of v should be considered available for this method to use.
856    */
857    DataTypes::ValueType*
858    DataLazy::resolveNP1OUT(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const
859    {
860        // we assume that any collapsing has been done before we get here
861        // since we only have one argument we don't need to think about only
862        // processing single points.
863      if (m_readytype!='E')
864      {
865        throw DataException("Programmer error - resolveNP1OUT should only be called on expanded Data.");
866      }
867        // since we can't write the result over the input, we need a result offset further along
868      size_t subroffset=roffset+m_samplesize;
869      const ValueType* vleft=m_left->resolveSample(v,offset,sampleNo,subroffset);
870      roffset=offset;
871      switch (m_op)
872      {
873        case SYM:
874        DataMaths::symmetric(*vleft,m_left->getShape(),subroffset, v, getShape(), offset);
875        break;
876        case NSYM:
877        DataMaths::nonsymmetric(*vleft,m_left->getShape(),subroffset, v, getShape(), offset);
878        break;
879        default:
880        throw DataException("Programmer error - resolveNP1OUT can not resolve operator "+opToString(m_op)+".");
881      }
882      return &v;
883    }
884    
885    /*
886      \brief Compute the value of the expression (unary operation) for the given sample.
887      \return Vector which stores the value of the subexpression for the given sample.
888      \param v A vector to store intermediate results.
889      \param offset Index in v to begin storing results.
890      \param sampleNo Sample number to evaluate.
891      \param roffset (output parameter) the offset in the return vector where the result begins.
892    
893      The return value will be an existing vector so do not deallocate it.
894      If the result is stored in v it should be stored at the offset given.
895      Everything from offset to the end of v should be considered available for this method to use.
896    */
897    DataTypes::ValueType*
898    DataLazy::resolveNP1OUT_P(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const
899    {
900        // we assume that any collapsing has been done before we get here
901        // since we only have one argument we don't need to think about only
902        // processing single points.
903      if (m_readytype!='E')
904      {
905        throw DataException("Programmer error - resolveNP1OUT_P should only be called on expanded Data.");
906      }
907        // since we can't write the result over the input, we need a result offset further along
908      size_t subroffset=roffset+m_samplesize;
909      const ValueType* vleft=m_left->resolveSample(v,offset,sampleNo,subroffset);
910      roffset=offset;
911      switch (m_op)
912      {
913        case TRACE:
914             DataMaths::trace(*vleft,m_left->getShape(),subroffset, v,getShape(),offset,m_axis_offset);
915        break;
916        case TRANS:
917             DataMaths::transpose(*vleft,m_left->getShape(),subroffset, v,getShape(),offset,m_axis_offset);
918        break;
919        default:
920        throw DataException("Programmer error - resolveNP1OUTP can not resolve operator "+opToString(m_op)+".");
921      }
922      return &v;
923    }
924    
 // const double*  
 // DataLazy::resolveUnary(ValueType& v,int sampleNo,  size_t offset) const  
 // {  
 //  // we assume that any collapsing has been done before we get here  
 //  // since we only have one argument we don't need to think about only  
 //  // processing single points.  
 //   if (m_readytype!='E')  
 //   {  
 //     throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");  
 //   }  
 //   const double* left=m_left->resolveSample(v,sampleNo,offset);  
 //   double* result=&(v[offset]);  
 //   switch (m_op)  
 //   {  
 //     case SIN:      
 //  tensor_unary_operation(m_samplesize, left, result, ::sin);  
 //  break;  
 //     case COS:  
 //  tensor_unary_operation(m_samplesize, left, result, ::cos);  
 //  break;  
 //     case TAN:  
 //  tensor_unary_operation(m_samplesize, left, result, ::tan);  
 //  break;  
 //     case ASIN:  
 //  tensor_unary_operation(m_samplesize, left, result, ::asin);  
 //  break;  
 //     case ACOS:  
 //  tensor_unary_operation(m_samplesize, left, result, ::acos);  
 //  break;  
 //     case ATAN:  
 //  tensor_unary_operation(m_samplesize, left, result, ::atan);  
 //  break;  
 //     case SINH:  
 //  tensor_unary_operation(m_samplesize, left, result, ::sinh);  
 //  break;  
 //     case COSH:  
 //  tensor_unary_operation(m_samplesize, left, result, ::cosh);  
 //  break;  
 //     case TANH:  
 //  tensor_unary_operation(m_samplesize, left, result, ::tanh);  
 //  break;  
 //     case ERF:  
 // #ifdef _WIN32  
 //  throw DataException("Error - Data:: erf function is not supported on _WIN32 platforms.");  
 // #else  
 //  tensor_unary_operation(m_samplesize, left, result, ::erf);  
 //  break;  
 // #endif  
 //    case ASINH:  
 // #ifdef _WIN32  
 //  tensor_unary_operation(m_samplesize, left, result, escript::asinh_substitute);  
 // #else  
 //  tensor_unary_operation(m_samplesize, left, result, ::asinh);  
 // #endif    
 //  break;  
 //    case ACOSH:  
 // #ifdef _WIN32  
 //  tensor_unary_operation(m_samplesize, left, result, escript::acosh_substitute);  
 // #else  
 //  tensor_unary_operation(m_samplesize, left, result, ::acosh);  
 // #endif    
 //  break;  
 //    case ATANH:  
 // #ifdef _WIN32  
 //  tensor_unary_operation(m_samplesize, left, result, escript::atanh_substitute);  
 // #else  
 //  tensor_unary_operation(m_samplesize, left, result, ::atanh);  
 // #endif    
 //  break;  
 //     case LOG10:  
 //  tensor_unary_operation(m_samplesize, left, result, ::log10);  
 //  break;  
 //     case LOG:  
 //  tensor_unary_operation(m_samplesize, left, result, ::log);  
 //  break;  
 //     case SIGN:  
 //  tensor_unary_operation(m_samplesize, left, result, escript::fsign);  
 //  break;  
 //     case ABS:  
 //  tensor_unary_operation(m_samplesize, left, result, ::fabs);  
 //  break;  
 //     case NEG:  
 //  tensor_unary_operation(m_samplesize, left, result, negate<double>());  
 //  break;  
 //     case POS:  
 //  // it doesn't mean anything for delayed.  
 //  // it will just trigger a deep copy of the lazy object  
 //  throw DataException("Programmer error - POS not supported for lazy data.");  
 //  break;  
 //     case EXP:  
 //  tensor_unary_operation(m_samplesize, left, result, ::exp);  
 //  break;  
 //     case SQRT:  
 //  tensor_unary_operation(m_samplesize, left, result, ::sqrt);  
 //  break;  
 //     case RECIP:  
 //  tensor_unary_operation(m_samplesize, left, result, bind1st(divides<double>(),1.));  
 //  break;  
 //     case GZ:  
 //  tensor_unary_operation(m_samplesize, left, result, bind2nd(greater<double>(),0.0));  
 //  break;  
 //     case LZ:  
 //  tensor_unary_operation(m_samplesize, left, result, bind2nd(less<double>(),0.0));  
 //  break;  
 //     case GEZ:  
 //  tensor_unary_operation(m_samplesize, left, result, bind2nd(greater_equal<double>(),0.0));  
 //  break;  
 //     case LEZ:  
 //  tensor_unary_operation(m_samplesize, left, result, bind2nd(less_equal<double>(),0.0));  
 //  break;  
 //  
 //     default:  
 //  throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");  
 //   }  
 //   return result;  
 // }  
925    
926  #define PROC_OP(X) \  #define PROC_OP(TYPE,X)                               \
927      for (int i=0;i<steps;++i,resultp+=getNoValues()) \      for (int i=0;i<steps;++i,resultp+=resultStep) \
928      { \      { \
929  cout << "Step#" << i << " chunk=" << chunksize << endl; \         tensor_binary_operation< TYPE >(chunksize, &((*left)[lroffset]), &((*right)[rroffset]), resultp, X); \
930  cout << left[0] << left[1] << left[2] << endl; \         lroffset+=leftStep; \
931  cout << right[0] << right[1] << right[2] << endl; \         rroffset+=rightStep; \
        tensor_binary_operation(chunksize, left, right, resultp, X); \  
        left+=leftStep; \  
        right+=rightStep; \  
 cout << "Result=" << result << " " << result[0] << result[1] << result[2] << endl; \  
932      }      }
933    
934    /*
935      \brief Compute the value of the expression (binary operation) for the given sample.
936      \return Vector which stores the value of the subexpression for the given sample.
937      \param v A vector to store intermediate results.
938      \param offset Index in v to begin storing results.
939      \param sampleNo Sample number to evaluate.
940      \param roffset (output parameter) the offset in the return vector where the result begins.
941    
942      The return value will be an existing vector so do not deallocate it.
943      If the result is stored in v it should be stored at the offset given.
944      Everything from offset to the end of v should be considered available for this method to use.
945    */
946    // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
947    // have already been collapsed to IDENTITY. So we must have at least one expanded child.
948    // If both children are expanded, then we can process them in a single operation (we treat
949    // the whole sample as one big datapoint.
950    // If one of the children is not expanded, then we need to treat each point in the sample
951    // individually.
952    // There is an additional complication when scalar operations are considered.
953    // For example, 2+Vector.
954    // In this case each double within the point is treated individually
955  DataTypes::ValueType*  DataTypes::ValueType*
956  DataLazy::resolveBinary(ValueType& v,  size_t offset ,int sampleNo, size_t& roffset) const  DataLazy::resolveBinary(ValueType& v,  size_t offset, int sampleNo, size_t& roffset) const
957  {  {
958      // again we assume that all collapsing has already been done  LAZYDEBUG(cout << "Resolve binary: " << toString() << endl;)
     // so we have at least one expanded child.  
     // however, we could still have one of the children being not expanded.  
   
 cout << "Resolve binary: " << toString() << endl;  
   
   size_t lroffset=0, rroffset=0;  
959    
960      size_t lroffset=0, rroffset=0;    // offsets in the left and right result vectors
961        // first work out which of the children are expanded
962    bool leftExp=(m_left->m_readytype=='E');    bool leftExp=(m_left->m_readytype=='E');
963    bool rightExp=(m_right->m_readytype=='E');    bool rightExp=(m_right->m_readytype=='E');
964    bool bigloops=((leftExp && rightExp) || (!leftExp && !rightExp)); // is processing in single step    if (!leftExp && !rightExp)
965      {
966        throw DataException("Programmer Error - please use collapse if neither argument has type 'E'.");
967      }
968      bool leftScalar=(m_left->getRank()==0);
969      bool rightScalar=(m_right->getRank()==0);
970      bool bigloops=((leftExp && rightExp) || (!leftExp && !rightExp)); // is processing in single step?
971    int steps=(bigloops?1:getNumDPPSample());    int steps=(bigloops?1:getNumDPPSample());
972    size_t chunksize=(bigloops? m_samplesize : getNoValues());    size_t chunksize=(bigloops? m_samplesize : getNoValues());    // if bigloops, pretend the whole sample is a datapoint
973    int leftStep=((leftExp && !rightExp)? getNoValues() : 0);    if (m_left->getRank()!=m_right->getRank())    // need to deal with scalar * ? ops
974    int rightStep=((rightExp && !leftExp)? getNoValues() : 0);    {
975        if (!leftScalar && !rightScalar)
976        {
977           throw DataException("resolveBinary - ranks of arguments must match unless one of them is scalar.");
978        }
979        steps=getNumDPPSample()*max(m_left->getNoValues(),m_right->getNoValues());
980        chunksize=1;    // for scalar
981      }    
982      int leftStep=((leftExp && (!rightExp || rightScalar))? m_right->getNoValues() : 0);
983      int rightStep=((rightExp && (!leftExp || leftScalar))? m_left->getNoValues() : 0);
984      int resultStep=max(leftStep,rightStep);   // only one (at most) should be !=0
985        // Get the values of sub-expressions
986    const ValueType* left=m_left->resolveSample(v,offset,sampleNo,lroffset);    const ValueType* left=m_left->resolveSample(v,offset,sampleNo,lroffset);
987    const ValueType* right=m_right->resolveSample(v,offset,sampleNo,rroffset);        const ValueType* right=m_right->resolveSample(v,offset+m_samplesize,sampleNo,rroffset); // Note
988      // now we need to know which args are expanded      // the right child starts further along.
989  cout << "left=" << left << " right=" << right << endl;    double* resultp=&(v[offset]);     // results are stored at the vector offset we recieved
 cout << "(Length) l=" << left->size() << " r=" << right->size() << " res=" << v.size() << endl;  
   double* resultp=&(v[offset]);  
990    switch(m_op)    switch(m_op)
991    {    {
992      case ADD:      case ADD:
993      for (int i=0;i<steps;++i,resultp+=getNoValues())          PROC_OP(NO_ARG,plus<double>());
994      {      break;
995  cerr << "Step#" << i << " chunk=" << chunksize << endl;      case SUB:
996  cerr << left << "[" << lroffset << "] " << right << "[" << rroffset << "]" << endl;      PROC_OP(NO_ARG,minus<double>());
997         tensor_binary_operation(chunksize, &((*left)[lroffset]), &((*right)[rroffset]), resultp, plus<double>());      break;
998         lroffset+=leftStep;      case MUL:
999         rroffset+=rightStep;      PROC_OP(NO_ARG,multiplies<double>());
1000  cerr << "left=" << lroffset << " right=" << rroffset << endl;      break;
1001      }      case DIV:
1002        PROC_OP(NO_ARG,divides<double>());
1003        break;
1004        case POW:
1005           PROC_OP(double (double,double),::pow);
1006      break;      break;
 // need to fill in the rest  
1007      default:      default:
1008      throw DataException("Programmer error - resolveBinary can not resolve operator "+opToString(m_op)+".");      throw DataException("Programmer error - resolveBinary can not resolve operator "+opToString(m_op)+".");
1009    }    }
1010    roffset=offset;    roffset=offset;  
1011    return &v;    return &v;
1012  }  }
1013    
1014    
1015    /*
1016      \brief Compute the value of the expression (tensor product) for the given sample.
1017      \return Vector which stores the value of the subexpression for the given sample.
1018      \param v A vector to store intermediate results.
1019      \param offset Index in v to begin storing results.
1020      \param sampleNo Sample number to evaluate.
1021      \param roffset (output parameter) the offset in the return vector where the result begins.
1022    
1023      The return value will be an existing vector so do not deallocate it.
1024      If the result is stored in v it should be stored at the offset given.
1025      Everything from offset to the end of v should be considered available for this method to use.
1026    */
1027    // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1028    // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1029    // unlike the other resolve helpers, we must treat these datapoints separately.
1030    DataTypes::ValueType*
1031    DataLazy::resolveTProd(ValueType& v,  size_t offset, int sampleNo, size_t& roffset) const
1032    {
1033    LAZYDEBUG(cout << "Resolve TensorProduct: " << toString() << endl;)
1034    
1035  // #define PROC_OP(X) \    size_t lroffset=0, rroffset=0;    // offsets in the left and right result vectors
1036  //  for (int i=0;i<steps;++i,resultp+=getNoValues()) \      // first work out which of the children are expanded
1037  //  { \    bool leftExp=(m_left->m_readytype=='E');
1038  // cout << "Step#" << i << " chunk=" << chunksize << endl; \    bool rightExp=(m_right->m_readytype=='E');
1039  // cout << left[0] << left[1] << left[2] << endl; \    int steps=getNumDPPSample();
1040  // cout << right[0] << right[1] << right[2] << endl; \    int leftStep=((leftExp && !rightExp)? m_right->getNoValues() : 0);
1041  //     tensor_binary_operation(chunksize, left, right, resultp, X); \    int rightStep=((rightExp && !leftExp)? m_left->getNoValues() : 0);
1042  //     left+=leftStep; \    int resultStep=max(leftStep,rightStep);   // only one (at most) should be !=0
1043  //     right+=rightStep; \      // Get the values of sub-expressions (leave a gap of one sample for the result).
1044  // cout << "Result=" << result << " " << result[0] << result[1] << result[2] << endl; \    const ValueType* left=m_left->resolveSample(v,offset+m_samplesize,sampleNo,lroffset);
1045  //  }    const ValueType* right=m_right->resolveSample(v,offset+2*m_samplesize,sampleNo,rroffset);
1046  //    double* resultp=&(v[offset]);     // results are stored at the vector offset we recieved
1047  // const double*    switch(m_op)
1048  // DataLazy::resolveBinary(ValueType& v,int sampleNo,  size_t offset) const    {
1049  // {      case PROD:
1050  //  // again we assume that all collapsing has already been done      for (int i=0;i<steps;++i,resultp+=resultStep)
1051  //  // so we have at least one expanded child.      {
1052  //  // however, we could still have one of the children being not expanded.            const double *ptr_0 = &((*left)[lroffset]);
1053  //            const double *ptr_1 = &((*right)[rroffset]);
1054  // cout << "Resolve binary: " << toString() << endl;            matrix_matrix_product(m_SL, m_SM, m_SR, ptr_0, ptr_1, resultp, m_transpose);
1055  //        lroffset+=leftStep;
1056  //   const double* left=m_left->resolveSample(v,sampleNo,offset);        rroffset+=rightStep;
1057  // // cout << "Done Left " << /*left[0] << left[1] << left[2] << */endl;      }
1058  //   const double* right=m_right->resolveSample(v,sampleNo,offset);      break;
1059  // // cout << "Done Right"  << /*right[0] << right[1] << right[2] <<*/ endl;      default:
1060  //      // now we need to know which args are expanded      throw DataException("Programmer error - resolveTProduct can not resolve operator "+opToString(m_op)+".");
1061  //   bool leftExp=(m_left->m_readytype=='E');    }
1062  //   bool rightExp=(m_right->m_readytype=='E');    roffset=offset;
1063  //   bool bigloops=((leftExp && rightExp) || (!leftExp && !rightExp));  // is processing in single step    return &v;
1064  //   int steps=(bigloops?1:getNumSamples());  }
 //   size_t chunksize=(bigloops? m_samplesize : getNoValues());  
 //   int leftStep=((leftExp && !rightExp)? getNoValues() : 0);  
 //   int rightStep=((rightExp && !leftExp)? getNoValues() : 0);  
 // cout << "left=" << left << " right=" << right << endl;  
 //   double* result=&(v[offset]);  
 //   double* resultp=result;  
 //   switch(m_op)  
 //   {  
 //     case ADD:  
 //  for (int i=0;i<steps;++i,resultp+=getNoValues())  
 //  {  
 // cout << "Step#" << i << " chunk=" << chunksize << endl; \  
 // // cout << left[0] << left[1] << left[2] << endl;  
 // // cout << right[0] << right[1] << right[2] << endl;  
 //     tensor_binary_operation(chunksize, left, right, resultp, plus<double>());  
 // cout << "left=" << left << " right=" << right << " resp=" << resultp << endl;  
 //     left+=leftStep;  
 //     right+=rightStep;  
 // cout << "left=" << left << " right=" << right << endl;  
 // // cout << "Result=" << result << " " << result[0] << result[1] << result[2] << endl;  
 //  }  
 //  break;  
 // // need to fill in the rest  
 //     default:  
 //  throw DataException("Programmer error - resolveBinay can not resolve operator "+opToString(m_op)+".");  
 //   }  
 // // cout << "About to return "  << result[0] << result[1] << result[2] << endl;;  
 //   return result;  
 // }  
1065    
 // // the vector and the offset are a place where the method could write its data if it wishes  
 // // it is not obligated to do so. For example, if it has its own storage already, it can use that.  
 // // Hence the return value to indicate where the data is actually stored.  
 // // Regardless, the storage should be assumed to be used, even if it isn't.  
 // const double*  
 // DataLazy::resolveSample(ValueType& v,int sampleNo,  size_t offset )  
 // {  
 // cout << "Resolve sample " << toString() << endl;  
 //  // collapse so we have a 'E' node or an IDENTITY for some other type  
 //   if (m_readytype!='E' && m_op!=IDENTITY)  
 //   {  
 //  collapse();  
 //   }  
 //   if (m_op==IDENTITY)      
 //   {  
 //     const ValueType& vec=m_id->getVector();  
 //     if (m_readytype=='C')  
 //     {  
 //  return &(vec[0]);  
 //     }  
 //     return &(vec[m_id->getPointOffset(sampleNo, 0)]);  
 //   }  
 //   if (m_readytype!='E')  
 //   {  
 //     throw DataException("Programmer Error - Collapse did not produce an expanded node.");  
 //   }  
 //   switch (getOpgroup(m_op))  
 //   {  
 //   case G_UNARY: return resolveUnary(v,sampleNo,offset);  
 //   case G_BINARY: return resolveBinary(v,sampleNo,offset);  
 //   default:  
 //     throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");  
 //   }  
 // }  
1066    
1067    
1068    /*
1069      \brief Compute the value of the expression for the given sample.
1070      \return Vector which stores the value of the subexpression for the given sample.
1071      \param v A vector to store intermediate results.
1072      \param offset Index in v to begin storing results.
1073      \param sampleNo Sample number to evaluate.
1074      \param roffset (output parameter) the offset in the return vector where the result begins.
1075    
1076      The return value will be an existing vector so do not deallocate it.
1077    */
1078  // the vector and the offset are a place where the method could write its data if it wishes  // the vector and the offset are a place where the method could write its data if it wishes
1079  // it is not obligated to do so. For example, if it has its own storage already, it can use that.  // it is not obligated to do so. For example, if it has its own storage already, it can use that.
1080  // Hence the return value to indicate where the data is actually stored.  // Hence the return value to indicate where the data is actually stored.
# Line 799  cerr << "left=" << lroffset << " right=" Line 1084  cerr << "left=" << lroffset << " right="
1084  const DataTypes::ValueType*  const DataTypes::ValueType*
1085  DataLazy::resolveSample(ValueType& v, size_t offset, int sampleNo, size_t& roffset)  DataLazy::resolveSample(ValueType& v, size_t offset, int sampleNo, size_t& roffset)
1086  {  {
1087  cout << "Resolve sample " << toString() << endl;  LAZYDEBUG(cout << "Resolve sample " << toString() << endl;)
1088      // collapse so we have a 'E' node or an IDENTITY for some other type      // collapse so we have a 'E' node or an IDENTITY for some other type
1089    if (m_readytype!='E' && m_op!=IDENTITY)    if (m_readytype!='E' && m_op!=IDENTITY)
1090    {    {
# Line 824  cout << "Resolve sample " << toString() Line 1109  cout << "Resolve sample " << toString()
1109    {    {
1110    case G_UNARY: return resolveUnary(v, offset,sampleNo,roffset);    case G_UNARY: return resolveUnary(v, offset,sampleNo,roffset);
1111    case G_BINARY: return resolveBinary(v, offset,sampleNo,roffset);    case G_BINARY: return resolveBinary(v, offset,sampleNo,roffset);
1112      case G_NP1OUT: return resolveNP1OUT(v, offset, sampleNo,roffset);
1113      case G_NP1OUT_P: return resolveNP1OUT_P(v, offset, sampleNo,roffset);
1114      case G_TENSORPROD: return resolveTProd(v,offset, sampleNo,roffset);
1115    default:    default:
1116      throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");      throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");
1117    }    }
1118  }  }
1119    
1120    
1121    // To simplify the memory management, all threads operate on one large vector, rather than one each.
1122    // Each sample is evaluated independently and copied into the result DataExpanded.
 // This version uses double* trying again with vectors  
 // DataReady_ptr  
 // DataLazy::resolve()  
 // {  
 //  
 // cout << "Sample size=" << m_samplesize << endl;  
 // cout << "Buffers=" << m_buffsRequired << endl;  
 //  
 //   if (m_readytype!='E')  
 //   {  
 //     collapse();  
 //   }  
 //   if (m_op==IDENTITY)  
 //   {  
 //     return m_id;  
 //   }  
 //      // from this point on we must have m_op!=IDENTITY and m_readytype=='E'  
 //   size_t threadbuffersize=m_samplesize*(max(1,m_buffsRequired)+1);  
 //   int numthreads=1;  
 // #ifdef _OPENMP  
 //   numthreads=getNumberOfThreads();  
 //   int threadnum=0;  
 // #endif  
 //   ValueType v(numthreads*threadbuffersize);    
 // cout << "Buffer created with size=" << v.size() << endl;  
 //   DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(),  ValueType(getNoValues()));  
 //   ValueType& resvec=result->getVector();  
 //   DataReady_ptr resptr=DataReady_ptr(result);  
 //   int sample;  
 //   int resoffset;  
 //   int totalsamples=getNumSamples();  
 //   const double* res=0;  
 //   #pragma omp parallel for private(sample,resoffset,threadnum,res) schedule(static)  
 //   for (sample=0;sample<totalsamples;++sample)  
 //   {  
 // cout << "################################# " << sample << endl;  
 // #ifdef _OPENMP  
 //     res=resolveSample(v,sample,threadbuffersize*omp_get_thread_num());  
 // #else  
 //     res=resolveSample(v,sample,0);   // this would normally be v, but not if its a single IDENTITY op.  
 // #endif  
 // cerr << "-------------------------------- " << endl;  
 //     resoffset=result->getPointOffset(sample,0);  
 // cerr << "offset=" << resoffset << endl;  
 //     for (unsigned int i=0;i<m_samplesize;++i,++resoffset)    // copy values into the output vector  
 //     {  
 //  resvec[resoffset]=res[i];  
 //     }  
 // cerr << "*********************************" << endl;  
 //   }  
 //   return resptr;  
 // }  
   
   
1123  DataReady_ptr  DataReady_ptr
1124  DataLazy::resolve()  DataLazy::resolve()
1125  {  {
1126    
1127  cout << "Sample size=" << m_samplesize << endl;  LAZYDEBUG(cout << "Sample size=" << m_samplesize << endl;)
1128  cout << "Buffers=" << m_buffsRequired << endl;  LAZYDEBUG(cout << "Buffers=" << m_buffsRequired << endl;)
1129    
1130    if (m_readytype!='E')    if (m_readytype!='E')     // if the whole sub-expression is Constant or Tagged, then evaluate it normally
1131    {    {
1132      collapse();      collapse();
1133    }    }
1134    if (m_op==IDENTITY)    if (m_op==IDENTITY)       // So a lazy expression of Constant or Tagged data will be returned here.
1135    {    {
1136      return m_id;      return m_id;
1137    }    }
1138      // from this point on we must have m_op!=IDENTITY and m_readytype=='E'      // from this point on we must have m_op!=IDENTITY and m_readytype=='E'
1139    size_t threadbuffersize=m_samplesize*(max(1,m_buffsRequired)+1);    size_t threadbuffersize=m_maxsamplesize*(max(1,m_buffsRequired)); // Each thread needs to have enough
1140        // storage to evaluate its expression
1141    int numthreads=1;    int numthreads=1;
1142  #ifdef _OPENMP  #ifdef _OPENMP
1143    numthreads=getNumberOfThreads();    numthreads=getNumberOfThreads();
   int threadnum=0;  
1144  #endif  #endif
1145    ValueType v(numthreads*threadbuffersize);    ValueType v(numthreads*threadbuffersize);
1146  cout << "Buffer created with size=" << v.size() << endl;  LAZYDEBUG(cout << "Buffer created with size=" << v.size() << endl;)
1147    DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(),  ValueType(getNoValues()));    DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(),  ValueType(getNoValues()));
1148    ValueType& resvec=result->getVector();    ValueType& resvec=result->getVector();
1149    DataReady_ptr resptr=DataReady_ptr(result);    DataReady_ptr resptr=DataReady_ptr(result);
1150    int sample;    int sample;
1151    size_t outoffset;     // offset in the output data    size_t outoffset;     // offset in the output data
1152    int totalsamples=getNumSamples();    int totalsamples=getNumSamples();
1153    const ValueType* res=0;    const ValueType* res=0;   // Vector storing the answer
1154    size_t resoffset=0;    size_t resoffset=0;       // where in the vector to find the answer
1155    #pragma omp parallel for private(sample,resoffset,outoffset,threadnum,res) schedule(static)    #pragma omp parallel for private(sample,resoffset,outoffset,res) schedule(static)
1156    for (sample=0;sample<totalsamples;++sample)    for (sample=0;sample<totalsamples;++sample)
1157    {    {
1158  cout << "################################# " << sample << endl;  LAZYDEBUG(cout << "################################# " << sample << endl;)
1159  #ifdef _OPENMP  #ifdef _OPENMP
1160      res=resolveSample(v,threadbuffersize*omp_get_thread_num(),sample,resoffset);      res=resolveSample(v,threadbuffersize*omp_get_thread_num(),sample,resoffset);
1161  #else  #else
1162      res=resolveSample(v,0,sample,resoffset);   // this would normally be v, but not if its a single IDENTITY op.      res=resolveSample(v,0,sample,resoffset);   // res would normally be v, but not if its a single IDENTITY op.
1163  #endif  #endif
1164  cerr << "-------------------------------- " << endl;  LAZYDEBUG(cerr << "-------------------------------- " << endl;)
1165      outoffset=result->getPointOffset(sample,0);      outoffset=result->getPointOffset(sample,0);
1166  cerr << "offset=" << outoffset << endl;  LAZYDEBUG(cerr << "offset=" << outoffset << endl;)
1167      for (unsigned int i=0;i<m_samplesize;++i,++outoffset,++resoffset)   // copy values into the output vector      for (unsigned int i=0;i<m_samplesize;++i,++outoffset,++resoffset)   // copy values into the output vector
1168      {      {
1169      resvec[outoffset]=(*res)[resoffset];      resvec[outoffset]=(*res)[resoffset];
1170      }      }
1171  cerr << "*********************************" << endl;  LAZYDEBUG(cerr << "*********************************" << endl;)
1172    }    }
1173    return resptr;    return resptr;
1174  }  }
# Line 948  DataLazy::toString() const Line 1182  DataLazy::toString() const
1182    return oss.str();    return oss.str();
1183  }  }
1184    
1185    
1186  void  void
1187  DataLazy::intoString(ostringstream& oss) const  DataLazy::intoString(ostringstream& oss) const
1188  {  {
# Line 980  DataLazy::intoString(ostringstream& oss) Line 1215  DataLazy::intoString(ostringstream& oss)
1215      oss << ')';      oss << ')';
1216      break;      break;
1217    case G_UNARY:    case G_UNARY:
1218      case G_NP1OUT:
1219      case G_NP1OUT_P:
1220      oss << opToString(m_op) << '(';      oss << opToString(m_op) << '(';
1221      m_left->intoString(oss);      m_left->intoString(oss);
1222      oss << ')';      oss << ')';
1223      break;      break;
1224      case G_TENSORPROD:
1225        oss << opToString(m_op) << '(';
1226        m_left->intoString(oss);
1227        oss << ", ";
1228        m_right->intoString(oss);
1229        oss << ')';
1230        break;
1231    default:    default:
1232      oss << "UNKNOWN";      oss << "UNKNOWN";
1233    }    }
1234  }  }
1235    
 // Note that in this case, deepCopy does not make copies of the leaves.  
 // Hopefully copy on write (or whatever we end up using) will take care of this.  
1236  DataAbstract*  DataAbstract*
1237  DataLazy::deepCopy()  DataLazy::deepCopy()
1238  {  {
1239    if (m_op==IDENTITY)    switch (getOpgroup(m_op))
1240    {    {
1241      return new DataLazy(m_left);    // we don't need to copy the child here    case G_IDENTITY:  return new DataLazy(m_id->deepCopy()->getPtr());
1242      case G_UNARY: return new DataLazy(m_left->deepCopy()->getPtr(),m_op);
1243      case G_BINARY:    return new DataLazy(m_left->deepCopy()->getPtr(),m_right->deepCopy()->getPtr(),m_op);
1244      case G_NP1OUT: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(),m_op);
1245      case G_TENSORPROD: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
1246      default:
1247        throw DataException("Programmer error - do not know how to deepcopy operator "+opToString(m_op)+".");
1248    }    }
   return new DataLazy(m_left->deepCopy()->getPtr(),m_right->deepCopy()->getPtr(),m_op);  
1249  }  }
1250    
1251    
1252    // There is no single, natural interpretation of getLength on DataLazy.
1253    // Instances of DataReady can look at the size of their vectors.
1254    // For lazy though, it could be the size the data would be if it were resolved;
1255    // or it could be some function of the lengths of the DataReady instances which
1256    // form part of the expression.
1257    // Rather than have people making assumptions, I have disabled the method.
1258  DataTypes::ValueType::size_type  DataTypes::ValueType::size_type
1259  DataLazy::getLength() const  DataLazy::getLength() const
1260  {  {
1261    return m_length;    throw DataException("getLength() does not make sense for lazy data.");
1262  }  }
1263    
1264    
# Line 1015  DataLazy::getSlice(const DataTypes::Regi Line 1268  DataLazy::getSlice(const DataTypes::Regi
1268    throw DataException("getSlice - not implemented for Lazy objects.");    throw DataException("getSlice - not implemented for Lazy objects.");
1269  }  }
1270    
1271    
1272    // To do this we need to rely on our child nodes
1273    DataTypes::ValueType::size_type
1274    DataLazy::getPointOffset(int sampleNo,
1275                     int dataPointNo)
1276    {
1277      if (m_op==IDENTITY)
1278      {
1279        return m_id->getPointOffset(sampleNo,dataPointNo);
1280      }
1281      if (m_readytype!='E')
1282      {
1283        collapse();
1284        return m_id->getPointOffset(sampleNo,dataPointNo);
1285      }
1286      // at this point we do not have an identity node and the expression will be Expanded
1287      // so we only need to know which child to ask
1288      if (m_left->m_readytype=='E')
1289      {
1290        return m_left->getPointOffset(sampleNo,dataPointNo);
1291      }
1292      else
1293      {
1294        return m_right->getPointOffset(sampleNo,dataPointNo);
1295      }
1296    }
1297    
1298    // To do this we need to rely on our child nodes
1299  DataTypes::ValueType::size_type  DataTypes::ValueType::size_type
1300  DataLazy::getPointOffset(int sampleNo,  DataLazy::getPointOffset(int sampleNo,
1301                   int dataPointNo) const                   int dataPointNo) const
1302  {  {
1303    throw DataException("getPointOffset - not implemented for Lazy objects - yet.");    if (m_op==IDENTITY)
1304      {
1305        return m_id->getPointOffset(sampleNo,dataPointNo);
1306      }
1307      if (m_readytype=='E')
1308      {
1309        // at this point we do not have an identity node and the expression will be Expanded
1310        // so we only need to know which child to ask
1311        if (m_left->m_readytype=='E')
1312        {
1313        return m_left->getPointOffset(sampleNo,dataPointNo);
1314        }
1315        else
1316        {
1317        return m_right->getPointOffset(sampleNo,dataPointNo);
1318        }
1319      }
1320      if (m_readytype=='C')
1321      {
1322        return m_left->getPointOffset(sampleNo,dataPointNo); // which child doesn't matter
1323      }
1324      throw DataException("Programmer error - getPointOffset on lazy data may require collapsing (but this object is marked const).");
1325    }
1326    
1327    // It would seem that DataTagged will need to be treated differently since even after setting all tags
1328    // to zero, all the tags from all the DataTags would be in the result.
1329    // However since they all have the same value (0) whether they are there or not should not matter.
1330    // So I have decided that for all types this method will create a constant 0.
1331    // It can be promoted up as required.
1332    // A possible efficiency concern might be expanded->constant->expanded which has an extra memory management
1333    // but we can deal with that if it arrises.
1334    void
1335    DataLazy::setToZero()
1336    {
1337      DataTypes::ValueType v(getNoValues(),0);
1338      m_id=DataReady_ptr(new DataConstant(getFunctionSpace(),getShape(),v));
1339      m_op=IDENTITY;
1340      m_right.reset();  
1341      m_left.reset();
1342      m_readytype='C';
1343      m_buffsRequired=1;
1344  }  }
1345    
1346  }   // end namespace  }   // end namespace

Legend:
Removed from v.1898  
changed lines
  Added in v.2092

  ViewVC Help
Powered by ViewVC 1.1.26