/[escript]/branches/clazy/escriptcore/src/DataLazy.cpp
ViewVC logotype

Diff of /branches/clazy/escriptcore/src/DataLazy.cpp

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

branches/schroedinger/escript/src/DataLazy.cpp revision 1888 by jfenwick, Wed Oct 15 04:00:21 2008 UTC trunk/escript/src/DataLazy.cpp revision 2086 by jfenwick, Mon Nov 24 02:38:50 2008 UTC
# Line 26  Line 26 
26  #include "DataTypes.h"  #include "DataTypes.h"
27  #include "Data.h"  #include "Data.h"
28  #include "UnaryFuncs.h"     // for escript::fsign  #include "UnaryFuncs.h"     // for escript::fsign
29    #include "Utils.h"
30    
31    /*
32    How does DataLazy work?
33    ~~~~~~~~~~~~~~~~~~~~~~~
34    
35    Each instance represents a single operation on one or two other DataLazy instances. These arguments are normally
36    denoted left and right.
37    
38    A special operation, IDENTITY, stores an instance of DataReady in the m_id member.
39    This means that all "internal" nodes in the structure are instances of DataLazy.
40    
41    Each operation has a string representation as well as an opgroup - eg G_IDENTITY, G_BINARY, ...
42    Note that IDENITY is not considered a unary operation.
43    
44    I am avoiding calling the structure formed a tree because it is not guaranteed to be one (eg c=a+a).
45    It must however form a DAG (directed acyclic graph).
46    I will refer to individual DataLazy objects with the structure as nodes.
47    
48    Each node also stores:
49    - m_readytype \in {'E','T','C','?'} ~ indicates what sort of DataReady would be produced if the expression was
50        evaluated.
51    - m_buffsrequired ~ the larged number of samples which would need to be kept simultaneously in order to
52        evaluate the expression.
53    - m_samplesize ~ the number of doubles stored in a sample.
54    
55    When a new node is created, the above values are computed based on the values in the child nodes.
56    Eg: if left requires 4 samples and right requires 6 then left+right requires 7 samples.
57    
58    The resolve method, which produces a DataReady from a DataLazy, does the following:
59    1) Create a DataReady to hold the new result.
60    2) Allocate a vector (v) big enough to hold m_buffsrequired samples.
61    3) For each sample, call resolveSample with v, to get its values and copy them into the result object.
62    
63    (In the case of OMP, multiple samples are resolved in parallel so the vector needs to be larger.)
64    
65    resolveSample returns a Vector* and an offset within that vector where the result is stored.
66    Normally, this would be v, but for identity nodes their internal vector is returned instead.
67    
68    The convention that I use, is that the resolve methods should store their results starting at the offset they are passed.
69    
70    For expressions which evaluate to Constant or Tagged, there is a different evaluation method.
71    The collapse method invokes the (non-lazy) operations on the Data class to evaluate the expression.
72    
73    To add a new operator you need to do the following (plus anything I might have forgotten):
74    1) Add to the ES_optype.
75    2) determine what opgroup your operation belongs to (X)
76    3) add a string for the op to the end of ES_opstrings
77    4) increase ES_opcount
78    5) add an entry (X) to opgroups
79    6) add an entry to the switch in collapseToReady
80    7) add an entry to resolveX
81    */
82    
83    
84  using namespace std;  using namespace std;
85  using namespace boost;  using namespace boost;
# Line 33  using namespace boost; Line 87  using namespace boost;
87  namespace escript  namespace escript
88  {  {
89    
 const std::string&  
 opToString(ES_optype op);  
   
90  namespace  namespace
91  {  {
92    
   
   
93  enum ES_opgroup  enum ES_opgroup
94  {  {
95     G_UNKNOWN,     G_UNKNOWN,
96     G_IDENTITY,     G_IDENTITY,
97     G_BINARY,     G_BINARY,        // pointwise operations with two arguments
98     G_UNARY     G_UNARY,     // pointwise operations with one argument
99       G_NP1OUT,        // non-pointwise op with one output
100       G_NP1OUT_P,      // non-pointwise op with one output requiring a parameter
101       G_TENSORPROD     // general tensor product
102  };  };
103    
104    
105    
106    
107  string ES_opstrings[]={"UNKNOWN","IDENTITY","+","-","*","/","sin","cos","tan",  string ES_opstrings[]={"UNKNOWN","IDENTITY","+","-","*","/","^",
108                "sin","cos","tan",
109              "asin","acos","atan","sinh","cosh","tanh","erf",              "asin","acos","atan","sinh","cosh","tanh","erf",
110              "asinh","acosh","atanh",              "asinh","acosh","atanh",
111              "log10","log","sign","abs","neg","pos","exp","sqrt",              "log10","log","sign","abs","neg","pos","exp","sqrt",
112              "1/","where>0","where<0","where>=0","where<=0"};              "1/","where>0","where<0","where>=0","where<=0",
113  int ES_opcount=32;              "symmetric","nonsymmetric",
114  ES_opgroup opgroups[]={G_UNKNOWN,G_IDENTITY,G_BINARY,G_BINARY,G_BINARY,G_BINARY,G_UNARY,G_UNARY,G_UNARY, //9              "prod",
115              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,    // 16              "transpose",
116              G_UNARY,G_UNARY,G_UNARY,                    // 19              "trace"};
117              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,        // 27  int ES_opcount=38;
118              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY};  ES_opgroup opgroups[]={G_UNKNOWN,G_IDENTITY,G_BINARY,G_BINARY,G_BINARY,G_BINARY, G_BINARY,
119                G_UNARY,G_UNARY,G_UNARY, //10
120                G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,    // 17
121                G_UNARY,G_UNARY,G_UNARY,                    // 20
122                G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,        // 28
123                G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,            // 33
124                G_NP1OUT,G_NP1OUT,
125                G_TENSORPROD,
126                G_NP1OUT_P, G_NP1OUT_P};
127  inline  inline
128  ES_opgroup  ES_opgroup
129  getOpgroup(ES_optype op)  getOpgroup(ES_optype op)
# Line 79  resultFS(DataAbstract_ptr left, DataAbst Line 140  resultFS(DataAbstract_ptr left, DataAbst
140      // that way, if interpolate is required in any other op we can just throw a      // that way, if interpolate is required in any other op we can just throw a
141      // programming error exception.      // programming error exception.
142    
143      FunctionSpace l=left->getFunctionSpace();
144      if (left->getFunctionSpace()!=right->getFunctionSpace())    FunctionSpace r=right->getFunctionSpace();
145      {    if (l!=r)
146          throw DataException("FunctionSpaces not equal - interpolation not supported on lazy data.");    {
147      }      if (r.probeInterpolation(l))
148      return left->getFunctionSpace();      {
149        return l;
150        }
151        if (l.probeInterpolation(r))
152        {
153        return r;
154        }
155        throw DataException("Cannot interpolate between the FunctionSpaces given for operation "+opToString(op)+".");
156      }
157      return l;
158  }  }
159    
160  // return the shape of the result of "left op right"  // return the shape of the result of "left op right"
161    // the shapes resulting from tensor product are more complex to compute so are worked out elsewhere
162  DataTypes::ShapeType  DataTypes::ShapeType
163  resultShape(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)  resultShape(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
164  {  {
165      if (left->getShape()!=right->getShape())      if (left->getShape()!=right->getShape())
166      {      {
167          throw DataException("Shapes not the same - shapes must match for lazy data.");        if ((getOpgroup(op)!=G_BINARY) && (getOpgroup(op)!=G_NP1OUT))
168          {
169            throw DataException("Shapes not the name - shapes must match for (point)binary operations.");
170          }
171          if (left->getRank()==0)   // we need to allow scalar * anything
172          {
173            return right->getShape();
174          }
175          if (right->getRank()==0)
176          {
177            return left->getShape();
178          }
179          throw DataException("Shapes not the same - arguments must have matching shapes (or be scalars) for (point)binary operations on lazy data.");
180      }      }
181      return left->getShape();      return left->getShape();
182  }  }
183    
184  size_t  // return the shape for "op left"
185  resultLength(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)  
186    DataTypes::ShapeType
187    resultShape(DataAbstract_ptr left, ES_optype op)
188  {  {
189     switch (getOpgroup(op))      switch(op)
190     {      {
191     case G_BINARY: return left->getLength();          case TRANS:
192     case G_UNARY: return left->getLength();          return left->getShape();
193     default:      break;
194      throw DataException("Programmer Error - attempt to getLength() for operator "+opToString(op)+".");      case TRACE:
195     }          return DataTypes::scalarShape;
196        break;
197            default:
198        throw DataException("Programmer error - resultShape(left,op) can't compute shapes for operator "+opToString(op)+".");
199        }
200    }
201    
202    // determine the output shape for the general tensor product operation
203    // the additional parameters return information required later for the product
204    // the majority of this code is copy pasted from C_General_Tensor_Product
205    DataTypes::ShapeType
206    GTPShape(DataAbstract_ptr left, DataAbstract_ptr right, int axis_offset, int transpose, int& SL, int& SM, int& SR)
207    {
208        
209      // Get rank and shape of inputs
210      int rank0 = left->getRank();
211      int rank1 = right->getRank();
212      const DataTypes::ShapeType& shape0 = left->getShape();
213      const DataTypes::ShapeType& shape1 = right->getShape();
214    
215      // Prepare for the loops of the product and verify compatibility of shapes
216      int start0=0, start1=0;
217      if (transpose == 0)       {}
218      else if (transpose == 1)  { start0 = axis_offset; }
219      else if (transpose == 2)  { start1 = rank1-axis_offset; }
220      else              { throw DataException("DataLazy GeneralTensorProduct Constructor: Error - transpose should be 0, 1 or 2"); }
221    
222      if (rank0<axis_offset)
223      {
224        throw DataException("DataLazy GeneralTensorProduct Constructor: Error - rank of left < axisoffset");
225      }
226    
227      // Adjust the shapes for transpose
228      DataTypes::ShapeType tmpShape0(rank0);    // pre-sizing the vectors rather
229      DataTypes::ShapeType tmpShape1(rank1);    // than using push_back
230      for (int i=0; i<rank0; i++)   { tmpShape0[i]=shape0[(i+start0)%rank0]; }
231      for (int i=0; i<rank1; i++)   { tmpShape1[i]=shape1[(i+start1)%rank1]; }
232    
233      // Prepare for the loops of the product
234      SL=1, SM=1, SR=1;
235      for (int i=0; i<rank0-axis_offset; i++)   {
236        SL *= tmpShape0[i];
237      }
238      for (int i=rank0-axis_offset; i<rank0; i++)   {
239        if (tmpShape0[i] != tmpShape1[i-(rank0-axis_offset)]) {
240          throw DataException("C_GeneralTensorProduct: Error - incompatible shapes");
241        }
242        SM *= tmpShape0[i];
243      }
244      for (int i=axis_offset; i<rank1; i++)     {
245        SR *= tmpShape1[i];
246      }
247    
248      // Define the shape of the output (rank of shape is the sum of the loop ranges below)
249      DataTypes::ShapeType shape2(rank0+rank1-2*axis_offset);  
250      {         // block to limit the scope of out_index
251         int out_index=0;
252         for (int i=0; i<rank0-axis_offset; i++, ++out_index) { shape2[out_index]=tmpShape0[i]; } // First part of arg_0_Z
253         for (int i=axis_offset; i<rank1; i++, ++out_index)   { shape2[out_index]=tmpShape1[i]; } // Last part of arg_1_Z
254      }
255    
256      if (shape2.size()>ESCRIPT_MAX_DATA_RANK)
257      {
258         ostringstream os;
259         os << "C_GeneralTensorProduct: Error - Attempt to create a rank " << shape2.size() << " object. The maximum rank is " << ESCRIPT_MAX_DATA_RANK << ".";
260         throw DataException(os.str());
261      }
262    
263      return shape2;
264  }  }
265    
266    
267    // determine the number of points in the result of "left op right"
268    // note that determining the resultLength for G_TENSORPROD is more complex and will not be processed here
269    // size_t
270    // resultLength(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
271    // {
272    //    switch (getOpgroup(op))
273    //    {
274    //    case G_BINARY: return left->getLength();
275    //    case G_UNARY: return left->getLength();
276    //    case G_NP1OUT: return left->getLength();
277    //    default:
278    //  throw DataException("Programmer Error - attempt to getLength() for operator "+opToString(op)+".");
279    //    }
280    // }
281    
282    // determine the number of samples requires to evaluate an expression combining left and right
283    // NP1OUT needs an extra buffer because we can't write the answers over the top of the input.
284    // The same goes for G_TENSORPROD
285  int  int
286  calcBuffs(const DataLazy_ptr& left, const DataLazy_ptr& right, ES_optype op)  calcBuffs(const DataLazy_ptr& left, const DataLazy_ptr& right, ES_optype op)
287  {  {
# Line 118  calcBuffs(const DataLazy_ptr& left, cons Line 290  calcBuffs(const DataLazy_ptr& left, cons
290     case G_IDENTITY: return 1;     case G_IDENTITY: return 1;
291     case G_BINARY: return max(left->getBuffsRequired(),right->getBuffsRequired()+1);     case G_BINARY: return max(left->getBuffsRequired(),right->getBuffsRequired()+1);
292     case G_UNARY: return max(left->getBuffsRequired(),1);     case G_UNARY: return max(left->getBuffsRequired(),1);
293       case G_NP1OUT: return 1+max(left->getBuffsRequired(),1);
294       case G_NP1OUT_P: return 1+max(left->getBuffsRequired(),1);
295       case G_TENSORPROD: return 1+max(left->getBuffsRequired(),right->getBuffsRequired()+1);
296     default:     default:
297      throw DataException("Programmer Error - attempt to calcBuffs() for operator "+opToString(op)+".");      throw DataException("Programmer Error - attempt to calcBuffs() for operator "+opToString(op)+".");
298     }     }
299  }  }
300    
301    
   
302  }   // end anonymous namespace  }   // end anonymous namespace
303    
304    
305    
306    // Return a string representing the operation
307  const std::string&  const std::string&
308  opToString(ES_optype op)  opToString(ES_optype op)
309  {  {
# Line 141  opToString(ES_optype op) Line 317  opToString(ES_optype op)
317    
318  DataLazy::DataLazy(DataAbstract_ptr p)  DataLazy::DataLazy(DataAbstract_ptr p)
319      : parent(p->getFunctionSpace(),p->getShape()),      : parent(p->getFunctionSpace(),p->getShape()),
320      m_op(IDENTITY)      m_op(IDENTITY),
321        m_axis_offset(0),
322        m_transpose(0),
323        m_SL(0), m_SM(0), m_SR(0)
324  {  {
325     if (p->isLazy())     if (p->isLazy())
326     {     {
     // TODO: fix this.   We could make the new node a copy of p?  
327      // I don't want identity of Lazy.      // I don't want identity of Lazy.
328      // Question: Why would that be so bad?      // Question: Why would that be so bad?
329      // Answer: We assume that the child of ID is something we can call getVector on      // Answer: We assume that the child of ID is something we can call getVector on
# Line 154  DataLazy::DataLazy(DataAbstract_ptr p) Line 332  DataLazy::DataLazy(DataAbstract_ptr p)
332     else     else
333     {     {
334      m_id=dynamic_pointer_cast<DataReady>(p);      m_id=dynamic_pointer_cast<DataReady>(p);
335        if(p->isConstant()) {m_readytype='C';}
336        else if(p->isExpanded()) {m_readytype='E';}
337        else if (p->isTagged()) {m_readytype='T';}
338        else {throw DataException("Unknown DataReady instance in DataLazy constructor.");}
339     }     }
    m_length=p->getLength();  
340     m_buffsRequired=1;     m_buffsRequired=1;
341     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
342       m_maxsamplesize=m_samplesize;
343  cout << "(1)Lazy created with " << m_samplesize << endl;  cout << "(1)Lazy created with " << m_samplesize << endl;
344  }  }
345    
346    
347    
348    
349  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op)  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op)
350      : parent(left->getFunctionSpace(),left->getShape()),      : parent(left->getFunctionSpace(),left->getShape()),
351      m_op(op)      m_op(op),
352        m_axis_offset(0),
353        m_transpose(0),
354        m_SL(0), m_SM(0), m_SR(0)
355  {  {
356     if (getOpgroup(op)!=G_UNARY)     if ((getOpgroup(op)!=G_UNARY) && (getOpgroup(op)!=G_NP1OUT))
357     {     {
358      throw DataException("Programmer error - constructor DataLazy(left, op) will only process UNARY operations.");      throw DataException("Programmer error - constructor DataLazy(left, op) will only process UNARY operations.");
359     }     }
360    
361     DataLazy_ptr lleft;     DataLazy_ptr lleft;
362     if (!left->isLazy())     if (!left->isLazy())
363     {     {
# Line 178  DataLazy::DataLazy(DataAbstract_ptr left Line 367  DataLazy::DataLazy(DataAbstract_ptr left
367     {     {
368      lleft=dynamic_pointer_cast<DataLazy>(left);      lleft=dynamic_pointer_cast<DataLazy>(left);
369     }     }
370     m_length=left->getLength();     m_readytype=lleft->m_readytype;
371     m_left=lleft;     m_left=lleft;
372     m_buffsRequired=1;     m_buffsRequired=calcBuffs(m_left, m_right,m_op); // yeah m_right will be null at this point
373     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
374       m_maxsamplesize=max(m_samplesize,m_left->getMaxSampleSize());
375  }  }
376    
377    
378  DataLazy::DataLazy(DataLazy_ptr left, DataLazy_ptr right, ES_optype op)  // In this constructor we need to consider interpolation
379    DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
380      : parent(resultFS(left,right,op), resultShape(left,right,op)),      : parent(resultFS(left,right,op), resultShape(left,right,op)),
381      m_left(left),      m_op(op),
382      m_right(right),      m_SL(0), m_SM(0), m_SR(0)
     m_op(op)  
383  {  {
384     if (getOpgroup(op)!=G_BINARY)     if ((getOpgroup(op)!=G_BINARY))
385     {     {
386      throw DataException("Programmer error - constructor DataLazy(left, right, op) will only process BINARY operations.");      throw DataException("Programmer error - constructor DataLazy(left, right, op) will only process BINARY operations.");
387     }     }
388     m_length=resultLength(m_left,m_right,m_op);  
389       if (getFunctionSpace()!=left->getFunctionSpace())    // left needs to be interpolated
390       {
391        FunctionSpace fs=getFunctionSpace();
392        Data ltemp(left);
393        Data tmp(ltemp,fs);
394        left=tmp.borrowDataPtr();
395       }
396       if (getFunctionSpace()!=right->getFunctionSpace())   // right needs to be interpolated
397       {
398        Data tmp(Data(right),getFunctionSpace());
399        right=tmp.borrowDataPtr();
400       }
401       left->operandCheck(*right);
402    
403       if (left->isLazy())          // the children need to be DataLazy. Wrap them in IDENTITY if required
404       {
405        m_left=dynamic_pointer_cast<DataLazy>(left);
406       }
407       else
408       {
409        m_left=DataLazy_ptr(new DataLazy(left));
410       }
411       if (right->isLazy())
412       {
413        m_right=dynamic_pointer_cast<DataLazy>(right);
414       }
415       else
416       {
417        m_right=DataLazy_ptr(new DataLazy(right));
418       }
419       char lt=m_left->m_readytype;
420       char rt=m_right->m_readytype;
421       if (lt=='E' || rt=='E')
422       {
423        m_readytype='E';
424       }
425       else if (lt=='T' || rt=='T')
426       {
427        m_readytype='T';
428       }
429       else
430       {
431        m_readytype='C';
432       }
433     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
434     m_buffsRequired=calcBuffs(m_left, m_right, m_op);     m_maxsamplesize=max(max(m_samplesize,m_right->getMaxSampleSize()),m_left->getMaxSampleSize());  
435  cout << "(2)Lazy created with " << m_samplesize << endl;     m_buffsRequired=calcBuffs(m_left, m_right,m_op);
436    cout << "(3)Lazy created with " << m_samplesize << endl;
437  }  }
438    
439  DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)  DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op, int axis_offset, int transpose)
440      : parent(resultFS(left,right,op), resultShape(left,right,op)),      : parent(resultFS(left,right,op), GTPShape(left,right, axis_offset, transpose, m_SL,m_SM, m_SR)),
441      m_op(op)      m_op(op),
442        m_axis_offset(axis_offset),
443        m_transpose(transpose)
444  {  {
445     if (getOpgroup(op)!=G_BINARY)     if ((getOpgroup(op)!=G_TENSORPROD))
446       {
447        throw DataException("Programmer error - constructor DataLazy(left, right, op, ax, tr) will only process BINARY operations which require parameters.");
448       }
449       if ((transpose>2) || (transpose<0))
450       {
451        throw DataException("DataLazy GeneralTensorProduct constructor: Error - transpose should be 0, 1 or 2");
452       }
453       if (getFunctionSpace()!=left->getFunctionSpace())    // left needs to be interpolated
454     {     {
455      throw DataException("Programmer error - constructor DataLazy(left, op) will only process BINARY operations.");      FunctionSpace fs=getFunctionSpace();
456        Data ltemp(left);
457        Data tmp(ltemp,fs);
458        left=tmp.borrowDataPtr();
459     }     }
460     if (left->isLazy())     if (getFunctionSpace()!=right->getFunctionSpace())   // right needs to be interpolated
461       {
462        Data tmp(Data(right),getFunctionSpace());
463        right=tmp.borrowDataPtr();
464       }
465       left->operandCheck(*right);
466    
467       if (left->isLazy())          // the children need to be DataLazy. Wrap them in IDENTITY if required
468     {     {
469      m_left=dynamic_pointer_cast<DataLazy>(left);      m_left=dynamic_pointer_cast<DataLazy>(left);
470     }     }
# Line 225  DataLazy::DataLazy(DataAbstract_ptr left Line 480  DataLazy::DataLazy(DataAbstract_ptr left
480     {     {
481      m_right=DataLazy_ptr(new DataLazy(right));      m_right=DataLazy_ptr(new DataLazy(right));
482     }     }
483       char lt=m_left->m_readytype;
484     m_length=resultLength(m_left,m_right,m_op);     char rt=m_right->m_readytype;
485       if (lt=='E' || rt=='E')
486       {
487        m_readytype='E';
488       }
489       else if (lt=='T' || rt=='T')
490       {
491        m_readytype='T';
492       }
493       else
494       {
495        m_readytype='C';
496       }
497     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
498       m_maxsamplesize=max(max(m_samplesize,m_right->getMaxSampleSize()),m_left->getMaxSampleSize());  
499     m_buffsRequired=calcBuffs(m_left, m_right,m_op);     m_buffsRequired=calcBuffs(m_left, m_right,m_op);
500  cout << "(3)Lazy created with " << m_samplesize << endl;  cout << "(4)Lazy created with " << m_samplesize << endl;
501    }
502    
503    
504    DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, int axis_offset)
505        : parent(left->getFunctionSpace(), resultShape(left,op)),
506        m_op(op),
507        m_axis_offset(axis_offset),
508        m_transpose(0)
509    {
510       if ((getOpgroup(op)!=G_NP1OUT_P))
511       {
512        throw DataException("Programmer error - constructor DataLazy(left, op, ax) will only process UNARY operations which require parameters.");
513       }
514       DataLazy_ptr lleft;
515       if (!left->isLazy())
516       {
517        lleft=DataLazy_ptr(new DataLazy(left));
518       }
519       else
520       {
521        lleft=dynamic_pointer_cast<DataLazy>(left);
522       }
523       m_readytype=lleft->m_readytype;
524       m_left=lleft;
525       m_buffsRequired=calcBuffs(m_left, m_right,m_op); // yeah m_right will be null at this point
526       m_samplesize=getNumDPPSample()*getNoValues();
527       m_maxsamplesize=max(m_samplesize,m_left->getMaxSampleSize());
528    cout << "(5)Lazy created with " << m_samplesize << endl;
529  }  }
530    
531    
# Line 245  DataLazy::getBuffsRequired() const Line 541  DataLazy::getBuffsRequired() const
541  }  }
542    
543    
544  // the vector and the offset are a place where the method could write its data if it wishes  size_t
545  // it is not obligated to do so. For example, if it has its own storage already, it can use that.  DataLazy::getMaxSampleSize() const
 // Hence the return value to indicate where the data is actually stored.  
 // Regardless, the storage should be assumed to be used, even if it isn't.  
 const double*  
 DataLazy::resolveSample(ValueType& v,int sampleNo,  size_t offset ) const  
546  {  {
547    if (m_op==IDENTITY)        return m_maxsamplesize;
548    }
549    
550    /*
551      \brief Evaluates the expression using methods on Data.
552      This does the work for the collapse method.
553      For reasons of efficiency do not call this method on DataExpanded nodes.
554    */
555    DataReady_ptr
556    DataLazy::collapseToReady()
557    {
558      if (m_readytype=='E')
559      { // this is more an efficiency concern than anything else
560        throw DataException("Programmer Error - do not use collapse on Expanded data.");
561      }
562      if (m_op==IDENTITY)
563    {    {
564      const ValueType& vec=m_id->getVector();      return m_id;
     return &(vec[m_id->getPointOffset(sampleNo, 0)]);  
565    }    }
566    size_t rightoffset=offset+m_samplesize;    DataReady_ptr pleft=m_left->collapseToReady();
567    const double* left=m_left->resolveSample(v,sampleNo,offset);    Data left(pleft);
568    const double* right=0;    Data right;
569    if (getOpgroup(m_op)==G_BINARY)    if ((getOpgroup(m_op)==G_BINARY) || (getOpgroup(m_op)==G_TENSORPROD))
570    {    {
571      right=m_right->resolveSample(v,sampleNo,rightoffset);      right=Data(m_right->collapseToReady());
572    }    }
573    double* result=&(v[offset]);    Data result;
574      switch(m_op)
575    {    {
576      switch(m_op)      case ADD:
577      {      result=left+right;
     case ADD:       // since these are pointwise ops, pretend each sample is one point  
     tensor_binary_operation(m_samplesize, left, right, result, plus<double>());  
578      break;      break;
579      case SUB:            case SUB:      
580      tensor_binary_operation(m_samplesize, left, right, result, minus<double>());      result=left-right;
581      break;      break;
582      case MUL:            case MUL:      
583      tensor_binary_operation(m_samplesize, left, right, result, multiplies<double>());      result=left*right;
584      break;      break;
585      case DIV:            case DIV:      
586      tensor_binary_operation(m_samplesize, left, right, result, divides<double>());      result=left/right;
587      break;      break;
 // unary ops  
588      case SIN:      case SIN:
589      tensor_unary_operation(m_samplesize, left, result, ::sin);      result=left.sin();  
590      break;      break;
591      case COS:      case COS:
592      tensor_unary_operation(m_samplesize, left, result, ::cos);      result=left.cos();
593      break;      break;
594      case TAN:      case TAN:
595      tensor_unary_operation(m_samplesize, left, result, ::tan);      result=left.tan();
596      break;      break;
597      case ASIN:      case ASIN:
598      tensor_unary_operation(m_samplesize, left, result, ::asin);      result=left.asin();
599      break;      break;
600      case ACOS:      case ACOS:
601      tensor_unary_operation(m_samplesize, left, result, ::acos);      result=left.acos();
602      break;      break;
603      case ATAN:      case ATAN:
604      tensor_unary_operation(m_samplesize, left, result, ::atan);      result=left.atan();
605      break;      break;
606      case SINH:      case SINH:
607      tensor_unary_operation(m_samplesize, left, result, ::sinh);      result=left.sinh();
608      break;      break;
609      case COSH:      case COSH:
610      tensor_unary_operation(m_samplesize, left, result, ::cosh);      result=left.cosh();
611      break;      break;
612      case TANH:      case TANH:
613      tensor_unary_operation(m_samplesize, left, result, ::tanh);      result=left.tanh();
614      break;      break;
615      case ERF:      case ERF:
616  #ifdef _WIN32      result=left.erf();
617        break;
618       case ASINH:
619        result=left.asinh();
620        break;
621       case ACOSH:
622        result=left.acosh();
623        break;
624       case ATANH:
625        result=left.atanh();
626        break;
627        case LOG10:
628        result=left.log10();
629        break;
630        case LOG:
631        result=left.log();
632        break;
633        case SIGN:
634        result=left.sign();
635        break;
636        case ABS:
637        result=left.abs();
638        break;
639        case NEG:
640        result=left.neg();
641        break;
642        case POS:
643        // it doesn't mean anything for delayed.
644        // it will just trigger a deep copy of the lazy object
645        throw DataException("Programmer error - POS not supported for lazy data.");
646        break;
647        case EXP:
648        result=left.exp();
649        break;
650        case SQRT:
651        result=left.sqrt();
652        break;
653        case RECIP:
654        result=left.oneOver();
655        break;
656        case GZ:
657        result=left.wherePositive();
658        break;
659        case LZ:
660        result=left.whereNegative();
661        break;
662        case GEZ:
663        result=left.whereNonNegative();
664        break;
665        case LEZ:
666        result=left.whereNonPositive();
667        break;
668        case SYM:
669        result=left.symmetric();
670        break;
671        case NSYM:
672        result=left.nonsymmetric();
673        break;
674        case PROD:
675        result=C_GeneralTensorProduct(left,right,m_axis_offset, m_transpose);
676        break;
677        case TRANS:
678        result=left.transpose(m_axis_offset);
679        break;
680        case TRACE:
681        result=left.trace(m_axis_offset);
682        break;
683        default:
684        throw DataException("Programmer error - collapseToReady does not know how to resolve operator "+opToString(m_op)+".");
685      }
686      return result.borrowReadyPtr();
687    }
688    
689    /*
690       \brief Converts the DataLazy into an IDENTITY storing the value of the expression.
691       This method uses the original methods on the Data class to evaluate the expressions.
692       For this reason, it should not be used on DataExpanded instances. (To do so would defeat
693       the purpose of using DataLazy in the first place).
694    */
695    void
696    DataLazy::collapse()
697    {
698      if (m_op==IDENTITY)
699      {
700        return;
701      }
702      if (m_readytype=='E')
703      { // this is more an efficiency concern than anything else
704        throw DataException("Programmer Error - do not use collapse on Expanded data.");
705      }
706      m_id=collapseToReady();
707      m_op=IDENTITY;
708    }
709    
710    /*
711      \brief Compute the value of the expression (unary operation) for the given sample.
712      \return Vector which stores the value of the subexpression for the given sample.
713      \param v A vector to store intermediate results.
714      \param offset Index in v to begin storing results.
715      \param sampleNo Sample number to evaluate.
716      \param roffset (output parameter) the offset in the return vector where the result begins.
717    
718      The return value will be an existing vector so do not deallocate it.
719      If the result is stored in v it should be stored at the offset given.
720      Everything from offset to the end of v should be considered available for this method to use.
721    */
722    DataTypes::ValueType*
723    DataLazy::resolveUnary(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const
724    {
725        // we assume that any collapsing has been done before we get here
726        // since we only have one argument we don't need to think about only
727        // processing single points.
728      if (m_readytype!='E')
729      {
730        throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
731      }
732      const ValueType* vleft=m_left->resolveSample(v,offset,sampleNo,roffset);
733      const double* left=&((*vleft)[roffset]);
734      double* result=&(v[offset]);
735      roffset=offset;
736      switch (m_op)
737      {
738        case SIN:  
739        tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sin);
740        break;
741        case COS:
742        tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cos);
743        break;
744        case TAN:
745        tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tan);
746        break;
747        case ASIN:
748        tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::asin);
749        break;
750        case ACOS:
751        tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::acos);
752        break;
753        case ATAN:
754        tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::atan);
755        break;
756        case SINH:
757        tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sinh);
758        break;
759        case COSH:
760        tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cosh);
761        break;
762        case TANH:
763        tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tanh);
764        break;
765        case ERF:
766    #if defined (_WIN32) && !defined(__INTEL_COMPILER)
767      throw DataException("Error - Data:: erf function is not supported on _WIN32 platforms.");      throw DataException("Error - Data:: erf function is not supported on _WIN32 platforms.");
768  #else  #else
769      tensor_unary_operation(m_samplesize, left, result, ::erf);      tensor_unary_operation(m_samplesize, left, result, ::erf);
770      break;      break;
771  #endif  #endif
772     case ASINH:     case ASINH:
773  #ifdef _WIN32  #if defined (_WIN32) && !defined(__INTEL_COMPILER)
774      tensor_unary_operation(m_samplesize, left, result, escript::asinh_substitute);      tensor_unary_operation(m_samplesize, left, result, escript::asinh_substitute);
775  #else  #else
776      tensor_unary_operation(m_samplesize, left, result, ::asinh);      tensor_unary_operation(m_samplesize, left, result, ::asinh);
777  #endif    #endif  
778      break;      break;
779     case ACOSH:     case ACOSH:
780  #ifdef _WIN32  #if defined (_WIN32) && !defined(__INTEL_COMPILER)
781      tensor_unary_operation(m_samplesize, left, result, escript::acosh_substitute);      tensor_unary_operation(m_samplesize, left, result, escript::acosh_substitute);
782  #else  #else
783      tensor_unary_operation(m_samplesize, left, result, ::acosh);      tensor_unary_operation(m_samplesize, left, result, ::acosh);
784  #endif    #endif  
785      break;      break;
786     case ATANH:     case ATANH:
787  #ifdef _WIN32  #if defined (_WIN32) && !defined(__INTEL_COMPILER)
788      tensor_unary_operation(m_samplesize, left, result, escript::atanh_substitute);      tensor_unary_operation(m_samplesize, left, result, escript::atanh_substitute);
789  #else  #else
790      tensor_unary_operation(m_samplesize, left, result, ::atanh);      tensor_unary_operation(m_samplesize, left, result, ::atanh);
791  #endif    #endif  
792      break;      break;
793      case LOG10:      case LOG10:
794      tensor_unary_operation(m_samplesize, left, result, ::log10);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log10);
795      break;      break;
796      case LOG:      case LOG:
797      tensor_unary_operation(m_samplesize, left, result, ::log);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log);
798      break;      break;
799      case SIGN:      case SIGN:
800      tensor_unary_operation(m_samplesize, left, result, escript::fsign);      tensor_unary_operation(m_samplesize, left, result, escript::fsign);
801      break;      break;
802      case ABS:      case ABS:
803      tensor_unary_operation(m_samplesize, left, result, ::fabs);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::fabs);
804      break;      break;
805      case NEG:      case NEG:
806      tensor_unary_operation(m_samplesize, left, result, negate<double>());      tensor_unary_operation(m_samplesize, left, result, negate<double>());
# Line 357  DataLazy::resolveSample(ValueType& v,int Line 811  DataLazy::resolveSample(ValueType& v,int
811      throw DataException("Programmer error - POS not supported for lazy data.");      throw DataException("Programmer error - POS not supported for lazy data.");
812      break;      break;
813      case EXP:      case EXP:
814      tensor_unary_operation(m_samplesize, left, result, ::exp);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::exp);
815      break;      break;
816      case SQRT:      case SQRT:
817      tensor_unary_operation(m_samplesize, left, result, ::sqrt);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sqrt);
818      break;      break;
819      case RECIP:      case RECIP:
820      tensor_unary_operation(m_samplesize, left, result, bind1st(divides<double>(),1.));      tensor_unary_operation(m_samplesize, left, result, bind1st(divides<double>(),1.));
# Line 379  DataLazy::resolveSample(ValueType& v,int Line 833  DataLazy::resolveSample(ValueType& v,int
833      break;      break;
834    
835      default:      default:
836      throw DataException("Programmer error - do not know how to resolve operator "+opToString(m_op)+".");      throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");
837      }
838      return &v;
839    }
840    
841    
842    /*
843      \brief Compute the value of the expression (unary operation) for the given sample.
844      \return Vector which stores the value of the subexpression for the given sample.
845      \param v A vector to store intermediate results.
846      \param offset Index in v to begin storing results.
847      \param sampleNo Sample number to evaluate.
848      \param roffset (output parameter) the offset in the return vector where the result begins.
849    
850      The return value will be an existing vector so do not deallocate it.
851      If the result is stored in v it should be stored at the offset given.
852      Everything from offset to the end of v should be considered available for this method to use.
853    */
854    DataTypes::ValueType*
855    DataLazy::resolveNP1OUT(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const
856    {
857        // we assume that any collapsing has been done before we get here
858        // since we only have one argument we don't need to think about only
859        // processing single points.
860      if (m_readytype!='E')
861      {
862        throw DataException("Programmer error - resolveNP1OUT should only be called on expanded Data.");
863      }
864        // since we can't write the result over the input, we need a result offset further along
865      size_t subroffset=roffset+m_samplesize;
866      const ValueType* vleft=m_left->resolveSample(v,offset,sampleNo,subroffset);
867      roffset=offset;
868      switch (m_op)
869      {
870        case SYM:
871        DataMaths::symmetric(*vleft,m_left->getShape(),subroffset, v, getShape(), offset);
872        break;
873        case NSYM:
874        DataMaths::nonsymmetric(*vleft,m_left->getShape(),subroffset, v, getShape(), offset);
875        break;
876        default:
877        throw DataException("Programmer error - resolveNP1OUT can not resolve operator "+opToString(m_op)+".");
878      }
879      return &v;
880    }
881    
882    /*
883      \brief Compute the value of the expression (unary operation) for the given sample.
884      \return Vector which stores the value of the subexpression for the given sample.
885      \param v A vector to store intermediate results.
886      \param offset Index in v to begin storing results.
887      \param sampleNo Sample number to evaluate.
888      \param roffset (output parameter) the offset in the return vector where the result begins.
889    
890      The return value will be an existing vector so do not deallocate it.
891      If the result is stored in v it should be stored at the offset given.
892      Everything from offset to the end of v should be considered available for this method to use.
893    */
894    DataTypes::ValueType*
895    DataLazy::resolveNP1OUT_P(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const
896    {
897        // we assume that any collapsing has been done before we get here
898        // since we only have one argument we don't need to think about only
899        // processing single points.
900      if (m_readytype!='E')
901      {
902        throw DataException("Programmer error - resolveNP1OUT_P should only be called on expanded Data.");
903      }
904        // since we can't write the result over the input, we need a result offset further along
905      size_t subroffset=roffset+m_samplesize;
906      const ValueType* vleft=m_left->resolveSample(v,offset,sampleNo,subroffset);
907      roffset=offset;
908      switch (m_op)
909      {
910        case TRACE:
911             DataMaths::trace(*vleft,m_left->getShape(),subroffset, v,getShape(),offset,m_axis_offset);
912        break;
913        case TRANS:
914             DataMaths::transpose(*vleft,m_left->getShape(),subroffset, v,getShape(),offset,m_axis_offset);
915        break;
916        default:
917        throw DataException("Programmer error - resolveNP1OUTP can not resolve operator "+opToString(m_op)+".");
918      }
919      return &v;
920    }
921    
922    
// Expands to a loop applying the pointwise binary operation X over `steps`
// chunks of `chunksize` doubles, advancing the left/right/result pointers by
// their respective strides after each chunk.
// NOTE: deliberately relies on these names being in scope at the expansion
// site: steps, resultp, resultStep, chunksize, left, right, lroffset,
// rroffset, leftStep, rightStep.  TYPE is the template argument for
// tensor_binary_operation (NO_ARG leaves it to be deduced).
#define PROC_OP(TYPE,X)                               \
    for (int i=0;i<steps;++i,resultp+=resultStep) \
    { \
       tensor_binary_operation< TYPE >(chunksize, &((*left)[lroffset]), &((*right)[rroffset]), resultp, X); \
       lroffset+=leftStep; \
       rroffset+=rightStep; \
    }
930    
931    /*
932      \brief Compute the value of the expression (binary operation) for the given sample.
933      \return Vector which stores the value of the subexpression for the given sample.
934      \param v A vector to store intermediate results.
935      \param offset Index in v to begin storing results.
936      \param sampleNo Sample number to evaluate.
937      \param roffset (output parameter) the offset in the return vector where the result begins.
938    
939      The return value will be an existing vector so do not deallocate it.
940      If the result is stored in v it should be stored at the offset given.
941      Everything from offset to the end of v should be considered available for this method to use.
942    */
943    // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
944    // have already been collapsed to IDENTITY. So we must have at least one expanded child.
945    // If both children are expanded, then we can process them in a single operation (we treat
946    // the whole sample as one big datapoint.
947    // If one of the children is not expanded, then we need to treat each point in the sample
948    // individually.
949    // There is an additional complication when scalar operations are considered.
950    // For example, 2+Vector.
951    // In this case each double within the point is treated individually
952    DataTypes::ValueType*
953    DataLazy::resolveBinary(ValueType& v,  size_t offset, int sampleNo, size_t& roffset) const
954    {
955    cout << "Resolve binary: " << toString() << endl;
956    
957      size_t lroffset=0, rroffset=0;    // offsets in the left and right result vectors
958        // first work out which of the children are expanded
959      bool leftExp=(m_left->m_readytype=='E');
960      bool rightExp=(m_right->m_readytype=='E');
961      if (!leftExp && !rightExp)
962      {
963        throw DataException("Programmer Error - please use collapse if neither argument has type 'E'.");
964      }
965      bool leftScalar=(m_left->getRank()==0);
966      bool rightScalar=(m_right->getRank()==0);
967      bool bigloops=((leftExp && rightExp) || (!leftExp && !rightExp)); // is processing in single step?
968      int steps=(bigloops?1:getNumDPPSample());
969      size_t chunksize=(bigloops? m_samplesize : getNoValues());    // if bigloops, pretend the whole sample is a datapoint
970      if (m_left->getRank()!=m_right->getRank())    // need to deal with scalar * ? ops
971      {
972        if (!leftScalar && !rightScalar)
973        {
974           throw DataException("resolveBinary - ranks of arguments must match unless one of them is scalar.");
975        }
976        steps=getNumDPPSample()*max(m_left->getNoValues(),m_right->getNoValues());
977        chunksize=1;    // for scalar
978      }    
979      int leftStep=((leftExp && (!rightExp || rightScalar))? m_right->getNoValues() : 0);
980      int rightStep=((rightExp && (!leftExp || leftScalar))? m_left->getNoValues() : 0);
981      int resultStep=max(leftStep,rightStep);   // only one (at most) should be !=0
982        // Get the values of sub-expressions
983      const ValueType* left=m_left->resolveSample(v,offset,sampleNo,lroffset);
984      const ValueType* right=m_right->resolveSample(v,offset+m_samplesize,sampleNo,rroffset); // Note
985        // the right child starts further along.
986      double* resultp=&(v[offset]);     // results are stored at the vector offset we recieved
987      switch(m_op)
988      {
989        case ADD:
990            PROC_OP(NO_ARG,plus<double>());
991        break;
992        case SUB:
993        PROC_OP(NO_ARG,minus<double>());
994        break;
995        case MUL:
996        PROC_OP(NO_ARG,multiplies<double>());
997        break;
998        case DIV:
999        PROC_OP(NO_ARG,divides<double>());
1000        break;
1001        case POW:
1002           PROC_OP(double (double,double),::pow);
1003        break;
1004        default:
1005        throw DataException("Programmer error - resolveBinary can not resolve operator "+opToString(m_op)+".");
1006      }
1007      roffset=offset;  
1008      return &v;
1009    }
1010    
1011    
1012    /*
1013      \brief Compute the value of the expression (tensor product) for the given sample.
1014      \return Vector which stores the value of the subexpression for the given sample.
1015      \param v A vector to store intermediate results.
1016      \param offset Index in v to begin storing results.
1017      \param sampleNo Sample number to evaluate.
1018      \param roffset (output parameter) the offset in the return vector where the result begins.
1019    
1020      The return value will be an existing vector so do not deallocate it.
1021      If the result is stored in v it should be stored at the offset given.
1022      Everything from offset to the end of v should be considered available for this method to use.
1023    */
1024    // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1025    // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1026    // unlike the other resolve helpers, we must treat these datapoints separately.
1027    DataTypes::ValueType*
1028    DataLazy::resolveTProd(ValueType& v,  size_t offset, int sampleNo, size_t& roffset) const
1029    {
1030    cout << "Resolve TensorProduct: " << toString() << endl;
1031    
1032      size_t lroffset=0, rroffset=0;    // offsets in the left and right result vectors
1033        // first work out which of the children are expanded
1034      bool leftExp=(m_left->m_readytype=='E');
1035      bool rightExp=(m_right->m_readytype=='E');
1036      int steps=getNumDPPSample();
1037      int leftStep=((leftExp && !rightExp)? m_right->getNoValues() : 0);
1038      int rightStep=((rightExp && !leftExp)? m_left->getNoValues() : 0);
1039      int resultStep=max(leftStep,rightStep);   // only one (at most) should be !=0
1040        // Get the values of sub-expressions (leave a gap of one sample for the result).
1041      const ValueType* left=m_left->resolveSample(v,offset+m_samplesize,sampleNo,lroffset);
1042      const ValueType* right=m_right->resolveSample(v,offset+2*m_samplesize,sampleNo,rroffset);
1043      double* resultp=&(v[offset]);     // results are stored at the vector offset we recieved
1044      switch(m_op)
1045      {
1046        case PROD:
1047        for (int i=0;i<steps;++i,resultp+=resultStep)
1048        {
1049              const double *ptr_0 = &((*left)[lroffset]);
1050              const double *ptr_1 = &((*right)[rroffset]);
1051              matrix_matrix_product(m_SL, m_SM, m_SR, ptr_0, ptr_1, resultp, m_transpose);
1052          lroffset+=leftStep;
1053          rroffset+=rightStep;
1054        }
1055        break;
1056        default:
1057        throw DataException("Programmer error - resolveTProduct can not resolve operator "+opToString(m_op)+".");
1058      }
1059      roffset=offset;
1060      return &v;
1061    }
1062    
1063    
1064    
1065    /*
1066      \brief Compute the value of the expression for the given sample.
1067      \return Vector which stores the value of the subexpression for the given sample.
1068      \param v A vector to store intermediate results.
1069      \param offset Index in v to begin storing results.
1070      \param sampleNo Sample number to evaluate.
1071      \param roffset (output parameter) the offset in the return vector where the result begins.
1072    
1073      The return value will be an existing vector so do not deallocate it.
1074    */
1075    // the vector and the offset are a place where the method could write its data if it wishes
1076    // it is not obligated to do so. For example, if it has its own storage already, it can use that.
1077    // Hence the return value to indicate where the data is actually stored.
1078    // Regardless, the storage should be assumed to be used, even if it isn't.
1079    
1080    // the roffset is the offset within the returned vector where the data begins
1081    const DataTypes::ValueType*
1082    DataLazy::resolveSample(ValueType& v, size_t offset, int sampleNo, size_t& roffset)
1083    {
1084    cout << "Resolve sample " << toString() << endl;
1085        // collapse so we have a 'E' node or an IDENTITY for some other type
1086      if (m_readytype!='E' && m_op!=IDENTITY)
1087      {
1088        collapse();
1089      }
1090      if (m_op==IDENTITY)  
1091      {
1092        const ValueType& vec=m_id->getVector();
1093        if (m_readytype=='C')
1094        {
1095        roffset=0;
1096        return &(vec);
1097      }      }
1098        roffset=m_id->getPointOffset(sampleNo, 0);
1099        return &(vec);
1100      }
1101      if (m_readytype!='E')
1102      {
1103        throw DataException("Programmer Error - Collapse did not produce an expanded node.");
1104      }
1105      switch (getOpgroup(m_op))
1106      {
1107      case G_UNARY: return resolveUnary(v, offset,sampleNo,roffset);
1108      case G_BINARY: return resolveBinary(v, offset,sampleNo,roffset);
1109      case G_NP1OUT: return resolveNP1OUT(v, offset, sampleNo,roffset);
1110      case G_NP1OUT_P: return resolveNP1OUT_P(v, offset, sampleNo,roffset);
1111      case G_TENSORPROD: return resolveTProd(v,offset, sampleNo,roffset);
1112      default:
1113        throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");
1114    }    }
   return result;  
1115  }  }
1116    
1117    
1118    // To simplify the memory management, all threads operate on one large vector, rather than one each.
1119    // Each sample is evaluated independently and copied into the result DataExpanded.
1120  DataReady_ptr  DataReady_ptr
1121  DataLazy::resolve()  DataLazy::resolve()
1122  {  {
   // This is broken!     We need to have a buffer per thread!  
   // so the allocation of v needs to move inside the loop somehow  
1123    
1124  cout << "Sample size=" << m_samplesize << endl;  cout << "Sample size=" << m_samplesize << endl;
1125  cout << "Buffers=" << m_buffsRequired << endl;  cout << "Buffers=" << m_buffsRequired << endl;
1126    
1127    size_t threadbuffersize=m_samplesize*(max(1,m_buffsRequired)+1);    if (m_readytype!='E')     // if the whole sub-expression is Constant or Tagged, then evaluate it normally
1128      {
1129        collapse();
1130      }
1131      if (m_op==IDENTITY)       // So a lazy expression of Constant or Tagged data will be returned here.
1132      {
1133        return m_id;
1134      }
1135        // from this point on we must have m_op!=IDENTITY and m_readytype=='E'
1136      size_t threadbuffersize=m_maxsamplesize*(max(1,m_buffsRequired)); // Each thread needs to have enough
1137        // storage to evaluate its expression
1138    int numthreads=1;    int numthreads=1;
1139  #ifdef _OPENMP  #ifdef _OPENMP
1140    numthreads=omp_get_max_threads();    numthreads=getNumberOfThreads();
   int threadnum=0;  
1141  #endif  #endif
1142    ValueType v(numthreads*threadbuffersize);    ValueType v(numthreads*threadbuffersize);
1143  cout << "Buffer created with size=" << v.size() << endl;  cout << "Buffer created with size=" << v.size() << endl;
1144    ValueType dummy(getNoValues());    DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(),  ValueType(getNoValues()));
   DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(),dummy);  
1145    ValueType& resvec=result->getVector();    ValueType& resvec=result->getVector();
1146    DataReady_ptr resptr=DataReady_ptr(result);    DataReady_ptr resptr=DataReady_ptr(result);
1147    int sample;    int sample;
1148    int resoffset;    size_t outoffset;     // offset in the output data
1149    int totalsamples=getNumSamples();    int totalsamples=getNumSamples();
1150    #pragma omp parallel for private(sample,resoffset,threadnum) schedule(static)    const ValueType* res=0;   // Vector storing the answer
1151      size_t resoffset=0;       // where in the vector to find the answer
1152      #pragma omp parallel for private(sample,resoffset,outoffset,res) schedule(static)
1153    for (sample=0;sample<totalsamples;++sample)    for (sample=0;sample<totalsamples;++sample)
1154    {    {
1155    cout << "################################# " << sample << endl;
1156  #ifdef _OPENMP  #ifdef _OPENMP
1157      const double* res=resolveSample(v,sample,threadbuffersize*omp_get_thread_num());      res=resolveSample(v,threadbuffersize*omp_get_thread_num(),sample,resoffset);
1158  #else  #else
1159      const double* res=resolveSample(v,sample,0);   // this would normally be v, but not if its a single IDENTITY op.      res=resolveSample(v,0,sample,resoffset);   // res would normally be v, but not if its a single IDENTITY op.
1160  #endif  #endif
1161      resoffset=result->getPointOffset(sample,0);  cerr << "-------------------------------- " << endl;
1162      for (int i=0;i<m_samplesize;++i,++resoffset)    // copy values into the output vector      outoffset=result->getPointOffset(sample,0);
1163    cerr << "offset=" << outoffset << endl;
1164        for (unsigned int i=0;i<m_samplesize;++i,++outoffset,++resoffset)   // copy values into the output vector
1165      {      {
1166      resvec[resoffset]=res[i];      resvec[outoffset]=(*res)[resoffset];
1167      }      }
1168    cerr << "*********************************" << endl;
1169    }    }
1170    return resptr;    return resptr;
1171  }  }
# Line 435  DataLazy::toString() const Line 1179  DataLazy::toString() const
1179    return oss.str();    return oss.str();
1180  }  }
1181    
1182    
1183  void  void
1184  DataLazy::intoString(ostringstream& oss) const  DataLazy::intoString(ostringstream& oss) const
1185  {  {
1186    switch (getOpgroup(m_op))    switch (getOpgroup(m_op))
1187    {    {
1188    case G_IDENTITY:    case G_IDENTITY:
1189        if (m_id->isExpanded())
1190        {
1191           oss << "E";
1192        }
1193        else if (m_id->isTagged())
1194        {
1195          oss << "T";
1196        }
1197        else if (m_id->isConstant())
1198        {
1199          oss << "C";
1200        }
1201        else
1202        {
1203          oss << "?";
1204        }
1205      oss << '@' << m_id.get();      oss << '@' << m_id.get();
1206      break;      break;
1207    case G_BINARY:    case G_BINARY:
# Line 451  DataLazy::intoString(ostringstream& oss) Line 1212  DataLazy::intoString(ostringstream& oss)
1212      oss << ')';      oss << ')';
1213      break;      break;
1214    case G_UNARY:    case G_UNARY:
1215      case G_NP1OUT:
1216      case G_NP1OUT_P:
1217      oss << opToString(m_op) << '(';      oss << opToString(m_op) << '(';
1218      m_left->intoString(oss);      m_left->intoString(oss);
1219      oss << ')';      oss << ')';
1220      break;      break;
1221      case G_TENSORPROD:
1222        oss << opToString(m_op) << '(';
1223        m_left->intoString(oss);
1224        oss << ", ";
1225        m_right->intoString(oss);
1226        oss << ')';
1227        break;
1228    default:    default:
1229      oss << "UNKNOWN";      oss << "UNKNOWN";
1230    }    }
1231  }  }
1232    
 // Note that in this case, deepCopy does not make copies of the leaves.  
 // Hopefully copy on write (or whatever we end up using) will take care of this.  
// Returns a structurally independent copy of this node: the stored DataReady
// (for IDENTITY) or the child node(s) are themselves deep-copied before being
// wrapped in a new DataLazy carrying the same operation (and, for tensor
// products, the same axis_offset/transpose parameters).
DataAbstract*
DataLazy::deepCopy()
{
  switch (getOpgroup(m_op))
  {
  case G_IDENTITY:  return new DataLazy(m_id->deepCopy()->getPtr());
  case G_UNARY: return new DataLazy(m_left->deepCopy()->getPtr(),m_op);
  case G_BINARY:    return new DataLazy(m_left->deepCopy()->getPtr(),m_right->deepCopy()->getPtr(),m_op);
  case G_NP1OUT: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(),m_op);
  case G_TENSORPROD: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
  default:
    throw DataException("Programmer error - do not know how to deepcopy operator "+opToString(m_op)+".");
  }
}
1247    
1248    
1249    // There is no single, natural interpretation of getLength on DataLazy.
1250    // Instances of DataReady can look at the size of their vectors.
1251    // For lazy though, it could be the size the data would be if it were resolved;
1252    // or it could be some function of the lengths of the DataReady instances which
1253    // form part of the expression.
1254    // Rather than have people making assumptions, I have disabled the method.
// Deliberately disabled: there is no single, natural interpretation of
// "length" for an unevaluated expression, so rather than let callers make
// assumptions, any use raises an exception.
DataTypes::ValueType::size_type
DataLazy::getLength() const
{
  throw DataException("getLength() does not make sense for lazy data.");
}
1260    
1261    
# Line 486  DataLazy::getSlice(const DataTypes::Regi Line 1265  DataLazy::getSlice(const DataTypes::Regi
1265    throw DataException("getSlice - not implemented for Lazy objects.");    throw DataException("getSlice - not implemented for Lazy objects.");
1266  }  }
1267    
1268    
1269    // To do this we need to rely on our child nodes
1270    DataTypes::ValueType::size_type
1271    DataLazy::getPointOffset(int sampleNo,
1272                     int dataPointNo)
1273    {
1274      if (m_op==IDENTITY)
1275      {
1276        return m_id->getPointOffset(sampleNo,dataPointNo);
1277      }
1278      if (m_readytype!='E')
1279      {
1280        collapse();
1281        return m_id->getPointOffset(sampleNo,dataPointNo);
1282      }
1283      // at this point we do not have an identity node and the expression will be Expanded
1284      // so we only need to know which child to ask
1285      if (m_left->m_readytype=='E')
1286      {
1287        return m_left->getPointOffset(sampleNo,dataPointNo);
1288      }
1289      else
1290      {
1291        return m_right->getPointOffset(sampleNo,dataPointNo);
1292      }
1293    }
1294    
1295    // To do this we need to rely on our child nodes
// const overload: relies on our child nodes, but cannot collapse() this
// object, so non-identity Tagged ('T') expressions raise an exception.
DataTypes::ValueType::size_type
DataLazy::getPointOffset(int sampleNo,
                 int dataPointNo) const
{
  if (m_op==IDENTITY)
  {
    return m_id->getPointOffset(sampleNo,dataPointNo);
  }
  if (m_readytype=='E')
  {
    // at this point we do not have an identity node and the expression will be Expanded
    // so we only need to know which child to ask
    if (m_left->m_readytype=='E')
    {
    return m_left->getPointOffset(sampleNo,dataPointNo);
    }
    else
    {
    return m_right->getPointOffset(sampleNo,dataPointNo);
    }
  }
  if (m_readytype=='C')
  {
    return m_left->getPointOffset(sampleNo,dataPointNo); // which child doesn't matter
  }
  throw DataException("Programmer error - getPointOffset on lazy data may require collapsing (but this object is marked const).");
}
1323    
1324    // It would seem that DataTagged will need to be treated differently since even after setting all tags
1325    // to zero, all the tags from all the DataTags would be in the result.
1326    // However since they all have the same value (0) whether they are there or not should not matter.
1327    // So I have decided that for all types this method will create a constant 0.
1328    // It can be promoted up as required.
1329    // A possible efficiency concern might be expanded->constant->expanded which has an extra memory management
1330    // but we can deal with that if it arrises.
1331    void
1332    DataLazy::setToZero()
1333    {
1334      DataTypes::ValueType v(getNoValues(),0);
1335      m_id=DataReady_ptr(new DataConstant(getFunctionSpace(),getShape(),v));
1336      m_op=IDENTITY;
1337      m_right.reset();  
1338      m_left.reset();
1339      m_readytype='C';
1340      m_buffsRequired=1;
1341  }  }
1342    
1343  }   // end namespace  }   // end namespace

Legend:
Removed from v.1888  
changed lines
  Added in v.2086

  ViewVC Help
Powered by ViewVC 1.1.26