/[escript]/trunk/escript/src/DataLazy.cpp
ViewVC logotype

Diff of /trunk/escript/src/DataLazy.cpp

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

branches/schroedinger/escript/src/DataLazy.cpp revision 1888 by jfenwick, Wed Oct 15 04:00:21 2008 UTC trunk/escript/src/DataLazy.cpp revision 2157 by jfenwick, Mon Dec 15 06:05:58 2008 UTC
# Line 26  Line 26 
26  #include "DataTypes.h"  #include "DataTypes.h"
27  #include "Data.h"  #include "Data.h"
28  #include "UnaryFuncs.h"     // for escript::fsign  #include "UnaryFuncs.h"     // for escript::fsign
29    #include "Utils.h"
30    
31    // #define LAZYDEBUG(X) if (privdebug){X;}
32    #define LAZYDEBUG(X)
33    namespace
34    {
35    bool privdebug=false;
36    
37    #define ENABLEDEBUG privdebug=true;
38    #define DISABLEDEBUG privdebug=false;
39    }
40    
41    /*
42    How does DataLazy work?
43    ~~~~~~~~~~~~~~~~~~~~~~~
44    
45    Each instance represents a single operation on one or two other DataLazy instances. These arguments are normally
46    denoted left and right.
47    
48    A special operation, IDENTITY, stores an instance of DataReady in the m_id member.
49    This means that all "internal" nodes in the structure are instances of DataLazy.
50    
51    Each operation has a string representation as well as an opgroup - eg G_IDENTITY, G_BINARY, ...
52    Note that IDENTITY is not considered a unary operation.
53    
54    I am avoiding calling the structure formed a tree because it is not guaranteed to be one (eg c=a+a).
55    It must however form a DAG (directed acyclic graph).
56    I will refer to individual DataLazy objects within the structure as nodes.
57    
58    Each node also stores:
59    - m_readytype \in {'E','T','C','?'} ~ indicates what sort of DataReady would be produced if the expression was
60        evaluated.
61    - m_buffsrequired ~ the largest number of samples which would need to be kept simultaneously in order to
62        evaluate the expression.
63    - m_samplesize ~ the number of doubles stored in a sample.
64    
65    When a new node is created, the above values are computed based on the values in the child nodes.
66    Eg: if left requires 4 samples and right requires 6 then left+right requires 7 samples.
67    
68    The resolve method, which produces a DataReady from a DataLazy, does the following:
69    1) Create a DataReady to hold the new result.
70    2) Allocate a vector (v) big enough to hold m_buffsrequired samples.
71    3) For each sample, call resolveSample with v, to get its values and copy them into the result object.
72    
73    (In the case of OMP, multiple samples are resolved in parallel so the vector needs to be larger.)
74    
75    resolveSample returns a Vector* and an offset within that vector where the result is stored.
76    Normally, this would be v, but for identity nodes their internal vector is returned instead.
77    
78    The convention that I use is that the resolve methods should store their results starting at the offset they are passed.
79    
80    For expressions which evaluate to Constant or Tagged, there is a different evaluation method.
81    The collapse method invokes the (non-lazy) operations on the Data class to evaluate the expression.
82    
83    To add a new operator you need to do the following (plus anything I might have forgotten - adding a new group for example):
84    1) Add to the ES_optype.
85    2) determine what opgroup your operation belongs to (X)
86    3) add a string for the op to the end of ES_opstrings
87    4) increase ES_opcount
88    5) add an entry (X) to opgroups
89    6) add an entry to the switch in collapseToReady
90    7) add an entry to resolveX
91    */
92    
93    
94  using namespace std;  using namespace std;
95  using namespace boost;  using namespace boost;
# Line 33  using namespace boost; Line 97  using namespace boost;
97  namespace escript  namespace escript
98  {  {
99    
 const std::string&  
 opToString(ES_optype op);  
   
100  namespace  namespace
101  {  {
102    
   
   
103  enum ES_opgroup  enum ES_opgroup
104  {  {
105     G_UNKNOWN,     G_UNKNOWN,
106     G_IDENTITY,     G_IDENTITY,
107     G_BINARY,     G_BINARY,        // pointwise operations with two arguments
108     G_UNARY     G_UNARY,     // pointwise operations with one argument
109       G_UNARY_P,       // pointwise operations with one argument, requiring a parameter
110       G_NP1OUT,        // non-pointwise op with one output
111       G_NP1OUT_P,      // non-pointwise op with one output requiring a parameter
112       G_TENSORPROD     // general tensor product
113  };  };
114    
115    
116    
117    
118  string ES_opstrings[]={"UNKNOWN","IDENTITY","+","-","*","/","sin","cos","tan",  string ES_opstrings[]={"UNKNOWN","IDENTITY","+","-","*","/","^",
119                "sin","cos","tan",
120              "asin","acos","atan","sinh","cosh","tanh","erf",              "asin","acos","atan","sinh","cosh","tanh","erf",
121              "asinh","acosh","atanh",              "asinh","acosh","atanh",
122              "log10","log","sign","abs","neg","pos","exp","sqrt",              "log10","log","sign","abs","neg","pos","exp","sqrt",
123              "1/","where>0","where<0","where>=0","where<=0"};              "1/","where>0","where<0","where>=0","where<=0", "where<>0","where=0",
124  int ES_opcount=32;              "symmetric","nonsymmetric",
125  ES_opgroup opgroups[]={G_UNKNOWN,G_IDENTITY,G_BINARY,G_BINARY,G_BINARY,G_BINARY,G_UNARY,G_UNARY,G_UNARY, //9              "prod",
126              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,    // 16              "transpose", "trace"};
127              G_UNARY,G_UNARY,G_UNARY,                    // 19  int ES_opcount=40;
128              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,        // 27  ES_opgroup opgroups[]={G_UNKNOWN,G_IDENTITY,G_BINARY,G_BINARY,G_BINARY,G_BINARY, G_BINARY,
129              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY};              G_UNARY,G_UNARY,G_UNARY, //10
130                G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,    // 17
131                G_UNARY,G_UNARY,G_UNARY,                    // 20
132                G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,    // 28
133                G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY, G_UNARY_P, G_UNARY_P,      // 35
134                G_NP1OUT,G_NP1OUT,
135                G_TENSORPROD,
136                G_NP1OUT_P, G_NP1OUT_P};
137  inline  inline
138  ES_opgroup  ES_opgroup
139  getOpgroup(ES_optype op)  getOpgroup(ES_optype op)
# Line 79  resultFS(DataAbstract_ptr left, DataAbst Line 150  resultFS(DataAbstract_ptr left, DataAbst
150      // that way, if interpolate is required in any other op we can just throw a      // that way, if interpolate is required in any other op we can just throw a
151      // programming error exception.      // programming error exception.
152    
153      FunctionSpace l=left->getFunctionSpace();
154      if (left->getFunctionSpace()!=right->getFunctionSpace())    FunctionSpace r=right->getFunctionSpace();
155      {    if (l!=r)
156          throw DataException("FunctionSpaces not equal - interpolation not supported on lazy data.");    {
157      }      if (r.probeInterpolation(l))
158      return left->getFunctionSpace();      {
159        return l;
160        }
161        if (l.probeInterpolation(r))
162        {
163        return r;
164        }
165        throw DataException("Cannot interpolate between the FunctionSpaces given for operation "+opToString(op)+".");
166      }
167      return l;
168  }  }
169    
170  // return the shape of the result of "left op right"  // return the shape of the result of "left op right"
171    // the shapes resulting from tensor product are more complex to compute so are worked out elsewhere
172  DataTypes::ShapeType  DataTypes::ShapeType
173  resultShape(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)  resultShape(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
174  {  {
175      if (left->getShape()!=right->getShape())      if (left->getShape()!=right->getShape())
176      {      {
177          throw DataException("Shapes not the same - shapes must match for lazy data.");        if ((getOpgroup(op)!=G_BINARY) && (getOpgroup(op)!=G_NP1OUT))
178          {
179            throw DataException("Shapes not the name - shapes must match for (point)binary operations.");
180          }
181          if (left->getRank()==0)   // we need to allow scalar * anything
182          {
183            return right->getShape();
184          }
185          if (right->getRank()==0)
186          {
187            return left->getShape();
188          }
189          throw DataException("Shapes not the same - arguments must have matching shapes (or be scalars) for (point)binary operations on lazy data.");
190      }      }
191      return left->getShape();      return left->getShape();
192  }  }
193    
194  size_t  // return the shape for "op left"
195  resultLength(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)  
196    DataTypes::ShapeType
197    resultShape(DataAbstract_ptr left, ES_optype op)
198  {  {
199     switch (getOpgroup(op))      switch(op)
200     {      {
201     case G_BINARY: return left->getLength();          case TRANS:
202     case G_UNARY: return left->getLength();          return left->getShape();
203     default:      break;
204      throw DataException("Programmer Error - attempt to getLength() for operator "+opToString(op)+".");      case TRACE:
205     }          return DataTypes::scalarShape;
206        break;
207            default:
208        throw DataException("Programmer error - resultShape(left,op) can't compute shapes for operator "+opToString(op)+".");
209        }
210    }
211    
212    // determine the output shape for the general tensor product operation
213    // the additional parameters return information required later for the product
214    // the majority of this code is copy pasted from C_General_Tensor_Product
215    DataTypes::ShapeType
216    GTPShape(DataAbstract_ptr left, DataAbstract_ptr right, int axis_offset, int transpose, int& SL, int& SM, int& SR)
217    {
218        
219      // Get rank and shape of inputs
220      int rank0 = left->getRank();
221      int rank1 = right->getRank();
222      const DataTypes::ShapeType& shape0 = left->getShape();
223      const DataTypes::ShapeType& shape1 = right->getShape();
224    
225      // Prepare for the loops of the product and verify compatibility of shapes
226      int start0=0, start1=0;
227      if (transpose == 0)       {}
228      else if (transpose == 1)  { start0 = axis_offset; }
229      else if (transpose == 2)  { start1 = rank1-axis_offset; }
230      else              { throw DataException("DataLazy GeneralTensorProduct Constructor: Error - transpose should be 0, 1 or 2"); }
231    
232      if (rank0<axis_offset)
233      {
234        throw DataException("DataLazy GeneralTensorProduct Constructor: Error - rank of left < axisoffset");
235      }
236    
237      // Adjust the shapes for transpose
238      DataTypes::ShapeType tmpShape0(rank0);    // pre-sizing the vectors rather
239      DataTypes::ShapeType tmpShape1(rank1);    // than using push_back
240      for (int i=0; i<rank0; i++)   { tmpShape0[i]=shape0[(i+start0)%rank0]; }
241      for (int i=0; i<rank1; i++)   { tmpShape1[i]=shape1[(i+start1)%rank1]; }
242    
243      // Prepare for the loops of the product
244      SL=1, SM=1, SR=1;
245      for (int i=0; i<rank0-axis_offset; i++)   {
246        SL *= tmpShape0[i];
247      }
248      for (int i=rank0-axis_offset; i<rank0; i++)   {
249        if (tmpShape0[i] != tmpShape1[i-(rank0-axis_offset)]) {
250          throw DataException("C_GeneralTensorProduct: Error - incompatible shapes");
251        }
252        SM *= tmpShape0[i];
253      }
254      for (int i=axis_offset; i<rank1; i++)     {
255        SR *= tmpShape1[i];
256      }
257    
258      // Define the shape of the output (rank of shape is the sum of the loop ranges below)
259      DataTypes::ShapeType shape2(rank0+rank1-2*axis_offset);  
260      {         // block to limit the scope of out_index
261         int out_index=0;
262         for (int i=0; i<rank0-axis_offset; i++, ++out_index) { shape2[out_index]=tmpShape0[i]; } // First part of arg_0_Z
263         for (int i=axis_offset; i<rank1; i++, ++out_index)   { shape2[out_index]=tmpShape1[i]; } // Last part of arg_1_Z
264      }
265    
266      if (shape2.size()>ESCRIPT_MAX_DATA_RANK)
267      {
268         ostringstream os;
269         os << "C_GeneralTensorProduct: Error - Attempt to create a rank " << shape2.size() << " object. The maximum rank is " << ESCRIPT_MAX_DATA_RANK << ".";
270         throw DataException(os.str());
271      }
272    
273      return shape2;
274  }  }
275    
276    
277    // determine the number of points in the result of "left op right"
278    // note that determining the resultLength for G_TENSORPROD is more complex and will not be processed here
279    // size_t
280    // resultLength(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
281    // {
282    //    switch (getOpgroup(op))
283    //    {
284    //    case G_BINARY: return left->getLength();
285    //    case G_UNARY: return left->getLength();
286    //    case G_NP1OUT: return left->getLength();
287    //    default:
288    //  throw DataException("Programmer Error - attempt to getLength() for operator "+opToString(op)+".");
289    //    }
290    // }
291    
292    // determine the number of samples requires to evaluate an expression combining left and right
293    // NP1OUT needs an extra buffer because we can't write the answers over the top of the input.
294    // The same goes for G_TENSORPROD
295    // It might seem that pointwise binary ops (G_BINARY) could be written over the top of the lefths.
296    // This would be true were it not for the possibility that the LHS could be a scalar which needs to be examined
297    // multiple times
298  int  int
299  calcBuffs(const DataLazy_ptr& left, const DataLazy_ptr& right, ES_optype op)  calcBuffs(const DataLazy_ptr& left, const DataLazy_ptr& right, ES_optype op)
300  {  {
301     switch(getOpgroup(op))     switch(getOpgroup(op))
302     {     {
303     case G_IDENTITY: return 1;     case G_IDENTITY: return 1;
304     case G_BINARY: return max(left->getBuffsRequired(),right->getBuffsRequired()+1);     case G_BINARY: return 1+max(left->getBuffsRequired(),right->getBuffsRequired()+1);
305     case G_UNARY: return max(left->getBuffsRequired(),1);     case G_UNARY:
306       case G_UNARY_P: return max(left->getBuffsRequired(),1);
307       case G_NP1OUT: return 1+max(left->getBuffsRequired(),1);
308       case G_NP1OUT_P: return 1+max(left->getBuffsRequired(),1);
309       case G_TENSORPROD: return 1+max(left->getBuffsRequired(),right->getBuffsRequired()+1);
310     default:     default:
311      throw DataException("Programmer Error - attempt to calcBuffs() for operator "+opToString(op)+".");      throw DataException("Programmer Error - attempt to calcBuffs() for operator "+opToString(op)+".");
312     }     }
313  }  }
314    
315    
   
316  }   // end anonymous namespace  }   // end anonymous namespace
317    
318    
319    
320    // Return a string representing the operation
321  const std::string&  const std::string&
322  opToString(ES_optype op)  opToString(ES_optype op)
323  {  {
# Line 141  opToString(ES_optype op) Line 331  opToString(ES_optype op)
331    
332  DataLazy::DataLazy(DataAbstract_ptr p)  DataLazy::DataLazy(DataAbstract_ptr p)
333      : parent(p->getFunctionSpace(),p->getShape()),      : parent(p->getFunctionSpace(),p->getShape()),
334      m_op(IDENTITY)      m_op(IDENTITY),
335        m_axis_offset(0),
336        m_transpose(0),
337        m_SL(0), m_SM(0), m_SR(0)
338  {  {
339     if (p->isLazy())     if (p->isLazy())
340     {     {
     // TODO: fix this.   We could make the new node a copy of p?  
341      // I don't want identity of Lazy.      // I don't want identity of Lazy.
342      // Question: Why would that be so bad?      // Question: Why would that be so bad?
343      // Answer: We assume that the child of ID is something we can call getVector on      // Answer: We assume that the child of ID is something we can call getVector on
# Line 154  DataLazy::DataLazy(DataAbstract_ptr p) Line 346  DataLazy::DataLazy(DataAbstract_ptr p)
346     else     else
347     {     {
348      m_id=dynamic_pointer_cast<DataReady>(p);      m_id=dynamic_pointer_cast<DataReady>(p);
349        if(p->isConstant()) {m_readytype='C';}
350        else if(p->isExpanded()) {m_readytype='E';}
351        else if (p->isTagged()) {m_readytype='T';}
352        else {throw DataException("Unknown DataReady instance in DataLazy constructor.");}
353     }     }
    m_length=p->getLength();  
354     m_buffsRequired=1;     m_buffsRequired=1;
355     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
356  cout << "(1)Lazy created with " << m_samplesize << endl;     m_maxsamplesize=m_samplesize;
357    LAZYDEBUG(cout << "(1)Lazy created with " << m_samplesize << endl;)
358  }  }
359    
360    
361    
362    
363  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op)  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op)
364      : parent(left->getFunctionSpace(),left->getShape()),      : parent(left->getFunctionSpace(),left->getShape()),
365      m_op(op)      m_op(op),
366        m_axis_offset(0),
367        m_transpose(0),
368        m_SL(0), m_SM(0), m_SR(0)
369  {  {
370     if (getOpgroup(op)!=G_UNARY)     if ((getOpgroup(op)!=G_UNARY) && (getOpgroup(op)!=G_NP1OUT))
371     {     {
372      throw DataException("Programmer error - constructor DataLazy(left, op) will only process UNARY operations.");      throw DataException("Programmer error - constructor DataLazy(left, op) will only process UNARY operations.");
373     }     }
374    
375     DataLazy_ptr lleft;     DataLazy_ptr lleft;
376     if (!left->isLazy())     if (!left->isLazy())
377     {     {
# Line 178  DataLazy::DataLazy(DataAbstract_ptr left Line 381  DataLazy::DataLazy(DataAbstract_ptr left
381     {     {
382      lleft=dynamic_pointer_cast<DataLazy>(left);      lleft=dynamic_pointer_cast<DataLazy>(left);
383     }     }
384     m_length=left->getLength();     m_readytype=lleft->m_readytype;
385     m_left=lleft;     m_left=lleft;
386     m_buffsRequired=1;     m_buffsRequired=calcBuffs(m_left, m_right,m_op); // yeah m_right will be null at this point
387     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
388       m_maxsamplesize=max(m_samplesize,m_left->getMaxSampleSize());
389  }  }
390    
391    
392  DataLazy::DataLazy(DataLazy_ptr left, DataLazy_ptr right, ES_optype op)  // In this constructor we need to consider interpolation
393    DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
394      : parent(resultFS(left,right,op), resultShape(left,right,op)),      : parent(resultFS(left,right,op), resultShape(left,right,op)),
395      m_left(left),      m_op(op),
396      m_right(right),      m_SL(0), m_SM(0), m_SR(0)
     m_op(op)  
397  {  {
398     if (getOpgroup(op)!=G_BINARY)     if ((getOpgroup(op)!=G_BINARY))
399     {     {
400      throw DataException("Programmer error - constructor DataLazy(left, right, op) will only process BINARY operations.");      throw DataException("Programmer error - constructor DataLazy(left, right, op) will only process BINARY operations.");
401     }     }
402     m_length=resultLength(m_left,m_right,m_op);  
403       if (getFunctionSpace()!=left->getFunctionSpace())    // left needs to be interpolated
404       {
405        FunctionSpace fs=getFunctionSpace();
406        Data ltemp(left);
407        Data tmp(ltemp,fs);
408        left=tmp.borrowDataPtr();
409       }
410       if (getFunctionSpace()!=right->getFunctionSpace())   // right needs to be interpolated
411       {
412        Data tmp(Data(right),getFunctionSpace());
413        right=tmp.borrowDataPtr();
414       }
415       left->operandCheck(*right);
416    
417       if (left->isLazy())          // the children need to be DataLazy. Wrap them in IDENTITY if required
418       {
419        m_left=dynamic_pointer_cast<DataLazy>(left);
420       }
421       else
422       {
423        m_left=DataLazy_ptr(new DataLazy(left));
424       }
425       if (right->isLazy())
426       {
427        m_right=dynamic_pointer_cast<DataLazy>(right);
428       }
429       else
430       {
431        m_right=DataLazy_ptr(new DataLazy(right));
432       }
433       char lt=m_left->m_readytype;
434       char rt=m_right->m_readytype;
435       if (lt=='E' || rt=='E')
436       {
437        m_readytype='E';
438       }
439       else if (lt=='T' || rt=='T')
440       {
441        m_readytype='T';
442       }
443       else
444       {
445        m_readytype='C';
446       }
447     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
448     m_buffsRequired=calcBuffs(m_left, m_right, m_op);     m_maxsamplesize=max(max(m_samplesize,m_right->getMaxSampleSize()),m_left->getMaxSampleSize());  
449  cout << "(2)Lazy created with " << m_samplesize << endl;     m_buffsRequired=calcBuffs(m_left, m_right,m_op);
450    LAZYDEBUG(cout << "(3)Lazy created with " << m_samplesize << endl;)
451  }  }
452    
453  DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)  DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op, int axis_offset, int transpose)
454      : parent(resultFS(left,right,op), resultShape(left,right,op)),      : parent(resultFS(left,right,op), GTPShape(left,right, axis_offset, transpose, m_SL,m_SM, m_SR)),
455      m_op(op)      m_op(op),
456        m_axis_offset(axis_offset),
457        m_transpose(transpose)
458  {  {
459     if (getOpgroup(op)!=G_BINARY)     if ((getOpgroup(op)!=G_TENSORPROD))
460       {
461        throw DataException("Programmer error - constructor DataLazy(left, right, op, ax, tr) will only process BINARY operations which require parameters.");
462       }
463       if ((transpose>2) || (transpose<0))
464     {     {
465      throw DataException("Programmer error - constructor DataLazy(left, op) will only process BINARY operations.");      throw DataException("DataLazy GeneralTensorProduct constructor: Error - transpose should be 0, 1 or 2");
466     }     }
467     if (left->isLazy())     if (getFunctionSpace()!=left->getFunctionSpace())    // left needs to be interpolated
468       {
469        FunctionSpace fs=getFunctionSpace();
470        Data ltemp(left);
471        Data tmp(ltemp,fs);
472        left=tmp.borrowDataPtr();
473       }
474       if (getFunctionSpace()!=right->getFunctionSpace())   // right needs to be interpolated
475       {
476        Data tmp(Data(right),getFunctionSpace());
477        right=tmp.borrowDataPtr();
478       }
479       left->operandCheck(*right);
480    
481       if (left->isLazy())          // the children need to be DataLazy. Wrap them in IDENTITY if required
482     {     {
483      m_left=dynamic_pointer_cast<DataLazy>(left);      m_left=dynamic_pointer_cast<DataLazy>(left);
484     }     }
# Line 225  DataLazy::DataLazy(DataAbstract_ptr left Line 494  DataLazy::DataLazy(DataAbstract_ptr left
494     {     {
495      m_right=DataLazy_ptr(new DataLazy(right));      m_right=DataLazy_ptr(new DataLazy(right));
496     }     }
497       char lt=m_left->m_readytype;
498     m_length=resultLength(m_left,m_right,m_op);     char rt=m_right->m_readytype;
499       if (lt=='E' || rt=='E')
500       {
501        m_readytype='E';
502       }
503       else if (lt=='T' || rt=='T')
504       {
505        m_readytype='T';
506       }
507       else
508       {
509        m_readytype='C';
510       }
511     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
512       m_maxsamplesize=max(max(m_samplesize,m_right->getMaxSampleSize()),m_left->getMaxSampleSize());  
513     m_buffsRequired=calcBuffs(m_left, m_right,m_op);     m_buffsRequired=calcBuffs(m_left, m_right,m_op);
514  cout << "(3)Lazy created with " << m_samplesize << endl;  LAZYDEBUG(cout << "(4)Lazy created with " << m_samplesize << endl;)
515    }
516    
517    
518    DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, int axis_offset)
519        : parent(left->getFunctionSpace(), resultShape(left,op)),
520        m_op(op),
521        m_axis_offset(axis_offset),
522        m_transpose(0),
523        m_tol(0)
524    {
525       if ((getOpgroup(op)!=G_NP1OUT_P))
526       {
527        throw DataException("Programmer error - constructor DataLazy(left, op, ax) will only process UNARY operations which require parameters.");
528       }
529       DataLazy_ptr lleft;
530       if (!left->isLazy())
531       {
532        lleft=DataLazy_ptr(new DataLazy(left));
533       }
534       else
535       {
536        lleft=dynamic_pointer_cast<DataLazy>(left);
537       }
538       m_readytype=lleft->m_readytype;
539       m_left=lleft;
540       m_buffsRequired=calcBuffs(m_left, m_right,m_op); // yeah m_right will be null at this point
541       m_samplesize=getNumDPPSample()*getNoValues();
542       m_maxsamplesize=max(m_samplesize,m_left->getMaxSampleSize());
543    LAZYDEBUG(cout << "(5)Lazy created with " << m_samplesize << endl;)
544  }  }
545    
546    DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, double tol)
547        : parent(left->getFunctionSpace(), left->getShape()),
548        m_op(op),
549        m_axis_offset(0),
550        m_transpose(0),
551        m_tol(tol)
552    {
553       if ((getOpgroup(op)!=G_UNARY_P))
554       {
555        throw DataException("Programmer error - constructor DataLazy(left, op, tol) will only process UNARY operations which require parameters.");
556       }
557       DataLazy_ptr lleft;
558       if (!left->isLazy())
559       {
560        lleft=DataLazy_ptr(new DataLazy(left));
561       }
562       else
563       {
564        lleft=dynamic_pointer_cast<DataLazy>(left);
565       }
566       m_readytype=lleft->m_readytype;
567       m_left=lleft;
568       m_buffsRequired=calcBuffs(m_left, m_right,m_op); // yeah m_right will be null at this point
569       m_samplesize=getNumDPPSample()*getNoValues();
570       m_maxsamplesize=max(m_samplesize,m_left->getMaxSampleSize());
571    LAZYDEBUG(cout << "(6)Lazy created with " << m_samplesize << endl;)
572    }
573    
574  DataLazy::~DataLazy()  DataLazy::~DataLazy()
575  {  {
# Line 245  DataLazy::getBuffsRequired() const Line 583  DataLazy::getBuffsRequired() const
583  }  }
584    
585    
586  // the vector and the offset are a place where the method could write its data if it wishes  size_t
587  // it is not obligated to do so. For example, if it has its own storage already, it can use that.  DataLazy::getMaxSampleSize() const
 // Hence the return value to indicate where the data is actually stored.  
 // Regardless, the storage should be assumed to be used, even if it isn't.  
 const double*  
 DataLazy::resolveSample(ValueType& v,int sampleNo,  size_t offset ) const  
588  {  {
589    if (m_op==IDENTITY)        return m_maxsamplesize;
590    }
591    
592    /*
593      \brief Evaluates the expression using methods on Data.
594      This does the work for the collapse method.
595      For reasons of efficiency do not call this method on DataExpanded nodes.
596    */
597    DataReady_ptr
598    DataLazy::collapseToReady()
599    {
600      if (m_readytype=='E')
601      { // this is more an efficiency concern than anything else
602        throw DataException("Programmer Error - do not use collapse on Expanded data.");
603      }
604      if (m_op==IDENTITY)
605    {    {
606      const ValueType& vec=m_id->getVector();      return m_id;
     return &(vec[m_id->getPointOffset(sampleNo, 0)]);  
607    }    }
608    size_t rightoffset=offset+m_samplesize;    DataReady_ptr pleft=m_left->collapseToReady();
609    const double* left=m_left->resolveSample(v,sampleNo,offset);    Data left(pleft);
610    const double* right=0;    Data right;
611    if (getOpgroup(m_op)==G_BINARY)    if ((getOpgroup(m_op)==G_BINARY) || (getOpgroup(m_op)==G_TENSORPROD))
612    {    {
613      right=m_right->resolveSample(v,sampleNo,rightoffset);      right=Data(m_right->collapseToReady());
614    }    }
615    double* result=&(v[offset]);    Data result;
616      switch(m_op)
617    {    {
618      switch(m_op)      case ADD:
619      {      result=left+right;
     case ADD:       // since these are pointwise ops, pretend each sample is one point  
     tensor_binary_operation(m_samplesize, left, right, result, plus<double>());  
620      break;      break;
621      case SUB:            case SUB:      
622      tensor_binary_operation(m_samplesize, left, right, result, minus<double>());      result=left-right;
623      break;      break;
624      case MUL:            case MUL:      
625      tensor_binary_operation(m_samplesize, left, right, result, multiplies<double>());      result=left*right;
626      break;      break;
627      case DIV:            case DIV:      
628      tensor_binary_operation(m_samplesize, left, right, result, divides<double>());      result=left/right;
629      break;      break;
 // unary ops  
630      case SIN:      case SIN:
631      tensor_unary_operation(m_samplesize, left, result, ::sin);      result=left.sin();  
632      break;      break;
633      case COS:      case COS:
634      tensor_unary_operation(m_samplesize, left, result, ::cos);      result=left.cos();
635      break;      break;
636      case TAN:      case TAN:
637      tensor_unary_operation(m_samplesize, left, result, ::tan);      result=left.tan();
638      break;      break;
639      case ASIN:      case ASIN:
640      tensor_unary_operation(m_samplesize, left, result, ::asin);      result=left.asin();
641      break;      break;
642      case ACOS:      case ACOS:
643      tensor_unary_operation(m_samplesize, left, result, ::acos);      result=left.acos();
644      break;      break;
645      case ATAN:      case ATAN:
646      tensor_unary_operation(m_samplesize, left, result, ::atan);      result=left.atan();
647      break;      break;
648      case SINH:      case SINH:
649      tensor_unary_operation(m_samplesize, left, result, ::sinh);      result=left.sinh();
650      break;      break;
651      case COSH:      case COSH:
652      tensor_unary_operation(m_samplesize, left, result, ::cosh);      result=left.cosh();
653      break;      break;
654      case TANH:      case TANH:
655      tensor_unary_operation(m_samplesize, left, result, ::tanh);      result=left.tanh();
656      break;      break;
657      case ERF:      case ERF:
658  #ifdef _WIN32      result=left.erf();
659        break;
660       case ASINH:
661        result=left.asinh();
662        break;
663       case ACOSH:
664        result=left.acosh();
665        break;
666       case ATANH:
667        result=left.atanh();
668        break;
669        case LOG10:
670        result=left.log10();
671        break;
672        case LOG:
673        result=left.log();
674        break;
675        case SIGN:
676        result=left.sign();
677        break;
678        case ABS:
679        result=left.abs();
680        break;
681        case NEG:
682        result=left.neg();
683        break;
684        case POS:
685        // it doesn't mean anything for delayed.
686        // it will just trigger a deep copy of the lazy object
687        throw DataException("Programmer error - POS not supported for lazy data.");
688        break;
689        case EXP:
690        result=left.exp();
691        break;
692        case SQRT:
693        result=left.sqrt();
694        break;
695        case RECIP:
696        result=left.oneOver();
697        break;
698        case GZ:
699        result=left.wherePositive();
700        break;
701        case LZ:
702        result=left.whereNegative();
703        break;
704        case GEZ:
705        result=left.whereNonNegative();
706        break;
707        case LEZ:
708        result=left.whereNonPositive();
709        break;
710        case NEZ:
711        result=left.whereNonZero(m_tol);
712        break;
713        case EZ:
714        result=left.whereZero(m_tol);
715        break;
716        case SYM:
717        result=left.symmetric();
718        break;
719        case NSYM:
720        result=left.nonsymmetric();
721        break;
722        case PROD:
723        result=C_GeneralTensorProduct(left,right,m_axis_offset, m_transpose);
724        break;
725        case TRANS:
726        result=left.transpose(m_axis_offset);
727        break;
728        case TRACE:
729        result=left.trace(m_axis_offset);
730        break;
731        default:
732        throw DataException("Programmer error - collapseToReady does not know how to resolve operator "+opToString(m_op)+".");
733      }
734      return result.borrowReadyPtr();
735    }
736    
737    /*
738       \brief Converts the DataLazy into an IDENTITY storing the value of the expression.
739       This method uses the original methods on the Data class to evaluate the expressions.
740       For this reason, it should not be used on DataExpanded instances. (To do so would defeat
741       the purpose of using DataLazy in the first place).
742    */
743    void
744    DataLazy::collapse()
745    {
746      if (m_op==IDENTITY)
747      {
748        return;
749      }
750      if (m_readytype=='E')
751      { // this is more an efficiency concern than anything else
752        throw DataException("Programmer Error - do not use collapse on Expanded data.");
753      }
754      m_id=collapseToReady();
755      m_op=IDENTITY;
756    }
757    
758    /*
759      \brief Compute the value of the expression (unary operation) for the given sample.
760      \return Vector which stores the value of the subexpression for the given sample.
761      \param v A vector to store intermediate results.
762      \param offset Index in v to begin storing results.
763      \param sampleNo Sample number to evaluate.
764      \param roffset (output parameter) the offset in the return vector where the result begins.
765    
766      The return value will be an existing vector so do not deallocate it.
767      If the result is stored in v it should be stored at the offset given.
768      Everything from offset to the end of v should be considered available for this method to use.
769    */
770    DataTypes::ValueType*
771    DataLazy::resolveUnary(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const
772    {
773        // we assume that any collapsing has been done before we get here
774        // since we only have one argument we don't need to think about only
775        // processing single points.
776      if (m_readytype!='E')
777      {
778        throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
779      }
780      const ValueType* vleft=m_left->resolveSample(v,offset,sampleNo,roffset);
781      const double* left=&((*vleft)[roffset]);
782      double* result=&(v[offset]);
783      roffset=offset;
784      switch (m_op)
785      {
786        case SIN:  
787        tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sin);
788        break;
789        case COS:
790        tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cos);
791        break;
792        case TAN:
793        tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tan);
794        break;
795        case ASIN:
796        tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::asin);
797        break;
798        case ACOS:
799        tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::acos);
800        break;
801        case ATAN:
802        tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::atan);
803        break;
804        case SINH:
805        tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sinh);
806        break;
807        case COSH:
808        tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cosh);
809        break;
810        case TANH:
811        tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tanh);
812        break;
813        case ERF:
814    #if defined (_WIN32) && !defined(__INTEL_COMPILER)
815      throw DataException("Error - Data:: erf function is not supported on _WIN32 platforms.");      throw DataException("Error - Data:: erf function is not supported on _WIN32 platforms.");
816  #else  #else
817      tensor_unary_operation(m_samplesize, left, result, ::erf);      tensor_unary_operation(m_samplesize, left, result, ::erf);
818      break;      break;
819  #endif  #endif
820     case ASINH:     case ASINH:
821  #ifdef _WIN32  #if defined (_WIN32) && !defined(__INTEL_COMPILER)
822      tensor_unary_operation(m_samplesize, left, result, escript::asinh_substitute);      tensor_unary_operation(m_samplesize, left, result, escript::asinh_substitute);
823  #else  #else
824      tensor_unary_operation(m_samplesize, left, result, ::asinh);      tensor_unary_operation(m_samplesize, left, result, ::asinh);
825  #endif    #endif  
826      break;      break;
827     case ACOSH:     case ACOSH:
828  #ifdef _WIN32  #if defined (_WIN32) && !defined(__INTEL_COMPILER)
829      tensor_unary_operation(m_samplesize, left, result, escript::acosh_substitute);      tensor_unary_operation(m_samplesize, left, result, escript::acosh_substitute);
830  #else  #else
831      tensor_unary_operation(m_samplesize, left, result, ::acosh);      tensor_unary_operation(m_samplesize, left, result, ::acosh);
832  #endif    #endif  
833      break;      break;
834     case ATANH:     case ATANH:
835  #ifdef _WIN32  #if defined (_WIN32) && !defined(__INTEL_COMPILER)
836      tensor_unary_operation(m_samplesize, left, result, escript::atanh_substitute);      tensor_unary_operation(m_samplesize, left, result, escript::atanh_substitute);
837  #else  #else
838      tensor_unary_operation(m_samplesize, left, result, ::atanh);      tensor_unary_operation(m_samplesize, left, result, ::atanh);
839  #endif    #endif  
840      break;      break;
841      case LOG10:      case LOG10:
842      tensor_unary_operation(m_samplesize, left, result, ::log10);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log10);
843      break;      break;
844      case LOG:      case LOG:
845      tensor_unary_operation(m_samplesize, left, result, ::log);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log);
846      break;      break;
847      case SIGN:      case SIGN:
848      tensor_unary_operation(m_samplesize, left, result, escript::fsign);      tensor_unary_operation(m_samplesize, left, result, escript::fsign);
849      break;      break;
850      case ABS:      case ABS:
851      tensor_unary_operation(m_samplesize, left, result, ::fabs);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::fabs);
852      break;      break;
853      case NEG:      case NEG:
854      tensor_unary_operation(m_samplesize, left, result, negate<double>());      tensor_unary_operation(m_samplesize, left, result, negate<double>());
# Line 357  DataLazy::resolveSample(ValueType& v,int Line 859  DataLazy::resolveSample(ValueType& v,int
859      throw DataException("Programmer error - POS not supported for lazy data.");      throw DataException("Programmer error - POS not supported for lazy data.");
860      break;      break;
861      case EXP:      case EXP:
862      tensor_unary_operation(m_samplesize, left, result, ::exp);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::exp);
863      break;      break;
864      case SQRT:      case SQRT:
865      tensor_unary_operation(m_samplesize, left, result, ::sqrt);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sqrt);
866      break;      break;
867      case RECIP:      case RECIP:
868      tensor_unary_operation(m_samplesize, left, result, bind1st(divides<double>(),1.));      tensor_unary_operation(m_samplesize, left, result, bind1st(divides<double>(),1.));
# Line 377  DataLazy::resolveSample(ValueType& v,int Line 879  DataLazy::resolveSample(ValueType& v,int
879      case LEZ:      case LEZ:
880      tensor_unary_operation(m_samplesize, left, result, bind2nd(less_equal<double>(),0.0));      tensor_unary_operation(m_samplesize, left, result, bind2nd(less_equal<double>(),0.0));
881      break;      break;
882    // There are actually G_UNARY_P but I don't see a compelling reason to treat them differently
883        case NEZ:
884        tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsGT(),m_tol));
885        break;
886        case EZ:
887        tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsLTE(),m_tol));
888        break;
889    
890      default:      default:
891      throw DataException("Programmer error - do not know how to resolve operator "+opToString(m_op)+".");      throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");
892      }
893      return &v;
894    }
895    
896    
897    
898    
899    
900    
/*
  \brief Compute the value of the expression (unary operation) for the given sample.
  \return Vector which stores the value of the subexpression for the given sample.
  \param v A vector to store intermediate results.
  \param offset Index in v to begin storing results.
  \param sampleNo Sample number to evaluate.
  \param roffset (output parameter) the offset in the return vector where the result begins.

  The return value will be an existing vector so do not deallocate it.
  If the result is stored in v it should be stored at the offset given.
  Everything from offset to the end of v should be considered available for this method to use.
*/
DataTypes::ValueType*
DataLazy::resolveNP1OUT(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const
{
    // we assume that any collapsing has been done before we get here
    // since we only have one argument we don't need to think about only
    // processing single points.
  if (m_readytype!='E')
  {
    throw DataException("Programmer error - resolveNP1OUT should only be called on expanded Data.");
  }
    // since we can't write the result over the input, we need a result offset further along
  size_t subroffset=roffset+m_samplesize;
LAZYDEBUG(cerr << "subroffset=" << subroffset << endl;)
    // evaluate the child into the buffer one left-sample further along than our own result slot
  const ValueType* vleft=m_left->resolveSample(v,offset+m_left->m_samplesize,sampleNo,subroffset);
  roffset=offset;       // our result begins at the offset we were handed
  size_t loop=0;
  size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;   // one step per datapoint ('E' is guaranteed above)
  size_t step=getNoValues();    // doubles per output datapoint
    // NOTE(review): subroffset also advances by the OUTPUT point size; for SYM/NSYM the
    // input and output datapoints appear to have the same size, so this is consistent -
    // confirm before adding ops with differing in/out shapes here.
  switch (m_op)
  {
    case SYM:
    for (loop=0;loop<numsteps;++loop)
    {
        DataMaths::symmetric(*vleft,m_left->getShape(),subroffset, v, getShape(), offset);
        subroffset+=step;
        offset+=step;
    }
    break;
    case NSYM:
    for (loop=0;loop<numsteps;++loop)
    {
        DataMaths::nonsymmetric(*vleft,m_left->getShape(),subroffset, v, getShape(), offset);
        subroffset+=step;
        offset+=step;
    }
    break;
    default:
    throw DataException("Programmer error - resolveNP1OUT can not resolve operator "+opToString(m_op)+".");
  }
  return &v;
}
954    
955    /*
956      \brief Compute the value of the expression (unary operation) for the given sample.
957      \return Vector which stores the value of the subexpression for the given sample.
958      \param v A vector to store intermediate results.
959      \param offset Index in v to begin storing results.
960      \param sampleNo Sample number to evaluate.
961      \param roffset (output parameter) the offset in the return vector where the result begins.
962    
963      The return value will be an existing vector so do not deallocate it.
964      If the result is stored in v it should be stored at the offset given.
965      Everything from offset to the end of v should be considered available for this method to use.
966    */
967    DataTypes::ValueType*
968    DataLazy::resolveNP1OUT_P(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const
969    {
970        // we assume that any collapsing has been done before we get here
971        // since we only have one argument we don't need to think about only
972        // processing single points.
973      if (m_readytype!='E')
974      {
975        throw DataException("Programmer error - resolveNP1OUT_P should only be called on expanded Data.");
976      }
977        // since we can't write the result over the input, we need a result offset further along
978      size_t subroffset;
979      const ValueType* vleft=m_left->resolveSample(v,offset+m_left->m_samplesize,sampleNo,subroffset);
980    LAZYDEBUG(cerr << "from=" << offset+m_left->m_samplesize << " to=" << subroffset << " ret=" << roffset << endl;)
981      roffset=offset;
982      size_t loop=0;
983      size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
984      size_t step=getNoValues();
985      switch (m_op)
986      {
987        case TRACE:
988        for (loop=0;loop<numsteps;++loop)
989        {
990                DataMaths::trace(*vleft,m_left->getShape(),subroffset, v,getShape(),offset,m_axis_offset);
991            subroffset+=step;
992            offset+=step;
993        }
994        break;
995        case TRANS:
996        for (loop=0;loop<numsteps;++loop)
997        {
998                DataMaths::transpose(*vleft,m_left->getShape(),subroffset, v,getShape(),offset,m_axis_offset);
999            subroffset+=step;
1000            offset+=step;
1001        }
1002        break;
1003        default:
1004        throw DataException("Programmer error - resolveNP1OUTP can not resolve operator "+opToString(m_op)+".");
1005      }
1006      return &v;
1007    }
1008    
1009    
// Expands to a double loop applying the binary functor/function X over chunks of
// doubles taken from the left and right child buffers, writing into resultp.
// TYPE is forwarded as the explicit template argument of tensor_binary_operation
// (NO_ARG is presumably a placeholder macro defined elsewhere - confirm).
// The expansion relies on these names being declared by the surrounding function
// (see resolveBinary for how they are computed): onumsteps, numsteps, resultStep,
// chunksize, left, right, lroffset, rroffset, leftstep, rightstep, oleftstep,
// orightstep and resultp. The outer loop exists only for the extended-scalar cases.
#define PROC_OP(TYPE,X)                               \
    for (int j=0;j<onumsteps;++j)\
    {\
      for (int i=0;i<numsteps;++i,resultp+=resultStep) \
      { \
LAZYDEBUG(cout << "[left,right]=[" << lroffset << "," << rroffset << "]" << endl;)\
LAZYDEBUG(cout << "{left,right}={" << (*left)[lroffset] << "," << (*right)[rroffset] << "}\n";)\
         tensor_binary_operation< TYPE >(chunksize, &((*left)[lroffset]), &((*right)[rroffset]), resultp, X); \
LAZYDEBUG(cout << " result=      " << resultp[0] << endl;) \
         lroffset+=leftstep; \
         rroffset+=rightstep; \
      }\
      lroffset+=oleftstep;\
      rroffset+=orightstep;\
    }
1025    
/*
  \brief Compute the value of the expression (binary operation) for the given sample.
  \return Vector which stores the value of the subexpression for the given sample.
  \param v A vector to store intermediate results.
  \param offset Index in v to begin storing results.
  \param sampleNo Sample number to evaluate.
  \param roffset (output parameter) the offset in the return vector where the result begins.

  The return value will be an existing vector so do not deallocate it.
  If the result is stored in v it should be stored at the offset given.
  Everything from offset to the end of v should be considered available for this method to use.
*/
// This method assumes that any subexpressions which evaluate to Constant or Tagged Data
// have already been collapsed to IDENTITY. So we must have at least one expanded child.
// If both children are expanded, then we can process them in a single operation (we treat
// the whole sample as one big datapoint.
// If one of the children is not expanded, then we need to treat each point in the sample
// individually.
// There is an additional complication when scalar operations are considered.
// For example, 2+Vector.
// In this case each double within the point is treated individually
DataTypes::ValueType*
DataLazy::resolveBinary(ValueType& v,  size_t offset, int sampleNo, size_t& roffset) const
{
LAZYDEBUG(cout << "Resolve binary: " << toString() << endl;)

  size_t lroffset=0, rroffset=0;    // offsets in the left and right result vectors
    // first work out which of the children are expanded
  bool leftExp=(m_left->m_readytype=='E');
  bool rightExp=(m_right->m_readytype=='E');
  if (!leftExp && !rightExp)
  {
    throw DataException("Programmer Error - please use collapse if neither argument has type 'E'.");
  }
  bool leftScalar=(m_left->getRank()==0);
  bool rightScalar=(m_right->getRank()==0);
  if ((m_left->getRank()!=m_right->getRank()) && (!leftScalar && !rightScalar))
  {
    throw DataException("resolveBinary - ranks of arguments must match unless one of them is scalar.");
  }
  size_t leftsize=m_left->getNoValues();
  size_t rightsize=m_right->getNoValues();
  size_t chunksize=1;           // how many doubles will be processed in one go
  int leftstep=0;       // how far should the left offset advance after each step
  int rightstep=0;
  int numsteps=0;       // total number of steps for the inner loop
  int oleftstep=0;  // the o variables refer to the outer loop
  int orightstep=0; // The outer loop is only required in cases where there is an extended scalar
  int onumsteps=1;

    // classify each child: E=expanded, S=scalar, N=non-scalar single datapoint
  bool LES=(leftExp && leftScalar); // Left is an expanded scalar
  bool RES=(rightExp && rightScalar);
  bool LS=(!leftExp && leftScalar); // left is a single scalar
  bool RS=(!rightExp && rightScalar);
  bool LN=(!leftExp && !leftScalar);    // left is a single non-scalar
  bool RN=(!rightExp && !rightScalar);
  bool LEN=(leftExp && !leftScalar);    // left is an expanded non-scalar
  bool REN=(rightExp && !rightScalar);

  if ((LES && RES) || (LEN && REN)) // both are Expanded scalars or both are expanded non-scalars
  {
    chunksize=m_left->getNumDPPSample()*leftsize;   // treat the whole sample as one big datapoint
    leftstep=0;
    rightstep=0;
    numsteps=1;
  }
  else if (LES || RES)  // an expanded scalar is involved: pair each scalar value individually
  {
    chunksize=1;
    if (LES)        // left is an expanded scalar
    {
        if (RS)
        {
           leftstep=1;
           rightstep=0;
           numsteps=m_left->getNumDPPSample();
        }
        else        // RN or REN
        {
           leftstep=0;
           oleftstep=1;
           rightstep=1;
           orightstep=(RN?-rightsize:0);   // a single right point must be rewound each outer pass
           numsteps=rightsize;
           onumsteps=m_left->getNumDPPSample();
        }
    }
    else        // right is an expanded scalar
    {
        if (LS)
        {
           rightstep=1;
           leftstep=0;
           numsteps=m_right->getNumDPPSample();
        }
        else
        {
           rightstep=0;
           orightstep=1;
           leftstep=1;
           oleftstep=(LN?-leftsize:0);     // a single left point must be rewound each outer pass
           numsteps=leftsize;
           onumsteps=m_right->getNumDPPSample();
        }
    }
  }
  else  // this leaves (LEN, RS), (LEN, RN) and their transposes
  {
    if (LEN)    // and Right will be a single value
    {
        chunksize=rightsize;
        leftstep=rightsize;
        rightstep=0;
        numsteps=m_left->getNumDPPSample();
        if (RS)
        {
           numsteps*=leftsize;      // scalar right is paired with every double on the left
        }
    }
    else    // REN
    {
        chunksize=leftsize;
        rightstep=leftsize;
        leftstep=0;
        numsteps=m_right->getNumDPPSample();
        if (LS)
        {
           numsteps*=rightsize;     // scalar left is paired with every double on the right
        }
    }
  }

  int resultStep=max(leftstep,rightstep);   // only one (at most) should be !=0
    // Get the values of sub-expressions
  const ValueType* left=m_left->resolveSample(v,offset+getMaxSampleSize(),sampleNo,lroffset);   // see note on
    // calcBufss for why we can't put offset as the 2nd param above
  const ValueType* right=m_right->resolveSample(v,offset+2*getMaxSampleSize(),sampleNo,rroffset); // Note
    // the right child starts further along.
LAZYDEBUG(cout << "Post sub calls in " << toString() << endl;)
LAZYDEBUG(cout << "shapes=" << DataTypes::shapeToString(m_left->getShape()) << "," << DataTypes::shapeToString(m_right->getShape()) << endl;)
LAZYDEBUG(cout << "chunksize=" << chunksize << endl << "leftstep=" << leftstep << " rightstep=" << rightstep;)
LAZYDEBUG(cout << " numsteps=" << numsteps << endl << "oleftstep=" << oleftstep << " orightstep=" << orightstep;)
LAZYDEBUG(cout << "onumsteps=" << onumsteps << endl;)
LAZYDEBUG(cout << " DPPS=" << m_left->getNumDPPSample() << "," <<m_right->getNumDPPSample() << endl;)
LAZYDEBUG(cout << "" << LS << RS << LN << RN << LES << RES <<LEN << REN <<   endl;)
  double* resultp=&(v[offset]);     // results are stored at the vector offset we recieved
  switch(m_op)
  {
    case ADD:
        PROC_OP(NO_ARG,plus<double>());
    break;
    case SUB:
    PROC_OP(NO_ARG,minus<double>());
    break;
    case MUL:
    PROC_OP(NO_ARG,multiplies<double>());
    break;
    case DIV:
    PROC_OP(NO_ARG,divides<double>());
    break;
    case POW:
       PROC_OP(double (double,double),::pow);
    break;
    default:
    throw DataException("Programmer error - resolveBinary can not resolve operator "+opToString(m_op)+".");
  }
  roffset=offset;
  return &v;
}
1195    
1196    
1197    
/*
  \brief Compute the value of the expression (tensor product) for the given sample.
  \return Vector which stores the value of the subexpression for the given sample.
  \param v A vector to store intermediate results.
  \param offset Index in v to begin storing results.
  \param sampleNo Sample number to evaluate.
  \param roffset (output parameter) the offset in the return vector where the result begins.

  The return value will be an existing vector so do not deallocate it.
  If the result is stored in v it should be stored at the offset given.
  Everything from offset to the end of v should be considered available for this method to use.
*/
// This method assumes that any subexpressions which evaluate to Constant or Tagged Data
// have already been collapsed to IDENTITY. So we must have at least one expanded child.
// unlike the other resolve helpers, we must treat these datapoints separately.
DataTypes::ValueType*
DataLazy::resolveTProd(ValueType& v,  size_t offset, int sampleNo, size_t& roffset) const
{
LAZYDEBUG(cout << "Resolve TensorProduct: " << toString() << endl;)

  size_t lroffset=0, rroffset=0;    // offsets in the left and right result vectors
    // first work out which of the children are expanded
  bool leftExp=(m_left->m_readytype=='E');
  bool rightExp=(m_right->m_readytype=='E');
  int steps=getNumDPPSample();
    // a non-expanded child contributes the same single point to every step, so its cursor stays put
  int leftStep=((leftExp && !rightExp)? m_right->getNoValues() : 0);
  int rightStep=((rightExp && !leftExp)? m_left->getNoValues() : 0);
  int resultStep=max(leftStep,rightStep);   // only one (at most) should be !=0
    // Get the values of sub-expressions (leave a gap of one sample for the result).
  int gap=offset+m_left->getMaxSampleSize();    // actually only needs to be m_left->m_samplesize
  const ValueType* left=m_left->resolveSample(v,gap,sampleNo,lroffset);
  gap+=m_right->getMaxSampleSize();
  const ValueType* right=m_right->resolveSample(v,gap,sampleNo,rroffset);
LAZYDEBUG(cout << "Post sub calls: " << toString() << endl;)
LAZYDEBUG(cout << "LeftExp=" << leftExp << " rightExp=" << rightExp << endl;)
LAZYDEBUG(cout << "LeftR=" << m_left->getRank() << " rightExp=" << m_right->getRank() << endl;)
LAZYDEBUG(cout << "LeftSize=" << m_left->getNoValues() << " RightSize=" << m_right->getNoValues() << endl;)
LAZYDEBUG(cout << "m_samplesize=" << m_samplesize << endl;)
LAZYDEBUG(cout << "outputshape=" << DataTypes::shapeToString(getShape()) << endl;)
  double* resultp=&(v[offset]);     // results are stored at the vector offset we recieved
  switch(m_op)
  {
    case PROD:
        // one matrix-matrix product per datapoint; m_SL/m_SM/m_SR are
        // presumably the collapsed product dimensions set at construction - confirm
    for (int i=0;i<steps;++i,resultp+=resultStep)
    {
LAZYDEBUG(cout << "lroffset=" << lroffset << "rroffset=" << rroffset << endl;)
LAZYDEBUG(cout << "l*=" << left << " r*=" << right << endl;)
LAZYDEBUG(cout << "m_SL=" << m_SL << " m_SM=" << m_SM << " m_SR=" << m_SR << endl;)
          const double *ptr_0 = &((*left)[lroffset]);
          const double *ptr_1 = &((*right)[rroffset]);
LAZYDEBUG(cout << DataTypes::pointToString(*left, m_left->getShape(),lroffset,"LEFT") << endl;)
LAZYDEBUG(cout << DataTypes::pointToString(*right,m_right->getShape(),rroffset, "RIGHT") << endl;)
          matrix_matrix_product(m_SL, m_SM, m_SR, ptr_0, ptr_1, resultp, m_transpose);
      lroffset+=leftStep;
      rroffset+=rightStep;
    }
    break;
    default:
    throw DataException("Programmer error - resolveTProduct can not resolve operator "+opToString(m_op)+".");
  }
  roffset=offset;
  return &v;
}
1261    
1262    
1263    
/*
  \brief Compute the value of the expression for the given sample.
  \return Vector which stores the value of the subexpression for the given sample.
  \param v A vector to store intermediate results.
  \param offset Index in v to begin storing results.
  \param sampleNo Sample number to evaluate.
  \param roffset (output parameter) the offset in the return vector where the result begins.

  The return value will be an existing vector so do not deallocate it.
*/
// the vector and the offset are a place where the method could write its data if it wishes
// it is not obligated to do so. For example, if it has its own storage already, it can use that.
// Hence the return value to indicate where the data is actually stored.
// Regardless, the storage should be assumed to be used, even if it isn't.

// the roffset is the offset within the returned vector where the data begins
const DataTypes::ValueType*
DataLazy::resolveSample(ValueType& v, size_t offset, int sampleNo, size_t& roffset)
{
LAZYDEBUG(cout << "Resolve sample " << toString() << endl;)
    // collapse so we have a 'E' node or an IDENTITY for some other type
  if (m_readytype!='E' && m_op!=IDENTITY)
  {
    collapse();
  }
  if (m_op==IDENTITY)
  {
      // value already materialised in m_id - hand back its vector directly
    const ValueType& vec=m_id->getVector();
    if (m_readytype=='C')
    {
        // constant data: every sample reads from offset 0
    roffset=0;
LAZYDEBUG(cout << "Finish  sample " << toString() << endl;)
    return &(vec);
    }
    roffset=m_id->getPointOffset(sampleNo, 0);
LAZYDEBUG(cout << "Finish  sample " << toString() << endl;)
    return &(vec);
  }
  if (m_readytype!='E')
  {
    throw DataException("Programmer Error - Collapse did not produce an expanded node.");
  }
    // dispatch to the resolver matching this operation's group
  switch (getOpgroup(m_op))
  {
  case G_UNARY:
  case G_UNARY_P: return resolveUnary(v, offset,sampleNo,roffset);
  case G_BINARY: return resolveBinary(v, offset,sampleNo,roffset);
  case G_NP1OUT: return resolveNP1OUT(v, offset, sampleNo,roffset);
  case G_NP1OUT_P: return resolveNP1OUT_P(v, offset, sampleNo,roffset);
  case G_TENSORPROD: return resolveTProd(v,offset, sampleNo,roffset);
  default:
    throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");
  }
}
1319    
1320    
1321    // To simplify the memory management, all threads operate on one large vector, rather than one each.
1322    // Each sample is evaluated independently and copied into the result DataExpanded.
1323  DataReady_ptr  DataReady_ptr
1324  DataLazy::resolve()  DataLazy::resolve()
1325  {  {
   // This is broken!     We need to have a buffer per thread!  
   // so the allocation of v needs to move inside the loop somehow  
1326    
1327  cout << "Sample size=" << m_samplesize << endl;  LAZYDEBUG(cout << "Sample size=" << m_samplesize << endl;)
1328  cout << "Buffers=" << m_buffsRequired << endl;  LAZYDEBUG(cout << "Buffers=" << m_buffsRequired << endl;)
1329    
1330    size_t threadbuffersize=m_samplesize*(max(1,m_buffsRequired)+1);    if (m_readytype!='E')     // if the whole sub-expression is Constant or Tagged, then evaluate it normally
1331      {
1332        collapse();
1333      }
1334      if (m_op==IDENTITY)       // So a lazy expression of Constant or Tagged data will be returned here.
1335      {
1336        return m_id;
1337      }
1338        // from this point on we must have m_op!=IDENTITY and m_readytype=='E'
1339      size_t threadbuffersize=m_maxsamplesize*(max(1,m_buffsRequired)); // Each thread needs to have enough
1340        // storage to evaluate its expression
1341    int numthreads=1;    int numthreads=1;
1342  #ifdef _OPENMP  #ifdef _OPENMP
1343    numthreads=omp_get_max_threads();    numthreads=getNumberOfThreads();
   int threadnum=0;  
1344  #endif  #endif
1345    ValueType v(numthreads*threadbuffersize);    ValueType v(numthreads*threadbuffersize);
1346  cout << "Buffer created with size=" << v.size() << endl;  LAZYDEBUG(cout << "Buffer created with size=" << v.size() << endl;)
1347    ValueType dummy(getNoValues());    DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(),  ValueType(getNoValues()));
   DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(),dummy);  
1348    ValueType& resvec=result->getVector();    ValueType& resvec=result->getVector();
1349    DataReady_ptr resptr=DataReady_ptr(result);    DataReady_ptr resptr=DataReady_ptr(result);
1350    int sample;    int sample;
1351    int resoffset;    size_t outoffset;     // offset in the output data
1352    int totalsamples=getNumSamples();    int totalsamples=getNumSamples();
1353    #pragma omp parallel for private(sample,resoffset,threadnum) schedule(static)    const ValueType* res=0;   // Vector storing the answer
1354      size_t resoffset=0;       // where in the vector to find the answer
1355    LAZYDEBUG(cout << "Total number of samples=" <<totalsamples << endl;)
1356      #pragma omp parallel for private(sample,resoffset,outoffset,res) schedule(static)
1357    for (sample=0;sample<totalsamples;++sample)    for (sample=0;sample<totalsamples;++sample)
1358    {    {
1359          if (sample==0)  {ENABLEDEBUG}
1360    LAZYDEBUG(cout << "################################# " << sample << endl;)
1361  #ifdef _OPENMP  #ifdef _OPENMP
1362      const double* res=resolveSample(v,sample,threadbuffersize*omp_get_thread_num());      res=resolveSample(v,threadbuffersize*omp_get_thread_num(),sample,resoffset);
1363  #else  #else
1364      const double* res=resolveSample(v,sample,0);   // this would normally be v, but not if its a single IDENTITY op.      res=resolveSample(v,0,sample,resoffset);   // res would normally be v, but not if its a single IDENTITY op.
1365  #endif  #endif
1366      resoffset=result->getPointOffset(sample,0);  LAZYDEBUG(cerr << "-------------------------------- " << endl;)
1367      for (int i=0;i<m_samplesize;++i,++resoffset)    // copy values into the output vector  LAZYDEBUG(cerr<< "Copying sample#" << sample << endl;)
1368        outoffset=result->getPointOffset(sample,0);
1369    LAZYDEBUG(cerr << "offset=" << outoffset << " from offset=" << resoffset << " " << m_samplesize << " doubles" << endl;)
1370        for (unsigned int i=0;i<m_samplesize;++i,++outoffset,++resoffset)   // copy values into the output vector
1371      {      {
1372      resvec[resoffset]=res[i];  // LAZYDEBUG(cerr << "outoffset=" << outoffset << " resoffset=" << resoffset << endl;)
1373        resvec[outoffset]=(*res)[resoffset];
1374      }      }
1375    LAZYDEBUG(cerr << DataTypes::pointToString(resvec,getShape(),outoffset-m_samplesize+DataTypes::noValues(getShape()),"Final result:") << endl;)
1376    LAZYDEBUG(cerr << "*********************************" << endl;)
1377        DISABLEDEBUG
1378    }    }
1379    return resptr;    return resptr;
1380  }  }
# Line 435  DataLazy::toString() const Line 1388  DataLazy::toString() const
1388    return oss.str();    return oss.str();
1389  }  }
1390    
1391    
1392  void  void
1393  DataLazy::intoString(ostringstream& oss) const  DataLazy::intoString(ostringstream& oss) const
1394  {  {
1395    switch (getOpgroup(m_op))    switch (getOpgroup(m_op))
1396    {    {
1397    case G_IDENTITY:    case G_IDENTITY:
1398        if (m_id->isExpanded())
1399        {
1400           oss << "E";
1401        }
1402        else if (m_id->isTagged())
1403        {
1404          oss << "T";
1405        }
1406        else if (m_id->isConstant())
1407        {
1408          oss << "C";
1409        }
1410        else
1411        {
1412          oss << "?";
1413        }
1414      oss << '@' << m_id.get();      oss << '@' << m_id.get();
1415      break;      break;
1416    case G_BINARY:    case G_BINARY:
# Line 451  DataLazy::intoString(ostringstream& oss) Line 1421  DataLazy::intoString(ostringstream& oss)
1421      oss << ')';      oss << ')';
1422      break;      break;
1423    case G_UNARY:    case G_UNARY:
1424      case G_UNARY_P:
1425      case G_NP1OUT:
1426      case G_NP1OUT_P:
1427      oss << opToString(m_op) << '(';      oss << opToString(m_op) << '(';
1428      m_left->intoString(oss);      m_left->intoString(oss);
1429      oss << ')';      oss << ')';
1430      break;      break;
1431      case G_TENSORPROD:
1432        oss << opToString(m_op) << '(';
1433        m_left->intoString(oss);
1434        oss << ", ";
1435        m_right->intoString(oss);
1436        oss << ')';
1437        break;
1438    default:    default:
1439      oss << "UNKNOWN";      oss << "UNKNOWN";
1440    }    }
1441  }  }
1442    
 // Note that in this case, deepCopy does not make copies of the leaves.  
 // Hopefully copy on write (or whatever we end up using) will take care of this.  
1443  DataAbstract*  DataAbstract*
1444  DataLazy::deepCopy()  DataLazy::deepCopy()
1445  {  {
1446    if (m_op==IDENTITY)    switch (getOpgroup(m_op))
1447    {    {
1448      return new DataLazy(m_left);    // we don't need to copy the child here    case G_IDENTITY:  return new DataLazy(m_id->deepCopy()->getPtr());
1449      case G_UNARY: return new DataLazy(m_left->deepCopy()->getPtr(),m_op);
1450      case G_BINARY:    return new DataLazy(m_left->deepCopy()->getPtr(),m_right->deepCopy()->getPtr(),m_op);
1451      case G_NP1OUT: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(),m_op);
1452      case G_TENSORPROD: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
1453      default:
1454        throw DataException("Programmer error - do not know how to deepcopy operator "+opToString(m_op)+".");
1455    }    }
   return new DataLazy(m_left->deepCopy()->getPtr(),m_right->deepCopy()->getPtr(),m_op);  
1456  }  }
1457    
1458    
1459    // There is no single, natural interpretation of getLength on DataLazy.
1460    // Instances of DataReady can look at the size of their vectors.
1461    // For lazy though, it could be the size the data would be if it were resolved;
1462    // or it could be some function of the lengths of the DataReady instances which
1463    // form part of the expression.
1464    // Rather than have people making assumptions, I have disabled the method.
1465  DataTypes::ValueType::size_type  DataTypes::ValueType::size_type
1466  DataLazy::getLength() const  DataLazy::getLength() const
1467  {  {
1468    return m_length;    throw DataException("getLength() does not make sense for lazy data.");
1469  }  }
1470    
1471    
# Line 486  DataLazy::getSlice(const DataTypes::Regi Line 1475  DataLazy::getSlice(const DataTypes::Regi
1475    throw DataException("getSlice - not implemented for Lazy objects.");    throw DataException("getSlice - not implemented for Lazy objects.");
1476  }  }
1477    
1478    
1479    // To do this we need to rely on our child nodes
1480    DataTypes::ValueType::size_type
1481    DataLazy::getPointOffset(int sampleNo,
1482                     int dataPointNo)
1483    {
1484      if (m_op==IDENTITY)
1485      {
1486        return m_id->getPointOffset(sampleNo,dataPointNo);
1487      }
1488      if (m_readytype!='E')
1489      {
1490        collapse();
1491        return m_id->getPointOffset(sampleNo,dataPointNo);
1492      }
1493      // at this point we do not have an identity node and the expression will be Expanded
1494      // so we only need to know which child to ask
1495      if (m_left->m_readytype=='E')
1496      {
1497        return m_left->getPointOffset(sampleNo,dataPointNo);
1498      }
1499      else
1500      {
1501        return m_right->getPointOffset(sampleNo,dataPointNo);
1502      }
1503    }
1504    
1505    // To do this we need to rely on our child nodes
1506  DataTypes::ValueType::size_type  DataTypes::ValueType::size_type
1507  DataLazy::getPointOffset(int sampleNo,  DataLazy::getPointOffset(int sampleNo,
1508                   int dataPointNo) const                   int dataPointNo) const
1509  {  {
1510    throw DataException("getPointOffset - not implemented for Lazy objects - yet.");    if (m_op==IDENTITY)
1511      {
1512        return m_id->getPointOffset(sampleNo,dataPointNo);
1513      }
1514      if (m_readytype=='E')
1515      {
1516        // at this point we do not have an identity node and the expression will be Expanded
1517        // so we only need to know which child to ask
1518        if (m_left->m_readytype=='E')
1519        {
1520        return m_left->getPointOffset(sampleNo,dataPointNo);
1521        }
1522        else
1523        {
1524        return m_right->getPointOffset(sampleNo,dataPointNo);
1525        }
1526      }
1527      if (m_readytype=='C')
1528      {
1529        return m_left->getPointOffset(sampleNo,dataPointNo); // which child doesn't matter
1530      }
1531      throw DataException("Programmer error - getPointOffset on lazy data may require collapsing (but this object is marked const).");
1532    }
1533    
1534    // It would seem that DataTagged will need to be treated differently since even after setting all tags
1535    // to zero, all the tags from all the DataTags would be in the result.
1536    // However since they all have the same value (0) whether they are there or not should not matter.
1537    // So I have decided that for all types this method will create a constant 0.
1538    // It can be promoted up as required.
1539    // A possible efficiency concern might be expanded->constant->expanded which has an extra memory management
1540    // but we can deal with that if it arrises.
1541    void
1542    DataLazy::setToZero()
1543    {
1544      DataTypes::ValueType v(getNoValues(),0);
1545      m_id=DataReady_ptr(new DataConstant(getFunctionSpace(),getShape(),v));
1546      m_op=IDENTITY;
1547      m_right.reset();  
1548      m_left.reset();
1549      m_readytype='C';
1550      m_buffsRequired=1;
1551  }  }
1552    
1553  }   // end namespace  }   // end namespace

Legend:
Removed from v.1888  
changed lines
  Added in v.2157

  ViewVC Help
Powered by ViewVC 1.1.26