/[escript]/trunk/escript/src/DataLazy.cpp
ViewVC logotype

Diff of /trunk/escript/src/DataLazy.cpp

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

branches/schroedinger/escript/src/DataLazy.cpp revision 1898 by jfenwick, Mon Oct 20 01:20:18 2008 UTC trunk/escript/src/DataLazy.cpp revision 2085 by jfenwick, Mon Nov 24 00:45:48 2008 UTC
# Line 26  Line 26 
26  #include "DataTypes.h"  #include "DataTypes.h"
27  #include "Data.h"  #include "Data.h"
28  #include "UnaryFuncs.h"     // for escript::fsign  #include "UnaryFuncs.h"     // for escript::fsign
29    #include "Utils.h"
30    
31    /*
32    How does DataLazy work?
33    ~~~~~~~~~~~~~~~~~~~~~~~
34    
35    Each instance represents a single operation on one or two other DataLazy instances. These arguments are normally
36    denoted left and right.
37    
38    A special operation, IDENTITY, stores an instance of DataReady in the m_id member.
39    This means that all "internal" nodes in the structure are instances of DataLazy.
40    
41    Each operation has a string representation as well as an opgroup - eg G_IDENTITY, G_BINARY, ...
42    Note that IDENITY is not considered a unary operation.
43    
44    I am avoiding calling the structure formed a tree because it is not guaranteed to be one (eg c=a+a).
45    It must however form a DAG (directed acyclic graph).
46    I will refer to individual DataLazy objects with the structure as nodes.
47    
48    Each node also stores:
49    - m_readytype \in {'E','T','C','?'} ~ indicates what sort of DataReady would be produced if the expression was
50        evaluated.
51    - m_buffsrequired ~ the larged number of samples which would need to be kept simultaneously in order to
52        evaluate the expression.
53    - m_samplesize ~ the number of doubles stored in a sample.
54    
55    When a new node is created, the above values are computed based on the values in the child nodes.
56    Eg: if left requires 4 samples and right requires 6 then left+right requires 7 samples.
57    
58    The resolve method, which produces a DataReady from a DataLazy, does the following:
59    1) Create a DataReady to hold the new result.
60    2) Allocate a vector (v) big enough to hold m_buffsrequired samples.
61    3) For each sample, call resolveSample with v, to get its values and copy them into the result object.
62    
63    (In the case of OMP, multiple samples are resolved in parallel so the vector needs to be larger.)
64    
65    resolveSample returns a Vector* and an offset within that vector where the result is stored.
66    Normally, this would be v, but for identity nodes their internal vector is returned instead.
67    
68    The convention that I use, is that the resolve methods should store their results starting at the offset they are passed.
69    
70    For expressions which evaluate to Constant or Tagged, there is a different evaluation method.
71    The collapse method invokes the (non-lazy) operations on the Data class to evaluate the expression.
72    
73    To add a new operator you need to do the following (plus anything I might have forgotten):
74    1) Add to the ES_optype.
75    2) determine what opgroup your operation belongs to (X)
76    3) add a string for the op to the end of ES_opstrings
77    4) increase ES_opcount
78    5) add an entry (X) to opgroups
79    6) add an entry to the switch in collapseToReady
80    7) add an entry to resolveX
81    */
82    
83    
84  using namespace std;  using namespace std;
85  using namespace boost;  using namespace boost;
# Line 33  using namespace boost; Line 87  using namespace boost;
87  namespace escript  namespace escript
88  {  {
89    
 const std::string&  
 opToString(ES_optype op);  
   
90  namespace  namespace
91  {  {
92    
   
   
93  enum ES_opgroup  enum ES_opgroup
94  {  {
95     G_UNKNOWN,     G_UNKNOWN,
96     G_IDENTITY,     G_IDENTITY,
97     G_BINARY,     G_BINARY,        // pointwise operations with two arguments
98     G_UNARY     G_UNARY,     // pointwise operations with one argument
99       G_NP1OUT,        // non-pointwise op with one output
100       G_NP1OUT_P,      // non-pointwise op with one output requiring a parameter
101       G_TENSORPROD     // general tensor product
102  };  };
103    
104    
105    
106    
107  string ES_opstrings[]={"UNKNOWN","IDENTITY","+","-","*","/","sin","cos","tan",  string ES_opstrings[]={"UNKNOWN","IDENTITY","+","-","*","/","^",
108                "sin","cos","tan",
109              "asin","acos","atan","sinh","cosh","tanh","erf",              "asin","acos","atan","sinh","cosh","tanh","erf",
110              "asinh","acosh","atanh",              "asinh","acosh","atanh",
111              "log10","log","sign","abs","neg","pos","exp","sqrt",              "log10","log","sign","abs","neg","pos","exp","sqrt",
112              "1/","where>0","where<0","where>=0","where<=0"};              "1/","where>0","where<0","where>=0","where<=0",
113  int ES_opcount=32;              "symmetric","nonsymmetric",
114  ES_opgroup opgroups[]={G_UNKNOWN,G_IDENTITY,G_BINARY,G_BINARY,G_BINARY,G_BINARY,G_UNARY,G_UNARY,G_UNARY, //9              "prod",
115              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,    // 16              "transpose",
116              G_UNARY,G_UNARY,G_UNARY,                    // 19              "trace"};
117              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,        // 27  int ES_opcount=38;
118              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY};  ES_opgroup opgroups[]={G_UNKNOWN,G_IDENTITY,G_BINARY,G_BINARY,G_BINARY,G_BINARY, G_BINARY,
119                G_UNARY,G_UNARY,G_UNARY, //10
120                G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,    // 17
121                G_UNARY,G_UNARY,G_UNARY,                    // 20
122                G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,        // 28
123                G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,            // 33
124                G_NP1OUT,G_NP1OUT,
125                G_TENSORPROD,
126                G_NP1OUT_P, G_NP1OUT_P};
127  inline  inline
128  ES_opgroup  ES_opgroup
129  getOpgroup(ES_optype op)  getOpgroup(ES_optype op)
# Line 79  resultFS(DataAbstract_ptr left, DataAbst Line 140  resultFS(DataAbstract_ptr left, DataAbst
140      // that way, if interpolate is required in any other op we can just throw a      // that way, if interpolate is required in any other op we can just throw a
141      // programming error exception.      // programming error exception.
142    
143      FunctionSpace l=left->getFunctionSpace();
144      if (left->getFunctionSpace()!=right->getFunctionSpace())    FunctionSpace r=right->getFunctionSpace();
145      {    if (l!=r)
146          throw DataException("FunctionSpaces not equal - interpolation not supported on lazy data.");    {
147      }      if (r.probeInterpolation(l))
148      return left->getFunctionSpace();      {
149        return l;
150        }
151        if (l.probeInterpolation(r))
152        {
153        return r;
154        }
155        throw DataException("Cannot interpolate between the FunctionSpaces given for operation "+opToString(op)+".");
156      }
157      return l;
158  }  }
159    
160  // return the shape of the result of "left op right"  // return the shape of the result of "left op right"
161    // the shapes resulting from tensor product are more complex to compute so are worked out elsewhere
162  DataTypes::ShapeType  DataTypes::ShapeType
163  resultShape(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)  resultShape(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
164  {  {
165      if (left->getShape()!=right->getShape())      if (left->getShape()!=right->getShape())
166      {      {
167          throw DataException("Shapes not the same - shapes must match for lazy data.");        if ((getOpgroup(op)!=G_BINARY) && (getOpgroup(op)!=G_NP1OUT))
168          {
169            throw DataException("Shapes not the name - shapes must match for (point)binary operations.");
170          }
171          if (left->getRank()==0)   // we need to allow scalar * anything
172          {
173            return right->getShape();
174          }
175          if (right->getRank()==0)
176          {
177            return left->getShape();
178          }
179          throw DataException("Shapes not the same - arguments must have matching shapes (or be scalars) for (point)binary operations on lazy data.");
180      }      }
181      return left->getShape();      return left->getShape();
182  }  }
183    
184  size_t  // return the shape for "op left"
185  resultLength(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)  
186    DataTypes::ShapeType
187    resultShape(DataAbstract_ptr left, ES_optype op)
188  {  {
189     switch (getOpgroup(op))      switch(op)
190     {      {
191     case G_BINARY: return left->getLength();          case TRANS:
192     case G_UNARY: return left->getLength();          return left->getShape();
193     default:      break;
194      throw DataException("Programmer Error - attempt to getLength() for operator "+opToString(op)+".");      case TRACE:
195     }          return DataTypes::scalarShape;
196        break;
197            default:
198        throw DataException("Programmer error - resultShape(left,op) can't compute shapes for operator "+opToString(op)+".");
199        }
200  }  }
201    
202    // determine the output shape for the general tensor product operation
203    // the additional parameters return information required later for the product
204    // the majority of this code is copy pasted from C_General_Tensor_Product
205    DataTypes::ShapeType
206    GTPShape(DataAbstract_ptr left, DataAbstract_ptr right, int axis_offset, int transpose, int& SL, int& SM, int& SR)
207    {
208        
209      // Get rank and shape of inputs
210      int rank0 = left->getRank();
211      int rank1 = right->getRank();
212      const DataTypes::ShapeType& shape0 = left->getShape();
213      const DataTypes::ShapeType& shape1 = right->getShape();
214    
215      // Prepare for the loops of the product and verify compatibility of shapes
216      int start0=0, start1=0;
217      if (transpose == 0)       {}
218      else if (transpose == 1)  { start0 = axis_offset; }
219      else if (transpose == 2)  { start1 = rank1-axis_offset; }
220      else              { throw DataException("DataLazy GeneralTensorProduct Constructor: Error - transpose should be 0, 1 or 2"); }
221    
222      if (rank0<axis_offset)
223      {
224        throw DataException("DataLazy GeneralTensorProduct Constructor: Error - rank of left < axisoffset");
225      }
226    
227      // Adjust the shapes for transpose
228      DataTypes::ShapeType tmpShape0(rank0);    // pre-sizing the vectors rather
229      DataTypes::ShapeType tmpShape1(rank1);    // than using push_back
230      for (int i=0; i<rank0; i++)   { tmpShape0[i]=shape0[(i+start0)%rank0]; }
231      for (int i=0; i<rank1; i++)   { tmpShape1[i]=shape1[(i+start1)%rank1]; }
232    
233      // Prepare for the loops of the product
234      SL=1, SM=1, SR=1;
235      for (int i=0; i<rank0-axis_offset; i++)   {
236        SL *= tmpShape0[i];
237      }
238      for (int i=rank0-axis_offset; i<rank0; i++)   {
239        if (tmpShape0[i] != tmpShape1[i-(rank0-axis_offset)]) {
240          throw DataException("C_GeneralTensorProduct: Error - incompatible shapes");
241        }
242        SM *= tmpShape0[i];
243      }
244      for (int i=axis_offset; i<rank1; i++)     {
245        SR *= tmpShape1[i];
246      }
247    
248      // Define the shape of the output (rank of shape is the sum of the loop ranges below)
249      DataTypes::ShapeType shape2(rank0+rank1-2*axis_offset);  
250      {         // block to limit the scope of out_index
251         int out_index=0;
252         for (int i=0; i<rank0-axis_offset; i++, ++out_index) { shape2[out_index]=tmpShape0[i]; } // First part of arg_0_Z
253         for (int i=axis_offset; i<rank1; i++, ++out_index)   { shape2[out_index]=tmpShape1[i]; } // Last part of arg_1_Z
254      }
255      return shape2;
256    }
257    
258    
259    // determine the number of points in the result of "left op right"
260    // note that determining the resultLength for G_TENSORPROD is more complex and will not be processed here
261    // size_t
262    // resultLength(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
263    // {
264    //    switch (getOpgroup(op))
265    //    {
266    //    case G_BINARY: return left->getLength();
267    //    case G_UNARY: return left->getLength();
268    //    case G_NP1OUT: return left->getLength();
269    //    default:
270    //  throw DataException("Programmer Error - attempt to getLength() for operator "+opToString(op)+".");
271    //    }
272    // }
273    
274    // determine the number of samples requires to evaluate an expression combining left and right
275    // NP1OUT needs an extra buffer because we can't write the answers over the top of the input.
276    // The same goes for G_TENSORPROD
277  int  int
278  calcBuffs(const DataLazy_ptr& left, const DataLazy_ptr& right, ES_optype op)  calcBuffs(const DataLazy_ptr& left, const DataLazy_ptr& right, ES_optype op)
279  {  {
# Line 118  calcBuffs(const DataLazy_ptr& left, cons Line 282  calcBuffs(const DataLazy_ptr& left, cons
282     case G_IDENTITY: return 1;     case G_IDENTITY: return 1;
283     case G_BINARY: return max(left->getBuffsRequired(),right->getBuffsRequired()+1);     case G_BINARY: return max(left->getBuffsRequired(),right->getBuffsRequired()+1);
284     case G_UNARY: return max(left->getBuffsRequired(),1);     case G_UNARY: return max(left->getBuffsRequired(),1);
285       case G_NP1OUT: return 1+max(left->getBuffsRequired(),1);
286       case G_NP1OUT_P: return 1+max(left->getBuffsRequired(),1);
287       case G_TENSORPROD: return 1+max(left->getBuffsRequired(),right->getBuffsRequired()+1);
288     default:     default:
289      throw DataException("Programmer Error - attempt to calcBuffs() for operator "+opToString(op)+".");      throw DataException("Programmer Error - attempt to calcBuffs() for operator "+opToString(op)+".");
290     }     }
291  }  }
292    
293    
294  }   // end anonymous namespace  }   // end anonymous namespace
295    
296    
297    
298    // Return a string representing the operation
299  const std::string&  const std::string&
300  opToString(ES_optype op)  opToString(ES_optype op)
301  {  {
# Line 139  opToString(ES_optype op) Line 309  opToString(ES_optype op)
309    
310  DataLazy::DataLazy(DataAbstract_ptr p)  DataLazy::DataLazy(DataAbstract_ptr p)
311      : parent(p->getFunctionSpace(),p->getShape()),      : parent(p->getFunctionSpace(),p->getShape()),
312      m_op(IDENTITY)      m_op(IDENTITY),
313        m_axis_offset(0),
314        m_transpose(0),
315        m_SL(0), m_SM(0), m_SR(0)
316  {  {
317     if (p->isLazy())     if (p->isLazy())
318     {     {
     // TODO: fix this.   We could make the new node a copy of p?  
319      // I don't want identity of Lazy.      // I don't want identity of Lazy.
320      // Question: Why would that be so bad?      // Question: Why would that be so bad?
321      // Answer: We assume that the child of ID is something we can call getVector on      // Answer: We assume that the child of ID is something we can call getVector on
# Line 157  DataLazy::DataLazy(DataAbstract_ptr p) Line 329  DataLazy::DataLazy(DataAbstract_ptr p)
329      else if (p->isTagged()) {m_readytype='T';}      else if (p->isTagged()) {m_readytype='T';}
330      else {throw DataException("Unknown DataReady instance in DataLazy constructor.");}      else {throw DataException("Unknown DataReady instance in DataLazy constructor.");}
331     }     }
    m_length=p->getLength();  
332     m_buffsRequired=1;     m_buffsRequired=1;
333     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
334       m_maxsamplesize=m_samplesize;
335  cout << "(1)Lazy created with " << m_samplesize << endl;  cout << "(1)Lazy created with " << m_samplesize << endl;
336  }  }
337    
338    
339    
340    
341  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op)  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op)
342      : parent(left->getFunctionSpace(),left->getShape()),      : parent(left->getFunctionSpace(),left->getShape()),
343      m_op(op)      m_op(op),
344        m_axis_offset(0),
345        m_transpose(0),
346        m_SL(0), m_SM(0), m_SR(0)
347  {  {
348     if (getOpgroup(op)!=G_UNARY)     if ((getOpgroup(op)!=G_UNARY) && (getOpgroup(op)!=G_NP1OUT))
349     {     {
350      throw DataException("Programmer error - constructor DataLazy(left, op) will only process UNARY operations.");      throw DataException("Programmer error - constructor DataLazy(left, op) will only process UNARY operations.");
351     }     }
352    
353     DataLazy_ptr lleft;     DataLazy_ptr lleft;
354     if (!left->isLazy())     if (!left->isLazy())
355     {     {
# Line 181  DataLazy::DataLazy(DataAbstract_ptr left Line 360  DataLazy::DataLazy(DataAbstract_ptr left
360      lleft=dynamic_pointer_cast<DataLazy>(left);      lleft=dynamic_pointer_cast<DataLazy>(left);
361     }     }
362     m_readytype=lleft->m_readytype;     m_readytype=lleft->m_readytype;
    m_length=left->getLength();  
363     m_left=lleft;     m_left=lleft;
364     m_buffsRequired=1;     m_buffsRequired=calcBuffs(m_left, m_right,m_op); // yeah m_right will be null at this point
365     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
366       m_maxsamplesize=max(m_samplesize,m_left->getMaxSampleSize());
367  }  }
368    
369    
370  // DataLazy::DataLazy(DataLazy_ptr left, DataLazy_ptr right, ES_optype op)  // In this constructor we need to consider interpolation
 //  : parent(resultFS(left,right,op), resultShape(left,right,op)),  
 //  m_left(left),  
 //  m_right(right),  
 //  m_op(op)  
 // {  
 //    if (getOpgroup(op)!=G_BINARY)  
 //    {  
 //  throw DataException("Programmer error - constructor DataLazy(left, right, op) will only process BINARY operations.");  
 //    }  
 //    m_length=resultLength(m_left,m_right,m_op);  
 //    m_samplesize=getNumDPPSample()*getNoValues();  
 //    m_buffsRequired=calcBuffs(m_left, m_right, m_op);  
 // cout << "(2)Lazy created with " << m_samplesize << endl;  
 // }  
   
371  DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)  DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
372      : parent(resultFS(left,right,op), resultShape(left,right,op)),      : parent(resultFS(left,right,op), resultShape(left,right,op)),
373      m_op(op)      m_op(op),
374        m_SL(0), m_SM(0), m_SR(0)
375  {  {
376     if (getOpgroup(op)!=G_BINARY)     if ((getOpgroup(op)!=G_BINARY))
377     {     {
378      throw DataException("Programmer error - constructor DataLazy(left, right, op) will only process BINARY operations.");      throw DataException("Programmer error - constructor DataLazy(left, right, op) will only process BINARY operations.");
379     }     }
380     if (left->isLazy())  
381       if (getFunctionSpace()!=left->getFunctionSpace())    // left needs to be interpolated
382       {
383        FunctionSpace fs=getFunctionSpace();
384        Data ltemp(left);
385        Data tmp(ltemp,fs);
386        left=tmp.borrowDataPtr();
387       }
388       if (getFunctionSpace()!=right->getFunctionSpace())   // right needs to be interpolated
389       {
390        Data tmp(Data(right),getFunctionSpace());
391        right=tmp.borrowDataPtr();
392       }
393       left->operandCheck(*right);
394    
395       if (left->isLazy())          // the children need to be DataLazy. Wrap them in IDENTITY if required
396     {     {
397      m_left=dynamic_pointer_cast<DataLazy>(left);      m_left=dynamic_pointer_cast<DataLazy>(left);
398     }     }
# Line 242  DataLazy::DataLazy(DataAbstract_ptr left Line 422  DataLazy::DataLazy(DataAbstract_ptr left
422     {     {
423      m_readytype='C';      m_readytype='C';
424     }     }
    m_length=resultLength(m_left,m_right,m_op);  
425     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
426       m_maxsamplesize=max(max(m_samplesize,m_right->getMaxSampleSize()),m_left->getMaxSampleSize());  
427     m_buffsRequired=calcBuffs(m_left, m_right,m_op);     m_buffsRequired=calcBuffs(m_left, m_right,m_op);
428  cout << "(3)Lazy created with " << m_samplesize << endl;  cout << "(3)Lazy created with " << m_samplesize << endl;
429  }  }
430    
431    DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op, int axis_offset, int transpose)
432        : parent(resultFS(left,right,op), GTPShape(left,right, axis_offset, transpose, m_SL,m_SM, m_SR)),
433        m_op(op),
434        m_axis_offset(axis_offset),
435        m_transpose(transpose)
436    {
437       if ((getOpgroup(op)!=G_TENSORPROD))
438       {
439        throw DataException("Programmer error - constructor DataLazy(left, right, op, ax, tr) will only process BINARY operations which require parameters.");
440       }
441       if ((transpose>2) || (transpose<0))
442       {
443        throw DataException("DataLazy GeneralTensorProduct constructor: Error - transpose should be 0, 1 or 2");
444       }
445       if (getFunctionSpace()!=left->getFunctionSpace())    // left needs to be interpolated
446       {
447        FunctionSpace fs=getFunctionSpace();
448        Data ltemp(left);
449        Data tmp(ltemp,fs);
450        left=tmp.borrowDataPtr();
451       }
452       if (getFunctionSpace()!=right->getFunctionSpace())   // right needs to be interpolated
453       {
454        Data tmp(Data(right),getFunctionSpace());
455        right=tmp.borrowDataPtr();
456       }
457       left->operandCheck(*right);
458    
459       if (left->isLazy())          // the children need to be DataLazy. Wrap them in IDENTITY if required
460       {
461        m_left=dynamic_pointer_cast<DataLazy>(left);
462       }
463       else
464       {
465        m_left=DataLazy_ptr(new DataLazy(left));
466       }
467       if (right->isLazy())
468       {
469        m_right=dynamic_pointer_cast<DataLazy>(right);
470       }
471       else
472       {
473        m_right=DataLazy_ptr(new DataLazy(right));
474       }
475       char lt=m_left->m_readytype;
476       char rt=m_right->m_readytype;
477       if (lt=='E' || rt=='E')
478       {
479        m_readytype='E';
480       }
481       else if (lt=='T' || rt=='T')
482       {
483        m_readytype='T';
484       }
485       else
486       {
487        m_readytype='C';
488       }
489       m_samplesize=getNumDPPSample()*getNoValues();
490       m_maxsamplesize=max(max(m_samplesize,m_right->getMaxSampleSize()),m_left->getMaxSampleSize());  
491       m_buffsRequired=calcBuffs(m_left, m_right,m_op);
492    cout << "(4)Lazy created with " << m_samplesize << endl;
493    }
494    
495    
496    DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, int axis_offset)
497        : parent(left->getFunctionSpace(), resultShape(left,op)),
498        m_op(op),
499        m_axis_offset(axis_offset),
500        m_transpose(0)
501    {
502       if ((getOpgroup(op)!=G_NP1OUT_P))
503       {
504        throw DataException("Programmer error - constructor DataLazy(left, op, ax) will only process UNARY operations which require parameters.");
505       }
506       DataLazy_ptr lleft;
507       if (!left->isLazy())
508       {
509        lleft=DataLazy_ptr(new DataLazy(left));
510       }
511       else
512       {
513        lleft=dynamic_pointer_cast<DataLazy>(left);
514       }
515       m_readytype=lleft->m_readytype;
516       m_left=lleft;
517       m_buffsRequired=calcBuffs(m_left, m_right,m_op); // yeah m_right will be null at this point
518       m_samplesize=getNumDPPSample()*getNoValues();
519       m_maxsamplesize=max(m_samplesize,m_left->getMaxSampleSize());
520    cout << "(5)Lazy created with " << m_samplesize << endl;
521    }
522    
523    
524  DataLazy::~DataLazy()  DataLazy::~DataLazy()
525  {  {
# Line 261  DataLazy::getBuffsRequired() const Line 533  DataLazy::getBuffsRequired() const
533  }  }
534    
535    
536    size_t
537    DataLazy::getMaxSampleSize() const
538    {
539        return m_maxsamplesize;
540    }
541    
542    /*
543      \brief Evaluates the expression using methods on Data.
544      This does the work for the collapse method.
545      For reasons of efficiency do not call this method on DataExpanded nodes.
546    */
547  DataReady_ptr  DataReady_ptr
548  DataLazy::collapseToReady()  DataLazy::collapseToReady()
549  {  {
# Line 275  DataLazy::collapseToReady() Line 558  DataLazy::collapseToReady()
558    DataReady_ptr pleft=m_left->collapseToReady();    DataReady_ptr pleft=m_left->collapseToReady();
559    Data left(pleft);    Data left(pleft);
560    Data right;    Data right;
561    if (getOpgroup(m_op)==G_BINARY)    if ((getOpgroup(m_op)==G_BINARY) || (getOpgroup(m_op)==G_TENSORPROD))
562    {    {
563      right=Data(m_right->collapseToReady());      right=Data(m_right->collapseToReady());
564    }    }
# Line 374  DataLazy::collapseToReady() Line 657  DataLazy::collapseToReady()
657      case LEZ:      case LEZ:
658      result=left.whereNonPositive();      result=left.whereNonPositive();
659      break;      break;
660        case SYM:
661        result=left.symmetric();
662        break;
663        case NSYM:
664        result=left.nonsymmetric();
665        break;
666        case PROD:
667        result=C_GeneralTensorProduct(left,right,m_axis_offset, m_transpose);
668        break;
669        case TRANS:
670        result=left.transpose(m_axis_offset);
671        break;
672        case TRACE:
673        result=left.trace(m_axis_offset);
674        break;
675      default:      default:
676      throw DataException("Programmer error - do not know how to resolve operator "+opToString(m_op)+".");      throw DataException("Programmer error - collapseToReady does not know how to resolve operator "+opToString(m_op)+".");
677    }    }
678    return result.borrowReadyPtr();    return result.borrowReadyPtr();
679  }  }
680    
681    /*
682       \brief Converts the DataLazy into an IDENTITY storing the value of the expression.
683       This method uses the original methods on the Data class to evaluate the expressions.
684       For this reason, it should not be used on DataExpanded instances. (To do so would defeat
685       the purpose of using DataLazy in the first place).
686    */
687  void  void
688  DataLazy::collapse()  DataLazy::collapse()
689  {  {
# Line 395  DataLazy::collapse() Line 699  DataLazy::collapse()
699    m_op=IDENTITY;    m_op=IDENTITY;
700  }  }
701    
702    /*
703      \brief Compute the value of the expression (unary operation) for the given sample.
704      \return Vector which stores the value of the subexpression for the given sample.
705      \param v A vector to store intermediate results.
706      \param offset Index in v to begin storing results.
707      \param sampleNo Sample number to evaluate.
708      \param roffset (output parameter) the offset in the return vector where the result begins.
709    
710      The return value will be an existing vector so do not deallocate it.
711      If the result is stored in v it should be stored at the offset given.
712      Everything from offset to the end of v should be considered available for this method to use.
713    */
714  DataTypes::ValueType*  DataTypes::ValueType*
715  DataLazy::resolveUnary(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const  DataLazy::resolveUnary(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const
716  {  {
# Line 412  DataLazy::resolveUnary(ValueType& v, siz Line 728  DataLazy::resolveUnary(ValueType& v, siz
728    switch (m_op)    switch (m_op)
729    {    {
730      case SIN:        case SIN:  
731      tensor_unary_operation(m_samplesize, left, result, ::sin);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sin);
732      break;      break;
733      case COS:      case COS:
734      tensor_unary_operation(m_samplesize, left, result, ::cos);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cos);
735      break;      break;
736      case TAN:      case TAN:
737      tensor_unary_operation(m_samplesize, left, result, ::tan);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tan);
738      break;      break;
739      case ASIN:      case ASIN:
740      tensor_unary_operation(m_samplesize, left, result, ::asin);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::asin);
741      break;      break;
742      case ACOS:      case ACOS:
743      tensor_unary_operation(m_samplesize, left, result, ::acos);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::acos);
744      break;      break;
745      case ATAN:      case ATAN:
746      tensor_unary_operation(m_samplesize, left, result, ::atan);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::atan);
747      break;      break;
748      case SINH:      case SINH:
749      tensor_unary_operation(m_samplesize, left, result, ::sinh);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sinh);
750      break;      break;
751      case COSH:      case COSH:
752      tensor_unary_operation(m_samplesize, left, result, ::cosh);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cosh);
753      break;      break;
754      case TANH:      case TANH:
755      tensor_unary_operation(m_samplesize, left, result, ::tanh);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tanh);
756      break;      break;
757      case ERF:      case ERF:
758  #ifdef _WIN32  #if defined (_WIN32) && !defined(__INTEL_COMPILER)
759      throw DataException("Error - Data:: erf function is not supported on _WIN32 platforms.");      throw DataException("Error - Data:: erf function is not supported on _WIN32 platforms.");
760  #else  #else
761      tensor_unary_operation(m_samplesize, left, result, ::erf);      tensor_unary_operation(m_samplesize, left, result, ::erf);
762      break;      break;
763  #endif  #endif
764     case ASINH:     case ASINH:
765  #ifdef _WIN32  #if defined (_WIN32) && !defined(__INTEL_COMPILER)
766      tensor_unary_operation(m_samplesize, left, result, escript::asinh_substitute);      tensor_unary_operation(m_samplesize, left, result, escript::asinh_substitute);
767  #else  #else
768      tensor_unary_operation(m_samplesize, left, result, ::asinh);      tensor_unary_operation(m_samplesize, left, result, ::asinh);
769  #endif    #endif  
770      break;      break;
771     case ACOSH:     case ACOSH:
772  #ifdef _WIN32  #if defined (_WIN32) && !defined(__INTEL_COMPILER)
773      tensor_unary_operation(m_samplesize, left, result, escript::acosh_substitute);      tensor_unary_operation(m_samplesize, left, result, escript::acosh_substitute);
774  #else  #else
775      tensor_unary_operation(m_samplesize, left, result, ::acosh);      tensor_unary_operation(m_samplesize, left, result, ::acosh);
776  #endif    #endif  
777      break;      break;
778     case ATANH:     case ATANH:
779  #ifdef _WIN32  #if defined (_WIN32) && !defined(__INTEL_COMPILER)
780      tensor_unary_operation(m_samplesize, left, result, escript::atanh_substitute);      tensor_unary_operation(m_samplesize, left, result, escript::atanh_substitute);
781  #else  #else
782      tensor_unary_operation(m_samplesize, left, result, ::atanh);      tensor_unary_operation(m_samplesize, left, result, ::atanh);
783  #endif    #endif  
784      break;      break;
785      case LOG10:      case LOG10:
786      tensor_unary_operation(m_samplesize, left, result, ::log10);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log10);
787      break;      break;
788      case LOG:      case LOG:
789      tensor_unary_operation(m_samplesize, left, result, ::log);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log);
790      break;      break;
791      case SIGN:      case SIGN:
792      tensor_unary_operation(m_samplesize, left, result, escript::fsign);      tensor_unary_operation(m_samplesize, left, result, escript::fsign);
793      break;      break;
794      case ABS:      case ABS:
795      tensor_unary_operation(m_samplesize, left, result, ::fabs);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::fabs);
796      break;      break;
797      case NEG:      case NEG:
798      tensor_unary_operation(m_samplesize, left, result, negate<double>());      tensor_unary_operation(m_samplesize, left, result, negate<double>());
# Line 487  DataLazy::resolveUnary(ValueType& v, siz Line 803  DataLazy::resolveUnary(ValueType& v, siz
803      throw DataException("Programmer error - POS not supported for lazy data.");      throw DataException("Programmer error - POS not supported for lazy data.");
804      break;      break;
805      case EXP:      case EXP:
806      tensor_unary_operation(m_samplesize, left, result, ::exp);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::exp);
807      break;      break;
808      case SQRT:      case SQRT:
809      tensor_unary_operation(m_samplesize, left, result, ::sqrt);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sqrt);
810      break;      break;
811      case RECIP:      case RECIP:
812      tensor_unary_operation(m_samplesize, left, result, bind1st(divides<double>(),1.));      tensor_unary_operation(m_samplesize, left, result, bind1st(divides<double>(),1.));
# Line 515  DataLazy::resolveUnary(ValueType& v, siz Line 831  DataLazy::resolveUnary(ValueType& v, siz
831  }  }
832    
833    
834    /*
835      \brief Compute the value of the expression (unary operation) for the given sample.
836      \return Vector which stores the value of the subexpression for the given sample.
837      \param v A vector to store intermediate results.
838      \param offset Index in v to begin storing results.
839      \param sampleNo Sample number to evaluate.
840      \param roffset (output parameter) the offset in the return vector where the result begins.
841    
842      The return value will be an existing vector so do not deallocate it.
843      If the result is stored in v it should be stored at the offset given.
844      Everything from offset to the end of v should be considered available for this method to use.
845    */
846    DataTypes::ValueType*
847    DataLazy::resolveNP1OUT(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const
848    {
849        // we assume that any collapsing has been done before we get here
850        // since we only have one argument we don't need to think about only
851        // processing single points.
852      if (m_readytype!='E')
853      {
854        throw DataException("Programmer error - resolveNP1OUT should only be called on expanded Data.");
855      }
856        // since we can't write the result over the input, we need a result offset further along
857      size_t subroffset=roffset+m_samplesize;
858      const ValueType* vleft=m_left->resolveSample(v,offset,sampleNo,subroffset);
859      roffset=offset;
860      switch (m_op)
861      {
862        case SYM:
863        DataMaths::symmetric(*vleft,m_left->getShape(),subroffset, v, getShape(), offset);
864        break;
865        case NSYM:
866        DataMaths::nonsymmetric(*vleft,m_left->getShape(),subroffset, v, getShape(), offset);
867        break;
868        default:
869        throw DataException("Programmer error - resolveNP1OUT can not resolve operator "+opToString(m_op)+".");
870      }
871      return &v;
872    }
873    
874    /*
875      \brief Compute the value of the expression (unary operation) for the given sample.
876      \return Vector which stores the value of the subexpression for the given sample.
877      \param v A vector to store intermediate results.
878      \param offset Index in v to begin storing results.
879      \param sampleNo Sample number to evaluate.
880      \param roffset (output parameter) the offset in the return vector where the result begins.
881    
882      The return value will be an existing vector so do not deallocate it.
883      If the result is stored in v it should be stored at the offset given.
884      Everything from offset to the end of v should be considered available for this method to use.
885    */
886    DataTypes::ValueType*
887    DataLazy::resolveNP1OUT_P(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const
888    {
889        // we assume that any collapsing has been done before we get here
890        // since we only have one argument we don't need to think about only
891        // processing single points.
892      if (m_readytype!='E')
893      {
894        throw DataException("Programmer error - resolveNP1OUT_P should only be called on expanded Data.");
895      }
896        // since we can't write the result over the input, we need a result offset further along
897      size_t subroffset=roffset+m_samplesize;
898      const ValueType* vleft=m_left->resolveSample(v,offset,sampleNo,subroffset);
899      roffset=offset;
900      switch (m_op)
901      {
902        case TRACE:
903             DataMaths::trace(*vleft,m_left->getShape(),subroffset, v,getShape(),offset,m_axis_offset);
904        break;
905        case TRANS:
906             DataMaths::transpose(*vleft,m_left->getShape(),subroffset, v,getShape(),offset,m_axis_offset);
907        break;
908        default:
909        throw DataException("Programmer error - resolveNP1OUTP can not resolve operator "+opToString(m_op)+".");
910      }
911      return &v;
912    }
913    
 // const double*  
 // DataLazy::resolveUnary(ValueType& v,int sampleNo,  size_t offset) const  
 // {  
 //  // we assume that any collapsing has been done before we get here  
 //  // since we only have one argument we don't need to think about only  
 //  // processing single points.  
 //   if (m_readytype!='E')  
 //   {  
 //     throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");  
 //   }  
 //   const double* left=m_left->resolveSample(v,sampleNo,offset);  
 //   double* result=&(v[offset]);  
 //   switch (m_op)  
 //   {  
 //     case SIN:      
 //  tensor_unary_operation(m_samplesize, left, result, ::sin);  
 //  break;  
 //     case COS:  
 //  tensor_unary_operation(m_samplesize, left, result, ::cos);  
 //  break;  
 //     case TAN:  
 //  tensor_unary_operation(m_samplesize, left, result, ::tan);  
 //  break;  
 //     case ASIN:  
 //  tensor_unary_operation(m_samplesize, left, result, ::asin);  
 //  break;  
 //     case ACOS:  
 //  tensor_unary_operation(m_samplesize, left, result, ::acos);  
 //  break;  
 //     case ATAN:  
 //  tensor_unary_operation(m_samplesize, left, result, ::atan);  
 //  break;  
 //     case SINH:  
 //  tensor_unary_operation(m_samplesize, left, result, ::sinh);  
 //  break;  
 //     case COSH:  
 //  tensor_unary_operation(m_samplesize, left, result, ::cosh);  
 //  break;  
 //     case TANH:  
 //  tensor_unary_operation(m_samplesize, left, result, ::tanh);  
 //  break;  
 //     case ERF:  
 // #ifdef _WIN32  
 //  throw DataException("Error - Data:: erf function is not supported on _WIN32 platforms.");  
 // #else  
 //  tensor_unary_operation(m_samplesize, left, result, ::erf);  
 //  break;  
 // #endif  
 //    case ASINH:  
 // #ifdef _WIN32  
 //  tensor_unary_operation(m_samplesize, left, result, escript::asinh_substitute);  
 // #else  
 //  tensor_unary_operation(m_samplesize, left, result, ::asinh);  
 // #endif    
 //  break;  
 //    case ACOSH:  
 // #ifdef _WIN32  
 //  tensor_unary_operation(m_samplesize, left, result, escript::acosh_substitute);  
 // #else  
 //  tensor_unary_operation(m_samplesize, left, result, ::acosh);  
 // #endif    
 //  break;  
 //    case ATANH:  
 // #ifdef _WIN32  
 //  tensor_unary_operation(m_samplesize, left, result, escript::atanh_substitute);  
 // #else  
 //  tensor_unary_operation(m_samplesize, left, result, ::atanh);  
 // #endif    
 //  break;  
 //     case LOG10:  
 //  tensor_unary_operation(m_samplesize, left, result, ::log10);  
 //  break;  
 //     case LOG:  
 //  tensor_unary_operation(m_samplesize, left, result, ::log);  
 //  break;  
 //     case SIGN:  
 //  tensor_unary_operation(m_samplesize, left, result, escript::fsign);  
 //  break;  
 //     case ABS:  
 //  tensor_unary_operation(m_samplesize, left, result, ::fabs);  
 //  break;  
 //     case NEG:  
 //  tensor_unary_operation(m_samplesize, left, result, negate<double>());  
 //  break;  
 //     case POS:  
 //  // it doesn't mean anything for delayed.  
 //  // it will just trigger a deep copy of the lazy object  
 //  throw DataException("Programmer error - POS not supported for lazy data.");  
 //  break;  
 //     case EXP:  
 //  tensor_unary_operation(m_samplesize, left, result, ::exp);  
 //  break;  
 //     case SQRT:  
 //  tensor_unary_operation(m_samplesize, left, result, ::sqrt);  
 //  break;  
 //     case RECIP:  
 //  tensor_unary_operation(m_samplesize, left, result, bind1st(divides<double>(),1.));  
 //  break;  
 //     case GZ:  
 //  tensor_unary_operation(m_samplesize, left, result, bind2nd(greater<double>(),0.0));  
 //  break;  
 //     case LZ:  
 //  tensor_unary_operation(m_samplesize, left, result, bind2nd(less<double>(),0.0));  
 //  break;  
 //     case GEZ:  
 //  tensor_unary_operation(m_samplesize, left, result, bind2nd(greater_equal<double>(),0.0));  
 //  break;  
 //     case LEZ:  
 //  tensor_unary_operation(m_samplesize, left, result, bind2nd(less_equal<double>(),0.0));  
 //  break;  
 //  
 //     default:  
 //  throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");  
 //   }  
 //   return result;  
 // }  
914    
915  #define PROC_OP(X) \  #define PROC_OP(TYPE,X)                               \
916      for (int i=0;i<steps;++i,resultp+=getNoValues()) \      for (int i=0;i<steps;++i,resultp+=resultStep) \
917      { \      { \
918  cout << "Step#" << i << " chunk=" << chunksize << endl; \         tensor_binary_operation< TYPE >(chunksize, &((*left)[lroffset]), &((*right)[rroffset]), resultp, X); \
919  cout << left[0] << left[1] << left[2] << endl; \         lroffset+=leftStep; \
920  cout << right[0] << right[1] << right[2] << endl; \         rroffset+=rightStep; \
        tensor_binary_operation(chunksize, left, right, resultp, X); \  
        left+=leftStep; \  
        right+=rightStep; \  
 cout << "Result=" << result << " " << result[0] << result[1] << result[2] << endl; \  
921      }      }
922    
923    /*
924      \brief Compute the value of the expression (binary operation) for the given sample.
925      \return Vector which stores the value of the subexpression for the given sample.
926      \param v A vector to store intermediate results.
927      \param offset Index in v to begin storing results.
928      \param sampleNo Sample number to evaluate.
929      \param roffset (output parameter) the offset in the return vector where the result begins.
930    
931      The return value will be an existing vector so do not deallocate it.
932      If the result is stored in v it should be stored at the offset given.
933      Everything from offset to the end of v should be considered available for this method to use.
934    */
935    // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
936    // have already been collapsed to IDENTITY. So we must have at least one expanded child.
937    // If both children are expanded, then we can process them in a single operation (we treat
938    // the whole sample as one big datapoint.
939    // If one of the children is not expanded, then we need to treat each point in the sample
940    // individually.
941    // There is an additional complication when scalar operations are considered.
942    // For example, 2+Vector.
943    // In this case each double within the point is treated individually
944  DataTypes::ValueType*  DataTypes::ValueType*
945  DataLazy::resolveBinary(ValueType& v,  size_t offset ,int sampleNo, size_t& roffset) const  DataLazy::resolveBinary(ValueType& v,  size_t offset, int sampleNo, size_t& roffset) const
946  {  {
     // again we assume that all collapsing has already been done  
     // so we have at least one expanded child.  
     // however, we could still have one of the children being not expanded.  
   
947  cout << "Resolve binary: " << toString() << endl;  cout << "Resolve binary: " << toString() << endl;
948    
949    size_t lroffset=0, rroffset=0;    size_t lroffset=0, rroffset=0;    // offsets in the left and right result vectors
950        // first work out which of the children are expanded
951    bool leftExp=(m_left->m_readytype=='E');    bool leftExp=(m_left->m_readytype=='E');
952    bool rightExp=(m_right->m_readytype=='E');    bool rightExp=(m_right->m_readytype=='E');
953    bool bigloops=((leftExp && rightExp) || (!leftExp && !rightExp)); // is processing in single step    if (!leftExp && !rightExp)
954      {
955        throw DataException("Programmer Error - please use collapse if neither argument has type 'E'.");
956      }
957      bool leftScalar=(m_left->getRank()==0);
958      bool rightScalar=(m_right->getRank()==0);
959      bool bigloops=((leftExp && rightExp) || (!leftExp && !rightExp)); // is processing in single step?
960    int steps=(bigloops?1:getNumDPPSample());    int steps=(bigloops?1:getNumDPPSample());
961    size_t chunksize=(bigloops? m_samplesize : getNoValues());    size_t chunksize=(bigloops? m_samplesize : getNoValues());    // if bigloops, pretend the whole sample is a datapoint
962    int leftStep=((leftExp && !rightExp)? getNoValues() : 0);    if (m_left->getRank()!=m_right->getRank())    // need to deal with scalar * ? ops
963    int rightStep=((rightExp && !leftExp)? getNoValues() : 0);    {
964        if (!leftScalar && !rightScalar)
965        {
966           throw DataException("resolveBinary - ranks of arguments must match unless one of them is scalar.");
967        }
968        steps=getNumDPPSample()*max(m_left->getNoValues(),m_right->getNoValues());
969        chunksize=1;    // for scalar
970      }    
971      int leftStep=((leftExp && (!rightExp || rightScalar))? m_right->getNoValues() : 0);
972      int rightStep=((rightExp && (!leftExp || leftScalar))? m_left->getNoValues() : 0);
973      int resultStep=max(leftStep,rightStep);   // only one (at most) should be !=0
974        // Get the values of sub-expressions
975    const ValueType* left=m_left->resolveSample(v,offset,sampleNo,lroffset);    const ValueType* left=m_left->resolveSample(v,offset,sampleNo,lroffset);
976    const ValueType* right=m_right->resolveSample(v,offset,sampleNo,rroffset);        const ValueType* right=m_right->resolveSample(v,offset+m_samplesize,sampleNo,rroffset); // Note
977      // now we need to know which args are expanded      // the right child starts further along.
978  cout << "left=" << left << " right=" << right << endl;    double* resultp=&(v[offset]);     // results are stored at the vector offset we recieved
 cout << "(Length) l=" << left->size() << " r=" << right->size() << " res=" << v.size() << endl;  
   double* resultp=&(v[offset]);  
979    switch(m_op)    switch(m_op)
980    {    {
981      case ADD:      case ADD:
982      for (int i=0;i<steps;++i,resultp+=getNoValues())          PROC_OP(NO_ARG,plus<double>());
983      {      break;
984  cerr << "Step#" << i << " chunk=" << chunksize << endl;      case SUB:
985  cerr << left << "[" << lroffset << "] " << right << "[" << rroffset << "]" << endl;      PROC_OP(NO_ARG,minus<double>());
986         tensor_binary_operation(chunksize, &((*left)[lroffset]), &((*right)[rroffset]), resultp, plus<double>());      break;
987         lroffset+=leftStep;      case MUL:
988         rroffset+=rightStep;      PROC_OP(NO_ARG,multiplies<double>());
989  cerr << "left=" << lroffset << " right=" << rroffset << endl;      break;
990      }      case DIV:
991        PROC_OP(NO_ARG,divides<double>());
992        break;
993        case POW:
994           PROC_OP(double (double,double),::pow);
995      break;      break;
 // need to fill in the rest  
996      default:      default:
997      throw DataException("Programmer error - resolveBinary can not resolve operator "+opToString(m_op)+".");      throw DataException("Programmer error - resolveBinary can not resolve operator "+opToString(m_op)+".");
998    }    }
999    roffset=offset;    roffset=offset;  
1000    return &v;    return &v;
1001  }  }
1002    
1003    
1004    /*
1005      \brief Compute the value of the expression (tensor product) for the given sample.
1006      \return Vector which stores the value of the subexpression for the given sample.
1007      \param v A vector to store intermediate results.
1008      \param offset Index in v to begin storing results.
1009      \param sampleNo Sample number to evaluate.
1010      \param roffset (output parameter) the offset in the return vector where the result begins.
1011    
1012      The return value will be an existing vector so do not deallocate it.
1013      If the result is stored in v it should be stored at the offset given.
1014      Everything from offset to the end of v should be considered available for this method to use.
1015    */
1016    // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1017    // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1018    // unlike the other resolve helpers, we must treat these datapoints separately.
1019    DataTypes::ValueType*
1020    DataLazy::resolveTProd(ValueType& v,  size_t offset, int sampleNo, size_t& roffset) const
1021    {
1022    cout << "Resolve TensorProduct: " << toString() << endl;
1023    
1024  // #define PROC_OP(X) \    size_t lroffset=0, rroffset=0;    // offsets in the left and right result vectors
1025  //  for (int i=0;i<steps;++i,resultp+=getNoValues()) \      // first work out which of the children are expanded
1026  //  { \    bool leftExp=(m_left->m_readytype=='E');
1027  // cout << "Step#" << i << " chunk=" << chunksize << endl; \    bool rightExp=(m_right->m_readytype=='E');
1028  // cout << left[0] << left[1] << left[2] << endl; \    int steps=getNumDPPSample();
1029  // cout << right[0] << right[1] << right[2] << endl; \    int leftStep=((leftExp && !rightExp)? m_right->getNoValues() : 0);
1030  //     tensor_binary_operation(chunksize, left, right, resultp, X); \    int rightStep=((rightExp && !leftExp)? m_left->getNoValues() : 0);
1031  //     left+=leftStep; \    int resultStep=max(leftStep,rightStep);   // only one (at most) should be !=0
1032  //     right+=rightStep; \      // Get the values of sub-expressions (leave a gap of one sample for the result).
1033  // cout << "Result=" << result << " " << result[0] << result[1] << result[2] << endl; \    const ValueType* left=m_left->resolveSample(v,offset+m_samplesize,sampleNo,lroffset);
1034  //  }    const ValueType* right=m_right->resolveSample(v,offset+2*m_samplesize,sampleNo,rroffset);
1035  //    double* resultp=&(v[offset]);     // results are stored at the vector offset we recieved
1036  // const double*    switch(m_op)
1037  // DataLazy::resolveBinary(ValueType& v,int sampleNo,  size_t offset) const    {
1038  // {      case PROD:
1039  //  // again we assume that all collapsing has already been done      for (int i=0;i<steps;++i,resultp+=resultStep)
1040  //  // so we have at least one expanded child.      {
1041  //  // however, we could still have one of the children being not expanded.            const double *ptr_0 = &((*left)[lroffset]);
1042  //            const double *ptr_1 = &((*right)[rroffset]);
1043  // cout << "Resolve binary: " << toString() << endl;            matrix_matrix_product(m_SL, m_SM, m_SR, ptr_0, ptr_1, resultp, m_transpose);
1044  //        lroffset+=leftStep;
1045  //   const double* left=m_left->resolveSample(v,sampleNo,offset);        rroffset+=rightStep;
1046  // // cout << "Done Left " << /*left[0] << left[1] << left[2] << */endl;      }
1047  //   const double* right=m_right->resolveSample(v,sampleNo,offset);      break;
1048  // // cout << "Done Right"  << /*right[0] << right[1] << right[2] <<*/ endl;      default:
1049  //      // now we need to know which args are expanded      throw DataException("Programmer error - resolveTProduct can not resolve operator "+opToString(m_op)+".");
1050  //   bool leftExp=(m_left->m_readytype=='E');    }
1051  //   bool rightExp=(m_right->m_readytype=='E');    roffset=offset;
1052  //   bool bigloops=((leftExp && rightExp) || (!leftExp && !rightExp));  // is processing in single step    return &v;
1053  //   int steps=(bigloops?1:getNumSamples());  }
 //   size_t chunksize=(bigloops? m_samplesize : getNoValues());  
 //   int leftStep=((leftExp && !rightExp)? getNoValues() : 0);  
 //   int rightStep=((rightExp && !leftExp)? getNoValues() : 0);  
 // cout << "left=" << left << " right=" << right << endl;  
 //   double* result=&(v[offset]);  
 //   double* resultp=result;  
 //   switch(m_op)  
 //   {  
 //     case ADD:  
 //  for (int i=0;i<steps;++i,resultp+=getNoValues())  
 //  {  
 // cout << "Step#" << i << " chunk=" << chunksize << endl; \  
 // // cout << left[0] << left[1] << left[2] << endl;  
 // // cout << right[0] << right[1] << right[2] << endl;  
 //     tensor_binary_operation(chunksize, left, right, resultp, plus<double>());  
 // cout << "left=" << left << " right=" << right << " resp=" << resultp << endl;  
 //     left+=leftStep;  
 //     right+=rightStep;  
 // cout << "left=" << left << " right=" << right << endl;  
 // // cout << "Result=" << result << " " << result[0] << result[1] << result[2] << endl;  
 //  }  
 //  break;  
 // // need to fill in the rest  
 //     default:  
 //  throw DataException("Programmer error - resolveBinay can not resolve operator "+opToString(m_op)+".");  
 //   }  
 // // cout << "About to return "  << result[0] << result[1] << result[2] << endl;;  
 //   return result;  
 // }  
1054    
 // // the vector and the offset are a place where the method could write its data if it wishes  
 // // it is not obligated to do so. For example, if it has its own storage already, it can use that.  
 // // Hence the return value to indicate where the data is actually stored.  
 // // Regardless, the storage should be assumed to be used, even if it isn't.  
 // const double*  
 // DataLazy::resolveSample(ValueType& v,int sampleNo,  size_t offset )  
 // {  
 // cout << "Resolve sample " << toString() << endl;  
 //  // collapse so we have a 'E' node or an IDENTITY for some other type  
 //   if (m_readytype!='E' && m_op!=IDENTITY)  
 //   {  
 //  collapse();  
 //   }  
 //   if (m_op==IDENTITY)      
 //   {  
 //     const ValueType& vec=m_id->getVector();  
 //     if (m_readytype=='C')  
 //     {  
 //  return &(vec[0]);  
 //     }  
 //     return &(vec[m_id->getPointOffset(sampleNo, 0)]);  
 //   }  
 //   if (m_readytype!='E')  
 //   {  
 //     throw DataException("Programmer Error - Collapse did not produce an expanded node.");  
 //   }  
 //   switch (getOpgroup(m_op))  
 //   {  
 //   case G_UNARY: return resolveUnary(v,sampleNo,offset);  
 //   case G_BINARY: return resolveBinary(v,sampleNo,offset);  
 //   default:  
 //     throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");  
 //   }  
 // }  
1055    
1056    
1057    /*
1058      \brief Compute the value of the expression for the given sample.
1059      \return Vector which stores the value of the subexpression for the given sample.
1060      \param v A vector to store intermediate results.
1061      \param offset Index in v to begin storing results.
1062      \param sampleNo Sample number to evaluate.
1063      \param roffset (output parameter) the offset in the return vector where the result begins.
1064    
1065      The return value will be an existing vector so do not deallocate it.
1066    */
1067  // the vector and the offset are a place where the method could write its data if it wishes  // the vector and the offset are a place where the method could write its data if it wishes
1068  // it is not obligated to do so. For example, if it has its own storage already, it can use that.  // it is not obligated to do so. For example, if it has its own storage already, it can use that.
1069  // Hence the return value to indicate where the data is actually stored.  // Hence the return value to indicate where the data is actually stored.
# Line 824  cout << "Resolve sample " << toString() Line 1098  cout << "Resolve sample " << toString()
1098    {    {
1099    case G_UNARY: return resolveUnary(v, offset,sampleNo,roffset);    case G_UNARY: return resolveUnary(v, offset,sampleNo,roffset);
1100    case G_BINARY: return resolveBinary(v, offset,sampleNo,roffset);    case G_BINARY: return resolveBinary(v, offset,sampleNo,roffset);
1101      case G_NP1OUT: return resolveNP1OUT(v, offset, sampleNo,roffset);
1102      case G_NP1OUT_P: return resolveNP1OUT_P(v, offset, sampleNo,roffset);
1103      case G_TENSORPROD: return resolveTProd(v,offset, sampleNo,roffset);
1104    default:    default:
1105      throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");      throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");
1106    }    }
1107  }  }
1108    
1109    
1110    // To simplify the memory management, all threads operate on one large vector, rather than one each.
1111    // Each sample is evaluated independently and copied into the result DataExpanded.
 // This version uses double* trying again with vectors  
 // DataReady_ptr  
 // DataLazy::resolve()  
 // {  
 //  
 // cout << "Sample size=" << m_samplesize << endl;  
 // cout << "Buffers=" << m_buffsRequired << endl;  
 //  
 //   if (m_readytype!='E')  
 //   {  
 //     collapse();  
 //   }  
 //   if (m_op==IDENTITY)  
 //   {  
 //     return m_id;  
 //   }  
 //      // from this point on we must have m_op!=IDENTITY and m_readytype=='E'  
 //   size_t threadbuffersize=m_samplesize*(max(1,m_buffsRequired)+1);  
 //   int numthreads=1;  
 // #ifdef _OPENMP  
 //   numthreads=getNumberOfThreads();  
 //   int threadnum=0;  
 // #endif  
 //   ValueType v(numthreads*threadbuffersize);    
 // cout << "Buffer created with size=" << v.size() << endl;  
 //   DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(),  ValueType(getNoValues()));  
 //   ValueType& resvec=result->getVector();  
 //   DataReady_ptr resptr=DataReady_ptr(result);  
 //   int sample;  
 //   int resoffset;  
 //   int totalsamples=getNumSamples();  
 //   const double* res=0;  
 //   #pragma omp parallel for private(sample,resoffset,threadnum,res) schedule(static)  
 //   for (sample=0;sample<totalsamples;++sample)  
 //   {  
 // cout << "################################# " << sample << endl;  
 // #ifdef _OPENMP  
 //     res=resolveSample(v,sample,threadbuffersize*omp_get_thread_num());  
 // #else  
 //     res=resolveSample(v,sample,0);   // this would normally be v, but not if its a single IDENTITY op.  
 // #endif  
 // cerr << "-------------------------------- " << endl;  
 //     resoffset=result->getPointOffset(sample,0);  
 // cerr << "offset=" << resoffset << endl;  
 //     for (unsigned int i=0;i<m_samplesize;++i,++resoffset)    // copy values into the output vector  
 //     {  
 //  resvec[resoffset]=res[i];  
 //     }  
 // cerr << "*********************************" << endl;  
 //   }  
 //   return resptr;  
 // }  
   
   
1112  DataReady_ptr  DataReady_ptr
1113  DataLazy::resolve()  DataLazy::resolve()
1114  {  {
# Line 893  DataLazy::resolve() Line 1116  DataLazy::resolve()
1116  cout << "Sample size=" << m_samplesize << endl;  cout << "Sample size=" << m_samplesize << endl;
1117  cout << "Buffers=" << m_buffsRequired << endl;  cout << "Buffers=" << m_buffsRequired << endl;
1118    
1119    if (m_readytype!='E')    if (m_readytype!='E')     // if the whole sub-expression is Constant or Tagged, then evaluate it normally
1120    {    {
1121      collapse();      collapse();
1122    }    }
1123    if (m_op==IDENTITY)    if (m_op==IDENTITY)       // So a lazy expression of Constant or Tagged data will be returned here.
1124    {    {
1125      return m_id;      return m_id;
1126    }    }
1127      // from this point on we must have m_op!=IDENTITY and m_readytype=='E'      // from this point on we must have m_op!=IDENTITY and m_readytype=='E'
1128    size_t threadbuffersize=m_samplesize*(max(1,m_buffsRequired)+1);    size_t threadbuffersize=m_maxsamplesize*(max(1,m_buffsRequired)); // Each thread needs to have enough
1129        // storage to evaluate its expression
1130    int numthreads=1;    int numthreads=1;
1131  #ifdef _OPENMP  #ifdef _OPENMP
1132    numthreads=getNumberOfThreads();    numthreads=getNumberOfThreads();
   int threadnum=0;  
1133  #endif  #endif
1134    ValueType v(numthreads*threadbuffersize);    ValueType v(numthreads*threadbuffersize);
1135  cout << "Buffer created with size=" << v.size() << endl;  cout << "Buffer created with size=" << v.size() << endl;
# Line 916  cout << "Buffer created with size=" << v Line 1139  cout << "Buffer created with size=" << v
1139    int sample;    int sample;
1140    size_t outoffset;     // offset in the output data    size_t outoffset;     // offset in the output data
1141    int totalsamples=getNumSamples();    int totalsamples=getNumSamples();
1142    const ValueType* res=0;    const ValueType* res=0;   // Vector storing the answer
1143    size_t resoffset=0;    size_t resoffset=0;       // where in the vector to find the answer
1144    #pragma omp parallel for private(sample,resoffset,outoffset,threadnum,res) schedule(static)    #pragma omp parallel for private(sample,resoffset,outoffset,res) schedule(static)
1145    for (sample=0;sample<totalsamples;++sample)    for (sample=0;sample<totalsamples;++sample)
1146    {    {
1147  cout << "################################# " << sample << endl;  cout << "################################# " << sample << endl;
1148  #ifdef _OPENMP  #ifdef _OPENMP
1149      res=resolveSample(v,threadbuffersize*omp_get_thread_num(),sample,resoffset);      res=resolveSample(v,threadbuffersize*omp_get_thread_num(),sample,resoffset);
1150  #else  #else
1151      res=resolveSample(v,0,sample,resoffset);   // this would normally be v, but not if its a single IDENTITY op.      res=resolveSample(v,0,sample,resoffset);   // res would normally be v, but not if its a single IDENTITY op.
1152  #endif  #endif
1153  cerr << "-------------------------------- " << endl;  cerr << "-------------------------------- " << endl;
1154      outoffset=result->getPointOffset(sample,0);      outoffset=result->getPointOffset(sample,0);
# Line 948  DataLazy::toString() const Line 1171  DataLazy::toString() const
1171    return oss.str();    return oss.str();
1172  }  }
1173    
1174    
1175  void  void
1176  DataLazy::intoString(ostringstream& oss) const  DataLazy::intoString(ostringstream& oss) const
1177  {  {
# Line 980  DataLazy::intoString(ostringstream& oss) Line 1204  DataLazy::intoString(ostringstream& oss)
1204      oss << ')';      oss << ')';
1205      break;      break;
1206    case G_UNARY:    case G_UNARY:
1207      case G_NP1OUT:
1208      case G_NP1OUT_P:
1209      oss << opToString(m_op) << '(';      oss << opToString(m_op) << '(';
1210      m_left->intoString(oss);      m_left->intoString(oss);
1211      oss << ')';      oss << ')';
1212      break;      break;
1213      case G_TENSORPROD:
1214        oss << opToString(m_op) << '(';
1215        m_left->intoString(oss);
1216        oss << ", ";
1217        m_right->intoString(oss);
1218        oss << ')';
1219        break;
1220    default:    default:
1221      oss << "UNKNOWN";      oss << "UNKNOWN";
1222    }    }
1223  }  }
1224    
 // Note that in this case, deepCopy does not make copies of the leaves.  
 // Hopefully copy on write (or whatever we end up using) will take care of this.  
1225  DataAbstract*  DataAbstract*
1226  DataLazy::deepCopy()  DataLazy::deepCopy()
1227  {  {
1228    if (m_op==IDENTITY)    switch (getOpgroup(m_op))
1229    {    {
1230      return new DataLazy(m_left);    // we don't need to copy the child here    case G_IDENTITY:  return new DataLazy(m_id->deepCopy()->getPtr());
1231      case G_UNARY: return new DataLazy(m_left->deepCopy()->getPtr(),m_op);
1232      case G_BINARY:    return new DataLazy(m_left->deepCopy()->getPtr(),m_right->deepCopy()->getPtr(),m_op);
1233      case G_NP1OUT: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(),m_op);
1234      case G_TENSORPROD: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
1235      default:
1236        throw DataException("Programmer error - do not know how to deepcopy operator "+opToString(m_op)+".");
1237    }    }
   return new DataLazy(m_left->deepCopy()->getPtr(),m_right->deepCopy()->getPtr(),m_op);  
1238  }  }
1239    
1240    
1241    // There is no single, natural interpretation of getLength on DataLazy.
1242    // Instances of DataReady can look at the size of their vectors.
1243    // For lazy though, it could be the size the data would be if it were resolved;
1244    // or it could be some function of the lengths of the DataReady instances which
1245    // form part of the expression.
1246    // Rather than have people making assumptions, I have disabled the method.
1247  DataTypes::ValueType::size_type  DataTypes::ValueType::size_type
1248  DataLazy::getLength() const  DataLazy::getLength() const
1249  {  {
1250    return m_length;    throw DataException("getLength() does not make sense for lazy data.");
1251  }  }
1252    
1253    
# Line 1015  DataLazy::getSlice(const DataTypes::Regi Line 1257  DataLazy::getSlice(const DataTypes::Regi
1257    throw DataException("getSlice - not implemented for Lazy objects.");    throw DataException("getSlice - not implemented for Lazy objects.");
1258  }  }
1259    
1260    
1261    // To do this we need to rely on our child nodes
1262    DataTypes::ValueType::size_type
1263    DataLazy::getPointOffset(int sampleNo,
1264                     int dataPointNo)
1265    {
1266      if (m_op==IDENTITY)
1267      {
1268        return m_id->getPointOffset(sampleNo,dataPointNo);
1269      }
1270      if (m_readytype!='E')
1271      {
1272        collapse();
1273        return m_id->getPointOffset(sampleNo,dataPointNo);
1274      }
1275      // at this point we do not have an identity node and the expression will be Expanded
1276      // so we only need to know which child to ask
1277      if (m_left->m_readytype=='E')
1278      {
1279        return m_left->getPointOffset(sampleNo,dataPointNo);
1280      }
1281      else
1282      {
1283        return m_right->getPointOffset(sampleNo,dataPointNo);
1284      }
1285    }
1286    
1287    // To do this we need to rely on our child nodes
1288  DataTypes::ValueType::size_type  DataTypes::ValueType::size_type
1289  DataLazy::getPointOffset(int sampleNo,  DataLazy::getPointOffset(int sampleNo,
1290                   int dataPointNo) const                   int dataPointNo) const
1291  {  {
1292    throw DataException("getPointOffset - not implemented for Lazy objects - yet.");    if (m_op==IDENTITY)
1293      {
1294        return m_id->getPointOffset(sampleNo,dataPointNo);
1295      }
1296      if (m_readytype=='E')
1297      {
1298        // at this point we do not have an identity node and the expression will be Expanded
1299        // so we only need to know which child to ask
1300        if (m_left->m_readytype=='E')
1301        {
1302        return m_left->getPointOffset(sampleNo,dataPointNo);
1303        }
1304        else
1305        {
1306        return m_right->getPointOffset(sampleNo,dataPointNo);
1307        }
1308      }
1309      if (m_readytype=='C')
1310      {
1311        return m_left->getPointOffset(sampleNo,dataPointNo); // which child doesn't matter
1312      }
1313      throw DataException("Programmer error - getPointOffset on lazy data may require collapsing (but this object is marked const).");
1314    }
1315    
1316    // It would seem that DataTagged will need to be treated differently since even after setting all tags
1317    // to zero, all the tags from all the DataTags would be in the result.
1318    // However since they all have the same value (0) whether they are there or not should not matter.
1319    // So I have decided that for all types this method will create a constant 0.
1320    // It can be promoted up as required.
1321    // A possible efficiency concern might be expanded->constant->expanded which has an extra memory management
1322    // but we can deal with that if it arrises.
1323    void
1324    DataLazy::setToZero()
1325    {
1326      DataTypes::ValueType v(getNoValues(),0);
1327      m_id=DataReady_ptr(new DataConstant(getFunctionSpace(),getShape(),v));
1328      m_op=IDENTITY;
1329      m_right.reset();  
1330      m_left.reset();
1331      m_readytype='C';
1332      m_buffsRequired=1;
1333  }  }
1334    
1335  }   // end namespace  }   // end namespace

Legend:
Removed from v.1898  
changed lines
  Added in v.2085

  ViewVC Help
Powered by ViewVC 1.1.26