/[escript]/trunk/escript/src/DataLazy.cpp
ViewVC logotype

Diff of /trunk/escript/src/DataLazy.cpp

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

branches/schroedinger/escript/src/DataLazy.cpp revision 1865 by jfenwick, Thu Oct 9 03:53:57 2008 UTC trunk/escript/src/DataLazy.cpp revision 2157 by jfenwick, Mon Dec 15 06:05:58 2008 UTC
# Line 19  Line 19 
19  #ifdef PASO_MPI  #ifdef PASO_MPI
20  #include <mpi.h>  #include <mpi.h>
21  #endif  #endif
22    #ifdef _OPENMP
23    #include <omp.h>
24    #endif
25  #include "FunctionSpace.h"  #include "FunctionSpace.h"
26  #include "DataTypes.h"  #include "DataTypes.h"
27    #include "Data.h"
28    #include "UnaryFuncs.h"     // for escript::fsign
29    #include "Utils.h"
30    
31    // #define LAZYDEBUG(X) if (privdebug){X;}
32    #define LAZYDEBUG(X)
33    namespace
34    {
35    bool privdebug=false;
36    
37    #define ENABLEDEBUG privdebug=true;
38    #define DISABLEDEBUG privdebug=false;
39    }
40    
41    /*
42    How does DataLazy work?
43    ~~~~~~~~~~~~~~~~~~~~~~~
44    
45    Each instance represents a single operation on one or two other DataLazy instances. These arguments are normally
46    denoted left and right.
47    
48    A special operation, IDENTITY, stores an instance of DataReady in the m_id member.
49    This means that all "internal" nodes in the structure are instances of DataLazy.
50    
51    Each operation has a string representation as well as an opgroup - eg G_IDENTITY, G_BINARY, ...
52    Note that IDENITY is not considered a unary operation.
53    
54    I am avoiding calling the structure formed a tree because it is not guaranteed to be one (eg c=a+a).
55    It must however form a DAG (directed acyclic graph).
56    I will refer to individual DataLazy objects with the structure as nodes.
57    
58    Each node also stores:
59    - m_readytype \in {'E','T','C','?'} ~ indicates what sort of DataReady would be produced if the expression was
60        evaluated.
61    - m_buffsrequired ~ the larged number of samples which would need to be kept simultaneously in order to
62        evaluate the expression.
63    - m_samplesize ~ the number of doubles stored in a sample.
64    
65    When a new node is created, the above values are computed based on the values in the child nodes.
66    Eg: if left requires 4 samples and right requires 6 then left+right requires 7 samples.
67    
68    The resolve method, which produces a DataReady from a DataLazy, does the following:
69    1) Create a DataReady to hold the new result.
70    2) Allocate a vector (v) big enough to hold m_buffsrequired samples.
71    3) For each sample, call resolveSample with v, to get its values and copy them into the result object.
72    
73    (In the case of OMP, multiple samples are resolved in parallel so the vector needs to be larger.)
74    
75    resolveSample returns a Vector* and an offset within that vector where the result is stored.
76    Normally, this would be v, but for identity nodes their internal vector is returned instead.
77    
78    The convention that I use, is that the resolve methods should store their results starting at the offset they are passed.
79    
80    For expressions which evaluate to Constant or Tagged, there is a different evaluation method.
81    The collapse method invokes the (non-lazy) operations on the Data class to evaluate the expression.
82    
83    To add a new operator you need to do the following (plus anything I might have forgotten - adding a new group for example):
84    1) Add to the ES_optype.
85    2) determine what opgroup your operation belongs to (X)
86    3) add a string for the op to the end of ES_opstrings
87    4) increase ES_opcount
88    5) add an entry (X) to opgroups
89    6) add an entry to the switch in collapseToReady
90    7) add an entry to resolveX
91    */
92    
93    
94  using namespace std;  using namespace std;
95    using namespace boost;
96    
97  namespace escript  namespace escript
98  {  {
99    
 const std::string&  
 opToString(ES_optype op);  
   
100  namespace  namespace
101  {  {
102    
103    enum ES_opgroup
104    {
105       G_UNKNOWN,
106       G_IDENTITY,
107       G_BINARY,        // pointwise operations with two arguments
108       G_UNARY,     // pointwise operations with one argument
109       G_UNARY_P,       // pointwise operations with one argument, requiring a parameter
110       G_NP1OUT,        // non-pointwise op with one output
111       G_NP1OUT_P,      // non-pointwise op with one output requiring a parameter
112       G_TENSORPROD     // general tensor product
113    };
114    
115    
116    
117    
118    string ES_opstrings[]={"UNKNOWN","IDENTITY","+","-","*","/","^",
119                "sin","cos","tan",
120                "asin","acos","atan","sinh","cosh","tanh","erf",
121                "asinh","acosh","atanh",
122                "log10","log","sign","abs","neg","pos","exp","sqrt",
123                "1/","where>0","where<0","where>=0","where<=0", "where<>0","where=0",
124                "symmetric","nonsymmetric",
125                "prod",
126                "transpose", "trace"};
127    int ES_opcount=40;
128    ES_opgroup opgroups[]={G_UNKNOWN,G_IDENTITY,G_BINARY,G_BINARY,G_BINARY,G_BINARY, G_BINARY,
129                G_UNARY,G_UNARY,G_UNARY, //10
130                G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,    // 17
131                G_UNARY,G_UNARY,G_UNARY,                    // 20
132                G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,    // 28
133                G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY, G_UNARY_P, G_UNARY_P,      // 35
134                G_NP1OUT,G_NP1OUT,
135                G_TENSORPROD,
136                G_NP1OUT_P, G_NP1OUT_P};
137    inline
138    ES_opgroup
139    getOpgroup(ES_optype op)
140    {
141      return opgroups[op];
142    }
143    
144  // return the FunctionSpace of the result of "left op right"  // return the FunctionSpace of the result of "left op right"
145  FunctionSpace  FunctionSpace
146  resultFS(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)  resultFS(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
# Line 41  resultFS(DataAbstract_ptr left, DataAbst Line 149  resultFS(DataAbstract_ptr left, DataAbst
149      // maybe we need an interpolate node -      // maybe we need an interpolate node -
150      // that way, if interpolate is required in any other op we can just throw a      // that way, if interpolate is required in any other op we can just throw a
151      // programming error exception.      // programming error exception.
152      return left->getFunctionSpace();  
153      FunctionSpace l=left->getFunctionSpace();
154      FunctionSpace r=right->getFunctionSpace();
155      if (l!=r)
156      {
157        if (r.probeInterpolation(l))
158        {
159        return l;
160        }
161        if (l.probeInterpolation(r))
162        {
163        return r;
164        }
165        throw DataException("Cannot interpolate between the FunctionSpaces given for operation "+opToString(op)+".");
166      }
167      return l;
168  }  }
169    
170  // return the shape of the result of "left op right"  // return the shape of the result of "left op right"
171    // the shapes resulting from tensor product are more complex to compute so are worked out elsewhere
172  DataTypes::ShapeType  DataTypes::ShapeType
173  resultShape(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)  resultShape(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
174  {  {
175      return DataTypes::scalarShape;      if (left->getShape()!=right->getShape())
176        {
177          if ((getOpgroup(op)!=G_BINARY) && (getOpgroup(op)!=G_NP1OUT))
178          {
179            throw DataException("Shapes not the name - shapes must match for (point)binary operations.");
180          }
181          if (left->getRank()==0)   // we need to allow scalar * anything
182          {
183            return right->getShape();
184          }
185          if (right->getRank()==0)
186          {
187            return left->getShape();
188          }
189          throw DataException("Shapes not the same - arguments must have matching shapes (or be scalars) for (point)binary operations on lazy data.");
190        }
191        return left->getShape();
192  }  }
193    
194  size_t  // return the shape for "op left"
195  resultLength(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)  
196    DataTypes::ShapeType
197    resultShape(DataAbstract_ptr left, ES_optype op)
198    {
199        switch(op)
200        {
201            case TRANS:
202            return left->getShape();
203        break;
204        case TRACE:
205            return DataTypes::scalarShape;
206        break;
207            default:
208        throw DataException("Programmer error - resultShape(left,op) can't compute shapes for operator "+opToString(op)+".");
209        }
210    }
211    
212    // determine the output shape for the general tensor product operation
213    // the additional parameters return information required later for the product
214    // the majority of this code is copy pasted from C_General_Tensor_Product
215    DataTypes::ShapeType
216    GTPShape(DataAbstract_ptr left, DataAbstract_ptr right, int axis_offset, int transpose, int& SL, int& SM, int& SR)
217    {
218        
219      // Get rank and shape of inputs
220      int rank0 = left->getRank();
221      int rank1 = right->getRank();
222      const DataTypes::ShapeType& shape0 = left->getShape();
223      const DataTypes::ShapeType& shape1 = right->getShape();
224    
225      // Prepare for the loops of the product and verify compatibility of shapes
226      int start0=0, start1=0;
227      if (transpose == 0)       {}
228      else if (transpose == 1)  { start0 = axis_offset; }
229      else if (transpose == 2)  { start1 = rank1-axis_offset; }
230      else              { throw DataException("DataLazy GeneralTensorProduct Constructor: Error - transpose should be 0, 1 or 2"); }
231    
232      if (rank0<axis_offset)
233      {
234        throw DataException("DataLazy GeneralTensorProduct Constructor: Error - rank of left < axisoffset");
235      }
236    
237      // Adjust the shapes for transpose
238      DataTypes::ShapeType tmpShape0(rank0);    // pre-sizing the vectors rather
239      DataTypes::ShapeType tmpShape1(rank1);    // than using push_back
240      for (int i=0; i<rank0; i++)   { tmpShape0[i]=shape0[(i+start0)%rank0]; }
241      for (int i=0; i<rank1; i++)   { tmpShape1[i]=shape1[(i+start1)%rank1]; }
242    
243      // Prepare for the loops of the product
244      SL=1, SM=1, SR=1;
245      for (int i=0; i<rank0-axis_offset; i++)   {
246        SL *= tmpShape0[i];
247      }
248      for (int i=rank0-axis_offset; i<rank0; i++)   {
249        if (tmpShape0[i] != tmpShape1[i-(rank0-axis_offset)]) {
250          throw DataException("C_GeneralTensorProduct: Error - incompatible shapes");
251        }
252        SM *= tmpShape0[i];
253      }
254      for (int i=axis_offset; i<rank1; i++)     {
255        SR *= tmpShape1[i];
256      }
257    
258      // Define the shape of the output (rank of shape is the sum of the loop ranges below)
259      DataTypes::ShapeType shape2(rank0+rank1-2*axis_offset);  
260      {         // block to limit the scope of out_index
261         int out_index=0;
262         for (int i=0; i<rank0-axis_offset; i++, ++out_index) { shape2[out_index]=tmpShape0[i]; } // First part of arg_0_Z
263         for (int i=axis_offset; i<rank1; i++, ++out_index)   { shape2[out_index]=tmpShape1[i]; } // Last part of arg_1_Z
264      }
265    
266      if (shape2.size()>ESCRIPT_MAX_DATA_RANK)
267      {
268         ostringstream os;
269         os << "C_GeneralTensorProduct: Error - Attempt to create a rank " << shape2.size() << " object. The maximum rank is " << ESCRIPT_MAX_DATA_RANK << ".";
270         throw DataException(os.str());
271      }
272    
273      return shape2;
274    }
275    
276    
277    // determine the number of points in the result of "left op right"
278    // note that determining the resultLength for G_TENSORPROD is more complex and will not be processed here
279    // size_t
280    // resultLength(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
281    // {
282    //    switch (getOpgroup(op))
283    //    {
284    //    case G_BINARY: return left->getLength();
285    //    case G_UNARY: return left->getLength();
286    //    case G_NP1OUT: return left->getLength();
287    //    default:
288    //  throw DataException("Programmer Error - attempt to getLength() for operator "+opToString(op)+".");
289    //    }
290    // }
291    
292    // determine the number of samples requires to evaluate an expression combining left and right
293    // NP1OUT needs an extra buffer because we can't write the answers over the top of the input.
294    // The same goes for G_TENSORPROD
295    // It might seem that pointwise binary ops (G_BINARY) could be written over the top of the lefths.
296    // This would be true were it not for the possibility that the LHS could be a scalar which needs to be examined
297    // multiple times
298    int
299    calcBuffs(const DataLazy_ptr& left, const DataLazy_ptr& right, ES_optype op)
300  {  {
301     switch(op)     switch(getOpgroup(op))
302     {     {
303     case IDENTITY: return left->getLength();     case G_IDENTITY: return 1;
304       case G_BINARY: return 1+max(left->getBuffsRequired(),right->getBuffsRequired()+1);
305       case G_UNARY:
306       case G_UNARY_P: return max(left->getBuffsRequired(),1);
307       case G_NP1OUT: return 1+max(left->getBuffsRequired(),1);
308       case G_NP1OUT_P: return 1+max(left->getBuffsRequired(),1);
309       case G_TENSORPROD: return 1+max(left->getBuffsRequired(),right->getBuffsRequired()+1);
310     default:     default:
311      throw DataException("Programmer Error - attempt to getLength() for operator "+opToString(op)+".");      throw DataException("Programmer Error - attempt to calcBuffs() for operator "+opToString(op)+".");
   
312     }     }
313  }  }
314    
 string ES_opstrings[]={"UNKNOWN","IDENTITY"};  
 int ES_opcount=2;  
315    
316  }   // end anonymous namespace  }   // end anonymous namespace
317    
318    
319    
320    // Return a string representing the operation
321  const std::string&  const std::string&
322  opToString(ES_optype op)  opToString(ES_optype op)
323  {  {
# Line 82  opToString(ES_optype op) Line 331  opToString(ES_optype op)
331    
332  DataLazy::DataLazy(DataAbstract_ptr p)  DataLazy::DataLazy(DataAbstract_ptr p)
333      : parent(p->getFunctionSpace(),p->getShape()),      : parent(p->getFunctionSpace(),p->getShape()),
334      m_left(p),      m_op(IDENTITY),
335      m_op(IDENTITY)      m_axis_offset(0),
336        m_transpose(0),
337        m_SL(0), m_SM(0), m_SR(0)
338    {
339       if (p->isLazy())
340       {
341        // I don't want identity of Lazy.
342        // Question: Why would that be so bad?
343        // Answer: We assume that the child of ID is something we can call getVector on
344        throw DataException("Programmer error - attempt to create identity from a DataLazy.");
345       }
346       else
347       {
348        m_id=dynamic_pointer_cast<DataReady>(p);
349        if(p->isConstant()) {m_readytype='C';}
350        else if(p->isExpanded()) {m_readytype='E';}
351        else if (p->isTagged()) {m_readytype='T';}
352        else {throw DataException("Unknown DataReady instance in DataLazy constructor.");}
353       }
354       m_buffsRequired=1;
355       m_samplesize=getNumDPPSample()*getNoValues();
356       m_maxsamplesize=m_samplesize;
357    LAZYDEBUG(cout << "(1)Lazy created with " << m_samplesize << endl;)
358    }
359    
360    
361    
362    
363    DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op)
364        : parent(left->getFunctionSpace(),left->getShape()),
365        m_op(op),
366        m_axis_offset(0),
367        m_transpose(0),
368        m_SL(0), m_SM(0), m_SR(0)
369  {  {
370     length=resultLength(m_left,m_right,m_op);     if ((getOpgroup(op)!=G_UNARY) && (getOpgroup(op)!=G_NP1OUT))
371       {
372        throw DataException("Programmer error - constructor DataLazy(left, op) will only process UNARY operations.");
373       }
374    
375       DataLazy_ptr lleft;
376       if (!left->isLazy())
377       {
378        lleft=DataLazy_ptr(new DataLazy(left));
379       }
380       else
381       {
382        lleft=dynamic_pointer_cast<DataLazy>(left);
383       }
384       m_readytype=lleft->m_readytype;
385       m_left=lleft;
386       m_buffsRequired=calcBuffs(m_left, m_right,m_op); // yeah m_right will be null at this point
387       m_samplesize=getNumDPPSample()*getNoValues();
388       m_maxsamplesize=max(m_samplesize,m_left->getMaxSampleSize());
389  }  }
390    
391    
392    // In this constructor we need to consider interpolation
393  DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)  DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
394      : parent(resultFS(left,right,op), resultShape(left,right,op)),      : parent(resultFS(left,right,op), resultShape(left,right,op)),
395      m_left(left),      m_op(op),
396      m_right(right),      m_SL(0), m_SM(0), m_SR(0)
     m_op(op)  
397  {  {
398     length=resultLength(m_left,m_right,m_op);     if ((getOpgroup(op)!=G_BINARY))
399       {
400        throw DataException("Programmer error - constructor DataLazy(left, right, op) will only process BINARY operations.");
401       }
402    
403       if (getFunctionSpace()!=left->getFunctionSpace())    // left needs to be interpolated
404       {
405        FunctionSpace fs=getFunctionSpace();
406        Data ltemp(left);
407        Data tmp(ltemp,fs);
408        left=tmp.borrowDataPtr();
409       }
410       if (getFunctionSpace()!=right->getFunctionSpace())   // right needs to be interpolated
411       {
412        Data tmp(Data(right),getFunctionSpace());
413        right=tmp.borrowDataPtr();
414       }
415       left->operandCheck(*right);
416    
417       if (left->isLazy())          // the children need to be DataLazy. Wrap them in IDENTITY if required
418       {
419        m_left=dynamic_pointer_cast<DataLazy>(left);
420       }
421       else
422       {
423        m_left=DataLazy_ptr(new DataLazy(left));
424       }
425       if (right->isLazy())
426       {
427        m_right=dynamic_pointer_cast<DataLazy>(right);
428       }
429       else
430       {
431        m_right=DataLazy_ptr(new DataLazy(right));
432       }
433       char lt=m_left->m_readytype;
434       char rt=m_right->m_readytype;
435       if (lt=='E' || rt=='E')
436       {
437        m_readytype='E';
438       }
439       else if (lt=='T' || rt=='T')
440       {
441        m_readytype='T';
442       }
443       else
444       {
445        m_readytype='C';
446       }
447       m_samplesize=getNumDPPSample()*getNoValues();
448       m_maxsamplesize=max(max(m_samplesize,m_right->getMaxSampleSize()),m_left->getMaxSampleSize());  
449       m_buffsRequired=calcBuffs(m_left, m_right,m_op);
450    LAZYDEBUG(cout << "(3)Lazy created with " << m_samplesize << endl;)
451  }  }
452    
453    DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op, int axis_offset, int transpose)
454        : parent(resultFS(left,right,op), GTPShape(left,right, axis_offset, transpose, m_SL,m_SM, m_SR)),
455        m_op(op),
456        m_axis_offset(axis_offset),
457        m_transpose(transpose)
458    {
459       if ((getOpgroup(op)!=G_TENSORPROD))
460       {
461        throw DataException("Programmer error - constructor DataLazy(left, right, op, ax, tr) will only process BINARY operations which require parameters.");
462       }
463       if ((transpose>2) || (transpose<0))
464       {
465        throw DataException("DataLazy GeneralTensorProduct constructor: Error - transpose should be 0, 1 or 2");
466       }
467       if (getFunctionSpace()!=left->getFunctionSpace())    // left needs to be interpolated
468       {
469        FunctionSpace fs=getFunctionSpace();
470        Data ltemp(left);
471        Data tmp(ltemp,fs);
472        left=tmp.borrowDataPtr();
473       }
474       if (getFunctionSpace()!=right->getFunctionSpace())   // right needs to be interpolated
475       {
476        Data tmp(Data(right),getFunctionSpace());
477        right=tmp.borrowDataPtr();
478       }
479       left->operandCheck(*right);
480    
481       if (left->isLazy())          // the children need to be DataLazy. Wrap them in IDENTITY if required
482       {
483        m_left=dynamic_pointer_cast<DataLazy>(left);
484       }
485       else
486       {
487        m_left=DataLazy_ptr(new DataLazy(left));
488       }
489       if (right->isLazy())
490       {
491        m_right=dynamic_pointer_cast<DataLazy>(right);
492       }
493       else
494       {
495        m_right=DataLazy_ptr(new DataLazy(right));
496       }
497       char lt=m_left->m_readytype;
498       char rt=m_right->m_readytype;
499       if (lt=='E' || rt=='E')
500       {
501        m_readytype='E';
502       }
503       else if (lt=='T' || rt=='T')
504       {
505        m_readytype='T';
506       }
507       else
508       {
509        m_readytype='C';
510       }
511       m_samplesize=getNumDPPSample()*getNoValues();
512       m_maxsamplesize=max(max(m_samplesize,m_right->getMaxSampleSize()),m_left->getMaxSampleSize());  
513       m_buffsRequired=calcBuffs(m_left, m_right,m_op);
514    LAZYDEBUG(cout << "(4)Lazy created with " << m_samplesize << endl;)
515    }
516    
517    
518    DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, int axis_offset)
519        : parent(left->getFunctionSpace(), resultShape(left,op)),
520        m_op(op),
521        m_axis_offset(axis_offset),
522        m_transpose(0),
523        m_tol(0)
524    {
525       if ((getOpgroup(op)!=G_NP1OUT_P))
526       {
527        throw DataException("Programmer error - constructor DataLazy(left, op, ax) will only process UNARY operations which require parameters.");
528       }
529       DataLazy_ptr lleft;
530       if (!left->isLazy())
531       {
532        lleft=DataLazy_ptr(new DataLazy(left));
533       }
534       else
535       {
536        lleft=dynamic_pointer_cast<DataLazy>(left);
537       }
538       m_readytype=lleft->m_readytype;
539       m_left=lleft;
540       m_buffsRequired=calcBuffs(m_left, m_right,m_op); // yeah m_right will be null at this point
541       m_samplesize=getNumDPPSample()*getNoValues();
542       m_maxsamplesize=max(m_samplesize,m_left->getMaxSampleSize());
543    LAZYDEBUG(cout << "(5)Lazy created with " << m_samplesize << endl;)
544    }
545    
546    DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, double tol)
547        : parent(left->getFunctionSpace(), left->getShape()),
548        m_op(op),
549        m_axis_offset(0),
550        m_transpose(0),
551        m_tol(tol)
552    {
553       if ((getOpgroup(op)!=G_UNARY_P))
554       {
555        throw DataException("Programmer error - constructor DataLazy(left, op, tol) will only process UNARY operations which require parameters.");
556       }
557       DataLazy_ptr lleft;
558       if (!left->isLazy())
559       {
560        lleft=DataLazy_ptr(new DataLazy(left));
561       }
562       else
563       {
564        lleft=dynamic_pointer_cast<DataLazy>(left);
565       }
566       m_readytype=lleft->m_readytype;
567       m_left=lleft;
568       m_buffsRequired=calcBuffs(m_left, m_right,m_op); // yeah m_right will be null at this point
569       m_samplesize=getNumDPPSample()*getNoValues();
570       m_maxsamplesize=max(m_samplesize,m_left->getMaxSampleSize());
571    LAZYDEBUG(cout << "(6)Lazy created with " << m_samplesize << endl;)
572    }
573    
574  DataLazy::~DataLazy()  DataLazy::~DataLazy()
575  {  {
576  }  }
577    
578  // If resolving records a pointer to the resolved Data we may need to rethink the const on this method  
579    int
580    DataLazy::getBuffsRequired() const
581    {
582        return m_buffsRequired;
583    }
584    
585    
586    size_t
587    DataLazy::getMaxSampleSize() const
588    {
589        return m_maxsamplesize;
590    }
591    
592    /*
593      \brief Evaluates the expression using methods on Data.
594      This does the work for the collapse method.
595      For reasons of efficiency do not call this method on DataExpanded nodes.
596    */
597  DataReady_ptr  DataReady_ptr
598  DataLazy::resolve()  DataLazy::collapseToReady()
599  {  {
600      if (m_readytype=='E')
601      { // this is more an efficiency concern than anything else
602        throw DataException("Programmer Error - do not use collapse on Expanded data.");
603      }
604    if (m_op==IDENTITY)    if (m_op==IDENTITY)
605    {    {
606      return m_left->resolve();      return m_id;
607    }    }
608    else    DataReady_ptr pleft=m_left->collapseToReady();
609      Data left(pleft);
610      Data right;
611      if ((getOpgroup(m_op)==G_BINARY) || (getOpgroup(m_op)==G_TENSORPROD))
612      {
613        right=Data(m_right->collapseToReady());
614      }
615      Data result;
616      switch(m_op)
617      {
618        case ADD:
619        result=left+right;
620        break;
621        case SUB:      
622        result=left-right;
623        break;
624        case MUL:      
625        result=left*right;
626        break;
627        case DIV:      
628        result=left/right;
629        break;
630        case SIN:
631        result=left.sin();  
632        break;
633        case COS:
634        result=left.cos();
635        break;
636        case TAN:
637        result=left.tan();
638        break;
639        case ASIN:
640        result=left.asin();
641        break;
642        case ACOS:
643        result=left.acos();
644        break;
645        case ATAN:
646        result=left.atan();
647        break;
648        case SINH:
649        result=left.sinh();
650        break;
651        case COSH:
652        result=left.cosh();
653        break;
654        case TANH:
655        result=left.tanh();
656        break;
657        case ERF:
658        result=left.erf();
659        break;
660       case ASINH:
661        result=left.asinh();
662        break;
663       case ACOSH:
664        result=left.acosh();
665        break;
666       case ATANH:
667        result=left.atanh();
668        break;
669        case LOG10:
670        result=left.log10();
671        break;
672        case LOG:
673        result=left.log();
674        break;
675        case SIGN:
676        result=left.sign();
677        break;
678        case ABS:
679        result=left.abs();
680        break;
681        case NEG:
682        result=left.neg();
683        break;
684        case POS:
685        // it doesn't mean anything for delayed.
686        // it will just trigger a deep copy of the lazy object
687        throw DataException("Programmer error - POS not supported for lazy data.");
688        break;
689        case EXP:
690        result=left.exp();
691        break;
692        case SQRT:
693        result=left.sqrt();
694        break;
695        case RECIP:
696        result=left.oneOver();
697        break;
698        case GZ:
699        result=left.wherePositive();
700        break;
701        case LZ:
702        result=left.whereNegative();
703        break;
704        case GEZ:
705        result=left.whereNonNegative();
706        break;
707        case LEZ:
708        result=left.whereNonPositive();
709        break;
710        case NEZ:
711        result=left.whereNonZero(m_tol);
712        break;
713        case EZ:
714        result=left.whereZero(m_tol);
715        break;
716        case SYM:
717        result=left.symmetric();
718        break;
719        case NSYM:
720        result=left.nonsymmetric();
721        break;
722        case PROD:
723        result=C_GeneralTensorProduct(left,right,m_axis_offset, m_transpose);
724        break;
725        case TRANS:
726        result=left.transpose(m_axis_offset);
727        break;
728        case TRACE:
729        result=left.trace(m_axis_offset);
730        break;
731        default:
732        throw DataException("Programmer error - collapseToReady does not know how to resolve operator "+opToString(m_op)+".");
733      }
734      return result.borrowReadyPtr();
735    }
736    
737    /*
738       \brief Converts the DataLazy into an IDENTITY storing the value of the expression.
739       This method uses the original methods on the Data class to evaluate the expressions.
740       For this reason, it should not be used on DataExpanded instances. (To do so would defeat
741       the purpose of using DataLazy in the first place).
742    */
743    void
744    DataLazy::collapse()
745    {
746      if (m_op==IDENTITY)
747      {
748        return;
749      }
750      if (m_readytype=='E')
751      { // this is more an efficiency concern than anything else
752        throw DataException("Programmer Error - do not use collapse on Expanded data.");
753      }
754      m_id=collapseToReady();
755      m_op=IDENTITY;
756    }
757    
758    /*
759      \brief Compute the value of the expression (unary operation) for the given sample.
760      \return Vector which stores the value of the subexpression for the given sample.
761      \param v A vector to store intermediate results.
762      \param offset Index in v to begin storing results.
763      \param sampleNo Sample number to evaluate.
764      \param roffset (output parameter) the offset in the return vector where the result begins.
765    
766      The return value will be an existing vector so do not deallocate it.
767      If the result is stored in v it should be stored at the offset given.
768      Everything from offset to the end of v should be considered available for this method to use.
769    */
770    DataTypes::ValueType*
771    DataLazy::resolveUnary(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const
772    {
773        // we assume that any collapsing has been done before we get here
774        // since we only have one argument we don't need to think about only
775        // processing single points.
776      if (m_readytype!='E')
777      {
778        throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
779      }
780      const ValueType* vleft=m_left->resolveSample(v,offset,sampleNo,roffset);
781      const double* left=&((*vleft)[roffset]);
782      double* result=&(v[offset]);
783      roffset=offset;
784      switch (m_op)
785      {
786        case SIN:  
787        tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sin);
788        break;
789        case COS:
790        tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cos);
791        break;
792        case TAN:
793        tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tan);
794        break;
795        case ASIN:
796        tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::asin);
797        break;
798        case ACOS:
799        tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::acos);
800        break;
801        case ATAN:
802        tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::atan);
803        break;
804        case SINH:
805        tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sinh);
806        break;
807        case COSH:
808        tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cosh);
809        break;
810        case TANH:
811        tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tanh);
812        break;
813        case ERF:
814    #if defined (_WIN32) && !defined(__INTEL_COMPILER)
815        throw DataException("Error - Data:: erf function is not supported on _WIN32 platforms.");
816    #else
817        tensor_unary_operation(m_samplesize, left, result, ::erf);
818        break;
819    #endif
820       case ASINH:
821    #if defined (_WIN32) && !defined(__INTEL_COMPILER)
822        tensor_unary_operation(m_samplesize, left, result, escript::asinh_substitute);
823    #else
824        tensor_unary_operation(m_samplesize, left, result, ::asinh);
825    #endif  
826        break;
827       case ACOSH:
828    #if defined (_WIN32) && !defined(__INTEL_COMPILER)
829        tensor_unary_operation(m_samplesize, left, result, escript::acosh_substitute);
830    #else
831        tensor_unary_operation(m_samplesize, left, result, ::acosh);
832    #endif  
833        break;
834       case ATANH:
835    #if defined (_WIN32) && !defined(__INTEL_COMPILER)
836        tensor_unary_operation(m_samplesize, left, result, escript::atanh_substitute);
837    #else
838        tensor_unary_operation(m_samplesize, left, result, ::atanh);
839    #endif  
840        break;
841        case LOG10:
842        tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log10);
843        break;
844        case LOG:
845        tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log);
846        break;
847        case SIGN:
848        tensor_unary_operation(m_samplesize, left, result, escript::fsign);
849        break;
850        case ABS:
851        tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::fabs);
852        break;
853        case NEG:
854        tensor_unary_operation(m_samplesize, left, result, negate<double>());
855        break;
856        case POS:
857        // it doesn't mean anything for delayed.
858        // it will just trigger a deep copy of the lazy object
859        throw DataException("Programmer error - POS not supported for lazy data.");
860        break;
861        case EXP:
862        tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::exp);
863        break;
864        case SQRT:
865        tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sqrt);
866        break;
867        case RECIP:
868        tensor_unary_operation(m_samplesize, left, result, bind1st(divides<double>(),1.));
869        break;
870        case GZ:
871        tensor_unary_operation(m_samplesize, left, result, bind2nd(greater<double>(),0.0));
872        break;
873        case LZ:
874        tensor_unary_operation(m_samplesize, left, result, bind2nd(less<double>(),0.0));
875        break;
876        case GEZ:
877        tensor_unary_operation(m_samplesize, left, result, bind2nd(greater_equal<double>(),0.0));
878        break;
879        case LEZ:
880        tensor_unary_operation(m_samplesize, left, result, bind2nd(less_equal<double>(),0.0));
881        break;
882    // There are actually G_UNARY_P but I don't see a compelling reason to treat them differently
883        case NEZ:
884        tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsGT(),m_tol));
885        break;
886        case EZ:
887        tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsLTE(),m_tol));
888        break;
889    
890        default:
891        throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");
892      }
893      return &v;
894    }
895    
896    
897    
898    
899    
900    
901    /*
902      \brief Compute the value of the expression (unary operation) for the given sample.
903      \return Vector which stores the value of the subexpression for the given sample.
904      \param v A vector to store intermediate results.
905      \param offset Index in v to begin storing results.
906      \param sampleNo Sample number to evaluate.
907      \param roffset (output parameter) the offset in the return vector where the result begins.
908    
909      The return value will be an existing vector so do not deallocate it.
910      If the result is stored in v it should be stored at the offset given.
911      Everything from offset to the end of v should be considered available for this method to use.
912    */
913    DataTypes::ValueType*
914    DataLazy::resolveNP1OUT(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const
915    {
916        // we assume that any collapsing has been done before we get here
917        // since we only have one argument we don't need to think about only
918        // processing single points.
919      if (m_readytype!='E')
920      {
921        throw DataException("Programmer error - resolveNP1OUT should only be called on expanded Data.");
922      }
923        // since we can't write the result over the input, we need a result offset further along
924      size_t subroffset=roffset+m_samplesize;
925    LAZYDEBUG(cerr << "subroffset=" << subroffset << endl;)
926      const ValueType* vleft=m_left->resolveSample(v,offset+m_left->m_samplesize,sampleNo,subroffset);
927      roffset=offset;
928      size_t loop=0;
929      size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
930      size_t step=getNoValues();
931      switch (m_op)
932      {
933        case SYM:
934        for (loop=0;loop<numsteps;++loop)
935        {
936            DataMaths::symmetric(*vleft,m_left->getShape(),subroffset, v, getShape(), offset);
937            subroffset+=step;
938            offset+=step;
939        }
940        break;
941        case NSYM:
942        for (loop=0;loop<numsteps;++loop)
943        {
944            DataMaths::nonsymmetric(*vleft,m_left->getShape(),subroffset, v, getShape(), offset);
945            subroffset+=step;
946            offset+=step;
947        }
948        break;
949        default:
950        throw DataException("Programmer error - resolveNP1OUT can not resolve operator "+opToString(m_op)+".");
951      }
952      return &v;
953    }
954    
955    /*
956      \brief Compute the value of the expression (unary operation) for the given sample.
957      \return Vector which stores the value of the subexpression for the given sample.
958      \param v A vector to store intermediate results.
959      \param offset Index in v to begin storing results.
960      \param sampleNo Sample number to evaluate.
961      \param roffset (output parameter) the offset in the return vector where the result begins.
962    
963      The return value will be an existing vector so do not deallocate it.
964      If the result is stored in v it should be stored at the offset given.
965      Everything from offset to the end of v should be considered available for this method to use.
966    */
967    DataTypes::ValueType*
968    DataLazy::resolveNP1OUT_P(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const
969    {
970        // we assume that any collapsing has been done before we get here
971        // since we only have one argument we don't need to think about only
972        // processing single points.
973      if (m_readytype!='E')
974      {
975        throw DataException("Programmer error - resolveNP1OUT_P should only be called on expanded Data.");
976      }
977        // since we can't write the result over the input, we need a result offset further along
978      size_t subroffset;
979      const ValueType* vleft=m_left->resolveSample(v,offset+m_left->m_samplesize,sampleNo,subroffset);
980    LAZYDEBUG(cerr << "from=" << offset+m_left->m_samplesize << " to=" << subroffset << " ret=" << roffset << endl;)
981      roffset=offset;
982      size_t loop=0;
983      size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
984      size_t step=getNoValues();
985      switch (m_op)
986      {
987        case TRACE:
988        for (loop=0;loop<numsteps;++loop)
989        {
990                DataMaths::trace(*vleft,m_left->getShape(),subroffset, v,getShape(),offset,m_axis_offset);
991            subroffset+=step;
992            offset+=step;
993        }
994        break;
995        case TRANS:
996        for (loop=0;loop<numsteps;++loop)
997        {
998                DataMaths::transpose(*vleft,m_left->getShape(),subroffset, v,getShape(),offset,m_axis_offset);
999            subroffset+=step;
1000            offset+=step;
1001        }
1002        break;
1003        default:
1004        throw DataException("Programmer error - resolveNP1OUTP can not resolve operator "+opToString(m_op)+".");
1005      }
1006      return &v;
1007    }
1008    
1009    
// PROC_OP drives one binary elementwise operation over a sample.
// It deliberately captures these names from the caller (resolveBinary):
//   left, right             - vectors holding the operand values
//   lroffset, rroffset      - current read offsets into them
//   leftstep, rightstep     - inner-loop advance of each read offset
//   oleftstep, orightstep   - outer-loop advance of each read offset
//   numsteps, onumsteps     - inner/outer loop trip counts
//   resultp, resultStep     - output pointer and its per-step advance
//   chunksize               - doubles processed per tensor_binary_operation call
#define PROC_OP(TYPE,X)                               \
    for (int oo=0;oo<onumsteps;++oo)\
    {\
      for (int ss=0;ss<numsteps;++ss,resultp+=resultStep) \
      { \
LAZYDEBUG(cout << "[left,right]=[" << lroffset << "," << rroffset << "]" << endl;)\
LAZYDEBUG(cout << "{left,right}={" << (*left)[lroffset] << "," << (*right)[rroffset] << "}\n";)\
         tensor_binary_operation< TYPE >(chunksize, &((*left)[lroffset]), &((*right)[rroffset]), resultp, X); \
LAZYDEBUG(cout << " result=      " << resultp[0] << endl;) \
         lroffset+=leftstep; \
         rroffset+=rightstep; \
      }\
      lroffset+=oleftstep;\
      rroffset+=orightstep;\
    }
1025    
1026    /*
1027      \brief Compute the value of the expression (binary operation) for the given sample.
1028      \return Vector which stores the value of the subexpression for the given sample.
1029      \param v A vector to store intermediate results.
1030      \param offset Index in v to begin storing results.
1031      \param sampleNo Sample number to evaluate.
1032      \param roffset (output parameter) the offset in the return vector where the result begins.
1033    
1034      The return value will be an existing vector so do not deallocate it.
1035      If the result is stored in v it should be stored at the offset given.
1036      Everything from offset to the end of v should be considered available for this method to use.
1037    */
1038    // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1039    // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1040    // If both children are expanded, then we can process them in a single operation (we treat
1041    // the whole sample as one big datapoint.
1042    // If one of the children is not expanded, then we need to treat each point in the sample
1043    // individually.
1044    // There is an additional complication when scalar operations are considered.
1045    // For example, 2+Vector.
1046    // In this case each double within the point is treated individually
1047    DataTypes::ValueType*
1048    DataLazy::resolveBinary(ValueType& v,  size_t offset, int sampleNo, size_t& roffset) const
1049    {
1050    LAZYDEBUG(cout << "Resolve binary: " << toString() << endl;)
1051    
1052      size_t lroffset=0, rroffset=0;    // offsets in the left and right result vectors
1053        // first work out which of the children are expanded
1054      bool leftExp=(m_left->m_readytype=='E');
1055      bool rightExp=(m_right->m_readytype=='E');
1056      if (!leftExp && !rightExp)
1057      {
1058        throw DataException("Programmer Error - please use collapse if neither argument has type 'E'.");
1059      }
1060      bool leftScalar=(m_left->getRank()==0);
1061      bool rightScalar=(m_right->getRank()==0);
1062      if ((m_left->getRank()!=m_right->getRank()) && (!leftScalar && !rightScalar))
1063      {
1064        throw DataException("resolveBinary - ranks of arguments must match unless one of them is scalar.");
1065      }
1066      size_t leftsize=m_left->getNoValues();
1067      size_t rightsize=m_right->getNoValues();
1068      size_t chunksize=1;           // how many doubles will be processed in one go
1069      int leftstep=0;       // how far should the left offset advance after each step
1070      int rightstep=0;
1071      int numsteps=0;       // total number of steps for the inner loop
1072      int oleftstep=0;  // the o variables refer to the outer loop
1073      int orightstep=0; // The outer loop is only required in cases where there is an extended scalar
1074      int onumsteps=1;
1075      
1076      bool LES=(leftExp && leftScalar); // Left is an expanded scalar
1077      bool RES=(rightExp && rightScalar);
1078      bool LS=(!leftExp && leftScalar); // left is a single scalar
1079      bool RS=(!rightExp && rightScalar);
1080      bool LN=(!leftExp && !leftScalar);    // left is a single non-scalar
1081      bool RN=(!rightExp && !rightScalar);
1082      bool LEN=(leftExp && !leftScalar);    // left is an expanded non-scalar
1083      bool REN=(rightExp && !rightScalar);
1084    
1085      if ((LES && RES) || (LEN && REN)) // both are Expanded scalars or both are expanded non-scalars
1086      {
1087        chunksize=m_left->getNumDPPSample()*leftsize;
1088        leftstep=0;
1089        rightstep=0;
1090        numsteps=1;
1091      }
1092      else if (LES || RES)
1093      {
1094        chunksize=1;
1095        if (LES)        // left is an expanded scalar
1096        {
1097            if (RS)
1098            {
1099               leftstep=1;
1100               rightstep=0;
1101               numsteps=m_left->getNumDPPSample();
1102            }
1103            else        // RN or REN
1104            {
1105               leftstep=0;
1106               oleftstep=1;
1107               rightstep=1;
1108               orightstep=(RN?-rightsize:0);
1109               numsteps=rightsize;
1110               onumsteps=m_left->getNumDPPSample();
1111            }
1112        }
1113        else        // right is an expanded scalar
1114        {
1115            if (LS)
1116            {
1117               rightstep=1;
1118               leftstep=0;
1119               numsteps=m_right->getNumDPPSample();
1120            }
1121            else
1122            {
1123               rightstep=0;
1124               orightstep=1;
1125               leftstep=1;
1126               oleftstep=(LN?-leftsize:0);
1127               numsteps=leftsize;
1128               onumsteps=m_right->getNumDPPSample();
1129            }
1130        }
1131      }
1132      else  // this leaves (LEN, RS), (LEN, RN) and their transposes
1133      {
1134        if (LEN)    // and Right will be a single value
1135        {
1136            chunksize=rightsize;
1137            leftstep=rightsize;
1138            rightstep=0;
1139            numsteps=m_left->getNumDPPSample();
1140            if (RS)
1141            {
1142               numsteps*=leftsize;
1143            }
1144        }
1145        else    // REN
1146        {
1147            chunksize=leftsize;
1148            rightstep=leftsize;
1149            leftstep=0;
1150            numsteps=m_right->getNumDPPSample();
1151            if (LS)
1152            {
1153               numsteps*=rightsize;
1154            }
1155        }
1156      }
1157    
1158      int resultStep=max(leftstep,rightstep);   // only one (at most) should be !=0
1159        // Get the values of sub-expressions
1160      const ValueType* left=m_left->resolveSample(v,offset+getMaxSampleSize(),sampleNo,lroffset);   // see note on
1161        // calcBufss for why we can't put offset as the 2nd param above
1162      const ValueType* right=m_right->resolveSample(v,offset+2*getMaxSampleSize(),sampleNo,rroffset); // Note
1163        // the right child starts further along.
1164    LAZYDEBUG(cout << "Post sub calls in " << toString() << endl;)
1165    LAZYDEBUG(cout << "shapes=" << DataTypes::shapeToString(m_left->getShape()) << "," << DataTypes::shapeToString(m_right->getShape()) << endl;)
1166    LAZYDEBUG(cout << "chunksize=" << chunksize << endl << "leftstep=" << leftstep << " rightstep=" << rightstep;)
1167    LAZYDEBUG(cout << " numsteps=" << numsteps << endl << "oleftstep=" << oleftstep << " orightstep=" << orightstep;)
1168    LAZYDEBUG(cout << "onumsteps=" << onumsteps << endl;)
1169    LAZYDEBUG(cout << " DPPS=" << m_left->getNumDPPSample() << "," <<m_right->getNumDPPSample() << endl;)
1170    LAZYDEBUG(cout << "" << LS << RS << LN << RN << LES << RES <<LEN << REN <<   endl;)
1171      double* resultp=&(v[offset]);     // results are stored at the vector offset we recieved
1172      switch(m_op)
1173      {
1174        case ADD:
1175            PROC_OP(NO_ARG,plus<double>());
1176        break;
1177        case SUB:
1178        PROC_OP(NO_ARG,minus<double>());
1179        break;
1180        case MUL:
1181        PROC_OP(NO_ARG,multiplies<double>());
1182        break;
1183        case DIV:
1184        PROC_OP(NO_ARG,divides<double>());
1185        break;
1186        case POW:
1187           PROC_OP(double (double,double),::pow);
1188        break;
1189        default:
1190        throw DataException("Programmer error - resolveBinary can not resolve operator "+opToString(m_op)+".");
1191      }
1192      roffset=offset;  
1193      return &v;
1194    }
1195    
1196    
1197    
1198    /*
1199      \brief Compute the value of the expression (tensor product) for the given sample.
1200      \return Vector which stores the value of the subexpression for the given sample.
1201      \param v A vector to store intermediate results.
1202      \param offset Index in v to begin storing results.
1203      \param sampleNo Sample number to evaluate.
1204      \param roffset (output parameter) the offset in the return vector where the result begins.
1205    
1206      The return value will be an existing vector so do not deallocate it.
1207      If the result is stored in v it should be stored at the offset given.
1208      Everything from offset to the end of v should be considered available for this method to use.
1209    */
1210    // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1211    // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1212    // unlike the other resolve helpers, we must treat these datapoints separately.
1213    DataTypes::ValueType*
1214    DataLazy::resolveTProd(ValueType& v,  size_t offset, int sampleNo, size_t& roffset) const
1215    {
1216    LAZYDEBUG(cout << "Resolve TensorProduct: " << toString() << endl;)
1217    
1218      size_t lroffset=0, rroffset=0;    // offsets in the left and right result vectors
1219        // first work out which of the children are expanded
1220      bool leftExp=(m_left->m_readytype=='E');
1221      bool rightExp=(m_right->m_readytype=='E');
1222      int steps=getNumDPPSample();
1223      int leftStep=((leftExp && !rightExp)? m_right->getNoValues() : 0);
1224      int rightStep=((rightExp && !leftExp)? m_left->getNoValues() : 0);
1225      int resultStep=max(leftStep,rightStep);   // only one (at most) should be !=0
1226        // Get the values of sub-expressions (leave a gap of one sample for the result).
1227      int gap=offset+m_left->getMaxSampleSize();    // actually only needs to be m_left->m_samplesize
1228      const ValueType* left=m_left->resolveSample(v,gap,sampleNo,lroffset);
1229      gap+=m_right->getMaxSampleSize();
1230      const ValueType* right=m_right->resolveSample(v,gap,sampleNo,rroffset);
1231    LAZYDEBUG(cout << "Post sub calls: " << toString() << endl;)
1232    LAZYDEBUG(cout << "LeftExp=" << leftExp << " rightExp=" << rightExp << endl;)
1233    LAZYDEBUG(cout << "LeftR=" << m_left->getRank() << " rightExp=" << m_right->getRank() << endl;)
1234    LAZYDEBUG(cout << "LeftSize=" << m_left->getNoValues() << " RightSize=" << m_right->getNoValues() << endl;)
1235    LAZYDEBUG(cout << "m_samplesize=" << m_samplesize << endl;)
1236    LAZYDEBUG(cout << "outputshape=" << DataTypes::shapeToString(getShape()) << endl;)
1237      double* resultp=&(v[offset]);     // results are stored at the vector offset we recieved
1238      switch(m_op)
1239      {
1240        case PROD:
1241        for (int i=0;i<steps;++i,resultp+=resultStep)
1242        {
1243    LAZYDEBUG(cout << "lroffset=" << lroffset << "rroffset=" << rroffset << endl;)
1244    LAZYDEBUG(cout << "l*=" << left << " r*=" << right << endl;)
1245    LAZYDEBUG(cout << "m_SL=" << m_SL << " m_SM=" << m_SM << " m_SR=" << m_SR << endl;)
1246              const double *ptr_0 = &((*left)[lroffset]);
1247              const double *ptr_1 = &((*right)[rroffset]);
1248    LAZYDEBUG(cout << DataTypes::pointToString(*left, m_left->getShape(),lroffset,"LEFT") << endl;)
1249    LAZYDEBUG(cout << DataTypes::pointToString(*right,m_right->getShape(),rroffset, "RIGHT") << endl;)
1250              matrix_matrix_product(m_SL, m_SM, m_SR, ptr_0, ptr_1, resultp, m_transpose);
1251          lroffset+=leftStep;
1252          rroffset+=rightStep;
1253        }
1254        break;
1255        default:
1256        throw DataException("Programmer error - resolveTProduct can not resolve operator "+opToString(m_op)+".");
1257      }
1258      roffset=offset;
1259      return &v;
1260    }
1261    
1262    
1263    
1264    /*
1265      \brief Compute the value of the expression for the given sample.
1266      \return Vector which stores the value of the subexpression for the given sample.
1267      \param v A vector to store intermediate results.
1268      \param offset Index in v to begin storing results.
1269      \param sampleNo Sample number to evaluate.
1270      \param roffset (output parameter) the offset in the return vector where the result begins.
1271    
1272      The return value will be an existing vector so do not deallocate it.
1273    */
1274    // the vector and the offset are a place where the method could write its data if it wishes
1275    // it is not obligated to do so. For example, if it has its own storage already, it can use that.
1276    // Hence the return value to indicate where the data is actually stored.
1277    // Regardless, the storage should be assumed to be used, even if it isn't.
1278    
1279    // the roffset is the offset within the returned vector where the data begins
1280    const DataTypes::ValueType*
1281    DataLazy::resolveSample(ValueType& v, size_t offset, int sampleNo, size_t& roffset)
1282    {
1283    LAZYDEBUG(cout << "Resolve sample " << toString() << endl;)
1284        // collapse so we have a 'E' node or an IDENTITY for some other type
1285      if (m_readytype!='E' && m_op!=IDENTITY)
1286      {
1287        collapse();
1288      }
1289      if (m_op==IDENTITY)  
1290      {
1291        const ValueType& vec=m_id->getVector();
1292        if (m_readytype=='C')
1293        {
1294        roffset=0;
1295    LAZYDEBUG(cout << "Finish  sample " << toString() << endl;)
1296        return &(vec);
1297        }
1298        roffset=m_id->getPointOffset(sampleNo, 0);
1299    LAZYDEBUG(cout << "Finish  sample " << toString() << endl;)
1300        return &(vec);
1301      }
1302      if (m_readytype!='E')
1303      {
1304        throw DataException("Programmer Error - Collapse did not produce an expanded node.");
1305      }
1306      switch (getOpgroup(m_op))
1307    {    {
1308      throw DataException("Programmer error - do not know how to resolve operator "+opToString(m_op)+".");    case G_UNARY:
1309      case G_UNARY_P: return resolveUnary(v, offset,sampleNo,roffset);
1310      case G_BINARY: return resolveBinary(v, offset,sampleNo,roffset);
1311      case G_NP1OUT: return resolveNP1OUT(v, offset, sampleNo,roffset);
1312      case G_NP1OUT_P: return resolveNP1OUT_P(v, offset, sampleNo,roffset);
1313      case G_TENSORPROD: return resolveTProd(v,offset, sampleNo,roffset);
1314      default:
1315        throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");
1316      }
1317    
1318    }
1319    
1320    
1321    // To simplify the memory management, all threads operate on one large vector, rather than one each.
1322    // Each sample is evaluated independently and copied into the result DataExpanded.
1323    DataReady_ptr
1324    DataLazy::resolve()
1325    {
1326    
1327    LAZYDEBUG(cout << "Sample size=" << m_samplesize << endl;)
1328    LAZYDEBUG(cout << "Buffers=" << m_buffsRequired << endl;)
1329    
1330      if (m_readytype!='E')     // if the whole sub-expression is Constant or Tagged, then evaluate it normally
1331      {
1332        collapse();
1333      }
1334      if (m_op==IDENTITY)       // So a lazy expression of Constant or Tagged data will be returned here.
1335      {
1336        return m_id;
1337      }
1338        // from this point on we must have m_op!=IDENTITY and m_readytype=='E'
1339      size_t threadbuffersize=m_maxsamplesize*(max(1,m_buffsRequired)); // Each thread needs to have enough
1340        // storage to evaluate its expression
1341      int numthreads=1;
1342    #ifdef _OPENMP
1343      numthreads=getNumberOfThreads();
1344    #endif
1345      ValueType v(numthreads*threadbuffersize);
1346    LAZYDEBUG(cout << "Buffer created with size=" << v.size() << endl;)
1347      DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(),  ValueType(getNoValues()));
1348      ValueType& resvec=result->getVector();
1349      DataReady_ptr resptr=DataReady_ptr(result);
1350      int sample;
1351      size_t outoffset;     // offset in the output data
1352      int totalsamples=getNumSamples();
1353      const ValueType* res=0;   // Vector storing the answer
1354      size_t resoffset=0;       // where in the vector to find the answer
1355    LAZYDEBUG(cout << "Total number of samples=" <<totalsamples << endl;)
1356      #pragma omp parallel for private(sample,resoffset,outoffset,res) schedule(static)
1357      for (sample=0;sample<totalsamples;++sample)
1358      {
1359          if (sample==0)  {ENABLEDEBUG}
1360    LAZYDEBUG(cout << "################################# " << sample << endl;)
1361    #ifdef _OPENMP
1362        res=resolveSample(v,threadbuffersize*omp_get_thread_num(),sample,resoffset);
1363    #else
1364        res=resolveSample(v,0,sample,resoffset);   // res would normally be v, but not if its a single IDENTITY op.
1365    #endif
1366    LAZYDEBUG(cerr << "-------------------------------- " << endl;)
1367    LAZYDEBUG(cerr<< "Copying sample#" << sample << endl;)
1368        outoffset=result->getPointOffset(sample,0);
1369    LAZYDEBUG(cerr << "offset=" << outoffset << " from offset=" << resoffset << " " << m_samplesize << " doubles" << endl;)
1370        for (unsigned int i=0;i<m_samplesize;++i,++outoffset,++resoffset)   // copy values into the output vector
1371        {
1372    // LAZYDEBUG(cerr << "outoffset=" << outoffset << " resoffset=" << resoffset << endl;)
1373        resvec[outoffset]=(*res)[resoffset];
1374        }
1375    LAZYDEBUG(cerr << DataTypes::pointToString(resvec,getShape(),outoffset-m_samplesize+DataTypes::noValues(getShape()),"Final result:") << endl;)
1376    LAZYDEBUG(cerr << "*********************************" << endl;)
1377        DISABLEDEBUG
1378    }    }
1379      return resptr;
1380  }  }
1381    
1382  std::string  std::string
1383  DataLazy::toString() const  DataLazy::toString() const
1384  {  {
1385    return "Lazy evaluation object. No details available.";    ostringstream oss;
1386      oss << "Lazy Data:";
1387      intoString(oss);
1388      return oss.str();
1389    }
1390    
1391    
1392    void
1393    DataLazy::intoString(ostringstream& oss) const
1394    {
1395      switch (getOpgroup(m_op))
1396      {
1397      case G_IDENTITY:
1398        if (m_id->isExpanded())
1399        {
1400           oss << "E";
1401        }
1402        else if (m_id->isTagged())
1403        {
1404          oss << "T";
1405        }
1406        else if (m_id->isConstant())
1407        {
1408          oss << "C";
1409        }
1410        else
1411        {
1412          oss << "?";
1413        }
1414        oss << '@' << m_id.get();
1415        break;
1416      case G_BINARY:
1417        oss << '(';
1418        m_left->intoString(oss);
1419        oss << ' ' << opToString(m_op) << ' ';
1420        m_right->intoString(oss);
1421        oss << ')';
1422        break;
1423      case G_UNARY:
1424      case G_UNARY_P:
1425      case G_NP1OUT:
1426      case G_NP1OUT_P:
1427        oss << opToString(m_op) << '(';
1428        m_left->intoString(oss);
1429        oss << ')';
1430        break;
1431      case G_TENSORPROD:
1432        oss << opToString(m_op) << '(';
1433        m_left->intoString(oss);
1434        oss << ", ";
1435        m_right->intoString(oss);
1436        oss << ')';
1437        break;
1438      default:
1439        oss << "UNKNOWN";
1440      }
1441  }  }
1442    
 // Note that in this case, deepCopy does not make copies of the leaves.  
 // Hopefully copy on write (or whatever we end up using) will take care of this.  
1443  DataAbstract*  DataAbstract*
1444  DataLazy::deepCopy()  DataLazy::deepCopy()
1445  {  {
1446    if (m_op==IDENTITY)    switch (getOpgroup(m_op))
1447    {    {
1448      return new DataLazy(m_left);    // we don't need to copy the child here    case G_IDENTITY:  return new DataLazy(m_id->deepCopy()->getPtr());
1449      case G_UNARY: return new DataLazy(m_left->deepCopy()->getPtr(),m_op);
1450      case G_BINARY:    return new DataLazy(m_left->deepCopy()->getPtr(),m_right->deepCopy()->getPtr(),m_op);
1451      case G_NP1OUT: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(),m_op);
1452      case G_TENSORPROD: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
1453      default:
1454        throw DataException("Programmer error - do not know how to deepcopy operator "+opToString(m_op)+".");
1455    }    }
   return new DataLazy(m_left->deepCopy()->getPtr(),m_right->deepCopy()->getPtr(),m_op);  
1456  }  }
1457    
1458    
1459    // There is no single, natural interpretation of getLength on DataLazy.
1460    // Instances of DataReady can look at the size of their vectors.
1461    // For lazy though, it could be the size the data would be if it were resolved;
1462    // or it could be some function of the lengths of the DataReady instances which
1463    // form part of the expression.
1464    // Rather than have people making assumptions, I have disabled the method.
1465  DataTypes::ValueType::size_type  DataTypes::ValueType::size_type
1466  DataLazy::getLength() const  DataLazy::getLength() const
1467  {  {
1468    return length;    throw DataException("getLength() does not make sense for lazy data.");
1469  }  }
1470    
1471    
1472  DataAbstract*  DataAbstract*
1473  DataLazy::getSlice(const DataTypes::RegionType& region) const  DataLazy::getSlice(const DataTypes::RegionType& region) const
1474  {  {
1475    // this seems like a really good one to include I just haven't added it yet    throw DataException("getSlice - not implemented for Lazy objects.");
   throw DataException("getSlice - not implemented for Lazy objects - yet.");  
1476  }  }
1477    
1478    
1479    // To do this we need to rely on our child nodes
1480    DataTypes::ValueType::size_type
1481    DataLazy::getPointOffset(int sampleNo,
1482                     int dataPointNo)
1483    {
1484      if (m_op==IDENTITY)
1485      {
1486        return m_id->getPointOffset(sampleNo,dataPointNo);
1487      }
1488      if (m_readytype!='E')
1489      {
1490        collapse();
1491        return m_id->getPointOffset(sampleNo,dataPointNo);
1492      }
1493      // at this point we do not have an identity node and the expression will be Expanded
1494      // so we only need to know which child to ask
1495      if (m_left->m_readytype=='E')
1496      {
1497        return m_left->getPointOffset(sampleNo,dataPointNo);
1498      }
1499      else
1500      {
1501        return m_right->getPointOffset(sampleNo,dataPointNo);
1502      }
1503    }
1504    
1505    // To do this we need to rely on our child nodes
1506  DataTypes::ValueType::size_type  DataTypes::ValueType::size_type
1507  DataLazy::getPointOffset(int sampleNo,  DataLazy::getPointOffset(int sampleNo,
1508                   int dataPointNo) const                   int dataPointNo) const
1509  {  {
1510    throw DataException("getPointOffset - not implemented for Lazy objects - yet.");    if (m_op==IDENTITY)
1511      {
1512        return m_id->getPointOffset(sampleNo,dataPointNo);
1513      }
1514      if (m_readytype=='E')
1515      {
1516        // at this point we do not have an identity node and the expression will be Expanded
1517        // so we only need to know which child to ask
1518        if (m_left->m_readytype=='E')
1519        {
1520        return m_left->getPointOffset(sampleNo,dataPointNo);
1521        }
1522        else
1523        {
1524        return m_right->getPointOffset(sampleNo,dataPointNo);
1525        }
1526      }
1527      if (m_readytype=='C')
1528      {
1529        return m_left->getPointOffset(sampleNo,dataPointNo); // which child doesn't matter
1530      }
1531      throw DataException("Programmer error - getPointOffset on lazy data may require collapsing (but this object is marked const).");
1532    }
1533    
1534    // It would seem that DataTagged will need to be treated differently since even after setting all tags
1535    // to zero, all the tags from all the DataTags would be in the result.
1536    // However since they all have the same value (0) whether they are there or not should not matter.
1537    // So I have decided that for all types this method will create a constant 0.
1538    // It can be promoted up as required.
1539    // A possible efficiency concern might be expanded->constant->expanded which has an extra memory management
1540    // but we can deal with that if it arrises.
1541    void
1542    DataLazy::setToZero()
1543    {
1544      DataTypes::ValueType v(getNoValues(),0);
1545      m_id=DataReady_ptr(new DataConstant(getFunctionSpace(),getShape(),v));
1546      m_op=IDENTITY;
1547      m_right.reset();  
1548      m_left.reset();
1549      m_readytype='C';
1550      m_buffsRequired=1;
1551  }  }
1552    
1553  }   // end namespace  }   // end namespace

Legend:
Removed from v.1865  
changed lines
  Added in v.2157

  ViewVC Help
Powered by ViewVC 1.1.26