/[escript]/branches/clazy/escriptcore/src/DataLazy.cpp
ViewVC logotype

Diff of /branches/clazy/escriptcore/src/DataLazy.cpp

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

branches/schroedinger/escript/src/DataLazy.cpp revision 1888 by jfenwick, Wed Oct 15 04:00:21 2008 UTC trunk/escript/src/DataLazy.cpp revision 2824 by jfenwick, Wed Dec 16 01:22:56 2009 UTC
# Line 1  Line 1 
1    
2  /*******************************************************  /*******************************************************
3  *  *
4  * Copyright (c) 2003-2008 by University of Queensland  * Copyright (c) 2003-2009 by University of Queensland
5  * Earth Systems Science Computational Center (ESSCC)  * Earth Systems Science Computational Center (ESSCC)
6  * http://www.uq.edu.au/esscc  * http://www.uq.edu.au/esscc
7  *  *
# Line 26  Line 26 
26  #include "DataTypes.h"  #include "DataTypes.h"
27  #include "Data.h"  #include "Data.h"
28  #include "UnaryFuncs.h"     // for escript::fsign  #include "UnaryFuncs.h"     // for escript::fsign
29    #include "Utils.h"
30    
31    #include "EscriptParams.h"
32    
33    #include <iomanip>      // for some fancy formatting in debug
34    
35    // #define LAZYDEBUG(X) if (privdebug){X;}
36    #define LAZYDEBUG(X)
37    namespace
38    {
39    bool privdebug=false;
40    
41    #define ENABLEDEBUG privdebug=true;
42    #define DISABLEDEBUG privdebug=false;
43    }
44    
45    // #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {cerr << "\n!!!!!!! SIZE LIMIT EXCEEDED " << m_children << ";" << m_height << endl << toString() << endl;resolveToIdentity();}
46    
47    // #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {cerr << "SIZE LIMIT EXCEEDED " << m_height << endl;resolveToIdentity();}
48    
49    
50    #define SIZELIMIT if (m_height>escript::escriptParams.getTOO_MANY_LEVELS())  {if (escript::escriptParams.getLAZY_VERBOSE()){cerr << "SIZE LIMIT EXCEEDED height=" << m_height << endl;}resolveToIdentity();}
51    
52    /*
53    How does DataLazy work?
54    ~~~~~~~~~~~~~~~~~~~~~~~
55    
56    Each instance represents a single operation on one or two other DataLazy instances. These arguments are normally
57    denoted left and right.
58    
59    A special operation, IDENTITY, stores an instance of DataReady in the m_id member.
60    This means that all "internal" nodes in the structure are instances of DataLazy.
61    
62    Each operation has a string representation as well as an opgroup - eg G_IDENTITY, G_BINARY, ...
63    Note that IDENITY is not considered a unary operation.
64    
65    I am avoiding calling the structure formed a tree because it is not guaranteed to be one (eg c=a+a).
66    It must however form a DAG (directed acyclic graph).
67    I will refer to individual DataLazy objects with the structure as nodes.
68    
69    Each node also stores:
70    - m_readytype \in {'E','T','C','?'} ~ indicates what sort of DataReady would be produced if the expression was
71        evaluated.
72    - m_buffsrequired ~ the larged number of samples which would need to be kept simultaneously in order to
73        evaluate the expression.
74    - m_samplesize ~ the number of doubles stored in a sample.
75    
76    When a new node is created, the above values are computed based on the values in the child nodes.
77    Eg: if left requires 4 samples and right requires 6 then left+right requires 7 samples.
78    
79    The resolve method, which produces a DataReady from a DataLazy, does the following:
80    1) Create a DataReady to hold the new result.
81    2) Allocate a vector (v) big enough to hold m_buffsrequired samples.
82    3) For each sample, call resolveSample with v, to get its values and copy them into the result object.
83    
84    (In the case of OMP, multiple samples are resolved in parallel so the vector needs to be larger.)
85    
86    resolveSample returns a Vector* and an offset within that vector where the result is stored.
87    Normally, this would be v, but for identity nodes their internal vector is returned instead.
88    
89    The convention that I use, is that the resolve methods should store their results starting at the offset they are passed.
90    
91    For expressions which evaluate to Constant or Tagged, there is a different evaluation method.
92    The collapse method invokes the (non-lazy) operations on the Data class to evaluate the expression.
93    
94    To add a new operator you need to do the following (plus anything I might have forgotten - adding a new group for example):
95    1) Add to the ES_optype.
96    2) determine what opgroup your operation belongs to (X)
97    3) add a string for the op to the end of ES_opstrings
98    4) increase ES_opcount
99    5) add an entry (X) to opgroups
100    6) add an entry to the switch in collapseToReady
101    7) add an entry to resolveX
102    */
103    
104    
105  using namespace std;  using namespace std;
106  using namespace boost;  using namespace boost;
# Line 33  using namespace boost; Line 108  using namespace boost;
108  namespace escript  namespace escript
109  {  {
110    
 const std::string&  
 opToString(ES_optype op);  
   
111  namespace  namespace
112  {  {
113    
114    
115    // enabling this will print out when ever the maximum stacksize used by resolve increases
116    // it assumes _OPENMP is also in use
117    //#define LAZY_STACK_PROF
118    
119    
120    
121    #ifndef _OPENMP
122      #ifdef LAZY_STACK_PROF
123      #undef LAZY_STACK_PROF
124      #endif
125    #endif
126    
127    
128    #ifdef LAZY_STACK_PROF
129    std::vector<void*> stackstart(getNumberOfThreads());
130    std::vector<void*> stackend(getNumberOfThreads());
131    size_t maxstackuse=0;
132    #endif
133    
134  enum ES_opgroup  enum ES_opgroup
135  {  {
136     G_UNKNOWN,     G_UNKNOWN,
137     G_IDENTITY,     G_IDENTITY,
138     G_BINARY,     G_BINARY,        // pointwise operations with two arguments
139     G_UNARY     G_UNARY,     // pointwise operations with one argument
140       G_UNARY_P,       // pointwise operations with one argument, requiring a parameter
141       G_NP1OUT,        // non-pointwise op with one output
142       G_NP1OUT_P,      // non-pointwise op with one output requiring a parameter
143       G_TENSORPROD,    // general tensor product
144       G_NP1OUT_2P,     // non-pointwise op with one output requiring two params
145       G_REDUCTION      // non-pointwise unary op with a scalar output
146  };  };
147    
148    
149    
150    
151  string ES_opstrings[]={"UNKNOWN","IDENTITY","+","-","*","/","sin","cos","tan",  string ES_opstrings[]={"UNKNOWN","IDENTITY","+","-","*","/","^",
152                "sin","cos","tan",
153              "asin","acos","atan","sinh","cosh","tanh","erf",              "asin","acos","atan","sinh","cosh","tanh","erf",
154              "asinh","acosh","atanh",              "asinh","acosh","atanh",
155              "log10","log","sign","abs","neg","pos","exp","sqrt",              "log10","log","sign","abs","neg","pos","exp","sqrt",
156              "1/","where>0","where<0","where>=0","where<=0"};              "1/","where>0","where<0","where>=0","where<=0", "where<>0","where=0",
157  int ES_opcount=32;              "symmetric","nonsymmetric",
158  ES_opgroup opgroups[]={G_UNKNOWN,G_IDENTITY,G_BINARY,G_BINARY,G_BINARY,G_BINARY,G_UNARY,G_UNARY,G_UNARY, //9              "prod",
159              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,    // 16              "transpose", "trace",
160              G_UNARY,G_UNARY,G_UNARY,                    // 19              "swapaxes",
161              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,        // 27              "minval", "maxval"};
162              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY};  int ES_opcount=43;
163    ES_opgroup opgroups[]={G_UNKNOWN,G_IDENTITY,G_BINARY,G_BINARY,G_BINARY,G_BINARY, G_BINARY,
164                G_UNARY,G_UNARY,G_UNARY, //10
165                G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,    // 17
166                G_UNARY,G_UNARY,G_UNARY,                    // 20
167                G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,    // 28
168                G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY, G_UNARY_P, G_UNARY_P,      // 35
169                G_NP1OUT,G_NP1OUT,
170                G_TENSORPROD,
171                G_NP1OUT_P, G_NP1OUT_P,
172                G_NP1OUT_2P,
173                G_REDUCTION, G_REDUCTION};
174  inline  inline
175  ES_opgroup  ES_opgroup
176  getOpgroup(ES_optype op)  getOpgroup(ES_optype op)
# Line 79  resultFS(DataAbstract_ptr left, DataAbst Line 187  resultFS(DataAbstract_ptr left, DataAbst
187      // that way, if interpolate is required in any other op we can just throw a      // that way, if interpolate is required in any other op we can just throw a
188      // programming error exception.      // programming error exception.
189    
190      FunctionSpace l=left->getFunctionSpace();
191      if (left->getFunctionSpace()!=right->getFunctionSpace())    FunctionSpace r=right->getFunctionSpace();
192      {    if (l!=r)
193          throw DataException("FunctionSpaces not equal - interpolation not supported on lazy data.");    {
194      }      if (r.probeInterpolation(l))
195      return left->getFunctionSpace();      {
196        return l;
197        }
198        if (l.probeInterpolation(r))
199        {
200        return r;
201        }
202        throw DataException("Cannot interpolate between the FunctionSpaces given for operation "+opToString(op)+".");
203      }
204      return l;
205  }  }
206    
207  // return the shape of the result of "left op right"  // return the shape of the result of "left op right"
208    // the shapes resulting from tensor product are more complex to compute so are worked out elsewhere
209  DataTypes::ShapeType  DataTypes::ShapeType
210  resultShape(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)  resultShape(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
211  {  {
212      if (left->getShape()!=right->getShape())      if (left->getShape()!=right->getShape())
213      {      {
214          throw DataException("Shapes not the same - shapes must match for lazy data.");        if ((getOpgroup(op)!=G_BINARY) && (getOpgroup(op)!=G_NP1OUT))
215          {
216            throw DataException("Shapes not the name - shapes must match for (point)binary operations.");
217          }
218    
219          if (left->getRank()==0)   // we need to allow scalar * anything
220          {
221            return right->getShape();
222          }
223          if (right->getRank()==0)
224          {
225            return left->getShape();
226          }
227          throw DataException("Shapes not the same - arguments must have matching shapes (or be scalars) for (point)binary operations on lazy data.");
228      }      }
229      return left->getShape();      return left->getShape();
230  }  }
231    
232  size_t  // return the shape for "op left"
233  resultLength(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)  
234    DataTypes::ShapeType
235    resultShape(DataAbstract_ptr left, ES_optype op, int axis_offset)
236  {  {
237     switch (getOpgroup(op))      switch(op)
238     {      {
239     case G_BINARY: return left->getLength();          case TRANS:
240     case G_UNARY: return left->getLength();         {            // for the scoping of variables
241     default:          const DataTypes::ShapeType& s=left->getShape();
242      throw DataException("Programmer Error - attempt to getLength() for operator "+opToString(op)+".");          DataTypes::ShapeType sh;
243     }          int rank=left->getRank();
244            if (axis_offset<0 || axis_offset>rank)
245            {
246                stringstream e;
247                e << "Error - Data::transpose must have 0 <= axis_offset <= rank=" << rank;
248                throw DataException(e.str());
249            }
250            for (int i=0; i<rank; i++)
251            {
252               int index = (axis_offset+i)%rank;
253               sh.push_back(s[index]); // Append to new shape
254            }
255            return sh;
256           }
257        break;
258        case TRACE:
259           {
260            int rank=left->getRank();
261            if (rank<2)
262            {
263               throw DataException("Trace can only be computed for objects with rank 2 or greater.");
264            }
265            if ((axis_offset>rank-2) || (axis_offset<0))
266            {
267               throw DataException("Trace: axis offset must lie between 0 and rank-2 inclusive.");
268            }
269            if (rank==2)
270            {
271               return DataTypes::scalarShape;
272            }
273            else if (rank==3)
274            {
275               DataTypes::ShapeType sh;
276                   if (axis_offset==0)
277               {
278                    sh.push_back(left->getShape()[2]);
279                   }
280                   else     // offset==1
281               {
282                sh.push_back(left->getShape()[0]);
283                   }
284               return sh;
285            }
286            else if (rank==4)
287            {
288               DataTypes::ShapeType sh;
289               const DataTypes::ShapeType& s=left->getShape();
290                   if (axis_offset==0)
291               {
292                    sh.push_back(s[2]);
293                    sh.push_back(s[3]);
294                   }
295                   else if (axis_offset==1)
296               {
297                    sh.push_back(s[0]);
298                    sh.push_back(s[3]);
299                   }
300               else     // offset==2
301               {
302                sh.push_back(s[0]);
303                sh.push_back(s[1]);
304               }
305               return sh;
306            }
307            else        // unknown rank
308            {
309               throw DataException("Error - Data::trace can only be calculated for rank 2, 3 or 4 object.");
310            }
311           }
312        break;
313            default:
314        throw DataException("Programmer error - resultShape(left,op) can't compute shapes for operator "+opToString(op)+".");
315        }
316  }  }
317    
318  int  DataTypes::ShapeType
319  calcBuffs(const DataLazy_ptr& left, const DataLazy_ptr& right, ES_optype op)  SwapShape(DataAbstract_ptr left, const int axis0, const int axis1)
320  {  {
321     switch(getOpgroup(op))       // This code taken from the Data.cpp swapaxes() method
322     {       // Some of the checks are probably redundant here
323     case G_IDENTITY: return 1;       int axis0_tmp,axis1_tmp;
324     case G_BINARY: return max(left->getBuffsRequired(),right->getBuffsRequired()+1);       const DataTypes::ShapeType& s=left->getShape();
325     case G_UNARY: return max(left->getBuffsRequired(),1);       DataTypes::ShapeType out_shape;
326     default:       // Here's the equivalent of python s_out=s[axis_offset:]+s[:axis_offset]
327      throw DataException("Programmer Error - attempt to calcBuffs() for operator "+opToString(op)+".");       // which goes thru all shape vector elements starting with axis_offset (at index=rank wrap around to 0)
328     }       int rank=left->getRank();
329         if (rank<2) {
330            throw DataException("Error - Data::swapaxes argument must have at least rank 2.");
331         }
332         if (axis0<0 || axis0>rank-1) {
333            stringstream e;
334            e << "Error - Data::swapaxes: axis0 must be between 0 and rank-1=" << (rank-1);
335            throw DataException(e.str());
336         }
337         if (axis1<0 || axis1>rank-1) {
338            stringstream e;
339            e << "Error - Data::swapaxes: axis1 must be between 0 and rank-1=" << (rank-1);
340            throw DataException(e.str());
341         }
342         if (axis0 == axis1) {
343             throw DataException("Error - Data::swapaxes: axis indices must be different.");
344         }
345         if (axis0 > axis1) {
346             axis0_tmp=axis1;
347             axis1_tmp=axis0;
348         } else {
349             axis0_tmp=axis0;
350             axis1_tmp=axis1;
351         }
352         for (int i=0; i<rank; i++) {
353           if (i == axis0_tmp) {
354              out_shape.push_back(s[axis1_tmp]);
355           } else if (i == axis1_tmp) {
356              out_shape.push_back(s[axis0_tmp]);
357           } else {
358              out_shape.push_back(s[i]);
359           }
360         }
361        return out_shape;
362  }  }
363    
364    
365    // determine the output shape for the general tensor product operation
366    // the additional parameters return information required later for the product
367    // the majority of this code is copy pasted from C_General_Tensor_Product
368    DataTypes::ShapeType
369    GTPShape(DataAbstract_ptr left, DataAbstract_ptr right, int axis_offset, int transpose, int& SL, int& SM, int& SR)
370    {
371        
372      // Get rank and shape of inputs
373      int rank0 = left->getRank();
374      int rank1 = right->getRank();
375      const DataTypes::ShapeType& shape0 = left->getShape();
376      const DataTypes::ShapeType& shape1 = right->getShape();
377    
378      // Prepare for the loops of the product and verify compatibility of shapes
379      int start0=0, start1=0;
380      if (transpose == 0)       {}
381      else if (transpose == 1)  { start0 = axis_offset; }
382      else if (transpose == 2)  { start1 = rank1-axis_offset; }
383      else              { throw DataException("DataLazy GeneralTensorProduct Constructor: Error - transpose should be 0, 1 or 2"); }
384    
385      if (rank0<axis_offset)
386      {
387        throw DataException("DataLazy GeneralTensorProduct Constructor: Error - rank of left < axisoffset");
388      }
389    
390      // Adjust the shapes for transpose
391      DataTypes::ShapeType tmpShape0(rank0);    // pre-sizing the vectors rather
392      DataTypes::ShapeType tmpShape1(rank1);    // than using push_back
393      for (int i=0; i<rank0; i++)   { tmpShape0[i]=shape0[(i+start0)%rank0]; }
394      for (int i=0; i<rank1; i++)   { tmpShape1[i]=shape1[(i+start1)%rank1]; }
395    
396      // Prepare for the loops of the product
397      SL=1, SM=1, SR=1;
398      for (int i=0; i<rank0-axis_offset; i++)   {
399        SL *= tmpShape0[i];
400      }
401      for (int i=rank0-axis_offset; i<rank0; i++)   {
402        if (tmpShape0[i] != tmpShape1[i-(rank0-axis_offset)]) {
403          throw DataException("C_GeneralTensorProduct: Error - incompatible shapes");
404        }
405        SM *= tmpShape0[i];
406      }
407      for (int i=axis_offset; i<rank1; i++)     {
408        SR *= tmpShape1[i];
409      }
410    
411      // Define the shape of the output (rank of shape is the sum of the loop ranges below)
412      DataTypes::ShapeType shape2(rank0+rank1-2*axis_offset);  
413      {         // block to limit the scope of out_index
414         int out_index=0;
415         for (int i=0; i<rank0-axis_offset; i++, ++out_index) { shape2[out_index]=tmpShape0[i]; } // First part of arg_0_Z
416         for (int i=axis_offset; i<rank1; i++, ++out_index)   { shape2[out_index]=tmpShape1[i]; } // Last part of arg_1_Z
417      }
418    
419      if (shape2.size()>ESCRIPT_MAX_DATA_RANK)
420      {
421         ostringstream os;
422         os << "C_GeneralTensorProduct: Error - Attempt to create a rank " << shape2.size() << " object. The maximum rank is " << ESCRIPT_MAX_DATA_RANK << ".";
423         throw DataException(os.str());
424      }
425    
426      return shape2;
427    }
428    
429  }   // end anonymous namespace  }   // end anonymous namespace
430    
431    
432    
433    // Return a string representing the operation
434  const std::string&  const std::string&
435  opToString(ES_optype op)  opToString(ES_optype op)
436  {  {
# Line 138  opToString(ES_optype op) Line 441  opToString(ES_optype op)
441    return ES_opstrings[op];    return ES_opstrings[op];
442  }  }
443    
444    void DataLazy::LazyNodeSetup()
445    {
446    #ifdef _OPENMP
447        int numthreads=omp_get_max_threads();
448        m_samples.resize(numthreads*m_samplesize);
449        m_sampleids=new int[numthreads];
450        for (int i=0;i<numthreads;++i)
451        {
452            m_sampleids[i]=-1;  
453        }
454    #else
455        m_samples.resize(m_samplesize);
456        m_sampleids=new int[1];
457        m_sampleids[0]=-1;
458    #endif  // _OPENMP
459    }
460    
461    
462    // Creates an identity node
463  DataLazy::DataLazy(DataAbstract_ptr p)  DataLazy::DataLazy(DataAbstract_ptr p)
464      : parent(p->getFunctionSpace(),p->getShape()),      : parent(p->getFunctionSpace(),p->getShape())
465      m_op(IDENTITY)      ,m_sampleids(0),
466        m_samples(1)
467  {  {
468     if (p->isLazy())     if (p->isLazy())
469     {     {
     // TODO: fix this.   We could make the new node a copy of p?  
470      // I don't want identity of Lazy.      // I don't want identity of Lazy.
471      // Question: Why would that be so bad?      // Question: Why would that be so bad?
472      // Answer: We assume that the child of ID is something we can call getVector on      // Answer: We assume that the child of ID is something we can call getVector on
# Line 153  DataLazy::DataLazy(DataAbstract_ptr p) Line 474  DataLazy::DataLazy(DataAbstract_ptr p)
474     }     }
475     else     else
476     {     {
477      m_id=dynamic_pointer_cast<DataReady>(p);      p->makeLazyShared();
478        DataReady_ptr dr=dynamic_pointer_cast<DataReady>(p);
479        makeIdentity(dr);
480    LAZYDEBUG(cout << "Wrapping " << dr.get() << " id=" << m_id.get() << endl;)
481     }     }
482     m_length=p->getLength();  LAZYDEBUG(cout << "(1)Lazy created with " << m_samplesize << endl;)
    m_buffsRequired=1;  
    m_samplesize=getNumDPPSample()*getNoValues();  
 cout << "(1)Lazy created with " << m_samplesize << endl;  
483  }  }
484    
485  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op)  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op)
486      : parent(left->getFunctionSpace(),left->getShape()),      : parent(left->getFunctionSpace(),(getOpgroup(op)!=G_REDUCTION)?left->getShape():DataTypes::scalarShape),
487      m_op(op)      m_op(op),
488        m_axis_offset(0),
489        m_transpose(0),
490        m_SL(0), m_SM(0), m_SR(0)
491  {  {
492     if (getOpgroup(op)!=G_UNARY)     if ((getOpgroup(op)!=G_UNARY) && (getOpgroup(op)!=G_NP1OUT) && (getOpgroup(op)!=G_REDUCTION))
493     {     {
494      throw DataException("Programmer error - constructor DataLazy(left, op) will only process UNARY operations.");      throw DataException("Programmer error - constructor DataLazy(left, op) will only process UNARY operations.");
495     }     }
496    
497     DataLazy_ptr lleft;     DataLazy_ptr lleft;
498     if (!left->isLazy())     if (!left->isLazy())
499     {     {
# Line 178  DataLazy::DataLazy(DataAbstract_ptr left Line 503  DataLazy::DataLazy(DataAbstract_ptr left
503     {     {
504      lleft=dynamic_pointer_cast<DataLazy>(left);      lleft=dynamic_pointer_cast<DataLazy>(left);
505     }     }
506     m_length=left->getLength();     m_readytype=lleft->m_readytype;
507     m_left=lleft;     m_left=lleft;
    m_buffsRequired=1;  
508     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
509       m_children=m_left->m_children+1;
510       m_height=m_left->m_height+1;
511       LazyNodeSetup();
512       SIZELIMIT
513  }  }
514    
515    
516  DataLazy::DataLazy(DataLazy_ptr left, DataLazy_ptr right, ES_optype op)  // In this constructor we need to consider interpolation
517    DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
518      : parent(resultFS(left,right,op), resultShape(left,right,op)),      : parent(resultFS(left,right,op), resultShape(left,right,op)),
519      m_left(left),      m_op(op),
520      m_right(right),      m_SL(0), m_SM(0), m_SR(0)
     m_op(op)  
521  {  {
522     if (getOpgroup(op)!=G_BINARY)  LAZYDEBUG(cout << "Forming operator with " << left.get() << " " << right.get() << endl;)
523       if ((getOpgroup(op)!=G_BINARY))
524     {     {
525      throw DataException("Programmer error - constructor DataLazy(left, right, op) will only process BINARY operations.");      throw DataException("Programmer error - constructor DataLazy(left, right, op) will only process BINARY operations.");
526     }     }
527     m_length=resultLength(m_left,m_right,m_op);  
528       if (getFunctionSpace()!=left->getFunctionSpace())    // left needs to be interpolated
529       {
530        FunctionSpace fs=getFunctionSpace();
531        Data ltemp(left);
532        Data tmp(ltemp,fs);
533        left=tmp.borrowDataPtr();
534       }
535       if (getFunctionSpace()!=right->getFunctionSpace())   // right needs to be interpolated
536       {
537        Data tmp(Data(right),getFunctionSpace());
538        right=tmp.borrowDataPtr();
539    LAZYDEBUG(cout << "Right interpolation required " << right.get() << endl;)
540       }
541       left->operandCheck(*right);
542    
543       if (left->isLazy())          // the children need to be DataLazy. Wrap them in IDENTITY if required
544       {
545        m_left=dynamic_pointer_cast<DataLazy>(left);
546    LAZYDEBUG(cout << "Left is " << m_left->toString() << endl;)
547       }
548       else
549       {
550        m_left=DataLazy_ptr(new DataLazy(left));
551    LAZYDEBUG(cout << "Left " << left.get() << " wrapped " << m_left->m_id.get() << endl;)
552       }
553       if (right->isLazy())
554       {
555        m_right=dynamic_pointer_cast<DataLazy>(right);
556    LAZYDEBUG(cout << "Right is " << m_right->toString() << endl;)
557       }
558       else
559       {
560        m_right=DataLazy_ptr(new DataLazy(right));
561    LAZYDEBUG(cout << "Right " << right.get() << " wrapped " << m_right->m_id.get() << endl;)
562       }
563       char lt=m_left->m_readytype;
564       char rt=m_right->m_readytype;
565       if (lt=='E' || rt=='E')
566       {
567        m_readytype='E';
568       }
569       else if (lt=='T' || rt=='T')
570       {
571        m_readytype='T';
572       }
573       else
574       {
575        m_readytype='C';
576       }
577     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
578     m_buffsRequired=calcBuffs(m_left, m_right, m_op);     m_children=m_left->m_children+m_right->m_children+2;
579  cout << "(2)Lazy created with " << m_samplesize << endl;     m_height=max(m_left->m_height,m_right->m_height)+1;
580       LazyNodeSetup();
581       SIZELIMIT
582    LAZYDEBUG(cout << "(3)Lazy created with " << m_samplesize << endl;)
583  }  }
584    
585  DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)  DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op, int axis_offset, int transpose)
586      : parent(resultFS(left,right,op), resultShape(left,right,op)),      : parent(resultFS(left,right,op), GTPShape(left,right, axis_offset, transpose, m_SL,m_SM, m_SR)),
587      m_op(op)      m_op(op),
588        m_axis_offset(axis_offset),
589        m_transpose(transpose)
590  {  {
591     if (getOpgroup(op)!=G_BINARY)     if ((getOpgroup(op)!=G_TENSORPROD))
592       {
593        throw DataException("Programmer error - constructor DataLazy(left, right, op, ax, tr) will only process BINARY operations which require parameters.");
594       }
595       if ((transpose>2) || (transpose<0))
596       {
597        throw DataException("DataLazy GeneralTensorProduct constructor: Error - transpose should be 0, 1 or 2");
598       }
599       if (getFunctionSpace()!=left->getFunctionSpace())    // left needs to be interpolated
600     {     {
601      throw DataException("Programmer error - constructor DataLazy(left, op) will only process BINARY operations.");      FunctionSpace fs=getFunctionSpace();
602        Data ltemp(left);
603        Data tmp(ltemp,fs);
604        left=tmp.borrowDataPtr();
605     }     }
606     if (left->isLazy())     if (getFunctionSpace()!=right->getFunctionSpace())   // right needs to be interpolated
607       {
608        Data tmp(Data(right),getFunctionSpace());
609        right=tmp.borrowDataPtr();
610       }
611    //    left->operandCheck(*right);
612    
613       if (left->isLazy())          // the children need to be DataLazy. Wrap them in IDENTITY if required
614     {     {
615      m_left=dynamic_pointer_cast<DataLazy>(left);      m_left=dynamic_pointer_cast<DataLazy>(left);
616     }     }
# Line 225  DataLazy::DataLazy(DataAbstract_ptr left Line 626  DataLazy::DataLazy(DataAbstract_ptr left
626     {     {
627      m_right=DataLazy_ptr(new DataLazy(right));      m_right=DataLazy_ptr(new DataLazy(right));
628     }     }
629       char lt=m_left->m_readytype;
630     m_length=resultLength(m_left,m_right,m_op);     char rt=m_right->m_readytype;
631       if (lt=='E' || rt=='E')
632       {
633        m_readytype='E';
634       }
635       else if (lt=='T' || rt=='T')
636       {
637        m_readytype='T';
638       }
639       else
640       {
641        m_readytype='C';
642       }
643     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
644     m_buffsRequired=calcBuffs(m_left, m_right,m_op);     m_children=m_left->m_children+m_right->m_children+2;
645  cout << "(3)Lazy created with " << m_samplesize << endl;     m_height=max(m_left->m_height,m_right->m_height)+1;
646       LazyNodeSetup();
647       SIZELIMIT
648    LAZYDEBUG(cout << "(4)Lazy created with " << m_samplesize << endl;)
649  }  }
650    
651    
652  DataLazy::~DataLazy()  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, int axis_offset)
653        : parent(left->getFunctionSpace(), resultShape(left,op, axis_offset)),
654        m_op(op),
655        m_axis_offset(axis_offset),
656        m_transpose(0),
657        m_tol(0)
658    {
659       if ((getOpgroup(op)!=G_NP1OUT_P))
660       {
661        throw DataException("Programmer error - constructor DataLazy(left, op, ax) will only process UNARY operations which require parameters.");
662       }
663       DataLazy_ptr lleft;
664       if (!left->isLazy())
665       {
666        lleft=DataLazy_ptr(new DataLazy(left));
667       }
668       else
669       {
670        lleft=dynamic_pointer_cast<DataLazy>(left);
671       }
672       m_readytype=lleft->m_readytype;
673       m_left=lleft;
674       m_samplesize=getNumDPPSample()*getNoValues();
675       m_children=m_left->m_children+1;
676       m_height=m_left->m_height+1;
677       LazyNodeSetup();
678       SIZELIMIT
679    LAZYDEBUG(cout << "(5)Lazy created with " << m_samplesize << endl;)
680    }
681    
682    DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, double tol)
683        : parent(left->getFunctionSpace(), left->getShape()),
684        m_op(op),
685        m_axis_offset(0),
686        m_transpose(0),
687        m_tol(tol)
688  {  {
689       if ((getOpgroup(op)!=G_UNARY_P))
690       {
691        throw DataException("Programmer error - constructor DataLazy(left, op, tol) will only process UNARY operations which require parameters.");
692       }
693       DataLazy_ptr lleft;
694       if (!left->isLazy())
695       {
696        lleft=DataLazy_ptr(new DataLazy(left));
697       }
698       else
699       {
700        lleft=dynamic_pointer_cast<DataLazy>(left);
701       }
702       m_readytype=lleft->m_readytype;
703       m_left=lleft;
704       m_samplesize=getNumDPPSample()*getNoValues();
705       m_children=m_left->m_children+1;
706       m_height=m_left->m_height+1;
707       LazyNodeSetup();
708       SIZELIMIT
709    LAZYDEBUG(cout << "(6)Lazy created with " << m_samplesize << endl;)
710  }  }
711    
712    
713  int  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, const int axis0, const int axis1)
714  DataLazy::getBuffsRequired() const      : parent(left->getFunctionSpace(), SwapShape(left,axis0,axis1)),
715        m_op(op),
716        m_axis_offset(axis0),
717        m_transpose(axis1),
718        m_tol(0)
719  {  {
720      return m_buffsRequired;     if ((getOpgroup(op)!=G_NP1OUT_2P))
721       {
722        throw DataException("Programmer error - constructor DataLazy(left, op, tol) will only process UNARY operations which require two integer parameters.");
723       }
724       DataLazy_ptr lleft;
725       if (!left->isLazy())
726       {
727        lleft=DataLazy_ptr(new DataLazy(left));
728       }
729       else
730       {
731        lleft=dynamic_pointer_cast<DataLazy>(left);
732       }
733       m_readytype=lleft->m_readytype;
734       m_left=lleft;
735       m_samplesize=getNumDPPSample()*getNoValues();
736       m_children=m_left->m_children+1;
737       m_height=m_left->m_height+1;
738       LazyNodeSetup();
739       SIZELIMIT
740    LAZYDEBUG(cout << "(7)Lazy created with " << m_samplesize << endl;)
741    }
742    
743    DataLazy::~DataLazy()
744    {
745       delete[] m_sampleids;
746  }  }
747    
748    
749  // the vector and the offset are a place where the method could write its data if it wishes  /*
750  // it is not obligated to do so. For example, if it has its own storage already, it can use that.    \brief Evaluates the expression using methods on Data.
751  // Hence the return value to indicate where the data is actually stored.    This does the work for the collapse method.
752  // Regardless, the storage should be assumed to be used, even if it isn't.    For reasons of efficiency do not call this method on DataExpanded nodes.
753  const double*  */
754  DataLazy::resolveSample(ValueType& v,int sampleNo,  size_t offset ) const  DataReady_ptr
755    DataLazy::collapseToReady()
756  {  {
757    if (m_op==IDENTITY)      if (m_readytype=='E')
758      { // this is more an efficiency concern than anything else
759        throw DataException("Programmer Error - do not use collapse on Expanded data.");
760      }
761      if (m_op==IDENTITY)
762    {    {
763      const ValueType& vec=m_id->getVector();      return m_id;
     return &(vec[m_id->getPointOffset(sampleNo, 0)]);  
764    }    }
765    size_t rightoffset=offset+m_samplesize;    DataReady_ptr pleft=m_left->collapseToReady();
766    const double* left=m_left->resolveSample(v,sampleNo,offset);    Data left(pleft);
767    const double* right=0;    Data right;
768    if (getOpgroup(m_op)==G_BINARY)    if ((getOpgroup(m_op)==G_BINARY) || (getOpgroup(m_op)==G_TENSORPROD))
769    {    {
770      right=m_right->resolveSample(v,sampleNo,rightoffset);      right=Data(m_right->collapseToReady());
771    }    }
772    double* result=&(v[offset]);    Data result;
773      switch(m_op)
774    {    {
775      switch(m_op)      case ADD:
776      {      result=left+right;
     case ADD:       // since these are pointwise ops, pretend each sample is one point  
     tensor_binary_operation(m_samplesize, left, right, result, plus<double>());  
777      break;      break;
778      case SUB:            case SUB:      
779      tensor_binary_operation(m_samplesize, left, right, result, minus<double>());      result=left-right;
780      break;      break;
781      case MUL:            case MUL:      
782      tensor_binary_operation(m_samplesize, left, right, result, multiplies<double>());      result=left*right;
783      break;      break;
784      case DIV:            case DIV:      
785      tensor_binary_operation(m_samplesize, left, right, result, divides<double>());      result=left/right;
786      break;      break;
 // unary ops  
787      case SIN:      case SIN:
788      tensor_unary_operation(m_samplesize, left, result, ::sin);      result=left.sin();  
789        break;
790        case COS:
791        result=left.cos();
792        break;
793        case TAN:
794        result=left.tan();
795        break;
796        case ASIN:
797        result=left.asin();
798        break;
799        case ACOS:
800        result=left.acos();
801        break;
802        case ATAN:
803        result=left.atan();
804        break;
805        case SINH:
806        result=left.sinh();
807        break;
808        case COSH:
809        result=left.cosh();
810        break;
811        case TANH:
812        result=left.tanh();
813        break;
814        case ERF:
815        result=left.erf();
816        break;
817       case ASINH:
818        result=left.asinh();
819        break;
820       case ACOSH:
821        result=left.acosh();
822        break;
823       case ATANH:
824        result=left.atanh();
825        break;
826        case LOG10:
827        result=left.log10();
828        break;
829        case LOG:
830        result=left.log();
831        break;
832        case SIGN:
833        result=left.sign();
834        break;
835        case ABS:
836        result=left.abs();
837        break;
838        case NEG:
839        result=left.neg();
840        break;
841        case POS:
842        // it doesn't mean anything for delayed.
843        // it will just trigger a deep copy of the lazy object
844        throw DataException("Programmer error - POS not supported for lazy data.");
845        break;
846        case EXP:
847        result=left.exp();
848        break;
849        case SQRT:
850        result=left.sqrt();
851        break;
852        case RECIP:
853        result=left.oneOver();
854        break;
855        case GZ:
856        result=left.wherePositive();
857        break;
858        case LZ:
859        result=left.whereNegative();
860        break;
861        case GEZ:
862        result=left.whereNonNegative();
863        break;
864        case LEZ:
865        result=left.whereNonPositive();
866        break;
867        case NEZ:
868        result=left.whereNonZero(m_tol);
869        break;
870        case EZ:
871        result=left.whereZero(m_tol);
872        break;
873        case SYM:
874        result=left.symmetric();
875        break;
876        case NSYM:
877        result=left.nonsymmetric();
878        break;
879        case PROD:
880        result=C_GeneralTensorProduct(left,right,m_axis_offset, m_transpose);
881        break;
882        case TRANS:
883        result=left.transpose(m_axis_offset);
884        break;
885        case TRACE:
886        result=left.trace(m_axis_offset);
887        break;
888        case SWAP:
889        result=left.swapaxes(m_axis_offset, m_transpose);
890        break;
891        case MINVAL:
892        result=left.minval();
893        break;
894        case MAXVAL:
895        result=left.minval();
896        break;
897        default:
898        throw DataException("Programmer error - collapseToReady does not know how to resolve operator "+opToString(m_op)+".");
899      }
900      return result.borrowReadyPtr();
901    }
902    
903    /*
904       \brief Converts the DataLazy into an IDENTITY storing the value of the expression.
905       This method uses the original methods on the Data class to evaluate the expressions.
906       For this reason, it should not be used on DataExpanded instances. (To do so would defeat
907       the purpose of using DataLazy in the first place).
908    */
909    void
910    DataLazy::collapse()
911    {
912      if (m_op==IDENTITY)
913      {
914        return;
915      }
916      if (m_readytype=='E')
917      { // this is more an efficiency concern than anything else
918        throw DataException("Programmer Error - do not use collapse on Expanded data.");
919      }
920      m_id=collapseToReady();
921      m_op=IDENTITY;
922    }
923    
924    
925    
926    
927    
928    
929    #define PROC_OP(TYPE,X)                               \
930        for (int j=0;j<onumsteps;++j)\
931        {\
932          for (int i=0;i<numsteps;++i,resultp+=resultStep) \
933          { \
934    LAZYDEBUG(cout << "[left,right]=[" << lroffset << "," << rroffset << "]" << endl;)\
935    LAZYDEBUG(cout << "{left,right}={" << (*left)[lroffset] << "," << (*right)[rroffset] << "}\n";)\
936             tensor_binary_operation< TYPE >(chunksize, &((*left)[lroffset]), &((*right)[rroffset]), resultp, X); \
937    LAZYDEBUG(cout << " result=      " << resultp[0] << endl;) \
938             lroffset+=leftstep; \
939             rroffset+=rightstep; \
940          }\
941          lroffset+=oleftstep;\
942          rroffset+=orightstep;\
943        }
944    
945    
946    // The result will be stored in m_samples
947    // The return value is a pointer to the DataVector, offset is the offset within the return value
948    const DataTypes::ValueType*
949    DataLazy::resolveNodeSample(int tid, int sampleNo, size_t& roffset)
950    {
951    LAZYDEBUG(cout << "Resolve sample " << toString() << endl;)
952        // collapse so we have a 'E' node or an IDENTITY for some other type
953      if (m_readytype!='E' && m_op!=IDENTITY)
954      {
955        collapse();
956      }
957      if (m_op==IDENTITY)  
958      {
959        const ValueType& vec=m_id->getVectorRO();
960        roffset=m_id->getPointOffset(sampleNo, 0);
961    #ifdef LAZY_STACK_PROF
962    int x;
963    if (&x<stackend[omp_get_thread_num()])
964    {
965           stackend[omp_get_thread_num()]=&x;
966    }
967    #endif
968        return &(vec);
969      }
970      if (m_readytype!='E')
971      {
972        throw DataException("Programmer Error - Collapse did not produce an expanded node.");
973      }
974      if (m_sampleids[tid]==sampleNo)
975      {
976        roffset=tid*m_samplesize;
977        return &(m_samples);        // sample is already resolved
978      }
979      m_sampleids[tid]=sampleNo;
980      switch (getOpgroup(m_op))
981      {
982      case G_UNARY:
983      case G_UNARY_P: return resolveNodeUnary(tid, sampleNo, roffset);
984      case G_BINARY: return resolveNodeBinary(tid, sampleNo, roffset);
985      case G_NP1OUT: return resolveNodeNP1OUT(tid, sampleNo, roffset);
986      case G_NP1OUT_P: return resolveNodeNP1OUT_P(tid, sampleNo, roffset);
987      case G_TENSORPROD: return resolveNodeTProd(tid, sampleNo, roffset);
988      case G_NP1OUT_2P: return resolveNodeNP1OUT_2P(tid, sampleNo, roffset);
989      case G_REDUCTION: return resolveNodeReduction(tid, sampleNo, roffset);
990      default:
991        throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");
992      }
993    }
994    
995    const DataTypes::ValueType*
996    DataLazy::resolveNodeUnary(int tid, int sampleNo, size_t& roffset)
997    {
998        // we assume that any collapsing has been done before we get here
999        // since we only have one argument we don't need to think about only
1000        // processing single points.
1001        // we will also know we won't get identity nodes
1002      if (m_readytype!='E')
1003      {
1004        throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
1005      }
1006      if (m_op==IDENTITY)
1007      {
1008        throw DataException("Programmer error - resolveNodeUnary should not be called on identity nodes.");
1009      }
1010      const DataTypes::ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, roffset);
1011      const double* left=&((*leftres)[roffset]);
1012      roffset=m_samplesize*tid;
1013      double* result=&(m_samples[roffset]);
1014      switch (m_op)
1015      {
1016        case SIN:  
1017        tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sin);
1018      break;      break;
1019      case COS:      case COS:
1020      tensor_unary_operation(m_samplesize, left, result, ::cos);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cos);
1021      break;      break;
1022      case TAN:      case TAN:
1023      tensor_unary_operation(m_samplesize, left, result, ::tan);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tan);
1024      break;      break;
1025      case ASIN:      case ASIN:
1026      tensor_unary_operation(m_samplesize, left, result, ::asin);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::asin);
1027      break;      break;
1028      case ACOS:      case ACOS:
1029      tensor_unary_operation(m_samplesize, left, result, ::acos);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::acos);
1030      break;      break;
1031      case ATAN:      case ATAN:
1032      tensor_unary_operation(m_samplesize, left, result, ::atan);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::atan);
1033      break;      break;
1034      case SINH:      case SINH:
1035      tensor_unary_operation(m_samplesize, left, result, ::sinh);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sinh);
1036      break;      break;
1037      case COSH:      case COSH:
1038      tensor_unary_operation(m_samplesize, left, result, ::cosh);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cosh);
1039      break;      break;
1040      case TANH:      case TANH:
1041      tensor_unary_operation(m_samplesize, left, result, ::tanh);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tanh);
1042      break;      break;
1043      case ERF:      case ERF:
1044  #ifdef _WIN32  #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1045      throw DataException("Error - Data:: erf function is not supported on _WIN32 platforms.");      throw DataException("Error - Data:: erf function is not supported on _WIN32 platforms.");
1046  #else  #else
1047      tensor_unary_operation(m_samplesize, left, result, ::erf);      tensor_unary_operation(m_samplesize, left, result, ::erf);
1048      break;      break;
1049  #endif  #endif
1050     case ASINH:     case ASINH:
1051  #ifdef _WIN32  #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1052      tensor_unary_operation(m_samplesize, left, result, escript::asinh_substitute);      tensor_unary_operation(m_samplesize, left, result, escript::asinh_substitute);
1053  #else  #else
1054      tensor_unary_operation(m_samplesize, left, result, ::asinh);      tensor_unary_operation(m_samplesize, left, result, ::asinh);
1055  #endif    #endif  
1056      break;      break;
1057     case ACOSH:     case ACOSH:
1058  #ifdef _WIN32  #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1059      tensor_unary_operation(m_samplesize, left, result, escript::acosh_substitute);      tensor_unary_operation(m_samplesize, left, result, escript::acosh_substitute);
1060  #else  #else
1061      tensor_unary_operation(m_samplesize, left, result, ::acosh);      tensor_unary_operation(m_samplesize, left, result, ::acosh);
1062  #endif    #endif  
1063      break;      break;
1064     case ATANH:     case ATANH:
1065  #ifdef _WIN32  #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1066      tensor_unary_operation(m_samplesize, left, result, escript::atanh_substitute);      tensor_unary_operation(m_samplesize, left, result, escript::atanh_substitute);
1067  #else  #else
1068      tensor_unary_operation(m_samplesize, left, result, ::atanh);      tensor_unary_operation(m_samplesize, left, result, ::atanh);
1069  #endif    #endif  
1070      break;      break;
1071      case LOG10:      case LOG10:
1072      tensor_unary_operation(m_samplesize, left, result, ::log10);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log10);
1073      break;      break;
1074      case LOG:      case LOG:
1075      tensor_unary_operation(m_samplesize, left, result, ::log);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log);
1076      break;      break;
1077      case SIGN:      case SIGN:
1078      tensor_unary_operation(m_samplesize, left, result, escript::fsign);      tensor_unary_operation(m_samplesize, left, result, escript::fsign);
1079      break;      break;
1080      case ABS:      case ABS:
1081      tensor_unary_operation(m_samplesize, left, result, ::fabs);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::fabs);
1082      break;      break;
1083      case NEG:      case NEG:
1084      tensor_unary_operation(m_samplesize, left, result, negate<double>());      tensor_unary_operation(m_samplesize, left, result, negate<double>());
# Line 357  DataLazy::resolveSample(ValueType& v,int Line 1089  DataLazy::resolveSample(ValueType& v,int
1089      throw DataException("Programmer error - POS not supported for lazy data.");      throw DataException("Programmer error - POS not supported for lazy data.");
1090      break;      break;
1091      case EXP:      case EXP:
1092      tensor_unary_operation(m_samplesize, left, result, ::exp);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::exp);
1093      break;      break;
1094      case SQRT:      case SQRT:
1095      tensor_unary_operation(m_samplesize, left, result, ::sqrt);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sqrt);
1096      break;      break;
1097      case RECIP:      case RECIP:
1098      tensor_unary_operation(m_samplesize, left, result, bind1st(divides<double>(),1.));      tensor_unary_operation(m_samplesize, left, result, bind1st(divides<double>(),1.));
# Line 377  DataLazy::resolveSample(ValueType& v,int Line 1109  DataLazy::resolveSample(ValueType& v,int
1109      case LEZ:      case LEZ:
1110      tensor_unary_operation(m_samplesize, left, result, bind2nd(less_equal<double>(),0.0));      tensor_unary_operation(m_samplesize, left, result, bind2nd(less_equal<double>(),0.0));
1111      break;      break;
1112    // There are actually G_UNARY_P but I don't see a compelling reason to treat them differently
1113        case NEZ:
1114        tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsGT(),m_tol));
1115        break;
1116        case EZ:
1117        tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsLTE(),m_tol));
1118        break;
1119    
1120      default:      default:
1121      throw DataException("Programmer error - do not know how to resolve operator "+opToString(m_op)+".");      throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");
1122      }    }
1123      return &(m_samples);
1124    }
1125    
1126    
1127    const DataTypes::ValueType*
1128    DataLazy::resolveNodeReduction(int tid, int sampleNo, size_t& roffset)
1129    {
1130        // we assume that any collapsing has been done before we get here
1131        // since we only have one argument we don't need to think about only
1132        // processing single points.
1133        // we will also know we won't get identity nodes
1134      if (m_readytype!='E')
1135      {
1136        throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
1137      }
1138      if (m_op==IDENTITY)
1139      {
1140        throw DataException("Programmer error - resolveNodeUnary should not be called on identity nodes.");
1141      }
1142      size_t loffset=0;
1143      const DataTypes::ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, loffset);
1144    
1145      roffset=m_samplesize*tid;
1146      unsigned int ndpps=getNumDPPSample();
1147      unsigned int psize=DataTypes::noValues(getShape());
1148      double* result=&(m_samples[roffset]);
1149      switch (m_op)
1150      {
1151        case MINVAL:
1152        {
1153          for (unsigned int z=0;z<ndpps;++z)
1154          {
1155            FMin op;
1156            *result=DataMaths::reductionOp(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max());
1157            loffset+=psize;
1158            result++;
1159          }
1160        }
1161        break;
1162        case MAXVAL:
1163        {
1164          for (unsigned int z=0;z<ndpps;++z)
1165          {
1166          FMax op;
1167          *result=DataMaths::reductionOp(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max()*-1);
1168          loffset+=psize;
1169          result++;
1170          }
1171        }
1172        break;
1173        default:
1174        throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");
1175      }
1176      return &(m_samples);
1177    }
1178    
1179    const DataTypes::ValueType*
1180    DataLazy::resolveNodeNP1OUT(int tid, int sampleNo, size_t& roffset)
1181    {
1182        // we assume that any collapsing has been done before we get here
1183        // since we only have one argument we don't need to think about only
1184        // processing single points.
1185      if (m_readytype!='E')
1186      {
1187        throw DataException("Programmer error - resolveNodeNP1OUT should only be called on expanded Data.");
1188      }
1189      if (m_op==IDENTITY)
1190      {
1191        throw DataException("Programmer error - resolveNodeNP1OUT should not be called on identity nodes.");
1192      }
1193      size_t subroffset;
1194      const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1195      roffset=m_samplesize*tid;
1196      size_t loop=0;
1197      size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1198      size_t step=getNoValues();
1199      size_t offset=roffset;
1200      switch (m_op)
1201      {
1202        case SYM:
1203        for (loop=0;loop<numsteps;++loop)
1204        {
1205            DataMaths::symmetric(*leftres,m_left->getShape(),subroffset, m_samples, getShape(), offset);
1206            subroffset+=step;
1207            offset+=step;
1208        }
1209        break;
1210        case NSYM:
1211        for (loop=0;loop<numsteps;++loop)
1212        {
1213            DataMaths::nonsymmetric(*leftres,m_left->getShape(),subroffset, m_samples, getShape(), offset);
1214            subroffset+=step;
1215            offset+=step;
1216        }
1217        break;
1218        default:
1219        throw DataException("Programmer error - resolveNP1OUT can not resolve operator "+opToString(m_op)+".");
1220      }
1221      return &m_samples;
1222    }
1223    
1224    const DataTypes::ValueType*
1225    DataLazy::resolveNodeNP1OUT_P(int tid, int sampleNo, size_t& roffset)
1226    {
1227        // we assume that any collapsing has been done before we get here
1228        // since we only have one argument we don't need to think about only
1229        // processing single points.
1230      if (m_readytype!='E')
1231      {
1232        throw DataException("Programmer error - resolveNodeNP1OUT_P should only be called on expanded Data.");
1233    }    }
1234    return result;    if (m_op==IDENTITY)
1235      {
1236        throw DataException("Programmer error - resolveNodeNP1OUT_P should not be called on identity nodes.");
1237      }
1238      size_t subroffset;
1239      size_t offset;
1240      const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1241      roffset=m_samplesize*tid;
1242      offset=roffset;
1243      size_t loop=0;
1244      size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1245      size_t outstep=getNoValues();
1246      size_t instep=m_left->getNoValues();
1247      switch (m_op)
1248      {
1249        case TRACE:
1250        for (loop=0;loop<numsteps;++loop)
1251        {
1252                DataMaths::trace(*leftres,m_left->getShape(),subroffset, m_samples ,getShape(),offset,m_axis_offset);
1253            subroffset+=instep;
1254            offset+=outstep;
1255        }
1256        break;
1257        case TRANS:
1258        for (loop=0;loop<numsteps;++loop)
1259        {
1260                DataMaths::transpose(*leftres,m_left->getShape(),subroffset, m_samples, getShape(),offset,m_axis_offset);
1261            subroffset+=instep;
1262            offset+=outstep;
1263        }
1264        break;
1265        default:
1266        throw DataException("Programmer error - resolveNP1OUTP can not resolve operator "+opToString(m_op)+".");
1267      }
1268      return &m_samples;
1269    }
1270    
1271    
1272    const DataTypes::ValueType*
1273    DataLazy::resolveNodeNP1OUT_2P(int tid, int sampleNo, size_t& roffset)
1274    {
1275      if (m_readytype!='E')
1276      {
1277        throw DataException("Programmer error - resolveNodeNP1OUT_2P should only be called on expanded Data.");
1278      }
1279      if (m_op==IDENTITY)
1280      {
1281        throw DataException("Programmer error - resolveNodeNP1OUT_2P should not be called on identity nodes.");
1282      }
1283      size_t subroffset;
1284      size_t offset;
1285      const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1286      roffset=m_samplesize*tid;
1287      offset=roffset;
1288      size_t loop=0;
1289      size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1290      size_t outstep=getNoValues();
1291      size_t instep=m_left->getNoValues();
1292      switch (m_op)
1293      {
1294        case SWAP:
1295        for (loop=0;loop<numsteps;++loop)
1296        {
1297                DataMaths::swapaxes(*leftres,m_left->getShape(),subroffset, m_samples, getShape(),offset, m_axis_offset, m_transpose);
1298            subroffset+=instep;
1299            offset+=outstep;
1300        }
1301        break;
1302        default:
1303        throw DataException("Programmer error - resolveNodeNP1OUT2P can not resolve operator "+opToString(m_op)+".");
1304      }
1305      return &m_samples;
1306    }
1307    
1308    
1309    
1310    // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1311    // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1312    // If both children are expanded, then we can process them in a single operation (we treat
1313    // the whole sample as one big datapoint.
1314    // If one of the children is not expanded, then we need to treat each point in the sample
1315    // individually.
1316    // There is an additional complication when scalar operations are considered.
1317    // For example, 2+Vector.
1318    // In this case each double within the point is treated individually
1319    const DataTypes::ValueType*
1320    DataLazy::resolveNodeBinary(int tid, int sampleNo, size_t& roffset)
1321    {
1322    LAZYDEBUG(cout << "Resolve binary: " << toString() << endl;)
1323    
1324      size_t lroffset=0, rroffset=0;    // offsets in the left and right result vectors
1325        // first work out which of the children are expanded
1326      bool leftExp=(m_left->m_readytype=='E');
1327      bool rightExp=(m_right->m_readytype=='E');
1328      if (!leftExp && !rightExp)
1329      {
1330        throw DataException("Programmer Error - please use collapse if neither argument has type 'E'.");
1331      }
1332      bool leftScalar=(m_left->getRank()==0);
1333      bool rightScalar=(m_right->getRank()==0);
1334      if ((m_left->getRank()!=m_right->getRank()) && (!leftScalar && !rightScalar))
1335      {
1336        throw DataException("resolveBinary - ranks of arguments must match unless one of them is scalar.");
1337      }
1338      size_t leftsize=m_left->getNoValues();
1339      size_t rightsize=m_right->getNoValues();
1340      size_t chunksize=1;           // how many doubles will be processed in one go
1341      int leftstep=0;       // how far should the left offset advance after each step
1342      int rightstep=0;
1343      int numsteps=0;       // total number of steps for the inner loop
1344      int oleftstep=0;  // the o variables refer to the outer loop
1345      int orightstep=0; // The outer loop is only required in cases where there is an extended scalar
1346      int onumsteps=1;
1347      
1348      bool LES=(leftExp && leftScalar); // Left is an expanded scalar
1349      bool RES=(rightExp && rightScalar);
1350      bool LS=(!leftExp && leftScalar); // left is a single scalar
1351      bool RS=(!rightExp && rightScalar);
1352      bool LN=(!leftExp && !leftScalar);    // left is a single non-scalar
1353      bool RN=(!rightExp && !rightScalar);
1354      bool LEN=(leftExp && !leftScalar);    // left is an expanded non-scalar
1355      bool REN=(rightExp && !rightScalar);
1356    
1357      if ((LES && RES) || (LEN && REN)) // both are Expanded scalars or both are expanded non-scalars
1358      {
1359        chunksize=m_left->getNumDPPSample()*leftsize;
1360        leftstep=0;
1361        rightstep=0;
1362        numsteps=1;
1363      }
1364      else if (LES || RES)
1365      {
1366        chunksize=1;
1367        if (LES)        // left is an expanded scalar
1368        {
1369            if (RS)
1370            {
1371               leftstep=1;
1372               rightstep=0;
1373               numsteps=m_left->getNumDPPSample();
1374            }
1375            else        // RN or REN
1376            {
1377               leftstep=0;
1378               oleftstep=1;
1379               rightstep=1;
1380               orightstep=(RN ? -(int)rightsize : 0);
1381               numsteps=rightsize;
1382               onumsteps=m_left->getNumDPPSample();
1383            }
1384        }
1385        else        // right is an expanded scalar
1386        {
1387            if (LS)
1388            {
1389               rightstep=1;
1390               leftstep=0;
1391               numsteps=m_right->getNumDPPSample();
1392            }
1393            else
1394            {
1395               rightstep=0;
1396               orightstep=1;
1397               leftstep=1;
1398               oleftstep=(LN ? -(int)leftsize : 0);
1399               numsteps=leftsize;
1400               onumsteps=m_right->getNumDPPSample();
1401            }
1402        }
1403      }
1404      else  // this leaves (LEN, RS), (LEN, RN) and their transposes
1405      {
1406        if (LEN)    // and Right will be a single value
1407        {
1408            chunksize=rightsize;
1409            leftstep=rightsize;
1410            rightstep=0;
1411            numsteps=m_left->getNumDPPSample();
1412            if (RS)
1413            {
1414               numsteps*=leftsize;
1415            }
1416        }
1417        else    // REN
1418        {
1419            chunksize=leftsize;
1420            rightstep=leftsize;
1421            leftstep=0;
1422            numsteps=m_right->getNumDPPSample();
1423            if (LS)
1424            {
1425               numsteps*=rightsize;
1426            }
1427        }
1428      }
1429    
1430      int resultStep=max(leftstep,rightstep);   // only one (at most) should be !=0
1431        // Get the values of sub-expressions
1432      const ValueType* left=m_left->resolveNodeSample(tid,sampleNo,lroffset);  
1433      const ValueType* right=m_right->resolveNodeSample(tid,sampleNo,rroffset);
1434    LAZYDEBUG(cout << "Post sub calls in " << toString() << endl;)
1435    LAZYDEBUG(cout << "shapes=" << DataTypes::shapeToString(m_left->getShape()) << "," << DataTypes::shapeToString(m_right->getShape()) << endl;)
1436    LAZYDEBUG(cout << "chunksize=" << chunksize << endl << "leftstep=" << leftstep << " rightstep=" << rightstep;)
1437    LAZYDEBUG(cout << " numsteps=" << numsteps << endl << "oleftstep=" << oleftstep << " orightstep=" << orightstep;)
1438    LAZYDEBUG(cout << "onumsteps=" << onumsteps << endl;)
1439    LAZYDEBUG(cout << " DPPS=" << m_left->getNumDPPSample() << "," <<m_right->getNumDPPSample() << endl;)
1440    LAZYDEBUG(cout << "" << LS << RS << LN << RN << LES << RES <<LEN << REN <<   endl;)
1441    
1442    LAZYDEBUG(cout << "Left res["<< lroffset<< "]=" << (*left)[lroffset] << endl;)
1443    LAZYDEBUG(cout << "Right res["<< rroffset<< "]=" << (*right)[rroffset] << endl;)
1444    
1445    
1446      roffset=m_samplesize*tid;
1447      double* resultp=&(m_samples[roffset]);        // results are stored at the vector offset we recieved
1448      switch(m_op)
1449      {
1450        case ADD:
1451            PROC_OP(NO_ARG,plus<double>());
1452        break;
1453        case SUB:
1454        PROC_OP(NO_ARG,minus<double>());
1455        break;
1456        case MUL:
1457        PROC_OP(NO_ARG,multiplies<double>());
1458        break;
1459        case DIV:
1460        PROC_OP(NO_ARG,divides<double>());
1461        break;
1462        case POW:
1463           PROC_OP(double (double,double),::pow);
1464        break;
1465        default:
1466        throw DataException("Programmer error - resolveBinary can not resolve operator "+opToString(m_op)+".");
1467      }
1468    LAZYDEBUG(cout << "Result res[" << roffset<< "]" << m_samples[roffset] << endl;)
1469      return &m_samples;
1470    }
1471    
1472    
1473    // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1474    // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1475    // unlike the other resolve helpers, we must treat these datapoints separately.
1476    const DataTypes::ValueType*
1477    DataLazy::resolveNodeTProd(int tid, int sampleNo, size_t& roffset)
1478    {
1479    LAZYDEBUG(cout << "Resolve TensorProduct: " << toString() << endl;)
1480    
1481      size_t lroffset=0, rroffset=0;    // offsets in the left and right result vectors
1482        // first work out which of the children are expanded
1483      bool leftExp=(m_left->m_readytype=='E');
1484      bool rightExp=(m_right->m_readytype=='E');
1485      int steps=getNumDPPSample();
1486      int leftStep=(leftExp? m_left->getNoValues() : 0);        // do not have scalars as input to this method
1487      int rightStep=(rightExp?m_right->getNoValues() : 0);
1488    
1489      int resultStep=getNoValues();
1490      roffset=m_samplesize*tid;
1491      size_t offset=roffset;
1492    
1493      const ValueType* left=m_left->resolveNodeSample(tid, sampleNo, lroffset);
1494    
1495      const ValueType* right=m_right->resolveNodeSample(tid, sampleNo, rroffset);
1496    
1497    LAZYDEBUG(cerr << "[Left shape]=" << DataTypes::shapeToString(m_left->getShape()) << "\n[Right shape]=" << DataTypes::shapeToString(m_right->getShape()) << " result=" <<DataTypes::shapeToString(getShape()) <<  endl;
1498    cout << getNoValues() << endl;)
1499    
1500    
1501    LAZYDEBUG(cerr << "Post sub calls: " << toString() << endl;)
1502    LAZYDEBUG(cout << "LeftExp=" << leftExp << " rightExp=" << rightExp << endl;)
1503    LAZYDEBUG(cout << "LeftR=" << m_left->getRank() << " rightExp=" << m_right->getRank() << endl;)
1504    LAZYDEBUG(cout << "LeftSize=" << m_left->getNoValues() << " RightSize=" << m_right->getNoValues() << endl;)
1505    LAZYDEBUG(cout << "m_samplesize=" << m_samplesize << endl;)
1506    LAZYDEBUG(cout << "outputshape=" << DataTypes::shapeToString(getShape()) << endl;)
1507    LAZYDEBUG(cout << "DPPS=" << m_right->getNumDPPSample() <<"."<<endl;)
1508    
1509      double* resultp=&(m_samples[offset]);     // results are stored at the vector offset we recieved
1510      switch(m_op)
1511      {
1512        case PROD:
1513        for (int i=0;i<steps;++i,resultp+=resultStep)
1514        {
1515              const double *ptr_0 = &((*left)[lroffset]);
1516              const double *ptr_1 = &((*right)[rroffset]);
1517    
1518    LAZYDEBUG(cout << DataTypes::pointToString(*left, m_left->getShape(),lroffset,"LEFT") << endl;)
1519    LAZYDEBUG(cout << DataTypes::pointToString(*right,m_right->getShape(),rroffset, "RIGHT") << endl;)
1520    
1521              matrix_matrix_product(m_SL, m_SM, m_SR, ptr_0, ptr_1, resultp, m_transpose);
1522    
1523          lroffset+=leftStep;
1524          rroffset+=rightStep;
1525        }
1526        break;
1527        default:
1528        throw DataException("Programmer error - resolveTProduct can not resolve operator "+opToString(m_op)+".");
1529      }
1530      roffset=offset;
1531      return &m_samples;
1532    }
1533    
1534    
1535    const DataTypes::ValueType*
1536    DataLazy::resolveSample(int sampleNo, size_t& roffset)
1537    {
1538    #ifdef _OPENMP
1539        int tid=omp_get_thread_num();
1540    #else
1541        int tid=0;
1542    #endif
1543    
1544    #ifdef LAZY_STACK_PROF
1545        stackstart[tid]=&tid;
1546        stackend[tid]=&tid;
1547        const DataTypes::ValueType* r=resolveNodeSample(tid, sampleNo, roffset);
1548        size_t d=(size_t)stackstart[tid]-(size_t)stackend[tid];
1549        #pragma omp critical
1550        if (d>maxstackuse)
1551        {
1552    cout << "Max resolve Stack use " << d << endl;
1553            maxstackuse=d;
1554        }
1555        return r;
1556    #else
1557        return resolveNodeSample(tid, sampleNo, roffset);
1558    #endif
1559    }
1560    
1561    
1562    // This needs to do the work of the identity constructor
1563    void
1564    DataLazy::resolveToIdentity()
1565    {
1566       if (m_op==IDENTITY)
1567        return;
1568       DataReady_ptr p=resolveNodeWorker();
1569       makeIdentity(p);
1570  }  }
1571    
1572    void DataLazy::makeIdentity(const DataReady_ptr& p)
1573    {
1574       m_op=IDENTITY;
1575       m_axis_offset=0;
1576       m_transpose=0;
1577       m_SL=m_SM=m_SR=0;
1578       m_children=m_height=0;
1579       m_id=p;
1580       if(p->isConstant()) {m_readytype='C';}
1581       else if(p->isExpanded()) {m_readytype='E';}
1582       else if (p->isTagged()) {m_readytype='T';}
1583       else {throw DataException("Unknown DataReady instance in convertToIdentity constructor.");}
1584       m_samplesize=p->getNumDPPSample()*p->getNoValues();
1585       m_left.reset();
1586       m_right.reset();
1587    }
1588    
1589    
1590  DataReady_ptr  DataReady_ptr
1591  DataLazy::resolve()  DataLazy::resolve()
1592  {  {
1593    // This is broken!     We need to have a buffer per thread!      resolveToIdentity();
1594    // so the allocation of v needs to move inside the loop somehow      return m_id;
1595    }
1596    
 cout << "Sample size=" << m_samplesize << endl;  
 cout << "Buffers=" << m_buffsRequired << endl;  
1597    
1598    size_t threadbuffersize=m_samplesize*(max(1,m_buffsRequired)+1);  /* This is really a static method but I think that caused problems in windows */
1599    int numthreads=1;  void
1600    DataLazy::resolveGroupWorker(std::vector<DataLazy*>& dats)
1601    {
1602      if (dats.empty())
1603      {
1604        return;
1605      }
1606      vector<DataLazy*> work;
1607      FunctionSpace fs=dats[0]->getFunctionSpace();
1608      bool match=true;
1609      for (int i=dats.size()-1;i>=0;--i)
1610      {
1611        if (dats[i]->m_readytype!='E')
1612        {
1613            dats[i]->collapse();
1614        }
1615        if (dats[i]->m_op!=IDENTITY)
1616        {
1617            work.push_back(dats[i]);
1618            if (fs!=dats[i]->getFunctionSpace())
1619            {
1620                match=false;
1621            }
1622        }
1623      }
1624      if (work.empty())
1625      {
1626        return;     // no work to do
1627      }
1628      if (match)    // all functionspaces match.  Yes I realise this is overly strict
1629      {     // it is possible that dats[0] is one of the objects which we discarded and
1630            // all the other functionspaces match.
1631        vector<DataExpanded*> dep;
1632        vector<ValueType*> vecs;
1633        for (int i=0;i<work.size();++i)
1634        {
1635            dep.push_back(new DataExpanded(fs,work[i]->getShape(), ValueType(work[i]->getNoValues())));
1636            vecs.push_back(&(dep[i]->getVectorRW()));
1637        }
1638        int totalsamples=work[0]->getNumSamples();
1639        const ValueType* res=0; // Storage for answer
1640        int sample;
1641        #pragma omp parallel private(sample, res)
1642        {
1643            size_t roffset=0;
1644            #pragma omp for schedule(static)
1645            for (sample=0;sample<totalsamples;++sample)
1646            {
1647            roffset=0;
1648            int j;
1649            for (j=work.size()-1;j>0;--j)
1650            {
1651  #ifdef _OPENMP  #ifdef _OPENMP
1652    numthreads=omp_get_max_threads();                  res=work[j]->resolveNodeSample(omp_get_thread_num(),sample,roffset);
1653    int threadnum=0;  #else
1654  #endif                  res=work[j]->resolveNodeSample(0,sample,roffset);
1655    ValueType v(numthreads*threadbuffersize);  #endif
1656  cout << "Buffer created with size=" << v.size() << endl;                  DataVector::size_type outoffset=dep[j]->getPointOffset(sample,0);
1657    ValueType dummy(getNoValues());                  memcpy(&((*vecs[j])[outoffset]),&((*res)[roffset]),work[j]->m_samplesize*sizeof(DataVector::ElementType));
1658    DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(),dummy);          }
1659    ValueType& resvec=result->getVector();          }
1660        }
1661        // Now we need to load the new results as identity ops into the lazy nodes
1662        for (int i=work.size()-1;i>=0;--i)
1663        {
1664            work[i]->makeIdentity(boost::dynamic_pointer_cast<DataReady>(dep[i]->getPtr()));
1665        }
1666      }
1667      else  // functionspaces do not match
1668      {
1669        for (int i=0;i<work.size();++i)
1670        {
1671            work[i]->resolveToIdentity();
1672        }
1673      }
1674    }
1675    
1676    
1677    
1678    // This version of resolve uses storage in each node to hold results
1679    DataReady_ptr
1680    DataLazy::resolveNodeWorker()
1681    {
1682      if (m_readytype!='E')     // if the whole sub-expression is Constant or Tagged, then evaluate it normally
1683      {
1684        collapse();
1685      }
1686      if (m_op==IDENTITY)       // So a lazy expression of Constant or Tagged data will be returned here.
1687      {
1688        return m_id;
1689      }
1690        // from this point on we must have m_op!=IDENTITY and m_readytype=='E'
1691      DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(),  ValueType(getNoValues()));
1692      ValueType& resvec=result->getVectorRW();
1693    DataReady_ptr resptr=DataReady_ptr(result);    DataReady_ptr resptr=DataReady_ptr(result);
1694    
1695    int sample;    int sample;
   int resoffset;  
1696    int totalsamples=getNumSamples();    int totalsamples=getNumSamples();
1697    #pragma omp parallel for private(sample,resoffset,threadnum) schedule(static)    const ValueType* res=0;   // Storage for answer
1698    for (sample=0;sample<totalsamples;++sample)  LAZYDEBUG(cout << "Total number of samples=" <<totalsamples << endl;)
1699      #pragma omp parallel private(sample,res)
1700    {    {
1701        size_t roffset=0;
1702    #ifdef LAZY_STACK_PROF
1703        stackstart[omp_get_thread_num()]=&roffset;
1704        stackend[omp_get_thread_num()]=&roffset;
1705    #endif
1706        #pragma omp for schedule(static)
1707        for (sample=0;sample<totalsamples;++sample)
1708        {
1709            roffset=0;
1710  #ifdef _OPENMP  #ifdef _OPENMP
1711      const double* res=resolveSample(v,sample,threadbuffersize*omp_get_thread_num());              res=resolveNodeSample(omp_get_thread_num(),sample,roffset);
1712  #else  #else
1713      const double* res=resolveSample(v,sample,0);   // this would normally be v, but not if its a single IDENTITY op.              res=resolveNodeSample(0,sample,roffset);
1714  #endif  #endif
1715      resoffset=result->getPointOffset(sample,0);  LAZYDEBUG(cout << "Sample #" << sample << endl;)
1716      for (int i=0;i<m_samplesize;++i,++resoffset)    // copy values into the output vector  LAZYDEBUG(cout << "Final res[" << roffset<< "]=" << (*res)[roffset] << (*res)[roffset]<< endl; )
1717      {              DataVector::size_type outoffset=result->getPointOffset(sample,0);
1718      resvec[resoffset]=res[i];              memcpy(&(resvec[outoffset]),&((*res)[roffset]),m_samplesize*sizeof(DataVector::ElementType));
1719      }      }
1720    }    }
1721    #ifdef LAZY_STACK_PROF
1722      for (int i=0;i<getNumberOfThreads();++i)
1723      {
1724        size_t r=((size_t)stackstart[i] - (size_t)stackend[i]);
1725    //  cout << i << " " << stackstart[i] << " .. " << stackend[i] << " = " <<  r << endl;
1726        if (r>maxstackuse)
1727        {
1728            maxstackuse=r;
1729        }
1730      }
1731      cout << "Max resolve Stack use=" << maxstackuse << endl;
1732    #endif
1733    return resptr;    return resptr;
1734  }  }
1735    
# Line 430  std::string Line 1737  std::string
1737  DataLazy::toString() const  DataLazy::toString() const
1738  {  {
1739    ostringstream oss;    ostringstream oss;
1740    oss << "Lazy Data:";    oss << "Lazy Data: [depth=" << m_height<< "] ";
1741    intoString(oss);    switch (escriptParams.getLAZY_STR_FMT())
1742      {
1743      case 1:   // tree format
1744        oss << endl;
1745        intoTreeString(oss,"");
1746        break;
1747      case 2:   // just the depth
1748        break;
1749      default:
1750        intoString(oss);
1751        break;
1752      }
1753    return oss.str();    return oss.str();
1754  }  }
1755    
1756    
1757  void  void
1758  DataLazy::intoString(ostringstream& oss) const  DataLazy::intoString(ostringstream& oss) const
1759  {  {
1760    //    oss << "[" << m_children <<";"<<m_height <<"]";
1761    switch (getOpgroup(m_op))    switch (getOpgroup(m_op))
1762    {    {
1763    case G_IDENTITY:    case G_IDENTITY:
1764        if (m_id->isExpanded())
1765        {
1766           oss << "E";
1767        }
1768        else if (m_id->isTagged())
1769        {
1770          oss << "T";
1771        }
1772        else if (m_id->isConstant())
1773        {
1774          oss << "C";
1775        }
1776        else
1777        {
1778          oss << "?";
1779        }
1780      oss << '@' << m_id.get();      oss << '@' << m_id.get();
1781      break;      break;
1782    case G_BINARY:    case G_BINARY:
# Line 451  DataLazy::intoString(ostringstream& oss) Line 1787  DataLazy::intoString(ostringstream& oss)
1787      oss << ')';      oss << ')';
1788      break;      break;
1789    case G_UNARY:    case G_UNARY:
1790      case G_UNARY_P:
1791      case G_NP1OUT:
1792      case G_NP1OUT_P:
1793      case G_REDUCTION:
1794      oss << opToString(m_op) << '(';      oss << opToString(m_op) << '(';
1795      m_left->intoString(oss);      m_left->intoString(oss);
1796      oss << ')';      oss << ')';
1797      break;      break;
1798      case G_TENSORPROD:
1799        oss << opToString(m_op) << '(';
1800        m_left->intoString(oss);
1801        oss << ", ";
1802        m_right->intoString(oss);
1803        oss << ')';
1804        break;
1805      case G_NP1OUT_2P:
1806        oss << opToString(m_op) << '(';
1807        m_left->intoString(oss);
1808        oss << ", " << m_axis_offset << ", " << m_transpose;
1809        oss << ')';
1810        break;
1811    default:    default:
1812      oss << "UNKNOWN";      oss << "UNKNOWN";
1813    }    }
1814  }  }
1815    
1816  // Note that in this case, deepCopy does not make copies of the leaves.  
1817  // Hopefully copy on write (or whatever we end up using) will take care of this.  void
1818    DataLazy::intoTreeString(ostringstream& oss, string indent) const
1819    {
1820      oss << '[' << m_rank << ':' << setw(3) << m_samplesize << "] " << indent;
1821      switch (getOpgroup(m_op))
1822      {
1823      case G_IDENTITY:
1824        if (m_id->isExpanded())
1825        {
1826           oss << "E";
1827        }
1828        else if (m_id->isTagged())
1829        {
1830          oss << "T";
1831        }
1832        else if (m_id->isConstant())
1833        {
1834          oss << "C";
1835        }
1836        else
1837        {
1838          oss << "?";
1839        }
1840        oss << '@' << m_id.get() << endl;
1841        break;
1842      case G_BINARY:
1843        oss << opToString(m_op) << endl;
1844        indent+='.';
1845        m_left->intoTreeString(oss, indent);
1846        m_right->intoTreeString(oss, indent);
1847        break;
1848      case G_UNARY:
1849      case G_UNARY_P:
1850      case G_NP1OUT:
1851      case G_NP1OUT_P:
1852      case G_REDUCTION:
1853        oss << opToString(m_op) << endl;
1854        indent+='.';
1855        m_left->intoTreeString(oss, indent);
1856        break;
1857      case G_TENSORPROD:
1858        oss << opToString(m_op) << endl;
1859        indent+='.';
1860        m_left->intoTreeString(oss, indent);
1861        m_right->intoTreeString(oss, indent);
1862        break;
1863      case G_NP1OUT_2P:
1864        oss << opToString(m_op) << ", " << m_axis_offset << ", " << m_transpose<< endl;
1865        indent+='.';
1866        m_left->intoTreeString(oss, indent);
1867        break;
1868      default:
1869        oss << "UNKNOWN";
1870      }
1871    }
1872    
1873    
1874  DataAbstract*  DataAbstract*
1875  DataLazy::deepCopy()  DataLazy::deepCopy()
1876  {  {
1877    if (m_op==IDENTITY)    switch (getOpgroup(m_op))
1878    {    {
1879      return new DataLazy(m_left);    // we don't need to copy the child here    case G_IDENTITY:  return new DataLazy(m_id->deepCopy()->getPtr());
1880      case G_UNARY:
1881      case G_REDUCTION:      return new DataLazy(m_left->deepCopy()->getPtr(),m_op);
1882      case G_UNARY_P:   return new DataLazy(m_left->deepCopy()->getPtr(), m_op, m_tol);
1883      case G_BINARY:    return new DataLazy(m_left->deepCopy()->getPtr(),m_right->deepCopy()->getPtr(),m_op);
1884      case G_NP1OUT: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(),m_op);
1885      case G_TENSORPROD: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
1886      case G_NP1OUT_P:   return new DataLazy(m_left->deepCopy()->getPtr(),m_op,  m_axis_offset);
1887      case G_NP1OUT_2P:  return new DataLazy(m_left->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
1888      default:
1889        throw DataException("Programmer error - do not know how to deepcopy operator "+opToString(m_op)+".");
1890    }    }
   return new DataLazy(m_left->deepCopy()->getPtr(),m_right->deepCopy()->getPtr(),m_op);  
1891  }  }
1892    
1893    
1894    
1895    // There is no single, natural interpretation of getLength on DataLazy.
1896    // Instances of DataReady can look at the size of their vectors.
1897    // For lazy though, it could be the size the data would be if it were resolved;
1898    // or it could be some function of the lengths of the DataReady instances which
1899    // form part of the expression.
1900    // Rather than have people making assumptions, I have disabled the method.
1901  DataTypes::ValueType::size_type  DataTypes::ValueType::size_type
1902  DataLazy::getLength() const  DataLazy::getLength() const
1903  {  {
1904    return m_length;    throw DataException("getLength() does not make sense for lazy data.");
1905  }  }
1906    
1907    
# Line 486  DataLazy::getSlice(const DataTypes::Regi Line 1911  DataLazy::getSlice(const DataTypes::Regi
1911    throw DataException("getSlice - not implemented for Lazy objects.");    throw DataException("getSlice - not implemented for Lazy objects.");
1912  }  }
1913    
1914    
1915    // To do this we need to rely on our child nodes
1916    DataTypes::ValueType::size_type
1917    DataLazy::getPointOffset(int sampleNo,
1918                     int dataPointNo)
1919    {
1920      if (m_op==IDENTITY)
1921      {
1922        return m_id->getPointOffset(sampleNo,dataPointNo);
1923      }
1924      if (m_readytype!='E')
1925      {
1926        collapse();
1927        return m_id->getPointOffset(sampleNo,dataPointNo);
1928      }
1929      // at this point we do not have an identity node and the expression will be Expanded
1930      // so we only need to know which child to ask
1931      if (m_left->m_readytype=='E')
1932      {
1933        return m_left->getPointOffset(sampleNo,dataPointNo);
1934      }
1935      else
1936      {
1937        return m_right->getPointOffset(sampleNo,dataPointNo);
1938      }
1939    }
1940    
1941    // To do this we need to rely on our child nodes
1942  DataTypes::ValueType::size_type  DataTypes::ValueType::size_type
1943  DataLazy::getPointOffset(int sampleNo,  DataLazy::getPointOffset(int sampleNo,
1944                   int dataPointNo) const                   int dataPointNo) const
1945  {  {
1946    throw DataException("getPointOffset - not implemented for Lazy objects - yet.");    if (m_op==IDENTITY)
1947      {
1948        return m_id->getPointOffset(sampleNo,dataPointNo);
1949      }
1950      if (m_readytype=='E')
1951      {
1952        // at this point we do not have an identity node and the expression will be Expanded
1953        // so we only need to know which child to ask
1954        if (m_left->m_readytype=='E')
1955        {
1956        return m_left->getPointOffset(sampleNo,dataPointNo);
1957        }
1958        else
1959        {
1960        return m_right->getPointOffset(sampleNo,dataPointNo);
1961        }
1962      }
1963      if (m_readytype=='C')
1964      {
1965        return m_left->getPointOffset(sampleNo,dataPointNo); // which child doesn't matter
1966      }
1967      throw DataException("Programmer error - getPointOffset on lazy data may require collapsing (but this object is marked const).");
1968    }
1969    
1970    
1971    // I have decided to let Data:: handle this issue.
1972    void
1973    DataLazy::setToZero()
1974    {
1975    //   DataTypes::ValueType v(getNoValues(),0);
1976    //   m_id=DataReady_ptr(new DataConstant(getFunctionSpace(),getShape(),v));
1977    //   m_op=IDENTITY;
1978    //   m_right.reset();  
1979    //   m_left.reset();
1980    //   m_readytype='C';
1981    //   m_buffsRequired=1;
1982    
1983      privdebug=privdebug;  // to stop the compiler complaining about unused privdebug
1984      throw DataException("Programmer error - setToZero not supported for DataLazy (DataLazy objects should be read only).");
1985    }
1986    
1987    bool
1988    DataLazy::actsExpanded() const
1989    {
1990        return (m_readytype=='E');
1991  }  }
1992    
1993  }   // end namespace  }   // end namespace

Legend:
Removed from v.1888  
changed lines
  Added in v.2824

  ViewVC Help
Powered by ViewVC 1.1.26