/[escript]/branches/clazy/escriptcore/src/DataLazy.cpp
ViewVC logotype

Diff of /branches/clazy/escriptcore/src/DataLazy.cpp

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

branches/schroedinger/escript/src/DataLazy.cpp revision 1879 by jfenwick, Tue Oct 14 03:54:42 2008 UTC trunk/escriptcore/src/DataLazy.cpp revision 6083 by jfenwick, Tue Mar 22 03:30:18 2016 UTC
# Line 1  Line 1 
1    
2  /*******************************************************  /*****************************************************************************
3  *  *
4  * Copyright (c) 2003-2008 by University of Queensland  * Copyright (c) 2003-2016 by The University of Queensland
5  * Earth Systems Science Computational Center (ESSCC)  * http://www.uq.edu.au
 * http://www.uq.edu.au/esscc  
6  *  *
7  * Primary Business: Queensland, Australia  * Primary Business: Queensland, Australia
8  * Licensed under the Open Software License version 3.0  * Licensed under the Open Software License version 3.0
9  * http://www.opensource.org/licenses/osl-3.0.php  * http://www.opensource.org/licenses/osl-3.0.php
10  *  *
11  *******************************************************/  * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12    * Development 2012-2013 by School of Earth Sciences
13    * Development from 2014 by Centre for Geoscience Computing (GeoComp)
14    *
15    *****************************************************************************/
16    
17  #include "DataLazy.h"  #include "DataLazy.h"
18    #include "Data.h"
19    #include "DataTypes.h"
20    #include "EscriptParams.h"
21    #include "FunctionSpace.h"
22    #include "Utils.h"
23    #include "DataMaths.h"
24    
25  #ifdef USE_NETCDF  #ifdef USE_NETCDF
26  #include <netcdfcpp.h>  #include <netcdfcpp.h>
27  #endif  #endif
28  #ifdef PASO_MPI  
29  #include <mpi.h>  #include <iomanip> // for some fancy formatting in debug
30  #endif  
31  #include "FunctionSpace.h"  using namespace escript::DataTypes;
32  #include "DataTypes.h"  
33  #include "Data.h"  #define NO_ARG
34    
35    // #define LAZYDEBUG(X) if (privdebug){X;}
36    #define LAZYDEBUG(X)
37    namespace
38    {
39    bool privdebug=false;
40    
41    #define ENABLEDEBUG privdebug=true;
42    #define DISABLEDEBUG privdebug=false;
43    }
44    
45    // #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {cerr << "\n!!!!!!! SIZE LIMIT EXCEEDED " << m_children << ";" << m_height << endl << toString() << endl;resolveToIdentity();}
46    
47    // #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {cerr << "SIZE LIMIT EXCEEDED " << m_height << endl;resolveToIdentity();}
48    
49    
50    #define SIZELIMIT if (m_height>escript::escriptParams.getTOO_MANY_LEVELS())  {if (escript::escriptParams.getLAZY_VERBOSE()){cerr << "SIZE LIMIT EXCEEDED height=" << m_height << endl;}resolveToIdentity();}
51    
52    /*
53    How does DataLazy work?
54    ~~~~~~~~~~~~~~~~~~~~~~~
55    
56    Each instance represents a single operation on one or two other DataLazy instances. These arguments are normally
57    denoted left and right.
58    
59    A special operation, IDENTITY, stores an instance of DataReady in the m_id member.
60    This means that all "internal" nodes in the structure are instances of DataLazy.
61    
62    Each operation has a string representation as well as an opgroup - eg G_IDENTITY, G_BINARY, ...
63    Note that IDENTITY is not considered a unary operation.
64    
65    I am avoiding calling the structure formed a tree because it is not guaranteed to be one (eg c=a+a).
66    It must however form a DAG (directed acyclic graph).
67    I will refer to individual DataLazy objects with the structure as nodes.
68    
69    Each node also stores:
70    - m_readytype \in {'E','T','C','?'} ~ indicates what sort of DataReady would be produced if the expression was
71            evaluated.
72    - m_buffsrequired ~ the large number of samples which would need to be kept simultaneously in order to
73            evaluate the expression.
74    - m_samplesize ~ the number of doubles stored in a sample.
75    
76    When a new node is created, the above values are computed based on the values in the child nodes.
77    Eg: if left requires 4 samples and right requires 6 then left+right requires 7 samples.
78    
79    The resolve method, which produces a DataReady from a DataLazy, does the following:
80    1) Create a DataReady to hold the new result.
81    2) Allocate a vector (v) big enough to hold m_buffsrequired samples.
82    3) For each sample, call resolveSample with v, to get its values and copy them into the result object.
83    
84    (In the case of OMP, multiple samples are resolved in parallel so the vector needs to be larger.)
85    
86    resolveSample returns a Vector* and an offset within that vector where the result is stored.
87    Normally, this would be v, but for identity nodes their internal vector is returned instead.
88    
89    The convention that I use, is that the resolve methods should store their results starting at the offset they are passed.
90    
91    For expressions which evaluate to Constant or Tagged, there is a different evaluation method.
92    The collapse method invokes the (non-lazy) operations on the Data class to evaluate the expression.
93    
94    To add a new operator you need to do the following (plus anything I might have forgotten - adding a new group for example):
95    1) Add to the ES_optype.
96    2) determine what opgroup your operation belongs to (X)
97    3) add a string for the op to the end of ES_opstrings
98    4) increase ES_opcount
99    5) add an entry (X) to opgroups
100    6) add an entry to the switch in collapseToReady
101    7) add an entry to resolveX
102    */
103    
104    
105  using namespace std;  using namespace std;
106  using namespace boost;  using namespace boost;
# Line 29  using namespace boost; Line 108  using namespace boost;
108  namespace escript  namespace escript
109  {  {
110    
 const std::string&  
 opToString(ES_optype op);  
   
111  namespace  namespace
112  {  {
113    
114    
115    // enabling this will print out when ever the maximum stacksize used by resolve increases
116    // it assumes _OPENMP is also in use
117    //#define LAZY_STACK_PROF
118    
119    
120    
121    #ifndef _OPENMP
122      #ifdef LAZY_STACK_PROF
123      #undef LAZY_STACK_PROF
124      #endif
125    #endif
126    
127    
128    #ifdef LAZY_STACK_PROF
129    std::vector<void*> stackstart(getNumberOfThreads());
130    std::vector<void*> stackend(getNumberOfThreads());
131    size_t maxstackuse=0;
132    #endif
133    
134    enum ES_opgroup
135    {
136       G_UNKNOWN,
137       G_IDENTITY,
138       G_BINARY,            // pointwise operations with two arguments
139       G_UNARY,             // pointwise operations with one argument
140       G_UNARY_P,           // pointwise operations with one argument, requiring a parameter
141       G_NP1OUT,            // non-pointwise op with one output
142       G_NP1OUT_P,          // non-pointwise op with one output requiring a parameter
143       G_TENSORPROD,        // general tensor product
144       G_NP1OUT_2P,         // non-pointwise op with one output requiring two params
145       G_REDUCTION,         // non-pointwise unary op with a scalar output
146       G_CONDEVAL
147    };
148    
149    
150    
151    
152    string ES_opstrings[]={"UNKNOWN","IDENTITY","+","-","*","/","^",
153                            "sin","cos","tan",
154                            "asin","acos","atan","sinh","cosh","tanh","erf",
155                            "asinh","acosh","atanh",
156                            "log10","log","sign","abs","neg","pos","exp","sqrt",
157                            "1/","where>0","where<0","where>=0","where<=0", "where<>0","where=0",
158                            "symmetric","nonsymmetric",
159                            "prod",
160                            "transpose", "trace",
161                            "swapaxes",
162                            "minval", "maxval",
163                            "condEval"};
164    int ES_opcount=44;
165    ES_opgroup opgroups[]={G_UNKNOWN,G_IDENTITY,G_BINARY,G_BINARY,G_BINARY,G_BINARY, G_BINARY,
166                            G_UNARY,G_UNARY,G_UNARY, //10
167                            G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,        // 17
168                            G_UNARY,G_UNARY,G_UNARY,                                        // 20
169                            G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,        // 28
170                            G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY, G_UNARY_P, G_UNARY_P,          // 35
171                            G_NP1OUT,G_NP1OUT,
172                            G_TENSORPROD,
173                            G_NP1OUT_P, G_NP1OUT_P,
174                            G_NP1OUT_2P,
175                            G_REDUCTION, G_REDUCTION,
176                            G_CONDEVAL};
177    inline
178    ES_opgroup
179    getOpgroup(ES_optype op)
180    {
181      return opgroups[op];
182    }
183    
184  // return the FunctionSpace of the result of "left op right"  // return the FunctionSpace of the result of "left op right"
185  FunctionSpace  FunctionSpace
186  resultFS(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)  resultFS(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
187  {  {
188      // perhaps this should call interpolate and throw or something?          // perhaps this should call interpolate and throw or something?
189      // maybe we need an interpolate node -          // maybe we need an interpolate node -
190      // that way, if interpolate is required in any other op we can just throw a          // that way, if interpolate is required in any other op we can just throw a
191      // programming error exception.          // programming error exception.
192    
193      FunctionSpace l=left->getFunctionSpace();
194      if (left->getFunctionSpace()!=right->getFunctionSpace())    FunctionSpace r=right->getFunctionSpace();
195      {    if (l!=r)
196          throw DataException("FunctionSpaces not equal - interpolation not supported on lazy data.");    {
197      }      signed char res=r.getDomain()->preferredInterpolationOnDomain(r.getTypeCode(), l.getTypeCode());
198      return left->getFunctionSpace();      if (res==1)
199        {
200            return l;
201        }
202        if (res==-1)
203        {
204            return r;
205        }
206        throw DataException("Cannot interpolate between the FunctionSpaces given for operation "+opToString(op)+".");
207      }
208      return l;
209  }  }
210    
211  // return the shape of the result of "left op right"  // return the shape of the result of "left op right"
212    // the shapes resulting from tensor product are more complex to compute so are worked out elsewhere
213  DataTypes::ShapeType  DataTypes::ShapeType
214  resultShape(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)  resultShape(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
215  {  {
216      if (left->getShape()!=right->getShape())          if (left->getShape()!=right->getShape())
217      {          {
218          throw DataException("Shapes not the same - shapes must match for lazy data.");            if ((getOpgroup(op)!=G_BINARY) && (getOpgroup(op)!=G_NP1OUT))
219      }            {
220      return left->getShape();                  throw DataException("Shapes not the name - shapes must match for (point)binary operations.");
221  }            }
222    
223  size_t            if (left->getRank()==0)       // we need to allow scalar * anything
224  resultLength(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)            {
225  {                  return right->getShape();
226     switch(op)            }
227     {            if (right->getRank()==0)
228  //   case IDENTITY: return left->getLength();            {
229     case ADD:    // the length is preserved in these ops                  return left->getShape();
230     case SUB:            }
231     case MUL:            throw DataException("Shapes not the same - arguments must have matching shapes (or be scalars) for (point)binary operations on lazy data.");
232     case DIV: return left->getLength();          }
233     default:          return left->getShape();
234      throw DataException("Programmer Error - attempt to getLength() for operator "+opToString(op)+".");  }
235    
236     }  // return the shape for "op left"
237  }  
238    DataTypes::ShapeType
239  int  resultShape(DataAbstract_ptr left, ES_optype op, int axis_offset)
240  calcBuffs(const DataLazy_ptr& left, const DataLazy_ptr& right, ES_optype op)  {
241  {          switch(op)
242     switch(op)          {
243     {          case TRANS:
244     case IDENTITY: return 0;             {                    // for the scoping of variables
245     case ADD:    // the length is preserved in these ops                  const DataTypes::ShapeType& s=left->getShape();
246     case SUB:                  DataTypes::ShapeType sh;
247     case MUL:                  int rank=left->getRank();
248     case DIV: return max(left->getBuffsRequired(),right->getBuffsRequired());                  if (axis_offset<0 || axis_offset>rank)
249     default:                  {
250      throw DataException("Programmer Error - attempt to calcBuffs() for operator "+opToString(op)+".");              stringstream e;
251     }              e << "Error - Data::transpose must have 0 <= axis_offset <= rank=" << rank;
252  }              throw DataException(e.str());
253            }
254  string ES_opstrings[]={"UNKNOWN","IDENTITY","+","-","*","/"};          for (int i=0; i<rank; i++)
255  int ES_opcount=5;                  {
256                       int index = (axis_offset+i)%rank;
257  // void performOp(ValueType& v, int startOffset, ES_optype op, int m_samplesize)             sh.push_back(s[index]); // Append to new shape
258  // {          }
259  //    switch(op)                  return sh;
260  //    {             }
261  //    case ADD:  DataMaths::binaryOp(v,getShape(),startOffset,v,getShape(),          break;
262  //      startOffset+m_samplesize,plus<double>());          case TRACE:
263  //        break;                 {
264  //    case SUB:  DataMaths::binaryOp(v,getShape(),startOffset,v,getShape(),                  int rank=left->getRank();
265  //      startOffset+m_samplesize,minus<double>());                  if (rank<2)
266  //        break;                  {
267  //    case MUL:  DataMaths::binaryOp(v,getShape(),startOffset,v,getShape(),                     throw DataException("Trace can only be computed for objects with rank 2 or greater.");
268  //      startOffset+m_samplesize,multiplies<double>());                  }
269  //        break;                  if ((axis_offset>rank-2) || (axis_offset<0))
270  //    case DIV:  DataMaths::binaryOp(v,getShape(),startOffset,v,getShape(),                  {
271  //      startOffset+m_samplesize,divides<double>());                     throw DataException("Trace: axis offset must lie between 0 and rank-2 inclusive.");
272  //        break;                  }
273  //    default:                  if (rank==2)
274  //  throw DataException("Programmer Error - attempt to performOp() for operator "+opToString(op)+".");                  {
275  //    }                     return DataTypes::scalarShape;
276  //                  }
277  // }                  else if (rank==3)
278                    {
279                       DataTypes::ShapeType sh;
280                       if (axis_offset==0)
281                       {
282                            sh.push_back(left->getShape()[2]);
283                       }
284                       else         // offset==1
285                       {
286                            sh.push_back(left->getShape()[0]);
287                       }
288                       return sh;
289                    }
290                    else if (rank==4)
291                    {
292                       DataTypes::ShapeType sh;
293                       const DataTypes::ShapeType& s=left->getShape();
294                       if (axis_offset==0)
295                       {
296                            sh.push_back(s[2]);
297                            sh.push_back(s[3]);
298                       }
299                       else if (axis_offset==1)
300                       {
301                            sh.push_back(s[0]);
302                            sh.push_back(s[3]);
303                       }
304                       else         // offset==2
305                       {
306                            sh.push_back(s[0]);
307                            sh.push_back(s[1]);
308                       }
309                       return sh;
310                    }
311                    else            // unknown rank
312                    {
313                       throw DataException("Error - Data::trace can only be calculated for rank 2, 3 or 4 object.");
314                    }
315               }
316            break;
317            default:
318            throw DataException("Programmer error - resultShape(left,op) can't compute shapes for operator "+opToString(op)+".");
319            }
320    }
321    
322    DataTypes::ShapeType
323    SwapShape(DataAbstract_ptr left, const int axis0, const int axis1)
324    {
325         // This code taken from the Data.cpp swapaxes() method
326         // Some of the checks are probably redundant here
327         int axis0_tmp,axis1_tmp;
328         const DataTypes::ShapeType& s=left->getShape();
329         DataTypes::ShapeType out_shape;
330         // Here's the equivalent of python s_out=s[axis_offset:]+s[:axis_offset]
331         // which goes thru all shape vector elements starting with axis_offset (at index=rank wrap around to 0)
332         int rank=left->getRank();
333         if (rank<2) {
334            throw DataException("Error - Data::swapaxes argument must have at least rank 2.");
335         }
336         if (axis0<0 || axis0>rank-1) {
337            stringstream e;
338            e << "Error - Data::swapaxes: axis0 must be between 0 and rank-1=" << (rank-1);
339            throw DataException(e.str());
340         }
341         if (axis1<0 || axis1>rank-1) {
342            stringstream e;
343            e << "Error - Data::swapaxes: axis1 must be between 0 and rank-1=" << (rank-1);
344            throw DataException(e.str());
345         }
346         if (axis0 == axis1) {
347             throw DataException("Error - Data::swapaxes: axis indices must be different.");
348         }
349         if (axis0 > axis1) {
350             axis0_tmp=axis1;
351             axis1_tmp=axis0;
352         } else {
353             axis0_tmp=axis0;
354             axis1_tmp=axis1;
355         }
356         for (int i=0; i<rank; i++) {
357           if (i == axis0_tmp) {
358              out_shape.push_back(s[axis1_tmp]);
359           } else if (i == axis1_tmp) {
360              out_shape.push_back(s[axis0_tmp]);
361           } else {
362              out_shape.push_back(s[i]);
363           }
364         }
365        return out_shape;
366    }
367    
368    
369    // determine the output shape for the general tensor product operation
370    // the additional parameters return information required later for the product
371    // the majority of this code is copy pasted from C_General_Tensor_Product
372    DataTypes::ShapeType
373    GTPShape(DataAbstract_ptr left, DataAbstract_ptr right, int axis_offset, int transpose, int& SL, int& SM, int& SR)
374    {
375            
376      // Get rank and shape of inputs
377      int rank0 = left->getRank();
378      int rank1 = right->getRank();
379      const DataTypes::ShapeType& shape0 = left->getShape();
380      const DataTypes::ShapeType& shape1 = right->getShape();
381    
382      // Prepare for the loops of the product and verify compatibility of shapes
383      int start0=0, start1=0;
384      if (transpose == 0)           {}
385      else if (transpose == 1)      { start0 = axis_offset; }
386      else if (transpose == 2)      { start1 = rank1-axis_offset; }
387      else                          { throw DataException("DataLazy GeneralTensorProduct Constructor: Error - transpose should be 0, 1 or 2"); }
388    
389  }   // end anonymous namespace    if (rank0<axis_offset)
390      {
391            throw DataException("DataLazy GeneralTensorProduct Constructor: Error - rank of left < axisoffset");
392      }
393    
394      // Adjust the shapes for transpose
395      DataTypes::ShapeType tmpShape0(rank0);        // pre-sizing the vectors rather
396      DataTypes::ShapeType tmpShape1(rank1);        // than using push_back
397      for (int i=0; i<rank0; i++)   { tmpShape0[i]=shape0[(i+start0)%rank0]; }
398      for (int i=0; i<rank1; i++)   { tmpShape1[i]=shape1[(i+start1)%rank1]; }
399    
400      // Prepare for the loops of the product
401      SL=1, SM=1, SR=1;
402      for (int i=0; i<rank0-axis_offset; i++)       {
403        SL *= tmpShape0[i];
404      }
405      for (int i=rank0-axis_offset; i<rank0; i++)   {
406        if (tmpShape0[i] != tmpShape1[i-(rank0-axis_offset)]) {
407          throw DataException("C_GeneralTensorProduct: Error - incompatible shapes");
408        }
409        SM *= tmpShape0[i];
410      }
411      for (int i=axis_offset; i<rank1; i++)         {
412        SR *= tmpShape1[i];
413      }
414    
415      // Define the shape of the output (rank of shape is the sum of the loop ranges below)
416      DataTypes::ShapeType shape2(rank0+rank1-2*axis_offset);      
417      {                     // block to limit the scope of out_index
418         int out_index=0;
419         for (int i=0; i<rank0-axis_offset; i++, ++out_index) { shape2[out_index]=tmpShape0[i]; } // First part of arg_0_Z
420         for (int i=axis_offset; i<rank1; i++, ++out_index)   { shape2[out_index]=tmpShape1[i]; } // Last part of arg_1_Z
421      }
422    
423      if (shape2.size()>ESCRIPT_MAX_DATA_RANK)
424      {
425         ostringstream os;
426         os << "C_GeneralTensorProduct: Error - Attempt to create a rank " << shape2.size() << " object. The maximum rank is " << ESCRIPT_MAX_DATA_RANK << ".";
427         throw DataException(os.str());
428      }
429    
430      return shape2;
431    }
432    
433    }       // end anonymous namespace
434    
435    
436    
437    // Return a string representing the operation
438  const std::string&  const std::string&
439  opToString(ES_optype op)  opToString(ES_optype op)
440  {  {
# Line 132  opToString(ES_optype op) Line 445  opToString(ES_optype op)
445    return ES_opstrings[op];    return ES_opstrings[op];
446  }  }
447    
448    void DataLazy::LazyNodeSetup()
449    {
450    #ifdef _OPENMP
451        int numthreads=omp_get_max_threads();
452        m_samples.resize(numthreads*m_samplesize);
453        m_sampleids=new int[numthreads];
454        for (int i=0;i<numthreads;++i)
455        {
456            m_sampleids[i]=-1;  
457        }
458    #else
459        m_samples.resize(m_samplesize);
460        m_sampleids=new int[1];
461        m_sampleids[0]=-1;
462    #endif  // _OPENMP
463    }
464    
465    
466    // Creates an identity node
467  DataLazy::DataLazy(DataAbstract_ptr p)  DataLazy::DataLazy(DataAbstract_ptr p)
468      : parent(p->getFunctionSpace(),p->getShape()),          : parent(p->getFunctionSpace(),p->getShape())
469      m_op(IDENTITY)          ,m_sampleids(0),
470            m_samples(1)
471  {  {
472     if (p->isLazy())     if (p->isLazy())
473     {     {
474      // TODO: fix this.   We could make the new node a copy of p?          // I don't want identity of Lazy.
475      // I don't want identity of Lazy.          // Question: Why would that be so bad?
476      // Question: Why would that be so bad?          // Answer: We assume that the child of ID is something we can call getVector on
477      // Answer: We assume that the child of ID is something we can call getVector on          throw DataException("Programmer error - attempt to create identity from a DataLazy.");
     throw DataException("Programmer error - attempt to create identity from a DataLazy.");  
478     }     }
479     else     else
480     {     {
481      m_id=dynamic_pointer_cast<DataReady>(p);          p->makeLazyShared();
482            DataReady_ptr dr=dynamic_pointer_cast<DataReady>(p);
483            makeIdentity(dr);
484    LAZYDEBUG(cout << "Wrapping " << dr.get() << " id=" << m_id.get() << endl;)
485     }     }
486     m_length=p->getLength();  LAZYDEBUG(cout << "(1)Lazy created with " << m_samplesize << endl;)
    m_buffsRequired=0;  
    m_samplesize=getNumDPPSample()*getNoValues();  
 cout << "(1)Lazy created with " << m_samplesize << endl;  
487  }  }
488    
489  DataLazy::DataLazy(DataLazy_ptr left, DataLazy_ptr right, ES_optype op)  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op)
490      : parent(resultFS(left,right,op), resultShape(left,right,op)),          : parent(left->getFunctionSpace(),(getOpgroup(op)!=G_REDUCTION)?left->getShape():DataTypes::scalarShape),
491      m_left(left),          m_op(op),
492      m_right(right),          m_axis_offset(0),
493      m_op(op)          m_transpose(0),
494            m_SL(0), m_SM(0), m_SR(0)
495  {  {
496     m_length=resultLength(m_left,m_right,m_op);     if ((getOpgroup(op)!=G_UNARY) && (getOpgroup(op)!=G_NP1OUT) && (getOpgroup(op)!=G_REDUCTION))
497       {
498            throw DataException("Programmer error - constructor DataLazy(left, op) will only process UNARY operations.");
499       }
500    
501       DataLazy_ptr lleft;
502       if (!left->isLazy())
503       {
504            lleft=DataLazy_ptr(new DataLazy(left));
505       }
506       else
507       {
508            lleft=dynamic_pointer_cast<DataLazy>(left);
509       }
510       m_readytype=lleft->m_readytype;
511       m_left=lleft;
512     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
513     m_buffsRequired=calcBuffs(m_left, m_right, m_op);     m_children=m_left->m_children+1;
514  cout << "(2)Lazy created with " << m_samplesize << endl;     m_height=m_left->m_height+1;
515       LazyNodeSetup();
516       SIZELIMIT
517  }  }
518    
519    
520    // In this constructor we need to consider interpolation
521  DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)  DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
522      : parent(resultFS(left,right,op), resultShape(left,right,op)),          : parent(resultFS(left,right,op), resultShape(left,right,op)),
523      m_op(op)          m_op(op),
524            m_SL(0), m_SM(0), m_SR(0)
525    {
526    LAZYDEBUG(cout << "Forming operator with " << left.get() << " " << right.get() << endl;)
527       if ((getOpgroup(op)!=G_BINARY))
528       {
529            throw DataException("Programmer error - constructor DataLazy(left, right, op) will only process BINARY operations.");
530       }
531    
532       if (getFunctionSpace()!=left->getFunctionSpace())    // left needs to be interpolated
533       {
534            FunctionSpace fs=getFunctionSpace();
535            Data ltemp(left);
536            Data tmp(ltemp,fs);
537            left=tmp.borrowDataPtr();
538       }
539       if (getFunctionSpace()!=right->getFunctionSpace())   // right needs to be interpolated
540       {
541            Data tmp(Data(right),getFunctionSpace());
542            right=tmp.borrowDataPtr();
543    LAZYDEBUG(cout << "Right interpolation required " << right.get() << endl;)
544       }
545       left->operandCheck(*right);
546    
547       if (left->isLazy())                  // the children need to be DataLazy. Wrap them in IDENTITY if required
548       {
549            m_left=dynamic_pointer_cast<DataLazy>(left);
550    LAZYDEBUG(cout << "Left is " << m_left->toString() << endl;)
551       }
552       else
553       {
554            m_left=DataLazy_ptr(new DataLazy(left));
555    LAZYDEBUG(cout << "Left " << left.get() << " wrapped " << m_left->m_id.get() << endl;)
556       }
557       if (right->isLazy())
558       {
559            m_right=dynamic_pointer_cast<DataLazy>(right);
560    LAZYDEBUG(cout << "Right is " << m_right->toString() << endl;)
561       }
562       else
563       {
564            m_right=DataLazy_ptr(new DataLazy(right));
565    LAZYDEBUG(cout << "Right " << right.get() << " wrapped " << m_right->m_id.get() << endl;)
566       }
567       char lt=m_left->m_readytype;
568       char rt=m_right->m_readytype;
569       if (lt=='E' || rt=='E')
570       {
571            m_readytype='E';
572       }
573       else if (lt=='T' || rt=='T')
574       {
575            m_readytype='T';
576       }
577       else
578       {
579            m_readytype='C';
580       }
581       m_samplesize=getNumDPPSample()*getNoValues();
582       m_children=m_left->m_children+m_right->m_children+2;
583       m_height=max(m_left->m_height,m_right->m_height)+1;
584       LazyNodeSetup();
585       SIZELIMIT
586    LAZYDEBUG(cout << "(3)Lazy created with " << m_samplesize << endl;)
587    }
588    
589    DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op, int axis_offset, int transpose)
590            : parent(resultFS(left,right,op), GTPShape(left,right, axis_offset, transpose, m_SL,m_SM, m_SR)),
591            m_op(op),
592            m_axis_offset(axis_offset),
593            m_transpose(transpose)
594  {  {
595     if (left->isLazy())     if ((getOpgroup(op)!=G_TENSORPROD))
596       {
597            throw DataException("Programmer error - constructor DataLazy(left, right, op, ax, tr) will only process BINARY operations which require parameters.");
598       }
599       if ((transpose>2) || (transpose<0))
600       {
601            throw DataException("DataLazy GeneralTensorProduct constructor: Error - transpose should be 0, 1 or 2");
602       }
603       if (getFunctionSpace()!=left->getFunctionSpace())    // left needs to be interpolated
604     {     {
605      m_left=dynamic_pointer_cast<DataLazy>(left);          FunctionSpace fs=getFunctionSpace();
606            Data ltemp(left);
607            Data tmp(ltemp,fs);
608            left=tmp.borrowDataPtr();
609       }
610       if (getFunctionSpace()!=right->getFunctionSpace())   // right needs to be interpolated
611       {
612            Data tmp(Data(right),getFunctionSpace());
613            right=tmp.borrowDataPtr();
614       }
615    //    left->operandCheck(*right);
616    
617       if (left->isLazy())                  // the children need to be DataLazy. Wrap them in IDENTITY if required
618       {
619            m_left=dynamic_pointer_cast<DataLazy>(left);
620     }     }
621     else     else
622     {     {
623      m_left=DataLazy_ptr(new DataLazy(left));          m_left=DataLazy_ptr(new DataLazy(left));
624     }     }
625     if (right->isLazy())     if (right->isLazy())
626     {     {
627      m_right=dynamic_pointer_cast<DataLazy>(right);          m_right=dynamic_pointer_cast<DataLazy>(right);
628       }
629       else
630       {
631            m_right=DataLazy_ptr(new DataLazy(right));
632       }
633       char lt=m_left->m_readytype;
634       char rt=m_right->m_readytype;
635       if (lt=='E' || rt=='E')
636       {
637            m_readytype='E';
638       }
639       else if (lt=='T' || rt=='T')
640       {
641            m_readytype='T';
642       }
643       else
644       {
645            m_readytype='C';
646       }
647       m_samplesize=getNumDPPSample()*getNoValues();
648       m_children=m_left->m_children+m_right->m_children+2;
649       m_height=max(m_left->m_height,m_right->m_height)+1;
650       LazyNodeSetup();
651       SIZELIMIT
652    LAZYDEBUG(cout << "(4)Lazy created with " << m_samplesize << endl;)
653    }
654    
655    
656    DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, int axis_offset)
657            : parent(left->getFunctionSpace(), resultShape(left,op, axis_offset)),
658            m_op(op),
659            m_axis_offset(axis_offset),
660            m_transpose(0),
661            m_tol(0)
662    {
663       if ((getOpgroup(op)!=G_NP1OUT_P))
664       {
665            throw DataException("Programmer error - constructor DataLazy(left, op, ax) will only process UNARY operations which require parameters.");
666       }
667       DataLazy_ptr lleft;
668       if (!left->isLazy())
669       {
670            lleft=DataLazy_ptr(new DataLazy(left));
671       }
672       else
673       {
674            lleft=dynamic_pointer_cast<DataLazy>(left);
675       }
676       m_readytype=lleft->m_readytype;
677       m_left=lleft;
678       m_samplesize=getNumDPPSample()*getNoValues();
679       m_children=m_left->m_children+1;
680       m_height=m_left->m_height+1;
681       LazyNodeSetup();
682       SIZELIMIT
683    LAZYDEBUG(cout << "(5)Lazy created with " << m_samplesize << endl;)
684    }
685    
686    DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, double tol)
687            : parent(left->getFunctionSpace(), left->getShape()),
688            m_op(op),
689            m_axis_offset(0),
690            m_transpose(0),
691            m_tol(tol)
692    {
693       if ((getOpgroup(op)!=G_UNARY_P))
694       {
695            throw DataException("Programmer error - constructor DataLazy(left, op, tol) will only process UNARY operations which require parameters.");
696       }
697       DataLazy_ptr lleft;
698       if (!left->isLazy())
699       {
700            lleft=DataLazy_ptr(new DataLazy(left));
701       }
702       else
703       {
704            lleft=dynamic_pointer_cast<DataLazy>(left);
705       }
706       m_readytype=lleft->m_readytype;
707       m_left=lleft;
708       m_samplesize=getNumDPPSample()*getNoValues();
709       m_children=m_left->m_children+1;
710       m_height=m_left->m_height+1;
711       LazyNodeSetup();
712       SIZELIMIT
713    LAZYDEBUG(cout << "(6)Lazy created with " << m_samplesize << endl;)
714    }
715    
716    
717    DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, const int axis0, const int axis1)
718            : parent(left->getFunctionSpace(), SwapShape(left,axis0,axis1)),
719            m_op(op),
720            m_axis_offset(axis0),
721            m_transpose(axis1),
722            m_tol(0)
723    {
724       if ((getOpgroup(op)!=G_NP1OUT_2P))
725       {
726            throw DataException("Programmer error - constructor DataLazy(left, op, tol) will only process UNARY operations which require two integer parameters.");
727       }
728       DataLazy_ptr lleft;
729       if (!left->isLazy())
730       {
731            lleft=DataLazy_ptr(new DataLazy(left));
732     }     }
733     else     else
734     {     {
735      m_right=DataLazy_ptr(new DataLazy(right));          lleft=dynamic_pointer_cast<DataLazy>(left);
736     }     }
737       m_readytype=lleft->m_readytype;
738       m_left=lleft;
739       m_samplesize=getNumDPPSample()*getNoValues();
740       m_children=m_left->m_children+1;
741       m_height=m_left->m_height+1;
742       LazyNodeSetup();
743       SIZELIMIT
744    LAZYDEBUG(cout << "(7)Lazy created with " << m_samplesize << endl;)
745    }
746    
747    
748    namespace
749    {
750    
751        inline int max3(int a, int b, int c)
752        {
753            int t=(a>b?a:b);
754            return (t>c?t:c);
755    
756     m_length=resultLength(m_left,m_right,m_op);      }
757    }
758    
759    DataLazy::DataLazy(DataAbstract_ptr mask, DataAbstract_ptr left, DataAbstract_ptr right/*, double tol*/)
760            : parent(left->getFunctionSpace(), left->getShape()),
761            m_op(CONDEVAL),
762            m_axis_offset(0),
763            m_transpose(0),
764            m_tol(0)
765    {
766    
767       DataLazy_ptr lmask;
768       DataLazy_ptr lleft;
769       DataLazy_ptr lright;
770       if (!mask->isLazy())
771       {
772            lmask=DataLazy_ptr(new DataLazy(mask));
773       }
774       else
775       {
776            lmask=dynamic_pointer_cast<DataLazy>(mask);
777       }
778       if (!left->isLazy())
779       {
780            lleft=DataLazy_ptr(new DataLazy(left));
781       }
782       else
783       {
784            lleft=dynamic_pointer_cast<DataLazy>(left);
785       }
786       if (!right->isLazy())
787       {
788            lright=DataLazy_ptr(new DataLazy(right));
789       }
790       else
791       {
792            lright=dynamic_pointer_cast<DataLazy>(right);
793       }
794       m_readytype=lmask->m_readytype;
795       if ((lleft->m_readytype!=lright->m_readytype) || (lmask->m_readytype!=lleft->m_readytype))
796       {
797            throw DataException("Programmer Error - condEval arguments must have the same readytype");
798       }
799       m_left=lleft;
800       m_right=lright;
801       m_mask=lmask;
802     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
803     m_buffsRequired=calcBuffs(m_left, m_right,m_op);     m_children=m_left->m_children+m_right->m_children+m_mask->m_children+1;
804  cout << "(3)Lazy created with " << m_samplesize << endl;     m_height=max3(m_left->m_height,m_right->m_height,m_mask->m_height)+1;
805       LazyNodeSetup();
806       SIZELIMIT
807    LAZYDEBUG(cout << "(8)Lazy created with " << m_samplesize << endl;)
808  }  }
809    
810    
811    
812  DataLazy::~DataLazy()  DataLazy::~DataLazy()
813  {  {
814       delete[] m_sampleids;
815  }  }
816    
817    
818  int  /*
819  DataLazy::getBuffsRequired() const    \brief Evaluates the expression using methods on Data.
820      This does the work for the collapse method.
821      For reasons of efficiency do not call this method on DataExpanded nodes.
822    */
823    DataReady_ptr
824    DataLazy::collapseToReady() const
825  {  {
826      return m_buffsRequired;    if (m_readytype=='E')
827      {     // this is more an efficiency concern than anything else
828        throw DataException("Programmer Error - do not use collapse on Expanded data.");
829      }
830      if (m_op==IDENTITY)
831      {
832        return m_id;
833      }
834      DataReady_ptr pleft=m_left->collapseToReady();
835      Data left(pleft);
836      Data right;
837      if ((getOpgroup(m_op)==G_BINARY) || (getOpgroup(m_op)==G_TENSORPROD))
838      {
839        right=Data(m_right->collapseToReady());
840      }
841      Data result;
842      switch(m_op)
843      {
844        case ADD:
845            result=left+right;
846            break;
847        case SUB:          
848            result=left-right;
849            break;
850        case MUL:          
851            result=left*right;
852            break;
853        case DIV:          
854            result=left/right;
855            break;
856        case SIN:
857            result=left.sin();      
858            break;
859        case COS:
860            result=left.cos();
861            break;
862        case TAN:
863            result=left.tan();
864            break;
865        case ASIN:
866            result=left.asin();
867            break;
868        case ACOS:
869            result=left.acos();
870            break;
871        case ATAN:
872            result=left.atan();
873            break;
874        case SINH:
875            result=left.sinh();
876            break;
877        case COSH:
878            result=left.cosh();
879            break;
880        case TANH:
881            result=left.tanh();
882            break;
883        case ERF:
884            result=left.erf();
885            break;
886       case ASINH:
887            result=left.asinh();
888            break;
889       case ACOSH:
890            result=left.acosh();
891            break;
892       case ATANH:
893            result=left.atanh();
894            break;
895        case LOG10:
896            result=left.log10();
897            break;
898        case LOG:
899            result=left.log();
900            break;
901        case SIGN:
902            result=left.sign();
903            break;
904        case ABS:
905            result=left.abs();
906            break;
907        case NEG:
908            result=left.neg();
909            break;
910        case POS:
911            // it doesn't mean anything for delayed.
912            // it will just trigger a deep copy of the lazy object
913            throw DataException("Programmer error - POS not supported for lazy data.");
914            break;
915        case EXP:
916            result=left.exp();
917            break;
918        case SQRT:
919            result=left.sqrt();
920            break;
921        case RECIP:
922            result=left.oneOver();
923            break;
924        case GZ:
925            result=left.wherePositive();
926            break;
927        case LZ:
928            result=left.whereNegative();
929            break;
930        case GEZ:
931            result=left.whereNonNegative();
932            break;
933        case LEZ:
934            result=left.whereNonPositive();
935            break;
936        case NEZ:
937            result=left.whereNonZero(m_tol);
938            break;
939        case EZ:
940            result=left.whereZero(m_tol);
941            break;
942        case SYM:
943            result=left.symmetric();
944            break;
945        case NSYM:
946            result=left.nonsymmetric();
947            break;
948        case PROD:
949            result=C_GeneralTensorProduct(left,right,m_axis_offset, m_transpose);
950            break;
951        case TRANS:
952            result=left.transpose(m_axis_offset);
953            break;
954        case TRACE:
955            result=left.trace(m_axis_offset);
956            break;
957        case SWAP:
958            result=left.swapaxes(m_axis_offset, m_transpose);
959            break;
960        case MINVAL:
961            result=left.minval();
962            break;
963        case MAXVAL:
964            result=left.minval();
965            break;
966        default:
967            throw DataException("Programmer error - collapseToReady does not know how to resolve operator "+opToString(m_op)+".");
968      }
969      return result.borrowReadyPtr();
970  }  }
971    
972    /*
973       \brief Converts the DataLazy into an IDENTITY storing the value of the expression.
974       This method uses the original methods on the Data class to evaluate the expressions.
975       For this reason, it should not be used on DataExpanded instances. (To do so would defeat
976       the purpose of using DataLazy in the first place).
977    */
978  void  void
979  DataLazy::resolveSample(ValueType& v,int sampleNo,  size_t offset ) const  DataLazy::collapse() const
980  {  {
981    if (m_op==IDENTITY)   // copy the contents into the vector    if (m_op==IDENTITY)
982    {    {
983  cout << "Begin ID" << endl;          return;
984  cout << "dpps=" << getNumDPPSample() << " novals=" << getNoValues() << endl;    }
985      const ValueType& vec=m_id->getVector();    if (m_readytype=='E')
986      size_t srcOffset=m_id->getPointOffset(sampleNo, 0);    {     // this is more an efficiency concern than anything else
987  cout << "v.size()=" << v.size() << " vec=" << vec.size() << endl;      throw DataException("Programmer Error - do not use collapse on Expanded data.");
988      for (size_t i=0;i<m_samplesize;++i,++srcOffset,++offset)    }
989      {    m_id=collapseToReady();
990  cout << "Trying offset=" << offset << " srcOffset=" << srcOffset << endl;    m_op=IDENTITY;
991      v[offset]=vec[srcOffset];    }
992      }  
993  cout << "End ID" << endl;  // The result will be stored in m_samples
994      return;  // The return value is a pointer to the DataVector, offset is the offset within the return value
995    const DataTypes::RealVectorType*
996    DataLazy::resolveNodeSample(int tid, int sampleNo, size_t& roffset) const
997    {
998    LAZYDEBUG(cout << "Resolve sample " << toString() << endl;)
999            // collapse so we have a 'E' node or an IDENTITY for some other type
1000      if (m_readytype!='E' && m_op!=IDENTITY)
1001      {
1002            collapse();
1003    }    }
1004    size_t rightoffset=offset+m_samplesize;    if (m_op==IDENTITY)  
   m_left->resolveSample(v,sampleNo,offset);  
   m_right->resolveSample(v,sampleNo,rightoffset);  
 //  for (int i=0;i<getNumDPPSample();++i)  
1005    {    {
1006      switch(m_op)      const RealVectorType& vec=m_id->getVectorRO();
1007      {      roffset=m_id->getPointOffset(sampleNo, 0);
1008      case ADD:       // since these are pointwise ops, pretend each sample is one point  #ifdef LAZY_STACK_PROF
1009      tensor_binary_operation(m_samplesize,&(v[offset]),&(v[rightoffset]),&(v[offset]),plus<double>());  int x;
1010    if (&x<stackend[omp_get_thread_num()])
1011    {
1012           stackend[omp_get_thread_num()]=&x;
1013    }
1014    #endif
1015        return &(vec);
1016      }
1017      if (m_readytype!='E')
1018      {
1019        throw DataException("Programmer Error - Collapse did not produce an expanded node.");
1020      }
1021      if (m_sampleids[tid]==sampleNo)
1022      {
1023            roffset=tid*m_samplesize;
1024            return &(m_samples);            // sample is already resolved
1025      }
1026      m_sampleids[tid]=sampleNo;
1027    
1028      switch (getOpgroup(m_op))
1029      {
1030      case G_UNARY:
1031      case G_UNARY_P: return resolveNodeUnary(tid, sampleNo, roffset);
1032      case G_BINARY: return resolveNodeBinary(tid, sampleNo, roffset);
1033      case G_NP1OUT: return resolveNodeNP1OUT(tid, sampleNo, roffset);
1034      case G_NP1OUT_P: return resolveNodeNP1OUT_P(tid, sampleNo, roffset);
1035      case G_TENSORPROD: return resolveNodeTProd(tid, sampleNo, roffset);
1036      case G_NP1OUT_2P: return resolveNodeNP1OUT_2P(tid, sampleNo, roffset);
1037      case G_REDUCTION: return resolveNodeReduction(tid, sampleNo, roffset);
1038      case G_CONDEVAL: return resolveNodeCondEval(tid, sampleNo, roffset);
1039      default:
1040        throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");
1041      }
1042    }
1043    
1044    const DataTypes::RealVectorType*
1045    DataLazy::resolveNodeUnary(int tid, int sampleNo, size_t& roffset) const
1046    {
1047            // we assume that any collapsing has been done before we get here
1048            // since we only have one argument we don't need to think about only
1049            // processing single points.
1050            // we will also know we won't get identity nodes
1051      if (m_readytype!='E')
1052      {
1053        throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
1054      }
1055      if (m_op==IDENTITY)
1056      {
1057        throw DataException("Programmer error - resolveNodeUnary should not be called on identity nodes.");
1058      }
1059      const DataTypes::RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, roffset);
1060      const double* left=&((*leftres)[roffset]);
1061      roffset=m_samplesize*tid;
1062      double* result=&(m_samples[roffset]);
1063      escript::ESFunction operation=SINF;
1064      switch (m_op)
1065      {
1066        case SIN:
1067        operation=SINF;
1068        break;
1069        case COS:
1070            operation=COSF;
1071        break;
1072        case TAN:
1073            operation=TANF;
1074        break;
1075        case ASIN:
1076            operation=ASINF;
1077        break;
1078        case ACOS:
1079            operation=ACOSF;
1080        break;
1081        case ATAN:
1082            operation=ATANF;
1083        break;
1084        case SINH:
1085            operation=SINHF;
1086        break;
1087        case COSH:
1088            operation=COSHF;
1089      break;      break;
1090      case SUB:            case TANH:
1091      tensor_binary_operation(m_samplesize,&(v[offset]),&(v[rightoffset]),&(v[offset]),minus<double>());          operation=TANHF;
1092      break;      break;
1093      case MUL:            case ERF:
1094      tensor_binary_operation(m_samplesize,&(v[offset]),&(v[rightoffset]),&(v[offset]),multiplies<double>());          operation=ERFF;
1095      break;      break;
1096      case DIV:           case ASINH:
1097      tensor_binary_operation(m_samplesize,&(v[offset]),&(v[rightoffset]),&(v[offset]),divides<double>());          operation=ASINHF;
1098        break;
1099       case ACOSH:
1100            operation=ACOSHF;
1101        break;
1102       case ATANH:
1103            operation=ATANHF;
1104        break;
1105        case LOG10:
1106            operation=LOG10F;
1107        break;
1108        case LOG:
1109            operation=LOGF;
1110        break;
1111        case SIGN:
1112            operation=SIGNF;
1113        break;
1114        case ABS:
1115            operation=ABSF;
1116        break;
1117        case NEG:
1118            operation=NEGF;
1119        break;
1120        case POS:
1121            // it doesn't mean anything for delayed.
1122            // it will just trigger a deep copy of the lazy object
1123            throw DataException("Programmer error - POS not supported for lazy data.");
1124            break;
1125        case EXP:
1126            operation=EXPF;
1127        break;
1128        case SQRT:
1129            operation=SQRTF;
1130        break;
1131        case RECIP:
1132            operation=INVF;
1133        break;
1134        case GZ:
1135            operation=GTZEROF;
1136        break;
1137        case LZ:
1138            operation=LTZEROF;
1139        break;
1140        case GEZ:
1141            operation=GEZEROF;
1142        break;
1143        case LEZ:
1144            operation=LEZEROF;
1145        break;
1146    // There are actually G_UNARY_P but I don't see a compelling reason to treat them differently
1147        case NEZ:
1148            operation=NEQZEROF;
1149        break;
1150        case EZ:
1151            operation=EQZEROF;
1152      break;      break;
1153      default:      default:
1154      throw DataException("Programmer error - do not know how to resolve operator "+opToString(m_op)+".");          throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");
1155      }    }
1156      tensor_unary_array_operation(m_samplesize,
1157                                 left,
1158                                 result,
1159                                 operation,
1160                                 m_tol);  
1161      return &(m_samples);
1162    }
1163    
1164    
1165    const DataTypes::RealVectorType*
1166    DataLazy::resolveNodeReduction(int tid, int sampleNo, size_t& roffset) const
1167    {
1168            // we assume that any collapsing has been done before we get here
1169            // since we only have one argument we don't need to think about only
1170            // processing single points.
1171            // we will also know we won't get identity nodes
1172      if (m_readytype!='E')
1173      {
1174        throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
1175      }
1176      if (m_op==IDENTITY)
1177      {
1178        throw DataException("Programmer error - resolveNodeUnary should not be called on identity nodes.");
1179      }
1180      size_t loffset=0;
1181      const DataTypes::RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, loffset);
1182    
1183      roffset=m_samplesize*tid;
1184      unsigned int ndpps=getNumDPPSample();
1185      unsigned int psize=DataTypes::noValues(m_left->getShape());
1186      double* result=&(m_samples[roffset]);
1187      switch (m_op)
1188      {
1189        case MINVAL:
1190            {
1191              for (unsigned int z=0;z<ndpps;++z)
1192              {
1193                FMin op;
1194                *result=escript::reductionOpVector(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max());
1195                loffset+=psize;
1196                result++;
1197              }
1198            }
1199            break;
1200        case MAXVAL:
1201            {
1202              for (unsigned int z=0;z<ndpps;++z)
1203              {
1204              FMax op;
1205              *result=escript::reductionOpVector(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max()*-1);
1206              loffset+=psize;
1207              result++;
1208              }
1209            }
1210            break;
1211        default:
1212            throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");
1213      }
1214      return &(m_samples);
1215    }
1216    
1217    const DataTypes::RealVectorType*
1218    DataLazy::resolveNodeNP1OUT(int tid, int sampleNo, size_t& roffset) const
1219    {
1220            // we assume that any collapsing has been done before we get here
1221            // since we only have one argument we don't need to think about only
1222            // processing single points.
1223      if (m_readytype!='E')
1224      {
1225        throw DataException("Programmer error - resolveNodeNP1OUT should only be called on expanded Data.");
1226      }
1227      if (m_op==IDENTITY)
1228      {
1229        throw DataException("Programmer error - resolveNodeNP1OUT should not be called on identity nodes.");
1230      }
1231      size_t subroffset;
1232      const RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1233      roffset=m_samplesize*tid;
1234      size_t loop=0;
1235      size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1236      size_t step=getNoValues();
1237      size_t offset=roffset;
1238      switch (m_op)
1239      {
1240        case SYM:
1241            for (loop=0;loop<numsteps;++loop)
1242            {
1243                escript::symmetric(*leftres,m_left->getShape(),subroffset, m_samples, getShape(), offset);
1244                subroffset+=step;
1245                offset+=step;
1246            }
1247            break;
1248        case NSYM:
1249            for (loop=0;loop<numsteps;++loop)
1250            {
1251                escript::nonsymmetric(*leftres,m_left->getShape(),subroffset, m_samples, getShape(), offset);
1252                subroffset+=step;
1253                offset+=step;
1254            }
1255            break;
1256        default:
1257            throw DataException("Programmer error - resolveNP1OUT can not resolve operator "+opToString(m_op)+".");
1258      }
1259      return &m_samples;
1260    }
1261    
1262    const DataTypes::RealVectorType*
1263    DataLazy::resolveNodeNP1OUT_P(int tid, int sampleNo, size_t& roffset) const
1264    {
1265            // we assume that any collapsing has been done before we get here
1266            // since we only have one argument we don't need to think about only
1267            // processing single points.
1268      if (m_readytype!='E')
1269      {
1270        throw DataException("Programmer error - resolveNodeNP1OUT_P should only be called on expanded Data.");
1271      }
1272      if (m_op==IDENTITY)
1273      {
1274        throw DataException("Programmer error - resolveNodeNP1OUT_P should not be called on identity nodes.");
1275      }
1276      size_t subroffset;
1277      size_t offset;
1278      const RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1279      roffset=m_samplesize*tid;
1280      offset=roffset;
1281      size_t loop=0;
1282      size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1283      size_t outstep=getNoValues();
1284      size_t instep=m_left->getNoValues();
1285      switch (m_op)
1286      {
1287        case TRACE:
1288            for (loop=0;loop<numsteps;++loop)
1289            {
1290                escript::trace(*leftres,m_left->getShape(),subroffset, m_samples ,getShape(),offset,m_axis_offset);
1291                subroffset+=instep;
1292                offset+=outstep;
1293            }
1294            break;
1295        case TRANS:
1296            for (loop=0;loop<numsteps;++loop)
1297            {
1298                escript::transpose(*leftres,m_left->getShape(),subroffset, m_samples, getShape(),offset,m_axis_offset);
1299                subroffset+=instep;
1300                offset+=outstep;
1301            }
1302            break;
1303        default:
1304            throw DataException("Programmer error - resolveNP1OUTP can not resolve operator "+opToString(m_op)+".");
1305      }
1306      return &m_samples;
1307    }
1308    
1309    
1310    const DataTypes::RealVectorType*
1311    DataLazy::resolveNodeNP1OUT_2P(int tid, int sampleNo, size_t& roffset) const
1312    {
1313      if (m_readytype!='E')
1314      {
1315        throw DataException("Programmer error - resolveNodeNP1OUT_2P should only be called on expanded Data.");
1316      }
1317      if (m_op==IDENTITY)
1318      {
1319        throw DataException("Programmer error - resolveNodeNP1OUT_2P should not be called on identity nodes.");
1320      }
1321      size_t subroffset;
1322      size_t offset;
1323      const RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1324      roffset=m_samplesize*tid;
1325      offset=roffset;
1326      size_t loop=0;
1327      size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1328      size_t outstep=getNoValues();
1329      size_t instep=m_left->getNoValues();
1330      switch (m_op)
1331      {
1332        case SWAP:
1333            for (loop=0;loop<numsteps;++loop)
1334            {
1335                escript::swapaxes(*leftres,m_left->getShape(),subroffset, m_samples, getShape(),offset, m_axis_offset, m_transpose);
1336                subroffset+=instep;
1337                offset+=outstep;
1338            }
1339            break;
1340        default:
1341            throw DataException("Programmer error - resolveNodeNP1OUT2P can not resolve operator "+opToString(m_op)+".");
1342      }
1343      return &m_samples;
1344    }
1345    
1346    const DataTypes::RealVectorType*
1347    DataLazy::resolveNodeCondEval(int tid, int sampleNo, size_t& roffset) const
1348    {
1349      if (m_readytype!='E')
1350      {
1351        throw DataException("Programmer error - resolveNodeCondEval should only be called on expanded Data.");
1352      }
1353      if (m_op!=CONDEVAL)
1354      {
1355        throw DataException("Programmer error - resolveNodeCondEval should only be called on CONDEVAL nodes.");
1356      }
1357      size_t subroffset;
1358    
1359      const RealVectorType* maskres=m_mask->resolveNodeSample(tid, sampleNo, subroffset);
1360      const RealVectorType* srcres=0;
1361      if ((*maskres)[subroffset]>0)
1362      {
1363            srcres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1364      }
1365      else
1366      {
1367            srcres=m_right->resolveNodeSample(tid, sampleNo, subroffset);
1368      }
1369    
1370      // Now we need to copy the result
1371    
1372      roffset=m_samplesize*tid;
1373      for (int i=0;i<m_samplesize;++i)
1374      {
1375            m_samples[roffset+i]=(*srcres)[subroffset+i];  
1376      }
1377    
1378      return &m_samples;
1379    }
1380    
1381    // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1382    // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1383    // If both children are expanded, then we can process them in a single operation (we treat
1384    // the whole sample as one big datapoint.
1385    // If one of the children is not expanded, then we need to treat each point in the sample
1386    // individually.
1387    // There is an additional complication when scalar operations are considered.
1388    // For example, 2+Vector.
1389    // In this case each double within the point is treated individually
1390    const DataTypes::RealVectorType*
1391    DataLazy::resolveNodeBinary(int tid, int sampleNo, size_t& roffset) const
1392    {
1393    LAZYDEBUG(cout << "Resolve binary: " << toString() << endl;)
1394    
1395      size_t lroffset=0, rroffset=0;        // offsets in the left and right result vectors
1396            // first work out which of the children are expanded
1397      bool leftExp=(m_left->m_readytype=='E');
1398      bool rightExp=(m_right->m_readytype=='E');
1399      if (!leftExp && !rightExp)
1400      {
1401            throw DataException("Programmer Error - please use collapse if neither argument has type 'E'.");
1402      }
1403      bool leftScalar=(m_left->getRank()==0);
1404      bool rightScalar=(m_right->getRank()==0);
1405      if ((m_left->getRank()!=m_right->getRank()) && (!leftScalar && !rightScalar))
1406      {
1407            throw DataException("resolveBinary - ranks of arguments must match unless one of them is scalar.");
1408      }
1409      size_t leftsize=m_left->getNoValues();
1410      size_t rightsize=m_right->getNoValues();
1411      size_t chunksize=1;                   // how many doubles will be processed in one go
1412      int leftstep=0;               // how far should the left offset advance after each step
1413      int rightstep=0;
1414      int numsteps=0;               // total number of steps for the inner loop
1415      int oleftstep=0;      // the o variables refer to the outer loop
1416      int orightstep=0;     // The outer loop is only required in cases where there is an extended scalar
1417      int onumsteps=1;
1418      
1419      bool LES=(leftExp && leftScalar);     // Left is an expanded scalar
1420      bool RES=(rightExp && rightScalar);
1421      bool LS=(!leftExp && leftScalar);     // left is a single scalar
1422      bool RS=(!rightExp && rightScalar);
1423      bool LN=(!leftExp && !leftScalar);    // left is a single non-scalar
1424      bool RN=(!rightExp && !rightScalar);
1425      bool LEN=(leftExp && !leftScalar);    // left is an expanded non-scalar
1426      bool REN=(rightExp && !rightScalar);
1427    
1428      if ((LES && RES) || (LEN && REN))     // both are Expanded scalars or both are expanded non-scalars
1429      {
1430            chunksize=m_left->getNumDPPSample()*leftsize;
1431            leftstep=0;
1432            rightstep=0;
1433            numsteps=1;
1434      }
1435      else if (LES || RES)
1436      {
1437            chunksize=1;
1438            if (LES)                // left is an expanded scalar
1439            {
1440                    if (RS)
1441                    {
1442                       leftstep=1;
1443                       rightstep=0;
1444                       numsteps=m_left->getNumDPPSample();
1445                    }
1446                    else            // RN or REN
1447                    {
1448                       leftstep=0;
1449                       oleftstep=1;
1450                       rightstep=1;
1451                       orightstep=(RN ? -(int)rightsize : 0);
1452                       numsteps=rightsize;
1453                       onumsteps=m_left->getNumDPPSample();
1454                    }
1455            }
1456            else            // right is an expanded scalar
1457            {
1458                    if (LS)
1459                    {
1460                       rightstep=1;
1461                       leftstep=0;
1462                       numsteps=m_right->getNumDPPSample();
1463                    }
1464                    else
1465                    {
1466                       rightstep=0;
1467                       orightstep=1;
1468                       leftstep=1;
1469                       oleftstep=(LN ? -(int)leftsize : 0);
1470                       numsteps=leftsize;
1471                       onumsteps=m_right->getNumDPPSample();
1472                    }
1473            }
1474      }
1475      else  // this leaves (LEN, RS), (LEN, RN) and their transposes
1476      {
1477            if (LEN)        // and Right will be a single value
1478            {
1479                    chunksize=rightsize;
1480                    leftstep=rightsize;
1481                    rightstep=0;
1482                    numsteps=m_left->getNumDPPSample();
1483                    if (RS)
1484                    {
1485                       numsteps*=leftsize;
1486                    }
1487            }
1488            else    // REN
1489            {
1490                    chunksize=leftsize;
1491                    rightstep=leftsize;
1492                    leftstep=0;
1493                    numsteps=m_right->getNumDPPSample();
1494                    if (LS)
1495                    {
1496                       numsteps*=rightsize;
1497                    }
1498            }
1499      }
1500    
1501      int resultStep=max(leftstep,rightstep);       // only one (at most) should be !=0
1502            // Get the values of sub-expressions
1503      const RealVectorType* left=m_left->resolveNodeSample(tid,sampleNo,lroffset);      
1504      const RealVectorType* right=m_right->resolveNodeSample(tid,sampleNo,rroffset);
1505    LAZYDEBUG(cout << "Post sub calls in " << toString() << endl;)
1506    LAZYDEBUG(cout << "shapes=" << DataTypes::shapeToString(m_left->getShape()) << "," << DataTypes::shapeToString(m_right->getShape()) << endl;)
1507    LAZYDEBUG(cout << "chunksize=" << chunksize << endl << "leftstep=" << leftstep << " rightstep=" << rightstep;)
1508    LAZYDEBUG(cout << " numsteps=" << numsteps << endl << "oleftstep=" << oleftstep << " orightstep=" << orightstep;)
1509    LAZYDEBUG(cout << "onumsteps=" << onumsteps << endl;)
1510    LAZYDEBUG(cout << " DPPS=" << m_left->getNumDPPSample() << "," <<m_right->getNumDPPSample() << endl;)
1511    LAZYDEBUG(cout << "" << LS << RS << LN << RN << LES << RES <<LEN << REN <<   endl;)
1512    
1513    LAZYDEBUG(cout << "Left res["<< lroffset<< "]=" << (*left)[lroffset] << endl;)
1514    LAZYDEBUG(cout << "Right res["<< rroffset<< "]=" << (*right)[rroffset] << endl;)
1515    
1516    
1517      roffset=m_samplesize*tid;
1518      double* resultp=&(m_samples[roffset]);                // results are stored at the vector offset we received
1519      switch(m_op)
1520      {
1521        case ADD:
1522            //PROC_OP(NO_ARG,plus<double>());
1523          escript::binaryOpVectorLazyHelper<real_t, real_t, real_t>(resultp,
1524                 &(*left)[0],
1525                 &(*right)[0],
1526                 chunksize,
1527                 onumsteps,
1528                 numsteps,
1529                 resultStep,
1530                 leftstep,
1531                 rightstep,
1532                 oleftstep,
1533                 orightstep,
1534                 lroffset,
1535                 rroffset,
1536                 escript::ESFunction::PLUSF);  
1537            break;
1538        case SUB:
1539          escript::binaryOpVectorLazyHelper<real_t, real_t, real_t>(resultp,
1540                 &(*left)[0],
1541                 &(*right)[0],
1542                 chunksize,
1543                 onumsteps,
1544                 numsteps,
1545                 resultStep,
1546                 leftstep,
1547                 rightstep,
1548                 oleftstep,
1549                 orightstep,
1550                 lroffset,
1551                 rroffset,
1552                 escript::ESFunction::MINUSF);        
1553            //PROC_OP(NO_ARG,minus<double>());
1554            break;
1555        case MUL:
1556            //PROC_OP(NO_ARG,multiplies<double>());
1557          escript::binaryOpVectorLazyHelper<real_t, real_t, real_t>(resultp,
1558                 &(*left)[0],
1559                 &(*right)[0],
1560                 chunksize,
1561                 onumsteps,
1562                 numsteps,
1563                 resultStep,
1564                 leftstep,
1565                 rightstep,
1566                 oleftstep,
1567                 orightstep,
1568                 lroffset,
1569                 rroffset,
1570                 escript::ESFunction::MULTIPLIESF);      
1571            break;
1572        case DIV:
1573            //PROC_OP(NO_ARG,divides<double>());
1574          escript::binaryOpVectorLazyHelper<real_t, real_t, real_t>(resultp,
1575                 &(*left)[0],
1576                 &(*right)[0],
1577                 chunksize,
1578                 onumsteps,
1579                 numsteps,
1580                 resultStep,
1581                 leftstep,
1582                 rightstep,
1583                 oleftstep,
1584                 orightstep,
1585                 lroffset,
1586                 rroffset,
1587                 escript::ESFunction::DIVIDESF);          
1588            break;
1589        case POW:
1590           //PROC_OP(double (double,double),::pow);
1591          escript::binaryOpVectorLazyHelper<real_t, real_t, real_t>(resultp,
1592                 &(*left)[0],
1593                 &(*right)[0],
1594                 chunksize,
1595                 onumsteps,
1596                 numsteps,
1597                 resultStep,
1598                 leftstep,
1599                 rightstep,
1600                 oleftstep,
1601                 orightstep,
1602                 lroffset,
1603                 rroffset,
1604                 escript::ESFunction::POWF);          
1605            break;
1606        default:
1607            throw DataException("Programmer error - resolveBinary can not resolve operator "+opToString(m_op)+".");
1608      }
1609    LAZYDEBUG(cout << "Result res[" << roffset<< "]" << m_samples[roffset] << endl;)
1610      return &m_samples;
1611    }
1612    
1613    
1614    // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1615    // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1616    // unlike the other resolve helpers, we must treat these datapoints separately.
1617    const DataTypes::RealVectorType*
1618    DataLazy::resolveNodeTProd(int tid, int sampleNo, size_t& roffset) const
1619    {
1620    LAZYDEBUG(cout << "Resolve TensorProduct: " << toString() << endl;)
1621    
1622      size_t lroffset=0, rroffset=0;        // offsets in the left and right result vectors
1623            // first work out which of the children are expanded
1624      bool leftExp=(m_left->m_readytype=='E');
1625      bool rightExp=(m_right->m_readytype=='E');
1626      int steps=getNumDPPSample();
1627      int leftStep=(leftExp? m_left->getNoValues() : 0);            // do not have scalars as input to this method
1628      int rightStep=(rightExp?m_right->getNoValues() : 0);
1629    
1630      int resultStep=getNoValues();
1631      roffset=m_samplesize*tid;
1632      size_t offset=roffset;
1633    
1634      const RealVectorType* left=m_left->resolveNodeSample(tid, sampleNo, lroffset);
1635    
1636      const RealVectorType* right=m_right->resolveNodeSample(tid, sampleNo, rroffset);
1637    
1638    LAZYDEBUG(cerr << "[Left shape]=" << DataTypes::shapeToString(m_left->getShape()) << "\n[Right shape]=" << DataTypes::shapeToString(m_right->getShape()) << " result=" <<DataTypes::shapeToString(getShape()) <<  endl;
1639    cout << getNoValues() << endl;)
1640    
1641    
1642    LAZYDEBUG(cerr << "Post sub calls: " << toString() << endl;)
1643    LAZYDEBUG(cout << "LeftExp=" << leftExp << " rightExp=" << rightExp << endl;)
1644    LAZYDEBUG(cout << "LeftR=" << m_left->getRank() << " rightExp=" << m_right->getRank() << endl;)
1645    LAZYDEBUG(cout << "LeftSize=" << m_left->getNoValues() << " RightSize=" << m_right->getNoValues() << endl;)
1646    LAZYDEBUG(cout << "m_samplesize=" << m_samplesize << endl;)
1647    LAZYDEBUG(cout << "outputshape=" << DataTypes::shapeToString(getShape()) << endl;)
1648    LAZYDEBUG(cout << "DPPS=" << m_right->getNumDPPSample() <<"."<<endl;)
1649    
1650      double* resultp=&(m_samples[offset]);         // results are stored at the vector offset we received
1651      switch(m_op)
1652      {
1653        case PROD:
1654            for (int i=0;i<steps;++i,resultp+=resultStep)
1655            {
1656              const double *ptr_0 = &((*left)[lroffset]);
1657              const double *ptr_1 = &((*right)[rroffset]);
1658    
1659    LAZYDEBUG(cout << DataTypes::pointToString(*left, m_left->getShape(),lroffset,"LEFT") << endl;)
1660    LAZYDEBUG(cout << DataTypes::pointToString(*right,m_right->getShape(),rroffset, "RIGHT") << endl;)
1661    
1662              matrix_matrix_product(m_SL, m_SM, m_SR, ptr_0, ptr_1, resultp, m_transpose);
1663    
1664              lroffset+=leftStep;
1665              rroffset+=rightStep;
1666            }
1667            break;
1668        default:
1669            throw DataException("Programmer error - resolveTProduct can not resolve operator "+opToString(m_op)+".");
1670    }    }
1671      roffset=offset;
1672      return &m_samples;
1673  }  }
1674    
1675    
1676    const DataTypes::RealVectorType*
1677    DataLazy::resolveSample(int sampleNo, size_t& roffset) const
1678    {
1679    #ifdef _OPENMP
1680            int tid=omp_get_thread_num();
1681    #else
1682            int tid=0;
1683    #endif
1684    
1685    #ifdef LAZY_STACK_PROF
1686            stackstart[tid]=&tid;
1687            stackend[tid]=&tid;
1688            const DataTypes::RealVectorType* r=resolveNodeSample(tid, sampleNo, roffset);
1689            size_t d=(size_t)stackstart[tid]-(size_t)stackend[tid];
1690            #pragma omp critical
1691            if (d>maxstackuse)
1692            {
1693    cout << "Max resolve Stack use " << d << endl;
1694                    maxstackuse=d;
1695            }
1696            return r;
1697    #else
1698            return resolveNodeSample(tid, sampleNo, roffset);
1699    #endif
1700    }
1701    
1702    
1703    // This needs to do the work of the identity constructor
1704    void
1705    DataLazy::resolveToIdentity()
1706    {
1707       if (m_op==IDENTITY)
1708            return;
1709       DataReady_ptr p=resolveNodeWorker();
1710       makeIdentity(p);
1711    }
1712    
1713    void DataLazy::makeIdentity(const DataReady_ptr& p)
1714    {
1715       m_op=IDENTITY;
1716       m_axis_offset=0;
1717       m_transpose=0;
1718       m_SL=m_SM=m_SR=0;
1719       m_children=m_height=0;
1720       m_id=p;
1721       if(p->isConstant()) {m_readytype='C';}
1722       else if(p->isExpanded()) {m_readytype='E';}
1723       else if (p->isTagged()) {m_readytype='T';}
1724       else {throw DataException("Unknown DataReady instance in convertToIdentity constructor.");}
1725       m_samplesize=p->getNumDPPSample()*p->getNoValues();
1726       m_left.reset();
1727       m_right.reset();
1728    }
1729    
1730    
1731  DataReady_ptr  DataReady_ptr
1732  DataLazy::resolve()  DataLazy::resolve()
1733  {  {
1734    // This is broken!     We need to have a buffer per thread!      resolveToIdentity();
1735    // so the allocation of v needs to move inside the loop somehow      return m_id;
1736    }
1737    
 cout << "Sample size=" << m_samplesize << endl;  
 cout << "Buffers=" << m_buffsRequired << endl;  
1738    
1739    ValueType v(m_samplesize*max(1,m_buffsRequired));  /* This is really a static method but I think that caused problems in windows */
1740  cout << "Buffer created with size=" << v.size() << endl;  void
1741    ValueType dummy(getNoValues());  DataLazy::resolveGroupWorker(std::vector<DataLazy*>& dats)
1742    DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(),dummy);  {
1743    ValueType& resvec=result->getVector();    if (dats.empty())
1744      {
1745            return;
1746      }
1747      vector<DataLazy*> work;
1748      FunctionSpace fs=dats[0]->getFunctionSpace();
1749      bool match=true;
1750      for (int i=dats.size()-1;i>=0;--i)
1751      {
1752            if (dats[i]->m_readytype!='E')
1753            {
1754                    dats[i]->collapse();
1755            }
1756            if (dats[i]->m_op!=IDENTITY)
1757            {
1758                    work.push_back(dats[i]);
1759                    if (fs!=dats[i]->getFunctionSpace())
1760                    {
1761                            match=false;
1762                    }
1763            }
1764      }
1765      if (work.empty())
1766      {
1767            return;         // no work to do
1768      }
1769      if (match)    // all functionspaces match.  Yes I realise this is overly strict
1770      {             // it is possible that dats[0] is one of the objects which we discarded and
1771                    // all the other functionspaces match.
1772            vector<DataExpanded*> dep;
1773            vector<RealVectorType*> vecs;
1774            for (int i=0;i<work.size();++i)
1775            {
1776                    dep.push_back(new DataExpanded(fs,work[i]->getShape(), RealVectorType(work[i]->getNoValues())));
1777                    vecs.push_back(&(dep[i]->getVectorRW()));
1778            }
1779            int totalsamples=work[0]->getNumSamples();
1780            const RealVectorType* res=0; // Storage for answer
1781            int sample;
1782            #pragma omp parallel private(sample, res)
1783            {
1784                size_t roffset=0;
1785                #pragma omp for schedule(static)
1786                for (sample=0;sample<totalsamples;++sample)
1787                {
1788                    roffset=0;
1789                    int j;
1790                    for (j=work.size()-1;j>=0;--j)
1791                    {
1792    #ifdef _OPENMP
1793                        res=work[j]->resolveNodeSample(omp_get_thread_num(),sample,roffset);
1794    #else
1795                        res=work[j]->resolveNodeSample(0,sample,roffset);
1796    #endif
1797                        RealVectorType::size_type outoffset=dep[j]->getPointOffset(sample,0);
1798                        memcpy(&((*vecs[j])[outoffset]),&((*res)[roffset]),work[j]->m_samplesize*sizeof(RealVectorType::ElementType));
1799                    }
1800                }
1801            }
1802            // Now we need to load the new results as identity ops into the lazy nodes
1803            for (int i=work.size()-1;i>=0;--i)
1804            {
1805                work[i]->makeIdentity(REFCOUNTNS::dynamic_pointer_cast<DataReady>(dep[i]->getPtr()));
1806            }
1807      }
1808      else  // functionspaces do not match
1809      {
1810            for (int i=0;i<work.size();++i)
1811            {
1812                    work[i]->resolveToIdentity();
1813            }
1814      }
1815    }
1816    
1817    
1818    
1819    // This version of resolve uses storage in each node to hold results
1820    DataReady_ptr
1821    DataLazy::resolveNodeWorker()
1822    {
1823      if (m_readytype!='E')         // if the whole sub-expression is Constant or Tagged, then evaluate it normally
1824      {
1825        collapse();
1826      }
1827      if (m_op==IDENTITY)           // So a lazy expression of Constant or Tagged data will be returned here.
1828      {
1829        return m_id;
1830      }
1831            // from this point on we must have m_op!=IDENTITY and m_readytype=='E'
1832      DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(),  RealVectorType(getNoValues()));
1833      RealVectorType& resvec=result->getVectorRW();
1834    DataReady_ptr resptr=DataReady_ptr(result);    DataReady_ptr resptr=DataReady_ptr(result);
1835    
1836    int sample;    int sample;
1837    #pragma omp parallel for private(sample) schedule(static)    int totalsamples=getNumSamples();
1838    for (sample=0;sample<getNumSamples();++sample)    const RealVectorType* res=0;       // Storage for answer
1839    LAZYDEBUG(cout << "Total number of samples=" <<totalsamples << endl;)
1840      #pragma omp parallel private(sample,res)
1841    {    {
1842      resolveSample(v,sample,0);          size_t roffset=0;
1843      for (int i=0;i<m_samplesize;++i)    // copy values into the output vector  #ifdef LAZY_STACK_PROF
1844      {          stackstart[omp_get_thread_num()]=&roffset;
1845      resvec[i]=v[i];          stackend[omp_get_thread_num()]=&roffset;
1846      }  #endif
1847            #pragma omp for schedule(static)
1848            for (sample=0;sample<totalsamples;++sample)
1849            {
1850                    roffset=0;
1851    #ifdef _OPENMP
1852                    res=resolveNodeSample(omp_get_thread_num(),sample,roffset);
1853    #else
1854                    res=resolveNodeSample(0,sample,roffset);
1855    #endif
1856    LAZYDEBUG(cout << "Sample #" << sample << endl;)
1857    LAZYDEBUG(cout << "Final res[" << roffset<< "]=" << (*res)[roffset] << (*res)[roffset]<< endl; )
1858                    RealVectorType::size_type outoffset=result->getPointOffset(sample,0);
1859                    memcpy(&(resvec[outoffset]),&((*res)[roffset]),m_samplesize*sizeof(RealVectorType::ElementType));
1860            }
1861    }    }
1862    #ifdef LAZY_STACK_PROF
1863      for (int i=0;i<getNumberOfThreads();++i)
1864      {
1865            size_t r=((size_t)stackstart[i] - (size_t)stackend[i]);
1866    //      cout << i << " " << stackstart[i] << " .. " << stackend[i] << " = " <<  r << endl;
1867            if (r>maxstackuse)
1868            {
1869                    maxstackuse=r;
1870            }
1871      }
1872      cout << "Max resolve Stack use=" << maxstackuse << endl;
1873    #endif
1874    return resptr;    return resptr;
1875  }  }
1876    
1877  std::string  std::string
1878  DataLazy::toString() const  DataLazy::toString() const
1879  {  {
1880    return "Lazy evaluation object. No details available.";    ostringstream oss;
1881      oss << "Lazy Data: [depth=" << m_height<< "] ";
1882      switch (escriptParams.getLAZY_STR_FMT())
1883      {
1884      case 1:       // tree format
1885            oss << endl;
1886            intoTreeString(oss,"");
1887            break;
1888      case 2:       // just the depth
1889            break;
1890      default:
1891            intoString(oss);
1892            break;
1893      }
1894      return oss.str();
1895    }
1896    
1897    
1898    void
1899    DataLazy::intoString(ostringstream& oss) const
1900    {
1901    //    oss << "[" << m_children <<";"<<m_height <<"]";
1902      switch (getOpgroup(m_op))
1903      {
1904      case G_IDENTITY:
1905            if (m_id->isExpanded())
1906            {
1907               oss << "E";
1908            }
1909            else if (m_id->isTagged())
1910            {
1911              oss << "T";
1912            }
1913            else if (m_id->isConstant())
1914            {
1915              oss << "C";
1916            }
1917            else
1918            {
1919              oss << "?";
1920            }
1921            oss << '@' << m_id.get();
1922            break;
1923      case G_BINARY:
1924            oss << '(';
1925            m_left->intoString(oss);
1926            oss << ' ' << opToString(m_op) << ' ';
1927            m_right->intoString(oss);
1928            oss << ')';
1929            break;
1930      case G_UNARY:
1931      case G_UNARY_P:
1932      case G_NP1OUT:
1933      case G_NP1OUT_P:
1934      case G_REDUCTION:
1935            oss << opToString(m_op) << '(';
1936            m_left->intoString(oss);
1937            oss << ')';
1938            break;
1939      case G_TENSORPROD:
1940            oss << opToString(m_op) << '(';
1941            m_left->intoString(oss);
1942            oss << ", ";
1943            m_right->intoString(oss);
1944            oss << ')';
1945            break;
1946      case G_NP1OUT_2P:
1947            oss << opToString(m_op) << '(';
1948            m_left->intoString(oss);
1949            oss << ", " << m_axis_offset << ", " << m_transpose;
1950            oss << ')';
1951            break;
1952      case G_CONDEVAL:
1953            oss << opToString(m_op)<< '(' ;
1954            m_mask->intoString(oss);
1955            oss << " ? ";
1956            m_left->intoString(oss);
1957            oss << " : ";
1958            m_right->intoString(oss);
1959            oss << ')';
1960            break;
1961      default:
1962            oss << "UNKNOWN";
1963      }
1964    }
1965    
1966    
1967    void
1968    DataLazy::intoTreeString(ostringstream& oss, string indent) const
1969    {
1970      oss << '[' << m_rank << ':' << setw(3) << m_samplesize << "] " << indent;
1971      switch (getOpgroup(m_op))
1972      {
1973      case G_IDENTITY:
1974            if (m_id->isExpanded())
1975            {
1976               oss << "E";
1977            }
1978            else if (m_id->isTagged())
1979            {
1980              oss << "T";
1981            }
1982            else if (m_id->isConstant())
1983            {
1984              oss << "C";
1985            }
1986            else
1987            {
1988              oss << "?";
1989            }
1990            oss << '@' << m_id.get() << endl;
1991            break;
1992      case G_BINARY:
1993            oss << opToString(m_op) << endl;
1994            indent+='.';
1995            m_left->intoTreeString(oss, indent);
1996            m_right->intoTreeString(oss, indent);
1997            break;
1998      case G_UNARY:
1999      case G_UNARY_P:
2000      case G_NP1OUT:
2001      case G_NP1OUT_P:
2002      case G_REDUCTION:
2003            oss << opToString(m_op) << endl;
2004            indent+='.';
2005            m_left->intoTreeString(oss, indent);
2006            break;
2007      case G_TENSORPROD:
2008            oss << opToString(m_op) << endl;
2009            indent+='.';
2010            m_left->intoTreeString(oss, indent);
2011            m_right->intoTreeString(oss, indent);
2012            break;
2013      case G_NP1OUT_2P:
2014            oss << opToString(m_op) << ", " << m_axis_offset << ", " << m_transpose<< endl;
2015            indent+='.';
2016            m_left->intoTreeString(oss, indent);
2017            break;
2018      default:
2019            oss << "UNKNOWN";
2020      }
2021  }  }
2022    
2023  // Note that in this case, deepCopy does not make copies of the leaves.  
 // Hopefully copy on write (or whatever we end up using) will take care of this.  
2024  DataAbstract*  DataAbstract*
2025  DataLazy::deepCopy()  DataLazy::deepCopy() const
2026  {  {
2027    if (m_op==IDENTITY)    switch (getOpgroup(m_op))
2028    {    {
2029      return new DataLazy(m_left);    // we don't need to copy the child here    case G_IDENTITY:  return new DataLazy(m_id->deepCopy()->getPtr());
2030      case G_UNARY:
2031      case G_REDUCTION:      return new DataLazy(m_left->deepCopy()->getPtr(),m_op);
2032      case G_UNARY_P:       return new DataLazy(m_left->deepCopy()->getPtr(), m_op, m_tol);
2033      case G_BINARY:        return new DataLazy(m_left->deepCopy()->getPtr(),m_right->deepCopy()->getPtr(),m_op);
2034      case G_NP1OUT: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(),m_op);
2035      case G_TENSORPROD: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
2036      case G_NP1OUT_P:   return new DataLazy(m_left->deepCopy()->getPtr(),m_op,  m_axis_offset);
2037      case G_NP1OUT_2P:  return new DataLazy(m_left->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
2038      default:
2039            throw DataException("Programmer error - do not know how to deepcopy operator "+opToString(m_op)+".");
2040    }    }
   return new DataLazy(m_left->deepCopy()->getPtr(),m_right->deepCopy()->getPtr(),m_op);  
2041  }  }
2042    
2043    
2044  DataTypes::ValueType::size_type  
2045    // There is no single, natural interpretation of getLength on DataLazy.
2046    // Instances of DataReady can look at the size of their vectors.
2047    // For lazy though, it could be the size the data would be if it were resolved;
2048    // or it could be some function of the lengths of the DataReady instances which
2049    // form part of the expression.
2050    // Rather than have people making assumptions, I have disabled the method.
2051    DataTypes::RealVectorType::size_type
2052  DataLazy::getLength() const  DataLazy::getLength() const
2053  {  {
2054    return m_length;    throw DataException("getLength() does not make sense for lazy data.");
2055  }  }
2056    
2057    
# Line 309  DataLazy::getSlice(const DataTypes::Regi Line 2061  DataLazy::getSlice(const DataTypes::Regi
2061    throw DataException("getSlice - not implemented for Lazy objects.");    throw DataException("getSlice - not implemented for Lazy objects.");
2062  }  }
2063    
2064  DataTypes::ValueType::size_type  
2065    // To do this we need to rely on our child nodes
2066    DataTypes::RealVectorType::size_type
2067    DataLazy::getPointOffset(int sampleNo,
2068                     int dataPointNo)
2069    {
2070      if (m_op==IDENTITY)
2071      {
2072            return m_id->getPointOffset(sampleNo,dataPointNo);
2073      }
2074      if (m_readytype!='E')
2075      {
2076            collapse();
2077            return m_id->getPointOffset(sampleNo,dataPointNo);
2078      }
2079      // at this point we do not have an identity node and the expression will be Expanded
2080      // so we only need to know which child to ask
2081      if (m_left->m_readytype=='E')
2082      {
2083            return m_left->getPointOffset(sampleNo,dataPointNo);
2084      }
2085      else
2086      {
2087            return m_right->getPointOffset(sampleNo,dataPointNo);
2088      }
2089    }
2090    
2091    // To do this we need to rely on our child nodes
2092    DataTypes::RealVectorType::size_type
2093  DataLazy::getPointOffset(int sampleNo,  DataLazy::getPointOffset(int sampleNo,
2094                   int dataPointNo) const                   int dataPointNo) const
2095  {  {
2096    throw DataException("getPointOffset - not implemented for Lazy objects - yet.");    if (m_op==IDENTITY)
2097      {
2098            return m_id->getPointOffset(sampleNo,dataPointNo);
2099      }
2100      if (m_readytype=='E')
2101      {
2102        // at this point we do not have an identity node and the expression will be Expanded
2103        // so we only need to know which child to ask
2104        if (m_left->m_readytype=='E')
2105        {
2106            return m_left->getPointOffset(sampleNo,dataPointNo);
2107        }
2108        else
2109        {
2110            return m_right->getPointOffset(sampleNo,dataPointNo);
2111        }
2112      }
2113      if (m_readytype=='C')
2114      {
2115            return m_left->getPointOffset(sampleNo,dataPointNo); // which child doesn't matter
2116      }
2117      throw DataException("Programmer error - getPointOffset on lazy data may require collapsing (but this object is marked const).");
2118    }
2119    
2120    
2121    // I have decided to let Data:: handle this issue.
2122    void
2123    DataLazy::setToZero()
2124    {
2125    //   DataTypes::RealVectorType v(getNoValues(),0);
2126    //   m_id=DataReady_ptr(new DataConstant(getFunctionSpace(),getShape(),v));
2127    //   m_op=IDENTITY;
2128    //   m_right.reset();  
2129    //   m_left.reset();
2130    //   m_readytype='C';
2131    //   m_buffsRequired=1;
2132    
2133      (void)privdebug;  // to stop the compiler complaining about unused privdebug
2134      throw DataException("Programmer error - setToZero not supported for DataLazy (DataLazy objects should be read only).");
2135  }  }
2136    
2137  // // The startOffset is where to write results in the output vector v  bool
2138  // void  DataLazy::actsExpanded() const
2139  // DataLazy::processSample(ValueType& v, int sampleNo, size_t startOffset)  {
2140  // {          return (m_readytype=='E');
2141  //     m_left.processSample(v,sampleNo,startOffset);  }
 //     m_right.processSample(v,sampleNo,startOffset+getSampleSize());  
 //     int i;  
 //     #pragma omp parallel for private(i) schedule(static)  
 //     for (i=0;i<getSampleSize();++i)  
 //     {  
 //      performOp(v,startOffset+i*m_pointsize,ES_optype,m_samplesize);  
 //     }  
 // }  
2142    
2143    } // end namespace
2144    
 }   // end namespace  

Legend:
Removed from v.1879  
changed lines
  Added in v.6083

  ViewVC Help
Powered by ViewVC 1.1.26