/[escript]/branches/clazy/escriptcore/src/DataLazy.cpp
ViewVC logotype

Diff of /branches/clazy/escriptcore/src/DataLazy.cpp

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

branches/schroedinger/escript/src/DataLazy.cpp revision 1885 by jfenwick, Tue Oct 14 05:51:59 2008 UTC trunk/escriptcore/src/DataLazy.cpp revision 5997 by caltinay, Mon Feb 29 07:24:47 2016 UTC
# Line 1  Line 1 
1    
2  /*******************************************************  /*****************************************************************************
3  *  *
4  * Copyright (c) 2003-2008 by University of Queensland  * Copyright (c) 2003-2016 by The University of Queensland
5  * Earth Systems Science Computational Center (ESSCC)  * http://www.uq.edu.au
 * http://www.uq.edu.au/esscc  
6  *  *
7  * Primary Business: Queensland, Australia  * Primary Business: Queensland, Australia
8  * Licensed under the Open Software License version 3.0  * Licensed under the Open Software License version 3.0
9  * http://www.opensource.org/licenses/osl-3.0.php  * http://www.opensource.org/licenses/osl-3.0.php
10  *  *
11  *******************************************************/  * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12    * Development 2012-2013 by School of Earth Sciences
13    * Development from 2014 by Centre for Geoscience Computing (GeoComp)
14    *
15    *****************************************************************************/
16    
17    #define ESNEEDPYTHON
18    #include "esysUtils/first.h"
19    
20  #include "DataLazy.h"  #include "DataLazy.h"
21    #include "Data.h"
22    #include "DataTypes.h"
23    #include "EscriptParams.h"
24    #include "FunctionSpace.h"
25    #include "UnaryFuncs.h"    // for escript::fsign
26    #include "Utils.h"
27    
28  #ifdef USE_NETCDF  #ifdef USE_NETCDF
29  #include <netcdfcpp.h>  #include <netcdfcpp.h>
30  #endif  #endif
31  #ifdef PASO_MPI  
32  #include <mpi.h>  #include <iomanip> // for some fancy formatting in debug
33  #endif  
34  #ifdef _OPENMP  using namespace escript::DataTypes;
35  #include <omp.h>  
36  #endif  #define NO_ARG
37  #include "FunctionSpace.h"  
38  #include "DataTypes.h"  // #define LAZYDEBUG(X) if (privdebug){X;}
39  #include "Data.h"  #define LAZYDEBUG(X)
40    namespace
41    {
42    bool privdebug=false;
43    
44    #define ENABLEDEBUG privdebug=true;
45    #define DISABLEDEBUG privdebug=false;
46    }
47    
48    // #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {cerr << "\n!!!!!!! SIZE LIMIT EXCEEDED " << m_children << ";" << m_height << endl << toString() << endl;resolveToIdentity();}
49    
50    // #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {cerr << "SIZE LIMIT EXCEEDED " << m_height << endl;resolveToIdentity();}
51    
52    
53    #define SIZELIMIT if (m_height>escript::escriptParams.getTOO_MANY_LEVELS())  {if (escript::escriptParams.getLAZY_VERBOSE()){cerr << "SIZE LIMIT EXCEEDED height=" << m_height << endl;}resolveToIdentity();}
54    
55    /*
56    How does DataLazy work?
57    ~~~~~~~~~~~~~~~~~~~~~~~
58    
59    Each instance represents a single operation on one or two other DataLazy instances. These arguments are normally
60    denoted left and right.
61    
62    A special operation, IDENTITY, stores an instance of DataReady in the m_id member.
63    This means that all "internal" nodes in the structure are instances of DataLazy.
64    
65    Each operation has a string representation as well as an opgroup - eg G_IDENTITY, G_BINARY, ...
66    Note that IDENTITY is not considered a unary operation.
67    
68    I am avoiding calling the structure formed a tree because it is not guaranteed to be one (eg c=a+a).
69    It must however form a DAG (directed acyclic graph).
70    I will refer to individual DataLazy objects with the structure as nodes.
71    
72    Each node also stores:
73    - m_readytype \in {'E','T','C','?'} ~ indicates what sort of DataReady would be produced if the expression was
74            evaluated.
75    - m_buffsrequired ~ the large number of samples which would need to be kept simultaneously in order to
76            evaluate the expression.
77    - m_samplesize ~ the number of doubles stored in a sample.
78    
79    When a new node is created, the above values are computed based on the values in the child nodes.
80    Eg: if left requires 4 samples and right requires 6 then left+right requires 7 samples.
81    
82    The resolve method, which produces a DataReady from a DataLazy, does the following:
83    1) Create a DataReady to hold the new result.
84    2) Allocate a vector (v) big enough to hold m_buffsrequired samples.
85    3) For each sample, call resolveSample with v, to get its values and copy them into the result object.
86    
87    (In the case of OMP, multiple samples are resolved in parallel so the vector needs to be larger.)
88    
89    resolveSample returns a Vector* and an offset within that vector where the result is stored.
90    Normally, this would be v, but for identity nodes their internal vector is returned instead.
91    
92    The convention that I use, is that the resolve methods should store their results starting at the offset they are passed.
93    
94    For expressions which evaluate to Constant or Tagged, there is a different evaluation method.
95    The collapse method invokes the (non-lazy) operations on the Data class to evaluate the expression.
96    
97    To add a new operator you need to do the following (plus anything I might have forgotten - adding a new group for example):
98    1) Add to the ES_optype.
99    2) determine what opgroup your operation belongs to (X)
100    3) add a string for the op to the end of ES_opstrings
101    4) increase ES_opcount
102    5) add an entry (X) to opgroups
103    6) add an entry to the switch in collapseToReady
104    7) add an entry to resolveX
105    */
106    
107    
108  using namespace std;  using namespace std;
109  using namespace boost;  using namespace boost;
# Line 32  using namespace boost; Line 111  using namespace boost;
111  namespace escript  namespace escript
112  {  {
113    
 const std::string&  
 opToString(ES_optype op);  
   
114  namespace  namespace
115  {  {
116    
117    
118    // enabling this will print out when ever the maximum stacksize used by resolve increases
119    // it assumes _OPENMP is also in use
120    //#define LAZY_STACK_PROF
121    
122    
123    
124    #ifndef _OPENMP
125      #ifdef LAZY_STACK_PROF
126      #undef LAZY_STACK_PROF
127      #endif
128    #endif
129    
130    
131    #ifdef LAZY_STACK_PROF
132    std::vector<void*> stackstart(getNumberOfThreads());
133    std::vector<void*> stackend(getNumberOfThreads());
134    size_t maxstackuse=0;
135    #endif
136    
137    enum ES_opgroup
138    {
139       G_UNKNOWN,
140       G_IDENTITY,
141       G_BINARY,            // pointwise operations with two arguments
142       G_UNARY,             // pointwise operations with one argument
143       G_UNARY_P,           // pointwise operations with one argument, requiring a parameter
144       G_NP1OUT,            // non-pointwise op with one output
145       G_NP1OUT_P,          // non-pointwise op with one output requiring a parameter
146       G_TENSORPROD,        // general tensor product
147       G_NP1OUT_2P,         // non-pointwise op with one output requiring two params
148       G_REDUCTION,         // non-pointwise unary op with a scalar output
149       G_CONDEVAL
150    };
151    
152    
153    
154    
155    string ES_opstrings[]={"UNKNOWN","IDENTITY","+","-","*","/","^",
156                            "sin","cos","tan",
157                            "asin","acos","atan","sinh","cosh","tanh","erf",
158                            "asinh","acosh","atanh",
159                            "log10","log","sign","abs","neg","pos","exp","sqrt",
160                            "1/","where>0","where<0","where>=0","where<=0", "where<>0","where=0",
161                            "symmetric","nonsymmetric",
162                            "prod",
163                            "transpose", "trace",
164                            "swapaxes",
165                            "minval", "maxval",
166                            "condEval"};
167    int ES_opcount=44;
168    ES_opgroup opgroups[]={G_UNKNOWN,G_IDENTITY,G_BINARY,G_BINARY,G_BINARY,G_BINARY, G_BINARY,
169                            G_UNARY,G_UNARY,G_UNARY, //10
170                            G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,        // 17
171                            G_UNARY,G_UNARY,G_UNARY,                                        // 20
172                            G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,        // 28
173                            G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY, G_UNARY_P, G_UNARY_P,          // 35
174                            G_NP1OUT,G_NP1OUT,
175                            G_TENSORPROD,
176                            G_NP1OUT_P, G_NP1OUT_P,
177                            G_NP1OUT_2P,
178                            G_REDUCTION, G_REDUCTION,
179                            G_CONDEVAL};
180    inline
181    ES_opgroup
182    getOpgroup(ES_optype op)
183    {
184      return opgroups[op];
185    }
186    
187  // return the FunctionSpace of the result of "left op right"  // return the FunctionSpace of the result of "left op right"
188  FunctionSpace  FunctionSpace
189  resultFS(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)  resultFS(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
190  {  {
191      // perhaps this should call interpolate and throw or something?          // perhaps this should call interpolate and throw or something?
192      // maybe we need an interpolate node -          // maybe we need an interpolate node -
193      // that way, if interpolate is required in any other op we can just throw a          // that way, if interpolate is required in any other op we can just throw a
194      // programming error exception.          // programming error exception.
195    
196      FunctionSpace l=left->getFunctionSpace();
197      if (left->getFunctionSpace()!=right->getFunctionSpace())    FunctionSpace r=right->getFunctionSpace();
198      {    if (l!=r)
199          throw DataException("FunctionSpaces not equal - interpolation not supported on lazy data.");    {
200      }      signed char res=r.getDomain()->preferredInterpolationOnDomain(r.getTypeCode(), l.getTypeCode());
201      return left->getFunctionSpace();      if (res==1)
202        {
203            return l;
204        }
205        if (res==-1)
206        {
207            return r;
208        }
209        throw DataException("Cannot interpolate between the FunctionSpaces given for operation "+opToString(op)+".");
210      }
211      return l;
212  }  }
213    
214  // return the shape of the result of "left op right"  // return the shape of the result of "left op right"
215    // the shapes resulting from tensor product are more complex to compute so are worked out elsewhere
216  DataTypes::ShapeType  DataTypes::ShapeType
217  resultShape(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)  resultShape(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
218  {  {
219      if (left->getShape()!=right->getShape())          if (left->getShape()!=right->getShape())
220      {          {
221          throw DataException("Shapes not the same - shapes must match for lazy data.");            if ((getOpgroup(op)!=G_BINARY) && (getOpgroup(op)!=G_NP1OUT))
222      }            {
223      return left->getShape();                  throw DataException("Shapes not the name - shapes must match for (point)binary operations.");
224  }            }
225    
226  size_t            if (left->getRank()==0)       // we need to allow scalar * anything
227  resultLength(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)            {
228  {                  return right->getShape();
229     switch(op)            }
230     {            if (right->getRank()==0)
231  //   case IDENTITY: return left->getLength();            {
232     case ADD:    // the length is preserved in these ops                  return left->getShape();
233     case SUB:            }
234     case MUL:            throw DataException("Shapes not the same - arguments must have matching shapes (or be scalars) for (point)binary operations on lazy data.");
235     case DIV: return left->getLength();          }
236     default:          return left->getShape();
237      throw DataException("Programmer Error - attempt to getLength() for operator "+opToString(op)+".");  }
238    
239     }  // return the shape for "op left"
240  }  
241    DataTypes::ShapeType
242  int  resultShape(DataAbstract_ptr left, ES_optype op, int axis_offset)
243  calcBuffs(const DataLazy_ptr& left, const DataLazy_ptr& right, ES_optype op)  {
244  {          switch(op)
245     switch(op)          {
246     {          case TRANS:
247     case IDENTITY: return 0;             {                    // for the scoping of variables
248     case ADD:    // the length is preserved in these ops                  const DataTypes::ShapeType& s=left->getShape();
249     case SUB:                  DataTypes::ShapeType sh;
250     case MUL:                  int rank=left->getRank();
251     case DIV: return max(left->getBuffsRequired(),right->getBuffsRequired());                  if (axis_offset<0 || axis_offset>rank)
252     default:                  {
253      throw DataException("Programmer Error - attempt to calcBuffs() for operator "+opToString(op)+".");              stringstream e;
254     }              e << "Error - Data::transpose must have 0 <= axis_offset <= rank=" << rank;
255  }              throw DataException(e.str());
256            }
257  string ES_opstrings[]={"UNKNOWN","IDENTITY","+","-","*","/"};          for (int i=0; i<rank; i++)
258  int ES_opcount=5;                  {
259                       int index = (axis_offset+i)%rank;
260  // void performOp(ValueType& v, int startOffset, ES_optype op, int m_samplesize)             sh.push_back(s[index]); // Append to new shape
261  // {          }
262  //    switch(op)                  return sh;
263  //    {             }
264  //    case ADD:  DataMaths::binaryOp(v,getShape(),startOffset,v,getShape(),          break;
265  //      startOffset+m_samplesize,plus<double>());          case TRACE:
266  //        break;                 {
267  //    case SUB:  DataMaths::binaryOp(v,getShape(),startOffset,v,getShape(),                  int rank=left->getRank();
268  //      startOffset+m_samplesize,minus<double>());                  if (rank<2)
269  //        break;                  {
270  //    case MUL:  DataMaths::binaryOp(v,getShape(),startOffset,v,getShape(),                     throw DataException("Trace can only be computed for objects with rank 2 or greater.");
271  //      startOffset+m_samplesize,multiplies<double>());                  }
272  //        break;                  if ((axis_offset>rank-2) || (axis_offset<0))
273  //    case DIV:  DataMaths::binaryOp(v,getShape(),startOffset,v,getShape(),                  {
274  //      startOffset+m_samplesize,divides<double>());                     throw DataException("Trace: axis offset must lie between 0 and rank-2 inclusive.");
275  //        break;                  }
276  //    default:                  if (rank==2)
277  //  throw DataException("Programmer Error - attempt to performOp() for operator "+opToString(op)+".");                  {
278  //    }                     return DataTypes::scalarShape;
279  //                  }
280  // }                  else if (rank==3)
281                    {
282                       DataTypes::ShapeType sh;
283                       if (axis_offset==0)
284                       {
285                            sh.push_back(left->getShape()[2]);
286                       }
287                       else         // offset==1
288                       {
289                            sh.push_back(left->getShape()[0]);
290                       }
291                       return sh;
292                    }
293                    else if (rank==4)
294                    {
295                       DataTypes::ShapeType sh;
296                       const DataTypes::ShapeType& s=left->getShape();
297                       if (axis_offset==0)
298                       {
299                            sh.push_back(s[2]);
300                            sh.push_back(s[3]);
301                       }
302                       else if (axis_offset==1)
303                       {
304                            sh.push_back(s[0]);
305                            sh.push_back(s[3]);
306                       }
307                       else         // offset==2
308                       {
309                            sh.push_back(s[0]);
310                            sh.push_back(s[1]);
311                       }
312                       return sh;
313                    }
314                    else            // unknown rank
315                    {
316                       throw DataException("Error - Data::trace can only be calculated for rank 2, 3 or 4 object.");
317                    }
318               }
319            break;
320            default:
321            throw DataException("Programmer error - resultShape(left,op) can't compute shapes for operator "+opToString(op)+".");
322            }
323    }
324    
325    DataTypes::ShapeType
326    SwapShape(DataAbstract_ptr left, const int axis0, const int axis1)
327    {
328         // This code taken from the Data.cpp swapaxes() method
329         // Some of the checks are probably redundant here
330         int axis0_tmp,axis1_tmp;
331         const DataTypes::ShapeType& s=left->getShape();
332         DataTypes::ShapeType out_shape;
333         // Here's the equivalent of python s_out=s[axis_offset:]+s[:axis_offset]
334         // which goes thru all shape vector elements starting with axis_offset (at index=rank wrap around to 0)
335         int rank=left->getRank();
336         if (rank<2) {
337            throw DataException("Error - Data::swapaxes argument must have at least rank 2.");
338         }
339         if (axis0<0 || axis0>rank-1) {
340            stringstream e;
341            e << "Error - Data::swapaxes: axis0 must be between 0 and rank-1=" << (rank-1);
342            throw DataException(e.str());
343         }
344         if (axis1<0 || axis1>rank-1) {
345            stringstream e;
346            e << "Error - Data::swapaxes: axis1 must be between 0 and rank-1=" << (rank-1);
347            throw DataException(e.str());
348         }
349         if (axis0 == axis1) {
350             throw DataException("Error - Data::swapaxes: axis indices must be different.");
351         }
352         if (axis0 > axis1) {
353             axis0_tmp=axis1;
354             axis1_tmp=axis0;
355         } else {
356             axis0_tmp=axis0;
357             axis1_tmp=axis1;
358         }
359         for (int i=0; i<rank; i++) {
360           if (i == axis0_tmp) {
361              out_shape.push_back(s[axis1_tmp]);
362           } else if (i == axis1_tmp) {
363              out_shape.push_back(s[axis0_tmp]);
364           } else {
365              out_shape.push_back(s[i]);
366           }
367         }
368        return out_shape;
369    }
370    
371    
372    // determine the output shape for the general tensor product operation
373    // the additional parameters return information required later for the product
374    // the majority of this code is copy pasted from C_General_Tensor_Product
375    DataTypes::ShapeType
376    GTPShape(DataAbstract_ptr left, DataAbstract_ptr right, int axis_offset, int transpose, int& SL, int& SM, int& SR)
377    {
378            
379      // Get rank and shape of inputs
380      int rank0 = left->getRank();
381      int rank1 = right->getRank();
382      const DataTypes::ShapeType& shape0 = left->getShape();
383      const DataTypes::ShapeType& shape1 = right->getShape();
384    
385      // Prepare for the loops of the product and verify compatibility of shapes
386      int start0=0, start1=0;
387      if (transpose == 0)           {}
388      else if (transpose == 1)      { start0 = axis_offset; }
389      else if (transpose == 2)      { start1 = rank1-axis_offset; }
390      else                          { throw DataException("DataLazy GeneralTensorProduct Constructor: Error - transpose should be 0, 1 or 2"); }
391    
392      if (rank0<axis_offset)
393      {
394            throw DataException("DataLazy GeneralTensorProduct Constructor: Error - rank of left < axisoffset");
395      }
396    
397      // Adjust the shapes for transpose
398      DataTypes::ShapeType tmpShape0(rank0);        // pre-sizing the vectors rather
399      DataTypes::ShapeType tmpShape1(rank1);        // than using push_back
400      for (int i=0; i<rank0; i++)   { tmpShape0[i]=shape0[(i+start0)%rank0]; }
401      for (int i=0; i<rank1; i++)   { tmpShape1[i]=shape1[(i+start1)%rank1]; }
402    
403      // Prepare for the loops of the product
404      SL=1, SM=1, SR=1;
405      for (int i=0; i<rank0-axis_offset; i++)       {
406        SL *= tmpShape0[i];
407      }
408      for (int i=rank0-axis_offset; i<rank0; i++)   {
409        if (tmpShape0[i] != tmpShape1[i-(rank0-axis_offset)]) {
410          throw DataException("C_GeneralTensorProduct: Error - incompatible shapes");
411        }
412        SM *= tmpShape0[i];
413      }
414      for (int i=axis_offset; i<rank1; i++)         {
415        SR *= tmpShape1[i];
416      }
417    
418      // Define the shape of the output (rank of shape is the sum of the loop ranges below)
419      DataTypes::ShapeType shape2(rank0+rank1-2*axis_offset);      
420      {                     // block to limit the scope of out_index
421         int out_index=0;
422         for (int i=0; i<rank0-axis_offset; i++, ++out_index) { shape2[out_index]=tmpShape0[i]; } // First part of arg_0_Z
423         for (int i=axis_offset; i<rank1; i++, ++out_index)   { shape2[out_index]=tmpShape1[i]; } // Last part of arg_1_Z
424      }
425    
426      if (shape2.size()>ESCRIPT_MAX_DATA_RANK)
427      {
428         ostringstream os;
429         os << "C_GeneralTensorProduct: Error - Attempt to create a rank " << shape2.size() << " object. The maximum rank is " << ESCRIPT_MAX_DATA_RANK << ".";
430         throw DataException(os.str());
431      }
432    
433  }   // end anonymous namespace    return shape2;
434    }
435    
436    }       // end anonymous namespace
437    
438    
439    
440    // Return a string representing the operation
441  const std::string&  const std::string&
442  opToString(ES_optype op)  opToString(ES_optype op)
443  {  {
# Line 135  opToString(ES_optype op) Line 448  opToString(ES_optype op)
448    return ES_opstrings[op];    return ES_opstrings[op];
449  }  }
450    
451    void DataLazy::LazyNodeSetup()
452    {
453    #ifdef _OPENMP
454        int numthreads=omp_get_max_threads();
455        m_samples.resize(numthreads*m_samplesize);
456        m_sampleids=new int[numthreads];
457        for (int i=0;i<numthreads;++i)
458        {
459            m_sampleids[i]=-1;  
460        }
461    #else
462        m_samples.resize(m_samplesize);
463        m_sampleids=new int[1];
464        m_sampleids[0]=-1;
465    #endif  // _OPENMP
466    }
467    
468    
469    // Creates an identity node
470  DataLazy::DataLazy(DataAbstract_ptr p)  DataLazy::DataLazy(DataAbstract_ptr p)
471      : parent(p->getFunctionSpace(),p->getShape()),          : parent(p->getFunctionSpace(),p->getShape())
472      m_op(IDENTITY)          ,m_sampleids(0),
473            m_samples(1)
474  {  {
475     if (p->isLazy())     if (p->isLazy())
476     {     {
477      // TODO: fix this.   We could make the new node a copy of p?          // I don't want identity of Lazy.
478      // I don't want identity of Lazy.          // Question: Why would that be so bad?
479      // Question: Why would that be so bad?          // Answer: We assume that the child of ID is something we can call getVector on
480      // Answer: We assume that the child of ID is something we can call getVector on          throw DataException("Programmer error - attempt to create identity from a DataLazy.");
     throw DataException("Programmer error - attempt to create identity from a DataLazy.");  
481     }     }
482     else     else
483     {     {
484      m_id=dynamic_pointer_cast<DataReady>(p);          p->makeLazyShared();
485            DataReady_ptr dr=dynamic_pointer_cast<DataReady>(p);
486            makeIdentity(dr);
487    LAZYDEBUG(cout << "Wrapping " << dr.get() << " id=" << m_id.get() << endl;)
488     }     }
489     m_length=p->getLength();  LAZYDEBUG(cout << "(1)Lazy created with " << m_samplesize << endl;)
    m_buffsRequired=0;  
    m_samplesize=getNumDPPSample()*getNoValues();  
 cout << "(1)Lazy created with " << m_samplesize << endl;  
490  }  }
491    
492  DataLazy::DataLazy(DataLazy_ptr left, DataLazy_ptr right, ES_optype op)  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op)
493      : parent(resultFS(left,right,op), resultShape(left,right,op)),          : parent(left->getFunctionSpace(),(getOpgroup(op)!=G_REDUCTION)?left->getShape():DataTypes::scalarShape),
494      m_left(left),          m_op(op),
495      m_right(right),          m_axis_offset(0),
496      m_op(op)          m_transpose(0),
497            m_SL(0), m_SM(0), m_SR(0)
498  {  {
499     m_length=resultLength(m_left,m_right,m_op);     if ((getOpgroup(op)!=G_UNARY) && (getOpgroup(op)!=G_NP1OUT) && (getOpgroup(op)!=G_REDUCTION))
500       {
501            throw DataException("Programmer error - constructor DataLazy(left, op) will only process UNARY operations.");
502       }
503    
504       DataLazy_ptr lleft;
505       if (!left->isLazy())
506       {
507            lleft=DataLazy_ptr(new DataLazy(left));
508       }
509       else
510       {
511            lleft=dynamic_pointer_cast<DataLazy>(left);
512       }
513       m_readytype=lleft->m_readytype;
514       m_left=lleft;
515     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
516     m_buffsRequired=calcBuffs(m_left, m_right, m_op);     m_children=m_left->m_children+1;
517  cout << "(2)Lazy created with " << m_samplesize << endl;     m_height=m_left->m_height+1;
518       LazyNodeSetup();
519       SIZELIMIT
520  }  }
521    
522    
523    // In this constructor we need to consider interpolation
524  DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)  DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
525      : parent(resultFS(left,right,op), resultShape(left,right,op)),          : parent(resultFS(left,right,op), resultShape(left,right,op)),
526      m_op(op)          m_op(op),
527            m_SL(0), m_SM(0), m_SR(0)
528    {
529    LAZYDEBUG(cout << "Forming operator with " << left.get() << " " << right.get() << endl;)
530       if ((getOpgroup(op)!=G_BINARY))
531       {
532            throw DataException("Programmer error - constructor DataLazy(left, right, op) will only process BINARY operations.");
533       }
534    
535       if (getFunctionSpace()!=left->getFunctionSpace())    // left needs to be interpolated
536       {
537            FunctionSpace fs=getFunctionSpace();
538            Data ltemp(left);
539            Data tmp(ltemp,fs);
540            left=tmp.borrowDataPtr();
541       }
542       if (getFunctionSpace()!=right->getFunctionSpace())   // right needs to be interpolated
543       {
544            Data tmp(Data(right),getFunctionSpace());
545            right=tmp.borrowDataPtr();
546    LAZYDEBUG(cout << "Right interpolation required " << right.get() << endl;)
547       }
548       left->operandCheck(*right);
549    
550       if (left->isLazy())                  // the children need to be DataLazy. Wrap them in IDENTITY if required
551       {
552            m_left=dynamic_pointer_cast<DataLazy>(left);
553    LAZYDEBUG(cout << "Left is " << m_left->toString() << endl;)
554       }
555       else
556       {
557            m_left=DataLazy_ptr(new DataLazy(left));
558    LAZYDEBUG(cout << "Left " << left.get() << " wrapped " << m_left->m_id.get() << endl;)
559       }
560       if (right->isLazy())
561       {
562            m_right=dynamic_pointer_cast<DataLazy>(right);
563    LAZYDEBUG(cout << "Right is " << m_right->toString() << endl;)
564       }
565       else
566       {
567            m_right=DataLazy_ptr(new DataLazy(right));
568    LAZYDEBUG(cout << "Right " << right.get() << " wrapped " << m_right->m_id.get() << endl;)
569       }
570       char lt=m_left->m_readytype;
571       char rt=m_right->m_readytype;
572       if (lt=='E' || rt=='E')
573       {
574            m_readytype='E';
575       }
576       else if (lt=='T' || rt=='T')
577       {
578            m_readytype='T';
579       }
580       else
581       {
582            m_readytype='C';
583       }
584       m_samplesize=getNumDPPSample()*getNoValues();
585       m_children=m_left->m_children+m_right->m_children+2;
586       m_height=max(m_left->m_height,m_right->m_height)+1;
587       LazyNodeSetup();
588       SIZELIMIT
589    LAZYDEBUG(cout << "(3)Lazy created with " << m_samplesize << endl;)
590    }
591    
592    DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op, int axis_offset, int transpose)
593            : parent(resultFS(left,right,op), GTPShape(left,right, axis_offset, transpose, m_SL,m_SM, m_SR)),
594            m_op(op),
595            m_axis_offset(axis_offset),
596            m_transpose(transpose)
597  {  {
598     if (left->isLazy())     if ((getOpgroup(op)!=G_TENSORPROD))
599       {
600            throw DataException("Programmer error - constructor DataLazy(left, right, op, ax, tr) will only process BINARY operations which require parameters.");
601       }
602       if ((transpose>2) || (transpose<0))
603       {
604            throw DataException("DataLazy GeneralTensorProduct constructor: Error - transpose should be 0, 1 or 2");
605       }
606       if (getFunctionSpace()!=left->getFunctionSpace())    // left needs to be interpolated
607       {
608            FunctionSpace fs=getFunctionSpace();
609            Data ltemp(left);
610            Data tmp(ltemp,fs);
611            left=tmp.borrowDataPtr();
612       }
613       if (getFunctionSpace()!=right->getFunctionSpace())   // right needs to be interpolated
614       {
615            Data tmp(Data(right),getFunctionSpace());
616            right=tmp.borrowDataPtr();
617       }
618    //    left->operandCheck(*right);
619    
620       if (left->isLazy())                  // the children need to be DataLazy. Wrap them in IDENTITY if required
621     {     {
622      m_left=dynamic_pointer_cast<DataLazy>(left);          m_left=dynamic_pointer_cast<DataLazy>(left);
623     }     }
624     else     else
625     {     {
626      m_left=DataLazy_ptr(new DataLazy(left));          m_left=DataLazy_ptr(new DataLazy(left));
627     }     }
628     if (right->isLazy())     if (right->isLazy())
629     {     {
630      m_right=dynamic_pointer_cast<DataLazy>(right);          m_right=dynamic_pointer_cast<DataLazy>(right);
631       }
632       else
633       {
634            m_right=DataLazy_ptr(new DataLazy(right));
635       }
636       char lt=m_left->m_readytype;
637       char rt=m_right->m_readytype;
638       if (lt=='E' || rt=='E')
639       {
640            m_readytype='E';
641       }
642       else if (lt=='T' || rt=='T')
643       {
644            m_readytype='T';
645       }
646       else
647       {
648            m_readytype='C';
649       }
650       m_samplesize=getNumDPPSample()*getNoValues();
651       m_children=m_left->m_children+m_right->m_children+2;
652       m_height=max(m_left->m_height,m_right->m_height)+1;
653       LazyNodeSetup();
654       SIZELIMIT
655    LAZYDEBUG(cout << "(4)Lazy created with " << m_samplesize << endl;)
656    }
657    
658    
659    DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, int axis_offset)
660            : parent(left->getFunctionSpace(), resultShape(left,op, axis_offset)),
661            m_op(op),
662            m_axis_offset(axis_offset),
663            m_transpose(0),
664            m_tol(0)
665    {
666       if ((getOpgroup(op)!=G_NP1OUT_P))
667       {
668            throw DataException("Programmer error - constructor DataLazy(left, op, ax) will only process UNARY operations which require parameters.");
669       }
670       DataLazy_ptr lleft;
671       if (!left->isLazy())
672       {
673            lleft=DataLazy_ptr(new DataLazy(left));
674       }
675       else
676       {
677            lleft=dynamic_pointer_cast<DataLazy>(left);
678       }
679       m_readytype=lleft->m_readytype;
680       m_left=lleft;
681       m_samplesize=getNumDPPSample()*getNoValues();
682       m_children=m_left->m_children+1;
683       m_height=m_left->m_height+1;
684       LazyNodeSetup();
685       SIZELIMIT
686    LAZYDEBUG(cout << "(5)Lazy created with " << m_samplesize << endl;)
687    }
688    
689    DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, double tol)
690            : parent(left->getFunctionSpace(), left->getShape()),
691            m_op(op),
692            m_axis_offset(0),
693            m_transpose(0),
694            m_tol(tol)
695    {
696       if ((getOpgroup(op)!=G_UNARY_P))
697       {
698            throw DataException("Programmer error - constructor DataLazy(left, op, tol) will only process UNARY operations which require parameters.");
699       }
700       DataLazy_ptr lleft;
701       if (!left->isLazy())
702       {
703            lleft=DataLazy_ptr(new DataLazy(left));
704     }     }
705     else     else
706     {     {
707      m_right=DataLazy_ptr(new DataLazy(right));          lleft=dynamic_pointer_cast<DataLazy>(left);
708     }     }
709       m_readytype=lleft->m_readytype;
710       m_left=lleft;
711       m_samplesize=getNumDPPSample()*getNoValues();
712       m_children=m_left->m_children+1;
713       m_height=m_left->m_height+1;
714       LazyNodeSetup();
715       SIZELIMIT
716    LAZYDEBUG(cout << "(6)Lazy created with " << m_samplesize << endl;)
717    }
718    
719    
720    DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, const int axis0, const int axis1)
721            : parent(left->getFunctionSpace(), SwapShape(left,axis0,axis1)),
722            m_op(op),
723            m_axis_offset(axis0),
724            m_transpose(axis1),
725            m_tol(0)
726    {
727       if ((getOpgroup(op)!=G_NP1OUT_2P))
728       {
729            throw DataException("Programmer error - constructor DataLazy(left, op, tol) will only process UNARY operations which require two integer parameters.");
730       }
731       DataLazy_ptr lleft;
732       if (!left->isLazy())
733       {
734            lleft=DataLazy_ptr(new DataLazy(left));
735       }
736       else
737       {
738            lleft=dynamic_pointer_cast<DataLazy>(left);
739       }
740       m_readytype=lleft->m_readytype;
741       m_left=lleft;
742       m_samplesize=getNumDPPSample()*getNoValues();
743       m_children=m_left->m_children+1;
744       m_height=m_left->m_height+1;
745       LazyNodeSetup();
746       SIZELIMIT
747    LAZYDEBUG(cout << "(7)Lazy created with " << m_samplesize << endl;)
748    }
749    
750    
namespace
{
    // \brief Return the largest of three ints.
    // Used below to compute the height of a node with three children.
    inline int max3(int a, int b, int c)
    {
        int t=(a>b?a:b);
        return (t>c?t:c);
    }
}
761    
762    DataLazy::DataLazy(DataAbstract_ptr mask, DataAbstract_ptr left, DataAbstract_ptr right/*, double tol*/)
763            : parent(left->getFunctionSpace(), left->getShape()),
764            m_op(CONDEVAL),
765            m_axis_offset(0),
766            m_transpose(0),
767            m_tol(0)
768    {
769    
770     m_length=resultLength(m_left,m_right,m_op);     DataLazy_ptr lmask;
771       DataLazy_ptr lleft;
772       DataLazy_ptr lright;
773       if (!mask->isLazy())
774       {
775            lmask=DataLazy_ptr(new DataLazy(mask));
776       }
777       else
778       {
779            lmask=dynamic_pointer_cast<DataLazy>(mask);
780       }
781       if (!left->isLazy())
782       {
783            lleft=DataLazy_ptr(new DataLazy(left));
784       }
785       else
786       {
787            lleft=dynamic_pointer_cast<DataLazy>(left);
788       }
789       if (!right->isLazy())
790       {
791            lright=DataLazy_ptr(new DataLazy(right));
792       }
793       else
794       {
795            lright=dynamic_pointer_cast<DataLazy>(right);
796       }
797       m_readytype=lmask->m_readytype;
798       if ((lleft->m_readytype!=lright->m_readytype) || (lmask->m_readytype!=lleft->m_readytype))
799       {
800            throw DataException("Programmer Error - condEval arguments must have the same readytype");
801       }
802       m_left=lleft;
803       m_right=lright;
804       m_mask=lmask;
805     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
806     m_buffsRequired=calcBuffs(m_left, m_right,m_op);     m_children=m_left->m_children+m_right->m_children+m_mask->m_children+1;
807  cout << "(3)Lazy created with " << m_samplesize << endl;     m_height=max3(m_left->m_height,m_right->m_height,m_mask->m_height)+1;
808       LazyNodeSetup();
809       SIZELIMIT
810    LAZYDEBUG(cout << "(8)Lazy created with " << m_samplesize << endl;)
811  }  }
812    
813    
814    
815  DataLazy::~DataLazy()  DataLazy::~DataLazy()
816  {  {
817       delete[] m_sampleids;
818  }  }
819    
820    
821  int  /*
822  DataLazy::getBuffsRequired() const    \brief Evaluates the expression using methods on Data.
823      This does the work for the collapse method.
824      For reasons of efficiency do not call this method on DataExpanded nodes.
825    */
826    DataReady_ptr
827    DataLazy::collapseToReady() const
828  {  {
829      return m_buffsRequired;    if (m_readytype=='E')
830      {     // this is more an efficiency concern than anything else
831        throw DataException("Programmer Error - do not use collapse on Expanded data.");
832      }
833      if (m_op==IDENTITY)
834      {
835        return m_id;
836      }
837      DataReady_ptr pleft=m_left->collapseToReady();
838      Data left(pleft);
839      Data right;
840      if ((getOpgroup(m_op)==G_BINARY) || (getOpgroup(m_op)==G_TENSORPROD))
841      {
842        right=Data(m_right->collapseToReady());
843      }
844      Data result;
845      switch(m_op)
846      {
847        case ADD:
848            result=left+right;
849            break;
850        case SUB:          
851            result=left-right;
852            break;
853        case MUL:          
854            result=left*right;
855            break;
856        case DIV:          
857            result=left/right;
858            break;
859        case SIN:
860            result=left.sin();      
861            break;
862        case COS:
863            result=left.cos();
864            break;
865        case TAN:
866            result=left.tan();
867            break;
868        case ASIN:
869            result=left.asin();
870            break;
871        case ACOS:
872            result=left.acos();
873            break;
874        case ATAN:
875            result=left.atan();
876            break;
877        case SINH:
878            result=left.sinh();
879            break;
880        case COSH:
881            result=left.cosh();
882            break;
883        case TANH:
884            result=left.tanh();
885            break;
886        case ERF:
887            result=left.erf();
888            break;
889       case ASINH:
890            result=left.asinh();
891            break;
892       case ACOSH:
893            result=left.acosh();
894            break;
895       case ATANH:
896            result=left.atanh();
897            break;
898        case LOG10:
899            result=left.log10();
900            break;
901        case LOG:
902            result=left.log();
903            break;
904        case SIGN:
905            result=left.sign();
906            break;
907        case ABS:
908            result=left.abs();
909            break;
910        case NEG:
911            result=left.neg();
912            break;
913        case POS:
914            // it doesn't mean anything for delayed.
915            // it will just trigger a deep copy of the lazy object
916            throw DataException("Programmer error - POS not supported for lazy data.");
917            break;
918        case EXP:
919            result=left.exp();
920            break;
921        case SQRT:
922            result=left.sqrt();
923            break;
924        case RECIP:
925            result=left.oneOver();
926            break;
927        case GZ:
928            result=left.wherePositive();
929            break;
930        case LZ:
931            result=left.whereNegative();
932            break;
933        case GEZ:
934            result=left.whereNonNegative();
935            break;
936        case LEZ:
937            result=left.whereNonPositive();
938            break;
939        case NEZ:
940            result=left.whereNonZero(m_tol);
941            break;
942        case EZ:
943            result=left.whereZero(m_tol);
944            break;
945        case SYM:
946            result=left.symmetric();
947            break;
948        case NSYM:
949            result=left.nonsymmetric();
950            break;
951        case PROD:
952            result=C_GeneralTensorProduct(left,right,m_axis_offset, m_transpose);
953            break;
954        case TRANS:
955            result=left.transpose(m_axis_offset);
956            break;
957        case TRACE:
958            result=left.trace(m_axis_offset);
959            break;
960        case SWAP:
961            result=left.swapaxes(m_axis_offset, m_transpose);
962            break;
963        case MINVAL:
964            result=left.minval();
965            break;
966        case MAXVAL:
967            result=left.minval();
968            break;
969        default:
970            throw DataException("Programmer error - collapseToReady does not know how to resolve operator "+opToString(m_op)+".");
971      }
972      return result.borrowReadyPtr();
973  }  }
974    
975    /*
976       \brief Converts the DataLazy into an IDENTITY storing the value of the expression.
977       This method uses the original methods on the Data class to evaluate the expressions.
978       For this reason, it should not be used on DataExpanded instances. (To do so would defeat
979       the purpose of using DataLazy in the first place).
980    */
981    void
982    DataLazy::collapse() const
983    {
984      if (m_op==IDENTITY)
985      {
986            return;
987      }
988      if (m_readytype=='E')
989      {     // this is more an efficiency concern than anything else
990        throw DataException("Programmer Error - do not use collapse on Expanded data.");
991      }
992      m_id=collapseToReady();
993      m_op=IDENTITY;
994    }
995    
996    
997    
// Binary pointwise kernel used by resolveNodeBinary: applies functor X over
// 'onumsteps' outer x 'numsteps' inner chunks of 'chunksize' doubles each,
// advancing the read cursors by leftstep/rightstep per inner step and by
// oleftstep/orightstep per outer step.
// NOTE: this macro relies on locals declared by the caller:
//   left, right, resultp, lroffset, rroffset, chunksize, numsteps, onumsteps,
//   leftstep, rightstep, oleftstep, orightstep, resultStep.
#define PROC_OP(TYPE,X)                               \
        for (int j=0;j<onumsteps;++j)\
        {\
           for (int i=0;i<numsteps;++i,resultp+=resultStep) \
           { \
LAZYDEBUG(cout << "[left,right]=[" << lroffset << "," << rroffset << "]" << endl;)\
LAZYDEBUG(cout << "{left,right}={" << (*left)[lroffset] << "," << (*right)[rroffset] << "}\n";)\
              tensor_binary_operation< TYPE >(chunksize, &((*left)[lroffset]), &((*right)[rroffset]), resultp, X); \
LAZYDEBUG(cout << " result=      " << resultp[0] << endl;) \
              lroffset+=leftstep; \
              rroffset+=rightstep; \
           }\
           lroffset+=oleftstep;\
           rroffset+=orightstep;\
        }
1016    
1017    
1018    // The result will be stored in m_samples
1019    // The return value is a pointer to the DataVector, offset is the offset within the return value
1020    const DataTypes::RealVectorType*
1021    DataLazy::resolveNodeSample(int tid, int sampleNo, size_t& roffset) const
1022  {  {
1023    if (m_op==IDENTITY)    LAZYDEBUG(cout << "Resolve sample " << toString() << endl;)
1024            // collapse so we have a 'E' node or an IDENTITY for some other type
1025      if (m_readytype!='E' && m_op!=IDENTITY)
1026    {    {
1027      const ValueType& vec=m_id->getVector();          collapse();
     return &(vec[m_id->getPointOffset(sampleNo, 0)]);  
1028    }    }
1029    size_t rightoffset=offset+m_samplesize;    if (m_op==IDENTITY)  
   const double* left=m_left->resolveSample(v,sampleNo,offset);  
   const double* right=m_right->resolveSample(v,sampleNo,rightoffset);  
   double* result=&(v[offset]);  
1030    {    {
1031      switch(m_op)      const ValueType& vec=m_id->getVectorRO();
1032      {      roffset=m_id->getPointOffset(sampleNo, 0);
1033      case ADD:       // since these are pointwise ops, pretend each sample is one point  #ifdef LAZY_STACK_PROF
1034      tensor_binary_operation(m_samplesize, left, right, result, plus<double>());  int x;
1035      break;  if (&x<stackend[omp_get_thread_num()])
1036      case SUB:        {
1037      tensor_binary_operation(m_samplesize, left, right, result, minus<double>());         stackend[omp_get_thread_num()]=&x;
1038      break;  }
1039      case MUL:        #endif
1040      tensor_binary_operation(m_samplesize, left, right, result, multiplies<double>());      return &(vec);
1041      break;    }
1042      case DIV:          if (m_readytype!='E')
1043      tensor_binary_operation(m_samplesize, left, right, result, divides<double>());    {
1044      break;      throw DataException("Programmer Error - Collapse did not produce an expanded node.");
1045      }
1046      if (m_sampleids[tid]==sampleNo)
1047      {
1048            roffset=tid*m_samplesize;
1049            return &(m_samples);            // sample is already resolved
1050      }
1051      m_sampleids[tid]=sampleNo;
1052    
1053      switch (getOpgroup(m_op))
1054      {
1055      case G_UNARY:
1056      case G_UNARY_P: return resolveNodeUnary(tid, sampleNo, roffset);
1057      case G_BINARY: return resolveNodeBinary(tid, sampleNo, roffset);
1058      case G_NP1OUT: return resolveNodeNP1OUT(tid, sampleNo, roffset);
1059      case G_NP1OUT_P: return resolveNodeNP1OUT_P(tid, sampleNo, roffset);
1060      case G_TENSORPROD: return resolveNodeTProd(tid, sampleNo, roffset);
1061      case G_NP1OUT_2P: return resolveNodeNP1OUT_2P(tid, sampleNo, roffset);
1062      case G_REDUCTION: return resolveNodeReduction(tid, sampleNo, roffset);
1063      case G_CONDEVAL: return resolveNodeCondEval(tid, sampleNo, roffset);
1064      default:
1065        throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");
1066      }
1067    }
1068    
1069    const DataTypes::RealVectorType*
1070    DataLazy::resolveNodeUnary(int tid, int sampleNo, size_t& roffset) const
1071    {
1072            // we assume that any collapsing has been done before we get here
1073            // since we only have one argument we don't need to think about only
1074            // processing single points.
1075            // we will also know we won't get identity nodes
1076      if (m_readytype!='E')
1077      {
1078        throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
1079      }
1080      if (m_op==IDENTITY)
1081      {
1082        throw DataException("Programmer error - resolveNodeUnary should not be called on identity nodes.");
1083      }
1084      const DataTypes::RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, roffset);
1085      const double* left=&((*leftres)[roffset]);
1086      roffset=m_samplesize*tid;
1087      double* result=&(m_samples[roffset]);
1088      switch (m_op)
1089      {
1090        case SIN:  
1091            tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sin);
1092            break;
1093        case COS:
1094            tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cos);
1095            break;
1096        case TAN:
1097            tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tan);
1098            break;
1099        case ASIN:
1100            tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::asin);
1101            break;
1102        case ACOS:
1103            tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::acos);
1104            break;
1105        case ATAN:
1106            tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::atan);
1107            break;
1108        case SINH:
1109            tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sinh);
1110            break;
1111        case COSH:
1112            tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cosh);
1113            break;
1114        case TANH:
1115            tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tanh);
1116            break;
1117        case ERF:
1118    #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1119            throw DataException("Error - Data:: erf function is not supported on _WIN32 platforms.");
1120    #else
1121            tensor_unary_operation(m_samplesize, left, result, ::erf);
1122            break;
1123    #endif
1124       case ASINH:
1125    #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1126            tensor_unary_operation(m_samplesize, left, result, escript::asinh_substitute);
1127    #else
1128            tensor_unary_operation(m_samplesize, left, result, ::asinh);
1129    #endif  
1130            break;
1131       case ACOSH:
1132    #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1133            tensor_unary_operation(m_samplesize, left, result, escript::acosh_substitute);
1134    #else
1135            tensor_unary_operation(m_samplesize, left, result, ::acosh);
1136    #endif  
1137            break;
1138       case ATANH:
1139    #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1140            tensor_unary_operation(m_samplesize, left, result, escript::atanh_substitute);
1141    #else
1142            tensor_unary_operation(m_samplesize, left, result, ::atanh);
1143    #endif  
1144            break;
1145        case LOG10:
1146            tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log10);
1147            break;
1148        case LOG:
1149            tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log);
1150            break;
1151        case SIGN:
1152            tensor_unary_operation(m_samplesize, left, result, escript::fsign);
1153            break;
1154        case ABS:
1155            tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::fabs);
1156            break;
1157        case NEG:
1158            tensor_unary_operation(m_samplesize, left, result, negate<double>());
1159            break;
1160        case POS:
1161            // it doesn't mean anything for delayed.
1162            // it will just trigger a deep copy of the lazy object
1163            throw DataException("Programmer error - POS not supported for lazy data.");
1164            break;
1165        case EXP:
1166            tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::exp);
1167            break;
1168        case SQRT:
1169            tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sqrt);
1170            break;
1171        case RECIP:
1172            tensor_unary_operation(m_samplesize, left, result, bind1st(divides<double>(),1.));
1173            break;
1174        case GZ:
1175            tensor_unary_operation(m_samplesize, left, result, bind2nd(greater<double>(),0.0));
1176            break;
1177        case LZ:
1178            tensor_unary_operation(m_samplesize, left, result, bind2nd(less<double>(),0.0));
1179            break;
1180        case GEZ:
1181            tensor_unary_operation(m_samplesize, left, result, bind2nd(greater_equal<double>(),0.0));
1182            break;
1183        case LEZ:
1184            tensor_unary_operation(m_samplesize, left, result, bind2nd(less_equal<double>(),0.0));
1185            break;
1186    // There are actually G_UNARY_P but I don't see a compelling reason to treat them differently
1187        case NEZ:
1188            tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsGT(),m_tol));
1189            break;
1190        case EZ:
1191            tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsLTE(),m_tol));
1192            break;
1193    
1194      default:      default:
1195      throw DataException("Programmer error - do not know how to resolve operator "+opToString(m_op)+".");          throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");
1196      }    }
1197      return &(m_samples);
1198    }
1199    
1200    
1201    const DataTypes::RealVectorType*
1202    DataLazy::resolveNodeReduction(int tid, int sampleNo, size_t& roffset) const
1203    {
1204            // we assume that any collapsing has been done before we get here
1205            // since we only have one argument we don't need to think about only
1206            // processing single points.
1207            // we will also know we won't get identity nodes
1208      if (m_readytype!='E')
1209      {
1210        throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
1211      }
1212      if (m_op==IDENTITY)
1213      {
1214        throw DataException("Programmer error - resolveNodeUnary should not be called on identity nodes.");
1215      }
1216      size_t loffset=0;
1217      const DataTypes::RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, loffset);
1218    
1219      roffset=m_samplesize*tid;
1220      unsigned int ndpps=getNumDPPSample();
1221      unsigned int psize=DataTypes::noValues(m_left->getShape());
1222      double* result=&(m_samples[roffset]);
1223      switch (m_op)
1224      {
1225        case MINVAL:
1226            {
1227              for (unsigned int z=0;z<ndpps;++z)
1228              {
1229                FMin op;
1230                *result=DataMaths::reductionOp(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max());
1231                loffset+=psize;
1232                result++;
1233              }
1234            }
1235            break;
1236        case MAXVAL:
1237            {
1238              for (unsigned int z=0;z<ndpps;++z)
1239              {
1240              FMax op;
1241              *result=DataMaths::reductionOp(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max()*-1);
1242              loffset+=psize;
1243              result++;
1244              }
1245            }
1246            break;
1247        default:
1248            throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");
1249      }
1250      return &(m_samples);
1251    }
1252    
1253    const DataTypes::RealVectorType*
1254    DataLazy::resolveNodeNP1OUT(int tid, int sampleNo, size_t& roffset) const
1255    {
1256            // we assume that any collapsing has been done before we get here
1257            // since we only have one argument we don't need to think about only
1258            // processing single points.
1259      if (m_readytype!='E')
1260      {
1261        throw DataException("Programmer error - resolveNodeNP1OUT should only be called on expanded Data.");
1262      }
1263      if (m_op==IDENTITY)
1264      {
1265        throw DataException("Programmer error - resolveNodeNP1OUT should not be called on identity nodes.");
1266      }
1267      size_t subroffset;
1268      const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1269      roffset=m_samplesize*tid;
1270      size_t loop=0;
1271      size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1272      size_t step=getNoValues();
1273      size_t offset=roffset;
1274      switch (m_op)
1275      {
1276        case SYM:
1277            for (loop=0;loop<numsteps;++loop)
1278            {
1279                DataMaths::symmetric(*leftres,m_left->getShape(),subroffset, m_samples, getShape(), offset);
1280                subroffset+=step;
1281                offset+=step;
1282            }
1283            break;
1284        case NSYM:
1285            for (loop=0;loop<numsteps;++loop)
1286            {
1287                DataMaths::nonsymmetric(*leftres,m_left->getShape(),subroffset, m_samples, getShape(), offset);
1288                subroffset+=step;
1289                offset+=step;
1290            }
1291            break;
1292        default:
1293            throw DataException("Programmer error - resolveNP1OUT can not resolve operator "+opToString(m_op)+".");
1294      }
1295      return &m_samples;
1296    }
1297    
1298    const DataTypes::RealVectorType*
1299    DataLazy::resolveNodeNP1OUT_P(int tid, int sampleNo, size_t& roffset) const
1300    {
1301            // we assume that any collapsing has been done before we get here
1302            // since we only have one argument we don't need to think about only
1303            // processing single points.
1304      if (m_readytype!='E')
1305      {
1306        throw DataException("Programmer error - resolveNodeNP1OUT_P should only be called on expanded Data.");
1307      }
1308      if (m_op==IDENTITY)
1309      {
1310        throw DataException("Programmer error - resolveNodeNP1OUT_P should not be called on identity nodes.");
1311      }
1312      size_t subroffset;
1313      size_t offset;
1314      const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1315      roffset=m_samplesize*tid;
1316      offset=roffset;
1317      size_t loop=0;
1318      size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1319      size_t outstep=getNoValues();
1320      size_t instep=m_left->getNoValues();
1321      switch (m_op)
1322      {
1323        case TRACE:
1324            for (loop=0;loop<numsteps;++loop)
1325            {
1326                DataMaths::trace(*leftres,m_left->getShape(),subroffset, m_samples ,getShape(),offset,m_axis_offset);
1327                subroffset+=instep;
1328                offset+=outstep;
1329            }
1330            break;
1331        case TRANS:
1332            for (loop=0;loop<numsteps;++loop)
1333            {
1334                DataMaths::transpose(*leftres,m_left->getShape(),subroffset, m_samples, getShape(),offset,m_axis_offset);
1335                subroffset+=instep;
1336                offset+=outstep;
1337            }
1338            break;
1339        default:
1340            throw DataException("Programmer error - resolveNP1OUTP can not resolve operator "+opToString(m_op)+".");
1341      }
1342      return &m_samples;
1343    }
1344    
1345    
1346    const DataTypes::RealVectorType*
1347    DataLazy::resolveNodeNP1OUT_2P(int tid, int sampleNo, size_t& roffset) const
1348    {
1349      if (m_readytype!='E')
1350      {
1351        throw DataException("Programmer error - resolveNodeNP1OUT_2P should only be called on expanded Data.");
1352      }
1353      if (m_op==IDENTITY)
1354      {
1355        throw DataException("Programmer error - resolveNodeNP1OUT_2P should not be called on identity nodes.");
1356      }
1357      size_t subroffset;
1358      size_t offset;
1359      const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1360      roffset=m_samplesize*tid;
1361      offset=roffset;
1362      size_t loop=0;
1363      size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1364      size_t outstep=getNoValues();
1365      size_t instep=m_left->getNoValues();
1366      switch (m_op)
1367      {
1368        case SWAP:
1369            for (loop=0;loop<numsteps;++loop)
1370            {
1371                DataMaths::swapaxes(*leftres,m_left->getShape(),subroffset, m_samples, getShape(),offset, m_axis_offset, m_transpose);
1372                subroffset+=instep;
1373                offset+=outstep;
1374            }
1375            break;
1376        default:
1377            throw DataException("Programmer error - resolveNodeNP1OUT2P can not resolve operator "+opToString(m_op)+".");
1378      }
1379      return &m_samples;
1380    }
1381    
1382    const DataTypes::RealVectorType*
1383    DataLazy::resolveNodeCondEval(int tid, int sampleNo, size_t& roffset) const
1384    {
1385      if (m_readytype!='E')
1386      {
1387        throw DataException("Programmer error - resolveNodeCondEval should only be called on expanded Data.");
1388      }
1389      if (m_op!=CONDEVAL)
1390      {
1391        throw DataException("Programmer error - resolveNodeCondEval should only be called on CONDEVAL nodes.");
1392      }
1393      size_t subroffset;
1394    
1395      const ValueType* maskres=m_mask->resolveNodeSample(tid, sampleNo, subroffset);
1396      const ValueType* srcres=0;
1397      if ((*maskres)[subroffset]>0)
1398      {
1399            srcres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1400      }
1401      else
1402      {
1403            srcres=m_right->resolveNodeSample(tid, sampleNo, subroffset);
1404      }
1405    
1406      // Now we need to copy the result
1407    
1408      roffset=m_samplesize*tid;
1409      for (int i=0;i<m_samplesize;++i)
1410      {
1411            m_samples[roffset+i]=(*srcres)[subroffset+i];  
1412    }    }
1413    return result;  
1414      return &m_samples;
1415  }  }
1416    
// This method assumes that any subexpressions which evaluate to Constant or Tagged Data
// have already been collapsed to IDENTITY. So we must have at least one expanded child.
// If both children are expanded, then we can process them in a single operation (we treat
// the whole sample as one big datapoint).
// If one of the children is not expanded, then we need to treat each point in the sample
// individually.
// There is an additional complication when scalar operations are considered.
// For example, 2+Vector.
// In this case each double within the point is treated individually
const DataTypes::RealVectorType*
DataLazy::resolveNodeBinary(int tid, int sampleNo, size_t& roffset) const
{
LAZYDEBUG(cout << "Resolve binary: " << toString() << endl;)

  size_t lroffset=0, rroffset=0;        // offsets in the left and right result vectors
        // first work out which of the children are expanded
  bool leftExp=(m_left->m_readytype=='E');
  bool rightExp=(m_right->m_readytype=='E');
  if (!leftExp && !rightExp)
  {
        // a fully non-expanded expression should have been handled by collapse()
        throw DataException("Programmer Error - please use collapse if neither argument has type 'E'.");
  }
  bool leftScalar=(m_left->getRank()==0);
  bool rightScalar=(m_right->getRank()==0);
  if ((m_left->getRank()!=m_right->getRank()) && (!leftScalar && !rightScalar))
  {
        throw DataException("resolveBinary - ranks of arguments must match unless one of them is scalar.");
  }
  size_t leftsize=m_left->getNoValues();
  size_t rightsize=m_right->getNoValues();
  size_t chunksize=1;                   // how many doubles will be processed in one go
  int leftstep=0;               // how far should the left offset advance after each step
  int rightstep=0;
  int numsteps=0;               // total number of steps for the inner loop
  int oleftstep=0;      // the o variables refer to the outer loop
  int orightstep=0;     // The outer loop is only required in cases where there is an extended scalar
  int onumsteps=1;

  // Classify each child by (expanded?, scalar?) so the stepping pattern below
  // can be chosen with simple boolean tests.
  bool LES=(leftExp && leftScalar);     // Left is an expanded scalar
  bool RES=(rightExp && rightScalar);
  bool LS=(!leftExp && leftScalar);     // left is a single scalar
  bool RS=(!rightExp && rightScalar);
  bool LN=(!leftExp && !leftScalar);    // left is a single non-scalar
  bool RN=(!rightExp && !rightScalar);
  bool LEN=(leftExp && !leftScalar);    // left is an expanded non-scalar
  bool REN=(rightExp && !rightScalar);

  if ((LES && RES) || (LEN && REN))     // both are Expanded scalars or both are expanded non-scalars
  {
        // shapes agree point-for-point: the whole sample is one big chunk
        chunksize=m_left->getNumDPPSample()*leftsize;
        leftstep=0;
        rightstep=0;
        numsteps=1;
  }
  else if (LES || RES)
  {
        // exactly one side is an expanded scalar: pair each scalar value with
        // the corresponding point (or repeated single value) on the other side
        chunksize=1;
        if (LES)                // left is an expanded scalar
        {
                if (RS)
                {
                   leftstep=1;
                   rightstep=0;
                   numsteps=m_left->getNumDPPSample();
                }
                else            // RN or REN
                {
                   // outer loop walks the expanded scalar; inner loop walks the
                   // non-scalar point. A negative outer step rewinds a single
                   // (non-expanded) right operand for each datapoint.
                   leftstep=0;
                   oleftstep=1;
                   rightstep=1;
                   orightstep=(RN ? -(int)rightsize : 0);
                   numsteps=rightsize;
                   onumsteps=m_left->getNumDPPSample();
                }
        }
        else            // right is an expanded scalar
        {
                if (LS)
                {
                   rightstep=1;
                   leftstep=0;
                   numsteps=m_right->getNumDPPSample();
                }
                else
                {
                   rightstep=0;
                   orightstep=1;
                   leftstep=1;
                   oleftstep=(LN ? -(int)leftsize : 0);
                   numsteps=leftsize;
                   onumsteps=m_right->getNumDPPSample();
                }
        }
  }
  else  // this leaves (LEN, RS), (LEN, RN) and their transposes
  {
        if (LEN)        // and Right will be a single value
        {
                chunksize=rightsize;
                leftstep=rightsize;
                rightstep=0;
                numsteps=m_left->getNumDPPSample();
                if (RS)
                {
                   numsteps*=leftsize;
                }
        }
        else    // REN
        {
                chunksize=leftsize;
                rightstep=leftsize;
                leftstep=0;
                numsteps=m_right->getNumDPPSample();
                if (LS)
                {
                   numsteps*=rightsize;
                }
        }
  }

  int resultStep=max(leftstep,rightstep);       // only one (at most) should be !=0
        // Get the values of sub-expressions
  const ValueType* left=m_left->resolveNodeSample(tid,sampleNo,lroffset);
  const ValueType* right=m_right->resolveNodeSample(tid,sampleNo,rroffset);
LAZYDEBUG(cout << "Post sub calls in " << toString() << endl;)
LAZYDEBUG(cout << "shapes=" << DataTypes::shapeToString(m_left->getShape()) << "," << DataTypes::shapeToString(m_right->getShape()) << endl;)
LAZYDEBUG(cout << "chunksize=" << chunksize << endl << "leftstep=" << leftstep << " rightstep=" << rightstep;)
LAZYDEBUG(cout << " numsteps=" << numsteps << endl << "oleftstep=" << oleftstep << " orightstep=" << orightstep;)
LAZYDEBUG(cout << "onumsteps=" << onumsteps << endl;)
LAZYDEBUG(cout << " DPPS=" << m_left->getNumDPPSample() << "," <<m_right->getNumDPPSample() << endl;)
LAZYDEBUG(cout << "" << LS << RS << LN << RN << LES << RES <<LEN << REN <<   endl;)

LAZYDEBUG(cout << "Left res["<< lroffset<< "]=" << (*left)[lroffset] << endl;)
LAZYDEBUG(cout << "Right res["<< rroffset<< "]=" << (*right)[rroffset] << endl;)


  // each thread writes to its own region of the per-node sample buffer
  roffset=m_samplesize*tid;
  double* resultp=&(m_samples[roffset]);                // results are stored at the vector offset we received
  // NOTE(review): the chunk/step/offset locals above appear to be consumed
  // inside the PROC_OP macro expansion — confirm against the macro definition.
  switch(m_op)
  {
    case ADD:
        PROC_OP(NO_ARG,plus<double>());
        break;
    case SUB:
        PROC_OP(NO_ARG,minus<double>());
        break;
    case MUL:
        PROC_OP(NO_ARG,multiplies<double>());
        break;
    case DIV:
        PROC_OP(NO_ARG,divides<double>());
        break;
    case POW:
       PROC_OP(double (double,double),::pow);
        break;
    default:
        throw DataException("Programmer error - resolveBinary can not resolve operator "+opToString(m_op)+".");
  }
LAZYDEBUG(cout << "Result res[" << roffset<< "]" << m_samples[roffset] << endl;)
  return &m_samples;
}
1578    
1579    
// This method assumes that any subexpressions which evaluate to Constant or Tagged Data
// have already been collapsed to IDENTITY. So we must have at least one expanded child.
// unlike the other resolve helpers, we must treat these datapoints separately.
const DataTypes::RealVectorType*
DataLazy::resolveNodeTProd(int tid, int sampleNo, size_t& roffset) const
{
LAZYDEBUG(cout << "Resolve TensorProduct: " << toString() << endl;)

  size_t lroffset=0, rroffset=0;        // offsets in the left and right result vectors
        // first work out which of the children are expanded
  bool leftExp=(m_left->m_readytype=='E');
  bool rightExp=(m_right->m_readytype=='E');
  int steps=getNumDPPSample();
  // a non-expanded child contributes the same point every step, so its stride is 0
  int leftStep=(leftExp? m_left->getNoValues() : 0);            // do not have scalars as input to this method
  int rightStep=(rightExp?m_right->getNoValues() : 0);

  int resultStep=getNoValues();         // output advances one whole point per step
  roffset=m_samplesize*tid;             // this thread's region of the sample buffer
  size_t offset=roffset;

  const ValueType* left=m_left->resolveNodeSample(tid, sampleNo, lroffset);

  const ValueType* right=m_right->resolveNodeSample(tid, sampleNo, rroffset);

LAZYDEBUG(cerr << "[Left shape]=" << DataTypes::shapeToString(m_left->getShape()) << "\n[Right shape]=" << DataTypes::shapeToString(m_right->getShape()) << " result=" <<DataTypes::shapeToString(getShape()) <<  endl;
cout << getNoValues() << endl;)


LAZYDEBUG(cerr << "Post sub calls: " << toString() << endl;)
LAZYDEBUG(cout << "LeftExp=" << leftExp << " rightExp=" << rightExp << endl;)
LAZYDEBUG(cout << "LeftR=" << m_left->getRank() << " rightExp=" << m_right->getRank() << endl;)
LAZYDEBUG(cout << "LeftSize=" << m_left->getNoValues() << " RightSize=" << m_right->getNoValues() << endl;)
LAZYDEBUG(cout << "m_samplesize=" << m_samplesize << endl;)
LAZYDEBUG(cout << "outputshape=" << DataTypes::shapeToString(getShape()) << endl;)
LAZYDEBUG(cout << "DPPS=" << m_right->getNumDPPSample() <<"."<<endl;)

  double* resultp=&(m_samples[offset]);         // results are stored at the vector offset we received
  switch(m_op)
  {
    case PROD:
        // one matrix product per datapoint in the sample
        for (int i=0;i<steps;++i,resultp+=resultStep)
        {
          const double *ptr_0 = &((*left)[lroffset]);
          const double *ptr_1 = &((*right)[rroffset]);

LAZYDEBUG(cout << DataTypes::pointToString(*left, m_left->getShape(),lroffset,"LEFT") << endl;)
LAZYDEBUG(cout << DataTypes::pointToString(*right,m_right->getShape(),rroffset, "RIGHT") << endl;)

          matrix_matrix_product(m_SL, m_SM, m_SR, ptr_0, ptr_1, resultp, m_transpose);

          lroffset+=leftStep;
          rroffset+=rightStep;
        }
        break;
    default:
        throw DataException("Programmer error - resolveTProduct can not resolve operator "+opToString(m_op)+".");
  }
  roffset=offset;       // report where this thread's result starts
  return &m_samples;
}
1640    
1641    
1642    const DataTypes::RealVectorType*
1643    DataLazy::resolveSample(int sampleNo, size_t& roffset) const
1644    {
1645    #ifdef _OPENMP
1646            int tid=omp_get_thread_num();
1647    #else
1648            int tid=0;
1649    #endif
1650    
1651    #ifdef LAZY_STACK_PROF
1652            stackstart[tid]=&tid;
1653            stackend[tid]=&tid;
1654            const DataTypes::RealVectorType* r=resolveNodeSample(tid, sampleNo, roffset);
1655            size_t d=(size_t)stackstart[tid]-(size_t)stackend[tid];
1656            #pragma omp critical
1657            if (d>maxstackuse)
1658            {
1659    cout << "Max resolve Stack use " << d << endl;
1660                    maxstackuse=d;
1661            }
1662            return r;
1663    #else
1664            return resolveNodeSample(tid, sampleNo, roffset);
1665    #endif
1666    }
1667    
1668    
1669    // This needs to do the work of the identity constructor
1670    void
1671    DataLazy::resolveToIdentity()
1672    {
1673       if (m_op==IDENTITY)
1674            return;
1675       DataReady_ptr p=resolveNodeWorker();
1676       makeIdentity(p);
1677    }
1678    
1679    void DataLazy::makeIdentity(const DataReady_ptr& p)
1680    {
1681       m_op=IDENTITY;
1682       m_axis_offset=0;
1683       m_transpose=0;
1684       m_SL=m_SM=m_SR=0;
1685       m_children=m_height=0;
1686       m_id=p;
1687       if(p->isConstant()) {m_readytype='C';}
1688       else if(p->isExpanded()) {m_readytype='E';}
1689       else if (p->isTagged()) {m_readytype='T';}
1690       else {throw DataException("Unknown DataReady instance in convertToIdentity constructor.");}
1691       m_samplesize=p->getNumDPPSample()*p->getNoValues();
1692       m_left.reset();
1693       m_right.reset();
1694    }
1695    
1696    
// Resolve the whole expression to ready data and return it.
// After this call the node itself is an IDENTITY wrapper around the result.
DataReady_ptr
DataLazy::resolve()
{
    resolveToIdentity();
    return m_id;
}
/* This is really a static method but I think that caused problems in windows */
// Resolve a group of lazy expressions together. Non-expanded nodes are
// collapsed first; if all remaining expressions share one FunctionSpace they
// are evaluated sample-by-sample in a single parallel pass, otherwise each is
// resolved independently.
void
DataLazy::resolveGroupWorker(std::vector<DataLazy*>& dats)
{
  if (dats.empty())
  {
        return;
  }
  vector<DataLazy*> work;
  FunctionSpace fs=dats[0]->getFunctionSpace();
  bool match=true;
  for (int i=dats.size()-1;i>=0;--i)
  {
        if (dats[i]->m_readytype!='E')
        {
                dats[i]->collapse();
        }
        if (dats[i]->m_op!=IDENTITY)    // already-resolved nodes need no work
        {
                work.push_back(dats[i]);
                if (fs!=dats[i]->getFunctionSpace())
                {
                        match=false;
                }
        }
  }
  if (work.empty())
  {
        return;         // no work to do
  }
  if (match)    // all functionspaces match.  Yes I realise this is overly strict
  {             // it is possible that dats[0] is one of the objects which we discarded and
                // all the other functionspaces match.
        // one output DataExpanded (and a handle on its vector) per expression
        vector<DataExpanded*> dep;
        vector<ValueType*> vecs;
        for (int i=0;i<work.size();++i)
        {
                dep.push_back(new DataExpanded(fs,work[i]->getShape(), ValueType(work[i]->getNoValues())));
                vecs.push_back(&(dep[i]->getVectorRW()));
        }
        int totalsamples=work[0]->getNumSamples();
        const ValueType* res=0; // Storage for answer
        int sample;
        #pragma omp parallel private(sample, res)
        {
            size_t roffset=0;
            #pragma omp for schedule(static)
            for (sample=0;sample<totalsamples;++sample)
            {
                roffset=0;
                int j;
                // evaluate every expression for this sample and copy the
                // result into the matching output vector
                for (j=work.size()-1;j>=0;--j)
                {
#ifdef _OPENMP
                    res=work[j]->resolveNodeSample(omp_get_thread_num(),sample,roffset);
#else
                    res=work[j]->resolveNodeSample(0,sample,roffset);
#endif
                    RealVectorType::size_type outoffset=dep[j]->getPointOffset(sample,0);
                    memcpy(&((*vecs[j])[outoffset]),&((*res)[roffset]),work[j]->m_samplesize*sizeof(RealVectorType::ElementType));
                }
            }
        }
        // Now we need to load the new results as identity ops into the lazy nodes
        for (int i=work.size()-1;i>=0;--i)
        {
            work[i]->makeIdentity(REFCOUNTNS::dynamic_pointer_cast<DataReady>(dep[i]->getPtr()));
        }
  }
  else  // functionspaces do not match
  {
        for (int i=0;i<work.size();++i)
        {
                work[i]->resolveToIdentity();
        }
  }
}
1782    
1783    
1784    
// This version of resolve uses storage in each node to hold results
// Evaluate the expression into a new DataExpanded, sample by sample, in
// parallel. Constant/Tagged expressions are collapsed and returned directly.
DataReady_ptr
DataLazy::resolveNodeWorker()
{
  if (m_readytype!='E')         // if the whole sub-expression is Constant or Tagged, then evaluate it normally
  {
    collapse();
  }
  if (m_op==IDENTITY)           // So a lazy expression of Constant or Tagged data will be returned here.
  {
    return m_id;
  }
        // from this point on we must have m_op!=IDENTITY and m_readytype=='E'
  DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(),  ValueType(getNoValues()));
  ValueType& resvec=result->getVectorRW();
  DataReady_ptr resptr=DataReady_ptr(result);

  int sample;
  int totalsamples=getNumSamples();
  const ValueType* res=0;       // Storage for answer
LAZYDEBUG(cout << "Total number of samples=" <<totalsamples << endl;)
  #pragma omp parallel private(sample,res)
  {
        size_t roffset=0;
#ifdef LAZY_STACK_PROF
        // mark the top of this thread's stack for usage profiling
        stackstart[omp_get_thread_num()]=&roffset;
        stackend[omp_get_thread_num()]=&roffset;
#endif
        #pragma omp for schedule(static)
        for (sample=0;sample<totalsamples;++sample)
        {
                roffset=0;
#ifdef _OPENMP
                res=resolveNodeSample(omp_get_thread_num(),sample,roffset);
#else
                res=resolveNodeSample(0,sample,roffset);
#endif
LAZYDEBUG(cout << "Sample #" << sample << endl;)
LAZYDEBUG(cout << "Final res[" << roffset<< "]=" << (*res)[roffset] << (*res)[roffset]<< endl; )
                // copy this sample's values into the output vector
                RealVectorType::size_type outoffset=result->getPointOffset(sample,0);
                memcpy(&(resvec[outoffset]),&((*res)[roffset]),m_samplesize*sizeof(RealVectorType::ElementType));
        }
  }
#ifdef LAZY_STACK_PROF
  for (int i=0;i<getNumberOfThreads();++i)
  {
        size_t r=((size_t)stackstart[i] - (size_t)stackend[i]);
//      cout << i << " " << stackstart[i] << " .. " << stackend[i] << " = " <<  r << endl;
        if (r>maxstackuse)
        {
                maxstackuse=r;
        }
  }
  cout << "Max resolve Stack use=" << maxstackuse << endl;
#endif
  return resptr;
}
1842    
// Human-readable description of this lazy expression. The format is selected
// by the LAZY_STR_FMT escript parameter: 1 = tree layout, 2 = depth only,
// anything else = one-line infix form.
std::string
DataLazy::toString() const
{
  ostringstream oss;
  oss << "Lazy Data: [depth=" << m_height<< "] ";
  switch (escriptParams.getLAZY_STR_FMT())
  {
  case 1:       // tree format
        oss << endl;
        intoTreeString(oss,"");
        break;
  case 2:       // just the depth
        break;
  default:
        intoString(oss);
        break;
  }
  return oss.str();
}
1862    
// Append a one-line infix rendering of this expression to oss.
// Identity nodes print their ready type (E/T/C) and address; operator nodes
// recurse into their children.
void
DataLazy::intoString(ostringstream& oss) const
{
//    oss << "[" << m_children <<";"<<m_height <<"]";
  switch (getOpgroup(m_op))
  {
  case G_IDENTITY:
        if (m_id->isExpanded())
        {
           oss << "E";
        }
        else if (m_id->isTagged())
        {
          oss << "T";
        }
        else if (m_id->isConstant())
        {
          oss << "C";
        }
        else
        {
          oss << "?";
        }
        oss << '@' << m_id.get();
        break;
  case G_BINARY:
        oss << '(';
        m_left->intoString(oss);
        oss << ' ' << opToString(m_op) << ' ';
        m_right->intoString(oss);
        oss << ')';
        break;
  case G_UNARY:
  case G_UNARY_P:
  case G_NP1OUT:
  case G_NP1OUT_P:
  case G_REDUCTION:
        oss << opToString(m_op) << '(';
        m_left->intoString(oss);
        oss << ')';
        break;
  case G_TENSORPROD:
        oss << opToString(m_op) << '(';
        m_left->intoString(oss);
        oss << ", ";
        m_right->intoString(oss);
        oss << ')';
        break;
  case G_NP1OUT_2P:
        oss << opToString(m_op) << '(';
        m_left->intoString(oss);
        oss << ", " << m_axis_offset << ", " << m_transpose;
        oss << ')';
        break;
  case G_CONDEVAL:
        oss << opToString(m_op)<< '(' ;
        m_mask->intoString(oss);
        oss << " ? ";
        m_left->intoString(oss);
        oss << " : ";
        m_right->intoString(oss);
        oss << ')';
        break;
  default:
        oss << "UNKNOWN";
  }
}
1931    
1932    
1933    void
1934    DataLazy::intoTreeString(ostringstream& oss, string indent) const
1935    {
1936      oss << '[' << m_rank << ':' << setw(3) << m_samplesize << "] " << indent;
1937      switch (getOpgroup(m_op))
1938      {
1939      case G_IDENTITY:
1940            if (m_id->isExpanded())
1941            {
1942               oss << "E";
1943            }
1944            else if (m_id->isTagged())
1945            {
1946              oss << "T";
1947            }
1948            else if (m_id->isConstant())
1949            {
1950              oss << "C";
1951            }
1952            else
1953            {
1954              oss << "?";
1955            }
1956            oss << '@' << m_id.get() << endl;
1957            break;
1958      case G_BINARY:
1959            oss << opToString(m_op) << endl;
1960            indent+='.';
1961            m_left->intoTreeString(oss, indent);
1962            m_right->intoTreeString(oss, indent);
1963            break;
1964      case G_UNARY:
1965      case G_UNARY_P:
1966      case G_NP1OUT:
1967      case G_NP1OUT_P:
1968      case G_REDUCTION:
1969            oss << opToString(m_op) << endl;
1970            indent+='.';
1971            m_left->intoTreeString(oss, indent);
1972            break;
1973      case G_TENSORPROD:
1974            oss << opToString(m_op) << endl;
1975            indent+='.';
1976            m_left->intoTreeString(oss, indent);
1977            m_right->intoTreeString(oss, indent);
1978            break;
1979      case G_NP1OUT_2P:
1980            oss << opToString(m_op) << ", " << m_axis_offset << ", " << m_transpose<< endl;
1981            indent+='.';
1982            m_left->intoTreeString(oss, indent);
1983            break;
1984      default:
1985            oss << "UNKNOWN";
1986      }
1987    }
1988    
1989    
// Deep copy: rebuild this node with deep copies of its children (or of the
// wrapped ready data for an identity node), dispatching on the operator group
// to call the matching DataLazy constructor.
DataAbstract*
DataLazy::deepCopy() const
{
  switch (getOpgroup(m_op))
  {
  case G_IDENTITY:  return new DataLazy(m_id->deepCopy()->getPtr());
  case G_UNARY:
  case G_REDUCTION:      return new DataLazy(m_left->deepCopy()->getPtr(),m_op);
  case G_UNARY_P:       return new DataLazy(m_left->deepCopy()->getPtr(), m_op, m_tol);
  case G_BINARY:        return new DataLazy(m_left->deepCopy()->getPtr(),m_right->deepCopy()->getPtr(),m_op);
  case G_NP1OUT: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(),m_op);
  case G_TENSORPROD: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
  case G_NP1OUT_P:   return new DataLazy(m_left->deepCopy()->getPtr(),m_op,  m_axis_offset);
  case G_NP1OUT_2P:  return new DataLazy(m_left->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
  default:
        throw DataException("Programmer error - do not know how to deepcopy operator "+opToString(m_op)+".");
  }
}
2008    
2009    
// There is no single, natural interpretation of getLength on DataLazy.
// Instances of DataReady can look at the size of their vectors.
// For lazy though, it could be the size the data would be if it were resolved;
// or it could be some function of the lengths of the DataReady instances which
// form part of the expression.
// Rather than have people making assumptions, I have disabled the method.
DataTypes::RealVectorType::size_type
DataLazy::getLength() const
{
  throw DataException("getLength() does not make sense for lazy data.");
}
2022    
2023    
# Line 323  DataLazy::getSlice(const DataTypes::Regi Line 2027  DataLazy::getSlice(const DataTypes::Regi
2027    throw DataException("getSlice - not implemented for Lazy objects.");    throw DataException("getSlice - not implemented for Lazy objects.");
2028  }  }
2029    
// To do this we need to rely on our child nodes
// Non-const variant: may collapse the expression to obtain a concrete node
// whose offset can be reported.
DataTypes::RealVectorType::size_type
DataLazy::getPointOffset(int sampleNo,
                 int dataPointNo)
{
  if (m_op==IDENTITY)
  {
        return m_id->getPointOffset(sampleNo,dataPointNo);
  }
  if (m_readytype!='E')
  {
        // Constant/Tagged expression: evaluate it, then ask the result
        collapse();
        return m_id->getPointOffset(sampleNo,dataPointNo);
  }
  // at this point we do not have an identity node and the expression will be Expanded
  // so we only need to know which child to ask
  if (m_left->m_readytype=='E')
  {
        return m_left->getPointOffset(sampleNo,dataPointNo);
  }
  else
  {
        return m_right->getPointOffset(sampleNo,dataPointNo);
  }
}
2056    
// To do this we need to rely on our child nodes
// Const variant: cannot collapse, so Tagged expressions (which would need
// collapsing) raise an exception.
DataTypes::RealVectorType::size_type
DataLazy::getPointOffset(int sampleNo,
                 int dataPointNo) const
{
  if (m_op==IDENTITY)
  {
        return m_id->getPointOffset(sampleNo,dataPointNo);
  }
  if (m_readytype=='E')
  {
    // at this point we do not have an identity node and the expression will be Expanded
    // so we only need to know which child to ask
    if (m_left->m_readytype=='E')
    {
        return m_left->getPointOffset(sampleNo,dataPointNo);
    }
    else
    {
        return m_right->getPointOffset(sampleNo,dataPointNo);
    }
  }
  if (m_readytype=='C')
  {
        return m_left->getPointOffset(sampleNo,dataPointNo); // which child doesn't matter
  }
  throw DataException("Programmer error - getPointOffset on lazy data may require collapsing (but this object is marked const).");
}
2085    
2086    
// I have decided to let Data:: handle this issue.
// DataLazy objects are treated as read-only, so in-place zeroing is refused.
void
DataLazy::setToZero()
{
//   DataTypes::RealVectorType v(getNoValues(),0);
//   m_id=DataReady_ptr(new DataConstant(getFunctionSpace(),getShape(),v));
//   m_op=IDENTITY;
//   m_right.reset();  
//   m_left.reset();
//   m_readytype='C';
//   m_buffsRequired=1;

  (void)privdebug;  // to stop the compiler complaining about unused privdebug
  throw DataException("Programmer error - setToZero not supported for DataLazy (DataLazy objects should be read only).");
}
2102    
// True when resolving this expression would yield expanded data.
bool
DataLazy::actsExpanded() const
{
        return (m_readytype=='E');
}
2108    
2109    } // end namespace
2110    

Legend:
Removed from v.1885  
changed lines
  Added in v.5997

  ViewVC Help
Powered by ViewVC 1.1.26