/[escript]/branches/clazy/escriptcore/src/DataLazy.cpp
ViewVC logotype

Diff of /branches/clazy/escriptcore/src/DataLazy.cpp

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

branches/schroedinger/escript/src/DataLazy.cpp revision 1888 by jfenwick, Wed Oct 15 04:00:21 2008 UTC trunk/escriptcore/src/DataLazy.cpp revision 6056 by jfenwick, Thu Mar 10 04:29:53 2016 UTC
# Line 1  Line 1 
1    
2  /*******************************************************  /*****************************************************************************
3  *  *
4  * Copyright (c) 2003-2008 by University of Queensland  * Copyright (c) 2003-2016 by The University of Queensland
5  * Earth Systems Science Computational Center (ESSCC)  * http://www.uq.edu.au
 * http://www.uq.edu.au/esscc  
6  *  *
7  * Primary Business: Queensland, Australia  * Primary Business: Queensland, Australia
8  * Licensed under the Open Software License version 3.0  * Licensed under the Open Software License version 3.0
9  * http://www.opensource.org/licenses/osl-3.0.php  * http://www.opensource.org/licenses/osl-3.0.php
10  *  *
11  *******************************************************/  * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12    * Development 2012-2013 by School of Earth Sciences
13    * Development from 2014 by Centre for Geoscience Computing (GeoComp)
14    *
15    *****************************************************************************/
16    
17  #include "DataLazy.h"  #include "DataLazy.h"
18    #include "Data.h"
19    #include "DataTypes.h"
20    #include "EscriptParams.h"
21    #include "FunctionSpace.h"
22    #include "UnaryFuncs.h"    // for escript::fsign
23    #include "Utils.h"
24    #include "DataMaths.h"
25    
26  #ifdef USE_NETCDF  #ifdef USE_NETCDF
27  #include <netcdfcpp.h>  #include <netcdfcpp.h>
28  #endif  #endif
29  #ifdef PASO_MPI  
30  #include <mpi.h>  #include <iomanip> // for some fancy formatting in debug
31  #endif  
32  #ifdef _OPENMP  using namespace escript::DataTypes;
33  #include <omp.h>  
34  #endif  #define NO_ARG
35  #include "FunctionSpace.h"  
36  #include "DataTypes.h"  // #define LAZYDEBUG(X) if (privdebug){X;}
37  #include "Data.h"  #define LAZYDEBUG(X)
38  #include "UnaryFuncs.h"     // for escript::fsign  namespace
39    {
40    bool privdebug=false;
41    
42    #define ENABLEDEBUG privdebug=true;
43    #define DISABLEDEBUG privdebug=false;
44    }
45    
46    // #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {cerr << "\n!!!!!!! SIZE LIMIT EXCEEDED " << m_children << ";" << m_height << endl << toString() << endl;resolveToIdentity();}
47    
48    // #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {cerr << "SIZE LIMIT EXCEEDED " << m_height << endl;resolveToIdentity();}
49    
50    
51    #define SIZELIMIT if (m_height>escript::escriptParams.getTOO_MANY_LEVELS())  {if (escript::escriptParams.getLAZY_VERBOSE()){cerr << "SIZE LIMIT EXCEEDED height=" << m_height << endl;}resolveToIdentity();}
52    
53    /*
54    How does DataLazy work?
55    ~~~~~~~~~~~~~~~~~~~~~~~
56    
57    Each instance represents a single operation on one or two other DataLazy instances. These arguments are normally
58    denoted left and right.
59    
60    A special operation, IDENTITY, stores an instance of DataReady in the m_id member.
61    This means that all "internal" nodes in the structure are instances of DataLazy.
62    
63    Each operation has a string representation as well as an opgroup - eg G_IDENTITY, G_BINARY, ...
64    Note that IDENTITY is not considered a unary operation.
65    
66    I am avoiding calling the structure formed a tree because it is not guaranteed to be one (eg c=a+a).
67    It must however form a DAG (directed acyclic graph).
68    I will refer to individual DataLazy objects with the structure as nodes.
69    
70    Each node also stores:
71    - m_readytype \in {'E','T','C','?'} ~ indicates what sort of DataReady would be produced if the expression was
72            evaluated.
73    - m_buffsrequired ~ the large number of samples which would need to be kept simultaneously in order to
74            evaluate the expression.
75    - m_samplesize ~ the number of doubles stored in a sample.
76    
77    When a new node is created, the above values are computed based on the values in the child nodes.
78    Eg: if left requires 4 samples and right requires 6 then left+right requires 7 samples.
79    
80    The resolve method, which produces a DataReady from a DataLazy, does the following:
81    1) Create a DataReady to hold the new result.
82    2) Allocate a vector (v) big enough to hold m_buffsrequired samples.
83    3) For each sample, call resolveSample with v, to get its values and copy them into the result object.
84    
85    (In the case of OMP, multiple samples are resolved in parallel so the vector needs to be larger.)
86    
87    resolveSample returns a Vector* and an offset within that vector where the result is stored.
88    Normally, this would be v, but for identity nodes their internal vector is returned instead.
89    
90    The convention that I use, is that the resolve methods should store their results starting at the offset they are passed.
91    
92    For expressions which evaluate to Constant or Tagged, there is a different evaluation method.
93    The collapse method invokes the (non-lazy) operations on the Data class to evaluate the expression.
94    
95    To add a new operator you need to do the following (plus anything I might have forgotten - adding a new group for example):
96    1) Add to the ES_optype.
97    2) determine what opgroup your operation belongs to (X)
98    3) add a string for the op to the end of ES_opstrings
99    4) increase ES_opcount
100    5) add an entry (X) to opgroups
101    6) add an entry to the switch in collapseToReady
102    7) add an entry to resolveX
103    */
104    
105    
106  using namespace std;  using namespace std;
107  using namespace boost;  using namespace boost;
# Line 33  using namespace boost; Line 109  using namespace boost;
109  namespace escript  namespace escript
110  {  {
111    
 const std::string&  
 opToString(ES_optype op);  
   
112  namespace  namespace
113  {  {
114    
115    
116    // enabling this will print out when ever the maximum stacksize used by resolve increases
117    // it assumes _OPENMP is also in use
118    //#define LAZY_STACK_PROF
119    
120    
121    
122    #ifndef _OPENMP
123      #ifdef LAZY_STACK_PROF
124      #undef LAZY_STACK_PROF
125      #endif
126    #endif
127    
128    
129    #ifdef LAZY_STACK_PROF
130    std::vector<void*> stackstart(getNumberOfThreads());
131    std::vector<void*> stackend(getNumberOfThreads());
132    size_t maxstackuse=0;
133    #endif
134    
135  enum ES_opgroup  enum ES_opgroup
136  {  {
137     G_UNKNOWN,     G_UNKNOWN,
138     G_IDENTITY,     G_IDENTITY,
139     G_BINARY,     G_BINARY,            // pointwise operations with two arguments
140     G_UNARY     G_UNARY,             // pointwise operations with one argument
141       G_UNARY_P,           // pointwise operations with one argument, requiring a parameter
142       G_NP1OUT,            // non-pointwise op with one output
143       G_NP1OUT_P,          // non-pointwise op with one output requiring a parameter
144       G_TENSORPROD,        // general tensor product
145       G_NP1OUT_2P,         // non-pointwise op with one output requiring two params
146       G_REDUCTION,         // non-pointwise unary op with a scalar output
147       G_CONDEVAL
148  };  };
149    
150    
151    
152    
153  string ES_opstrings[]={"UNKNOWN","IDENTITY","+","-","*","/","sin","cos","tan",  string ES_opstrings[]={"UNKNOWN","IDENTITY","+","-","*","/","^",
154              "asin","acos","atan","sinh","cosh","tanh","erf",                          "sin","cos","tan",
155              "asinh","acosh","atanh",                          "asin","acos","atan","sinh","cosh","tanh","erf",
156              "log10","log","sign","abs","neg","pos","exp","sqrt",                          "asinh","acosh","atanh",
157              "1/","where>0","where<0","where>=0","where<=0"};                          "log10","log","sign","abs","neg","pos","exp","sqrt",
158  int ES_opcount=32;                          "1/","where>0","where<0","where>=0","where<=0", "where<>0","where=0",
159  ES_opgroup opgroups[]={G_UNKNOWN,G_IDENTITY,G_BINARY,G_BINARY,G_BINARY,G_BINARY,G_UNARY,G_UNARY,G_UNARY, //9                          "symmetric","nonsymmetric",
160              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,    // 16                          "prod",
161              G_UNARY,G_UNARY,G_UNARY,                    // 19                          "transpose", "trace",
162              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,        // 27                          "swapaxes",
163              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY};                          "minval", "maxval",
164                            "condEval"};
165    int ES_opcount=44;
166    ES_opgroup opgroups[]={G_UNKNOWN,G_IDENTITY,G_BINARY,G_BINARY,G_BINARY,G_BINARY, G_BINARY,
167                            G_UNARY,G_UNARY,G_UNARY, //10
168                            G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,        // 17
169                            G_UNARY,G_UNARY,G_UNARY,                                        // 20
170                            G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,        // 28
171                            G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY, G_UNARY_P, G_UNARY_P,          // 35
172                            G_NP1OUT,G_NP1OUT,
173                            G_TENSORPROD,
174                            G_NP1OUT_P, G_NP1OUT_P,
175                            G_NP1OUT_2P,
176                            G_REDUCTION, G_REDUCTION,
177                            G_CONDEVAL};
178  inline  inline
179  ES_opgroup  ES_opgroup
180  getOpgroup(ES_optype op)  getOpgroup(ES_optype op)
# Line 74  getOpgroup(ES_optype op) Line 186  getOpgroup(ES_optype op)
186  FunctionSpace  FunctionSpace
187  resultFS(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)  resultFS(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
188  {  {
189      // perhaps this should call interpolate and throw or something?          // perhaps this should call interpolate and throw or something?
190      // maybe we need an interpolate node -          // maybe we need an interpolate node -
191      // that way, if interpolate is required in any other op we can just throw a          // that way, if interpolate is required in any other op we can just throw a
192      // programming error exception.          // programming error exception.
193    
194      FunctionSpace l=left->getFunctionSpace();
195      if (left->getFunctionSpace()!=right->getFunctionSpace())    FunctionSpace r=right->getFunctionSpace();
196      {    if (l!=r)
197          throw DataException("FunctionSpaces not equal - interpolation not supported on lazy data.");    {
198      }      signed char res=r.getDomain()->preferredInterpolationOnDomain(r.getTypeCode(), l.getTypeCode());
199      return left->getFunctionSpace();      if (res==1)
200        {
201            return l;
202        }
203        if (res==-1)
204        {
205            return r;
206        }
207        throw DataException("Cannot interpolate between the FunctionSpaces given for operation "+opToString(op)+".");
208      }
209      return l;
210  }  }
211    
212  // return the shape of the result of "left op right"  // return the shape of the result of "left op right"
213    // the shapes resulting from tensor product are more complex to compute so are worked out elsewhere
214  DataTypes::ShapeType  DataTypes::ShapeType
215  resultShape(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)  resultShape(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
216  {  {
217      if (left->getShape()!=right->getShape())          if (left->getShape()!=right->getShape())
218      {          {
219          throw DataException("Shapes not the same - shapes must match for lazy data.");            if ((getOpgroup(op)!=G_BINARY) && (getOpgroup(op)!=G_NP1OUT))
220      }            {
221      return left->getShape();                  throw DataException("Shapes not the name - shapes must match for (point)binary operations.");
222              }
223    
224              if (left->getRank()==0)       // we need to allow scalar * anything
225              {
226                    return right->getShape();
227              }
228              if (right->getRank()==0)
229              {
230                    return left->getShape();
231              }
232              throw DataException("Shapes not the same - arguments must have matching shapes (or be scalars) for (point)binary operations on lazy data.");
233            }
234            return left->getShape();
235  }  }
236    
237  size_t  // return the shape for "op left"
238  resultLength(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)  
239    DataTypes::ShapeType
240    resultShape(DataAbstract_ptr left, ES_optype op, int axis_offset)
241  {  {
242     switch (getOpgroup(op))          switch(op)
243     {          {
244     case G_BINARY: return left->getLength();          case TRANS:
245     case G_UNARY: return left->getLength();             {                    // for the scoping of variables
246     default:                  const DataTypes::ShapeType& s=left->getShape();
247      throw DataException("Programmer Error - attempt to getLength() for operator "+opToString(op)+".");                  DataTypes::ShapeType sh;
248     }                  int rank=left->getRank();
249                    if (axis_offset<0 || axis_offset>rank)
250                    {
251                stringstream e;
252                e << "Error - Data::transpose must have 0 <= axis_offset <= rank=" << rank;
253                throw DataException(e.str());
254            }
255            for (int i=0; i<rank; i++)
256                    {
257                       int index = (axis_offset+i)%rank;
258               sh.push_back(s[index]); // Append to new shape
259            }
260                    return sh;
261               }
262            break;
263            case TRACE:
264               {
265                    int rank=left->getRank();
266                    if (rank<2)
267                    {
268                       throw DataException("Trace can only be computed for objects with rank 2 or greater.");
269                    }
270                    if ((axis_offset>rank-2) || (axis_offset<0))
271                    {
272                       throw DataException("Trace: axis offset must lie between 0 and rank-2 inclusive.");
273                    }
274                    if (rank==2)
275                    {
276                       return DataTypes::scalarShape;
277                    }
278                    else if (rank==3)
279                    {
280                       DataTypes::ShapeType sh;
281                       if (axis_offset==0)
282                       {
283                            sh.push_back(left->getShape()[2]);
284                       }
285                       else         // offset==1
286                       {
287                            sh.push_back(left->getShape()[0]);
288                       }
289                       return sh;
290                    }
291                    else if (rank==4)
292                    {
293                       DataTypes::ShapeType sh;
294                       const DataTypes::ShapeType& s=left->getShape();
295                       if (axis_offset==0)
296                       {
297                            sh.push_back(s[2]);
298                            sh.push_back(s[3]);
299                       }
300                       else if (axis_offset==1)
301                       {
302                            sh.push_back(s[0]);
303                            sh.push_back(s[3]);
304                       }
305                       else         // offset==2
306                       {
307                            sh.push_back(s[0]);
308                            sh.push_back(s[1]);
309                       }
310                       return sh;
311                    }
312                    else            // unknown rank
313                    {
314                       throw DataException("Error - Data::trace can only be calculated for rank 2, 3 or 4 object.");
315                    }
316               }
317            break;
318            default:
319            throw DataException("Programmer error - resultShape(left,op) can't compute shapes for operator "+opToString(op)+".");
320            }
321  }  }
322    
323  int  DataTypes::ShapeType
324  calcBuffs(const DataLazy_ptr& left, const DataLazy_ptr& right, ES_optype op)  SwapShape(DataAbstract_ptr left, const int axis0, const int axis1)
325  {  {
326     switch(getOpgroup(op))       // This code taken from the Data.cpp swapaxes() method
327     {       // Some of the checks are probably redundant here
328     case G_IDENTITY: return 1;       int axis0_tmp,axis1_tmp;
329     case G_BINARY: return max(left->getBuffsRequired(),right->getBuffsRequired()+1);       const DataTypes::ShapeType& s=left->getShape();
330     case G_UNARY: return max(left->getBuffsRequired(),1);       DataTypes::ShapeType out_shape;
331     default:       // Here's the equivalent of python s_out=s[axis_offset:]+s[:axis_offset]
332      throw DataException("Programmer Error - attempt to calcBuffs() for operator "+opToString(op)+".");       // which goes thru all shape vector elements starting with axis_offset (at index=rank wrap around to 0)
333     }       int rank=left->getRank();
334         if (rank<2) {
335            throw DataException("Error - Data::swapaxes argument must have at least rank 2.");
336         }
337         if (axis0<0 || axis0>rank-1) {
338            stringstream e;
339            e << "Error - Data::swapaxes: axis0 must be between 0 and rank-1=" << (rank-1);
340            throw DataException(e.str());
341         }
342         if (axis1<0 || axis1>rank-1) {
343            stringstream e;
344            e << "Error - Data::swapaxes: axis1 must be between 0 and rank-1=" << (rank-1);
345            throw DataException(e.str());
346         }
347         if (axis0 == axis1) {
348             throw DataException("Error - Data::swapaxes: axis indices must be different.");
349         }
350         if (axis0 > axis1) {
351             axis0_tmp=axis1;
352             axis1_tmp=axis0;
353         } else {
354             axis0_tmp=axis0;
355             axis1_tmp=axis1;
356         }
357         for (int i=0; i<rank; i++) {
358           if (i == axis0_tmp) {
359              out_shape.push_back(s[axis1_tmp]);
360           } else if (i == axis1_tmp) {
361              out_shape.push_back(s[axis0_tmp]);
362           } else {
363              out_shape.push_back(s[i]);
364           }
365         }
366        return out_shape;
367  }  }
368    
369    
370    // determine the output shape for the general tensor product operation
371    // the additional parameters return information required later for the product
372    // the majority of this code is copy pasted from C_General_Tensor_Product
373    DataTypes::ShapeType
374    GTPShape(DataAbstract_ptr left, DataAbstract_ptr right, int axis_offset, int transpose, int& SL, int& SM, int& SR)
375    {
376            
377      // Get rank and shape of inputs
378      int rank0 = left->getRank();
379      int rank1 = right->getRank();
380      const DataTypes::ShapeType& shape0 = left->getShape();
381      const DataTypes::ShapeType& shape1 = right->getShape();
382    
383      // Prepare for the loops of the product and verify compatibility of shapes
384      int start0=0, start1=0;
385      if (transpose == 0)           {}
386      else if (transpose == 1)      { start0 = axis_offset; }
387      else if (transpose == 2)      { start1 = rank1-axis_offset; }
388      else                          { throw DataException("DataLazy GeneralTensorProduct Constructor: Error - transpose should be 0, 1 or 2"); }
389    
390      if (rank0<axis_offset)
391      {
392            throw DataException("DataLazy GeneralTensorProduct Constructor: Error - rank of left < axisoffset");
393      }
394    
395      // Adjust the shapes for transpose
396      DataTypes::ShapeType tmpShape0(rank0);        // pre-sizing the vectors rather
397      DataTypes::ShapeType tmpShape1(rank1);        // than using push_back
398      for (int i=0; i<rank0; i++)   { tmpShape0[i]=shape0[(i+start0)%rank0]; }
399      for (int i=0; i<rank1; i++)   { tmpShape1[i]=shape1[(i+start1)%rank1]; }
400    
401      // Prepare for the loops of the product
402      SL=1, SM=1, SR=1;
403      for (int i=0; i<rank0-axis_offset; i++)       {
404        SL *= tmpShape0[i];
405      }
406      for (int i=rank0-axis_offset; i<rank0; i++)   {
407        if (tmpShape0[i] != tmpShape1[i-(rank0-axis_offset)]) {
408          throw DataException("C_GeneralTensorProduct: Error - incompatible shapes");
409        }
410        SM *= tmpShape0[i];
411      }
412      for (int i=axis_offset; i<rank1; i++)         {
413        SR *= tmpShape1[i];
414      }
415    
416      // Define the shape of the output (rank of shape is the sum of the loop ranges below)
417      DataTypes::ShapeType shape2(rank0+rank1-2*axis_offset);      
418      {                     // block to limit the scope of out_index
419         int out_index=0;
420         for (int i=0; i<rank0-axis_offset; i++, ++out_index) { shape2[out_index]=tmpShape0[i]; } // First part of arg_0_Z
421         for (int i=axis_offset; i<rank1; i++, ++out_index)   { shape2[out_index]=tmpShape1[i]; } // Last part of arg_1_Z
422      }
423    
424      if (shape2.size()>ESCRIPT_MAX_DATA_RANK)
425      {
426         ostringstream os;
427         os << "C_GeneralTensorProduct: Error - Attempt to create a rank " << shape2.size() << " object. The maximum rank is " << ESCRIPT_MAX_DATA_RANK << ".";
428         throw DataException(os.str());
429      }
430    
431      return shape2;
432    }
433    
434    }       // end anonymous namespace
435    
 }   // end anonymous namespace  
436    
437    
438    // Return a string representing the operation
439  const std::string&  const std::string&
440  opToString(ES_optype op)  opToString(ES_optype op)
441  {  {
# Line 138  opToString(ES_optype op) Line 446  opToString(ES_optype op)
446    return ES_opstrings[op];    return ES_opstrings[op];
447  }  }
448    
449    void DataLazy::LazyNodeSetup()
450    {
451    #ifdef _OPENMP
452        int numthreads=omp_get_max_threads();
453        m_samples.resize(numthreads*m_samplesize);
454        m_sampleids=new int[numthreads];
455        for (int i=0;i<numthreads;++i)
456        {
457            m_sampleids[i]=-1;  
458        }
459    #else
460        m_samples.resize(m_samplesize);
461        m_sampleids=new int[1];
462        m_sampleids[0]=-1;
463    #endif  // _OPENMP
464    }
465    
466    
467    // Creates an identity node
468  DataLazy::DataLazy(DataAbstract_ptr p)  DataLazy::DataLazy(DataAbstract_ptr p)
469      : parent(p->getFunctionSpace(),p->getShape()),          : parent(p->getFunctionSpace(),p->getShape())
470      m_op(IDENTITY)          ,m_sampleids(0),
471            m_samples(1)
472  {  {
473     if (p->isLazy())     if (p->isLazy())
474     {     {
475      // TODO: fix this.   We could make the new node a copy of p?          // I don't want identity of Lazy.
476      // I don't want identity of Lazy.          // Question: Why would that be so bad?
477      // Question: Why would that be so bad?          // Answer: We assume that the child of ID is something we can call getVector on
478      // Answer: We assume that the child of ID is something we can call getVector on          throw DataException("Programmer error - attempt to create identity from a DataLazy.");
     throw DataException("Programmer error - attempt to create identity from a DataLazy.");  
479     }     }
480     else     else
481     {     {
482      m_id=dynamic_pointer_cast<DataReady>(p);          p->makeLazyShared();
483            DataReady_ptr dr=dynamic_pointer_cast<DataReady>(p);
484            makeIdentity(dr);
485    LAZYDEBUG(cout << "Wrapping " << dr.get() << " id=" << m_id.get() << endl;)
486     }     }
487     m_length=p->getLength();  LAZYDEBUG(cout << "(1)Lazy created with " << m_samplesize << endl;)
    m_buffsRequired=1;  
    m_samplesize=getNumDPPSample()*getNoValues();  
 cout << "(1)Lazy created with " << m_samplesize << endl;  
488  }  }
489    
490  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op)  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op)
491      : parent(left->getFunctionSpace(),left->getShape()),          : parent(left->getFunctionSpace(),(getOpgroup(op)!=G_REDUCTION)?left->getShape():DataTypes::scalarShape),
492      m_op(op)          m_op(op),
493            m_axis_offset(0),
494            m_transpose(0),
495            m_SL(0), m_SM(0), m_SR(0)
496  {  {
497     if (getOpgroup(op)!=G_UNARY)     if ((getOpgroup(op)!=G_UNARY) && (getOpgroup(op)!=G_NP1OUT) && (getOpgroup(op)!=G_REDUCTION))
498     {     {
499      throw DataException("Programmer error - constructor DataLazy(left, op) will only process UNARY operations.");          throw DataException("Programmer error - constructor DataLazy(left, op) will only process UNARY operations.");
500     }     }
501    
502     DataLazy_ptr lleft;     DataLazy_ptr lleft;
503     if (!left->isLazy())     if (!left->isLazy())
504     {     {
505      lleft=DataLazy_ptr(new DataLazy(left));          lleft=DataLazy_ptr(new DataLazy(left));
506     }     }
507     else     else
508     {     {
509      lleft=dynamic_pointer_cast<DataLazy>(left);          lleft=dynamic_pointer_cast<DataLazy>(left);
510     }     }
511     m_length=left->getLength();     m_readytype=lleft->m_readytype;
512     m_left=lleft;     m_left=lleft;
    m_buffsRequired=1;  
513     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
514       m_children=m_left->m_children+1;
515       m_height=m_left->m_height+1;
516       LazyNodeSetup();
517       SIZELIMIT
518  }  }
519    
520    
521  DataLazy::DataLazy(DataLazy_ptr left, DataLazy_ptr right, ES_optype op)  // In this constructor we need to consider interpolation
522      : parent(resultFS(left,right,op), resultShape(left,right,op)),  DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
523      m_left(left),          : parent(resultFS(left,right,op), resultShape(left,right,op)),
524      m_right(right),          m_op(op),
525      m_op(op)          m_SL(0), m_SM(0), m_SR(0)
526  {  {
527     if (getOpgroup(op)!=G_BINARY)  LAZYDEBUG(cout << "Forming operator with " << left.get() << " " << right.get() << endl;)
528       if ((getOpgroup(op)!=G_BINARY))
529     {     {
530      throw DataException("Programmer error - constructor DataLazy(left, right, op) will only process BINARY operations.");          throw DataException("Programmer error - constructor DataLazy(left, right, op) will only process BINARY operations.");
531       }
532    
533       if (getFunctionSpace()!=left->getFunctionSpace())    // left needs to be interpolated
534       {
535            FunctionSpace fs=getFunctionSpace();
536            Data ltemp(left);
537            Data tmp(ltemp,fs);
538            left=tmp.borrowDataPtr();
539       }
540       if (getFunctionSpace()!=right->getFunctionSpace())   // right needs to be interpolated
541       {
542            Data tmp(Data(right),getFunctionSpace());
543            right=tmp.borrowDataPtr();
544    LAZYDEBUG(cout << "Right interpolation required " << right.get() << endl;)
545       }
546       left->operandCheck(*right);
547    
548       if (left->isLazy())                  // the children need to be DataLazy. Wrap them in IDENTITY if required
549       {
550            m_left=dynamic_pointer_cast<DataLazy>(left);
551    LAZYDEBUG(cout << "Left is " << m_left->toString() << endl;)
552       }
553       else
554       {
555            m_left=DataLazy_ptr(new DataLazy(left));
556    LAZYDEBUG(cout << "Left " << left.get() << " wrapped " << m_left->m_id.get() << endl;)
557       }
558       if (right->isLazy())
559       {
560            m_right=dynamic_pointer_cast<DataLazy>(right);
561    LAZYDEBUG(cout << "Right is " << m_right->toString() << endl;)
562       }
563       else
564       {
565            m_right=DataLazy_ptr(new DataLazy(right));
566    LAZYDEBUG(cout << "Right " << right.get() << " wrapped " << m_right->m_id.get() << endl;)
567       }
568       char lt=m_left->m_readytype;
569       char rt=m_right->m_readytype;
570       if (lt=='E' || rt=='E')
571       {
572            m_readytype='E';
573       }
574       else if (lt=='T' || rt=='T')
575       {
576            m_readytype='T';
577       }
578       else
579       {
580            m_readytype='C';
581     }     }
    m_length=resultLength(m_left,m_right,m_op);  
582     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
583     m_buffsRequired=calcBuffs(m_left, m_right, m_op);     m_children=m_left->m_children+m_right->m_children+2;
584  cout << "(2)Lazy created with " << m_samplesize << endl;     m_height=max(m_left->m_height,m_right->m_height)+1;
585       LazyNodeSetup();
586       SIZELIMIT
587    LAZYDEBUG(cout << "(3)Lazy created with " << m_samplesize << endl;)
588  }  }
589    
590  DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)  DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op, int axis_offset, int transpose)
591      : parent(resultFS(left,right,op), resultShape(left,right,op)),          : parent(resultFS(left,right,op), GTPShape(left,right, axis_offset, transpose, m_SL,m_SM, m_SR)),
592      m_op(op)          m_op(op),
593            m_axis_offset(axis_offset),
594            m_transpose(transpose)
595  {  {
596     if (getOpgroup(op)!=G_BINARY)     if ((getOpgroup(op)!=G_TENSORPROD))
597       {
598            throw DataException("Programmer error - constructor DataLazy(left, right, op, ax, tr) will only process BINARY operations which require parameters.");
599       }
600       if ((transpose>2) || (transpose<0))
601       {
602            throw DataException("DataLazy GeneralTensorProduct constructor: Error - transpose should be 0, 1 or 2");
603       }
604       if (getFunctionSpace()!=left->getFunctionSpace())    // left needs to be interpolated
605     {     {
606      throw DataException("Programmer error - constructor DataLazy(left, op) will only process BINARY operations.");          FunctionSpace fs=getFunctionSpace();
607            Data ltemp(left);
608            Data tmp(ltemp,fs);
609            left=tmp.borrowDataPtr();
610     }     }
611     if (left->isLazy())     if (getFunctionSpace()!=right->getFunctionSpace())   // right needs to be interpolated
612     {     {
613      m_left=dynamic_pointer_cast<DataLazy>(left);          Data tmp(Data(right),getFunctionSpace());
614            right=tmp.borrowDataPtr();
615       }
616    //    left->operandCheck(*right);
617    
618       if (left->isLazy())                  // the children need to be DataLazy. Wrap them in IDENTITY if required
619       {
620            m_left=dynamic_pointer_cast<DataLazy>(left);
621     }     }
622     else     else
623     {     {
624      m_left=DataLazy_ptr(new DataLazy(left));          m_left=DataLazy_ptr(new DataLazy(left));
625     }     }
626     if (right->isLazy())     if (right->isLazy())
627     {     {
628      m_right=dynamic_pointer_cast<DataLazy>(right);          m_right=dynamic_pointer_cast<DataLazy>(right);
629     }     }
630     else     else
631     {     {
632      m_right=DataLazy_ptr(new DataLazy(right));          m_right=DataLazy_ptr(new DataLazy(right));
633       }
634       char lt=m_left->m_readytype;
635       char rt=m_right->m_readytype;
636       if (lt=='E' || rt=='E')
637       {
638            m_readytype='E';
639       }
640       else if (lt=='T' || rt=='T')
641       {
642            m_readytype='T';
643     }     }
644       else
645       {
646            m_readytype='C';
647       }
648       m_samplesize=getNumDPPSample()*getNoValues();
649       m_children=m_left->m_children+m_right->m_children+2;
650       m_height=max(m_left->m_height,m_right->m_height)+1;
651       LazyNodeSetup();
652       SIZELIMIT
653    LAZYDEBUG(cout << "(4)Lazy created with " << m_samplesize << endl;)
654    }
655    
656     m_length=resultLength(m_left,m_right,m_op);  
657    DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, int axis_offset)
658            : parent(left->getFunctionSpace(), resultShape(left,op, axis_offset)),
659            m_op(op),
660            m_axis_offset(axis_offset),
661            m_transpose(0),
662            m_tol(0)
663    {
664       if ((getOpgroup(op)!=G_NP1OUT_P))
665       {
666            throw DataException("Programmer error - constructor DataLazy(left, op, ax) will only process UNARY operations which require parameters.");
667       }
668       DataLazy_ptr lleft;
669       if (!left->isLazy())
670       {
671            lleft=DataLazy_ptr(new DataLazy(left));
672       }
673       else
674       {
675            lleft=dynamic_pointer_cast<DataLazy>(left);
676       }
677       m_readytype=lleft->m_readytype;
678       m_left=lleft;
679     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
680     m_buffsRequired=calcBuffs(m_left, m_right,m_op);     m_children=m_left->m_children+1;
681  cout << "(3)Lazy created with " << m_samplesize << endl;     m_height=m_left->m_height+1;
682       LazyNodeSetup();
683       SIZELIMIT
684    LAZYDEBUG(cout << "(5)Lazy created with " << m_samplesize << endl;)
685  }  }
686    
687    DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, double tol)
688            : parent(left->getFunctionSpace(), left->getShape()),
689            m_op(op),
690            m_axis_offset(0),
691            m_transpose(0),
692            m_tol(tol)
693    {
694       if ((getOpgroup(op)!=G_UNARY_P))
695       {
696            throw DataException("Programmer error - constructor DataLazy(left, op, tol) will only process UNARY operations which require parameters.");
697       }
698       DataLazy_ptr lleft;
699       if (!left->isLazy())
700       {
701            lleft=DataLazy_ptr(new DataLazy(left));
702       }
703       else
704       {
705            lleft=dynamic_pointer_cast<DataLazy>(left);
706       }
707       m_readytype=lleft->m_readytype;
708       m_left=lleft;
709       m_samplesize=getNumDPPSample()*getNoValues();
710       m_children=m_left->m_children+1;
711       m_height=m_left->m_height+1;
712       LazyNodeSetup();
713       SIZELIMIT
714    LAZYDEBUG(cout << "(6)Lazy created with " << m_samplesize << endl;)
715    }
716    
717  DataLazy::~DataLazy()  
718    DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, const int axis0, const int axis1)
719            : parent(left->getFunctionSpace(), SwapShape(left,axis0,axis1)),
720            m_op(op),
721            m_axis_offset(axis0),
722            m_transpose(axis1),
723            m_tol(0)
724    {
725       if ((getOpgroup(op)!=G_NP1OUT_2P))
726       {
727            throw DataException("Programmer error - constructor DataLazy(left, op, tol) will only process UNARY operations which require two integer parameters.");
728       }
729       DataLazy_ptr lleft;
730       if (!left->isLazy())
731       {
732            lleft=DataLazy_ptr(new DataLazy(left));
733       }
734       else
735       {
736            lleft=dynamic_pointer_cast<DataLazy>(left);
737       }
738       m_readytype=lleft->m_readytype;
739       m_left=lleft;
740       m_samplesize=getNumDPPSample()*getNoValues();
741       m_children=m_left->m_children+1;
742       m_height=m_left->m_height+1;
743       LazyNodeSetup();
744       SIZELIMIT
745    LAZYDEBUG(cout << "(7)Lazy created with " << m_samplesize << endl;)
746    }
747    
748    
namespace
{
    // Return the largest of three ints (used for computing tree height of ternary nodes).
    inline int max3(int a, int b, int c)
    {
        int best = a;
        if (b > best)
        {
            best = b;
        }
        if (c > best)
        {
            best = c;
        }
        return best;
    }
}
759    
760    DataLazy::DataLazy(DataAbstract_ptr mask, DataAbstract_ptr left, DataAbstract_ptr right/*, double tol*/)
761            : parent(left->getFunctionSpace(), left->getShape()),
762            m_op(CONDEVAL),
763            m_axis_offset(0),
764            m_transpose(0),
765            m_tol(0)
766  {  {
767    
768       DataLazy_ptr lmask;
769       DataLazy_ptr lleft;
770       DataLazy_ptr lright;
771       if (!mask->isLazy())
772       {
773            lmask=DataLazy_ptr(new DataLazy(mask));
774       }
775       else
776       {
777            lmask=dynamic_pointer_cast<DataLazy>(mask);
778       }
779       if (!left->isLazy())
780       {
781            lleft=DataLazy_ptr(new DataLazy(left));
782       }
783       else
784       {
785            lleft=dynamic_pointer_cast<DataLazy>(left);
786       }
787       if (!right->isLazy())
788       {
789            lright=DataLazy_ptr(new DataLazy(right));
790       }
791       else
792       {
793            lright=dynamic_pointer_cast<DataLazy>(right);
794       }
795       m_readytype=lmask->m_readytype;
796       if ((lleft->m_readytype!=lright->m_readytype) || (lmask->m_readytype!=lleft->m_readytype))
797       {
798            throw DataException("Programmer Error - condEval arguments must have the same readytype");
799       }
800       m_left=lleft;
801       m_right=lright;
802       m_mask=lmask;
803       m_samplesize=getNumDPPSample()*getNoValues();
804       m_children=m_left->m_children+m_right->m_children+m_mask->m_children+1;
805       m_height=max3(m_left->m_height,m_right->m_height,m_mask->m_height)+1;
806       LazyNodeSetup();
807       SIZELIMIT
808    LAZYDEBUG(cout << "(8)Lazy created with " << m_samplesize << endl;)
809  }  }
810    
811    
812  int  
813  DataLazy::getBuffsRequired() const  DataLazy::~DataLazy()
814  {  {
815      return m_buffsRequired;     delete[] m_sampleids;
816  }  }
817    
818    
819  // the vector and the offset are a place where the method could write its data if it wishes  /*
820  // it is not obligated to do so. For example, if it has its own storage already, it can use that.    \brief Evaluates the expression using methods on Data.
821  // Hence the return value to indicate where the data is actually stored.    This does the work for the collapse method.
822  // Regardless, the storage should be assumed to be used, even if it isn't.    For reasons of efficiency do not call this method on DataExpanded nodes.
823  const double*  */
824  DataLazy::resolveSample(ValueType& v,int sampleNo,  size_t offset ) const  DataReady_ptr
825    DataLazy::collapseToReady() const
826  {  {
827    if (m_op==IDENTITY)      if (m_readytype=='E')
828      {     // this is more an efficiency concern than anything else
829        throw DataException("Programmer Error - do not use collapse on Expanded data.");
830      }
831      if (m_op==IDENTITY)
832    {    {
833      const ValueType& vec=m_id->getVector();      return m_id;
     return &(vec[m_id->getPointOffset(sampleNo, 0)]);  
834    }    }
835    size_t rightoffset=offset+m_samplesize;    DataReady_ptr pleft=m_left->collapseToReady();
836    const double* left=m_left->resolveSample(v,sampleNo,offset);    Data left(pleft);
837    const double* right=0;    Data right;
838    if (getOpgroup(m_op)==G_BINARY)    if ((getOpgroup(m_op)==G_BINARY) || (getOpgroup(m_op)==G_TENSORPROD))
839    {    {
840      right=m_right->resolveSample(v,sampleNo,rightoffset);      right=Data(m_right->collapseToReady());
841    }    }
842    double* result=&(v[offset]);    Data result;
843      switch(m_op)
844    {    {
845      switch(m_op)      case ADD:
846      {          result=left+right;
847      case ADD:       // since these are pointwise ops, pretend each sample is one point          break;
848      tensor_binary_operation(m_samplesize, left, right, result, plus<double>());      case SUB:          
849      break;          result=left-right;
850      case SUB:                break;
851      tensor_binary_operation(m_samplesize, left, right, result, minus<double>());      case MUL:          
852      break;          result=left*right;
853      case MUL:                break;
854      tensor_binary_operation(m_samplesize, left, right, result, multiplies<double>());      case DIV:          
855      break;          result=left/right;
856      case DIV:                break;
     tensor_binary_operation(m_samplesize, left, right, result, divides<double>());  
     break;  
 // unary ops  
857      case SIN:      case SIN:
858      tensor_unary_operation(m_samplesize, left, result, ::sin);          result=left.sin();      
859      break;          break;
860      case COS:      case COS:
861      tensor_unary_operation(m_samplesize, left, result, ::cos);          result=left.cos();
862      break;          break;
863      case TAN:      case TAN:
864      tensor_unary_operation(m_samplesize, left, result, ::tan);          result=left.tan();
865      break;          break;
866      case ASIN:      case ASIN:
867      tensor_unary_operation(m_samplesize, left, result, ::asin);          result=left.asin();
868      break;          break;
869      case ACOS:      case ACOS:
870      tensor_unary_operation(m_samplesize, left, result, ::acos);          result=left.acos();
871      break;          break;
872      case ATAN:      case ATAN:
873      tensor_unary_operation(m_samplesize, left, result, ::atan);          result=left.atan();
874      break;          break;
875      case SINH:      case SINH:
876      tensor_unary_operation(m_samplesize, left, result, ::sinh);          result=left.sinh();
877      break;          break;
878      case COSH:      case COSH:
879      tensor_unary_operation(m_samplesize, left, result, ::cosh);          result=left.cosh();
880      break;          break;
881      case TANH:      case TANH:
882      tensor_unary_operation(m_samplesize, left, result, ::tanh);          result=left.tanh();
883      break;          break;
884      case ERF:      case ERF:
885  #ifdef _WIN32          result=left.erf();
886      throw DataException("Error - Data:: erf function is not supported on _WIN32 platforms.");          break;
887       case ASINH:
888            result=left.asinh();
889            break;
890       case ACOSH:
891            result=left.acosh();
892            break;
893       case ATANH:
894            result=left.atanh();
895            break;
896        case LOG10:
897            result=left.log10();
898            break;
899        case LOG:
900            result=left.log();
901            break;
902        case SIGN:
903            result=left.sign();
904            break;
905        case ABS:
906            result=left.abs();
907            break;
908        case NEG:
909            result=left.neg();
910            break;
911        case POS:
912            // it doesn't mean anything for delayed.
913            // it will just trigger a deep copy of the lazy object
914            throw DataException("Programmer error - POS not supported for lazy data.");
915            break;
916        case EXP:
917            result=left.exp();
918            break;
919        case SQRT:
920            result=left.sqrt();
921            break;
922        case RECIP:
923            result=left.oneOver();
924            break;
925        case GZ:
926            result=left.wherePositive();
927            break;
928        case LZ:
929            result=left.whereNegative();
930            break;
931        case GEZ:
932            result=left.whereNonNegative();
933            break;
934        case LEZ:
935            result=left.whereNonPositive();
936            break;
937        case NEZ:
938            result=left.whereNonZero(m_tol);
939            break;
940        case EZ:
941            result=left.whereZero(m_tol);
942            break;
943        case SYM:
944            result=left.symmetric();
945            break;
946        case NSYM:
947            result=left.nonsymmetric();
948            break;
949        case PROD:
950            result=C_GeneralTensorProduct(left,right,m_axis_offset, m_transpose);
951            break;
952        case TRANS:
953            result=left.transpose(m_axis_offset);
954            break;
955        case TRACE:
956            result=left.trace(m_axis_offset);
957            break;
958        case SWAP:
959            result=left.swapaxes(m_axis_offset, m_transpose);
960            break;
961        case MINVAL:
962            result=left.minval();
963            break;
964        case MAXVAL:
965            result=left.minval();
966            break;
967        default:
968            throw DataException("Programmer error - collapseToReady does not know how to resolve operator "+opToString(m_op)+".");
969      }
970      return result.borrowReadyPtr();
971    }
972    
973    /*
974       \brief Converts the DataLazy into an IDENTITY storing the value of the expression.
975       This method uses the original methods on the Data class to evaluate the expressions.
976       For this reason, it should not be used on DataExpanded instances. (To do so would defeat
977       the purpose of using DataLazy in the first place).
978    */
979    void
980    DataLazy::collapse() const
981    {
982      if (m_op==IDENTITY)
983      {
984            return;
985      }
986      if (m_readytype=='E')
987      {     // this is more an efficiency concern than anything else
988        throw DataException("Programmer Error - do not use collapse on Expanded data.");
989      }
990      m_id=collapseToReady();
991      m_op=IDENTITY;
992    }
993    
994    
995    
996    
997    
998    
// Apply the binary functor X chunk-wise across a sample. The outer (onumsteps) and
// inner (numsteps) loops, together with the separate left/right step sizes, implement
// broadcasting between operands of different shapes; offsets advance independently so
// one operand may be replayed against many chunks of the other.
#define PROC_OP(TYPE,X)                               \
        for (int j=0;j<onumsteps;++j)\
        {\
          for (int i=0;i<numsteps;++i,resultp+=resultStep) \
          { \
LAZYDEBUG(cout << "[left,right]=[" << lroffset << "," << rroffset << "]" << endl;)\
LAZYDEBUG(cout << "{left,right}={" << (*left)[lroffset] << "," << (*right)[rroffset] << "}\n";)\
             tensor_binary_operation< TYPE >(chunksize, &((*left)[lroffset]), &((*right)[rroffset]), resultp, X); \
LAZYDEBUG(cout << " result=      " << resultp[0] << endl;) \
             lroffset+=leftstep; \
             rroffset+=rightstep; \
          }\
          lroffset+=oleftstep;\
          rroffset+=orightstep;\
        }
1014    
1015    
1016    // The result will be stored in m_samples
1017    // The return value is a pointer to the DataVector, offset is the offset within the return value
1018    const DataTypes::RealVectorType*
1019    DataLazy::resolveNodeSample(int tid, int sampleNo, size_t& roffset) const
1020    {
1021    LAZYDEBUG(cout << "Resolve sample " << toString() << endl;)
1022            // collapse so we have a 'E' node or an IDENTITY for some other type
1023      if (m_readytype!='E' && m_op!=IDENTITY)
1024      {
1025            collapse();
1026      }
1027      if (m_op==IDENTITY)  
1028      {
1029        const RealVectorType& vec=m_id->getVectorRO();
1030        roffset=m_id->getPointOffset(sampleNo, 0);
1031    #ifdef LAZY_STACK_PROF
1032    int x;
1033    if (&x<stackend[omp_get_thread_num()])
1034    {
1035           stackend[omp_get_thread_num()]=&x;
1036    }
1037    #endif
1038        return &(vec);
1039      }
1040      if (m_readytype!='E')
1041      {
1042        throw DataException("Programmer Error - Collapse did not produce an expanded node.");
1043      }
1044      if (m_sampleids[tid]==sampleNo)
1045      {
1046            roffset=tid*m_samplesize;
1047            return &(m_samples);            // sample is already resolved
1048      }
1049      m_sampleids[tid]=sampleNo;
1050    
1051      switch (getOpgroup(m_op))
1052      {
1053      case G_UNARY:
1054      case G_UNARY_P: return resolveNodeUnary(tid, sampleNo, roffset);
1055      case G_BINARY: return resolveNodeBinary(tid, sampleNo, roffset);
1056      case G_NP1OUT: return resolveNodeNP1OUT(tid, sampleNo, roffset);
1057      case G_NP1OUT_P: return resolveNodeNP1OUT_P(tid, sampleNo, roffset);
1058      case G_TENSORPROD: return resolveNodeTProd(tid, sampleNo, roffset);
1059      case G_NP1OUT_2P: return resolveNodeNP1OUT_2P(tid, sampleNo, roffset);
1060      case G_REDUCTION: return resolveNodeReduction(tid, sampleNo, roffset);
1061      case G_CONDEVAL: return resolveNodeCondEval(tid, sampleNo, roffset);
1062      default:
1063        throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");
1064      }
1065    }
1066    
1067    const DataTypes::RealVectorType*
1068    DataLazy::resolveNodeUnary(int tid, int sampleNo, size_t& roffset) const
1069    {
1070            // we assume that any collapsing has been done before we get here
1071            // since we only have one argument we don't need to think about only
1072            // processing single points.
1073            // we will also know we won't get identity nodes
1074      if (m_readytype!='E')
1075      {
1076        throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
1077      }
1078      if (m_op==IDENTITY)
1079      {
1080        throw DataException("Programmer error - resolveNodeUnary should not be called on identity nodes.");
1081      }
1082      const DataTypes::RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, roffset);
1083      const double* left=&((*leftres)[roffset]);
1084      roffset=m_samplesize*tid;
1085      double* result=&(m_samples[roffset]);
1086      switch (m_op)
1087      {
1088        case SIN:  
1089            tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sin);
1090            break;
1091        case COS:
1092            tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cos);
1093            break;
1094        case TAN:
1095            tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tan);
1096            break;
1097        case ASIN:
1098            tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::asin);
1099            break;
1100        case ACOS:
1101            tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::acos);
1102            break;
1103        case ATAN:
1104            tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::atan);
1105            break;
1106        case SINH:
1107            tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sinh);
1108            break;
1109        case COSH:
1110            tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cosh);
1111            break;
1112        case TANH:
1113            tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tanh);
1114            break;
1115        case ERF:
1116    #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1117            throw DataException("Error - Data:: erf function is not supported on _WIN32 platforms.");
1118  #else  #else
1119      tensor_unary_operation(m_samplesize, left, result, ::erf);          tensor_unary_operation(m_samplesize, left, result, ::erf);
1120      break;          break;
1121  #endif  #endif
1122     case ASINH:     case ASINH:
1123  #ifdef _WIN32  #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1124      tensor_unary_operation(m_samplesize, left, result, escript::asinh_substitute);          tensor_unary_operation(m_samplesize, left, result, escript::asinh_substitute);
1125  #else  #else
1126      tensor_unary_operation(m_samplesize, left, result, ::asinh);          tensor_unary_operation(m_samplesize, left, result, ::asinh);
1127  #endif    #endif  
1128      break;          break;
1129     case ACOSH:     case ACOSH:
1130  #ifdef _WIN32  #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1131      tensor_unary_operation(m_samplesize, left, result, escript::acosh_substitute);          tensor_unary_operation(m_samplesize, left, result, escript::acosh_substitute);
1132  #else  #else
1133      tensor_unary_operation(m_samplesize, left, result, ::acosh);          tensor_unary_operation(m_samplesize, left, result, ::acosh);
1134  #endif    #endif  
1135      break;          break;
1136     case ATANH:     case ATANH:
1137  #ifdef _WIN32  #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1138      tensor_unary_operation(m_samplesize, left, result, escript::atanh_substitute);          tensor_unary_operation(m_samplesize, left, result, escript::atanh_substitute);
1139  #else  #else
1140      tensor_unary_operation(m_samplesize, left, result, ::atanh);          tensor_unary_operation(m_samplesize, left, result, ::atanh);
1141  #endif    #endif  
1142      break;          break;
1143      case LOG10:      case LOG10:
1144      tensor_unary_operation(m_samplesize, left, result, ::log10);          tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log10);
1145      break;          break;
1146      case LOG:      case LOG:
1147      tensor_unary_operation(m_samplesize, left, result, ::log);          tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log);
1148      break;          break;
1149      case SIGN:      case SIGN:
1150      tensor_unary_operation(m_samplesize, left, result, escript::fsign);          tensor_unary_operation(m_samplesize, left, result, escript::fsign);
1151      break;          break;
1152      case ABS:      case ABS:
1153      tensor_unary_operation(m_samplesize, left, result, ::fabs);          tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::fabs);
1154      break;          break;
1155      case NEG:      case NEG:
1156      tensor_unary_operation(m_samplesize, left, result, negate<double>());          tensor_unary_operation(m_samplesize, left, result, negate<double>());
1157      break;          break;
1158      case POS:      case POS:
1159      // it doesn't mean anything for delayed.          // it doesn't mean anything for delayed.
1160      // it will just trigger a deep copy of the lazy object          // it will just trigger a deep copy of the lazy object
1161      throw DataException("Programmer error - POS not supported for lazy data.");          throw DataException("Programmer error - POS not supported for lazy data.");
1162      break;          break;
1163      case EXP:      case EXP:
1164      tensor_unary_operation(m_samplesize, left, result, ::exp);          tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::exp);
1165      break;          break;
1166      case SQRT:      case SQRT:
1167      tensor_unary_operation(m_samplesize, left, result, ::sqrt);          tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sqrt);
1168      break;          break;
1169      case RECIP:      case RECIP:
1170      tensor_unary_operation(m_samplesize, left, result, bind1st(divides<double>(),1.));          tensor_unary_operation(m_samplesize, left, result, bind1st(divides<double>(),1.));
1171      break;          break;
1172      case GZ:      case GZ:
1173      tensor_unary_operation(m_samplesize, left, result, bind2nd(greater<double>(),0.0));          tensor_unary_operation(m_samplesize, left, result, bind2nd(greater<double>(),0.0));
1174      break;          break;
1175      case LZ:      case LZ:
1176      tensor_unary_operation(m_samplesize, left, result, bind2nd(less<double>(),0.0));          tensor_unary_operation(m_samplesize, left, result, bind2nd(less<double>(),0.0));
1177      break;          break;
1178      case GEZ:      case GEZ:
1179      tensor_unary_operation(m_samplesize, left, result, bind2nd(greater_equal<double>(),0.0));          tensor_unary_operation(m_samplesize, left, result, bind2nd(greater_equal<double>(),0.0));
1180      break;          break;
1181      case LEZ:      case LEZ:
1182      tensor_unary_operation(m_samplesize, left, result, bind2nd(less_equal<double>(),0.0));          tensor_unary_operation(m_samplesize, left, result, bind2nd(less_equal<double>(),0.0));
1183      break;          break;
1184    // There are actually G_UNARY_P but I don't see a compelling reason to treat them differently
1185        case NEZ:
1186            tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsGT(),m_tol));
1187            break;
1188        case EZ:
1189            tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsLTE(),m_tol));
1190            break;
1191    
1192      default:      default:
1193      throw DataException("Programmer error - do not know how to resolve operator "+opToString(m_op)+".");          throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");
1194      }    }
1195      return &(m_samples);
1196    }
1197    
1198    
1199    const DataTypes::RealVectorType*
1200    DataLazy::resolveNodeReduction(int tid, int sampleNo, size_t& roffset) const
1201    {
1202            // we assume that any collapsing has been done before we get here
1203            // since we only have one argument we don't need to think about only
1204            // processing single points.
1205            // we will also know we won't get identity nodes
1206      if (m_readytype!='E')
1207      {
1208        throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
1209      }
1210      if (m_op==IDENTITY)
1211      {
1212        throw DataException("Programmer error - resolveNodeUnary should not be called on identity nodes.");
1213      }
1214      size_t loffset=0;
1215      const DataTypes::RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, loffset);
1216    
1217      roffset=m_samplesize*tid;
1218      unsigned int ndpps=getNumDPPSample();
1219      unsigned int psize=DataTypes::noValues(m_left->getShape());
1220      double* result=&(m_samples[roffset]);
1221      switch (m_op)
1222      {
1223        case MINVAL:
1224            {
1225              for (unsigned int z=0;z<ndpps;++z)
1226              {
1227                FMin op;
1228                *result=DataMaths::reductionOp(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max());
1229                loffset+=psize;
1230                result++;
1231              }
1232            }
1233            break;
1234        case MAXVAL:
1235            {
1236              for (unsigned int z=0;z<ndpps;++z)
1237              {
1238              FMax op;
1239              *result=DataMaths::reductionOp(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max()*-1);
1240              loffset+=psize;
1241              result++;
1242              }
1243            }
1244            break;
1245        default:
1246            throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");
1247      }
1248      return &(m_samples);
1249    }
1250    
1251    const DataTypes::RealVectorType*
1252    DataLazy::resolveNodeNP1OUT(int tid, int sampleNo, size_t& roffset) const
1253    {
1254            // we assume that any collapsing has been done before we get here
1255            // since we only have one argument we don't need to think about only
1256            // processing single points.
1257      if (m_readytype!='E')
1258      {
1259        throw DataException("Programmer error - resolveNodeNP1OUT should only be called on expanded Data.");
1260    }    }
1261    return result;    if (m_op==IDENTITY)
1262      {
1263        throw DataException("Programmer error - resolveNodeNP1OUT should not be called on identity nodes.");
1264      }
1265      size_t subroffset;
1266      const RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1267      roffset=m_samplesize*tid;
1268      size_t loop=0;
1269      size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1270      size_t step=getNoValues();
1271      size_t offset=roffset;
1272      switch (m_op)
1273      {
1274        case SYM:
1275            for (loop=0;loop<numsteps;++loop)
1276            {
1277                DataMaths::symmetric(*leftres,m_left->getShape(),subroffset, m_samples, getShape(), offset);
1278                subroffset+=step;
1279                offset+=step;
1280            }
1281            break;
1282        case NSYM:
1283            for (loop=0;loop<numsteps;++loop)
1284            {
1285                DataMaths::nonsymmetric(*leftres,m_left->getShape(),subroffset, m_samples, getShape(), offset);
1286                subroffset+=step;
1287                offset+=step;
1288            }
1289            break;
1290        default:
1291            throw DataException("Programmer error - resolveNP1OUT can not resolve operator "+opToString(m_op)+".");
1292      }
1293      return &m_samples;
1294    }
1295    
1296    const DataTypes::RealVectorType*
1297    DataLazy::resolveNodeNP1OUT_P(int tid, int sampleNo, size_t& roffset) const
1298    {
1299            // we assume that any collapsing has been done before we get here
1300            // since we only have one argument we don't need to think about only
1301            // processing single points.
1302      if (m_readytype!='E')
1303      {
1304        throw DataException("Programmer error - resolveNodeNP1OUT_P should only be called on expanded Data.");
1305      }
1306      if (m_op==IDENTITY)
1307      {
1308        throw DataException("Programmer error - resolveNodeNP1OUT_P should not be called on identity nodes.");
1309      }
1310      size_t subroffset;
1311      size_t offset;
1312      const RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1313      roffset=m_samplesize*tid;
1314      offset=roffset;
1315      size_t loop=0;
1316      size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1317      size_t outstep=getNoValues();
1318      size_t instep=m_left->getNoValues();
1319      switch (m_op)
1320      {
1321        case TRACE:
1322            for (loop=0;loop<numsteps;++loop)
1323            {
1324                DataMaths::trace(*leftres,m_left->getShape(),subroffset, m_samples ,getShape(),offset,m_axis_offset);
1325                subroffset+=instep;
1326                offset+=outstep;
1327            }
1328            break;
1329        case TRANS:
1330            for (loop=0;loop<numsteps;++loop)
1331            {
1332                DataMaths::transpose(*leftres,m_left->getShape(),subroffset, m_samples, getShape(),offset,m_axis_offset);
1333                subroffset+=instep;
1334                offset+=outstep;
1335            }
1336            break;
1337        default:
1338            throw DataException("Programmer error - resolveNP1OUTP can not resolve operator "+opToString(m_op)+".");
1339      }
1340      return &m_samples;
1341  }  }
1342    
1343    
// Resolve an operation which produces a new object from one argument plus
// two integer parameters (currently only SWAP, i.e. swap-axes, which uses
// m_axis_offset and m_transpose as the two axes to exchange).
//
// tid      - thread id; selects this thread's private region of m_samples.
// sampleNo - number of the sample to resolve.
// roffset  - (output) offset of the result within the returned vector.
// Returns a pointer to m_samples; the result for this call starts at roffset.
const DataTypes::RealVectorType*
DataLazy::resolveNodeNP1OUT_2P(int tid, int sampleNo, size_t& roffset) const
{
  if (m_readytype!='E')
  {
    throw DataException("Programmer error - resolveNodeNP1OUT_2P should only be called on expanded Data.");
  }
  if (m_op==IDENTITY)
  {
    throw DataException("Programmer error - resolveNodeNP1OUT_2P should not be called on identity nodes.");
  }
  size_t subroffset;            // set (by reference) by the child's resolveNodeSample below
  size_t offset;
  // Resolve the child expression first; its values are read from *leftres.
  const RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
  roffset=m_samplesize*tid;     // each thread writes into its own region of m_samples
  offset=roffset;
  size_t loop=0;
  // NOTE(review): m_readytype=='E' always holds here (checked above), so the
  // ternary always yields getNumDPPSample(); the ':1' branch looks unreachable.
  size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
  size_t outstep=getNoValues();         // doubles per output datapoint
  size_t instep=m_left->getNoValues();  // doubles per input datapoint
  switch (m_op)
  {
    case SWAP:
        // Apply the axis swap datapoint-by-datapoint across the sample,
        // advancing the input and output offsets independently.
        for (loop=0;loop<numsteps;++loop)
        {
            DataMaths::swapaxes(*leftres,m_left->getShape(),subroffset, m_samples, getShape(),offset, m_axis_offset, m_transpose);
            subroffset+=instep;
            offset+=outstep;
        }
        break;
    default:
        throw DataException("Programmer error - resolveNodeNP1OUT2P can not resolve operator "+opToString(m_op)+".");
  }
  return &m_samples;
}
1379    
1380    const DataTypes::RealVectorType*
1381    DataLazy::resolveNodeCondEval(int tid, int sampleNo, size_t& roffset) const
1382    {
1383      if (m_readytype!='E')
1384      {
1385        throw DataException("Programmer error - resolveNodeCondEval should only be called on expanded Data.");
1386      }
1387      if (m_op!=CONDEVAL)
1388      {
1389        throw DataException("Programmer error - resolveNodeCondEval should only be called on CONDEVAL nodes.");
1390      }
1391      size_t subroffset;
1392    
1393      const RealVectorType* maskres=m_mask->resolveNodeSample(tid, sampleNo, subroffset);
1394      const RealVectorType* srcres=0;
1395      if ((*maskres)[subroffset]>0)
1396      {
1397            srcres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1398      }
1399      else
1400      {
1401            srcres=m_right->resolveNodeSample(tid, sampleNo, subroffset);
1402      }
1403    
1404      // Now we need to copy the result
1405    
1406      roffset=m_samplesize*tid;
1407      for (int i=0;i<m_samplesize;++i)
1408      {
1409            m_samples[roffset+i]=(*srcres)[subroffset+i];  
1410      }
1411    
1412      return &m_samples;
1413    }
1414    
// This method assumes that any subexpressions which evaluate to Constant or Tagged Data
// have already been collapsed to IDENTITY. So we must have at least one expanded child.
// If both children are expanded, then we can process them in a single operation (we treat
// the whole sample as one big datapoint).
// If one of the children is not expanded, then we need to treat each point in the sample
// individually.
// There is an additional complication when scalar operations are considered.
// For example, 2+Vector.
// In this case each double within the point is treated individually.
//
// tid      - thread id; selects this thread's private region of m_samples.
// sampleNo - number of the sample to resolve.
// roffset  - (output) offset of the result within the returned vector.
// Returns a pointer to m_samples; the result for this call starts at roffset.
const DataTypes::RealVectorType*
DataLazy::resolveNodeBinary(int tid, int sampleNo, size_t& roffset) const
{
LAZYDEBUG(cout << "Resolve binary: " << toString() << endl;)

  size_t lroffset=0, rroffset=0;        // offsets in the left and right result vectors
        // first work out which of the children are expanded
  bool leftExp=(m_left->m_readytype=='E');
  bool rightExp=(m_right->m_readytype=='E');
  if (!leftExp && !rightExp)
  {
        throw DataException("Programmer Error - please use collapse if neither argument has type 'E'.");
  }
  bool leftScalar=(m_left->getRank()==0);
  bool rightScalar=(m_right->getRank()==0);
  if ((m_left->getRank()!=m_right->getRank()) && (!leftScalar && !rightScalar))
  {
        throw DataException("resolveBinary - ranks of arguments must match unless one of them is scalar.");
  }
  size_t leftsize=m_left->getNoValues();
  size_t rightsize=m_right->getNoValues();
  size_t chunksize=1;                   // how many doubles will be processed in one go
  int leftstep=0;               // how far should the left offset advance after each step
  int rightstep=0;
  int numsteps=0;               // total number of steps for the inner loop
  int oleftstep=0;      // the o variables refer to the outer loop
  int orightstep=0;     // The outer loop is only required in cases where there is an extended scalar
  int onumsteps=1;

  // Classify each child by (expanded?, scalar?); exactly one of the four
  // flags below is true for each side.
  bool LES=(leftExp && leftScalar);     // Left is an expanded scalar
  bool RES=(rightExp && rightScalar);
  bool LS=(!leftExp && leftScalar);     // left is a single scalar
  bool RS=(!rightExp && rightScalar);
  bool LN=(!leftExp && !leftScalar);    // left is a single non-scalar
  bool RN=(!rightExp && !rightScalar);
  bool LEN=(leftExp && !leftScalar);    // left is an expanded non-scalar
  bool REN=(rightExp && !rightScalar);

  if ((LES && RES) || (LEN && REN))     // both are Expanded scalars or both are expanded non-scalars
  {
        // Matching layouts: the whole sample can be processed as one chunk.
        chunksize=m_left->getNumDPPSample()*leftsize;
        leftstep=0;
        rightstep=0;
        numsteps=1;
  }
  else if (LES || RES)
  {
        chunksize=1;
        if (LES)                // left is an expanded scalar
        {
                if (RS)
                {
                   leftstep=1;
                   rightstep=0;
                   numsteps=m_left->getNumDPPSample();
                }
                else            // RN or REN
                {
                   leftstep=0;
                   oleftstep=1;
                   rightstep=1;
                   // a negative outer step rewinds the non-expanded side for each datapoint
                   orightstep=(RN ? -(int)rightsize : 0);
                   numsteps=rightsize;
                   onumsteps=m_left->getNumDPPSample();
                }
        }
        else            // right is an expanded scalar
        {
                if (LS)
                {
                   rightstep=1;
                   leftstep=0;
                   numsteps=m_right->getNumDPPSample();
                }
                else
                {
                   rightstep=0;
                   orightstep=1;
                   leftstep=1;
                   oleftstep=(LN ? -(int)leftsize : 0);
                   numsteps=leftsize;
                   onumsteps=m_right->getNumDPPSample();
                }
        }
  }
  else  // this leaves (LEN, RS), (LEN, RN) and their transposes
  {
        if (LEN)        // and Right will be a single value
        {
                chunksize=rightsize;
                leftstep=rightsize;
                rightstep=0;
                numsteps=m_left->getNumDPPSample();
                if (RS)
                {
                   numsteps*=leftsize;
                }
        }
        else    // REN
        {
                chunksize=leftsize;
                rightstep=leftsize;
                leftstep=0;
                numsteps=m_right->getNumDPPSample();
                if (LS)
                {
                   numsteps*=rightsize;
                }
        }
  }

  int resultStep=max(leftstep,rightstep);       // only one (at most) should be !=0
        // Get the values of sub-expressions
  const RealVectorType* left=m_left->resolveNodeSample(tid,sampleNo,lroffset);
  const RealVectorType* right=m_right->resolveNodeSample(tid,sampleNo,rroffset);
LAZYDEBUG(cout << "Post sub calls in " << toString() << endl;)
LAZYDEBUG(cout << "shapes=" << DataTypes::shapeToString(m_left->getShape()) << "," << DataTypes::shapeToString(m_right->getShape()) << endl;)
LAZYDEBUG(cout << "chunksize=" << chunksize << endl << "leftstep=" << leftstep << " rightstep=" << rightstep;)
LAZYDEBUG(cout << " numsteps=" << numsteps << endl << "oleftstep=" << oleftstep << " orightstep=" << orightstep;)
LAZYDEBUG(cout << "onumsteps=" << onumsteps << endl;)
LAZYDEBUG(cout << " DPPS=" << m_left->getNumDPPSample() << "," <<m_right->getNumDPPSample() << endl;)
LAZYDEBUG(cout << "" << LS << RS << LN << RN << LES << RES <<LEN << REN <<   endl;)

LAZYDEBUG(cout << "Left res["<< lroffset<< "]=" << (*left)[lroffset] << endl;)
LAZYDEBUG(cout << "Right res["<< rroffset<< "]=" << (*right)[rroffset] << endl;)


  roffset=m_samplesize*tid;
  double* resultp=&(m_samples[roffset]);                // results are stored at the vector offset we received
  switch(m_op)
  {
    case ADD:
        //PROC_OP(NO_ARG,plus<double>());
      DataMaths::binaryOpVectorLazyHelper<real_t, real_t, real_t>(resultp,
             &(*left)[0],
             &(*right)[0],
             chunksize,
             onumsteps,
             numsteps,
             resultStep,
             leftstep,
             rightstep,
             oleftstep,
             orightstep,
             lroffset,
             rroffset,
             escript::ESFunction::PLUSF);
        break;
    case SUB:
        PROC_OP(NO_ARG,minus<double>());
        break;
    case MUL:
        PROC_OP(NO_ARG,multiplies<double>());
        break;
    case DIV:
        PROC_OP(NO_ARG,divides<double>());
        break;
    case POW:
       PROC_OP(double (double,double),::pow);
        break;
    default:
        throw DataException("Programmer error - resolveBinary can not resolve operator "+opToString(m_op)+".");
  }
LAZYDEBUG(cout << "Result res[" << roffset<< "]" << m_samples[roffset] << endl;)
  return &m_samples;
}
1590    
1591    
// This method assumes that any subexpressions which evaluate to Constant or Tagged Data
// have already been collapsed to IDENTITY. So we must have at least one expanded child.
// Unlike the other resolve helpers, we must treat these datapoints separately.
//
// tid      - thread id; selects this thread's private region of m_samples.
// sampleNo - number of the sample to resolve.
// roffset  - (output) offset of the result within the returned vector.
// Returns a pointer to m_samples; the result for this call starts at roffset.
const DataTypes::RealVectorType*
DataLazy::resolveNodeTProd(int tid, int sampleNo, size_t& roffset) const
{
LAZYDEBUG(cout << "Resolve TensorProduct: " << toString() << endl;)

  size_t lroffset=0, rroffset=0;        // offsets in the left and right result vectors
        // first work out which of the children are expanded
  bool leftExp=(m_left->m_readytype=='E');
  bool rightExp=(m_right->m_readytype=='E');
  int steps=getNumDPPSample();
  // A non-expanded child contributes the same point to every step, so its
  // offset does not advance (step of 0).
  int leftStep=(leftExp? m_left->getNoValues() : 0);            // do not have scalars as input to this method
  int rightStep=(rightExp?m_right->getNoValues() : 0);

  int resultStep=getNoValues();
  roffset=m_samplesize*tid;     // each thread writes into its own region of m_samples
  size_t offset=roffset;

  const RealVectorType* left=m_left->resolveNodeSample(tid, sampleNo, lroffset);

  const RealVectorType* right=m_right->resolveNodeSample(tid, sampleNo, rroffset);

LAZYDEBUG(cerr << "[Left shape]=" << DataTypes::shapeToString(m_left->getShape()) << "\n[Right shape]=" << DataTypes::shapeToString(m_right->getShape()) << " result=" <<DataTypes::shapeToString(getShape()) <<  endl;
cout << getNoValues() << endl;)


LAZYDEBUG(cerr << "Post sub calls: " << toString() << endl;)
LAZYDEBUG(cout << "LeftExp=" << leftExp << " rightExp=" << rightExp << endl;)
LAZYDEBUG(cout << "LeftR=" << m_left->getRank() << " rightExp=" << m_right->getRank() << endl;)
LAZYDEBUG(cout << "LeftSize=" << m_left->getNoValues() << " RightSize=" << m_right->getNoValues() << endl;)
LAZYDEBUG(cout << "m_samplesize=" << m_samplesize << endl;)
LAZYDEBUG(cout << "outputshape=" << DataTypes::shapeToString(getShape()) << endl;)
LAZYDEBUG(cout << "DPPS=" << m_right->getNumDPPSample() <<"."<<endl;)

  double* resultp=&(m_samples[offset]);         // results are stored at the vector offset we received
  switch(m_op)
  {
    case PROD:
        // One matrix product per datapoint; the m_SL/m_SM/m_SR members hold
        // the factor shapes fixed at construction time.
        for (int i=0;i<steps;++i,resultp+=resultStep)
        {
          const double *ptr_0 = &((*left)[lroffset]);
          const double *ptr_1 = &((*right)[rroffset]);

LAZYDEBUG(cout << DataTypes::pointToString(*left, m_left->getShape(),lroffset,"LEFT") << endl;)
LAZYDEBUG(cout << DataTypes::pointToString(*right,m_right->getShape(),rroffset, "RIGHT") << endl;)

          matrix_matrix_product(m_SL, m_SM, m_SR, ptr_0, ptr_1, resultp, m_transpose);

          lroffset+=leftStep;
          rroffset+=rightStep;
        }
        break;
    default:
        throw DataException("Programmer error - resolveTProduct can not resolve operator "+opToString(m_op)+".");
  }
  roffset=offset;
  return &m_samples;
}
1652    
1653    
1654    const DataTypes::RealVectorType*
1655    DataLazy::resolveSample(int sampleNo, size_t& roffset) const
1656    {
1657    #ifdef _OPENMP
1658            int tid=omp_get_thread_num();
1659    #else
1660            int tid=0;
1661    #endif
1662    
1663    #ifdef LAZY_STACK_PROF
1664            stackstart[tid]=&tid;
1665            stackend[tid]=&tid;
1666            const DataTypes::RealVectorType* r=resolveNodeSample(tid, sampleNo, roffset);
1667            size_t d=(size_t)stackstart[tid]-(size_t)stackend[tid];
1668            #pragma omp critical
1669            if (d>maxstackuse)
1670            {
1671    cout << "Max resolve Stack use " << d << endl;
1672                    maxstackuse=d;
1673            }
1674            return r;
1675    #else
1676            return resolveNodeSample(tid, sampleNo, roffset);
1677    #endif
1678    }
1679    
1680    
1681    // This needs to do the work of the identity constructor
1682    void
1683    DataLazy::resolveToIdentity()
1684    {
1685       if (m_op==IDENTITY)
1686            return;
1687       DataReady_ptr p=resolveNodeWorker();
1688       makeIdentity(p);
1689    }
1690    
// Convert this node into an IDENTITY node wrapping the given DataReady object.
// Operator-specific state is cleared and the child links are released so the
// (now evaluated) subtree can be freed.
void DataLazy::makeIdentity(const DataReady_ptr& p)
{
   m_op=IDENTITY;
   m_axis_offset=0;
   m_transpose=0;
   m_SL=m_SM=m_SR=0;            // clear tensor-product shape parameters
   m_children=m_height=0;       // an identity node has no children and zero depth
   m_id=p;
   // Record which flavour of DataReady we now wrap.
   if(p->isConstant()) {m_readytype='C';}
   else if(p->isExpanded()) {m_readytype='E';}
   else if (p->isTagged()) {m_readytype='T';}
   else {throw DataException("Unknown DataReady instance in convertToIdentity constructor.");}
   m_samplesize=p->getNumDPPSample()*p->getNoValues();
   m_left.reset();              // drop references to the old operands
   m_right.reset();
}
1707    
1708    
1709  DataReady_ptr  DataReady_ptr
1710  DataLazy::resolve()  DataLazy::resolve()
1711  {  {
1712    // This is broken!     We need to have a buffer per thread!      resolveToIdentity();
1713    // so the allocation of v needs to move inside the loop somehow      return m_id;
1714    }
1715    
 cout << "Sample size=" << m_samplesize << endl;  
 cout << "Buffers=" << m_buffsRequired << endl;  
1716    
1717    size_t threadbuffersize=m_samplesize*(max(1,m_buffsRequired)+1);  /* This is really a static method but I think that caused problems in windows */
1718    int numthreads=1;  void
1719    DataLazy::resolveGroupWorker(std::vector<DataLazy*>& dats)
1720    {
1721      if (dats.empty())
1722      {
1723            return;
1724      }
1725      vector<DataLazy*> work;
1726      FunctionSpace fs=dats[0]->getFunctionSpace();
1727      bool match=true;
1728      for (int i=dats.size()-1;i>=0;--i)
1729      {
1730            if (dats[i]->m_readytype!='E')
1731            {
1732                    dats[i]->collapse();
1733            }
1734            if (dats[i]->m_op!=IDENTITY)
1735            {
1736                    work.push_back(dats[i]);
1737                    if (fs!=dats[i]->getFunctionSpace())
1738                    {
1739                            match=false;
1740                    }
1741            }
1742      }
1743      if (work.empty())
1744      {
1745            return;         // no work to do
1746      }
1747      if (match)    // all functionspaces match.  Yes I realise this is overly strict
1748      {             // it is possible that dats[0] is one of the objects which we discarded and
1749                    // all the other functionspaces match.
1750            vector<DataExpanded*> dep;
1751            vector<RealVectorType*> vecs;
1752            for (int i=0;i<work.size();++i)
1753            {
1754                    dep.push_back(new DataExpanded(fs,work[i]->getShape(), RealVectorType(work[i]->getNoValues())));
1755                    vecs.push_back(&(dep[i]->getVectorRW()));
1756            }
1757            int totalsamples=work[0]->getNumSamples();
1758            const RealVectorType* res=0; // Storage for answer
1759            int sample;
1760            #pragma omp parallel private(sample, res)
1761            {
1762                size_t roffset=0;
1763                #pragma omp for schedule(static)
1764                for (sample=0;sample<totalsamples;++sample)
1765                {
1766                    roffset=0;
1767                    int j;
1768                    for (j=work.size()-1;j>=0;--j)
1769                    {
1770  #ifdef _OPENMP  #ifdef _OPENMP
1771    numthreads=omp_get_max_threads();                      res=work[j]->resolveNodeSample(omp_get_thread_num(),sample,roffset);
1772    int threadnum=0;  #else
1773  #endif                      res=work[j]->resolveNodeSample(0,sample,roffset);
1774    ValueType v(numthreads*threadbuffersize);  #endif
1775  cout << "Buffer created with size=" << v.size() << endl;                      RealVectorType::size_type outoffset=dep[j]->getPointOffset(sample,0);
1776    ValueType dummy(getNoValues());                      memcpy(&((*vecs[j])[outoffset]),&((*res)[roffset]),work[j]->m_samplesize*sizeof(RealVectorType::ElementType));
1777    DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(),dummy);                  }
1778    ValueType& resvec=result->getVector();              }
1779            }
1780            // Now we need to load the new results as identity ops into the lazy nodes
1781            for (int i=work.size()-1;i>=0;--i)
1782            {
1783                work[i]->makeIdentity(REFCOUNTNS::dynamic_pointer_cast<DataReady>(dep[i]->getPtr()));
1784            }
1785      }
1786      else  // functionspaces do not match
1787      {
1788            for (int i=0;i<work.size();++i)
1789            {
1790                    work[i]->resolveToIdentity();
1791            }
1792      }
1793    }
1794    
1795    
1796    
1797    // This version of resolve uses storage in each node to hold results
1798    DataReady_ptr
1799    DataLazy::resolveNodeWorker()
1800    {
1801      if (m_readytype!='E')         // if the whole sub-expression is Constant or Tagged, then evaluate it normally
1802      {
1803        collapse();
1804      }
1805      if (m_op==IDENTITY)           // So a lazy expression of Constant or Tagged data will be returned here.
1806      {
1807        return m_id;
1808      }
1809            // from this point on we must have m_op!=IDENTITY and m_readytype=='E'
1810      DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(),  RealVectorType(getNoValues()));
1811      RealVectorType& resvec=result->getVectorRW();
1812    DataReady_ptr resptr=DataReady_ptr(result);    DataReady_ptr resptr=DataReady_ptr(result);
1813    
1814    int sample;    int sample;
   int resoffset;  
1815    int totalsamples=getNumSamples();    int totalsamples=getNumSamples();
1816    #pragma omp parallel for private(sample,resoffset,threadnum) schedule(static)    const RealVectorType* res=0;       // Storage for answer
1817    for (sample=0;sample<totalsamples;++sample)  LAZYDEBUG(cout << "Total number of samples=" <<totalsamples << endl;)
1818      #pragma omp parallel private(sample,res)
1819    {    {
1820            size_t roffset=0;
1821    #ifdef LAZY_STACK_PROF
1822            stackstart[omp_get_thread_num()]=&roffset;
1823            stackend[omp_get_thread_num()]=&roffset;
1824    #endif
1825            #pragma omp for schedule(static)
1826            for (sample=0;sample<totalsamples;++sample)
1827            {
1828                    roffset=0;
1829  #ifdef _OPENMP  #ifdef _OPENMP
1830      const double* res=resolveSample(v,sample,threadbuffersize*omp_get_thread_num());                  res=resolveNodeSample(omp_get_thread_num(),sample,roffset);
1831  #else  #else
1832      const double* res=resolveSample(v,sample,0);   // this would normally be v, but not if its a single IDENTITY op.                  res=resolveNodeSample(0,sample,roffset);
1833  #endif  #endif
1834      resoffset=result->getPointOffset(sample,0);  LAZYDEBUG(cout << "Sample #" << sample << endl;)
1835      for (int i=0;i<m_samplesize;++i,++resoffset)    // copy values into the output vector  LAZYDEBUG(cout << "Final res[" << roffset<< "]=" << (*res)[roffset] << (*res)[roffset]<< endl; )
1836      {                  RealVectorType::size_type outoffset=result->getPointOffset(sample,0);
1837      resvec[resoffset]=res[i];                  memcpy(&(resvec[outoffset]),&((*res)[roffset]),m_samplesize*sizeof(RealVectorType::ElementType));
1838      }          }
1839    }    }
1840    #ifdef LAZY_STACK_PROF
1841      for (int i=0;i<getNumberOfThreads();++i)
1842      {
1843            size_t r=((size_t)stackstart[i] - (size_t)stackend[i]);
1844    //      cout << i << " " << stackstart[i] << " .. " << stackend[i] << " = " <<  r << endl;
1845            if (r>maxstackuse)
1846            {
1847                    maxstackuse=r;
1848            }
1849      }
1850      cout << "Max resolve Stack use=" << maxstackuse << endl;
1851    #endif
1852    return resptr;    return resptr;
1853  }  }
1854    
# Line 430  std::string Line 1856  std::string
1856  DataLazy::toString() const  DataLazy::toString() const
1857  {  {
1858    ostringstream oss;    ostringstream oss;
1859    oss << "Lazy Data:";    oss << "Lazy Data: [depth=" << m_height<< "] ";
1860    intoString(oss);    switch (escriptParams.getLAZY_STR_FMT())
1861      {
1862      case 1:       // tree format
1863            oss << endl;
1864            intoTreeString(oss,"");
1865            break;
1866      case 2:       // just the depth
1867            break;
1868      default:
1869            intoString(oss);
1870            break;
1871      }
1872    return oss.str();    return oss.str();
1873  }  }
1874    
1875    
1876  void  void
1877  DataLazy::intoString(ostringstream& oss) const  DataLazy::intoString(ostringstream& oss) const
1878  {  {
1879    //    oss << "[" << m_children <<";"<<m_height <<"]";
1880    switch (getOpgroup(m_op))    switch (getOpgroup(m_op))
1881    {    {
1882    case G_IDENTITY:    case G_IDENTITY:
1883      oss << '@' << m_id.get();          if (m_id->isExpanded())
1884      break;          {
1885               oss << "E";
1886            }
1887            else if (m_id->isTagged())
1888            {
1889              oss << "T";
1890            }
1891            else if (m_id->isConstant())
1892            {
1893              oss << "C";
1894            }
1895            else
1896            {
1897              oss << "?";
1898            }
1899            oss << '@' << m_id.get();
1900            break;
1901    case G_BINARY:    case G_BINARY:
1902      oss << '(';          oss << '(';
1903      m_left->intoString(oss);          m_left->intoString(oss);
1904      oss << ' ' << opToString(m_op) << ' ';          oss << ' ' << opToString(m_op) << ' ';
1905      m_right->intoString(oss);          m_right->intoString(oss);
1906      oss << ')';          oss << ')';
1907      break;          break;
1908    case G_UNARY:    case G_UNARY:
1909      oss << opToString(m_op) << '(';    case G_UNARY_P:
1910      m_left->intoString(oss);    case G_NP1OUT:
1911      oss << ')';    case G_NP1OUT_P:
1912      break;    case G_REDUCTION:
1913            oss << opToString(m_op) << '(';
1914            m_left->intoString(oss);
1915            oss << ')';
1916            break;
1917      case G_TENSORPROD:
1918            oss << opToString(m_op) << '(';
1919            m_left->intoString(oss);
1920            oss << ", ";
1921            m_right->intoString(oss);
1922            oss << ')';
1923            break;
1924      case G_NP1OUT_2P:
1925            oss << opToString(m_op) << '(';
1926            m_left->intoString(oss);
1927            oss << ", " << m_axis_offset << ", " << m_transpose;
1928            oss << ')';
1929            break;
1930      case G_CONDEVAL:
1931            oss << opToString(m_op)<< '(' ;
1932            m_mask->intoString(oss);
1933            oss << " ? ";
1934            m_left->intoString(oss);
1935            oss << " : ";
1936            m_right->intoString(oss);
1937            oss << ')';
1938            break;
1939    default:    default:
1940      oss << "UNKNOWN";          oss << "UNKNOWN";
1941    }    }
1942  }  }
1943    
1944  // Note that in this case, deepCopy does not make copies of the leaves.  
1945  // Hopefully copy on write (or whatever we end up using) will take care of this.  void
1946    DataLazy::intoTreeString(ostringstream& oss, string indent) const
1947    {
1948      oss << '[' << m_rank << ':' << setw(3) << m_samplesize << "] " << indent;
1949      switch (getOpgroup(m_op))
1950      {
1951      case G_IDENTITY:
1952            if (m_id->isExpanded())
1953            {
1954               oss << "E";
1955            }
1956            else if (m_id->isTagged())
1957            {
1958              oss << "T";
1959            }
1960            else if (m_id->isConstant())
1961            {
1962              oss << "C";
1963            }
1964            else
1965            {
1966              oss << "?";
1967            }
1968            oss << '@' << m_id.get() << endl;
1969            break;
1970      case G_BINARY:
1971            oss << opToString(m_op) << endl;
1972            indent+='.';
1973            m_left->intoTreeString(oss, indent);
1974            m_right->intoTreeString(oss, indent);
1975            break;
1976      case G_UNARY:
1977      case G_UNARY_P:
1978      case G_NP1OUT:
1979      case G_NP1OUT_P:
1980      case G_REDUCTION:
1981            oss << opToString(m_op) << endl;
1982            indent+='.';
1983            m_left->intoTreeString(oss, indent);
1984            break;
1985      case G_TENSORPROD:
1986            oss << opToString(m_op) << endl;
1987            indent+='.';
1988            m_left->intoTreeString(oss, indent);
1989            m_right->intoTreeString(oss, indent);
1990            break;
1991      case G_NP1OUT_2P:
1992            oss << opToString(m_op) << ", " << m_axis_offset << ", " << m_transpose<< endl;
1993            indent+='.';
1994            m_left->intoTreeString(oss, indent);
1995            break;
1996      default:
1997            oss << "UNKNOWN";
1998      }
1999    }
2000    
2001    
2002  DataAbstract*  DataAbstract*
2003  DataLazy::deepCopy()  DataLazy::deepCopy() const
2004  {  {
2005    if (m_op==IDENTITY)    switch (getOpgroup(m_op))
2006    {    {
2007      return new DataLazy(m_left);    // we don't need to copy the child here    case G_IDENTITY:  return new DataLazy(m_id->deepCopy()->getPtr());
2008      case G_UNARY:
2009      case G_REDUCTION:      return new DataLazy(m_left->deepCopy()->getPtr(),m_op);
2010      case G_UNARY_P:       return new DataLazy(m_left->deepCopy()->getPtr(), m_op, m_tol);
2011      case G_BINARY:        return new DataLazy(m_left->deepCopy()->getPtr(),m_right->deepCopy()->getPtr(),m_op);
2012      case G_NP1OUT: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(),m_op);
2013      case G_TENSORPROD: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
2014      case G_NP1OUT_P:   return new DataLazy(m_left->deepCopy()->getPtr(),m_op,  m_axis_offset);
2015      case G_NP1OUT_2P:  return new DataLazy(m_left->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
2016      default:
2017            throw DataException("Programmer error - do not know how to deepcopy operator "+opToString(m_op)+".");
2018    }    }
   return new DataLazy(m_left->deepCopy()->getPtr(),m_right->deepCopy()->getPtr(),m_op);  
2019  }  }
2020    
2021    
2022  DataTypes::ValueType::size_type  
2023    // There is no single, natural interpretation of getLength on DataLazy.
2024    // Instances of DataReady can look at the size of their vectors.
2025    // For lazy though, it could be the size the data would be if it were resolved;
2026    // or it could be some function of the lengths of the DataReady instances which
2027    // form part of the expression.
2028    // Rather than have people making assumptions, I have disabled the method.
2029    DataTypes::RealVectorType::size_type
2030  DataLazy::getLength() const  DataLazy::getLength() const
2031  {  {
2032    return m_length;    throw DataException("getLength() does not make sense for lazy data.");
2033  }  }
2034    
2035    
# Line 486  DataLazy::getSlice(const DataTypes::Regi Line 2039  DataLazy::getSlice(const DataTypes::Regi
2039    throw DataException("getSlice - not implemented for Lazy objects.");    throw DataException("getSlice - not implemented for Lazy objects.");
2040  }  }
2041    
2042  DataTypes::ValueType::size_type  
2043    // To do this we need to rely on our child nodes
2044    DataTypes::RealVectorType::size_type
2045    DataLazy::getPointOffset(int sampleNo,
2046                     int dataPointNo)
2047    {
2048      if (m_op==IDENTITY)
2049      {
2050            return m_id->getPointOffset(sampleNo,dataPointNo);
2051      }
2052      if (m_readytype!='E')
2053      {
2054            collapse();
2055            return m_id->getPointOffset(sampleNo,dataPointNo);
2056      }
2057      // at this point we do not have an identity node and the expression will be Expanded
2058      // so we only need to know which child to ask
2059      if (m_left->m_readytype=='E')
2060      {
2061            return m_left->getPointOffset(sampleNo,dataPointNo);
2062      }
2063      else
2064      {
2065            return m_right->getPointOffset(sampleNo,dataPointNo);
2066      }
2067    }
2068    
2069    // To do this we need to rely on our child nodes
2070    DataTypes::RealVectorType::size_type
2071  DataLazy::getPointOffset(int sampleNo,  DataLazy::getPointOffset(int sampleNo,
2072                   int dataPointNo) const                   int dataPointNo) const
2073  {  {
2074    throw DataException("getPointOffset - not implemented for Lazy objects - yet.");    if (m_op==IDENTITY)
2075      {
2076            return m_id->getPointOffset(sampleNo,dataPointNo);
2077      }
2078      if (m_readytype=='E')
2079      {
2080        // at this point we do not have an identity node and the expression will be Expanded
2081        // so we only need to know which child to ask
2082        if (m_left->m_readytype=='E')
2083        {
2084            return m_left->getPointOffset(sampleNo,dataPointNo);
2085        }
2086        else
2087        {
2088            return m_right->getPointOffset(sampleNo,dataPointNo);
2089        }
2090      }
2091      if (m_readytype=='C')
2092      {
2093            return m_left->getPointOffset(sampleNo,dataPointNo); // which child doesn't matter
2094      }
2095      throw DataException("Programmer error - getPointOffset on lazy data may require collapsing (but this object is marked const).");
2096    }
2097    
2098    
2099    // I have decided to let Data:: handle this issue.
2100    void
2101    DataLazy::setToZero()
2102    {
2103    //   DataTypes::RealVectorType v(getNoValues(),0);
2104    //   m_id=DataReady_ptr(new DataConstant(getFunctionSpace(),getShape(),v));
2105    //   m_op=IDENTITY;
2106    //   m_right.reset();  
2107    //   m_left.reset();
2108    //   m_readytype='C';
2109    //   m_buffsRequired=1;
2110    
2111      (void)privdebug;  // to stop the compiler complaining about unused privdebug
2112      throw DataException("Programmer error - setToZero not supported for DataLazy (DataLazy objects should be read only).");
2113  }  }
2114    
2115  }   // end namespace  bool
2116    DataLazy::actsExpanded() const
2117    {
2118            return (m_readytype=='E');
2119    }
2120    
2121    } // end namespace
2122    

Legend:
Removed from v.1888  
changed lines
  Added in v.6056

  ViewVC Help
Powered by ViewVC 1.1.26