/[escript]/branches/clazy/escriptcore/src/DataLazy.cpp

Diff of /branches/clazy/escriptcore/src/DataLazy.cpp


trunk/escript/src/DataLazy.cpp revision 2066 by jfenwick, Thu Nov 20 05:31:33 2008 UTC
vs. trunk/escriptcore/src/DataLazy.cpp revision 6066 by jfenwick, Tue Mar 15 06:42:55 2016 UTC
# Line 1  Line 1 
1    
2  /*******************************************************  /*****************************************************************************
3  *  *
4  * Copyright (c) 2003-2008 by University of Queensland  * Copyright (c) 2003-2016 by The University of Queensland
5  * Earth Systems Science Computational Center (ESSCC)  * http://www.uq.edu.au
 * http://www.uq.edu.au/esscc  
6  *  *
7  * Primary Business: Queensland, Australia  * Primary Business: Queensland, Australia
8  * Licensed under the Open Software License version 3.0  * Licensed under the Open Software License version 3.0
9  * http://www.opensource.org/licenses/osl-3.0.php  * http://www.opensource.org/licenses/osl-3.0.php
10  *  *
11  *******************************************************/  * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12    * Development 2012-2013 by School of Earth Sciences
13    * Development from 2014 by Centre for Geoscience Computing (GeoComp)
14    *
15    *****************************************************************************/
16    
17  #include "DataLazy.h"  #include "DataLazy.h"
18    #include "Data.h"
19    #include "DataTypes.h"
20    #include "EscriptParams.h"
21    #include "FunctionSpace.h"
22    #include "UnaryFuncs.h"    // for escript::fsign
23    #include "Utils.h"
24    #include "DataMaths.h"
25    
26  #ifdef USE_NETCDF  #ifdef USE_NETCDF
27  #include <netcdfcpp.h>  #include <netcdfcpp.h>
28  #endif  #endif
29  #ifdef PASO_MPI  
30  #include <mpi.h>  #include <iomanip> // for some fancy formatting in debug
31  #endif  
32  #ifdef _OPENMP  using namespace escript::DataTypes;
33  #include <omp.h>  
34  #endif  #define NO_ARG
35  #include "FunctionSpace.h"  
36  #include "DataTypes.h"  // #define LAZYDEBUG(X) if (privdebug){X;}
37  #include "Data.h"  #define LAZYDEBUG(X)
38  #include "UnaryFuncs.h"     // for escript::fsign  namespace
39  #include "Utils.h"  {
40    bool privdebug=false;
41    
42    #define ENABLEDEBUG privdebug=true;
43    #define DISABLEDEBUG privdebug=false;
44    }
45    
46    // #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {cerr << "\n!!!!!!! SIZE LIMIT EXCEEDED " << m_children << ";" << m_height << endl << toString() << endl;resolveToIdentity();}
47    
48    // #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {cerr << "SIZE LIMIT EXCEEDED " << m_height << endl;resolveToIdentity();}
49    
50    
51    #define SIZELIMIT if (m_height>escript::escriptParams.getTOO_MANY_LEVELS())  {if (escript::escriptParams.getLAZY_VERBOSE()){cerr << "SIZE LIMIT EXCEEDED height=" << m_height << endl;}resolveToIdentity();}
52    
53  /*  /*
54  How does DataLazy work?  How does DataLazy work?
# Line 39  A special operation, IDENTITY, stores an Line 61  A special operation, IDENTITY, stores an
61  This means that all "internal" nodes in the structure are instances of DataLazy.  This means that all "internal" nodes in the structure are instances of DataLazy.
62    
63  Each operation has a string representation as well as an opgroup - eg G_IDENTITY, G_BINARY, ...  Each operation has a string representation as well as an opgroup - eg G_IDENTITY, G_BINARY, ...
64  Note that IDENITY is not considered a unary operation.  Note that IDENTITY is not considered a unary operation.
65    
66  I am avoiding calling the structure formed a tree because it is not guaranteed to be one (eg c=a+a).  I am avoiding calling the structure formed a tree because it is not guaranteed to be one (eg c=a+a).
67  It must however form a DAG (directed acyclic graph).  It must however form a DAG (directed acyclic graph).
# Line 47  I will refer to individual DataLazy obje Line 69  I will refer to individual DataLazy obje
69    
70  Each node also stores:  Each node also stores:
71  - m_readytype \in {'E','T','C','?'} ~ indicates what sort of DataReady would be produced if the expression was  - m_readytype \in {'E','T','C','?'} ~ indicates what sort of DataReady would be produced if the expression was
72      evaluated.          evaluated.
73  - m_buffsrequired ~ the larged number of samples which would need to be kept simultaneously in order to  - m_buffsrequired ~ the large number of samples which would need to be kept simultaneously in order to
74      evaluate the expression.          evaluate the expression.
75  - m_samplesize ~ the number of doubles stored in a sample.  - m_samplesize ~ the number of doubles stored in a sample.
76    
77  When a new node is created, the above values are computed based on the values in the child nodes.  When a new node is created, the above values are computed based on the values in the child nodes.
# Line 70  The convention that I use, is that the r Line 92  The convention that I use, is that the r
92  For expressions which evaluate to Constant or Tagged, there is a different evaluation method.  For expressions which evaluate to Constant or Tagged, there is a different evaluation method.
93  The collapse method invokes the (non-lazy) operations on the Data class to evaluate the expression.  The collapse method invokes the (non-lazy) operations on the Data class to evaluate the expression.
94    
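To make the structure described above concrete, here is a small illustration (not part of either revision; p is a hypothetical non-lazy DataAbstract_ptr, and the constructors used are the ones defined later in this file):

    DataLazy_ptr a(new DataLazy(p));          // IDENTITY node wrapping a DataReady
    DataLazy_ptr c(new DataLazy(a, a, ADD));  // binary node for c = a + a; both children
                                              // are the same node, so the structure is a
                                              // DAG rather than a tree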
95  To add a new operator you need to do the following (plus anything I might have forgotten):  To add a new operator you need to do the following (plus anything I might have forgotten - adding a new group for example):
96  1) Add to the ES_optype.  1) Add to the ES_optype.
97  2) determine what opgroup your operation belongs to (X)  2) determine what opgroup your operation belongs to (X)
98  3) add a string for the op to the end of ES_opstrings  3) add a string for the op to the end of ES_opstrings
# Line 90  namespace escript Line 112  namespace escript
112  namespace  namespace
113  {  {
114    
115    
116    // enabling this will print out whenever the maximum stacksize used by resolve increases
117    // it assumes _OPENMP is also in use
118    //#define LAZY_STACK_PROF
119    
120    
121    
122    #ifndef _OPENMP
123      #ifdef LAZY_STACK_PROF
124      #undef LAZY_STACK_PROF
125      #endif
126    #endif
127    
128    
129    #ifdef LAZY_STACK_PROF
130    std::vector<void*> stackstart(getNumberOfThreads());
131    std::vector<void*> stackend(getNumberOfThreads());
132    size_t maxstackuse=0;
133    #endif
134    
135  enum ES_opgroup  enum ES_opgroup
136  {  {
137     G_UNKNOWN,     G_UNKNOWN,
138     G_IDENTITY,     G_IDENTITY,
139     G_BINARY,        // pointwise operations with two arguments     G_BINARY,            // pointwise operations with two arguments
140     G_UNARY,     // pointwise operations with one argument     G_UNARY,             // pointwise operations with one argument
141     G_NP1OUT,        // non-pointwise op with one output     G_UNARY_P,           // pointwise operations with one argument, requiring a parameter
142     G_TENSORPROD     // general tensor product     G_NP1OUT,            // non-pointwise op with one output
143       G_NP1OUT_P,          // non-pointwise op with one output requiring a parameter
144       G_TENSORPROD,        // general tensor product
145       G_NP1OUT_2P,         // non-pointwise op with one output requiring two params
146       G_REDUCTION,         // non-pointwise unary op with a scalar output
147       G_CONDEVAL
148  };  };
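Each opgroup maps to a dedicated resolver: the switch in resolveNodeSample() further down dispatches G_UNARY and G_UNARY_P to resolveNodeUnary, G_BINARY to resolveNodeBinary, G_NP1OUT/G_NP1OUT_P/G_NP1OUT_2P to the corresponding resolveNodeNP1OUT* methods, G_TENSORPROD to resolveNodeTProd, G_REDUCTION to resolveNodeReduction, and G_CONDEVAL to resolveNodeCondEval.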
149    
150    
151    
152    
153  string ES_opstrings[]={"UNKNOWN","IDENTITY","+","-","*","/","^",  string ES_opstrings[]={"UNKNOWN","IDENTITY","+","-","*","/","^",
154              "sin","cos","tan",                          "sin","cos","tan",
155              "asin","acos","atan","sinh","cosh","tanh","erf",                          "asin","acos","atan","sinh","cosh","tanh","erf",
156              "asinh","acosh","atanh",                          "asinh","acosh","atanh",
157              "log10","log","sign","abs","neg","pos","exp","sqrt",                          "log10","log","sign","abs","neg","pos","exp","sqrt",
158              "1/","where>0","where<0","where>=0","where<=0",                          "1/","where>0","where<0","where>=0","where<=0", "where<>0","where=0",
159              "symmetric","nonsymmetric",                          "symmetric","nonsymmetric",
160              "prod"};                          "prod",
161  int ES_opcount=36;                          "transpose", "trace",
162                            "swapaxes",
163                            "minval", "maxval",
164                            "condEval"};
165    int ES_opcount=44;
166  ES_opgroup opgroups[]={G_UNKNOWN,G_IDENTITY,G_BINARY,G_BINARY,G_BINARY,G_BINARY, G_BINARY,  ES_opgroup opgroups[]={G_UNKNOWN,G_IDENTITY,G_BINARY,G_BINARY,G_BINARY,G_BINARY, G_BINARY,
167              G_UNARY,G_UNARY,G_UNARY, //10                          G_UNARY,G_UNARY,G_UNARY, //10
168              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,    // 17                          G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,        // 17
169              G_UNARY,G_UNARY,G_UNARY,                    // 20                          G_UNARY,G_UNARY,G_UNARY,                                        // 20
170              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,        // 28                          G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,        // 28
171              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,            // 33                          G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY, G_UNARY_P, G_UNARY_P,          // 35
172              G_NP1OUT,G_NP1OUT,                          G_NP1OUT,G_NP1OUT,
173              G_TENSORPROD};                          G_TENSORPROD,
174                            G_NP1OUT_P, G_NP1OUT_P,
175                            G_NP1OUT_2P,
176                            G_REDUCTION, G_REDUCTION,
177                            G_CONDEVAL};
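ES_opstrings and opgroups are parallel arrays indexed by the same ES_optype value, so the two tables must be kept in step. For example, the two G_UNARY_P entries line up with the "where<>0" and "where=0" strings: these are the only unary operations that carry a tolerance parameter (m_tol), handled by the NEZ and EZ cases in collapseToReady() below.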
178  inline  inline
179  ES_opgroup  ES_opgroup
180  getOpgroup(ES_optype op)  getOpgroup(ES_optype op)
# Line 131  getOpgroup(ES_optype op) Line 186  getOpgroup(ES_optype op)
186  FunctionSpace  FunctionSpace
187  resultFS(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)  resultFS(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
188  {  {
189      // perhaps this should call interpolate and throw or something?          // perhaps this should call interpolate and throw or something?
190      // maybe we need an interpolate node -          // maybe we need an interpolate node -
191      // that way, if interpolate is required in any other op we can just throw a          // that way, if interpolate is required in any other op we can just throw a
192      // programming error exception.          // programming error exception.
193    
194    FunctionSpace l=left->getFunctionSpace();    FunctionSpace l=left->getFunctionSpace();
195    FunctionSpace r=right->getFunctionSpace();    FunctionSpace r=right->getFunctionSpace();
196    if (l!=r)    if (l!=r)
197    {    {
198      if (r.probeInterpolation(l))      signed char res=r.getDomain()->preferredInterpolationOnDomain(r.getTypeCode(), l.getTypeCode());
199        if (res==1)
200      {      {
201      return l;          return l;
202      }      }
203      if (l.probeInterpolation(r))      if (res==-1)
204      {      {
205      return r;          return r;
206      }      }
207      throw DataException("Cannot interpolate between the FunctionSpaces given for operation "+opToString(op)+".");      throw DataException("Cannot interpolate between the FunctionSpaces given for operation "+opToString(op)+".");
208    }    }
# Line 158  resultFS(DataAbstract_ptr left, DataAbst Line 214  resultFS(DataAbstract_ptr left, DataAbst
214  DataTypes::ShapeType  DataTypes::ShapeType
215  resultShape(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)  resultShape(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
216  {  {
217      if (left->getShape()!=right->getShape())          if (left->getShape()!=right->getShape())
218      {          {
219        if ((getOpgroup(op)!=G_BINARY) && (getOpgroup(op)!=G_NP1OUT))            if ((getOpgroup(op)!=G_BINARY) && (getOpgroup(op)!=G_NP1OUT))
220        {            {
221          throw DataException("Shapes not the same - shapes must match for (point)binary operations.");                  throw DataException("Shapes not the same - shapes must match for (point)binary operations.");
222        }            }
223        if (left->getRank()==0)   // we need to allow scalar * anything  
224        {            if (left->getRank()==0)       // we need to allow scalar * anything
225          return right->getShape();            {
226        }                  return right->getShape();
227        if (right->getRank()==0)            }
228        {            if (right->getRank()==0)
229          return left->getShape();            {
230        }                  return left->getShape();
231        throw DataException("Shapes not the same - arguments must have matching shapes (or be scalars) for (point)binary operations on lazy data.");            }
232      }            throw DataException("Shapes not the same - arguments must have matching shapes (or be scalars) for (point)binary operations on lazy data.");
233      return left->getShape();          }
234            return left->getShape();
235    }
236    
237    // return the shape for "op left"
238    
239    DataTypes::ShapeType
240    resultShape(DataAbstract_ptr left, ES_optype op, int axis_offset)
241    {
242            switch(op)
243            {
244            case TRANS:
245               {                    // for the scoping of variables
246                    const DataTypes::ShapeType& s=left->getShape();
247                    DataTypes::ShapeType sh;
248                    int rank=left->getRank();
249                    if (axis_offset<0 || axis_offset>rank)
250                    {
251                stringstream e;
252                e << "Error - Data::transpose must have 0 <= axis_offset <= rank=" << rank;
253                throw DataException(e.str());
254            }
255            for (int i=0; i<rank; i++)
256                    {
257                       int index = (axis_offset+i)%rank;
258               sh.push_back(s[index]); // Append to new shape
259            }
260                    return sh;
261               }
262            break;
263            case TRACE:
264               {
265                    int rank=left->getRank();
266                    if (rank<2)
267                    {
268                       throw DataException("Trace can only be computed for objects with rank 2 or greater.");
269                    }
270                    if ((axis_offset>rank-2) || (axis_offset<0))
271                    {
272                       throw DataException("Trace: axis offset must lie between 0 and rank-2 inclusive.");
273                    }
274                    if (rank==2)
275                    {
276                       return DataTypes::scalarShape;
277                    }
278                    else if (rank==3)
279                    {
280                       DataTypes::ShapeType sh;
281                       if (axis_offset==0)
282                       {
283                            sh.push_back(left->getShape()[2]);
284                       }
285                       else         // offset==1
286                       {
287                            sh.push_back(left->getShape()[0]);
288                       }
289                       return sh;
290                    }
291                    else if (rank==4)
292                    {
293                       DataTypes::ShapeType sh;
294                       const DataTypes::ShapeType& s=left->getShape();
295                       if (axis_offset==0)
296                       {
297                            sh.push_back(s[2]);
298                            sh.push_back(s[3]);
299                       }
300                       else if (axis_offset==1)
301                       {
302                            sh.push_back(s[0]);
303                            sh.push_back(s[3]);
304                       }
305                       else         // offset==2
306                       {
307                            sh.push_back(s[0]);
308                            sh.push_back(s[1]);
309                       }
310                       return sh;
311                    }
312                    else            // unknown rank
313                    {
314                       throw DataException("Error - Data::trace can only be calculated for rank 2, 3 or 4 object.");
315                    }
316               }
317            break;
318            default:
319            throw DataException("Programmer error - resultShape(left,op) can't compute shapes for operator "+opToString(op)+".");
320            }
321    }
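The axis rotation in the TRANS case above is easiest to see on a concrete shape. A minimal standalone sketch of the same index formula, using a plain std::vector in place of DataTypes::ShapeType (illustration only, not part of either revision):

    #include <iostream>
    #include <vector>

    int main()
    {
        std::vector<int> s{2, 3, 4};                    // original shape, rank 3
        int axis_offset = 1;
        int rank = static_cast<int>(s.size());
        std::vector<int> sh;
        for (int i = 0; i < rank; i++)
        {
            sh.push_back(s[(axis_offset + i) % rank]);  // same formula as resultShape uses
        }
        for (size_t i = 0; i < sh.size(); i++)
        {
            std::cout << sh[i] << ' ';                  // prints: 3 4 2
        }
        std::cout << std::endl;
        return 0;
    }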
322    
323    DataTypes::ShapeType
324    SwapShape(DataAbstract_ptr left, const int axis0, const int axis1)
325    {
326         // This code taken from the Data.cpp swapaxes() method
327         // Some of the checks are probably redundant here
328         int axis0_tmp,axis1_tmp;
329         const DataTypes::ShapeType& s=left->getShape();
330         DataTypes::ShapeType out_shape;
331         // Here's the equivalent of python s_out=s[axis_offset:]+s[:axis_offset]
332         // which goes thru all shape vector elements starting with axis_offset (at index=rank wrap around to 0)
333         int rank=left->getRank();
334         if (rank<2) {
335            throw DataException("Error - Data::swapaxes argument must have at least rank 2.");
336         }
337         if (axis0<0 || axis0>rank-1) {
338            stringstream e;
339            e << "Error - Data::swapaxes: axis0 must be between 0 and rank-1=" << (rank-1);
340            throw DataException(e.str());
341         }
342         if (axis1<0 || axis1>rank-1) {
343            stringstream e;
344            e << "Error - Data::swapaxes: axis1 must be between 0 and rank-1=" << (rank-1);
345            throw DataException(e.str());
346         }
347         if (axis0 == axis1) {
348             throw DataException("Error - Data::swapaxes: axis indices must be different.");
349         }
350         if (axis0 > axis1) {
351             axis0_tmp=axis1;
352             axis1_tmp=axis0;
353         } else {
354             axis0_tmp=axis0;
355             axis1_tmp=axis1;
356         }
357         for (int i=0; i<rank; i++) {
358           if (i == axis0_tmp) {
359              out_shape.push_back(s[axis1_tmp]);
360           } else if (i == axis1_tmp) {
361              out_shape.push_back(s[axis0_tmp]);
362           } else {
363              out_shape.push_back(s[i]);
364           }
365         }
366        return out_shape;
367  }  }
368    
369    
370  // determine the output shape for the general tensor product operation  // determine the output shape for the general tensor product operation
371  // the additional parameters return information required later for the product  // the additional parameters return information required later for the product
372  // the majority of this code is copy pasted from C_General_Tensor_Product  // the majority of this code is copy pasted from C_General_Tensor_Product
373  DataTypes::ShapeType  DataTypes::ShapeType
374  GTPShape(DataAbstract_ptr left, DataAbstract_ptr right, int axis_offset, int transpose, int& SL, int& SM, int& SR)  GTPShape(DataAbstract_ptr left, DataAbstract_ptr right, int axis_offset, int transpose, int& SL, int& SM, int& SR)
375  {  {
376                
377    // Get rank and shape of inputs    // Get rank and shape of inputs
378    int rank0 = left->getRank();    int rank0 = left->getRank();
379    int rank1 = right->getRank();    int rank1 = right->getRank();
# Line 192  GTPShape(DataAbstract_ptr left, DataAbst Line 382  GTPShape(DataAbstract_ptr left, DataAbst
382    
383    // Prepare for the loops of the product and verify compatibility of shapes    // Prepare for the loops of the product and verify compatibility of shapes
384    int start0=0, start1=0;    int start0=0, start1=0;
385    if (transpose == 0)       {}    if (transpose == 0)           {}
386    else if (transpose == 1)  { start0 = axis_offset; }    else if (transpose == 1)      { start0 = axis_offset; }
387    else if (transpose == 2)  { start1 = rank1-axis_offset; }    else if (transpose == 2)      { start1 = rank1-axis_offset; }
388    else              { throw DataException("DataLazy GeneralTensorProduct Constructor: Error - transpose should be 0, 1 or 2"); }    else                          { throw DataException("DataLazy GeneralTensorProduct Constructor: Error - transpose should be 0, 1 or 2"); }
389    
390      if (rank0<axis_offset)
391      {
392            throw DataException("DataLazy GeneralTensorProduct Constructor: Error - rank of left < axisoffset");
393      }
394    
395    // Adjust the shapes for transpose    // Adjust the shapes for transpose
396    DataTypes::ShapeType tmpShape0(rank0);    // pre-sizing the vectors rather    DataTypes::ShapeType tmpShape0(rank0);        // pre-sizing the vectors rather
397    DataTypes::ShapeType tmpShape1(rank1);    // than using push_back    DataTypes::ShapeType tmpShape1(rank1);        // than using push_back
398    for (int i=0; i<rank0; i++)   { tmpShape0[i]=shape0[(i+start0)%rank0]; }    for (int i=0; i<rank0; i++)   { tmpShape0[i]=shape0[(i+start0)%rank0]; }
399    for (int i=0; i<rank1; i++)   { tmpShape1[i]=shape1[(i+start1)%rank1]; }    for (int i=0; i<rank1; i++)   { tmpShape1[i]=shape1[(i+start1)%rank1]; }
400    
401    // Prepare for the loops of the product    // Prepare for the loops of the product
402    SL=1, SM=1, SR=1;    SL=1, SM=1, SR=1;
403    for (int i=0; i<rank0-axis_offset; i++)   {    for (int i=0; i<rank0-axis_offset; i++)       {
404      SL *= tmpShape0[i];      SL *= tmpShape0[i];
405    }    }
406    for (int i=rank0-axis_offset; i<rank0; i++)   {    for (int i=rank0-axis_offset; i<rank0; i++)   {
407      if (tmpShape0[i] != tmpShape1[i-(rank0-axis_offset)]) {      if (tmpShape0[i] != tmpShape1[i-(rank0-axis_offset)]) {
408        throw DataException("C_GeneralTensorProduct: Error - incompatible shapes");        throw DataException("C_GeneralTensorProduct: Error - incompatible shapes");
409      }      }
410      SM *= tmpShape0[i];      SM *= tmpShape0[i];
411    }    }
412    for (int i=axis_offset; i<rank1; i++)     {    for (int i=axis_offset; i<rank1; i++)         {
413      SR *= tmpShape1[i];      SR *= tmpShape1[i];
414    }    }
415    
416    // Define the shape of the output (rank of shape is the sum of the loop ranges below)    // Define the shape of the output (rank of shape is the sum of the loop ranges below)
417    DataTypes::ShapeType shape2(rank0+rank1-2*axis_offset);      DataTypes::ShapeType shape2(rank0+rank1-2*axis_offset);      
418    {         // block to limit the scope of out_index    {                     // block to limit the scope of out_index
419       int out_index=0;       int out_index=0;
420       for (int i=0; i<rank0-axis_offset; i++, ++out_index) { shape2[out_index]=tmpShape0[i]; } // First part of arg_0_Z       for (int i=0; i<rank0-axis_offset; i++, ++out_index) { shape2[out_index]=tmpShape0[i]; } // First part of arg_0_Z
421       for (int i=axis_offset; i<rank1; i++, ++out_index)   { shape2[out_index]=tmpShape1[i]; } // Last part of arg_1_Z       for (int i=axis_offset; i<rank1; i++, ++out_index)   { shape2[out_index]=tmpShape1[i]; } // Last part of arg_1_Z
422    }    }
   return shape2;  
 }  
423    
424      if (shape2.size()>ESCRIPT_MAX_DATA_RANK)
425      {
426         ostringstream os;
427         os << "C_GeneralTensorProduct: Error - Attempt to create a rank " << shape2.size() << " object. The maximum rank is " << ESCRIPT_MAX_DATA_RANK << ".";
428         throw DataException(os.str());
429      }
430    
431  // determine the number of points in the result of "left op right"    return shape2;
 // note that determining the resultLength for G_TENSORPROD is more complex and will not be processed here  
 // size_t  
 // resultLength(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)  
 // {  
 //    switch (getOpgroup(op))  
 //    {  
 //    case G_BINARY: return left->getLength();  
 //    case G_UNARY: return left->getLength();  
 //    case G_NP1OUT: return left->getLength();  
 //    default:  
 //  throw DataException("Programmer Error - attempt to getLength() for operator "+opToString(op)+".");  
 //    }  
 // }  
   
 // determine the number of samples requires to evaluate an expression combining left and right  
 // NP1OUT needs an extra buffer because we can't write the answers over the top of the input.  
 // The same goes for G_TENSORPROD  
 int  
 calcBuffs(const DataLazy_ptr& left, const DataLazy_ptr& right, ES_optype op)  
 {  
    switch(getOpgroup(op))  
    {  
    case G_IDENTITY: return 1;  
    case G_BINARY: return max(left->getBuffsRequired(),right->getBuffsRequired()+1);  
    case G_UNARY: return max(left->getBuffsRequired(),1);  
    case G_NP1OUT: return 1+max(left->getBuffsRequired(),1);  
    case G_TENSORPROD: return 1+max(left->getBuffsRequired(),right->getBuffsRequired()+1);  
    default:  
     throw DataException("Programmer Error - attempt to calcBuffs() for operator "+opToString(op)+".");  
    }  
432  }  }
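As a worked example of the sizes computed by GTPShape (illustration only): for a left operand of shape (3,4), a right operand of shape (4,5), axis_offset=1 and transpose=0, the contracted dimensions match (4 == 4), giving SL=3, SM=4, SR=5 and an output shape of (3,5), which is just the ordinary matrix product of a 3x4 matrix with a 4x5 matrix.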
433    
434    }       // end anonymous namespace
 }   // end anonymous namespace  
435    
436    
437    
# Line 279  opToString(ES_optype op) Line 446  opToString(ES_optype op)
446    return ES_opstrings[op];    return ES_opstrings[op];
447  }  }
448    
449    void DataLazy::LazyNodeSetup()
450    {
451    #ifdef _OPENMP
452        int numthreads=omp_get_max_threads();
453        m_samples.resize(numthreads*m_samplesize);
454        m_sampleids=new int[numthreads];
455        for (int i=0;i<numthreads;++i)
456        {
457            m_sampleids[i]=-1;  
458        }
459    #else
460        m_samples.resize(m_samplesize);
461        m_sampleids=new int[1];
462        m_sampleids[0]=-1;
463    #endif  // _OPENMP
464    }
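In short, LazyNodeSetup gives each OpenMP thread its own scratch slice of m_samples plus a cache tag (descriptive comments only, added for orientation):

    // thread tid owns m_samples[tid*m_samplesize .. (tid+1)*m_samplesize)
    // as scratch space for one resolved sample;
    // m_sampleids[tid] records which sampleNo currently occupies that slice
    // (-1 means empty), so resolveNodeSample() can hand back the cached
    // slice when the same thread asks for the same sample again.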
465    
466    
467    // Creates an identity node
468  DataLazy::DataLazy(DataAbstract_ptr p)  DataLazy::DataLazy(DataAbstract_ptr p)
469      : parent(p->getFunctionSpace(),p->getShape()),          : parent(p->getFunctionSpace(),p->getShape())
470      m_op(IDENTITY),          ,m_sampleids(0),
471      m_axis_offset(0),          m_samples(1)
     m_transpose(0),  
     m_SL(0), m_SM(0), m_SR(0)  
472  {  {
473     if (p->isLazy())     if (p->isLazy())
474     {     {
475      // I don't want identity of Lazy.          // I don't want identity of Lazy.
476      // Question: Why would that be so bad?          // Question: Why would that be so bad?
477      // Answer: We assume that the child of ID is something we can call getVector on          // Answer: We assume that the child of ID is something we can call getVector on
478      throw DataException("Programmer error - attempt to create identity from a DataLazy.");          throw DataException("Programmer error - attempt to create identity from a DataLazy.");
479     }     }
480     else     else
481     {     {
482      m_id=dynamic_pointer_cast<DataReady>(p);          p->makeLazyShared();
483      if(p->isConstant()) {m_readytype='C';}          DataReady_ptr dr=dynamic_pointer_cast<DataReady>(p);
484      else if(p->isExpanded()) {m_readytype='E';}          makeIdentity(dr);
485      else if (p->isTagged()) {m_readytype='T';}  LAZYDEBUG(cout << "Wrapping " << dr.get() << " id=" << m_id.get() << endl;)
     else {throw DataException("Unknown DataReady instance in DataLazy constructor.");}  
486     }     }
487     m_buffsRequired=1;  LAZYDEBUG(cout << "(1)Lazy created with " << m_samplesize << endl;)
    m_samplesize=getNumDPPSample()*getNoValues();  
    m_maxsamplesize=m_samplesize;  
 cout << "(1)Lazy created with " << m_samplesize << endl;  
488  }  }
489    
   
   
   
490  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op)  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op)
491      : parent(left->getFunctionSpace(),left->getShape()),          : parent(left->getFunctionSpace(),(getOpgroup(op)!=G_REDUCTION)?left->getShape():DataTypes::scalarShape),
492      m_op(op),          m_op(op),
493      m_axis_offset(0),          m_axis_offset(0),
494      m_transpose(0),          m_transpose(0),
495      m_SL(0), m_SM(0), m_SR(0)          m_SL(0), m_SM(0), m_SR(0)
496  {  {
497     if ((getOpgroup(op)!=G_UNARY) && (getOpgroup(op)!=G_NP1OUT))     if ((getOpgroup(op)!=G_UNARY) && (getOpgroup(op)!=G_NP1OUT) && (getOpgroup(op)!=G_REDUCTION))
498     {     {
499      throw DataException("Programmer error - constructor DataLazy(left, op) will only process UNARY operations.");          throw DataException("Programmer error - constructor DataLazy(left, op) will only process UNARY operations.");
500     }     }
501    
502     DataLazy_ptr lleft;     DataLazy_ptr lleft;
503     if (!left->isLazy())     if (!left->isLazy())
504     {     {
505      lleft=DataLazy_ptr(new DataLazy(left));          lleft=DataLazy_ptr(new DataLazy(left));
506     }     }
507     else     else
508     {     {
509      lleft=dynamic_pointer_cast<DataLazy>(left);          lleft=dynamic_pointer_cast<DataLazy>(left);
510     }     }
511     m_readytype=lleft->m_readytype;     m_readytype=lleft->m_readytype;
512     m_left=lleft;     m_left=lleft;
    m_buffsRequired=calcBuffs(m_left, m_right,m_op); // yeah m_right will be null at this point  
513     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
514     m_maxsamplesize=max(m_samplesize,m_left->getMaxSampleSize());     m_children=m_left->m_children+1;
515       m_height=m_left->m_height+1;
516       LazyNodeSetup();
517       SIZELIMIT
518  }  }
519    
520    
521  // In this constructor we need to consider interpolation  // In this constructor we need to consider interpolation
522  DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)  DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
523      : parent(resultFS(left,right,op), resultShape(left,right,op)),          : parent(resultFS(left,right,op), resultShape(left,right,op)),
524      m_op(op),          m_op(op),
525      m_SL(0), m_SM(0), m_SR(0)          m_SL(0), m_SM(0), m_SR(0)
526  {  {
527    LAZYDEBUG(cout << "Forming operator with " << left.get() << " " << right.get() << endl;)
528     if ((getOpgroup(op)!=G_BINARY))     if ((getOpgroup(op)!=G_BINARY))
529     {     {
530      throw DataException("Programmer error - constructor DataLazy(left, right, op) will only process BINARY operations.");          throw DataException("Programmer error - constructor DataLazy(left, right, op) will only process BINARY operations.");
531     }     }
532    
533     if (getFunctionSpace()!=left->getFunctionSpace())    // left needs to be interpolated     if (getFunctionSpace()!=left->getFunctionSpace())    // left needs to be interpolated
534     {     {
535      FunctionSpace fs=getFunctionSpace();          FunctionSpace fs=getFunctionSpace();
536      Data ltemp(left);          Data ltemp(left);
537      Data tmp(ltemp,fs);          Data tmp(ltemp,fs);
538      left=tmp.borrowDataPtr();          left=tmp.borrowDataPtr();
539     }     }
540     if (getFunctionSpace()!=right->getFunctionSpace())   // right needs to be interpolated     if (getFunctionSpace()!=right->getFunctionSpace())   // right needs to be interpolated
541     {     {
542      Data tmp(Data(right),getFunctionSpace());          Data tmp(Data(right),getFunctionSpace());
543      right=tmp.borrowDataPtr();          right=tmp.borrowDataPtr();
544    LAZYDEBUG(cout << "Right interpolation required " << right.get() << endl;)
545     }     }
546     left->operandCheck(*right);     left->operandCheck(*right);
547    
548     if (left->isLazy())          // the children need to be DataLazy. Wrap them in IDENTITY if required     if (left->isLazy())                  // the children need to be DataLazy. Wrap them in IDENTITY if required
549     {     {
550      m_left=dynamic_pointer_cast<DataLazy>(left);          m_left=dynamic_pointer_cast<DataLazy>(left);
551    LAZYDEBUG(cout << "Left is " << m_left->toString() << endl;)
552     }     }
553     else     else
554     {     {
555      m_left=DataLazy_ptr(new DataLazy(left));          m_left=DataLazy_ptr(new DataLazy(left));
556    LAZYDEBUG(cout << "Left " << left.get() << " wrapped " << m_left->m_id.get() << endl;)
557     }     }
558     if (right->isLazy())     if (right->isLazy())
559     {     {
560      m_right=dynamic_pointer_cast<DataLazy>(right);          m_right=dynamic_pointer_cast<DataLazy>(right);
561    LAZYDEBUG(cout << "Right is " << m_right->toString() << endl;)
562     }     }
563     else     else
564     {     {
565      m_right=DataLazy_ptr(new DataLazy(right));          m_right=DataLazy_ptr(new DataLazy(right));
566    LAZYDEBUG(cout << "Right " << right.get() << " wrapped " << m_right->m_id.get() << endl;)
567     }     }
568     char lt=m_left->m_readytype;     char lt=m_left->m_readytype;
569     char rt=m_right->m_readytype;     char rt=m_right->m_readytype;
570     if (lt=='E' || rt=='E')     if (lt=='E' || rt=='E')
571     {     {
572      m_readytype='E';          m_readytype='E';
573     }     }
574     else if (lt=='T' || rt=='T')     else if (lt=='T' || rt=='T')
575     {     {
576      m_readytype='T';          m_readytype='T';
577     }     }
578     else     else
579     {     {
580      m_readytype='C';          m_readytype='C';
581     }     }
582     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
583     m_maxsamplesize=max(max(m_samplesize,m_right->getMaxSampleSize()),m_left->getMaxSampleSize());       m_children=m_left->m_children+m_right->m_children+2;
584     m_buffsRequired=calcBuffs(m_left, m_right,m_op);     m_height=max(m_left->m_height,m_right->m_height)+1;
585  cout << "(3)Lazy created with " << m_samplesize << endl;     LazyNodeSetup();
586       SIZELIMIT
587    LAZYDEBUG(cout << "(3)Lazy created with " << m_samplesize << endl;)
588  }  }
589    
590  DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op, int axis_offset, int transpose)  DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op, int axis_offset, int transpose)
591      : parent(resultFS(left,right,op), GTPShape(left,right, axis_offset, transpose, m_SL,m_SM, m_SR)),          : parent(resultFS(left,right,op), GTPShape(left,right, axis_offset, transpose, m_SL,m_SM, m_SR)),
592      m_op(op),          m_op(op),
593      m_axis_offset(axis_offset),          m_axis_offset(axis_offset),
594      m_transpose(transpose)          m_transpose(transpose)
595  {  {
596     if ((getOpgroup(op)!=G_TENSORPROD))     if ((getOpgroup(op)!=G_TENSORPROD))
597     {     {
598      throw DataException("Programmer error - constructor DataLazy(left, right, op, ax, tr) will only process BINARY operations which require parameters.");          throw DataException("Programmer error - constructor DataLazy(left, right, op, ax, tr) will only process BINARY operations which require parameters.");
599     }     }
600     if ((transpose>2) || (transpose<0))     if ((transpose>2) || (transpose<0))
601     {     {
602      throw DataException("DataLazy GeneralTensorProduct constructor: Error - transpose should be 0, 1 or 2");          throw DataException("DataLazy GeneralTensorProduct constructor: Error - transpose should be 0, 1 or 2");
603     }     }
604     if (getFunctionSpace()!=left->getFunctionSpace())    // left needs to be interpolated     if (getFunctionSpace()!=left->getFunctionSpace())    // left needs to be interpolated
605     {     {
606      FunctionSpace fs=getFunctionSpace();          FunctionSpace fs=getFunctionSpace();
607      Data ltemp(left);          Data ltemp(left);
608      Data tmp(ltemp,fs);          Data tmp(ltemp,fs);
609      left=tmp.borrowDataPtr();          left=tmp.borrowDataPtr();
610     }     }
611     if (getFunctionSpace()!=right->getFunctionSpace())   // right needs to be interpolated     if (getFunctionSpace()!=right->getFunctionSpace())   // right needs to be interpolated
612     {     {
613      Data tmp(Data(right),getFunctionSpace());          Data tmp(Data(right),getFunctionSpace());
614      right=tmp.borrowDataPtr();          right=tmp.borrowDataPtr();
615     }     }
616     left->operandCheck(*right);  //    left->operandCheck(*right);
617    
618     if (left->isLazy())          // the children need to be DataLazy. Wrap them in IDENTITY if required     if (left->isLazy())                  // the children need to be DataLazy. Wrap them in IDENTITY if required
619     {     {
620      m_left=dynamic_pointer_cast<DataLazy>(left);          m_left=dynamic_pointer_cast<DataLazy>(left);
621     }     }
622     else     else
623     {     {
624      m_left=DataLazy_ptr(new DataLazy(left));          m_left=DataLazy_ptr(new DataLazy(left));
625     }     }
626     if (right->isLazy())     if (right->isLazy())
627     {     {
628      m_right=dynamic_pointer_cast<DataLazy>(right);          m_right=dynamic_pointer_cast<DataLazy>(right);
629     }     }
630     else     else
631     {     {
632      m_right=DataLazy_ptr(new DataLazy(right));          m_right=DataLazy_ptr(new DataLazy(right));
633     }     }
634     char lt=m_left->m_readytype;     char lt=m_left->m_readytype;
635     char rt=m_right->m_readytype;     char rt=m_right->m_readytype;
636     if (lt=='E' || rt=='E')     if (lt=='E' || rt=='E')
637     {     {
638      m_readytype='E';          m_readytype='E';
639     }     }
640     else if (lt=='T' || rt=='T')     else if (lt=='T' || rt=='T')
641     {     {
642      m_readytype='T';          m_readytype='T';
643     }     }
644     else     else
645     {     {
646      m_readytype='C';          m_readytype='C';
647     }     }
648     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
649     m_maxsamplesize=max(max(m_samplesize,m_right->getMaxSampleSize()),m_left->getMaxSampleSize());       m_children=m_left->m_children+m_right->m_children+2;
650     m_buffsRequired=calcBuffs(m_left, m_right,m_op);     m_height=max(m_left->m_height,m_right->m_height)+1;
651  cout << "(4)Lazy created with " << m_samplesize << endl;     LazyNodeSetup();
652       SIZELIMIT
653    LAZYDEBUG(cout << "(4)Lazy created with " << m_samplesize << endl;)
654  }  }
655    
656    
657  DataLazy::~DataLazy()  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, int axis_offset)
658            : parent(left->getFunctionSpace(), resultShape(left,op, axis_offset)),
659            m_op(op),
660            m_axis_offset(axis_offset),
661            m_transpose(0),
662            m_tol(0)
663    {
664       if ((getOpgroup(op)!=G_NP1OUT_P))
665       {
666            throw DataException("Programmer error - constructor DataLazy(left, op, ax) will only process UNARY operations which require parameters.");
667       }
668       DataLazy_ptr lleft;
669       if (!left->isLazy())
670       {
671            lleft=DataLazy_ptr(new DataLazy(left));
672       }
673       else
674       {
675            lleft=dynamic_pointer_cast<DataLazy>(left);
676       }
677       m_readytype=lleft->m_readytype;
678       m_left=lleft;
679       m_samplesize=getNumDPPSample()*getNoValues();
680       m_children=m_left->m_children+1;
681       m_height=m_left->m_height+1;
682       LazyNodeSetup();
683       SIZELIMIT
684    LAZYDEBUG(cout << "(5)Lazy created with " << m_samplesize << endl;)
685    }
686    
687    DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, double tol)
688            : parent(left->getFunctionSpace(), left->getShape()),
689            m_op(op),
690            m_axis_offset(0),
691            m_transpose(0),
692            m_tol(tol)
693    {
694       if ((getOpgroup(op)!=G_UNARY_P))
695       {
696            throw DataException("Programmer error - constructor DataLazy(left, op, tol) will only process UNARY operations which require parameters.");
697       }
698       DataLazy_ptr lleft;
699       if (!left->isLazy())
700       {
701            lleft=DataLazy_ptr(new DataLazy(left));
702       }
703       else
704       {
705            lleft=dynamic_pointer_cast<DataLazy>(left);
706       }
707       m_readytype=lleft->m_readytype;
708       m_left=lleft;
709       m_samplesize=getNumDPPSample()*getNoValues();
710       m_children=m_left->m_children+1;
711       m_height=m_left->m_height+1;
712       LazyNodeSetup();
713       SIZELIMIT
714    LAZYDEBUG(cout << "(6)Lazy created with " << m_samplesize << endl;)
715    }
716    
717    
718    DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, const int axis0, const int axis1)
719            : parent(left->getFunctionSpace(), SwapShape(left,axis0,axis1)),
720            m_op(op),
721            m_axis_offset(axis0),
722            m_transpose(axis1),
723            m_tol(0)
724  {  {
725       if ((getOpgroup(op)!=G_NP1OUT_2P))
726       {
727            throw DataException("Programmer error - constructor DataLazy(left, op, axis0, axis1) will only process UNARY operations which require two integer parameters.");
728       }
729       DataLazy_ptr lleft;
730       if (!left->isLazy())
731       {
732            lleft=DataLazy_ptr(new DataLazy(left));
733       }
734       else
735       {
736            lleft=dynamic_pointer_cast<DataLazy>(left);
737       }
738       m_readytype=lleft->m_readytype;
739       m_left=lleft;
740       m_samplesize=getNumDPPSample()*getNoValues();
741       m_children=m_left->m_children+1;
742       m_height=m_left->m_height+1;
743       LazyNodeSetup();
744       SIZELIMIT
745    LAZYDEBUG(cout << "(7)Lazy created with " << m_samplesize << endl;)
746  }  }
747    
748    
749  int  namespace
750  DataLazy::getBuffsRequired() const  {
751    
752        inline int max3(int a, int b, int c)
753        {
754            int t=(a>b?a:b);
755            return (t>c?t:c);
756    
757        }
758    }
759    
760    DataLazy::DataLazy(DataAbstract_ptr mask, DataAbstract_ptr left, DataAbstract_ptr right/*, double tol*/)
761            : parent(left->getFunctionSpace(), left->getShape()),
762            m_op(CONDEVAL),
763            m_axis_offset(0),
764            m_transpose(0),
765            m_tol(0)
766  {  {
767      return m_buffsRequired;  
768       DataLazy_ptr lmask;
769       DataLazy_ptr lleft;
770       DataLazy_ptr lright;
771       if (!mask->isLazy())
772       {
773            lmask=DataLazy_ptr(new DataLazy(mask));
774       }
775       else
776       {
777            lmask=dynamic_pointer_cast<DataLazy>(mask);
778       }
779       if (!left->isLazy())
780       {
781            lleft=DataLazy_ptr(new DataLazy(left));
782       }
783       else
784       {
785            lleft=dynamic_pointer_cast<DataLazy>(left);
786       }
787       if (!right->isLazy())
788       {
789            lright=DataLazy_ptr(new DataLazy(right));
790       }
791       else
792       {
793            lright=dynamic_pointer_cast<DataLazy>(right);
794       }
795       m_readytype=lmask->m_readytype;
796       if ((lleft->m_readytype!=lright->m_readytype) || (lmask->m_readytype!=lleft->m_readytype))
797       {
798            throw DataException("Programmer Error - condEval arguments must have the same readytype");
799       }
800       m_left=lleft;
801       m_right=lright;
802       m_mask=lmask;
803       m_samplesize=getNumDPPSample()*getNoValues();
804       m_children=m_left->m_children+m_right->m_children+m_mask->m_children+1;
805       m_height=max3(m_left->m_height,m_right->m_height,m_mask->m_height)+1;
806       LazyNodeSetup();
807       SIZELIMIT
808    LAZYDEBUG(cout << "(8)Lazy created with " << m_samplesize << endl;)
809  }  }
810    
811    
812  size_t  
813  DataLazy::getMaxSampleSize() const  DataLazy::~DataLazy()
814  {  {
815      return m_maxsamplesize;     delete[] m_sampleids;
816  }  }
817    
818    
819  /*  /*
820    \brief Evaluates the expression using methods on Data.    \brief Evaluates the expression using methods on Data.
821    This does the work for the collapse method.    This does the work for the collapse method.
822    For reasons of efficiency do not call this method on DataExpanded nodes.    For reasons of efficiency do not call this method on DataExpanded nodes.
823  */  */
824  DataReady_ptr  DataReady_ptr
825  DataLazy::collapseToReady()  DataLazy::collapseToReady() const
826  {  {
827    if (m_readytype=='E')    if (m_readytype=='E')
828    { // this is more an efficiency concern than anything else    {     // this is more an efficiency concern than anything else
829      throw DataException("Programmer Error - do not use collapse on Expanded data.");      throw DataException("Programmer Error - do not use collapse on Expanded data.");
830    }    }
831    if (m_op==IDENTITY)    if (m_op==IDENTITY)
# Line 511  DataLazy::collapseToReady() Line 843  DataLazy::collapseToReady()
843    switch(m_op)    switch(m_op)
844    {    {
845      case ADD:      case ADD:
846      result=left+right;          result=left+right;
847      break;          break;
848      case SUB:            case SUB:          
849      result=left-right;          result=left-right;
850      break;          break;
851      case MUL:            case MUL:          
852      result=left*right;          result=left*right;
853      break;          break;
854      case DIV:            case DIV:          
855      result=left/right;          result=left/right;
856      break;          break;
857      case SIN:      case SIN:
858      result=left.sin();            result=left.sin();      
859      break;          break;
860      case COS:      case COS:
861      result=left.cos();          result=left.cos();
862      break;          break;
863      case TAN:      case TAN:
864      result=left.tan();          result=left.tan();
865      break;          break;
866      case ASIN:      case ASIN:
867      result=left.asin();          result=left.asin();
868      break;          break;
869      case ACOS:      case ACOS:
870      result=left.acos();          result=left.acos();
871      break;          break;
872      case ATAN:      case ATAN:
873      result=left.atan();          result=left.atan();
874      break;          break;
875      case SINH:      case SINH:
876      result=left.sinh();          result=left.sinh();
877      break;          break;
878      case COSH:      case COSH:
879      result=left.cosh();          result=left.cosh();
880      break;          break;
881      case TANH:      case TANH:
882      result=left.tanh();          result=left.tanh();
883      break;          break;
884      case ERF:      case ERF:
885      result=left.erf();          result=left.erf();
886      break;          break;
887     case ASINH:     case ASINH:
888      result=left.asinh();          result=left.asinh();
889      break;          break;
890     case ACOSH:     case ACOSH:
891      result=left.acosh();          result=left.acosh();
892      break;          break;
893     case ATANH:     case ATANH:
894      result=left.atanh();          result=left.atanh();
895      break;          break;
896      case LOG10:      case LOG10:
897      result=left.log10();          result=left.log10();
898      break;          break;
899      case LOG:      case LOG:
900      result=left.log();          result=left.log();
901      break;          break;
902      case SIGN:      case SIGN:
903      result=left.sign();          result=left.sign();
904      break;          break;
905      case ABS:      case ABS:
906      result=left.abs();          result=left.abs();
907      break;          break;
908      case NEG:      case NEG:
909      result=left.neg();          result=left.neg();
910      break;          break;
911      case POS:      case POS:
912      // it doesn't mean anything for delayed.          // it doesn't mean anything for delayed.
913      // it will just trigger a deep copy of the lazy object          // it will just trigger a deep copy of the lazy object
914      throw DataException("Programmer error - POS not supported for lazy data.");          throw DataException("Programmer error - POS not supported for lazy data.");
915      break;          break;
916      case EXP:      case EXP:
917      result=left.exp();          result=left.exp();
918      break;          break;
919      case SQRT:      case SQRT:
920      result=left.sqrt();          result=left.sqrt();
921      break;          break;
922      case RECIP:      case RECIP:
923      result=left.oneOver();          result=left.oneOver();
924      break;          break;
925      case GZ:      case GZ:
926      result=left.wherePositive();          result=left.wherePositive();
927      break;          break;
928      case LZ:      case LZ:
929      result=left.whereNegative();          result=left.whereNegative();
930      break;          break;
931      case GEZ:      case GEZ:
932      result=left.whereNonNegative();          result=left.whereNonNegative();
933      break;          break;
934      case LEZ:      case LEZ:
935      result=left.whereNonPositive();          result=left.whereNonPositive();
936      break;          break;
937        case NEZ:
938            result=left.whereNonZero(m_tol);
939            break;
940        case EZ:
941            result=left.whereZero(m_tol);
942            break;
943      case SYM:      case SYM:
944      result=left.symmetric();          result=left.symmetric();
945      break;          break;
946      case NSYM:      case NSYM:
947      result=left.nonsymmetric();          result=left.nonsymmetric();
948      break;          break;
949      case PROD:      case PROD:
950      result=C_GeneralTensorProduct(left,right,m_axis_offset, m_transpose);          result=C_GeneralTensorProduct(left,right,m_axis_offset, m_transpose);
951      break;          break;
952        case TRANS:
953            result=left.transpose(m_axis_offset);
954            break;
955        case TRACE:
956            result=left.trace(m_axis_offset);
957            break;
958        case SWAP:
959            result=left.swapaxes(m_axis_offset, m_transpose);
960            break;
961        case MINVAL:
962            result=left.minval();
963            break;
964        case MAXVAL:
965            result=left.maxval();
966            break;
967      default:      default:
968      throw DataException("Programmer error - collapseToReady does not know how to resolve operator "+opToString(m_op)+".");          throw DataException("Programmer error - collapseToReady does not know how to resolve operator "+opToString(m_op)+".");
969    }    }
970    return result.borrowReadyPtr();    return result.borrowReadyPtr();
971  }  }
# Line 624  DataLazy::collapseToReady() Line 977  DataLazy::collapseToReady()
977     the purpose of using DataLazy in the first place).     the purpose of using DataLazy in the first place).
978  */  */
979  void  void
980  DataLazy::collapse()  DataLazy::collapse() const
981  {  {
982    if (m_op==IDENTITY)    if (m_op==IDENTITY)
983    {    {
984      return;          return;
985    }    }
986    if (m_readytype=='E')    if (m_readytype=='E')
987    { // this is more an efficiency concern than anything else    {     // this is more an efficiency concern than anything else
988      throw DataException("Programmer Error - do not use collapse on Expanded data.");      throw DataException("Programmer Error - do not use collapse on Expanded data.");
989    }    }
990    m_id=collapseToReady();    m_id=collapseToReady();
991    m_op=IDENTITY;    m_op=IDENTITY;
992  }  }
993    
994  /*  // The result will be stored in m_samples
995    \brief Compute the value of the expression (unary operation) for the given sample.  // The return value is a pointer to the DataVector, offset is the offset within the return value
996    \return Vector which stores the value of the subexpression for the given sample.  const DataTypes::RealVectorType*
997    \param v A vector to store intermediate results.  DataLazy::resolveNodeSample(int tid, int sampleNo, size_t& roffset) const
998    \param offset Index in v to begin storing results.  {
999    \param sampleNo Sample number to evaluate.  LAZYDEBUG(cout << "Resolve sample " << toString() << endl;)
1000    \param roffset (output parameter) the offset in the return vector where the result begins.          // collapse so we have a 'E' node or an IDENTITY for some other type
1001      if (m_readytype!='E' && m_op!=IDENTITY)
1002    The return value will be an existing vector so do not deallocate it.    {
1003    If the result is stored in v it should be stored at the offset given.          collapse();
1004    Everything from offset to the end of v should be considered available for this method to use.    }
1005  */    if (m_op==IDENTITY)  
1006  DataTypes::ValueType*    {
1007  DataLazy::resolveUnary(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const      const RealVectorType& vec=m_id->getVectorRO();
1008        roffset=m_id->getPointOffset(sampleNo, 0);
1009    #ifdef LAZY_STACK_PROF
1010    int x;
1011    if (&x<stackend[omp_get_thread_num()])
1012    {
1013           stackend[omp_get_thread_num()]=&x;
1014    }
1015    #endif
1016        return &(vec);
1017      }
1018      if (m_readytype!='E')
1019      {
1020        throw DataException("Programmer Error - Collapse did not produce an expanded node.");
1021      }
1022      if (m_sampleids[tid]==sampleNo)
1023      {
1024            roffset=tid*m_samplesize;
1025            return &(m_samples);            // sample is already resolved
1026      }
1027      m_sampleids[tid]=sampleNo;
1028    
1029      switch (getOpgroup(m_op))
1030      {
1031      case G_UNARY:
1032      case G_UNARY_P: return resolveNodeUnary(tid, sampleNo, roffset);
1033      case G_BINARY: return resolveNodeBinary(tid, sampleNo, roffset);
1034      case G_NP1OUT: return resolveNodeNP1OUT(tid, sampleNo, roffset);
1035      case G_NP1OUT_P: return resolveNodeNP1OUT_P(tid, sampleNo, roffset);
1036      case G_TENSORPROD: return resolveNodeTProd(tid, sampleNo, roffset);
1037      case G_NP1OUT_2P: return resolveNodeNP1OUT_2P(tid, sampleNo, roffset);
1038      case G_REDUCTION: return resolveNodeReduction(tid, sampleNo, roffset);
1039      case G_CONDEVAL: return resolveNodeCondEval(tid, sampleNo, roffset);
1040      default:
1041        throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");
1042      }
1043    }
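A sketch of how a caller consumes the convention documented above (hypothetical driver code, not from either revision; "lazy" and "sampleNo" are placeholders and the actual resolve loop lies outside this hunk):

    size_t roffset = 0;
    const DataTypes::RealVectorType* res =
            lazy->resolveNodeSample(omp_get_thread_num(), sampleNo, roffset);
    const double* sample = &((*res)[roffset]);   // m_samplesize doubles for this sample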
1044    
1045    const DataTypes::RealVectorType*
1046    DataLazy::resolveNodeUnary(int tid, int sampleNo, size_t& roffset) const
1047  {  {
1048      // we assume that any collapsing has been done before we get here          // we assume that any collapsing has been done before we get here
1049      // since we only have one argument we don't need to think about only          // since we only have one argument we don't need to think about only
1050      // processing single points.          // processing single points.
1051            // we will also know we won't get identity nodes
1052    if (m_readytype!='E')    if (m_readytype!='E')
1053    {    {
1054      throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");      throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
1055    }    }
1056    const ValueType* vleft=m_left->resolveSample(v,offset,sampleNo,roffset);    if (m_op==IDENTITY)
1057    const double* left=&((*vleft)[roffset]);    {
1058    double* result=&(v[offset]);      throw DataException("Programmer error - resolveNodeUnary should not be called on identity nodes.");
1059    roffset=offset;    }
1060      const DataTypes::RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, roffset);
1061      const double* left=&((*leftres)[roffset]);
1062      roffset=m_samplesize*tid;
1063      double* result=&(m_samples[roffset]);
1064      escript::ESFunction operation=SINF;
1065    switch (m_op)    switch (m_op)
1066    {    {
1067      case SIN:        case SIN:
1068      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sin);      operation=SINF;
1069      break;      break;
1070      case COS:      case COS:
1071      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cos);          operation=COSF;
1072      break;      break;
1073      case TAN:      case TAN:
1074      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tan);          operation=TANF;
1075      break;      break;
1076      case ASIN:      case ASIN:
1077      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::asin);          operation=ASINF;
1078      break;      break;
1079      case ACOS:      case ACOS:
1080      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::acos);          operation=ACOSF;
1081      break;      break;
1082      case ATAN:      case ATAN:
1083      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::atan);          operation=ATANF;
1084      break;      break;
1085      case SINH:      case SINH:
1086      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sinh);          operation=SINHF;
1087      break;      break;
1088      case COSH:      case COSH:
1089      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cosh);          operation=COSHF;
1090      break;      break;
1091      case TANH:      case TANH:
1092      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tanh);          operation=TANHF;
1093      break;      break;
1094      case ERF:      case ERF:
1095  #if defined (_WIN32) && !defined(__INTEL_COMPILER)          operation=ERFF;
     throw DataException("Error - Data:: erf function is not supported on _WIN32 platforms.");  
 #else  
     tensor_unary_operation(m_samplesize, left, result, ::erf);  
1096      break;      break;
 #endif  
1097     case ASINH:     case ASINH:
1098  #if defined (_WIN32) && !defined(__INTEL_COMPILER)          operation=ASINHF;
     tensor_unary_operation(m_samplesize, left, result, escript::asinh_substitute);  
 #else  
     tensor_unary_operation(m_samplesize, left, result, ::asinh);  
 #endif    
1099      break;      break;
1100     case ACOSH:     case ACOSH:
1101  #if defined (_WIN32) && !defined(__INTEL_COMPILER)          operation=ACOSHF;
     tensor_unary_operation(m_samplesize, left, result, escript::acosh_substitute);  
 #else  
     tensor_unary_operation(m_samplesize, left, result, ::acosh);  
 #endif    
1102      break;      break;
1103     case ATANH:     case ATANH:
1104  #if defined (_WIN32) && !defined(__INTEL_COMPILER)          operation=ATANHF;
     tensor_unary_operation(m_samplesize, left, result, escript::atanh_substitute);  
 #else  
     tensor_unary_operation(m_samplesize, left, result, ::atanh);  
 #endif    
1105      break;      break;
1106      case LOG10:      case LOG10:
1107      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log10);          operation=LOG10F;
1108      break;      break;
1109      case LOG:      case LOG:
1110      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log);          operation=LOGF;
1111      break;      break;
1112      case SIGN:      case SIGN:
1113      tensor_unary_operation(m_samplesize, left, result, escript::fsign);          operation=SIGNF;
1114      break;      break;
1115      case ABS:      case ABS:
1116      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::fabs);          operation=ABSF;
1117      break;      break;
1118      case NEG:      case NEG:
1119      tensor_unary_operation(m_samplesize, left, result, negate<double>());          operation=NEGF;
1120      break;      break;
1121      case POS:      case POS:
1122      // it doesn't mean anything for delayed.          // it doesn't mean anything for delayed.
1123      // it will just trigger a deep copy of the lazy object          // it will just trigger a deep copy of the lazy object
1124      throw DataException("Programmer error - POS not supported for lazy data.");          throw DataException("Programmer error - POS not supported for lazy data.");
1125      break;          break;
1126      case EXP:      case EXP:
1127      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::exp);          operation=EXPF;
1128      break;      break;
1129      case SQRT:      case SQRT:
1130      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sqrt);          operation=SQRTF;
1131      break;      break;
1132      case RECIP:      case RECIP:
1133      tensor_unary_operation(m_samplesize, left, result, bind1st(divides<double>(),1.));          operation=INVF;
1134      break;      break;
1135      case GZ:      case GZ:
1136      tensor_unary_operation(m_samplesize, left, result, bind2nd(greater<double>(),0.0));          operation=GTZEROF;
1137      break;      break;
1138      case LZ:      case LZ:
1139      tensor_unary_operation(m_samplesize, left, result, bind2nd(less<double>(),0.0));          operation=LTZEROF;
1140      break;      break;
1141      case GEZ:      case GEZ:
1142      tensor_unary_operation(m_samplesize, left, result, bind2nd(greater_equal<double>(),0.0));          operation=GEZEROF;
1143      break;      break;
1144      case LEZ:      case LEZ:
1145      tensor_unary_operation(m_samplesize, left, result, bind2nd(less_equal<double>(),0.0));          operation=LEZEROF;
1146        break;
1147    // NEZ and EZ are actually G_UNARY_P, but I don't see a compelling reason to treat them differently here
1148        case NEZ:
1149            operation=NEQZEROF;
1150        break;
1151        case EZ:
1152            operation=EQZEROF;
1153    LAZYDEBUG(cerr << "Trying to evaluate " << m_tol << endl;)
1154        
1155      break;      break;
   
1156      default:      default:
1157      throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");          throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");
1158    }    }
1159    return &v;    tensor_unary_array_operation(m_samplesize,
1160                                 left,
1161                                 result,
1162                                 operation,
1163                                 m_tol);  
1164      return &(m_samples);
1165  }  }
1166    
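// Illustrative sketch only (it assumes nothing about the real
// tensor_unary_array_operation signature): the unary resolve above selects an
// escript::ESFunction tag and then applies one scalar function across the
// m_samplesize doubles of the sample.  The equivalent loop looks like this:
#include <cstddef>

template <typename F>
void unary_over_sample(std::size_t samplesize, const double* in, double* out, F f)
{
    for (std::size_t i = 0; i < samplesize; ++i)
    {
        out[i] = f(in[i]);      // e.g. f could be [](double x){ return std::sin(x); }
    }
}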
1167    
1168  /*  const DataTypes::RealVectorType*
1169    \brief Compute the value of the expression (unary operation) for the given sample.  DataLazy::resolveNodeReduction(int tid, int sampleNo, size_t& roffset) const
   \return Vector which stores the value of the subexpression for the given sample.  
   \param v A vector to store intermediate results.  
   \param offset Index in v to begin storing results.  
   \param sampleNo Sample number to evaluate.  
   \param roffset (output parameter) the offset in the return vector where the result begins.  
   
   The return value will be an existing vector so do not deallocate it.  
   If the result is stored in v it should be stored at the offset given.  
   Everything from offset to the end of v should be considered available for this method to use.  
 */  
 DataTypes::ValueType*  
 DataLazy::resolveNP1OUT(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const  
1170  {  {
1171      // we assume that any collapsing has been done before we get here          // we assume that any collapsing has been done before we get here
1172      // since we only have one argument we don't need to think about only          // since we only have one argument we don't need to think about only
1173      // processing single points.          // processing single points.
1174            // we will also know we won't get identity nodes
1175    if (m_readytype!='E')    if (m_readytype!='E')
1176    {    {
1177      throw DataException("Programmer error - resolveNP1OUT should only be called on expanded Data.");      throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
1178    }    }
1179      // since we can't write the result over the input, we need a result offset further along    if (m_op==IDENTITY)
1180    size_t subroffset=roffset+m_samplesize;    {
1181    const ValueType* vleft=m_left->resolveSample(v,offset,sampleNo,subroffset);      throw DataException("Programmer error - resolveNodeUnary should not be called on identity nodes.");
1182    roffset=offset;    }
1183      size_t loffset=0;
1184      const DataTypes::RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, loffset);
1185    
1186      roffset=m_samplesize*tid;
1187      unsigned int ndpps=getNumDPPSample();
1188      unsigned int psize=DataTypes::noValues(m_left->getShape());
1189      double* result=&(m_samples[roffset]);
1190      switch (m_op)
1191      {
1192        case MINVAL:
1193            {
1194              for (unsigned int z=0;z<ndpps;++z)
1195              {
1196                FMin op;
1197                *result=DataMaths::reductionOp(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max());
1198                loffset+=psize;
1199                result++;
1200              }
1201            }
1202            break;
1203        case MAXVAL:
1204            {
1205              for (unsigned int z=0;z<ndpps;++z)
1206              {
1207              FMax op;
1208              *result=DataMaths::reductionOp(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max()*-1);
1209              loffset+=psize;
1210              result++;
1211              }
1212            }
1213            break;
1214        default:
1215            throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");
1216      }
1217      return &(m_samples);
1218    }
1219    
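// Illustrative sketch only (not DataMaths::reductionOp itself): the MINVAL
// branch above reduces each data point (psize doubles) of the sample to a
// single value, writing one result per point; MAXVAL is the same with std::max
// and the lowest possible starting value.
#include <algorithm>
#include <cstddef>
#include <limits>

void minval_over_sample(const double* in, double* out,
                        std::size_t ndpps, std::size_t psize)
{
    for (std::size_t z = 0; z < ndpps; ++z)
    {
        double m = std::numeric_limits<double>::max();
        for (std::size_t k = 0; k < psize; ++k)
        {
            m = std::min(m, in[z * psize + k]);
        }
        out[z] = m;
    }
}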
1220    const DataTypes::RealVectorType*
1221    DataLazy::resolveNodeNP1OUT(int tid, int sampleNo, size_t& roffset) const
1222    {
1223            // we assume that any collapsing has been done before we get here
1224            // since we only have one argument we don't need to think about only
1225            // processing single points.
1226      if (m_readytype!='E')
1227      {
1228        throw DataException("Programmer error - resolveNodeNP1OUT should only be called on expanded Data.");
1229      }
1230      if (m_op==IDENTITY)
1231      {
1232        throw DataException("Programmer error - resolveNodeNP1OUT should not be called on identity nodes.");
1233      }
1234      size_t subroffset;
1235      const RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1236      roffset=m_samplesize*tid;
1237      size_t loop=0;
1238      size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1239      size_t step=getNoValues();
1240      size_t offset=roffset;
1241    switch (m_op)    switch (m_op)
1242    {    {
1243      case SYM:      case SYM:
1244      DataMaths::symmetric(*vleft,m_left->getShape(),subroffset, v, getShape(), offset);          for (loop=0;loop<numsteps;++loop)
1245      break;          {
1246                DataMaths::symmetric(*leftres,m_left->getShape(),subroffset, m_samples, getShape(), offset);
1247                subroffset+=step;
1248                offset+=step;
1249            }
1250            break;
1251      case NSYM:      case NSYM:
1252      DataMaths::nonsymmetric(*vleft,m_left->getShape(),subroffset, v, getShape(), offset);          for (loop=0;loop<numsteps;++loop)
1253      break;          {
1254                DataMaths::nonsymmetric(*leftres,m_left->getShape(),subroffset, m_samples, getShape(), offset);
1255                subroffset+=step;
1256                offset+=step;
1257            }
1258            break;
1259        default:
1260            throw DataException("Programmer error - resolveNP1OUT can not resolve operator "+opToString(m_op)+".");
1261      }
1262      return &m_samples;
1263    }
1264    
1265    const DataTypes::RealVectorType*
1266    DataLazy::resolveNodeNP1OUT_P(int tid, int sampleNo, size_t& roffset) const
1267    {
1268            // we assume that any collapsing has been done before we get here
1269            // since we only have one argument we don't need to think about only
1270            // processing single points.
1271      if (m_readytype!='E')
1272      {
1273        throw DataException("Programmer error - resolveNodeNP1OUT_P should only be called on expanded Data.");
1274      }
1275      if (m_op==IDENTITY)
1276      {
1277        throw DataException("Programmer error - resolveNodeNP1OUT_P should not be called on identity nodes.");
1278      }
1279      size_t subroffset;
1280      size_t offset;
1281      const RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1282      roffset=m_samplesize*tid;
1283      offset=roffset;
1284      size_t loop=0;
1285      size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1286      size_t outstep=getNoValues();
1287      size_t instep=m_left->getNoValues();
1288      switch (m_op)
1289      {
1290        case TRACE:
1291            for (loop=0;loop<numsteps;++loop)
1292            {
1293                DataMaths::trace(*leftres,m_left->getShape(),subroffset, m_samples ,getShape(),offset,m_axis_offset);
1294                subroffset+=instep;
1295                offset+=outstep;
1296            }
1297            break;
1298        case TRANS:
1299            for (loop=0;loop<numsteps;++loop)
1300            {
1301                DataMaths::transpose(*leftres,m_left->getShape(),subroffset, m_samples, getShape(),offset,m_axis_offset);
1302                subroffset+=instep;
1303                offset+=outstep;
1304            }
1305            break;
1306        default:
1307            throw DataException("Programmer error - resolveNP1OUTP can not resolve operator "+opToString(m_op)+".");
1308      }
1309      return &m_samples;
1310    }
1311    
1312    
1313    const DataTypes::RealVectorType*
1314    DataLazy::resolveNodeNP1OUT_2P(int tid, int sampleNo, size_t& roffset) const
1315    {
1316      if (m_readytype!='E')
1317      {
1318        throw DataException("Programmer error - resolveNodeNP1OUT_2P should only be called on expanded Data.");
1319      }
1320      if (m_op==IDENTITY)
1321      {
1322        throw DataException("Programmer error - resolveNodeNP1OUT_2P should not be called on identity nodes.");
1323      }
1324      size_t subroffset;
1325      size_t offset;
1326      const RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1327      roffset=m_samplesize*tid;
1328      offset=roffset;
1329      size_t loop=0;
1330      size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1331      size_t outstep=getNoValues();
1332      size_t instep=m_left->getNoValues();
1333      switch (m_op)
1334      {
1335        case SWAP:
1336            for (loop=0;loop<numsteps;++loop)
1337            {
1338                DataMaths::swapaxes(*leftres,m_left->getShape(),subroffset, m_samples, getShape(),offset, m_axis_offset, m_transpose);
1339                subroffset+=instep;
1340                offset+=outstep;
1341            }
1342            break;
1343      default:      default:
1344      throw DataException("Programmer error - resolveNP1OUT can not resolve operator "+opToString(m_op)+".");          throw DataException("Programmer error - resolveNodeNP1OUT2P can not resolve operator "+opToString(m_op)+".");
1345    }    }
1346    return &v;    return &m_samples;
1347  }  }
1348    
1349    const DataTypes::RealVectorType*
1350    DataLazy::resolveNodeCondEval(int tid, int sampleNo, size_t& roffset) const
1351    {
1352      if (m_readytype!='E')
1353      {
1354        throw DataException("Programmer error - resolveNodeCondEval should only be called on expanded Data.");
1355      }
1356      if (m_op!=CONDEVAL)
1357      {
1358        throw DataException("Programmer error - resolveNodeCondEval should only be called on CONDEVAL nodes.");
1359      }
1360      size_t subroffset;
1361    
1362      const RealVectorType* maskres=m_mask->resolveNodeSample(tid, sampleNo, subroffset);
1363      const RealVectorType* srcres=0;
1364      if ((*maskres)[subroffset]>0)
1365      {
1366            srcres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1367      }
1368      else
1369      {
1370            srcres=m_right->resolveNodeSample(tid, sampleNo, subroffset);
1371      }
1372    
1373      // Now we need to copy the result
1374    
1375      roffset=m_samplesize*tid;
1376      for (int i=0;i<m_samplesize;++i)
1377      {
1378            m_samples[roffset+i]=(*srcres)[subroffset+i];  
1379      }
1380    
1381  #define PROC_OP(TYPE,X)                               \    return &m_samples;
1382      for (int i=0;i<steps;++i,resultp+=resultStep) \  }
     { \  
        tensor_binary_operation< TYPE >(chunksize, &((*left)[lroffset]), &((*right)[rroffset]), resultp, X); \  
        lroffset+=leftStep; \  
        rroffset+=rightStep; \  
     }  
1383    
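// Illustrative sketch only (not escript code): resolveNodeCondEval above picks,
// per sample, either the left or the right child's resolved block depending on
// the first value of the mask sample, then copies that block into this node's
// per-thread storage.
#include <cstddef>
#include <cstring>

void condeval_sample(const double* mask, const double* left, const double* right,
                     double* out, std::size_t samplesize)
{
    const double* src = (mask[0] > 0) ? left : right;
    std::memcpy(out, src, samplesize * sizeof(double));
}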
 /*  
   \brief Compute the value of the expression (binary operation) for the given sample.  
   \return Vector which stores the value of the subexpression for the given sample.  
   \param v A vector to store intermediate results.  
   \param offset Index in v to begin storing results.  
   \param sampleNo Sample number to evaluate.  
   \param roffset (output parameter) the offset in the return vector where the result begins.  
   
   The return value will be an existing vector so do not deallocate it.  
   If the result is stored in v it should be stored at the offset given.  
   Everything from offset to the end of v should be considered available for this method to use.  
 */  
1384  // This method assumes that any subexpressions which evaluate to Constant or Tagged Data  // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1385  // have already been collapsed to IDENTITY. So we must have at least one expanded child.  // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1386  // If both children are expanded, then we can process them in a single operation (we treat  // If both children are expanded, then we can process them in a single operation (we treat
# Line 842  DataLazy::resolveNP1OUT(ValueType& v, si Line 1390  DataLazy::resolveNP1OUT(ValueType& v, si
1390  // There is an additional complication when scalar operations are considered.  // There is an additional complication when scalar operations are considered.
1391  // For example, 2+Vector.  // For example, 2+Vector.
1392  // In this case each double within the point is treated individually  // In this case each double within the point is treated individually
1393  DataTypes::ValueType*  const DataTypes::RealVectorType*
1394  DataLazy::resolveBinary(ValueType& v,  size_t offset, int sampleNo, size_t& roffset) const  DataLazy::resolveNodeBinary(int tid, int sampleNo, size_t& roffset) const
1395  {  {
1396  cout << "Resolve binary: " << toString() << endl;  LAZYDEBUG(cout << "Resolve binary: " << toString() << endl;)
1397    
1398    size_t lroffset=0, rroffset=0;    // offsets in the left and right result vectors    size_t lroffset=0, rroffset=0;        // offsets in the left and right result vectors
1399      // first work out which of the children are expanded          // first work out which of the children are expanded
1400    bool leftExp=(m_left->m_readytype=='E');    bool leftExp=(m_left->m_readytype=='E');
1401    bool rightExp=(m_right->m_readytype=='E');    bool rightExp=(m_right->m_readytype=='E');
1402    bool bigloops=((leftExp && rightExp) || (!leftExp && !rightExp)); // is processing in single step?    if (!leftExp && !rightExp)
1403    int steps=(bigloops?1:getNumDPPSample());    {
1404    size_t chunksize=(bigloops? m_samplesize : getNoValues());    // if bigloops, pretend the whole sample is a datapoint          throw DataException("Programmer Error - please use collapse if neither argument has type 'E'.");
1405    if (m_left->getRank()!=m_right->getRank())    // need to deal with scalar * ? ops    }
1406    {    bool leftScalar=(m_left->getRank()==0);
1407      EsysAssert((m_left->getRank()==0) || (m_right->getRank()==0), "Error - Ranks must match unless one is 0.");    bool rightScalar=(m_right->getRank()==0);
1408      steps=getNumDPPSample()*max(m_left->getNoValues(),m_right->getNoValues());    if ((m_left->getRank()!=m_right->getRank()) && (!leftScalar && !rightScalar))
1409      chunksize=1;    // for scalar    {
1410    }              throw DataException("resolveBinary - ranks of arguments must match unless one of them is scalar.");
1411    int leftStep=((leftExp && !rightExp)? m_right->getNoValues() : 0);    }
1412    int rightStep=((rightExp && !leftExp)? m_left->getNoValues() : 0);    size_t leftsize=m_left->getNoValues();
1413    int resultStep=max(leftStep,rightStep);   // only one (at most) should be !=0    size_t rightsize=m_right->getNoValues();
1414      // Get the values of sub-expressions    size_t chunksize=1;                   // how many doubles will be processed in one go
1415    const ValueType* left=m_left->resolveSample(v,offset,sampleNo,lroffset);    int leftstep=0;               // how far should the left offset advance after each step
1416    const ValueType* right=m_right->resolveSample(v,offset+m_samplesize,sampleNo,rroffset); // Note    int rightstep=0;
1417      // the right child starts further along.    int numsteps=0;               // total number of steps for the inner loop
1418    double* resultp=&(v[offset]);     // results are stored at the vector offset we received    int oleftstep=0;      // the o variables refer to the outer loop
1419      int orightstep=0;     // The outer loop is only required in cases where there is an expanded scalar
1420      int onumsteps=1;
1421      
1422      bool LES=(leftExp && leftScalar);     // Left is an expanded scalar
1423      bool RES=(rightExp && rightScalar);
1424      bool LS=(!leftExp && leftScalar);     // left is a single scalar
1425      bool RS=(!rightExp && rightScalar);
1426      bool LN=(!leftExp && !leftScalar);    // left is a single non-scalar
1427      bool RN=(!rightExp && !rightScalar);
1428      bool LEN=(leftExp && !leftScalar);    // left is an expanded non-scalar
1429      bool REN=(rightExp && !rightScalar);
1430    
1431      if ((LES && RES) || (LEN && REN))     // both are Expanded scalars or both are expanded non-scalars
1432      {
1433            chunksize=m_left->getNumDPPSample()*leftsize;
1434            leftstep=0;
1435            rightstep=0;
1436            numsteps=1;
1437      }
1438      else if (LES || RES)
1439      {
1440            chunksize=1;
1441            if (LES)                // left is an expanded scalar
1442            {
1443                    if (RS)
1444                    {
1445                       leftstep=1;
1446                       rightstep=0;
1447                       numsteps=m_left->getNumDPPSample();
1448                    }
1449                    else            // RN or REN
1450                    {
1451                       leftstep=0;
1452                       oleftstep=1;
1453                       rightstep=1;
1454                       orightstep=(RN ? -(int)rightsize : 0);
1455                       numsteps=rightsize;
1456                       onumsteps=m_left->getNumDPPSample();
1457                    }
1458            }
1459            else            // right is an expanded scalar
1460            {
1461                    if (LS)
1462                    {
1463                       rightstep=1;
1464                       leftstep=0;
1465                       numsteps=m_right->getNumDPPSample();
1466                    }
1467                    else
1468                    {
1469                       rightstep=0;
1470                       orightstep=1;
1471                       leftstep=1;
1472                       oleftstep=(LN ? -(int)leftsize : 0);
1473                       numsteps=leftsize;
1474                       onumsteps=m_right->getNumDPPSample();
1475                    }
1476            }
1477      }
1478      else  // this leaves (LEN, RS), (LEN, RN) and their transposes
1479      {
1480            if (LEN)        // and Right will be a single value
1481            {
1482                    chunksize=rightsize;
1483                    leftstep=rightsize;
1484                    rightstep=0;
1485                    numsteps=m_left->getNumDPPSample();
1486                    if (RS)
1487                    {
1488                       numsteps*=leftsize;
1489                    }
1490            }
1491            else    // REN
1492            {
1493                    chunksize=leftsize;
1494                    rightstep=leftsize;
1495                    leftstep=0;
1496                    numsteps=m_right->getNumDPPSample();
1497                    if (LS)
1498                    {
1499                       numsteps*=rightsize;
1500                    }
1501            }
1502      }
1503    
1504      int resultStep=max(leftstep,rightstep);       // only one (at most) should be !=0
1505            // Get the values of sub-expressions
1506      const RealVectorType* left=m_left->resolveNodeSample(tid,sampleNo,lroffset);      
1507      const RealVectorType* right=m_right->resolveNodeSample(tid,sampleNo,rroffset);
1508    LAZYDEBUG(cout << "Post sub calls in " << toString() << endl;)
1509    LAZYDEBUG(cout << "shapes=" << DataTypes::shapeToString(m_left->getShape()) << "," << DataTypes::shapeToString(m_right->getShape()) << endl;)
1510    LAZYDEBUG(cout << "chunksize=" << chunksize << endl << "leftstep=" << leftstep << " rightstep=" << rightstep;)
1511    LAZYDEBUG(cout << " numsteps=" << numsteps << endl << "oleftstep=" << oleftstep << " orightstep=" << orightstep;)
1512    LAZYDEBUG(cout << "onumsteps=" << onumsteps << endl;)
1513    LAZYDEBUG(cout << " DPPS=" << m_left->getNumDPPSample() << "," <<m_right->getNumDPPSample() << endl;)
1514    LAZYDEBUG(cout << "" << LS << RS << LN << RN << LES << RES <<LEN << REN <<   endl;)
1515    
1516    LAZYDEBUG(cout << "Left res["<< lroffset<< "]=" << (*left)[lroffset] << endl;)
1517    LAZYDEBUG(cout << "Right res["<< rroffset<< "]=" << (*right)[rroffset] << endl;)
1518    
1519    
1520      roffset=m_samplesize*tid;
1521      double* resultp=&(m_samples[roffset]);                // results are stored at the vector offset we received
1522    switch(m_op)    switch(m_op)
1523    {    {
1524      case ADD:      case ADD:
1525          PROC_OP(NO_ARG,plus<double>());          //PROC_OP(NO_ARG,plus<double>());
1526      break;        DataMaths::binaryOpVectorLazyHelper<real_t, real_t, real_t>(resultp,
1527                 &(*left)[0],
1528                 &(*right)[0],
1529                 chunksize,
1530                 onumsteps,
1531                 numsteps,
1532                 resultStep,
1533                 leftstep,
1534                 rightstep,
1535                 oleftstep,
1536                 orightstep,
1537                 lroffset,
1538                 rroffset,
1539                 escript::ESFunction::PLUSF);  
1540            break;
1541      case SUB:      case SUB:
1542      PROC_OP(NO_ARG,minus<double>());        DataMaths::binaryOpVectorLazyHelper<real_t, real_t, real_t>(resultp,
1543      break;               &(*left)[0],
1544                 &(*right)[0],
1545                 chunksize,
1546                 onumsteps,
1547                 numsteps,
1548                 resultStep,
1549                 leftstep,
1550                 rightstep,
1551                 oleftstep,
1552                 orightstep,
1553                 lroffset,
1554                 rroffset,
1555                 escript::ESFunction::MINUSF);        
1556            //PROC_OP(NO_ARG,minus<double>());
1557            break;
1558      case MUL:      case MUL:
1559      PROC_OP(NO_ARG,multiplies<double>());          //PROC_OP(NO_ARG,multiplies<double>());
1560      break;        DataMaths::binaryOpVectorLazyHelper<real_t, real_t, real_t>(resultp,
1561                 &(*left)[0],
1562                 &(*right)[0],
1563                 chunksize,
1564                 onumsteps,
1565                 numsteps,
1566                 resultStep,
1567                 leftstep,
1568                 rightstep,
1569                 oleftstep,
1570                 orightstep,
1571                 lroffset,
1572                 rroffset,
1573                 escript::ESFunction::MULTIPLIESF);      
1574            break;
1575      case DIV:      case DIV:
1576      PROC_OP(NO_ARG,divides<double>());          //PROC_OP(NO_ARG,divides<double>());
1577      break;        DataMaths::binaryOpVectorLazyHelper<real_t, real_t, real_t>(resultp,
1578                 &(*left)[0],
1579                 &(*right)[0],
1580                 chunksize,
1581                 onumsteps,
1582                 numsteps,
1583                 resultStep,
1584                 leftstep,
1585                 rightstep,
1586                 oleftstep,
1587                 orightstep,
1588                 lroffset,
1589                 rroffset,
1590                 escript::ESFunction::DIVIDESF);          
1591            break;
1592      case POW:      case POW:
1593         PROC_OP(double (double,double),::pow);         //PROC_OP(double (double,double),::pow);
1594      break;        DataMaths::binaryOpVectorLazyHelper<real_t, real_t, real_t>(resultp,
1595                 &(*left)[0],
1596                 &(*right)[0],
1597                 chunksize,
1598                 onumsteps,
1599                 numsteps,
1600                 resultStep,
1601                 leftstep,
1602                 rightstep,
1603                 oleftstep,
1604                 orightstep,
1605                 lroffset,
1606                 rroffset,
1607                 escript::ESFunction::POWF);          
1608            break;
1609      default:      default:
1610      throw DataException("Programmer error - resolveBinary can not resolve operator "+opToString(m_op)+".");          throw DataException("Programmer error - resolveBinary can not resolve operator "+opToString(m_op)+".");
1611    }    }
1612    roffset=offset;    LAZYDEBUG(cout << "Result res[" << roffset<< "]" << m_samples[roffset] << endl;)
1613    return &v;    return &m_samples;
1614  }  }
1615    
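// Illustrative sketch only (not the DataMaths::binaryOpVectorLazyHelper
// implementation): every case above - expanded or not, scalar or not - is
// reduced to one stepping scheme.  chunksize doubles are combined at a time,
// the inner loop advances by leftstep/rightstep, and the outer loop
// (onumsteps with oleftstep/orightstep) only does real work when an expanded
// scalar is broadcast against a non-scalar, e.g. 2+Vector.
#include <cstddef>

template <typename Op>
void binary_stepped(double* res, const double* left, const double* right,
                    std::size_t chunksize,
                    int onumsteps, int numsteps, int resultstep,
                    int leftstep, int rightstep,
                    int oleftstep, int orightstep,
                    std::ptrdiff_t loff, std::ptrdiff_t roff, Op op)
{
    for (int o = 0; o < onumsteps; ++o)
    {
        for (int i = 0; i < numsteps; ++i)
        {
            for (std::size_t k = 0; k < chunksize; ++k)
            {
                res[k] = op(left[loff + k], right[roff + k]);
            }
            res  += resultstep;
            loff += leftstep;
            roff += rightstep;
        }
        loff += oleftstep;
        roff += orightstep;
    }
}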
1616    
 /*  
   \brief Compute the value of the expression (tensor product) for the given sample.  
   \return Vector which stores the value of the subexpression for the given sample.  
   \param v A vector to store intermediate results.  
   \param offset Index in v to begin storing results.  
   \param sampleNo Sample number to evaluate.  
   \param roffset (output parameter) the offset in the return vector where the result begins.  
   
   The return value will be an existing vector so do not deallocate it.  
   If the result is stored in v it should be stored at the offset given.  
   Everything from offset to the end of v should be considered available for this method to use.  
 */  
1617  // This method assumes that any subexpressions which evaluate to Constant or Tagged Data  // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1618  // have already been collapsed to IDENTITY. So we must have at least one expanded child.  // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1619  // unlike the other resolve helpers, we must treat these datapoints separately.  // unlike the other resolve helpers, we must treat these datapoints separately.
1620  DataTypes::ValueType*  const DataTypes::RealVectorType*
1621  DataLazy::resolveTProd(ValueType& v,  size_t offset, int sampleNo, size_t& roffset) const  DataLazy::resolveNodeTProd(int tid, int sampleNo, size_t& roffset) const
1622  {  {
1623  cout << "Resolve TensorProduct: " << toString() << endl;  LAZYDEBUG(cout << "Resolve TensorProduct: " << toString() << endl;)
1624    
1625    size_t lroffset=0, rroffset=0;    // offsets in the left and right result vectors    size_t lroffset=0, rroffset=0;        // offsets in the left and right result vectors
1626      // first work out which of the children are expanded          // first work out which of the children are expanded
1627    bool leftExp=(m_left->m_readytype=='E');    bool leftExp=(m_left->m_readytype=='E');
1628    bool rightExp=(m_right->m_readytype=='E');    bool rightExp=(m_right->m_readytype=='E');
1629    int steps=getNumDPPSample();    int steps=getNumDPPSample();
1630    int leftStep=((leftExp && !rightExp)? m_right->getNoValues() : 0);    int leftStep=(leftExp? m_left->getNoValues() : 0);            // do not have scalars as input to this method
1631    int rightStep=((rightExp && !leftExp)? m_left->getNoValues() : 0);    int rightStep=(rightExp?m_right->getNoValues() : 0);
1632    int resultStep=max(leftStep,rightStep);   // only one (at most) should be !=0  
1633      // Get the values of sub-expressions (leave a gap of one sample for the result).    int resultStep=getNoValues();
1634    const ValueType* left=m_left->resolveSample(v,offset+m_samplesize,sampleNo,lroffset);    roffset=m_samplesize*tid;
1635    const ValueType* right=m_right->resolveSample(v,offset+2*m_samplesize,sampleNo,rroffset);    size_t offset=roffset;
1636    double* resultp=&(v[offset]);     // results are stored at the vector offset we received
1637      const RealVectorType* left=m_left->resolveNodeSample(tid, sampleNo, lroffset);
1638    
1639      const RealVectorType* right=m_right->resolveNodeSample(tid, sampleNo, rroffset);
1640    
1641    LAZYDEBUG(cerr << "[Left shape]=" << DataTypes::shapeToString(m_left->getShape()) << "\n[Right shape]=" << DataTypes::shapeToString(m_right->getShape()) << " result=" <<DataTypes::shapeToString(getShape()) <<  endl;
1642    cout << getNoValues() << endl;)
1643    
1644    
1645    LAZYDEBUG(cerr << "Post sub calls: " << toString() << endl;)
1646    LAZYDEBUG(cout << "LeftExp=" << leftExp << " rightExp=" << rightExp << endl;)
1647    LAZYDEBUG(cout << "LeftR=" << m_left->getRank() << " rightExp=" << m_right->getRank() << endl;)
1648    LAZYDEBUG(cout << "LeftSize=" << m_left->getNoValues() << " RightSize=" << m_right->getNoValues() << endl;)
1649    LAZYDEBUG(cout << "m_samplesize=" << m_samplesize << endl;)
1650    LAZYDEBUG(cout << "outputshape=" << DataTypes::shapeToString(getShape()) << endl;)
1651    LAZYDEBUG(cout << "DPPS=" << m_right->getNumDPPSample() <<"."<<endl;)
1652    
1653      double* resultp=&(m_samples[offset]);         // results are stored at the vector offset we received
1654    switch(m_op)    switch(m_op)
1655    {    {
1656      case PROD:      case PROD:
1657      for (int i=0;i<steps;++i,resultp+=resultStep)          for (int i=0;i<steps;++i,resultp+=resultStep)
1658      {          {
1659            const double *ptr_0 = &((*left)[lroffset]);            const double *ptr_0 = &((*left)[lroffset]);
1660            const double *ptr_1 = &((*right)[rroffset]);            const double *ptr_1 = &((*right)[rroffset]);
1661            matrix_matrix_product(m_SL, m_SM, m_SR, ptr_0, ptr_1, resultp, m_transpose);  
1662        lroffset+=leftStep;  LAZYDEBUG(cout << DataTypes::pointToString(*left, m_left->getShape(),lroffset,"LEFT") << endl;)
1663        rroffset+=rightStep;  LAZYDEBUG(cout << DataTypes::pointToString(*right,m_right->getShape(),rroffset, "RIGHT") << endl;)
1664      }  
1665      break;            matrix_matrix_product(m_SL, m_SM, m_SR, ptr_0, ptr_1, resultp, m_transpose);
1666    
1667              lroffset+=leftStep;
1668              rroffset+=rightStep;
1669            }
1670            break;
1671      default:      default:
1672      throw DataException("Programmer error - resolveTProduct can not resolve operator "+opToString(m_op)+".");          throw DataException("Programmer error - resolveTProduct can not resolve operator "+opToString(m_op)+".");
1673    }    }
1674    roffset=offset;    roffset=offset;
1675    return &v;    return &m_samples;
1676  }  }
1677    
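// Illustrative sketch only (not matrix_matrix_product itself): in the rank-2,
// non-transposed case each data point is an SL x SM matrix multiplied by an
// SM x SR matrix.  The indexing below assumes a first-index-fastest layout for
// the stored points; treat that layout as an assumption, not a statement about
// escript internals.
#include <cstddef>

void matmat(std::size_t SL, std::size_t SM, std::size_t SR,
            const double* A, const double* B, double* C)
{
    for (std::size_t i = 0; i < SL; ++i)
    {
        for (std::size_t j = 0; j < SR; ++j)
        {
            double sum = 0.0;
            for (std::size_t k = 0; k < SM; ++k)
            {
                sum += A[i + SL * k] * B[k + SM * j];
            }
            C[i + SL * j] = sum;
        }
    }
}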
1678    
1679    const DataTypes::RealVectorType*
1680    DataLazy::resolveSample(int sampleNo, size_t& roffset) const
1681    {
1682    #ifdef _OPENMP
1683            int tid=omp_get_thread_num();
1684    #else
1685            int tid=0;
1686    #endif
1687    
1688  /*  #ifdef LAZY_STACK_PROF
1689    \brief Compute the value of the expression for the given sample.          stackstart[tid]=&tid;
1690    \return Vector which stores the value of the subexpression for the given sample.          stackend[tid]=&tid;
1691    \param v A vector to store intermediate results.          const DataTypes::RealVectorType* r=resolveNodeSample(tid, sampleNo, roffset);
1692    \param offset Index in v to begin storing results.          size_t d=(size_t)stackstart[tid]-(size_t)stackend[tid];
1693    \param sampleNo Sample number to evaluate.          #pragma omp critical
1694    \param roffset (output parameter) the offset in the return vector where the result begins.          if (d>maxstackuse)
1695            {
1696    cout << "Max resolve Stack use " << d << endl;
1697                    maxstackuse=d;
1698            }
1699            return r;
1700    #else
1701            return resolveNodeSample(tid, sampleNo, roffset);
1702    #endif
1703    }
1704    
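// Illustrative sketch only (not escript code): the LAZY_STACK_PROF bookkeeping
// above estimates how much stack a resolve used by recording the address of a
// local variable when resolution starts and the lowest such address seen while
// recursing (the stack is assumed to grow downwards, as the original code also
// assumes).
#include <cstddef>

struct StackProbe
{
    const char* start;
    const char* lowest;

    explicit StackProbe(const void* here)
      : start(static_cast<const char*>(here)),
        lowest(static_cast<const char*>(here)) {}

    void touch(const void* here)            // call this from nested frames
    {
        const char* p = static_cast<const char*>(here);
        if (p < lowest) lowest = p;
    }

    std::size_t used() const { return static_cast<std::size_t>(start - lowest); }
};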
1705    The return value will be an existing vector so do not deallocate it.  
1706  */  // This needs to do the work of the identity constructor
1707  // the vector and the offset are a place where the method could write its data if it wishes  void
1708  // it is not obligated to do so. For example, if it has its own storage already, it can use that.  DataLazy::resolveToIdentity()
 // Hence the return value to indicate where the data is actually stored.  
 // Regardless, the storage should be assumed to be used, even if it isn't.  
   
 // the roffset is the offset within the returned vector where the data begins  
 const DataTypes::ValueType*  
 DataLazy::resolveSample(ValueType& v, size_t offset, int sampleNo, size_t& roffset)  
1709  {  {
1710  cout << "Resolve sample " << toString() << endl;     if (m_op==IDENTITY)
1711      // collapse so we have a 'E' node or an IDENTITY for some other type          return;
1712    if (m_readytype!='E' && m_op!=IDENTITY)     DataReady_ptr p=resolveNodeWorker();
1713    {     makeIdentity(p);
1714      collapse();  }
1715    }  
1716    if (m_op==IDENTITY)    void DataLazy::makeIdentity(const DataReady_ptr& p)
1717    {
1718       m_op=IDENTITY;
1719       m_axis_offset=0;
1720       m_transpose=0;
1721       m_SL=m_SM=m_SR=0;
1722       m_children=m_height=0;
1723       m_id=p;
1724       if(p->isConstant()) {m_readytype='C';}
1725       else if(p->isExpanded()) {m_readytype='E';}
1726       else if (p->isTagged()) {m_readytype='T';}
1727       else {throw DataException("Unknown DataReady instance in convertToIdentity constructor.");}
1728       m_samplesize=p->getNumDPPSample()*p->getNoValues();
1729       m_left.reset();
1730       m_right.reset();
1731    }
1732    
1733    
1734    DataReady_ptr
1735    DataLazy::resolve()
1736    {
1737        resolveToIdentity();
1738        return m_id;
1739    }
1740    
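// Usage note (a sketch using only names defined in this file): resolve() first
// turns the node into an IDENTITY via resolveToIdentity(), then hands back the
// stored DataReady pointer, so calling it repeatedly is cheap.
//
//     DataReady_ptr ready = someLazyNode->resolve();   // someLazyNode: a DataLazy*
//     // later calls to someLazyNode->resolve() return the same m_id without
//     // re-evaluating the expression tree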
1741    
1742    /* This is really a static method but I think that caused problems in windows */
1743    void
1744    DataLazy::resolveGroupWorker(std::vector<DataLazy*>& dats)
1745    {
1746      if (dats.empty())
1747    {    {
1748      const ValueType& vec=m_id->getVector();          return;
     if (m_readytype=='C')  
     {  
     roffset=0;  
     return &(vec);  
     }  
     roffset=m_id->getPointOffset(sampleNo, 0);  
     return &(vec);  
1749    }    }
1750    if (m_readytype!='E')    vector<DataLazy*> work;
1751    {    FunctionSpace fs=dats[0]->getFunctionSpace();
1752      throw DataException("Programmer Error - Collapse did not produce an expanded node.");    bool match=true;
1753      for (int i=dats.size()-1;i>=0;--i)
1754      {
1755            if (dats[i]->m_readytype!='E')
1756            {
1757                    dats[i]->collapse();
1758            }
1759            if (dats[i]->m_op!=IDENTITY)
1760            {
1761                    work.push_back(dats[i]);
1762                    if (fs!=dats[i]->getFunctionSpace())
1763                    {
1764                            match=false;
1765                    }
1766            }
1767      }
1768      if (work.empty())
1769      {
1770            return;         // no work to do
1771      }
1772      if (match)    // all functionspaces match.  Yes I realise this is overly strict
1773      {             // it is possible that dats[0] is one of the objects which we discarded and
1774                    // all the other functionspaces match.
1775            vector<DataExpanded*> dep;
1776            vector<RealVectorType*> vecs;
1777            for (int i=0;i<work.size();++i)
1778            {
1779                    dep.push_back(new DataExpanded(fs,work[i]->getShape(), RealVectorType(work[i]->getNoValues())));
1780                    vecs.push_back(&(dep[i]->getVectorRW()));
1781            }
1782            int totalsamples=work[0]->getNumSamples();
1783            const RealVectorType* res=0; // Storage for answer
1784            int sample;
1785            #pragma omp parallel private(sample, res)
1786            {
1787                size_t roffset=0;
1788                #pragma omp for schedule(static)
1789                for (sample=0;sample<totalsamples;++sample)
1790                {
1791                    roffset=0;
1792                    int j;
1793                    for (j=work.size()-1;j>=0;--j)
1794                    {
1795    #ifdef _OPENMP
1796                        res=work[j]->resolveNodeSample(omp_get_thread_num(),sample,roffset);
1797    #else
1798                        res=work[j]->resolveNodeSample(0,sample,roffset);
1799    #endif
1800                        RealVectorType::size_type outoffset=dep[j]->getPointOffset(sample,0);
1801                        memcpy(&((*vecs[j])[outoffset]),&((*res)[roffset]),work[j]->m_samplesize*sizeof(RealVectorType::ElementType));
1802                    }
1803                }
1804            }
1805            // Now we need to load the new results as identity ops into the lazy nodes
1806            for (int i=work.size()-1;i>=0;--i)
1807            {
1808                work[i]->makeIdentity(REFCOUNTNS::dynamic_pointer_cast<DataReady>(dep[i]->getPtr()));
1809            }
1810    }    }
1811    switch (getOpgroup(m_op))    else  // functionspaces do not match
1812    {    {
1813    case G_UNARY: return resolveUnary(v, offset,sampleNo,roffset);          for (int i=0;i<work.size();++i)
1814    case G_BINARY: return resolveBinary(v, offset,sampleNo,roffset);          {
1815    case G_NP1OUT: return resolveNP1OUT(v, offset, sampleNo,roffset);                  work[i]->resolveToIdentity();
1816    case G_TENSORPROD: return resolveTProd(v,offset, sampleNo,roffset);          }
   default:  
     throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");  
1817    }    }
1818  }  }
1819    
1820    
1821  // To simplify the memory management, all threads operate on one large vector, rather than one each.  
1822  // Each sample is evaluated independently and copied into the result DataExpanded.  // This version of resolve uses storage in each node to hold results
1823  DataReady_ptr  DataReady_ptr
1824  DataLazy::resolve()  DataLazy::resolveNodeWorker()
1825  {  {
1826      if (m_readytype!='E')         // if the whole sub-expression is Constant or Tagged, then evaluate it normally
 cout << "Sample size=" << m_samplesize << endl;  
 cout << "Buffers=" << m_buffsRequired << endl;  
   
   if (m_readytype!='E')     // if the whole sub-expression is Constant or Tagged, then evaluate it normally  
1827    {    {
1828      collapse();      collapse();
1829    }    }
1830    if (m_op==IDENTITY)       // So a lazy expression of Constant or Tagged data will be returned here.    if (m_op==IDENTITY)           // So a lazy expression of Constant or Tagged data will be returned here.
1831    {    {
1832      return m_id;      return m_id;
1833    }    }
1834      // from this point on we must have m_op!=IDENTITY and m_readytype=='E'          // from this point on we must have m_op!=IDENTITY and m_readytype=='E'
1835    size_t threadbuffersize=m_maxsamplesize*(max(1,m_buffsRequired)); // Each thread needs to have enough    DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(),  RealVectorType(getNoValues()));
1836      // storage to evaluate its expression    RealVectorType& resvec=result->getVectorRW();
   int numthreads=1;  
 #ifdef _OPENMP  
   numthreads=getNumberOfThreads();  
   int threadnum=0;  
 #endif  
   ValueType v(numthreads*threadbuffersize);  
 cout << "Buffer created with size=" << v.size() << endl;  
   DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(),  ValueType(getNoValues()));  
   ValueType& resvec=result->getVector();  
1837    DataReady_ptr resptr=DataReady_ptr(result);    DataReady_ptr resptr=DataReady_ptr(result);
1838    
1839    int sample;    int sample;
   size_t outoffset;     // offset in the output data  
1840    int totalsamples=getNumSamples();    int totalsamples=getNumSamples();
1841    const ValueType* res=0;   // Vector storing the answer    const RealVectorType* res=0;       // Storage for answer
1842    size_t resoffset=0;       // where in the vector to find the answer  LAZYDEBUG(cout << "Total number of samples=" <<totalsamples << endl;)
1843    #pragma omp parallel for private(sample,resoffset,outoffset,threadnum,res) schedule(static)    #pragma omp parallel private(sample,res)
1844    for (sample=0;sample<totalsamples;++sample)    {
1845    {          size_t roffset=0;
1846  cout << "################################# " << sample << endl;  #ifdef LAZY_STACK_PROF
1847            stackstart[omp_get_thread_num()]=&roffset;
1848            stackend[omp_get_thread_num()]=&roffset;
1849    #endif
1850            #pragma omp for schedule(static)
1851            for (sample=0;sample<totalsamples;++sample)
1852            {
1853                    roffset=0;
1854  #ifdef _OPENMP  #ifdef _OPENMP
1855      res=resolveSample(v,threadbuffersize*omp_get_thread_num(),sample,resoffset);                  res=resolveNodeSample(omp_get_thread_num(),sample,roffset);
1856  #else  #else
1857      res=resolveSample(v,0,sample,resoffset);   // res would normally be v, but not if it's a single IDENTITY op.                  res=resolveNodeSample(0,sample,roffset);
1858  #endif  #endif
1859  cerr << "-------------------------------- " << endl;  LAZYDEBUG(cout << "Sample #" << sample << endl;)
1860      outoffset=result->getPointOffset(sample,0);  LAZYDEBUG(cout << "Final res[" << roffset<< "]=" << (*res)[roffset] << (*res)[roffset]<< endl; )
1861  cerr << "offset=" << outoffset << endl;                  RealVectorType::size_type outoffset=result->getPointOffset(sample,0);
1862      for (unsigned int i=0;i<m_samplesize;++i,++outoffset,++resoffset)   // copy values into the output vector                  memcpy(&(resvec[outoffset]),&((*res)[roffset]),m_samplesize*sizeof(RealVectorType::ElementType));
1863      {          }
1864      resvec[outoffset]=(*res)[resoffset];    }
1865      }  #ifdef LAZY_STACK_PROF
1866  cerr << "*********************************" << endl;    for (int i=0;i<getNumberOfThreads();++i)
1867      {
1868            size_t r=((size_t)stackstart[i] - (size_t)stackend[i]);
1869    //      cout << i << " " << stackstart[i] << " .. " << stackend[i] << " = " <<  r << endl;
1870            if (r>maxstackuse)
1871            {
1872                    maxstackuse=r;
1873            }
1874    }    }
1875      cout << "Max resolve Stack use=" << maxstackuse << endl;
1876    #endif
1877    return resptr;    return resptr;
1878  }  }
1879    
# Line 1058  std::string Line 1881  std::string
1881  DataLazy::toString() const  DataLazy::toString() const
1882  {  {
1883    ostringstream oss;    ostringstream oss;
1884    oss << "Lazy Data:";    oss << "Lazy Data: [depth=" << m_height<< "] ";
1885    intoString(oss);    switch (escriptParams.getLAZY_STR_FMT())
1886      {
1887      case 1:       // tree format
1888            oss << endl;
1889            intoTreeString(oss,"");
1890            break;
1891      case 2:       // just the depth
1892            break;
1893      default:
1894            intoString(oss);
1895            break;
1896      }
1897    return oss.str();    return oss.str();
1898  }  }
1899    
# Line 1067  DataLazy::toString() const Line 1901  DataLazy::toString() const
1901  void  void
1902  DataLazy::intoString(ostringstream& oss) const  DataLazy::intoString(ostringstream& oss) const
1903  {  {
1904    //    oss << "[" << m_children <<";"<<m_height <<"]";
1905    switch (getOpgroup(m_op))    switch (getOpgroup(m_op))
1906    {    {
1907    case G_IDENTITY:    case G_IDENTITY:
1908      if (m_id->isExpanded())          if (m_id->isExpanded())
1909      {          {
1910         oss << "E";             oss << "E";
1911      }          }
1912      else if (m_id->isTagged())          else if (m_id->isTagged())
1913      {          {
1914        oss << "T";            oss << "T";
1915      }          }
1916      else if (m_id->isConstant())          else if (m_id->isConstant())
1917      {          {
1918        oss << "C";            oss << "C";
1919      }          }
1920      else          else
1921      {          {
1922        oss << "?";            oss << "?";
1923      }          }
1924      oss << '@' << m_id.get();          oss << '@' << m_id.get();
1925      break;          break;
1926    case G_BINARY:    case G_BINARY:
1927      oss << '(';          oss << '(';
1928      m_left->intoString(oss);          m_left->intoString(oss);
1929      oss << ' ' << opToString(m_op) << ' ';          oss << ' ' << opToString(m_op) << ' ';
1930      m_right->intoString(oss);          m_right->intoString(oss);
1931      oss << ')';          oss << ')';
1932      break;          break;
1933    case G_UNARY:    case G_UNARY:
1934      case G_UNARY_P:
1935    case G_NP1OUT:    case G_NP1OUT:
1936      oss << opToString(m_op) << '(';    case G_NP1OUT_P:
1937      m_left->intoString(oss);    case G_REDUCTION:
1938      oss << ')';          oss << opToString(m_op) << '(';
1939      break;          m_left->intoString(oss);
1940            oss << ')';
1941            break;
1942    case G_TENSORPROD:    case G_TENSORPROD:
1943      oss << opToString(m_op) << '(';          oss << opToString(m_op) << '(';
1944      m_left->intoString(oss);          m_left->intoString(oss);
1945      oss << ", ";          oss << ", ";
1946      m_right->intoString(oss);          m_right->intoString(oss);
1947      oss << ')';          oss << ')';
1948      break;          break;
1949      case G_NP1OUT_2P:
1950            oss << opToString(m_op) << '(';
1951            m_left->intoString(oss);
1952            oss << ", " << m_axis_offset << ", " << m_transpose;
1953            oss << ')';
1954            break;
1955      case G_CONDEVAL:
1956            oss << opToString(m_op)<< '(' ;
1957            m_mask->intoString(oss);
1958            oss << " ? ";
1959            m_left->intoString(oss);
1960            oss << " : ";
1961            m_right->intoString(oss);
1962            oss << ')';
1963            break;
1964      default:
1965            oss << "UNKNOWN";
1966      }
1967    }
1968    
1969    
1970    void
1971    DataLazy::intoTreeString(ostringstream& oss, string indent) const
1972    {
1973      oss << '[' << m_rank << ':' << setw(3) << m_samplesize << "] " << indent;
1974      switch (getOpgroup(m_op))
1975      {
1976      case G_IDENTITY:
1977            if (m_id->isExpanded())
1978            {
1979               oss << "E";
1980            }
1981            else if (m_id->isTagged())
1982            {
1983              oss << "T";
1984            }
1985            else if (m_id->isConstant())
1986            {
1987              oss << "C";
1988            }
1989            else
1990            {
1991              oss << "?";
1992            }
1993            oss << '@' << m_id.get() << endl;
1994            break;
1995      case G_BINARY:
1996            oss << opToString(m_op) << endl;
1997            indent+='.';
1998            m_left->intoTreeString(oss, indent);
1999            m_right->intoTreeString(oss, indent);
2000            break;
2001      case G_UNARY:
2002      case G_UNARY_P:
2003      case G_NP1OUT:
2004      case G_NP1OUT_P:
2005      case G_REDUCTION:
2006            oss << opToString(m_op) << endl;
2007            indent+='.';
2008            m_left->intoTreeString(oss, indent);
2009            break;
2010      case G_TENSORPROD:
2011            oss << opToString(m_op) << endl;
2012            indent+='.';
2013            m_left->intoTreeString(oss, indent);
2014            m_right->intoTreeString(oss, indent);
2015            break;
2016      case G_NP1OUT_2P:
2017            oss << opToString(m_op) << ", " << m_axis_offset << ", " << m_transpose<< endl;
2018            indent+='.';
2019            m_left->intoTreeString(oss, indent);
2020            break;
2021    default:    default:
2022      oss << "UNKNOWN";          oss << "UNKNOWN";
2023    }    }
2024  }  }
2025    
2026    
2027  DataAbstract*  DataAbstract*
2028  DataLazy::deepCopy()  DataLazy::deepCopy() const
2029  {  {
2030    switch (getOpgroup(m_op))    switch (getOpgroup(m_op))
2031    {    {
2032    case G_IDENTITY:  return new DataLazy(m_id->deepCopy()->getPtr());    case G_IDENTITY:  return new DataLazy(m_id->deepCopy()->getPtr());
2033    case G_UNARY: return new DataLazy(m_left->deepCopy()->getPtr(),m_op);    case G_UNARY:
2034    case G_BINARY:    return new DataLazy(m_left->deepCopy()->getPtr(),m_right->deepCopy()->getPtr(),m_op);    case G_REDUCTION:      return new DataLazy(m_left->deepCopy()->getPtr(),m_op);
2035      case G_UNARY_P:       return new DataLazy(m_left->deepCopy()->getPtr(), m_op, m_tol);
2036      case G_BINARY:        return new DataLazy(m_left->deepCopy()->getPtr(),m_right->deepCopy()->getPtr(),m_op);
2037    case G_NP1OUT: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(),m_op);    case G_NP1OUT: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(),m_op);
2038    case G_TENSORPROD: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);    case G_TENSORPROD: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
2039      case G_NP1OUT_P:   return new DataLazy(m_left->deepCopy()->getPtr(),m_op,  m_axis_offset);
2040      case G_NP1OUT_2P:  return new DataLazy(m_left->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
2041    default:    default:
2042      throw DataException("Programmer error - do not know how to deepcopy operator "+opToString(m_op)+".");          throw DataException("Programmer error - do not know how to deepcopy operator "+opToString(m_op)+".");
2043    }    }
2044  }  }
2045    
2046    
2047    
2048  // There is no single, natural interpretation of getLength on DataLazy.  // There is no single, natural interpretation of getLength on DataLazy.
2049  // Instances of DataReady can look at the size of their vectors.  // Instances of DataReady can look at the size of their vectors.
2050  // For lazy though, it could be the size the data would be if it were resolved;  // For lazy though, it could be the size the data would be if it were resolved;
2051  // or it could be some function of the lengths of the DataReady instances which  // or it could be some function of the lengths of the DataReady instances which
2052  // form part of the expression.  // form part of the expression.
2053  // Rather than have people making assumptions, I have disabled the method.  // Rather than have people making assumptions, I have disabled the method.
2054  DataTypes::ValueType::size_type  DataTypes::RealVectorType::size_type
2055  DataLazy::getLength() const  DataLazy::getLength() const
2056  {  {
2057    throw DataException("getLength() does not make sense for lazy data.");    throw DataException("getLength() does not make sense for lazy data.");
# Line 1150  DataLazy::getSlice(const DataTypes::Regi Line 2066  DataLazy::getSlice(const DataTypes::Regi
2066    
2067    
2068  // To do this we need to rely on our child nodes  // To do this we need to rely on our child nodes
2069  DataTypes::ValueType::size_type  DataTypes::RealVectorType::size_type
2070  DataLazy::getPointOffset(int sampleNo,  DataLazy::getPointOffset(int sampleNo,
2071                   int dataPointNo)                   int dataPointNo)
2072  {  {
2073    if (m_op==IDENTITY)    if (m_op==IDENTITY)
2074    {    {
2075      return m_id->getPointOffset(sampleNo,dataPointNo);          return m_id->getPointOffset(sampleNo,dataPointNo);
2076    }    }
2077    if (m_readytype!='E')    if (m_readytype!='E')
2078    {    {
2079      collapse();          collapse();
2080      return m_id->getPointOffset(sampleNo,dataPointNo);          return m_id->getPointOffset(sampleNo,dataPointNo);
2081    }    }
2082    // at this point we do not have an identity node and the expression will be Expanded    // at this point we do not have an identity node and the expression will be Expanded
2083    // so we only need to know which child to ask    // so we only need to know which child to ask
2084    if (m_left->m_readytype=='E')    if (m_left->m_readytype=='E')
2085    {    {
2086      return m_left->getPointOffset(sampleNo,dataPointNo);          return m_left->getPointOffset(sampleNo,dataPointNo);
2087    }    }
2088    else    else
2089    {    {
2090      return m_right->getPointOffset(sampleNo,dataPointNo);          return m_right->getPointOffset(sampleNo,dataPointNo);
2091    }    }
2092  }  }
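The decision rule above can be restated compactly. The stand-alone sketch below uses a deliberately simplified node type, not part of DataLazy's API; it only mirrors the idea that an expanded child dictates the sample layout, while constant data collapses to a single sample at offset 0.

    #include <cstddef>

    // Hypothetical simplified node type, for illustration only.
    struct Node {
        char readytype;          // 'E' expanded, 'T' tagged, 'C' constant
        const Node* left;        // null for a leaf holding real data
        const Node* right;       // null for unary operations and leaves
        std::size_t samplesize;  // values per sample, used at leaves
    };

    std::size_t pointOffset(const Node& n, int sampleNo)
    {
        if (n.left == 0)                      // leaf: expanded data has one block per sample
            return (n.readytype == 'E') ? sampleNo * n.samplesize : 0;
        if (n.left->readytype == 'E')         // an expanded child fixes the layout
            return pointOffset(*n.left, sampleNo);
        return pointOffset(n.right ? *n.right : *n.left, sampleNo);
    }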
2093    
2094  // To do this we need to rely on our child nodes  // To do this we need to rely on our child nodes
2095  DataTypes::ValueType::size_type  DataTypes::RealVectorType::size_type
2096  DataLazy::getPointOffset(int sampleNo,  DataLazy::getPointOffset(int sampleNo,
2097                   int dataPointNo) const                   int dataPointNo) const
2098  {  {
2099    if (m_op==IDENTITY)    if (m_op==IDENTITY)
2100    {    {
2101      return m_id->getPointOffset(sampleNo,dataPointNo);          return m_id->getPointOffset(sampleNo,dataPointNo);
2102    }    }
2103    if (m_readytype=='E')    if (m_readytype=='E')
2104    {    {
# Line 1190  DataLazy::getPointOffset(int sampleNo, Line 2106  DataLazy::getPointOffset(int sampleNo,
2106      // so we only need to know which child to ask      // so we only need to know which child to ask
2107      if (m_left->m_readytype=='E')      if (m_left->m_readytype=='E')
2108      {      {
2109      return m_left->getPointOffset(sampleNo,dataPointNo);          return m_left->getPointOffset(sampleNo,dataPointNo);
2110      }      }
2111      else      else
2112      {      {
2113      return m_right->getPointOffset(sampleNo,dataPointNo);          return m_right->getPointOffset(sampleNo,dataPointNo);
2114      }      }
2115    }    }
2116    if (m_readytype=='C')    if (m_readytype=='C')
2117    {    {
2118      return m_left->getPointOffset(sampleNo,dataPointNo); // which child doesn't matter          return m_left->getPointOffset(sampleNo,dataPointNo); // which child doesn't matter
2119    }    }
2120    throw DataException("Programmer error - getPointOffset on lazy data may require collapsing (but this object is marked const).");    throw DataException("Programmer error - getPointOffset on lazy data may require collapsing (but this object is marked const).");
2121  }  }
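The difference between the two overloads is worth spelling out: the non-const version may collapse() a tagged expression before answering, while the const version cannot mutate the node and therefore throws in that case. A hedged illustration, with 'lazyTagged' as a hypothetical DataLazy_ptr whose readytype is 'T':

    DataLazy* mut = lazyTagged.get();
    mut->getPointOffset(0, 0);            // non-const overload: collapses, then answers

    const DataLazy* imm = lazyTagged.get();
    // imm->getPointOffset(0, 0);         // const overload: would throw, since
                                          // collapsing would modify a const object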
2122    
2123  // It would seem that DataTagged will need to be treated differently since even after setting all tags  
2124  // to zero, all the tags from all the DataTags would be in the result.  // I have decided to let Data:: handle this issue.
 // However since they all have the same value (0) whether they are there or not should not matter.  
 // So I have decided that for all types this method will create a constant 0.  
 // It can be promoted up as required.  
 // A possible efficiency concern might be expanded->constant->expanded which has an extra memory management  
// but we can deal with that if it arises.  
2125  void  void
2126  DataLazy::setToZero()  DataLazy::setToZero()
2127  {  {
2128    DataTypes::ValueType v(getNoValues(),0);  //   DataTypes::RealVectorType v(getNoValues(),0);
2129    m_id=DataReady_ptr(new DataConstant(getFunctionSpace(),getShape(),v));  //   m_id=DataReady_ptr(new DataConstant(getFunctionSpace(),getShape(),v));
2130    m_op=IDENTITY;  //   m_op=IDENTITY;
2131    m_right.reset();    //   m_right.reset();  
2132    m_left.reset();  //   m_left.reset();
2133    m_readytype='C';  //   m_readytype='C';
2134    m_buffsRequired=1;  //   m_buffsRequired=1;
2135    
2136      (void)privdebug;  // to stop the compiler complaining about unused privdebug
2137      throw DataException("Programmer error - setToZero not supported for DataLazy (DataLazy objects should be read only).");
2138    }
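Because setToZero() now refuses to modify lazy data, a caller that needs a zeroed object of the same shape has to build one itself. A hedged sketch, reusing the constructor pattern visible in the commented-out code above; 'lazy' is a hypothetical DataLazy_ptr:

    DataTypes::RealVectorType zeros(lazy->getNoValues(), 0);
    DataReady_ptr zero(new DataConstant(lazy->getFunctionSpace(),
                                        lazy->getShape(), zeros));
    // lazy->setToZero();   // would throw: DataLazy objects are read only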
2139    
2140    bool
2141    DataLazy::actsExpanded() const
2142    {
2143            return (m_readytype=='E');
2144  }  }
2145    
2146  }   // end namespace  } // end namespace
2147    
