/[escript]/branches/clazy/escriptcore/src/DataLazy.cpp

Diff of /branches/clazy/escriptcore/src/DataLazy.cpp


trunk/escript/src/DataLazy.cpp revision 2737 by jfenwick, Tue Nov 3 00:44:00 2009 UTC
trunk/escriptcore/src/DataLazy.cpp revision 6072 by jfenwick, Thu Mar 17 00:34:01 2016 UTC
# Line 1  Line 1 
1    
2  /*******************************************************  /*****************************************************************************
3  *  *
4  * Copyright (c) 2003-2009 by University of Queensland  * Copyright (c) 2003-2016 by The University of Queensland
5  * Earth Systems Science Computational Center (ESSCC)  * http://www.uq.edu.au
 * http://www.uq.edu.au/esscc  
6  *  *
7  * Primary Business: Queensland, Australia  * Primary Business: Queensland, Australia
8  * Licensed under the Open Software License version 3.0  * Licensed under the Open Software License version 3.0
9  * http://www.opensource.org/licenses/osl-3.0.php  * http://www.opensource.org/licenses/osl-3.0.php
10  *  *
11  *******************************************************/  * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12    * Development 2012-2013 by School of Earth Sciences
13    * Development from 2014 by Centre for Geoscience Computing (GeoComp)
14    *
15    *****************************************************************************/
16    
17  #include "DataLazy.h"  #include "DataLazy.h"
18    #include "Data.h"
19    #include "DataTypes.h"
20    #include "EscriptParams.h"
21    #include "FunctionSpace.h"
22    #include "UnaryFuncs.h"    // for escript::fsign
23    #include "Utils.h"
24    #include "DataMaths.h"
25    
26  #ifdef USE_NETCDF  #ifdef USE_NETCDF
27  #include <netcdfcpp.h>  #include <netcdfcpp.h>
28  #endif  #endif
 #ifdef PASO_MPI  
 #include <mpi.h>  
 #endif  
 #ifdef _OPENMP  
 #include <omp.h>  
 #endif  
 #include "FunctionSpace.h"  
 #include "DataTypes.h"  
 #include "Data.h"  
 #include "UnaryFuncs.h"     // for escript::fsign  
 #include "Utils.h"  
29    
30  #include "EscriptParams.h"  #include <iomanip> // for some fancy formatting in debug
31    
32    using namespace escript::DataTypes;
33    
34  #include <iomanip>      // for some fancy formatting in debug  #define NO_ARG
35    
36  // #define LAZYDEBUG(X) if (privdebug){X;}  // #define LAZYDEBUG(X) if (privdebug){X;}
37  #define LAZYDEBUG(X)  #define LAZYDEBUG(X)
# Line 44  bool privdebug=false; Line 45  bool privdebug=false;
45    
46  // #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {cerr << "\n!!!!!!! SIZE LIMIT EXCEEDED " << m_children << ";" << m_height << endl << toString() << endl;resolveToIdentity();}  // #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {cerr << "\n!!!!!!! SIZE LIMIT EXCEEDED " << m_children << ";" << m_height << endl << toString() << endl;resolveToIdentity();}
47    
48  #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {resolveToIdentity();}  // #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {cerr << "SIZE LIMIT EXCEEDED " << m_height << endl;resolveToIdentity();}
49    
50    
51    #define SIZELIMIT if (m_height>escript::escriptParams.getTOO_MANY_LEVELS())  {if (escript::escriptParams.getLAZY_VERBOSE()){cerr << "SIZE LIMIT EXCEEDED height=" << m_height << endl;}resolveToIdentity();}
52    
53  /*  /*
54  How does DataLazy work?  How does DataLazy work?
55  ~~~~~~~~~~~~~~~~~~~~~~~  ~~~~~~~~~~~~~~~~~~~~~~~
# Line 58  A special operation, IDENTITY, stores an Line 61  A special operation, IDENTITY, stores an
61  This means that all "internal" nodes in the structure are instances of DataLazy.  This means that all "internal" nodes in the structure are instances of DataLazy.
62    
63  Each operation has a string representation as well as an opgroup - eg G_IDENTITY, G_BINARY, ...  Each operation has a string representation as well as an opgroup - eg G_IDENTITY, G_BINARY, ...
64  Note that IDENITY is not considered a unary operation.  Note that IDENTITY is not considered a unary operation.
65    
66  I am avoiding calling the structure formed a tree because it is not guaranteed to be one (eg c=a+a).  I am avoiding calling the structure formed a tree because it is not guaranteed to be one (eg c=a+a).
67  It must however form a DAG (directed acyclic graph).  It must however form a DAG (directed acyclic graph).
# Line 66  I will refer to individual DataLazy obje Line 69  I will refer to individual DataLazy obje
69    
70  Each node also stores:  Each node also stores:
71  - m_readytype \in {'E','T','C','?'} ~ indicates what sort of DataReady would be produced if the expression was  - m_readytype \in {'E','T','C','?'} ~ indicates what sort of DataReady would be produced if the expression was
72      evaluated.          evaluated.
73  - m_buffsrequired ~ the larged number of samples which would need to be kept simultaneously in order to  - m_buffsrequired ~ the largest number of samples which would need to be kept simultaneously in order to
74      evaluate the expression.          evaluate the expression.
75  - m_samplesize ~ the number of doubles stored in a sample.  - m_samplesize ~ the number of doubles stored in a sample.
76    
77  When a new node is created, the above values are computed based on the values in the child nodes.  When a new node is created, the above values are computed based on the values in the child nodes.
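To make the bookkeeping concrete, here is a small standalone sketch; the struct and its members are illustrative stand-ins, not the real DataLazy class, but the combination rules mirror the constructors later in this file: a node's readytype is the most general of its children's ('E' dominates 'T', which dominates 'C'), its height is one more than the tallest child, and its child count sums the children plus the new links.

    #include <algorithm>
    #include <cassert>

    // Illustration only: a cut-down node carrying the metadata described above.
    struct ToyLazyNode {
        char m_readytype;    // 'E', 'T', 'C' or '?'
        int  m_height;       // longest path down to a leaf
        int  m_children;     // number of nodes below this one
    };

    // Combine two children the way the binary DataLazy constructor does:
    // 'E' wins over 'T', which wins over 'C'.
    ToyLazyNode combine(const ToyLazyNode& l, const ToyLazyNode& r)
    {
        ToyLazyNode n;
        if (l.m_readytype=='E' || r.m_readytype=='E')      n.m_readytype='E';
        else if (l.m_readytype=='T' || r.m_readytype=='T') n.m_readytype='T';
        else                                               n.m_readytype='C';
        n.m_height=std::max(l.m_height, r.m_height)+1;
        n.m_children=l.m_children+r.m_children+2;
        return n;
    }

    int main()
    {
        ToyLazyNode a{'C',0,0}, b{'E',0,0};
        ToyLazyNode c=combine(a,b);        // e.g. c = a + b
        assert(c.m_readytype=='E' && c.m_height==1 && c.m_children==2);
        return 0;
    }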
# Line 109  namespace escript Line 112  namespace escript
112  namespace  namespace
113  {  {
114    
115    
116    // enabling this will print out when ever the maximum stacksize used by resolve increases
117    // it assumes _OPENMP is also in use
118    //#define LAZY_STACK_PROF
119    
120    
121    
122    #ifndef _OPENMP
123      #ifdef LAZY_STACK_PROF
124      #undef LAZY_STACK_PROF
125      #endif
126    #endif
127    
128    
129    #ifdef LAZY_STACK_PROF
130    std::vector<void*> stackstart(getNumberOfThreads());
131    std::vector<void*> stackend(getNumberOfThreads());
132    size_t maxstackuse=0;
133    #endif
134    
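The profiling hooks above only declare per-thread slots; the code that fills them lies outside this hunk. As a rough, assumed sketch of how such measurement is usually wired up (stackstart and maxstackuse come from the declarations above, while the two helper functions and their names are hypothetical):

    #ifdef LAZY_STACK_PROF
    // Sketch only: record the address of a stack local on entry to resolve,
    // then compare against a local taken deeper in the recursion to estimate
    // how much stack that thread used (stack grows downwards on typical platforms).
    void profileEnter(int tid)
    {
        int marker;                                   // any automatic variable
        stackstart[tid]=static_cast<void*>(&marker);
    }

    void profileSample(int tid)
    {
        int marker;
        size_t used=static_cast<char*>(stackstart[tid])-reinterpret_cast<char*>(&marker);
        if (used>maxstackuse) maxstackuse=used;       // report when the maximum grows
    }
    #endif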
135  enum ES_opgroup  enum ES_opgroup
136  {  {
137     G_UNKNOWN,     G_UNKNOWN,
138     G_IDENTITY,     G_IDENTITY,
139     G_BINARY,        // pointwise operations with two arguments     G_BINARY,            // pointwise operations with two arguments
140     G_UNARY,     // pointwise operations with one argument     G_UNARY,             // pointwise operations with one argument
141     G_UNARY_P,       // pointwise operations with one argument, requiring a parameter     G_UNARY_P,           // pointwise operations with one argument, requiring a parameter
142     G_NP1OUT,        // non-pointwise op with one output     G_NP1OUT,            // non-pointwise op with one output
143     G_NP1OUT_P,      // non-pointwise op with one output requiring a parameter     G_NP1OUT_P,          // non-pointwise op with one output requiring a parameter
144     G_TENSORPROD,    // general tensor product     G_TENSORPROD,        // general tensor product
145     G_NP1OUT_2P,     // non-pointwise op with one output requiring two params     G_NP1OUT_2P,         // non-pointwise op with one output requiring two params
146     G_REDUCTION      // non-pointwise unary op with a scalar output     G_REDUCTION,         // non-pointwise unary op with a scalar output
147       G_CONDEVAL
148  };  };
149    
150    
151    
152    
153  string ES_opstrings[]={"UNKNOWN","IDENTITY","+","-","*","/","^",  string ES_opstrings[]={"UNKNOWN","IDENTITY","+","-","*","/","^",
154              "sin","cos","tan",                          "sin","cos","tan",
155              "asin","acos","atan","sinh","cosh","tanh","erf",                          "asin","acos","atan","sinh","cosh","tanh","erf",
156              "asinh","acosh","atanh",                          "asinh","acosh","atanh",
157              "log10","log","sign","abs","neg","pos","exp","sqrt",                          "log10","log","sign","abs","neg","pos","exp","sqrt",
158              "1/","where>0","where<0","where>=0","where<=0", "where<>0","where=0",                          "1/","where>0","where<0","where>=0","where<=0", "where<>0","where=0",
159              "symmetric","nonsymmetric",                          "symmetric","nonsymmetric",
160              "prod",                          "prod",
161              "transpose", "trace",                          "transpose", "trace",
162              "swapaxes",                          "swapaxes",
163              "minval", "maxval"};                          "minval", "maxval",
164  int ES_opcount=43;                          "condEval"};
165    int ES_opcount=44;
166  ES_opgroup opgroups[]={G_UNKNOWN,G_IDENTITY,G_BINARY,G_BINARY,G_BINARY,G_BINARY, G_BINARY,  ES_opgroup opgroups[]={G_UNKNOWN,G_IDENTITY,G_BINARY,G_BINARY,G_BINARY,G_BINARY, G_BINARY,
167              G_UNARY,G_UNARY,G_UNARY, //10                          G_UNARY,G_UNARY,G_UNARY, //10
168              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,    // 17                          G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,        // 17
169              G_UNARY,G_UNARY,G_UNARY,                    // 20                          G_UNARY,G_UNARY,G_UNARY,                                        // 20
170              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,    // 28                          G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,        // 28
171              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY, G_UNARY_P, G_UNARY_P,      // 35                          G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY, G_UNARY_P, G_UNARY_P,          // 35
172              G_NP1OUT,G_NP1OUT,                          G_NP1OUT,G_NP1OUT,
173              G_TENSORPROD,                          G_TENSORPROD,
174              G_NP1OUT_P, G_NP1OUT_P,                          G_NP1OUT_P, G_NP1OUT_P,
175              G_NP1OUT_2P,                          G_NP1OUT_2P,
176              G_REDUCTION, G_REDUCTION};                          G_REDUCTION, G_REDUCTION,
177                            G_CONDEVAL};
178  inline  inline
179  ES_opgroup  ES_opgroup
180  getOpgroup(ES_optype op)  getOpgroup(ES_optype op)
# Line 160  getOpgroup(ES_optype op) Line 186  getOpgroup(ES_optype op)
186  FunctionSpace  FunctionSpace
187  resultFS(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)  resultFS(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
188  {  {
189      // perhaps this should call interpolate and throw or something?          // perhaps this should call interpolate and throw or something?
190      // maybe we need an interpolate node -          // maybe we need an interpolate node -
191      // that way, if interpolate is required in any other op we can just throw a          // that way, if interpolate is required in any other op we can just throw a
192      // programming error exception.          // programming error exception.
193    
194    FunctionSpace l=left->getFunctionSpace();    FunctionSpace l=left->getFunctionSpace();
195    FunctionSpace r=right->getFunctionSpace();    FunctionSpace r=right->getFunctionSpace();
196    if (l!=r)    if (l!=r)
197    {    {
198      if (r.probeInterpolation(l))      signed char res=r.getDomain()->preferredInterpolationOnDomain(r.getTypeCode(), l.getTypeCode());
199        if (res==1)
200      {      {
201      return l;          return l;
202      }      }
203      if (l.probeInterpolation(r))      if (res==-1)
204      {      {
205      return r;          return r;
206      }      }
207      throw DataException("Cannot interpolate between the FunctionSpaces given for operation "+opToString(op)+".");      throw DataException("Cannot interpolate between the FunctionSpaces given for operation "+opToString(op)+".");
208    }    }
# Line 187  resultFS(DataAbstract_ptr left, DataAbst Line 214  resultFS(DataAbstract_ptr left, DataAbst
214  DataTypes::ShapeType  DataTypes::ShapeType
215  resultShape(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)  resultShape(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
216  {  {
217      if (left->getShape()!=right->getShape())          if (left->getShape()!=right->getShape())
218      {          {
219        if ((getOpgroup(op)!=G_BINARY) && (getOpgroup(op)!=G_NP1OUT))            if ((getOpgroup(op)!=G_BINARY) && (getOpgroup(op)!=G_NP1OUT))
220        {            {
221          throw DataException("Shapes not the name - shapes must match for (point)binary operations.");                  throw DataException("Shapes not the name - shapes must match for (point)binary operations.");
222        }            }
223    
224        if (left->getRank()==0)   // we need to allow scalar * anything            if (left->getRank()==0)       // we need to allow scalar * anything
225        {            {
226          return right->getShape();                  return right->getShape();
227        }            }
228        if (right->getRank()==0)            if (right->getRank()==0)
229        {            {
230          return left->getShape();                  return left->getShape();
231        }            }
232        throw DataException("Shapes not the same - arguments must have matching shapes (or be scalars) for (point)binary operations on lazy data.");            throw DataException("Shapes not the same - arguments must have matching shapes (or be scalars) for (point)binary operations on lazy data.");
233      }          }
234      return left->getShape();          return left->getShape();
235  }  }
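The scalar-broadcast rule above can be exercised on its own; the following standalone sketch (plain vectors standing in for DataTypes::ShapeType, not the escript function itself) reproduces it:

    #include <stdexcept>
    #include <vector>

    using Shape = std::vector<int>;   // stand-in for DataTypes::ShapeType

    // Mirrors the rule above: equal shapes pass through, a scalar (rank 0)
    // adopts the other operand's shape, anything else is an error.
    Shape broadcastShape(const Shape& left, const Shape& right)
    {
        if (left==right)   return left;
        if (left.empty())  return right;   // scalar * anything
        if (right.empty()) return left;
        throw std::runtime_error("shapes must match (or one must be scalar)");
    }

    int main()
    {
        Shape s{}, m{3,3};
        return (broadcastShape(s,m)==m && broadcastShape(m,m)==m) ? 0 : 1;
    }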
236    
237  // return the shape for "op left"  // return the shape for "op left"
# Line 212  resultShape(DataAbstract_ptr left, DataA Line 239  resultShape(DataAbstract_ptr left, DataA
239  DataTypes::ShapeType  DataTypes::ShapeType
240  resultShape(DataAbstract_ptr left, ES_optype op, int axis_offset)  resultShape(DataAbstract_ptr left, ES_optype op, int axis_offset)
241  {  {
242      switch(op)          switch(op)
243      {          {
244          case TRANS:          case TRANS:
245         {            // for the scoping of variables             {                    // for the scoping of variables
246          const DataTypes::ShapeType& s=left->getShape();                  const DataTypes::ShapeType& s=left->getShape();
247          DataTypes::ShapeType sh;                  DataTypes::ShapeType sh;
248          int rank=left->getRank();                  int rank=left->getRank();
249          if (axis_offset<0 || axis_offset>rank)                  if (axis_offset<0 || axis_offset>rank)
250          {                  {
251                 throw DataException("Error - Data::transpose must have 0 <= axis_offset <= rank=" + rank);              stringstream e;
252              }              e << "Error - Data::transpose must have 0 <= axis_offset <= rank=" << rank;
253              for (int i=0; i<rank; i++)              throw DataException(e.str());
254          {          }
255             int index = (axis_offset+i)%rank;          for (int i=0; i<rank; i++)
256                 sh.push_back(s[index]); // Append to new shape                  {
257              }                     int index = (axis_offset+i)%rank;
258          return sh;             sh.push_back(s[index]); // Append to new shape
259         }          }
260      break;                  return sh;
261      case TRACE:             }
262         {          break;
263          int rank=left->getRank();          case TRACE:
264          if (rank<2)             {
265          {                  int rank=left->getRank();
266             throw DataException("Trace can only be computed for objects with rank 2 or greater.");                  if (rank<2)
267          }                  {
268          if ((axis_offset>rank-2) || (axis_offset<0))                     throw DataException("Trace can only be computed for objects with rank 2 or greater.");
269          {                  }
270             throw DataException("Trace: axis offset must lie between 0 and rank-2 inclusive.");                  if ((axis_offset>rank-2) || (axis_offset<0))
271          }                  {
272          if (rank==2)                     throw DataException("Trace: axis offset must lie between 0 and rank-2 inclusive.");
273          {                  }
274             return DataTypes::scalarShape;                  if (rank==2)
275          }                  {
276          else if (rank==3)                     return DataTypes::scalarShape;
277          {                  }
278             DataTypes::ShapeType sh;                  else if (rank==3)
279                 if (axis_offset==0)                  {
280             {                     DataTypes::ShapeType sh;
281                  sh.push_back(left->getShape()[2]);                     if (axis_offset==0)
282                 }                     {
283                 else     // offset==1                          sh.push_back(left->getShape()[2]);
284             {                     }
285              sh.push_back(left->getShape()[0]);                     else         // offset==1
286                 }                     {
287             return sh;                          sh.push_back(left->getShape()[0]);
288          }                     }
289          else if (rank==4)                     return sh;
290          {                  }
291             DataTypes::ShapeType sh;                  else if (rank==4)
292             const DataTypes::ShapeType& s=left->getShape();                  {
293                 if (axis_offset==0)                     DataTypes::ShapeType sh;
294             {                     const DataTypes::ShapeType& s=left->getShape();
295                  sh.push_back(s[2]);                     if (axis_offset==0)
296                  sh.push_back(s[3]);                     {
297                 }                          sh.push_back(s[2]);
298                 else if (axis_offset==1)                          sh.push_back(s[3]);
299             {                     }
300                  sh.push_back(s[0]);                     else if (axis_offset==1)
301                  sh.push_back(s[3]);                     {
302                 }                          sh.push_back(s[0]);
303             else     // offset==2                          sh.push_back(s[3]);
304             {                     }
305              sh.push_back(s[0]);                     else         // offset==2
306              sh.push_back(s[1]);                     {
307             }                          sh.push_back(s[0]);
308             return sh;                          sh.push_back(s[1]);
309          }                     }
310          else        // unknown rank                     return sh;
311          {                  }
312             throw DataException("Error - Data::trace can only be calculated for rank 2, 3 or 4 object.");                  else            // unknown rank
313          }                  {
314         }                     throw DataException("Error - Data::trace can only be calculated for rank 2, 3 or 4 object.");
315      break;                  }
316          default:             }
317      throw DataException("Programmer error - resultShape(left,op) can't compute shapes for operator "+opToString(op)+".");          break;
318      }          default:
319            throw DataException("Programmer error - resultShape(left,op) can't compute shapes for operator "+opToString(op)+".");
320            }
321  }  }
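A quick worked example of the TRANS case above: output dimension i takes s[(axis_offset+i)%rank], so a (2,3,4) object transposed with axis_offset=1 gets shape (3,4,2). The standalone sketch below reruns just that index arithmetic:

    #include <cassert>
    #include <vector>

    using Shape = std::vector<int>;

    // Same index arithmetic as the TRANS case above.
    Shape transposeShape(const Shape& s, int axis_offset)
    {
        const int rank=static_cast<int>(s.size());
        Shape sh;
        for (int i=0; i<rank; i++)
            sh.push_back(s[(axis_offset+i)%rank]);
        return sh;
    }

    int main()
    {
        assert((transposeShape({2,3,4},1)==Shape{3,4,2}));
        assert((transposeShape({2,3,4},0)==Shape{2,3,4}));
        return 0;
    }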
322    
323  DataTypes::ShapeType  DataTypes::ShapeType
# Line 306  SwapShape(DataAbstract_ptr left, const i Line 335  SwapShape(DataAbstract_ptr left, const i
335          throw DataException("Error - Data::swapaxes argument must have at least rank 2.");          throw DataException("Error - Data::swapaxes argument must have at least rank 2.");
336       }       }
337       if (axis0<0 || axis0>rank-1) {       if (axis0<0 || axis0>rank-1) {
338          throw DataException("Error - Data::swapaxes: axis0 must be between 0 and rank-1=" + rank-1);          stringstream e;
339            e << "Error - Data::swapaxes: axis0 must be between 0 and rank-1=" << (rank-1);
340            throw DataException(e.str());
341       }       }
342       if (axis1<0 || axis1>rank-1) {       if (axis1<0 || axis1>rank-1) {
343           throw DataException("Error - Data::swapaxes: axis1 must be between 0 and rank-1=" + rank-1);          stringstream e;
344            e << "Error - Data::swapaxes: axis1 must be between 0 and rank-1=" << (rank-1);
345            throw DataException(e.str());
346       }       }
347       if (axis0 == axis1) {       if (axis0 == axis1) {
348           throw DataException("Error - Data::swapaxes: axis indices must be different.");           throw DataException("Error - Data::swapaxes: axis indices must be different.");
# Line 340  SwapShape(DataAbstract_ptr left, const i Line 373  SwapShape(DataAbstract_ptr left, const i
373  DataTypes::ShapeType  DataTypes::ShapeType
374  GTPShape(DataAbstract_ptr left, DataAbstract_ptr right, int axis_offset, int transpose, int& SL, int& SM, int& SR)  GTPShape(DataAbstract_ptr left, DataAbstract_ptr right, int axis_offset, int transpose, int& SL, int& SM, int& SR)
375  {  {
376                
377    // Get rank and shape of inputs    // Get rank and shape of inputs
378    int rank0 = left->getRank();    int rank0 = left->getRank();
379    int rank1 = right->getRank();    int rank1 = right->getRank();
# Line 349  GTPShape(DataAbstract_ptr left, DataAbst Line 382  GTPShape(DataAbstract_ptr left, DataAbst
382    
383    // Prepare for the loops of the product and verify compatibility of shapes    // Prepare for the loops of the product and verify compatibility of shapes
384    int start0=0, start1=0;    int start0=0, start1=0;
385    if (transpose == 0)       {}    if (transpose == 0)           {}
386    else if (transpose == 1)  { start0 = axis_offset; }    else if (transpose == 1)      { start0 = axis_offset; }
387    else if (transpose == 2)  { start1 = rank1-axis_offset; }    else if (transpose == 2)      { start1 = rank1-axis_offset; }
388    else              { throw DataException("DataLazy GeneralTensorProduct Constructor: Error - transpose should be 0, 1 or 2"); }    else                          { throw DataException("DataLazy GeneralTensorProduct Constructor: Error - transpose should be 0, 1 or 2"); }
389    
390    if (rank0<axis_offset)    if (rank0<axis_offset)
391    {    {
392      throw DataException("DataLazy GeneralTensorProduct Constructor: Error - rank of left < axisoffset");          throw DataException("DataLazy GeneralTensorProduct Constructor: Error - rank of left < axisoffset");
393    }    }
394    
395    // Adjust the shapes for transpose    // Adjust the shapes for transpose
396    DataTypes::ShapeType tmpShape0(rank0);    // pre-sizing the vectors rather    DataTypes::ShapeType tmpShape0(rank0);        // pre-sizing the vectors rather
397    DataTypes::ShapeType tmpShape1(rank1);    // than using push_back    DataTypes::ShapeType tmpShape1(rank1);        // than using push_back
398    for (int i=0; i<rank0; i++)   { tmpShape0[i]=shape0[(i+start0)%rank0]; }    for (int i=0; i<rank0; i++)   { tmpShape0[i]=shape0[(i+start0)%rank0]; }
399    for (int i=0; i<rank1; i++)   { tmpShape1[i]=shape1[(i+start1)%rank1]; }    for (int i=0; i<rank1; i++)   { tmpShape1[i]=shape1[(i+start1)%rank1]; }
400    
401    // Prepare for the loops of the product    // Prepare for the loops of the product
402    SL=1, SM=1, SR=1;    SL=1, SM=1, SR=1;
403    for (int i=0; i<rank0-axis_offset; i++)   {    for (int i=0; i<rank0-axis_offset; i++)       {
404      SL *= tmpShape0[i];      SL *= tmpShape0[i];
405    }    }
406    for (int i=rank0-axis_offset; i<rank0; i++)   {    for (int i=rank0-axis_offset; i<rank0; i++)   {
407      if (tmpShape0[i] != tmpShape1[i-(rank0-axis_offset)]) {      if (tmpShape0[i] != tmpShape1[i-(rank0-axis_offset)]) {
408        throw DataException("C_GeneralTensorProduct: Error - incompatible shapes");        throw DataException("C_GeneralTensorProduct: Error - incompatible shapes");
409      }      }
410      SM *= tmpShape0[i];      SM *= tmpShape0[i];
411    }    }
412    for (int i=axis_offset; i<rank1; i++)     {    for (int i=axis_offset; i<rank1; i++)         {
413      SR *= tmpShape1[i];      SR *= tmpShape1[i];
414    }    }
415    
416    // Define the shape of the output (rank of shape is the sum of the loop ranges below)    // Define the shape of the output (rank of shape is the sum of the loop ranges below)
417    DataTypes::ShapeType shape2(rank0+rank1-2*axis_offset);      DataTypes::ShapeType shape2(rank0+rank1-2*axis_offset);      
418    {         // block to limit the scope of out_index    {                     // block to limit the scope of out_index
419       int out_index=0;       int out_index=0;
420       for (int i=0; i<rank0-axis_offset; i++, ++out_index) { shape2[out_index]=tmpShape0[i]; } // First part of arg_0_Z       for (int i=0; i<rank0-axis_offset; i++, ++out_index) { shape2[out_index]=tmpShape0[i]; } // First part of arg_0_Z
421       for (int i=axis_offset; i<rank1; i++, ++out_index)   { shape2[out_index]=tmpShape1[i]; } // Last part of arg_1_Z       for (int i=axis_offset; i<rank1; i++, ++out_index)   { shape2[out_index]=tmpShape1[i]; } // Last part of arg_1_Z
# Line 398  GTPShape(DataAbstract_ptr left, DataAbst Line 431  GTPShape(DataAbstract_ptr left, DataAbst
431    return shape2;    return shape2;
432  }  }
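Worked example for the product shape above: a plain matrix-matrix product with left shape (2,3), right shape (3,4), axis_offset=1 and transpose=0 gives SL=2, SM=3, SR=4 and an output shape of (2,4). The standalone sketch below redoes that arithmetic outside the class (transpose handling omitted since transpose=0):

    #include <cassert>
    #include <vector>

    int main()
    {
        std::vector<int> shape0{2,3}, shape1{3,4};
        int axis_offset=1;
        int rank0=static_cast<int>(shape0.size());
        int rank1=static_cast<int>(shape1.size());

        int SL=1, SM=1, SR=1;
        for (int i=0; i<rank0-axis_offset; i++)          SL*=shape0[i];
        for (int i=rank0-axis_offset; i<rank0; i++) {
            assert(shape0[i]==shape1[i-(rank0-axis_offset)]);  // inner dims must match
            SM*=shape0[i];
        }
        for (int i=axis_offset; i<rank1; i++)            SR*=shape1[i];

        std::vector<int> shape2;                  // rank0+rank1-2*axis_offset entries
        for (int i=0; i<rank0-axis_offset; i++)   shape2.push_back(shape0[i]);
        for (int i=axis_offset; i<rank1; i++)     shape2.push_back(shape1[i]);

        assert(SL==2 && SM==3 && SR==4);
        assert((shape2==std::vector<int>{2,4}));
        return 0;
    }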
433    
434  // determine the number of samples required to evaluate an expression combining left and right  }       // end anonymous namespace
 // NP1OUT needs an extra buffer because we can't write the answers over the top of the input.  
 // The same goes for G_TENSORPROD  
 // It might seem that pointwise binary ops (G_BINARY) could be written over the top of the lefts.  
 // This would be true were it not for the possibility that the LHS could be a scalar which needs to be examined  
 // multiple times  
 int  
 calcBuffs(const DataLazy_ptr& left, const DataLazy_ptr& right, ES_optype op)  
 {  
    switch(getOpgroup(op))  
    {  
    case G_IDENTITY: return 1;  
    case G_BINARY: return 1+max(left->getBuffsRequired(),right->getBuffsRequired()+1);  
    case G_REDUCTION:  
    case G_UNARY:  
    case G_UNARY_P: return max(left->getBuffsRequired(),1);  
    case G_NP1OUT: return 1+max(left->getBuffsRequired(),1);  
    case G_NP1OUT_P: return 1+max(left->getBuffsRequired(),1);  
    case G_TENSORPROD: return 1+max(left->getBuffsRequired(),right->getBuffsRequired()+1);  
    case G_NP1OUT_2P: return 1+max(left->getBuffsRequired(),1);  
    default:  
     throw DataException("Programmer Error - attempt to calcBuffs() for operator "+opToString(op)+".");  
    }  
 }  
   
   
 }   // end anonymous namespace  
435    
436    
437    
# Line 439  opToString(ES_optype op) Line 446  opToString(ES_optype op)
446    return ES_opstrings[op];    return ES_opstrings[op];
447  }  }
448    
 #ifdef LAZY_NODE_STORAGE  
449  void DataLazy::LazyNodeSetup()  void DataLazy::LazyNodeSetup()
450  {  {
451  #ifdef _OPENMP  #ifdef _OPENMP
# Line 456  void DataLazy::LazyNodeSetup() Line 462  void DataLazy::LazyNodeSetup()
462      m_sampleids[0]=-1;      m_sampleids[0]=-1;
463  #endif  // _OPENMP  #endif  // _OPENMP
464  }  }
 #endif   // LAZY_NODE_STORAGE  
465    
466    
467  // Creates an identity node  // Creates an identity node
468  DataLazy::DataLazy(DataAbstract_ptr p)  DataLazy::DataLazy(DataAbstract_ptr p)
469      : parent(p->getFunctionSpace(),p->getShape())          : parent(p->getFunctionSpace(),p->getShape())
470  #ifdef LAZY_NODE_STORAGE          ,m_sampleids(0),
471      ,m_sampleids(0),          m_samples(1)
     m_samples(1)  
 #endif  
472  {  {
473     if (p->isLazy())     if (p->isLazy())
474     {     {
475      // I don't want identity of Lazy.          // I don't want identity of Lazy.
476      // Question: Why would that be so bad?          // Question: Why would that be so bad?
477      // Answer: We assume that the child of ID is something we can call getVector on          // Answer: We assume that the child of ID is something we can call getVector on
478      throw DataException("Programmer error - attempt to create identity from a DataLazy.");          throw DataException("Programmer error - attempt to create identity from a DataLazy.");
479     }     }
480     else     else
481     {     {
482      p->makeLazyShared();          p->makeLazyShared();
483      DataReady_ptr dr=dynamic_pointer_cast<DataReady>(p);          DataReady_ptr dr=dynamic_pointer_cast<DataReady>(p);
484      makeIdentity(dr);          makeIdentity(dr);
485  LAZYDEBUG(cout << "Wrapping " << dr.get() << " id=" << m_id.get() << endl;)  LAZYDEBUG(cout << "Wrapping " << dr.get() << " id=" << m_id.get() << endl;)
486     }     }
487  LAZYDEBUG(cout << "(1)Lazy created with " << m_samplesize << endl;)  LAZYDEBUG(cout << "(1)Lazy created with " << m_samplesize << endl;)
488  }  }
489    
490  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op)  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op)
491      : parent(left->getFunctionSpace(),(getOpgroup(op)!=G_REDUCTION)?left->getShape():DataTypes::scalarShape),          : parent(left->getFunctionSpace(),(getOpgroup(op)!=G_REDUCTION)?left->getShape():DataTypes::scalarShape),
492      m_op(op),          m_op(op),
493      m_axis_offset(0),          m_axis_offset(0),
494      m_transpose(0),          m_transpose(0),
495      m_SL(0), m_SM(0), m_SR(0)          m_SL(0), m_SM(0), m_SR(0)
496  {  {
497     if ((getOpgroup(op)!=G_UNARY) && (getOpgroup(op)!=G_NP1OUT) && (getOpgroup(op)!=G_REDUCTION))     if ((getOpgroup(op)!=G_UNARY) && (getOpgroup(op)!=G_NP1OUT) && (getOpgroup(op)!=G_REDUCTION))
498     {     {
499      throw DataException("Programmer error - constructor DataLazy(left, op) will only process UNARY operations.");          throw DataException("Programmer error - constructor DataLazy(left, op) will only process UNARY operations.");
500     }     }
501    
502     DataLazy_ptr lleft;     DataLazy_ptr lleft;
503     if (!left->isLazy())     if (!left->isLazy())
504     {     {
505      lleft=DataLazy_ptr(new DataLazy(left));          lleft=DataLazy_ptr(new DataLazy(left));
506     }     }
507     else     else
508     {     {
509      lleft=dynamic_pointer_cast<DataLazy>(left);          lleft=dynamic_pointer_cast<DataLazy>(left);
510     }     }
511     m_readytype=lleft->m_readytype;     m_readytype=lleft->m_readytype;
512     m_left=lleft;     m_left=lleft;
    m_buffsRequired=calcBuffs(m_left, m_right,m_op); // yeah m_right will be null at this point  
513     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
    m_maxsamplesize=max(m_samplesize,m_left->getMaxSampleSize());  
514     m_children=m_left->m_children+1;     m_children=m_left->m_children+1;
515     m_height=m_left->m_height+1;     m_height=m_left->m_height+1;
 #ifdef LAZY_NODE_STORAGE  
516     LazyNodeSetup();     LazyNodeSetup();
 #endif  
517     SIZELIMIT     SIZELIMIT
518  }  }
519    
520    
521  // In this constructor we need to consider interpolation  // In this constructor we need to consider interpolation
522  DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)  DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
523      : parent(resultFS(left,right,op), resultShape(left,right,op)),          : parent(resultFS(left,right,op), resultShape(left,right,op)),
524      m_op(op),          m_op(op),
525      m_SL(0), m_SM(0), m_SR(0)          m_SL(0), m_SM(0), m_SR(0)
526  {  {
527  LAZYDEBUG(cout << "Forming operator with " << left.get() << " " << right.get() << endl;)  LAZYDEBUG(cout << "Forming operator with " << left.get() << " " << right.get() << endl;)
528     if ((getOpgroup(op)!=G_BINARY))     if ((getOpgroup(op)!=G_BINARY))
529     {     {
530      throw DataException("Programmer error - constructor DataLazy(left, right, op) will only process BINARY operations.");          throw DataException("Programmer error - constructor DataLazy(left, right, op) will only process BINARY operations.");
531     }     }
532    
533     if (getFunctionSpace()!=left->getFunctionSpace())    // left needs to be interpolated     if (getFunctionSpace()!=left->getFunctionSpace())    // left needs to be interpolated
534     {     {
535      FunctionSpace fs=getFunctionSpace();          FunctionSpace fs=getFunctionSpace();
536      Data ltemp(left);          Data ltemp(left);
537      Data tmp(ltemp,fs);          Data tmp(ltemp,fs);
538      left=tmp.borrowDataPtr();          left=tmp.borrowDataPtr();
539     }     }
540     if (getFunctionSpace()!=right->getFunctionSpace())   // right needs to be interpolated     if (getFunctionSpace()!=right->getFunctionSpace())   // right needs to be interpolated
541     {     {
542      Data tmp(Data(right),getFunctionSpace());          Data tmp(Data(right),getFunctionSpace());
543      right=tmp.borrowDataPtr();          right=tmp.borrowDataPtr();
544  LAZYDEBUG(cout << "Right interpolation required " << right.get() << endl;)  LAZYDEBUG(cout << "Right interpolation required " << right.get() << endl;)
545     }     }
546     left->operandCheck(*right);     left->operandCheck(*right);
547    
548     if (left->isLazy())          // the children need to be DataLazy. Wrap them in IDENTITY if required     if (left->isLazy())                  // the children need to be DataLazy. Wrap them in IDENTITY if required
549     {     {
550      m_left=dynamic_pointer_cast<DataLazy>(left);          m_left=dynamic_pointer_cast<DataLazy>(left);
551  LAZYDEBUG(cout << "Left is " << m_left->toString() << endl;)  LAZYDEBUG(cout << "Left is " << m_left->toString() << endl;)
552     }     }
553     else     else
554     {     {
555      m_left=DataLazy_ptr(new DataLazy(left));          m_left=DataLazy_ptr(new DataLazy(left));
556  LAZYDEBUG(cout << "Left " << left.get() << " wrapped " << m_left->m_id.get() << endl;)  LAZYDEBUG(cout << "Left " << left.get() << " wrapped " << m_left->m_id.get() << endl;)
557     }     }
558     if (right->isLazy())     if (right->isLazy())
559     {     {
560      m_right=dynamic_pointer_cast<DataLazy>(right);          m_right=dynamic_pointer_cast<DataLazy>(right);
561  LAZYDEBUG(cout << "Right is " << m_right->toString() << endl;)  LAZYDEBUG(cout << "Right is " << m_right->toString() << endl;)
562     }     }
563     else     else
564     {     {
565      m_right=DataLazy_ptr(new DataLazy(right));          m_right=DataLazy_ptr(new DataLazy(right));
566  LAZYDEBUG(cout << "Right " << right.get() << " wrapped " << m_right->m_id.get() << endl;)  LAZYDEBUG(cout << "Right " << right.get() << " wrapped " << m_right->m_id.get() << endl;)
567     }     }
568     char lt=m_left->m_readytype;     char lt=m_left->m_readytype;
569     char rt=m_right->m_readytype;     char rt=m_right->m_readytype;
570     if (lt=='E' || rt=='E')     if (lt=='E' || rt=='E')
571     {     {
572      m_readytype='E';          m_readytype='E';
573     }     }
574     else if (lt=='T' || rt=='T')     else if (lt=='T' || rt=='T')
575     {     {
576      m_readytype='T';          m_readytype='T';
577     }     }
578     else     else
579     {     {
580      m_readytype='C';          m_readytype='C';
581     }     }
582     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
    m_maxsamplesize=max(max(m_samplesize,m_right->getMaxSampleSize()),m_left->getMaxSampleSize());    
    m_buffsRequired=calcBuffs(m_left, m_right,m_op);  
583     m_children=m_left->m_children+m_right->m_children+2;     m_children=m_left->m_children+m_right->m_children+2;
584     m_height=max(m_left->m_height,m_right->m_height)+1;     m_height=max(m_left->m_height,m_right->m_height)+1;
 #ifdef LAZY_NODE_STORAGE  
585     LazyNodeSetup();     LazyNodeSetup();
 #endif  
586     SIZELIMIT     SIZELIMIT
587  LAZYDEBUG(cout << "(3)Lazy created with " << m_samplesize << endl;)  LAZYDEBUG(cout << "(3)Lazy created with " << m_samplesize << endl;)
588  }  }
589    
590  DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op, int axis_offset, int transpose)  DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op, int axis_offset, int transpose)
591      : parent(resultFS(left,right,op), GTPShape(left,right, axis_offset, transpose, m_SL,m_SM, m_SR)),          : parent(resultFS(left,right,op), GTPShape(left,right, axis_offset, transpose, m_SL,m_SM, m_SR)),
592      m_op(op),          m_op(op),
593      m_axis_offset(axis_offset),          m_axis_offset(axis_offset),
594      m_transpose(transpose)          m_transpose(transpose)
595  {  {
596     if ((getOpgroup(op)!=G_TENSORPROD))     if ((getOpgroup(op)!=G_TENSORPROD))
597     {     {
598      throw DataException("Programmer error - constructor DataLazy(left, right, op, ax, tr) will only process BINARY operations which require parameters.");          throw DataException("Programmer error - constructor DataLazy(left, right, op, ax, tr) will only process BINARY operations which require parameters.");
599     }     }
600     if ((transpose>2) || (transpose<0))     if ((transpose>2) || (transpose<0))
601     {     {
602      throw DataException("DataLazy GeneralTensorProduct constructor: Error - transpose should be 0, 1 or 2");          throw DataException("DataLazy GeneralTensorProduct constructor: Error - transpose should be 0, 1 or 2");
603     }     }
604     if (getFunctionSpace()!=left->getFunctionSpace())    // left needs to be interpolated     if (getFunctionSpace()!=left->getFunctionSpace())    // left needs to be interpolated
605     {     {
606      FunctionSpace fs=getFunctionSpace();          FunctionSpace fs=getFunctionSpace();
607      Data ltemp(left);          Data ltemp(left);
608      Data tmp(ltemp,fs);          Data tmp(ltemp,fs);
609      left=tmp.borrowDataPtr();          left=tmp.borrowDataPtr();
610     }     }
611     if (getFunctionSpace()!=right->getFunctionSpace())   // right needs to be interpolated     if (getFunctionSpace()!=right->getFunctionSpace())   // right needs to be interpolated
612     {     {
613      Data tmp(Data(right),getFunctionSpace());          Data tmp(Data(right),getFunctionSpace());
614      right=tmp.borrowDataPtr();          right=tmp.borrowDataPtr();
615     }     }
616  //    left->operandCheck(*right);  //    left->operandCheck(*right);
617    
618     if (left->isLazy())          // the children need to be DataLazy. Wrap them in IDENTITY if required     if (left->isLazy())                  // the children need to be DataLazy. Wrap them in IDENTITY if required
619     {     {
620      m_left=dynamic_pointer_cast<DataLazy>(left);          m_left=dynamic_pointer_cast<DataLazy>(left);
621     }     }
622     else     else
623     {     {
624      m_left=DataLazy_ptr(new DataLazy(left));          m_left=DataLazy_ptr(new DataLazy(left));
625     }     }
626     if (right->isLazy())     if (right->isLazy())
627     {     {
628      m_right=dynamic_pointer_cast<DataLazy>(right);          m_right=dynamic_pointer_cast<DataLazy>(right);
629     }     }
630     else     else
631     {     {
632      m_right=DataLazy_ptr(new DataLazy(right));          m_right=DataLazy_ptr(new DataLazy(right));
633     }     }
634     char lt=m_left->m_readytype;     char lt=m_left->m_readytype;
635     char rt=m_right->m_readytype;     char rt=m_right->m_readytype;
636     if (lt=='E' || rt=='E')     if (lt=='E' || rt=='E')
637     {     {
638      m_readytype='E';          m_readytype='E';
639     }     }
640     else if (lt=='T' || rt=='T')     else if (lt=='T' || rt=='T')
641     {     {
642      m_readytype='T';          m_readytype='T';
643     }     }
644     else     else
645     {     {
646      m_readytype='C';          m_readytype='C';
647     }     }
648     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
    m_maxsamplesize=max(max(m_samplesize,m_right->getMaxSampleSize()),m_left->getMaxSampleSize());    
    m_buffsRequired=calcBuffs(m_left, m_right,m_op);  
649     m_children=m_left->m_children+m_right->m_children+2;     m_children=m_left->m_children+m_right->m_children+2;
650     m_height=max(m_left->m_height,m_right->m_height)+1;     m_height=max(m_left->m_height,m_right->m_height)+1;
 #ifdef LAZY_NODE_STORAGE  
651     LazyNodeSetup();     LazyNodeSetup();
 #endif  
652     SIZELIMIT     SIZELIMIT
653  LAZYDEBUG(cout << "(4)Lazy created with " << m_samplesize << endl;)  LAZYDEBUG(cout << "(4)Lazy created with " << m_samplesize << endl;)
654  }  }
655    
656    
657  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, int axis_offset)  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, int axis_offset)
658      : parent(left->getFunctionSpace(), resultShape(left,op, axis_offset)),          : parent(left->getFunctionSpace(), resultShape(left,op, axis_offset)),
659      m_op(op),          m_op(op),
660      m_axis_offset(axis_offset),          m_axis_offset(axis_offset),
661      m_transpose(0),          m_transpose(0),
662      m_tol(0)          m_tol(0)
663  {  {
664     if ((getOpgroup(op)!=G_NP1OUT_P))     if ((getOpgroup(op)!=G_NP1OUT_P))
665     {     {
666      throw DataException("Programmer error - constructor DataLazy(left, op, ax) will only process UNARY operations which require parameters.");          throw DataException("Programmer error - constructor DataLazy(left, op, ax) will only process UNARY operations which require parameters.");
667     }     }
668     DataLazy_ptr lleft;     DataLazy_ptr lleft;
669     if (!left->isLazy())     if (!left->isLazy())
670     {     {
671      lleft=DataLazy_ptr(new DataLazy(left));          lleft=DataLazy_ptr(new DataLazy(left));
672     }     }
673     else     else
674     {     {
675      lleft=dynamic_pointer_cast<DataLazy>(left);          lleft=dynamic_pointer_cast<DataLazy>(left);
676     }     }
677     m_readytype=lleft->m_readytype;     m_readytype=lleft->m_readytype;
678     m_left=lleft;     m_left=lleft;
    m_buffsRequired=calcBuffs(m_left, m_right,m_op); // yeah m_right will be null at this point  
679     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
    m_maxsamplesize=max(m_samplesize,m_left->getMaxSampleSize());  
680     m_children=m_left->m_children+1;     m_children=m_left->m_children+1;
681     m_height=m_left->m_height+1;     m_height=m_left->m_height+1;
 #ifdef LAZY_NODE_STORAGE  
682     LazyNodeSetup();     LazyNodeSetup();
 #endif  
683     SIZELIMIT     SIZELIMIT
684  LAZYDEBUG(cout << "(5)Lazy created with " << m_samplesize << endl;)  LAZYDEBUG(cout << "(5)Lazy created with " << m_samplesize << endl;)
685  }  }
686    
687  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, double tol)  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, double tol)
688      : parent(left->getFunctionSpace(), left->getShape()),          : parent(left->getFunctionSpace(), left->getShape()),
689      m_op(op),          m_op(op),
690      m_axis_offset(0),          m_axis_offset(0),
691      m_transpose(0),          m_transpose(0),
692      m_tol(tol)          m_tol(tol)
693  {  {
694     if ((getOpgroup(op)!=G_UNARY_P))     if ((getOpgroup(op)!=G_UNARY_P))
695     {     {
696      throw DataException("Programmer error - constructor DataLazy(left, op, tol) will only process UNARY operations which require parameters.");          throw DataException("Programmer error - constructor DataLazy(left, op, tol) will only process UNARY operations which require parameters.");
697     }     }
698     DataLazy_ptr lleft;     DataLazy_ptr lleft;
699     if (!left->isLazy())     if (!left->isLazy())
700     {     {
701      lleft=DataLazy_ptr(new DataLazy(left));          lleft=DataLazy_ptr(new DataLazy(left));
702     }     }
703     else     else
704     {     {
705      lleft=dynamic_pointer_cast<DataLazy>(left);          lleft=dynamic_pointer_cast<DataLazy>(left);
706     }     }
707     m_readytype=lleft->m_readytype;     m_readytype=lleft->m_readytype;
708     m_left=lleft;     m_left=lleft;
    m_buffsRequired=calcBuffs(m_left, m_right,m_op); // yeah m_right will be null at this point  
709     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
    m_maxsamplesize=max(m_samplesize,m_left->getMaxSampleSize());  
710     m_children=m_left->m_children+1;     m_children=m_left->m_children+1;
711     m_height=m_left->m_height+1;     m_height=m_left->m_height+1;
 #ifdef LAZY_NODE_STORAGE  
712     LazyNodeSetup();     LazyNodeSetup();
 #endif  
713     SIZELIMIT     SIZELIMIT
714  LAZYDEBUG(cout << "(6)Lazy created with " << m_samplesize << endl;)  LAZYDEBUG(cout << "(6)Lazy created with " << m_samplesize << endl;)
715  }  }
716    
717    
718  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, const int axis0, const int axis1)  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, const int axis0, const int axis1)
719      : parent(left->getFunctionSpace(), SwapShape(left,axis0,axis1)),          : parent(left->getFunctionSpace(), SwapShape(left,axis0,axis1)),
720      m_op(op),          m_op(op),
721      m_axis_offset(axis0),          m_axis_offset(axis0),
722      m_transpose(axis1),          m_transpose(axis1),
723      m_tol(0)          m_tol(0)
724  {  {
725     if ((getOpgroup(op)!=G_NP1OUT_2P))     if ((getOpgroup(op)!=G_NP1OUT_2P))
726     {     {
727      throw DataException("Programmer error - constructor DataLazy(left, op, tol) will only process UNARY operations which require two integer parameters.");          throw DataException("Programmer error - constructor DataLazy(left, op, tol) will only process UNARY operations which require two integer parameters.");
728     }     }
729     DataLazy_ptr lleft;     DataLazy_ptr lleft;
730     if (!left->isLazy())     if (!left->isLazy())
731     {     {
732      lleft=DataLazy_ptr(new DataLazy(left));          lleft=DataLazy_ptr(new DataLazy(left));
733     }     }
734     else     else
735     {     {
736      lleft=dynamic_pointer_cast<DataLazy>(left);          lleft=dynamic_pointer_cast<DataLazy>(left);
737     }     }
738     m_readytype=lleft->m_readytype;     m_readytype=lleft->m_readytype;
739     m_left=lleft;     m_left=lleft;
    m_buffsRequired=calcBuffs(m_left, m_right,m_op); // yeah m_right will be null at this point  
740     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
    m_maxsamplesize=max(m_samplesize,m_left->getMaxSampleSize());  
741     m_children=m_left->m_children+1;     m_children=m_left->m_children+1;
742     m_height=m_left->m_height+1;     m_height=m_left->m_height+1;
 #ifdef LAZY_NODE_STORAGE  
743     LazyNodeSetup();     LazyNodeSetup();
 #endif  
744     SIZELIMIT     SIZELIMIT
745  LAZYDEBUG(cout << "(7)Lazy created with " << m_samplesize << endl;)  LAZYDEBUG(cout << "(7)Lazy created with " << m_samplesize << endl;)
746  }  }
747    
748  DataLazy::~DataLazy()  
749    namespace
750  {  {
 #ifdef LAZY_NODE_SETUP  
    delete[] m_sampleids;  
    delete[] m_samples;  
 #endif  
 }  
751    
752        inline int max3(int a, int b, int c)
753        {
754            int t=(a>b?a:b);
755            return (t>c?t:c);
756    
757  int      }
 DataLazy::getBuffsRequired() const  
 {  
     return m_buffsRequired;  
758  }  }
759    
760    DataLazy::DataLazy(DataAbstract_ptr mask, DataAbstract_ptr left, DataAbstract_ptr right/*, double tol*/)
761  size_t          : parent(left->getFunctionSpace(), left->getShape()),
762  DataLazy::getMaxSampleSize() const          m_op(CONDEVAL),
763            m_axis_offset(0),
764            m_transpose(0),
765            m_tol(0)
766  {  {
767      return m_maxsamplesize;  
768       DataLazy_ptr lmask;
769       DataLazy_ptr lleft;
770       DataLazy_ptr lright;
771       if (!mask->isLazy())
772       {
773            lmask=DataLazy_ptr(new DataLazy(mask));
774       }
775       else
776       {
777            lmask=dynamic_pointer_cast<DataLazy>(mask);
778       }
779       if (!left->isLazy())
780       {
781            lleft=DataLazy_ptr(new DataLazy(left));
782       }
783       else
784       {
785            lleft=dynamic_pointer_cast<DataLazy>(left);
786       }
787       if (!right->isLazy())
788       {
789            lright=DataLazy_ptr(new DataLazy(right));
790       }
791       else
792       {
793            lright=dynamic_pointer_cast<DataLazy>(right);
794       }
795       m_readytype=lmask->m_readytype;
796       if ((lleft->m_readytype!=lright->m_readytype) || (lmask->m_readytype!=lleft->m_readytype))
797       {
798            throw DataException("Programmer Error - condEval arguments must have the same readytype");
799       }
800       m_left=lleft;
801       m_right=lright;
802       m_mask=lmask;
803       m_samplesize=getNumDPPSample()*getNoValues();
804       m_children=m_left->m_children+m_right->m_children+m_mask->m_children+1;
805       m_height=max3(m_left->m_height,m_right->m_height,m_mask->m_height)+1;
806       LazyNodeSetup();
807       SIZELIMIT
808    LAZYDEBUG(cout << "(8)Lazy created with " << m_samplesize << endl;)
809  }  }
810    
811    
812    
813  size_t  DataLazy::~DataLazy()
 DataLazy::getSampleBufferSize() const  
814  {  {
815      return m_maxsamplesize*(max(1,m_buffsRequired));     delete[] m_sampleids;
816  }  }
817    
818    
819  /*  /*
820    \brief Evaluates the expression using methods on Data.    \brief Evaluates the expression using methods on Data.
821    This does the work for the collapse method.    This does the work for the collapse method.
822    For reasons of efficiency do not call this method on DataExpanded nodes.    For reasons of efficiency do not call this method on DataExpanded nodes.
823  */  */
824  DataReady_ptr  DataReady_ptr
825  DataLazy::collapseToReady()  DataLazy::collapseToReady() const
826  {  {
827    if (m_readytype=='E')    if (m_readytype=='E')
828    { // this is more an efficiency concern than anything else    {     // this is more an efficiency concern than anything else
829      throw DataException("Programmer Error - do not use collapse on Expanded data.");      throw DataException("Programmer Error - do not use collapse on Expanded data.");
830    }    }
831    if (m_op==IDENTITY)    if (m_op==IDENTITY)
# Line 823  DataLazy::collapseToReady() Line 843  DataLazy::collapseToReady()
843    switch(m_op)    switch(m_op)
844    {    {
845      case ADD:      case ADD:
846      result=left+right;          result=left+right;
847      break;          break;
848      case SUB:            case SUB:          
849      result=left-right;          result=left-right;
850      break;          break;
851      case MUL:            case MUL:          
852      result=left*right;          result=left*right;
853      break;          break;
854      case DIV:            case DIV:          
855      result=left/right;          result=left/right;
856      break;          break;
857      case SIN:      case SIN:
858      result=left.sin();            result=left.sin();      
859      break;          break;
860      case COS:      case COS:
861      result=left.cos();          result=left.cos();
862      break;          break;
863      case TAN:      case TAN:
864      result=left.tan();          result=left.tan();
865      break;          break;
866      case ASIN:      case ASIN:
867      result=left.asin();          result=left.asin();
868      break;          break;
869      case ACOS:      case ACOS:
870      result=left.acos();          result=left.acos();
871      break;          break;
872      case ATAN:      case ATAN:
873      result=left.atan();          result=left.atan();
874      break;          break;
875      case SINH:      case SINH:
876      result=left.sinh();          result=left.sinh();
877      break;          break;
878      case COSH:      case COSH:
879      result=left.cosh();          result=left.cosh();
880      break;          break;
881      case TANH:      case TANH:
882      result=left.tanh();          result=left.tanh();
883      break;          break;
884      case ERF:      case ERF:
885      result=left.erf();          result=left.erf();
886      break;          break;
887     case ASINH:     case ASINH:
888      result=left.asinh();          result=left.asinh();
889      break;          break;
890     case ACOSH:     case ACOSH:
891      result=left.acosh();          result=left.acosh();
892      break;          break;
893     case ATANH:     case ATANH:
894      result=left.atanh();          result=left.atanh();
895      break;          break;
896      case LOG10:      case LOG10:
897      result=left.log10();          result=left.log10();
898      break;          break;
899      case LOG:      case LOG:
900      result=left.log();          result=left.log();
901      break;          break;
902      case SIGN:      case SIGN:
903      result=left.sign();          result=left.sign();
904      break;          break;
905      case ABS:      case ABS:
906      result=left.abs();          result=left.abs();
907      break;          break;
908      case NEG:      case NEG:
909      result=left.neg();          result=left.neg();
910      break;          break;
911      case POS:      case POS:
912      // it doesn't mean anything for delayed.          // it doesn't mean anything for delayed.
913      // it will just trigger a deep copy of the lazy object          // it will just trigger a deep copy of the lazy object
914      throw DataException("Programmer error - POS not supported for lazy data.");          throw DataException("Programmer error - POS not supported for lazy data.");
915      break;          break;
916      case EXP:      case EXP:
917      result=left.exp();          result=left.exp();
918      break;          break;
919      case SQRT:      case SQRT:
920      result=left.sqrt();          result=left.sqrt();
921      break;          break;
922      case RECIP:      case RECIP:
923      result=left.oneOver();          result=left.oneOver();
924      break;          break;
925      case GZ:      case GZ:
926      result=left.wherePositive();          result=left.wherePositive();
927      break;          break;
928      case LZ:      case LZ:
929      result=left.whereNegative();          result=left.whereNegative();
930      break;          break;
931      case GEZ:      case GEZ:
932      result=left.whereNonNegative();          result=left.whereNonNegative();
933      break;          break;
934      case LEZ:      case LEZ:
935      result=left.whereNonPositive();          result=left.whereNonPositive();
936      break;          break;
937      case NEZ:      case NEZ:
938      result=left.whereNonZero(m_tol);          result=left.whereNonZero(m_tol);
939      break;          break;
940      case EZ:      case EZ:
941      result=left.whereZero(m_tol);          result=left.whereZero(m_tol);
942      break;          break;
943      case SYM:      case SYM:
944      result=left.symmetric();          result=left.symmetric();
945      break;          break;
946      case NSYM:      case NSYM:
947      result=left.nonsymmetric();          result=left.nonsymmetric();
948      break;          break;
949      case PROD:      case PROD:
950      result=C_GeneralTensorProduct(left,right,m_axis_offset, m_transpose);          result=C_GeneralTensorProduct(left,right,m_axis_offset, m_transpose);
951      break;          break;
952      case TRANS:      case TRANS:
953      result=left.transpose(m_axis_offset);          result=left.transpose(m_axis_offset);
954      break;          break;
955      case TRACE:      case TRACE:
956      result=left.trace(m_axis_offset);          result=left.trace(m_axis_offset);
957      break;          break;
958      case SWAP:      case SWAP:
959      result=left.swapaxes(m_axis_offset, m_transpose);          result=left.swapaxes(m_axis_offset, m_transpose);
960      break;          break;
961      case MINVAL:      case MINVAL:
962      result=left.minval();          result=left.minval();
963      break;          break;
964      case MAXVAL:      case MAXVAL:
965      result=left.maxval();          result=left.maxval();
966      break;          break;
967      default:      default:
968      throw DataException("Programmer error - collapseToReady does not know how to resolve operator "+opToString(m_op)+".");          throw DataException("Programmer error - collapseToReady does not know how to resolve operator "+opToString(m_op)+".");
969    }    }
970    return result.borrowReadyPtr();    return result.borrowReadyPtr();
971  }  }
# Line 957  DataLazy::collapseToReady() Line 977  DataLazy::collapseToReady()
977     the purpose of using DataLazy in the first place).     the purpose of using DataLazy in the first place).
978  */  */
979  void  void
980  DataLazy::collapse()  DataLazy::collapse() const
981  {  {
982    if (m_op==IDENTITY)    if (m_op==IDENTITY)
983    {    {
984      return;          return;
985    }    }
986    if (m_readytype=='E')    if (m_readytype=='E')
987    { // this is more an efficiency concern than anything else    {     // this is more an efficiency concern than anything else
988      throw DataException("Programmer Error - do not use collapse on Expanded data.");      throw DataException("Programmer Error - do not use collapse on Expanded data.");
989    }    }
990    m_id=collapseToReady();    m_id=collapseToReady();
991    m_op=IDENTITY;    m_op=IDENTITY;
992  }  }
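// The collapse above amounts to: evaluate the deferred expression once, keep the
// concrete result, and degrade the node to an IDENTITY wrapper around it.  A
// minimal stand-alone sketch of that pattern (hypothetical Node type, not the
// escript classes):
#include <functional>
#include <memory>
#include <vector>

struct Node
{
    std::function<std::vector<double>()> evaluate;    // the deferred computation
    std::shared_ptr<std::vector<double>> cached;      // set once collapsed

    bool isIdentity() const { return static_cast<bool>(cached); }

    void collapse()
    {
        if (isIdentity())
            return;                                    // nothing left to do
        cached = std::make_shared<std::vector<double>>(evaluate());
        evaluate = [c = cached]() { return *c; };      // behaves like IDENTITY now
    }
};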
993    
 /*  
   \brief Compute the value of the expression (unary operation) for the given sample.  
   \return Vector which stores the value of the subexpression for the given sample.  
   \param v A vector to store intermediate results.  
   \param offset Index in v to begin storing results.  
   \param sampleNo Sample number to evaluate.  
   \param roffset (output parameter) the offset in the return vector where the result begins.  
   
   The return value will be an existing vector so do not deallocate it.  
   If the result is stored in v it should be stored at the offset given.  
   Everything from offset to the end of v should be considered available for this method to use.  
 */  
 DataTypes::ValueType*  
 DataLazy::resolveUnary(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const  
 {  
     // we assume that any collapsing has been done before we get here  
     // since we only have one argument we don't need to think about only  
     // processing single points.  
   if (m_readytype!='E')  
   {  
     throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");  
   }  
   const ValueType* vleft=m_left->resolveVectorSample(v,offset,sampleNo,roffset);  
   const double* left=&((*vleft)[roffset]);  
   double* result=&(v[offset]);  
   roffset=offset;  
   switch (m_op)  
   {  
     case SIN:    
     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sin);  
     break;  
     case COS:  
     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cos);  
     break;  
     case TAN:  
     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tan);  
     break;  
     case ASIN:  
     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::asin);  
     break;  
     case ACOS:  
     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::acos);  
     break;  
     case ATAN:  
     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::atan);  
     break;  
     case SINH:  
     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sinh);  
     break;  
     case COSH:  
     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cosh);  
     break;  
     case TANH:  
     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tanh);  
     break;  
     case ERF:  
 #if defined (_WIN32) && !defined(__INTEL_COMPILER)  
     throw DataException("Error - Data:: erf function is not supported on _WIN32 platforms.");  
 #else  
     tensor_unary_operation(m_samplesize, left, result, ::erf);  
     break;  
 #endif  
    case ASINH:  
 #if defined (_WIN32) && !defined(__INTEL_COMPILER)  
     tensor_unary_operation(m_samplesize, left, result, escript::asinh_substitute);  
 #else  
     tensor_unary_operation(m_samplesize, left, result, ::asinh);  
 #endif    
     break;  
    case ACOSH:  
 #if defined (_WIN32) && !defined(__INTEL_COMPILER)  
     tensor_unary_operation(m_samplesize, left, result, escript::acosh_substitute);  
 #else  
     tensor_unary_operation(m_samplesize, left, result, ::acosh);  
 #endif    
     break;  
    case ATANH:  
 #if defined (_WIN32) && !defined(__INTEL_COMPILER)  
     tensor_unary_operation(m_samplesize, left, result, escript::atanh_substitute);  
 #else  
     tensor_unary_operation(m_samplesize, left, result, ::atanh);  
 #endif    
     break;  
     case LOG10:  
     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log10);  
     break;  
     case LOG:  
     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log);  
     break;  
     case SIGN:  
     tensor_unary_operation(m_samplesize, left, result, escript::fsign);  
     break;  
     case ABS:  
     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::fabs);  
     break;  
     case NEG:  
     tensor_unary_operation(m_samplesize, left, result, negate<double>());  
     break;  
     case POS:  
     // it doesn't mean anything for delayed.  
     // it will just trigger a deep copy of the lazy object  
     throw DataException("Programmer error - POS not supported for lazy data.");  
     break;  
     case EXP:  
     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::exp);  
     break;  
     case SQRT:  
     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sqrt);  
     break;  
     case RECIP:  
     tensor_unary_operation(m_samplesize, left, result, bind1st(divides<double>(),1.));  
     break;  
     case GZ:  
     tensor_unary_operation(m_samplesize, left, result, bind2nd(greater<double>(),0.0));  
     break;  
     case LZ:  
     tensor_unary_operation(m_samplesize, left, result, bind2nd(less<double>(),0.0));  
     break;  
     case GEZ:  
     tensor_unary_operation(m_samplesize, left, result, bind2nd(greater_equal<double>(),0.0));  
     break;  
     case LEZ:  
     tensor_unary_operation(m_samplesize, left, result, bind2nd(less_equal<double>(),0.0));  
     break;  
 // There are actually G_UNARY_P but I don't see a compelling reason to treat them differently  
     case NEZ:  
     tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsGT(),m_tol));  
     break;  
     case EZ:  
     tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsLTE(),m_tol));  
     break;  
   
     default:  
     throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");  
   }  
   return &v;  
 }  
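// The tensor_unary_operation calls above apply one scalar function (or functor)
// to every double in the sample.  A generic sketch of such a helper -- the name,
// signature and any parallelisation in the real escript helper may differ; this
// is only illustrative:
#include <cmath>
#include <cstddef>

template <typename UnaryOp>
void apply_elementwise(std::size_t n, const double* in, double* out, UnaryOp op)
{
    for (std::size_t i = 0; i < n; ++i)
    {
        out[i] = op(in[i]);                 // one value in, one value out
    }
}

// e.g. apply_elementwise(m_samplesize, left, result,
//                        [](double x) { return std::sin(x); });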
   
   
 /*  
   \brief Compute the value of the expression (reduction operation) for the given sample.  
   \return Vector which stores the value of the subexpression for the given sample.  
   \param v A vector to store intermediate results.  
   \param offset Index in v to begin storing results.  
   \param sampleNo Sample number to evaluate.  
   \param roffset (output parameter) the offset in the return vector where the result begins.  
   
   The return value will be an existing vector so do not deallocate it.  
   If the result is stored in v it should be stored at the offset given.  
   Everything from offset to the end of v should be considered available for this method to use.  
 */  
 DataTypes::ValueType*  
 DataLazy::resolveReduction(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const  
 {  
     // we assume that any collapsing has been done before we get here  
     // since we only have one argument we don't need to think about only  
     // processing single points.  
   if (m_readytype!='E')  
   {  
     throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");  
   }  
   const ValueType* vleft=m_left->resolveVectorSample(v,offset,sampleNo,roffset);  
   double* result=&(v[offset]);  
   roffset=offset;  
   unsigned int ndpps=getNumDPPSample();  
   unsigned int psize=DataTypes::noValues(getShape());  
   switch (m_op)  
   {  
     case MINVAL:  
     {  
       for (unsigned int z=0;z<ndpps;++z)  
       {  
          FMin op;  
          *result=DataMaths::reductionOp(*vleft, m_left->getShape(), roffset, op, numeric_limits<double>::max());  
          roffset+=psize;  
          result++;  
       }  
     }  
     break;  
     case MAXVAL:  
     {  
       for (unsigned int z=0;z<ndpps;++z)  
       {  
          FMax op;  
          *result=DataMaths::reductionOp(*vleft, m_left->getShape(), roffset, op, numeric_limits<double>::max()*-1);  
          roffset+=psize;  
          result++;  
       }  
     }  
     break;  
     default:  
     throw DataException("Programmer error - resolveReduction can not resolve operator "+opToString(m_op)+".");  
   }  
   return &v;  
 }  
   
   
   
 /*  
   \brief Compute the value of the expression (unary operation) for the given sample.  
   \return Vector which stores the value of the subexpression for the given sample.  
   \param v A vector to store intermediate results.  
   \param offset Index in v to begin storing results.  
   \param sampleNo Sample number to evaluate.  
   \param roffset (output parameter) the offset in the return vector where the result begins.  
   
   The return value will be an existing vector so do not deallocate it.  
   If the result is stored in v it should be stored at the offset given.  
   Everything from offset to the end of v should be considered available for this method to use.  
 */  
 DataTypes::ValueType*  
 DataLazy::resolveNP1OUT(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const  
 {  
     // we assume that any collapsing has been done before we get here  
     // since we only have one argument we don't need to think about only  
     // processing single points.  
   if (m_readytype!='E')  
   {  
     throw DataException("Programmer error - resolveNP1OUT should only be called on expanded Data.");  
   }  
     // since we can't write the result over the input, we need a result offset further along  
   size_t subroffset=roffset+m_samplesize;  
 LAZYDEBUG(cerr << "subroffset=" << subroffset << endl;)  
   const ValueType* vleft=m_left->resolveVectorSample(v,offset+m_left->m_samplesize,sampleNo,subroffset);  
   roffset=offset;  
   size_t loop=0;  
   size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;  
   size_t step=getNoValues();  
   switch (m_op)  
   {  
     case SYM:  
     for (loop=0;loop<numsteps;++loop)  
     {  
         DataMaths::symmetric(*vleft,m_left->getShape(),subroffset, v, getShape(), offset);  
         subroffset+=step;  
         offset+=step;  
     }  
     break;  
     case NSYM:  
     for (loop=0;loop<numsteps;++loop)  
     {  
         DataMaths::nonsymmetric(*vleft,m_left->getShape(),subroffset, v, getShape(), offset);  
         subroffset+=step;  
         offset+=step;  
     }  
     break;  
     default:  
     throw DataException("Programmer error - resolveNP1OUT can not resolve operator "+opToString(m_op)+".");  
   }  
   return &v;  
 }  
   
 /*  
   \brief Compute the value of the expression (unary operation) for the given sample.  
   \return Vector which stores the value of the subexpression for the given sample.  
   \param v A vector to store intermediate results.  
   \param offset Index in v to begin storing results.  
   \param sampleNo Sample number to evaluate.  
   \param roffset (output parameter) the offset in the return vector where the result begins.  
   
   The return value will be an existing vector so do not deallocate it.  
   If the result is stored in v it should be stored at the offset given.  
   Everything from offset to the end of v should be considered available for this method to use.  
 */  
 DataTypes::ValueType*  
 DataLazy::resolveNP1OUT_P(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const  
 {  
     // we assume that any collapsing has been done before we get here  
     // since we only have one argument we don't need to think about only  
     // processing single points.  
   if (m_readytype!='E')  
   {  
     throw DataException("Programmer error - resolveNP1OUT_P should only be called on expanded Data.");  
   }  
     // since we can't write the result over the input, we need a result offset further along  
   size_t subroffset;  
   const ValueType* vleft=m_left->resolveVectorSample(v,offset+m_left->m_samplesize,sampleNo,subroffset);  
 LAZYDEBUG(cerr << "srcsamplesize=" << offset+m_left->m_samplesize << " beg=" << subroffset << endl;)  
 LAZYDEBUG(cerr << "Offset for 5800=" << getPointOffset(5800/getNumDPPSample(),5800%getNumDPPSample()) << endl;)  
   roffset=offset;  
   size_t loop=0;  
   size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;  
   size_t outstep=getNoValues();  
   size_t instep=m_left->getNoValues();  
 LAZYDEBUG(cerr << "instep=" << instep << " outstep=" << outstep<< " numsteps=" << numsteps << endl;)  
   switch (m_op)  
   {  
     case TRACE:  
     for (loop=0;loop<numsteps;++loop)  
     {  
 size_t zz=sampleNo*getNumDPPSample()+loop;  
 if (zz==5800)  
 {  
 LAZYDEBUG(cerr << "point=" <<  zz<< endl;)  
 LAZYDEBUG(cerr << "Input to  trace=" << DataTypes::pointToString(*vleft,m_left->getShape(),subroffset,"") << endl;)  
 LAZYDEBUG(cerr << "Offset for point=" << getPointOffset(5800/getNumDPPSample(),5800%getNumDPPSample()) << " vs ";)  
 LAZYDEBUG(cerr << subroffset << endl;)  
 LAZYDEBUG(cerr << "output=" << offset << endl;)  
 }  
             DataMaths::trace(*vleft,m_left->getShape(),subroffset, v ,getShape(),offset,m_axis_offset);  
 if (zz==5800)  
 {  
 LAZYDEBUG(cerr << "Result of trace=" << DataTypes::pointToString(v,getShape(),offset,"") << endl;)  
 }  
         subroffset+=instep;  
         offset+=outstep;  
     }  
     break;  
     case TRANS:  
     for (loop=0;loop<numsteps;++loop)  
     {  
             DataMaths::transpose(*vleft,m_left->getShape(),subroffset, v,getShape(),offset,m_axis_offset);  
         subroffset+=instep;  
         offset+=outstep;  
     }  
     break;  
     default:  
     throw DataException("Programmer error - resolveNP1OUTP can not resolve operator "+opToString(m_op)+".");  
   }  
   return &v;  
 }  
   
   
 /*  
   \brief Compute the value of the expression (unary operation with int params) for the given sample.  
   \return Vector which stores the value of the subexpression for the given sample.  
   \param v A vector to store intermediate results.  
   \param offset Index in v to begin storing results.  
   \param sampleNo Sample number to evaluate.  
   \param roffset (output parameter) the offset in the return vector where the result begins.  
   
   The return value will be an existing vector so do not deallocate it.  
   If the result is stored in v it should be stored at the offset given.  
   Everything from offset to the end of v should be considered available for this method to use.  
 */  
 DataTypes::ValueType*  
 DataLazy::resolveNP1OUT_2P(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const  
 {  
     // we assume that any collapsing has been done before we get here  
     // since we only have one argument we don't need to think about only  
     // processing single points.  
   if (m_readytype!='E')  
   {  
     throw DataException("Programmer error - resolveNP1OUT_2P should only be called on expanded Data.");  
   }  
     // since we can't write the result over the input, we need a result offset further along  
   size_t subroffset;  
   const ValueType* vleft=m_left->resolveVectorSample(v,offset+m_left->m_samplesize,sampleNo,subroffset);  
 LAZYDEBUG(cerr << "srcsamplesize=" << offset+m_left->m_samplesize << " beg=" << subroffset << endl;)  
 LAZYDEBUG(cerr << "Offset for 5800=" << getPointOffset(5800/getNumDPPSample(),5800%getNumDPPSample()) << endl;)  
   roffset=offset;  
   size_t loop=0;  
   size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;  
   size_t outstep=getNoValues();  
   size_t instep=m_left->getNoValues();  
 LAZYDEBUG(cerr << "instep=" << instep << " outstep=" << outstep<< " numsteps=" << numsteps << endl;)  
   switch (m_op)  
   {  
     case SWAP:  
     for (loop=0;loop<numsteps;++loop)  
     {  
             DataMaths::swapaxes(*vleft,m_left->getShape(),subroffset, v,getShape(),offset,m_axis_offset, m_transpose);  
         subroffset+=instep;  
         offset+=outstep;  
     }  
     break;  
     default:  
     throw DataException("Programmer error - resolveNP1OUT2P can not resolve operator "+opToString(m_op)+".");  
   }  
   return &v;  
 }  
   
   
   
 #define PROC_OP(TYPE,X)                               \  
     for (int j=0;j<onumsteps;++j)\  
     {\  
       for (int i=0;i<numsteps;++i,resultp+=resultStep) \  
       { \  
 LAZYDEBUG(cout << "[left,right]=[" << lroffset << "," << rroffset << "]" << endl;)\  
 LAZYDEBUG(cout << "{left,right}={" << (*left)[lroffset] << "," << (*right)[rroffset] << "}\n";)\  
          tensor_binary_operation< TYPE >(chunksize, &((*left)[lroffset]), &((*right)[rroffset]), resultp, X); \  
 LAZYDEBUG(cout << " result=      " << resultp[0] << endl;) \  
          lroffset+=leftstep; \  
          rroffset+=rightstep; \  
       }\  
       lroffset+=oleftstep;\  
       rroffset+=orightstep;\  
     }  
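// The macro is a two-level stepping loop: the inner loop walks chunks of
// 'chunksize' doubles through the sample, and the outer loop only does real work
// when one operand is an expanded scalar (otherwise onumsteps is 1).  A
// self-contained sketch of the same stepping with illustrative names (not the
// escript helpers):
#include <cstddef>

template <typename BinOp>
void stepped_binary(const double* left, const double* right, double* result,
                    std::size_t chunksize, int numsteps, int onumsteps,
                    int leftstep, int rightstep, int oleftstep, int orightstep,
                    int resultStep, BinOp op)
{
    std::ptrdiff_t lro = 0, rro = 0;                  // running read offsets
    for (int j = 0; j < onumsteps; ++j)
    {
        for (int i = 0; i < numsteps; ++i, result += resultStep)
        {
            for (std::size_t k = 0; k < chunksize; ++k)
            {
                result[k] = op(left[lro + k], right[rro + k]);
            }
            lro += leftstep;                          // advance within the sample
            rro += rightstep;
        }
        lro += oleftstep;                             // correction for the outer pass
        rro += orightstep;
    }
}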
   
 /*  
   \brief Compute the value of the expression (binary operation) for the given sample.  
   \return Vector which stores the value of the subexpression for the given sample.  
   \param v A vector to store intermediate results.  
   \param offset Index in v to begin storing results.  
   \param sampleNo Sample number to evaluate.  
   \param roffset (output parameter) the offset in the return vector where the result begins.  
   
   The return value will be an existing vector so do not deallocate it.  
   If the result is stored in v it should be stored at the offset given.  
   Everything from offset to the end of v should be considered available for this method to use.  
 */  
 // This method assumes that any subexpressions which evaluate to Constant or Tagged Data  
 // have already been collapsed to IDENTITY. So we must have at least one expanded child.  
 // If both children are expanded, then we can process them in a single operation (we treat  
// the whole sample as one big datapoint).
 // If one of the children is not expanded, then we need to treat each point in the sample  
 // individually.  
 // There is an additional complication when scalar operations are considered.  
 // For example, 2+Vector.  
 // In this case each double within the point is treated individually  
 DataTypes::ValueType*  
 DataLazy::resolveBinary(ValueType& v,  size_t offset, int sampleNo, size_t& roffset) const  
 {  
 LAZYDEBUG(cout << "Resolve binary: " << toString() << endl;)  
   
   size_t lroffset=0, rroffset=0;    // offsets in the left and right result vectors  
     // first work out which of the children are expanded  
   bool leftExp=(m_left->m_readytype=='E');  
   bool rightExp=(m_right->m_readytype=='E');  
   if (!leftExp && !rightExp)  
   {  
     throw DataException("Programmer Error - please use collapse if neither argument has type 'E'.");  
   }  
   bool leftScalar=(m_left->getRank()==0);  
   bool rightScalar=(m_right->getRank()==0);  
   if ((m_left->getRank()!=m_right->getRank()) && (!leftScalar && !rightScalar))  
   {  
     throw DataException("resolveBinary - ranks of arguments must match unless one of them is scalar.");  
   }  
   size_t leftsize=m_left->getNoValues();  
   size_t rightsize=m_right->getNoValues();  
   size_t chunksize=1;           // how many doubles will be processed in one go  
   int leftstep=0;       // how far should the left offset advance after each step  
   int rightstep=0;  
   int numsteps=0;       // total number of steps for the inner loop  
   int oleftstep=0;  // the o variables refer to the outer loop  
   int orightstep=0; // The outer loop is only required in cases where there is an extended scalar  
   int onumsteps=1;  
     
   bool LES=(leftExp && leftScalar); // Left is an expanded scalar  
   bool RES=(rightExp && rightScalar);  
   bool LS=(!leftExp && leftScalar); // left is a single scalar  
   bool RS=(!rightExp && rightScalar);  
   bool LN=(!leftExp && !leftScalar);    // left is a single non-scalar  
   bool RN=(!rightExp && !rightScalar);  
   bool LEN=(leftExp && !leftScalar);    // left is an expanded non-scalar  
   bool REN=(rightExp && !rightScalar);  
   
   if ((LES && RES) || (LEN && REN)) // both are Expanded scalars or both are expanded non-scalars  
   {  
     chunksize=m_left->getNumDPPSample()*leftsize;  
     leftstep=0;  
     rightstep=0;  
     numsteps=1;  
   }  
   else if (LES || RES)  
   {  
     chunksize=1;  
     if (LES)        // left is an expanded scalar  
     {  
         if (RS)  
         {  
            leftstep=1;  
            rightstep=0;  
            numsteps=m_left->getNumDPPSample();  
         }  
         else        // RN or REN  
         {  
            leftstep=0;  
            oleftstep=1;  
            rightstep=1;  
            orightstep=(RN ? -(int)rightsize : 0);  
            numsteps=rightsize;  
            onumsteps=m_left->getNumDPPSample();  
         }  
     }  
     else        // right is an expanded scalar  
     {  
         if (LS)  
         {  
            rightstep=1;  
            leftstep=0;  
            numsteps=m_right->getNumDPPSample();  
         }  
         else  
         {  
            rightstep=0;  
            orightstep=1;  
            leftstep=1;  
            oleftstep=(LN ? -(int)leftsize : 0);  
            numsteps=leftsize;  
            onumsteps=m_right->getNumDPPSample();  
         }  
     }  
   }  
   else  // this leaves (LEN, RS), (LEN, RN) and their transposes  
   {  
     if (LEN)    // and Right will be a single value  
     {  
         chunksize=rightsize;  
         leftstep=rightsize;  
         rightstep=0;  
         numsteps=m_left->getNumDPPSample();  
         if (RS)  
         {  
            numsteps*=leftsize;  
         }  
     }  
     else    // REN  
     {  
         chunksize=leftsize;  
         rightstep=leftsize;  
         leftstep=0;  
         numsteps=m_right->getNumDPPSample();  
         if (LS)  
         {  
            numsteps*=rightsize;  
         }  
     }  
   }  
   
   int resultStep=max(leftstep,rightstep);   // only one (at most) should be !=0  
     // Get the values of sub-expressions  
   const ValueType* left=m_left->resolveVectorSample(v,offset+getMaxSampleSize(),sampleNo,lroffset); // see note on  
     // calcBufss for why we can't put offset as the 2nd param above  
   const ValueType* right=m_right->resolveVectorSample(v,offset+2*getMaxSampleSize(),sampleNo,rroffset); // Note  
     // the right child starts further along.  
 LAZYDEBUG(cout << "Post sub calls in " << toString() << endl;)  
 LAZYDEBUG(cout << "shapes=" << DataTypes::shapeToString(m_left->getShape()) << "," << DataTypes::shapeToString(m_right->getShape()) << endl;)  
 LAZYDEBUG(cout << "chunksize=" << chunksize << endl << "leftstep=" << leftstep << " rightstep=" << rightstep;)  
 LAZYDEBUG(cout << " numsteps=" << numsteps << endl << "oleftstep=" << oleftstep << " orightstep=" << orightstep;)  
 LAZYDEBUG(cout << "onumsteps=" << onumsteps << endl;)  
 LAZYDEBUG(cout << " DPPS=" << m_left->getNumDPPSample() << "," <<m_right->getNumDPPSample() << endl;)  
 LAZYDEBUG(cout << "" << LS << RS << LN << RN << LES << RES <<LEN << REN <<   endl;)  
   
   
  double* resultp=&(v[offset]);     // results are stored at the vector offset we received
   switch(m_op)  
   {  
     case ADD:  
         PROC_OP(NO_ARG,plus<double>());  
     break;  
     case SUB:  
     PROC_OP(NO_ARG,minus<double>());  
     break;  
     case MUL:  
     PROC_OP(NO_ARG,multiplies<double>());  
     break;  
     case DIV:  
     PROC_OP(NO_ARG,divides<double>());  
     break;  
     case POW:  
        PROC_OP(double (double,double),::pow);  
     break;  
     default:  
     throw DataException("Programmer error - resolveBinary can not resolve operator "+opToString(m_op)+".");  
   }  
   roffset=offset;  
   return &v;  
 }  
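// Worked example of the parameter choice above for "2 + Vector", i.e. a plain
// scalar (LS) combined with an expanded non-scalar (REN) holding, say, a
// shape (3,) point and 2 data points per sample:
//
//   leftsize   = 1, rightsize = 3
//   chunksize  = leftsize = 1                       // one double at a time
//   leftstep   = 0                                  // the scalar never advances
//   rightstep  = leftsize = 1                       // walk the vector value by value
//   numsteps   = getNumDPPSample() * rightsize = 2 * 3 = 6
//   onumsteps  = 1                                  // no outer loop needed
//
// so the inner loop simply combines the same scalar with each of the 6 doubles
// in the sample.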
   
   
   
 /*  
   \brief Compute the value of the expression (tensor product) for the given sample.  
   \return Vector which stores the value of the subexpression for the given sample.  
   \param v A vector to store intermediate results.  
   \param offset Index in v to begin storing results.  
   \param sampleNo Sample number to evaluate.  
   \param roffset (output parameter) the offset in the return vector where the result begins.  
   
   The return value will be an existing vector so do not deallocate it.  
   If the result is stored in v it should be stored at the offset given.  
   Everything from offset to the end of v should be considered available for this method to use.  
 */  
 // This method assumes that any subexpressions which evaluate to Constant or Tagged Data  
 // have already been collapsed to IDENTITY. So we must have at least one expanded child.  
 // unlike the other resolve helpers, we must treat these datapoints separately.  
 DataTypes::ValueType*  
 DataLazy::resolveTProd(ValueType& v,  size_t offset, int sampleNo, size_t& roffset) const  
 {  
 LAZYDEBUG(cout << "Resolve TensorProduct: " << toString()  << " to offset " << offset<< endl;)  
   
   size_t lroffset=0, rroffset=0;    // offsets in the left and right result vectors  
     // first work out which of the children are expanded  
   bool leftExp=(m_left->m_readytype=='E');  
   bool rightExp=(m_right->m_readytype=='E');  
   int steps=getNumDPPSample();  
 /*  int leftStep=((leftExp && !rightExp)? m_right->getNoValues() : 0);  
   int rightStep=((rightExp && !leftExp)? m_left->getNoValues() : 0);*/  
   int leftStep=(leftExp? m_left->getNoValues() : 0);        // do not have scalars as input to this method  
   int rightStep=(rightExp?m_right->getNoValues() : 0);  
   
   int resultStep=getNoValues();  
     // Get the values of sub-expressions (leave a gap of one sample for the result).  
   int gap=offset+m_samplesize;    
   
 LAZYDEBUG(cout << "Query left with offset=" << gap << endl;)  
   
   const ValueType* left=m_left->resolveVectorSample(v,gap,sampleNo,lroffset);  
   gap+=m_left->getMaxSampleSize();  
   
   
 LAZYDEBUG(cout << "Query right with offset=" << gap << endl;)  
   
   
   const ValueType* right=m_right->resolveVectorSample(v,gap,sampleNo,rroffset);  
   
 LAZYDEBUG(cerr << "[Left shape]=" << DataTypes::shapeToString(m_left->getShape()) << "\n[Right shape]=" << DataTypes::shapeToString(m_right->getShape()) << " result=" <<DataTypes::shapeToString(getShape()) <<  endl;  
 cout << getNoValues() << endl;)  
 LAZYDEBUG(cerr << "Result of left=";)  
 LAZYDEBUG(cerr << "[" << lroffset << " .. " << lroffset+m_left->getNoValues() << "]" << endl;  
   
 for (int i=lroffset, limit=lroffset+(leftExp?m_left->getNoValues()*m_left->getNumDPPSample():m_left->getNoValues());i<limit;++i)  
 {  
 cout << "[" << setw(2) << i-lroffset << "] " << setw(10) << (*left)[i] << " ";  
 if (i%4==0) cout << endl;  
 })  
 LAZYDEBUG(cerr << "\nResult of right=" << endl;)  
 LAZYDEBUG(  
 for (int i=rroffset, limit=rroffset+(rightExp?m_right->getNoValues()*m_right->getNumDPPSample():m_right->getNoValues());i<limit;++i)  
 {  
 cerr << "[" <<  setw(2)<< i-rroffset << "] " << setw(10) << (*right)[i] << " ";  
 if (i%4==0) cout << endl;  
 }  
 cerr << endl;  
 )  
 LAZYDEBUG(cerr << "Post sub calls: " << toString() << endl;)  
 LAZYDEBUG(cout << "LeftExp=" << leftExp << " rightExp=" << rightExp << endl;)  
 LAZYDEBUG(cout << "LeftR=" << m_left->getRank() << " rightExp=" << m_right->getRank() << endl;)  
 LAZYDEBUG(cout << "LeftSize=" << m_left->getNoValues() << " RightSize=" << m_right->getNoValues() << endl;)  
 LAZYDEBUG(cout << "m_samplesize=" << m_samplesize << endl;)  
 LAZYDEBUG(cout << "outputshape=" << DataTypes::shapeToString(getShape()) << endl;)  
 LAZYDEBUG(cout << "DPPS=" << m_right->getNumDPPSample() <<"."<<endl;)  
   
  double* resultp=&(v[offset]);     // results are stored at the vector offset we received
   switch(m_op)  
   {  
     case PROD:  
     for (int i=0;i<steps;++i,resultp+=resultStep)  
     {  
   
 LAZYDEBUG(cout << "lroffset=" << lroffset << "rroffset=" << rroffset << endl;)  
 LAZYDEBUG(cout << "l*=" << left << " r*=" << right << endl;)  
 LAZYDEBUG(cout << "m_SL=" << m_SL << " m_SM=" << m_SM << " m_SR=" << m_SR << endl;)  
   
           const double *ptr_0 = &((*left)[lroffset]);  
           const double *ptr_1 = &((*right)[rroffset]);  
   
 LAZYDEBUG(cout << DataTypes::pointToString(*left, m_left->getShape(),lroffset,"LEFT") << endl;)  
 LAZYDEBUG(cout << DataTypes::pointToString(*right,m_right->getShape(),rroffset, "RIGHT") << endl;)  
   
           matrix_matrix_product(m_SL, m_SM, m_SR, ptr_0, ptr_1, resultp, m_transpose);  
   
 LAZYDEBUG(cout << "Results--\n";  
 {  
   DataVector dv(getNoValues());  
 for (int z=0;z<getNoValues();++z)  
 {  
   cout << "[" << setw(2) << z<< "] " << setw(10) << resultp[z] << " ";  
   if (z%4==0) cout << endl;  
   dv[z]=resultp[z];  
 }  
 cout << endl << DataTypes::pointToString(dv,getShape(),0,"RESLT");  
 cout << "\nWritten to: " << resultp << " resultStep=" << resultStep << endl;  
 }  
 )  
       lroffset+=leftStep;  
       rroffset+=rightStep;  
     }  
     break;  
     default:  
     throw DataException("Programmer error - resolveTProduct can not resolve operator "+opToString(m_op)+".");  
   }  
   roffset=offset;  
   return &v;  
 }  
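// The heavy lifting is matrix_matrix_product(m_SL, m_SM, m_SR, ...).  Assuming
// (as the names suggest) it contracts an SL x SM block against an SM x SR block
// into an SL x SR result, a plain untransposed, row-major version would look like
// the following sketch -- the real routine also honours m_transpose, which is
// omitted here:
#include <cstddef>

void naive_matrix_product(std::size_t SL, std::size_t SM, std::size_t SR,
                          const double* A,   // SL x SM, row-major
                          const double* B,   // SM x SR, row-major
                          double* C)         // SL x SR, row-major
{
    for (std::size_t i = 0; i < SL; ++i)
    {
        for (std::size_t k = 0; k < SR; ++k)
        {
            double sum = 0.0;
            for (std::size_t j = 0; j < SM; ++j)
            {
                sum += A[i * SM + j] * B[j * SR + k];   // contract the middle index
            }
            C[i * SR + k] = sum;
        }
    }
}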
   
   
 #ifdef LAZY_NODE_STORAGE  
   
994  // The result will be stored in m_samples  // The result will be stored in m_samples
995  // The return value is a pointer to the DataVector, offset is the offset within the return value  // The return value is a pointer to the DataVector, offset is the offset within the return value
996  const DataTypes::ValueType*  const DataTypes::RealVectorType*
997  DataLazy::resolveNodeSample(int tid, int sampleNo, size_t& roffset)  DataLazy::resolveNodeSample(int tid, int sampleNo, size_t& roffset) const
998  {  {
999  LAZYDEBUG(cout << "Resolve sample " << toString() << endl;)  LAZYDEBUG(cout << "Resolve sample " << toString() << endl;)
1000      // collapse so we have a 'E' node or an IDENTITY for some other type          // collapse so we have a 'E' node or an IDENTITY for some other type
1001    if (m_readytype!='E' && m_op!=IDENTITY)    if (m_readytype!='E' && m_op!=IDENTITY)
1002    {    {
1003      collapse();          collapse();
1004    }    }
1005    if (m_op==IDENTITY)      if (m_op==IDENTITY)  
1006    {    {
1007      const ValueType& vec=m_id->getVectorRO();      const RealVectorType& vec=m_id->getVectorRO();
 //     if (m_readytype=='C')  
 //     {  
 //  roffset=0;      // all samples read from the same position  
 //  return &(m_samples);  
 //     }  
1008      roffset=m_id->getPointOffset(sampleNo, 0);      roffset=m_id->getPointOffset(sampleNo, 0);
1009    #ifdef LAZY_STACK_PROF
1010    int x;
1011    if (&x<stackend[omp_get_thread_num()])
1012    {
1013           stackend[omp_get_thread_num()]=&x;
1014    }
1015    #endif
1016      return &(vec);      return &(vec);
1017    }    }
1018    if (m_readytype!='E')    if (m_readytype!='E')
# Line 1681  LAZYDEBUG(cout << "Resolve sample " << t Line 1021  LAZYDEBUG(cout << "Resolve sample " << t
1021    }    }
1022    if (m_sampleids[tid]==sampleNo)    if (m_sampleids[tid]==sampleNo)
1023    {    {
1024      roffset=tid*m_samplesize;          roffset=tid*m_samplesize;
1025      return &(m_samples);        // sample is already resolved          return &(m_samples);            // sample is already resolved
1026    }    }
1027    m_sampleids[tid]=sampleNo;    m_sampleids[tid]=sampleNo;
1028    
1029    switch (getOpgroup(m_op))    switch (getOpgroup(m_op))
1030    {    {
1031    case G_UNARY:    case G_UNARY:
# Line 1695  LAZYDEBUG(cout << "Resolve sample " << t Line 1036  LAZYDEBUG(cout << "Resolve sample " << t
1036    case G_TENSORPROD: return resolveNodeTProd(tid, sampleNo, roffset);    case G_TENSORPROD: return resolveNodeTProd(tid, sampleNo, roffset);
1037    case G_NP1OUT_2P: return resolveNodeNP1OUT_2P(tid, sampleNo, roffset);    case G_NP1OUT_2P: return resolveNodeNP1OUT_2P(tid, sampleNo, roffset);
1038    case G_REDUCTION: return resolveNodeReduction(tid, sampleNo, roffset);    case G_REDUCTION: return resolveNodeReduction(tid, sampleNo, roffset);
1039      case G_CONDEVAL: return resolveNodeCondEval(tid, sampleNo, roffset);
1040    default:    default:
1041      throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");      throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");
1042    }    }
1043  }  }
1044    
1045  const DataTypes::ValueType*  const DataTypes::RealVectorType*
1046  DataLazy::resolveNodeUnary(int tid, int sampleNo, size_t& roffset)  DataLazy::resolveNodeUnary(int tid, int sampleNo, size_t& roffset) const
1047  {  {
1048      // we assume that any collapsing has been done before we get here          // we assume that any collapsing has been done before we get here
1049      // since we only have one argument we don't need to think about only          // since we only have one argument we don't need to think about only
1050      // processing single points.          // processing single points.
1051      // we will also know we won't get identity nodes          // we will also know we won't get identity nodes
1052    if (m_readytype!='E')    if (m_readytype!='E')
1053    {    {
1054      throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");      throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
# Line 1715  DataLazy::resolveNodeUnary(int tid, int Line 1057  DataLazy::resolveNodeUnary(int tid, int
1057    {    {
1058      throw DataException("Programmer error - resolveNodeUnary should not be called on identity nodes.");      throw DataException("Programmer error - resolveNodeUnary should not be called on identity nodes.");
1059    }    }
1060    const DataTypes::ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, roffset);    const DataTypes::RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, roffset);
1061    const double* left=&((*leftres)[roffset]);    const double* left=&((*leftres)[roffset]);
1062    roffset=m_samplesize*tid;    roffset=m_samplesize*tid;
1063    double* result=&(m_samples[roffset]);    double* result=&(m_samples[roffset]);
1064      escript::ESFunction operation=SINF;
1065    switch (m_op)    switch (m_op)
1066    {    {
1067      case SIN:        case SIN:
1068      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sin);      operation=SINF;
1069      break;      break;
1070      case COS:      case COS:
1071      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cos);          operation=COSF;
1072      break;      break;
1073      case TAN:      case TAN:
1074      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tan);          operation=TANF;
1075      break;      break;
1076      case ASIN:      case ASIN:
1077      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::asin);          operation=ASINF;
1078      break;      break;
1079      case ACOS:      case ACOS:
1080      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::acos);          operation=ACOSF;
1081      break;      break;
1082      case ATAN:      case ATAN:
1083      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::atan);          operation=ATANF;
1084      break;      break;
1085      case SINH:      case SINH:
1086      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sinh);          operation=SINHF;
1087      break;      break;
1088      case COSH:      case COSH:
1089      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cosh);          operation=COSHF;
1090      break;      break;
1091      case TANH:      case TANH:
1092      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tanh);          operation=TANHF;
1093      break;      break;
1094      case ERF:      case ERF:
1095  #if defined (_WIN32) && !defined(__INTEL_COMPILER)          operation=ERFF;
     throw DataException("Error - Data:: erf function is not supported on _WIN32 platforms.");  
 #else  
     tensor_unary_operation(m_samplesize, left, result, ::erf);  
1096      break;      break;
 #endif  
1097     case ASINH:     case ASINH:
1098  #if defined (_WIN32) && !defined(__INTEL_COMPILER)          operation=ASINHF;
     tensor_unary_operation(m_samplesize, left, result, escript::asinh_substitute);  
 #else  
     tensor_unary_operation(m_samplesize, left, result, ::asinh);  
 #endif    
1099      break;      break;
1100     case ACOSH:     case ACOSH:
1101  #if defined (_WIN32) && !defined(__INTEL_COMPILER)          operation=ACOSHF;
     tensor_unary_operation(m_samplesize, left, result, escript::acosh_substitute);  
 #else  
     tensor_unary_operation(m_samplesize, left, result, ::acosh);  
 #endif    
1102      break;      break;
1103     case ATANH:     case ATANH:
1104  #if defined (_WIN32) && !defined(__INTEL_COMPILER)          operation=ATANHF;
     tensor_unary_operation(m_samplesize, left, result, escript::atanh_substitute);  
 #else  
     tensor_unary_operation(m_samplesize, left, result, ::atanh);  
 #endif    
1105      break;      break;
1106      case LOG10:      case LOG10:
1107      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log10);          operation=LOG10F;
1108      break;      break;
1109      case LOG:      case LOG:
1110      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log);          operation=LOGF;
1111      break;      break;
1112      case SIGN:      case SIGN:
1113      tensor_unary_operation(m_samplesize, left, result, escript::fsign);          operation=SIGNF;
1114      break;      break;
1115      case ABS:      case ABS:
1116      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::fabs);          operation=ABSF;
1117      break;      break;
1118      case NEG:      case NEG:
1119      tensor_unary_operation(m_samplesize, left, result, negate<double>());          operation=NEGF;
1120      break;      break;
1121      case POS:      case POS:
1122      // it doesn't mean anything for delayed.          // it doesn't mean anything for delayed.
1123      // it will just trigger a deep copy of the lazy object          // it will just trigger a deep copy of the lazy object
1124      throw DataException("Programmer error - POS not supported for lazy data.");          throw DataException("Programmer error - POS not supported for lazy data.");
1125      break;          break;
1126      case EXP:      case EXP:
1127      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::exp);          operation=EXPF;
1128      break;      break;
1129      case SQRT:      case SQRT:
1130      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sqrt);          operation=SQRTF;
1131      break;      break;
1132      case RECIP:      case RECIP:
1133      tensor_unary_operation(m_samplesize, left, result, bind1st(divides<double>(),1.));          operation=INVF;
1134      break;      break;
1135      case GZ:      case GZ:
1136      tensor_unary_operation(m_samplesize, left, result, bind2nd(greater<double>(),0.0));          operation=GTZEROF;
1137      break;      break;
1138      case LZ:      case LZ:
1139      tensor_unary_operation(m_samplesize, left, result, bind2nd(less<double>(),0.0));          operation=LTZEROF;
1140      break;      break;
1141      case GEZ:      case GEZ:
1142      tensor_unary_operation(m_samplesize, left, result, bind2nd(greater_equal<double>(),0.0));          operation=GEZEROF;
1143      break;      break;
1144      case LEZ:      case LEZ:
1145      tensor_unary_operation(m_samplesize, left, result, bind2nd(less_equal<double>(),0.0));          operation=LEZEROF;
1146      break;      break;
1147  // There are actually G_UNARY_P but I don't see a compelling reason to treat them differently  // There are actually G_UNARY_P but I don't see a compelling reason to treat them differently
1148      case NEZ:      case NEZ:
1149      tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsGT(),m_tol));          operation=NEQZEROF;
1150      break;      break;
1151      case EZ:      case EZ:
1152      tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsLTE(),m_tol));          operation=EQZEROF;
1153      break;      break;
   
1154      default:      default:
1155      throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");          throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");
1156    }    }
1157      tensor_unary_array_operation(m_samplesize,
1158                                 left,
1159                                 result,
1160                                 operation,
1161                                 m_tol);  
1162    return &(m_samples);    return &(m_samples);
1163  }  }
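// tensor_unary_array_operation then does the elementwise work for whichever
// ESFunction tag was selected.  A cut-down sketch of that dispatch idea, using a
// hypothetical enum and only a few cases (the escript version covers all the
// operations above and may be organised quite differently):
#include <cmath>
#include <cstddef>

enum class UnaryFn { Sin, Abs, EqZero };

void unary_array_op(std::size_t n, const double* in, double* out,
                    UnaryFn fn, double tol)
{
    switch (fn)
    {
        case UnaryFn::Sin:
            for (std::size_t i = 0; i < n; ++i) out[i] = std::sin(in[i]);
            break;
        case UnaryFn::Abs:
            for (std::size_t i = 0; i < n; ++i) out[i] = std::fabs(in[i]);
            break;
        case UnaryFn::EqZero:                 // EZ-style ops use the tolerance
            for (std::size_t i = 0; i < n; ++i)
                out[i] = (std::fabs(in[i]) <= tol) ? 1.0 : 0.0;
            break;
    }
}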
1164    
1165    
1166  const DataTypes::ValueType*  const DataTypes::RealVectorType*
1167  DataLazy::resolveNodeReduction(int tid, int sampleNo, size_t& roffset)  DataLazy::resolveNodeReduction(int tid, int sampleNo, size_t& roffset) const
1168  {  {
1169      // we assume that any collapsing has been done before we get here          // we assume that any collapsing has been done before we get here
1170      // since we only have one argument we don't need to think about only          // since we only have one argument we don't need to think about only
1171      // processing single points.          // processing single points.
1172      // we will also know we won't get identity nodes          // we will also know we won't get identity nodes
1173    if (m_readytype!='E')    if (m_readytype!='E')
1174    {    {
1175      throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");      throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
# Line 1848  DataLazy::resolveNodeReduction(int tid, Line 1179  DataLazy::resolveNodeReduction(int tid,
1179      throw DataException("Programmer error - resolveNodeUnary should not be called on identity nodes.");      throw DataException("Programmer error - resolveNodeUnary should not be called on identity nodes.");
1180    }    }
1181    size_t loffset=0;    size_t loffset=0;
1182    const DataTypes::ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, loffset);    const DataTypes::RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, loffset);
1183    
1184    roffset=m_samplesize*tid;    roffset=m_samplesize*tid;
1185    unsigned int ndpps=getNumDPPSample();    unsigned int ndpps=getNumDPPSample();
1186    unsigned int psize=DataTypes::noValues(getShape());    unsigned int psize=DataTypes::noValues(m_left->getShape());
1187    double* result=&(m_samples[roffset]);    double* result=&(m_samples[roffset]);
1188    switch (m_op)    switch (m_op)
1189    {    {
1190      case MINVAL:      case MINVAL:
1191      {          {
1192        for (unsigned int z=0;z<ndpps;++z)            for (unsigned int z=0;z<ndpps;++z)
1193        {            {
1194          FMin op;              FMin op;
1195          *result=DataMaths::reductionOp(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max());              *result=DataMaths::reductionOp(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max());
1196          loffset+=psize;              loffset+=psize;
1197          result++;              result++;
1198        }            }
1199      }          }
1200      break;          break;
1201      case MAXVAL:      case MAXVAL:
1202      {          {
1203        for (unsigned int z=0;z<ndpps;++z)            for (unsigned int z=0;z<ndpps;++z)
1204        {            {
1205        FMax op;            FMax op;
1206        *result=DataMaths::reductionOp(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max()*-1);            *result=DataMaths::reductionOp(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max()*-1);
1207        loffset+=psize;            loffset+=psize;
1208        result++;            result++;
1209        }            }
1210      }          }
1211      break;          break;
1212      default:      default:
1213      throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");          throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");
1214    }    }
1215    return &(m_samples);    return &(m_samples);
1216  }  }
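// Each pass of the loops above reduces one data point (psize doubles) to a
// single number, seeded with +max() for MINVAL and -max() for MAXVAL.  A
// self-contained sketch of that per-point reduction (illustrative only, not the
// DataMaths::reductionOp signature):
#include <algorithm>
#include <cstddef>
#include <limits>

double reduce_point_min(const double* point, std::size_t psize)
{
    double acc = std::numeric_limits<double>::max();    // identity for min
    for (std::size_t i = 0; i < psize; ++i)
        acc = std::min(acc, point[i]);
    return acc;
}

double reduce_point_max(const double* point, std::size_t psize)
{
    double acc = -std::numeric_limits<double>::max();   // identity for max
    for (std::size_t i = 0; i < psize; ++i)
        acc = std::max(acc, point[i]);
    return acc;
}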
1217    
1218  const DataTypes::ValueType*  const DataTypes::RealVectorType*
1219  DataLazy::resolveNodeNP1OUT(int tid, int sampleNo, size_t& roffset)  DataLazy::resolveNodeNP1OUT(int tid, int sampleNo, size_t& roffset) const
1220  {  {
1221      // we assume that any collapsing has been done before we get here          // we assume that any collapsing has been done before we get here
1222      // since we only have one argument we don't need to think about only          // since we only have one argument we don't need to think about only
1223      // processing single points.          // processing single points.
1224    if (m_readytype!='E')    if (m_readytype!='E')
1225    {    {
1226      throw DataException("Programmer error - resolveNodeNP1OUT should only be called on expanded Data.");      throw DataException("Programmer error - resolveNodeNP1OUT should only be called on expanded Data.");
# Line 1899  DataLazy::resolveNodeNP1OUT(int tid, int Line 1230  DataLazy::resolveNodeNP1OUT(int tid, int
1230      throw DataException("Programmer error - resolveNodeNP1OUT should not be called on identity nodes.");      throw DataException("Programmer error - resolveNodeNP1OUT should not be called on identity nodes.");
1231    }    }
1232    size_t subroffset;    size_t subroffset;
1233    const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);    const RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1234    roffset=m_samplesize*tid;    roffset=m_samplesize*tid;
1235    size_t loop=0;    size_t loop=0;
1236    size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;    size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
# Line 1908  DataLazy::resolveNodeNP1OUT(int tid, int Line 1239  DataLazy::resolveNodeNP1OUT(int tid, int
1239    switch (m_op)    switch (m_op)
1240    {    {
1241      case SYM:      case SYM:
1242      for (loop=0;loop<numsteps;++loop)          for (loop=0;loop<numsteps;++loop)
1243      {          {
1244          DataMaths::symmetric(*leftres,m_left->getShape(),subroffset, m_samples, getShape(), offset);              DataMaths::symmetric(*leftres,m_left->getShape(),subroffset, m_samples, getShape(), offset);
1245          subroffset+=step;              subroffset+=step;
1246          offset+=step;              offset+=step;
1247      }          }
1248      break;          break;
1249      case NSYM:      case NSYM:
1250      for (loop=0;loop<numsteps;++loop)          for (loop=0;loop<numsteps;++loop)
1251      {          {
1252          DataMaths::nonsymmetric(*leftres,m_left->getShape(),subroffset, m_samples, getShape(), offset);              DataMaths::nonsymmetric(*leftres,m_left->getShape(),subroffset, m_samples, getShape(), offset);
1253          subroffset+=step;              subroffset+=step;
1254          offset+=step;              offset+=step;
1255      }          }
1256      break;          break;
1257      default:      default:
1258      throw DataException("Programmer error - resolveNP1OUT can not resolve operator "+opToString(m_op)+".");          throw DataException("Programmer error - resolveNP1OUT can not resolve operator "+opToString(m_op)+".");
1259    }    }
1260    return &m_samples;    return &m_samples;
1261  }  }
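// For a rank-2 point, DataMaths::symmetric / nonsymmetric compute the familiar
// decompositions sym(A) = (A + A^T)/2 and nonsym(A) = (A - A^T)/2.  A minimal
// square-matrix sketch (row-major, illustrative only):
#include <cstddef>

void symmetric_part(const double* A, double* S, std::size_t n)
{
    for (std::size_t i = 0; i < n; ++i)
        for (std::size_t j = 0; j < n; ++j)
            S[i * n + j] = 0.5 * (A[i * n + j] + A[j * n + i]);
}

void nonsymmetric_part(const double* A, double* N, std::size_t n)
{
    for (std::size_t i = 0; i < n; ++i)
        for (std::size_t j = 0; j < n; ++j)
            N[i * n + j] = 0.5 * (A[i * n + j] - A[j * n + i]);
}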
1262    
1263  const DataTypes::ValueType*  const DataTypes::RealVectorType*
1264  DataLazy::resolveNodeNP1OUT_P(int tid, int sampleNo, size_t& roffset)  DataLazy::resolveNodeNP1OUT_P(int tid, int sampleNo, size_t& roffset) const
1265  {  {
1266      // we assume that any collapsing has been done before we get here          // we assume that any collapsing has been done before we get here
1267      // since we only have one argument we don't need to think about only          // since we only have one argument we don't need to think about only
1268      // processing single points.          // processing single points.
1269    if (m_readytype!='E')    if (m_readytype!='E')
1270    {    {
1271      throw DataException("Programmer error - resolveNodeNP1OUT_P should only be called on expanded Data.");      throw DataException("Programmer error - resolveNodeNP1OUT_P should only be called on expanded Data.");
# Line 1945  DataLazy::resolveNodeNP1OUT_P(int tid, i Line 1276  DataLazy::resolveNodeNP1OUT_P(int tid, i
1276    }    }
1277    size_t subroffset;    size_t subroffset;
1278    size_t offset;    size_t offset;
1279    const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);    const RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1280    roffset=m_samplesize*tid;    roffset=m_samplesize*tid;
1281    offset=roffset;    offset=roffset;
1282    size_t loop=0;    size_t loop=0;
# Line 1955  DataLazy::resolveNodeNP1OUT_P(int tid, i Line 1286  DataLazy::resolveNodeNP1OUT_P(int tid, i
1286    switch (m_op)    switch (m_op)
1287    {    {
1288      case TRACE:      case TRACE:
1289      for (loop=0;loop<numsteps;++loop)          for (loop=0;loop<numsteps;++loop)
1290      {          {
1291              DataMaths::trace(*leftres,m_left->getShape(),subroffset, m_samples ,getShape(),offset,m_axis_offset);              DataMaths::trace(*leftres,m_left->getShape(),subroffset, m_samples ,getShape(),offset,m_axis_offset);
1292          subroffset+=instep;              subroffset+=instep;
1293          offset+=outstep;              offset+=outstep;
1294      }          }
1295      break;          break;
1296      case TRANS:      case TRANS:
1297      for (loop=0;loop<numsteps;++loop)          for (loop=0;loop<numsteps;++loop)
1298      {          {
1299              DataMaths::transpose(*leftres,m_left->getShape(),subroffset, m_samples, getShape(),offset,m_axis_offset);              DataMaths::transpose(*leftres,m_left->getShape(),subroffset, m_samples, getShape(),offset,m_axis_offset);
1300          subroffset+=instep;              subroffset+=instep;
1301          offset+=outstep;              offset+=outstep;
1302      }          }
1303      break;          break;
1304      default:      default:
1305      throw DataException("Programmer error - resolveNP1OUTP can not resolve operator "+opToString(m_op)+".");          throw DataException("Programmer error - resolveNP1OUTP can not resolve operator "+opToString(m_op)+".");
1306    }    }
1307    return &m_samples;    return &m_samples;
1308  }  }
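// In the common rank-2 case, the trace loop above reduces each point to the sum
// of its diagonal entries.  A minimal sketch (square, row-major, illustrative
// only -- the real DataMaths::trace also handles higher ranks via m_axis_offset):
#include <cstddef>

double trace_rank2(const double* A, std::size_t n)
{
    double sum = 0.0;
    for (std::size_t i = 0; i < n; ++i)
        sum += A[i * n + i];        // diagonal entries only
    return sum;
}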
1309    
1310    
1311  const DataTypes::ValueType*  const DataTypes::RealVectorType*
1312  DataLazy::resolveNodeNP1OUT_2P(int tid, int sampleNo, size_t& roffset)  DataLazy::resolveNodeNP1OUT_2P(int tid, int sampleNo, size_t& roffset) const
1313  {  {
1314    if (m_readytype!='E')    if (m_readytype!='E')
1315    {    {
# Line 1990  DataLazy::resolveNodeNP1OUT_2P(int tid, Line 1321  DataLazy::resolveNodeNP1OUT_2P(int tid,
1321    }    }
1322    size_t subroffset;    size_t subroffset;
1323    size_t offset;    size_t offset;
1324    const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);    const RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1325    roffset=m_samplesize*tid;    roffset=m_samplesize*tid;
1326    offset=roffset;    offset=roffset;
1327    size_t loop=0;    size_t loop=0;
# Line 2000  DataLazy::resolveNodeNP1OUT_2P(int tid, Line 1331  DataLazy::resolveNodeNP1OUT_2P(int tid,
1331    switch (m_op)    switch (m_op)
1332    {    {
1333      case SWAP:      case SWAP:
1334      for (loop=0;loop<numsteps;++loop)          for (loop=0;loop<numsteps;++loop)
1335      {          {
1336              DataMaths::swapaxes(*leftres,m_left->getShape(),subroffset, m_samples, getShape(),offset, m_axis_offset, m_transpose);              DataMaths::swapaxes(*leftres,m_left->getShape(),subroffset, m_samples, getShape(),offset, m_axis_offset, m_transpose);
1337          subroffset+=instep;              subroffset+=instep;
1338          offset+=outstep;              offset+=outstep;
1339      }          }
1340      break;          break;
1341      default:      default:
1342      throw DataException("Programmer error - resolveNodeNP1OUT2P can not resolve operator "+opToString(m_op)+".");          throw DataException("Programmer error - resolveNodeNP1OUT2P can not resolve operator "+opToString(m_op)+".");
1343    }    }
1344    return &m_samples;    return &m_samples;
1345  }  }
1346    
1347    const DataTypes::RealVectorType*
1348    DataLazy::resolveNodeCondEval(int tid, int sampleNo, size_t& roffset) const
1349    {
1350      if (m_readytype!='E')
1351      {
1352        throw DataException("Programmer error - resolveNodeCondEval should only be called on expanded Data.");
1353      }
1354      if (m_op!=CONDEVAL)
1355      {
1356        throw DataException("Programmer error - resolveNodeCondEval should only be called on CONDEVAL nodes.");
1357      }
1358      size_t subroffset;
1359    
1360      const RealVectorType* maskres=m_mask->resolveNodeSample(tid, sampleNo, subroffset);
1361      const RealVectorType* srcres=0;
1362      if ((*maskres)[subroffset]>0)
1363      {
1364            srcres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1365      }
1366      else
1367      {
1368            srcres=m_right->resolveNodeSample(tid, sampleNo, subroffset);
1369      }
1370    
1371      // Now we need to copy the result
1372    
1373      roffset=m_samplesize*tid;
1374      for (int i=0;i<m_samplesize;++i)
1375      {
1376            m_samples[roffset+i]=(*srcres)[subroffset+i];  
1377      }
1378    
1379      return &m_samples;
1380    }
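// CONDEVAL picks, per sample, either the left or the right child depending on
// the mask value for that sample, then copies the chosen sample verbatim into
// m_samples.  A self-contained sketch of that selection (illustrative only):
#include <vector>

std::vector<double> cond_eval_sample(double maskValue,
                                     const std::vector<double>& whenTrue,
                                     const std::vector<double>& whenFalse)
{
    // mask > 0 selects the 'true' branch, anything else the 'false' branch
    return (maskValue > 0) ? whenTrue : whenFalse;
}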
1381    
1382  // This method assumes that any subexpressions which evaluate to Constant or Tagged Data  // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1383  // have already been collapsed to IDENTITY. So we must have at least one expanded child.  // have already been collapsed to IDENTITY. So we must have at least one expanded child.
# Line 2024  DataLazy::resolveNodeNP1OUT_2P(int tid, Line 1388  DataLazy::resolveNodeNP1OUT_2P(int tid,
1388  // There is an additional complication when scalar operations are considered.  // There is an additional complication when scalar operations are considered.
1389  // For example, 2+Vector.  // For example, 2+Vector.
1390  // In this case each double within the point is treated individually  // In this case each double within the point is treated individually
1391  const DataTypes::ValueType*  const DataTypes::RealVectorType*
1392  DataLazy::resolveNodeBinary(int tid, int sampleNo, size_t& roffset)  DataLazy::resolveNodeBinary(int tid, int sampleNo, size_t& roffset) const
1393  {  {
1394  LAZYDEBUG(cout << "Resolve binary: " << toString() << endl;)  LAZYDEBUG(cout << "Resolve binary: " << toString() << endl;)
1395    
1396    size_t lroffset=0, rroffset=0;    // offsets in the left and right result vectors    size_t lroffset=0, rroffset=0;        // offsets in the left and right result vectors
1397      // first work out which of the children are expanded          // first work out which of the children are expanded
1398    bool leftExp=(m_left->m_readytype=='E');    bool leftExp=(m_left->m_readytype=='E');
1399    bool rightExp=(m_right->m_readytype=='E');    bool rightExp=(m_right->m_readytype=='E');
1400    if (!leftExp && !rightExp)    if (!leftExp && !rightExp)
1401    {    {
1402      throw DataException("Programmer Error - please use collapse if neither argument has type 'E'.");          throw DataException("Programmer Error - please use collapse if neither argument has type 'E'.");
1403    }    }
1404    bool leftScalar=(m_left->getRank()==0);    bool leftScalar=(m_left->getRank()==0);
1405    bool rightScalar=(m_right->getRank()==0);    bool rightScalar=(m_right->getRank()==0);
1406    if ((m_left->getRank()!=m_right->getRank()) && (!leftScalar && !rightScalar))    if ((m_left->getRank()!=m_right->getRank()) && (!leftScalar && !rightScalar))
1407    {    {
1408      throw DataException("resolveBinary - ranks of arguments must match unless one of them is scalar.");          throw DataException("resolveBinary - ranks of arguments must match unless one of them is scalar.");
1409    }    }
1410    size_t leftsize=m_left->getNoValues();    size_t leftsize=m_left->getNoValues();
1411    size_t rightsize=m_right->getNoValues();    size_t rightsize=m_right->getNoValues();
1412    size_t chunksize=1;           // how many doubles will be processed in one go    size_t chunksize=1;                   // how many doubles will be processed in one go
1413    int leftstep=0;       // how far should the left offset advance after each step    int leftstep=0;               // how far should the left offset advance after each step
1414    int rightstep=0;    int rightstep=0;
1415    int numsteps=0;       // total number of steps for the inner loop    int numsteps=0;               // total number of steps for the inner loop
1416    int oleftstep=0;  // the o variables refer to the outer loop    int oleftstep=0;      // the o variables refer to the outer loop
1417    int orightstep=0; // The outer loop is only required in cases where there is an extended scalar    int orightstep=0;     // The outer loop is only required in cases where there is an extended scalar
1418    int onumsteps=1;    int onumsteps=1;
1419        
1420    bool LES=(leftExp && leftScalar); // Left is an expanded scalar    bool LES=(leftExp && leftScalar);     // Left is an expanded scalar
1421    bool RES=(rightExp && rightScalar);    bool RES=(rightExp && rightScalar);
1422    bool LS=(!leftExp && leftScalar); // left is a single scalar    bool LS=(!leftExp && leftScalar);     // left is a single scalar
1423    bool RS=(!rightExp && rightScalar);    bool RS=(!rightExp && rightScalar);
1424    bool LN=(!leftExp && !leftScalar);    // left is a single non-scalar    bool LN=(!leftExp && !leftScalar);    // left is a single non-scalar
1425    bool RN=(!rightExp && !rightScalar);    bool RN=(!rightExp && !rightScalar);
1426    bool LEN=(leftExp && !leftScalar);    // left is an expanded non-scalar    bool LEN=(leftExp && !leftScalar);    // left is an expanded non-scalar
1427    bool REN=(rightExp && !rightScalar);    bool REN=(rightExp && !rightScalar);
1428    
1429    if ((LES && RES) || (LEN && REN)) // both are Expanded scalars or both are expanded non-scalars    if ((LES && RES) || (LEN && REN))     // both are Expanded scalars or both are expanded non-scalars
1430    {    {
1431      chunksize=m_left->getNumDPPSample()*leftsize;          chunksize=m_left->getNumDPPSample()*leftsize;
1432      leftstep=0;          leftstep=0;
1433      rightstep=0;          rightstep=0;
1434      numsteps=1;          numsteps=1;
1435    }    }
1436    else if (LES || RES)    else if (LES || RES)
1437    {    {
1438      chunksize=1;          chunksize=1;
1439      if (LES)        // left is an expanded scalar          if (LES)                // left is an expanded scalar
1440      {          {
1441          if (RS)                  if (RS)
1442          {                  {
1443             leftstep=1;                     leftstep=1;
1444             rightstep=0;                     rightstep=0;
1445             numsteps=m_left->getNumDPPSample();                     numsteps=m_left->getNumDPPSample();
1446          }                  }
1447          else        // RN or REN                  else            // RN or REN
1448          {                  {
1449             leftstep=0;                     leftstep=0;
1450             oleftstep=1;                     oleftstep=1;
1451             rightstep=1;                     rightstep=1;
1452             orightstep=(RN ? -(int)rightsize : 0);                     orightstep=(RN ? -(int)rightsize : 0);
1453             numsteps=rightsize;                     numsteps=rightsize;
1454             onumsteps=m_left->getNumDPPSample();                     onumsteps=m_left->getNumDPPSample();
1455          }                  }
1456      }          }
1457      else        // right is an expanded scalar          else            // right is an expanded scalar
1458      {          {
1459          if (LS)                  if (LS)
1460          {                  {
1461             rightstep=1;                     rightstep=1;
1462             leftstep=0;                     leftstep=0;
1463             numsteps=m_right->getNumDPPSample();                     numsteps=m_right->getNumDPPSample();
1464          }                  }
1465          else                  else
1466          {                  {
1467             rightstep=0;                     rightstep=0;
1468             orightstep=1;                     orightstep=1;
1469             leftstep=1;                     leftstep=1;
1470             oleftstep=(LN ? -(int)leftsize : 0);                     oleftstep=(LN ? -(int)leftsize : 0);
1471             numsteps=leftsize;                     numsteps=leftsize;
1472             onumsteps=m_right->getNumDPPSample();                     onumsteps=m_right->getNumDPPSample();
1473          }                  }
1474      }          }
1475    }    }
1476    else  // this leaves (LEN, RS), (LEN, RN) and their transposes    else  // this leaves (LEN, RS), (LEN, RN) and their transposes
1477    {    {
1478      if (LEN)    // and Right will be a single value          if (LEN)        // and Right will be a single value
1479      {          {
1480          chunksize=rightsize;                  chunksize=rightsize;
1481          leftstep=rightsize;                  leftstep=rightsize;
1482          rightstep=0;                  rightstep=0;
1483          numsteps=m_left->getNumDPPSample();                  numsteps=m_left->getNumDPPSample();
1484          if (RS)                  if (RS)
1485          {                  {
1486             numsteps*=leftsize;                     numsteps*=leftsize;
1487          }                  }
1488      }          }
1489      else    // REN          else    // REN
1490      {          {
1491          chunksize=leftsize;                  chunksize=leftsize;
1492          rightstep=leftsize;                  rightstep=leftsize;
1493          leftstep=0;                  leftstep=0;
1494          numsteps=m_right->getNumDPPSample();                  numsteps=m_right->getNumDPPSample();
1495          if (LS)                  if (LS)
1496          {                  {
1497             numsteps*=rightsize;                     numsteps*=rightsize;
1498          }                  }
1499      }          }
1500    }    }
1501    
1502    int resultStep=max(leftstep,rightstep);   // only one (at most) should be !=0    int resultStep=max(leftstep,rightstep);       // only one (at most) should be !=0
1503      // Get the values of sub-expressions          // Get the values of sub-expressions
1504    const ValueType* left=m_left->resolveNodeSample(tid,sampleNo,lroffset);      const RealVectorType* left=m_left->resolveNodeSample(tid,sampleNo,lroffset);      
1505    const ValueType* right=m_right->resolveNodeSample(tid,sampleNo,rroffset);    const RealVectorType* right=m_right->resolveNodeSample(tid,sampleNo,rroffset);
1506  LAZYDEBUG(cout << "Post sub calls in " << toString() << endl;)  LAZYDEBUG(cout << "Post sub calls in " << toString() << endl;)
1507  LAZYDEBUG(cout << "shapes=" << DataTypes::shapeToString(m_left->getShape()) << "," << DataTypes::shapeToString(m_right->getShape()) << endl;)  LAZYDEBUG(cout << "shapes=" << DataTypes::shapeToString(m_left->getShape()) << "," << DataTypes::shapeToString(m_right->getShape()) << endl;)
1508  LAZYDEBUG(cout << "chunksize=" << chunksize << endl << "leftstep=" << leftstep << " rightstep=" << rightstep;)  LAZYDEBUG(cout << "chunksize=" << chunksize << endl << "leftstep=" << leftstep << " rightstep=" << rightstep;)
# Line 2152  LAZYDEBUG(cout << "Right res["<< rroffse Line 1516  LAZYDEBUG(cout << "Right res["<< rroffse
1516    
1517    
1518    roffset=m_samplesize*tid;    roffset=m_samplesize*tid;
1519    double* resultp=&(m_samples[roffset]);        // results are stored at the vector offset we recieved    double* resultp=&(m_samples[roffset]);                // results are stored at the vector offset we received
1520    switch(m_op)    switch(m_op)
1521    {    {
1522      case ADD:      case ADD:
1523          PROC_OP(NO_ARG,plus<double>());          //PROC_OP(NO_ARG,plus<double>());
1524      break;        DataMaths::binaryOpVectorLazyHelper<real_t, real_t, real_t>(resultp,
1525                 &(*left)[0],
1526                 &(*right)[0],
1527                 chunksize,
1528                 onumsteps,
1529                 numsteps,
1530                 resultStep,
1531                 leftstep,
1532                 rightstep,
1533                 oleftstep,
1534                 orightstep,
1535                 lroffset,
1536                 rroffset,
1537                 escript::ESFunction::PLUSF);  
1538            break;
1539      case SUB:      case SUB:
1540      PROC_OP(NO_ARG,minus<double>());        DataMaths::binaryOpVectorLazyHelper<real_t, real_t, real_t>(resultp,
1541      break;               &(*left)[0],
1542                 &(*right)[0],
1543                 chunksize,
1544                 onumsteps,
1545                 numsteps,
1546                 resultStep,
1547                 leftstep,
1548                 rightstep,
1549                 oleftstep,
1550                 orightstep,
1551                 lroffset,
1552                 rroffset,
1553                 escript::ESFunction::MINUSF);        
1554            //PROC_OP(NO_ARG,minus<double>());
1555            break;
1556      case MUL:      case MUL:
1557      PROC_OP(NO_ARG,multiplies<double>());          //PROC_OP(NO_ARG,multiplies<double>());
1558      break;        DataMaths::binaryOpVectorLazyHelper<real_t, real_t, real_t>(resultp,
1559                 &(*left)[0],
1560                 &(*right)[0],
1561                 chunksize,
1562                 onumsteps,
1563                 numsteps,
1564                 resultStep,
1565                 leftstep,
1566                 rightstep,
1567                 oleftstep,
1568                 orightstep,
1569                 lroffset,
1570                 rroffset,
1571                 escript::ESFunction::MULTIPLIESF);      
1572            break;
1573      case DIV:      case DIV:
1574      PROC_OP(NO_ARG,divides<double>());          //PROC_OP(NO_ARG,divides<double>());
1575      break;        DataMaths::binaryOpVectorLazyHelper<real_t, real_t, real_t>(resultp,
1576                 &(*left)[0],
1577                 &(*right)[0],
1578                 chunksize,
1579                 onumsteps,
1580                 numsteps,
1581                 resultStep,
1582                 leftstep,
1583                 rightstep,
1584                 oleftstep,
1585                 orightstep,
1586                 lroffset,
1587                 rroffset,
1588                 escript::ESFunction::DIVIDESF);          
1589            break;
1590      case POW:      case POW:
1591         PROC_OP(double (double,double),::pow);         //PROC_OP(double (double,double),::pow);
1592      break;        DataMaths::binaryOpVectorLazyHelper<real_t, real_t, real_t>(resultp,
1593                 &(*left)[0],
1594                 &(*right)[0],
1595                 chunksize,
1596                 onumsteps,
1597                 numsteps,
1598                 resultStep,
1599                 leftstep,
1600                 rightstep,
1601                 oleftstep,
1602                 orightstep,
1603                 lroffset,
1604                 rroffset,
1605                 escript::ESFunction::POWF);          
1606            break;
1607      default:      default:
1608      throw DataException("Programmer error - resolveBinary can not resolve operator "+opToString(m_op)+".");          throw DataException("Programmer error - resolveBinary can not resolve operator "+opToString(m_op)+".");
1609    }    }
1610  LAZYDEBUG(cout << "Result res[" << roffset<< "]" << m_samples[roffset] << endl;)  LAZYDEBUG(cout << "Result res[" << roffset<< "]" << m_samples[roffset] << endl;)
1611    return &m_samples;    return &m_samples;
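
The chunk/step bookkeeping above drives a three-level loop: an inner loop over chunksize doubles, a middle loop applying the per-step strides numsteps times, and an outer loop (used only for extended scalars) applying the o* strides. A sketch of that shape, assuming this is roughly what the retired PROC_OP macro expanded to and what binaryOpVectorLazyHelper is expected to do internally; signed offsets are used here so the negative outer steps stay well defined:

#include <cstddef>
#include <functional>

// Sketch only: inner loop handles 'chunksize' doubles, middle loop advances
// the per-step strides 'numsteps' times, outer loop applies the 'o*' strides.
template <typename BinOp>
void binaryOpStepped(double* result, const double* left, const double* right,
                     std::size_t chunksize, int onumsteps, int numsteps,
                     int resultStep, int leftstep, int rightstep,
                     int oleftstep, int orightstep,
                     std::ptrdiff_t lroffset, std::ptrdiff_t rroffset, BinOp op)
{
    for (int outer = 0; outer < onumsteps; ++outer)
    {
        for (int step = 0; step < numsteps; ++step)
        {
            for (std::size_t i = 0; i < chunksize; ++i)
            {
                result[i] = op(left[lroffset + i], right[rroffset + i]);
            }
            result += resultStep;
            lroffset += leftstep;
            rroffset += rightstep;
        }
        lroffset += oleftstep;      // only non-zero when an operand is an
        rroffset += orightstep;     // extended scalar (see setup above)
    }
}

// e.g. for the ADD case:
//   binaryOpStepped(resultp, &(*left)[0], &(*right)[0], chunksize, onumsteps,
//                   numsteps, resultStep, leftstep, rightstep, oleftstep,
//                   orightstep, lroffset, rroffset, std::plus<double>());
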
# Line 2181  LAZYDEBUG(cout << "Result res[" << roffs Line 1615  LAZYDEBUG(cout << "Result res[" << roffs
1615  // This method assumes that any subexpressions which evaluate to Constant or Tagged Data  // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1616  // have already been collapsed to IDENTITY. So we must have at least one expanded child.  // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1617  // unlike the other resolve helpers, we must treat these datapoints separately.  // unlike the other resolve helpers, we must treat these datapoints separately.
1618  const DataTypes::ValueType*  const DataTypes::RealVectorType*
1619  DataLazy::resolveNodeTProd(int tid, int sampleNo, size_t& roffset)  DataLazy::resolveNodeTProd(int tid, int sampleNo, size_t& roffset) const
1620  {  {
1621  LAZYDEBUG(cout << "Resolve TensorProduct: " << toString() << endl;)  LAZYDEBUG(cout << "Resolve TensorProduct: " << toString() << endl;)
1622    
1623    size_t lroffset=0, rroffset=0;    // offsets in the left and right result vectors    size_t lroffset=0, rroffset=0;        // offsets in the left and right result vectors
1624      // first work out which of the children are expanded          // first work out which of the children are expanded
1625    bool leftExp=(m_left->m_readytype=='E');    bool leftExp=(m_left->m_readytype=='E');
1626    bool rightExp=(m_right->m_readytype=='E');    bool rightExp=(m_right->m_readytype=='E');
1627    int steps=getNumDPPSample();    int steps=getNumDPPSample();
1628    int leftStep=(leftExp? m_left->getNoValues() : 0);        // do not have scalars as input to this method    int leftStep=(leftExp? m_left->getNoValues() : 0);            // do not have scalars as input to this method
1629    int rightStep=(rightExp?m_right->getNoValues() : 0);    int rightStep=(rightExp?m_right->getNoValues() : 0);
1630    
1631    int resultStep=getNoValues();    int resultStep=getNoValues();
1632    roffset=m_samplesize*tid;    roffset=m_samplesize*tid;
1633    size_t offset=roffset;    size_t offset=roffset;
1634    
1635    const ValueType* left=m_left->resolveNodeSample(tid, sampleNo, lroffset);    const RealVectorType* left=m_left->resolveNodeSample(tid, sampleNo, lroffset);
1636    
1637    const ValueType* right=m_right->resolveNodeSample(tid, sampleNo, rroffset);    const RealVectorType* right=m_right->resolveNodeSample(tid, sampleNo, rroffset);
1638    
1639  LAZYDEBUG(cerr << "[Left shape]=" << DataTypes::shapeToString(m_left->getShape()) << "\n[Right shape]=" << DataTypes::shapeToString(m_right->getShape()) << " result=" <<DataTypes::shapeToString(getShape()) <<  endl;  LAZYDEBUG(cerr << "[Left shape]=" << DataTypes::shapeToString(m_left->getShape()) << "\n[Right shape]=" << DataTypes::shapeToString(m_right->getShape()) << " result=" <<DataTypes::shapeToString(getShape()) <<  endl;
1640  cout << getNoValues() << endl;)  cout << getNoValues() << endl;)
# Line 2214  LAZYDEBUG(cout << "m_samplesize=" << m_s Line 1648  LAZYDEBUG(cout << "m_samplesize=" << m_s
1648  LAZYDEBUG(cout << "outputshape=" << DataTypes::shapeToString(getShape()) << endl;)  LAZYDEBUG(cout << "outputshape=" << DataTypes::shapeToString(getShape()) << endl;)
1649  LAZYDEBUG(cout << "DPPS=" << m_right->getNumDPPSample() <<"."<<endl;)  LAZYDEBUG(cout << "DPPS=" << m_right->getNumDPPSample() <<"."<<endl;)
1650    
1651    double* resultp=&(m_samples[offset]);     // results are stored at the vector offset we recieved    double* resultp=&(m_samples[offset]);         // results are stored at the vector offset we received
1652    switch(m_op)    switch(m_op)
1653    {    {
1654      case PROD:      case PROD:
1655      for (int i=0;i<steps;++i,resultp+=resultStep)          for (int i=0;i<steps;++i,resultp+=resultStep)
1656      {          {
1657            const double *ptr_0 = &((*left)[lroffset]);            const double *ptr_0 = &((*left)[lroffset]);
1658            const double *ptr_1 = &((*right)[rroffset]);            const double *ptr_1 = &((*right)[rroffset]);
1659    
1660  LAZYDEBUG(cout << DataTypes::pointToString(*left, m_left->getShape(),lroffset,"LEFT") << endl;)  LAZYDEBUG(cout << DataTypes::pointToString(*left, m_left->getShape(),lroffset,"LEFT") << endl;)
1661  LAZYDEBUG(cout << DataTypes::pointToString(*right,m_right->getShape(),rroffset, "RIGHT") << endl;)  LAZYDEBUG(cout << DataTypes::pointToString(*right,m_right->getShape(),rroffset, "RIGHT") << endl;)
1662    
1663            matrix_matrix_product(m_SL, m_SM, m_SR, ptr_0, ptr_1, resultp, m_transpose);            matrix_matrix_product(m_SL, m_SM, m_SR, ptr_0, ptr_1, resultp, m_transpose);
1664    
1665        lroffset+=leftStep;            lroffset+=leftStep;
1666        rroffset+=rightStep;            rroffset+=rightStep;
1667      }          }
1668      break;          break;
1669      default:      default:
1670      throw DataException("Programmer error - resolveTProduct can not resolve operator "+opToString(m_op)+".");          throw DataException("Programmer error - resolveTProduct can not resolve operator "+opToString(m_op)+".");
1671    }    }
1672    roffset=offset;    roffset=offset;
1673    return &m_samples;    return &m_samples;
1674  }  }
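
matrix_matrix_product is defined elsewhere (DataMaths); as a rough model, the per-data-point work in the PROD case above amounts to contracting an SL x SM block with an SM x SR block. A sketch under that assumption, with plain row-major indexing chosen purely for clarity (the real routine follows escript's own storage order and honours the m_transpose flag):

#include <cstddef>

// Rough model only: contract an SL x SM block with an SM x SR block into an
// SL x SR block -- the per-point work behind the PROD case above.
void productSketch(std::size_t SL, std::size_t SM, std::size_t SR,
                   const double* A, const double* B, double* C)
{
    for (std::size_t i = 0; i < SL; ++i)
    {
        for (std::size_t j = 0; j < SR; ++j)
        {
            double sum = 0.0;
            for (std::size_t k = 0; k < SM; ++k)
            {
                sum += A[i * SM + k] * B[k * SR + j];
            }
            C[i * SR + j] = sum;
        }
    }
}
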
 #endif //LAZY_NODE_STORAGE  
   
 /*  
   \brief Compute the value of the expression for the given sample.  
   \return Vector which stores the value of the subexpression for the given sample.  
   \param v A vector to store intermediate results.  
   \param offset Index in v to begin storing results.  
   \param sampleNo Sample number to evaluate.  
   \param roffset (output parameter) the offset in the return vector where the result begins.  
   
   The return value will be an existing vector so do not deallocate it.  
 */  
 // the vector and the offset are a place where the method could write its data if it wishes  
 // it is not obligated to do so. For example, if it has its own storage already, it can use that.  
 // Hence the return value to indicate where the data is actually stored.  
 // Regardless, the storage should be assumed to be used, even if it isn't.  
   
 // the roffset is the offset within the returned vector where the data begins  
 const DataTypes::ValueType*  
 DataLazy::resolveVectorSample(ValueType& v, size_t offset, int sampleNo, size_t& roffset)  
 {  
 LAZYDEBUG(cout << "Resolve sample " << toString() << endl;)  
     // collapse so we have a 'E' node or an IDENTITY for some other type  
   if (m_readytype!='E' && m_op!=IDENTITY)  
   {  
     collapse();  
   }  
   if (m_op==IDENTITY)    
   {  
     const ValueType& vec=m_id->getVectorRO();  
     if (m_readytype=='C')  
     {  
     roffset=0;  
 LAZYDEBUG(cout << "Finish  sample " << toString() << endl;)  
     return &(vec);  
     }  
     roffset=m_id->getPointOffset(sampleNo, 0);  
 LAZYDEBUG(cout << "Finish  sample " << toString() << endl;)  
     return &(vec);  
   }  
   if (m_readytype!='E')  
   {  
     throw DataException("Programmer Error - Collapse did not produce an expanded node.");  
   }  
   switch (getOpgroup(m_op))  
   {  
   case G_UNARY:  
   case G_UNARY_P: return resolveUnary(v, offset,sampleNo,roffset);  
   case G_BINARY: return resolveBinary(v, offset,sampleNo,roffset);  
   case G_NP1OUT: return resolveNP1OUT(v, offset, sampleNo,roffset);  
   case G_NP1OUT_P: return resolveNP1OUT_P(v, offset, sampleNo,roffset);  
   case G_TENSORPROD: return resolveTProd(v,offset, sampleNo,roffset);  
   case G_NP1OUT_2P: return resolveNP1OUT_2P(v, offset, sampleNo, roffset);  
   case G_REDUCTION: return resolveReduction(v, offset, sampleNo, roffset);  
   default:  
     throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");  
   }  
1675    
 }  
1676    
1677  const DataTypes::ValueType*  const DataTypes::RealVectorType*
1678  DataLazy::resolveSample(BufferGroup& bg, int sampleNo, size_t& roffset)  DataLazy::resolveSample(int sampleNo, size_t& roffset) const
1679  {  {
1680  #ifdef _OPENMP  #ifdef _OPENMP
1681      int tid=omp_get_thread_num();          int tid=omp_get_thread_num();
1682  #else  #else
1683      int tid=0;          int tid=0;
1684  #endif  #endif
1685  #ifdef LAZY_NODE_STORAGE  
1686      return resolveNodeSample(tid, sampleNo, roffset);  #ifdef LAZY_STACK_PROF
1687            stackstart[tid]=&tid;
1688            stackend[tid]=&tid;
1689            const DataTypes::RealVectorType* r=resolveNodeSample(tid, sampleNo, roffset);
1690            size_t d=(size_t)stackstart[tid]-(size_t)stackend[tid];
1691            #pragma omp critical
1692            if (d>maxstackuse)
1693            {
1694    cout << "Max resolve Stack use " << d << endl;
1695                    maxstackuse=d;
1696            }
1697            return r;
1698  #else  #else
1699      return resolveVectorSample(bg.getBuffer(tid),bg.getOffset(tid),sampleNo,roffset);          return resolveNodeSample(tid, sampleNo, roffset);
1700  #endif  #endif
1701  }  }
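
The LAZY_STACK_PROF branch estimates stack consumption by recording the address of a local variable before the recursive resolve and comparing it with the address recorded in the deepest frame reached (stackstart/stackend are per-thread pointers updated during resolveNodeSample). A stand-alone sketch of that address-difference trick, assuming a downward-growing stack; all names here are illustrative:

#include <cstdint>
#include <iostream>

// Illustrative only: record the address of a local at the top of a recursion
// and at the deepest frame reached; the difference approximates stack use.
static std::uintptr_t stack_top = 0;
static std::uintptr_t stack_deepest = 0;

int recurse(int depth)
{
    volatile int marker = depth;                 // a local living in this frame
    std::uintptr_t here = reinterpret_cast<std::uintptr_t>(&marker);
    if (here < stack_deepest)                    // assume the stack grows downwards
    {
        stack_deepest = here;
    }
    if (depth == 0)
    {
        return marker;
    }
    return marker + recurse(depth - 1);          // non-tail call keeps frames alive
}

int main()
{
    int top = 0;
    stack_top = stack_deepest = reinterpret_cast<std::uintptr_t>(&top);
    (void)recurse(100);
    std::cout << "approximate stack use: " << (stack_top - stack_deepest)
              << " bytes" << std::endl;
    return 0;
}
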
1702    
# Line 2319  void Line 1706  void
1706  DataLazy::resolveToIdentity()  DataLazy::resolveToIdentity()
1707  {  {
1708     if (m_op==IDENTITY)     if (m_op==IDENTITY)
1709      return;          return;
 #ifndef LAZY_NODE_STORAGE  
    DataReady_ptr p=resolveVectorWorker();  
 #else  
1710     DataReady_ptr p=resolveNodeWorker();     DataReady_ptr p=resolveNodeWorker();
 #endif  
1711     makeIdentity(p);     makeIdentity(p);
1712  }  }
1713    
# Line 2340  void DataLazy::makeIdentity(const DataRe Line 1723  void DataLazy::makeIdentity(const DataRe
1723     else if(p->isExpanded()) {m_readytype='E';}     else if(p->isExpanded()) {m_readytype='E';}
1724     else if (p->isTagged()) {m_readytype='T';}     else if (p->isTagged()) {m_readytype='T';}
1725     else {throw DataException("Unknown DataReady instance in convertToIdentity constructor.");}     else {throw DataException("Unknown DataReady instance in convertToIdentity constructor.");}
    m_buffsRequired=1;  
1726     m_samplesize=p->getNumDPPSample()*p->getNoValues();     m_samplesize=p->getNumDPPSample()*p->getNoValues();
    m_maxsamplesize=m_samplesize;  
1727     m_left.reset();     m_left.reset();
1728     m_right.reset();     m_right.reset();
1729  }  }
# Line 2355  DataLazy::resolve() Line 1736  DataLazy::resolve()
1736      return m_id;      return m_id;
1737  }  }
1738    
 #ifdef LAZY_NODE_STORAGE  
1739    
1740  // This version of resolve uses storage in each node to hold results  /* This is really a static method but I think that caused problems in windows */
1741  DataReady_ptr  void
1742  DataLazy::resolveNodeWorker()  DataLazy::resolveGroupWorker(std::vector<DataLazy*>& dats)
1743  {  {
1744    if (m_readytype!='E')     // if the whole sub-expression is Constant or Tagged, then evaluate it normally    if (dats.empty())
   {  
     collapse();  
   }  
   if (m_op==IDENTITY)       // So a lazy expression of Constant or Tagged data will be returned here.  
1745    {    {
1746      return m_id;          return;
1747    }    }
1748      // from this point on we must have m_op!=IDENTITY and m_readytype=='E'    vector<DataLazy*> work;
1749    DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(),  ValueType(getNoValues()));    FunctionSpace fs=dats[0]->getFunctionSpace();
1750    ValueType& resvec=result->getVectorRW();    bool match=true;
1751    DataReady_ptr resptr=DataReady_ptr(result);    for (int i=dats.size()-1;i>=0;--i)
1752      {
1753    int sample;          if (dats[i]->m_readytype!='E')
1754    int totalsamples=getNumSamples();          {
1755    const ValueType* res=0;   // Storage for answer                  dats[i]->collapse();
1756  LAZYDEBUG(cout << "Total number of samples=" <<totalsamples << endl;)          }
1757    #pragma omp parallel for private(sample,res) schedule(static)          if (dats[i]->m_op!=IDENTITY)
1758    for (sample=0;sample<totalsamples;++sample)          {
1759    {                  work.push_back(dats[i]);
1760      size_t roffset=0;                  if (fs!=dats[i]->getFunctionSpace())
1761                    {
1762                            match=false;
1763                    }
1764            }
1765      }
1766      if (work.empty())
1767      {
1768            return;         // no work to do
1769      }
1770      if (match)    // all functionspaces match.  Yes I realise this is overly strict
1771      {             // it is possible that dats[0] is one of the objects which we discarded and
1772                    // all the other functionspaces match.
1773            vector<DataExpanded*> dep;
1774            vector<RealVectorType*> vecs;
1775            for (int i=0;i<work.size();++i)
1776            {
1777                    dep.push_back(new DataExpanded(fs,work[i]->getShape(), RealVectorType(work[i]->getNoValues())));
1778                    vecs.push_back(&(dep[i]->getVectorRW()));
1779            }
1780            int totalsamples=work[0]->getNumSamples();
1781            const RealVectorType* res=0; // Storage for answer
1782            int sample;
1783            #pragma omp parallel private(sample, res)
1784            {
1785                size_t roffset=0;
1786                #pragma omp for schedule(static)
1787                for (sample=0;sample<totalsamples;++sample)
1788                {
1789                    roffset=0;
1790                    int j;
1791                    for (j=work.size()-1;j>=0;--j)
1792                    {
1793  #ifdef _OPENMP  #ifdef _OPENMP
1794      res=resolveNodeSample(omp_get_thread_num(),sample,roffset);                      res=work[j]->resolveNodeSample(omp_get_thread_num(),sample,roffset);
1795  #else  #else
1796      res=resolveNodeSample(0,sample,roffset);                      res=work[j]->resolveNodeSample(0,sample,roffset);
1797  #endif  #endif
1798  LAZYDEBUG(cout << "Sample #" << sample << endl;)                      RealVectorType::size_type outoffset=dep[j]->getPointOffset(sample,0);
1799  LAZYDEBUG(cout << "Final res[" << roffset<< "]=" << (*res)[roffset] << (*res)[roffset]<< endl; )                      memcpy(&((*vecs[j])[outoffset]),&((*res)[roffset]),work[j]->m_samplesize*sizeof(RealVectorType::ElementType));
1800      DataVector::size_type outoffset=result->getPointOffset(sample,0);                  }
1801      memcpy(&(resvec[outoffset]),&((*res)[roffset]),m_samplesize*sizeof(DataVector::ElementType));              }
1802            }
1803            // Now we need to load the new results as identity ops into the lazy nodes
1804            for (int i=work.size()-1;i>=0;--i)
1805            {
1806                work[i]->makeIdentity(REFCOUNTNS::dynamic_pointer_cast<DataReady>(dep[i]->getPtr()));
1807            }
1808      }
1809      else  // functionspaces do not match
1810      {
1811            for (int i=0;i<work.size();++i)
1812            {
1813                    work[i]->resolveToIdentity();
1814            }
1815    }    }
   return resptr;  
1816  }  }
1817    
 #endif // LAZY_NODE_STORAGE  
1818    
1819  // To simplify the memory management, all threads operate on one large vector, rather than one each.  
1820  // Each sample is evaluated independently and copied into the result DataExpanded.  // This version of resolve uses storage in each node to hold results
1821  DataReady_ptr  DataReady_ptr
1822  DataLazy::resolveVectorWorker()  DataLazy::resolveNodeWorker()
1823  {  {
1824      if (m_readytype!='E')         // if the whole sub-expression is Constant or Tagged, then evaluate it normally
 LAZYDEBUG(cout << "Sample size=" << m_samplesize << endl;)  
 LAZYDEBUG(cout << "Buffers=" << m_buffsRequired << endl;)  
   if (m_readytype!='E')     // if the whole sub-expression is Constant or Tagged, then evaluate it normally  
1825    {    {
1826      collapse();      collapse();
1827    }    }
1828    if (m_op==IDENTITY)       // So a lazy expression of Constant or Tagged data will be returned here.    if (m_op==IDENTITY)           // So a lazy expression of Constant or Tagged data will be returned here.
1829    {    {
1830      return m_id;      return m_id;
1831    }    }
1832      // from this point on we must have m_op!=IDENTITY and m_readytype=='E'          // from this point on we must have m_op!=IDENTITY and m_readytype=='E'
1833    size_t threadbuffersize=m_maxsamplesize*(max(1,m_buffsRequired)); // Each thread needs to have enough    DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(),  RealVectorType(getNoValues()));
1834      // storage to evaluate its expression    RealVectorType& resvec=result->getVectorRW();
   int numthreads=1;  
 #ifdef _OPENMP  
   numthreads=omp_get_max_threads();  
 #endif  
   ValueType v(numthreads*threadbuffersize);  
 LAZYDEBUG(cout << "Buffer created with size=" << v.size() << endl;)  
   DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(),  ValueType(getNoValues()));  
   ValueType& resvec=result->getVectorRW();  
1835    DataReady_ptr resptr=DataReady_ptr(result);    DataReady_ptr resptr=DataReady_ptr(result);
1836    
1837    int sample;    int sample;
   size_t outoffset;     // offset in the output data  
1838    int totalsamples=getNumSamples();    int totalsamples=getNumSamples();
1839    const ValueType* res=0;   // Vector storing the answer    const RealVectorType* res=0;       // Storage for answer
   size_t resoffset=0;       // where in the vector to find the answer  
1840  LAZYDEBUG(cout << "Total number of samples=" <<totalsamples << endl;)  LAZYDEBUG(cout << "Total number of samples=" <<totalsamples << endl;)
1841    #pragma omp parallel for private(sample,resoffset,outoffset,res) schedule(static)    #pragma omp parallel private(sample,res)
   for (sample=0;sample<totalsamples;++sample)  
1842    {    {
1843  LAZYDEBUG(cout << "################################# " << sample << endl;)          size_t roffset=0;
1844    #ifdef LAZY_STACK_PROF
1845            stackstart[omp_get_thread_num()]=&roffset;
1846            stackend[omp_get_thread_num()]=&roffset;
1847    #endif
1848            #pragma omp for schedule(static)
1849            for (sample=0;sample<totalsamples;++sample)
1850            {
1851                    roffset=0;
1852  #ifdef _OPENMP  #ifdef _OPENMP
1853      res=resolveVectorSample(v,threadbuffersize*omp_get_thread_num(),sample,resoffset);                  res=resolveNodeSample(omp_get_thread_num(),sample,roffset);
1854  #else  #else
1855      res=resolveVectorSample(v,0,sample,resoffset);   // res would normally be v, but not if its a single IDENTITY op.                  res=resolveNodeSample(0,sample,roffset);
1856  #endif  #endif
1857  LAZYDEBUG(cerr << "-------------------------------- " << endl;)  LAZYDEBUG(cout << "Sample #" << sample << endl;)
1858  LAZYDEBUG(cerr<< "Copying sample#" << sample << endl;)  LAZYDEBUG(cout << "Final res[" << roffset<< "]=" << (*res)[roffset] << (*res)[roffset]<< endl; )
1859      outoffset=result->getPointOffset(sample,0);                  RealVectorType::size_type outoffset=result->getPointOffset(sample,0);
1860  LAZYDEBUG(cerr << "offset=" << outoffset << " from offset=" << resoffset << " " << m_samplesize << " doubles" << endl;)                  memcpy(&(resvec[outoffset]),&((*res)[roffset]),m_samplesize*sizeof(RealVectorType::ElementType));
1861      for (unsigned int i=0;i<m_samplesize;++i,++outoffset,++resoffset)   // copy values into the output vector          }
1862      {    }
1863  LAZYDEBUG(cerr << "outoffset=" << outoffset << " resoffset=" << resoffset << " " << (*res)[resoffset]<< endl;)  #ifdef LAZY_STACK_PROF
1864      resvec[outoffset]=(*res)[resoffset];    for (int i=0;i<getNumberOfThreads();++i)
1865      }    {
1866  LAZYDEBUG(cerr << DataTypes::pointToString(resvec,getShape(),outoffset-m_samplesize+DataTypes::noValues(getShape()),"Final result:") << endl;)          size_t r=((size_t)stackstart[i] - (size_t)stackend[i]);
1867  LAZYDEBUG(cerr << "*********************************" << endl;)  //      cout << i << " " << stackstart[i] << " .. " << stackend[i] << " = " <<  r << endl;
1868            if (r>maxstackuse)
1869            {
1870                    maxstackuse=r;
1871            }
1872    }    }
1873      cout << "Max resolve Stack use=" << maxstackuse << endl;
1874    #endif
1875    return resptr;    return resptr;
1876  }  }
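
resolveNodeWorker's fill loop is the usual per-sample parallel pattern: each sample is resolved independently on whichever thread OpenMP assigns it, and the resulting block of doubles is memcpy'd into one shared output vector. A minimal sketch of that pattern, assuming output samples are stored contiguously in sample order (the real code obtains the destination via DataExpanded::getPointOffset) and with 'resolver' as a hypothetical stand-in for resolveNodeSample:

#include <cstddef>
#include <cstring>
#include <functional>
#include <vector>
#ifdef _OPENMP
#include <omp.h>
#endif

// Sketch only: fill one shared result vector, one sample per loop iteration.
void fillResult(std::vector<double>& resvec,
                int totalsamples,
                std::size_t samplesize,
                const std::function<const double*(int tid, int sampleNo,
                                                  std::size_t& roffset)>& resolver)
{
    #pragma omp parallel
    {
        std::size_t roffset = 0;                  // private per thread
#ifdef _OPENMP
        const int tid = omp_get_thread_num();
#else
        const int tid = 0;
#endif
        #pragma omp for schedule(static)
        for (int sample = 0; sample < totalsamples; ++sample)
        {
            roffset = 0;
            const double* res = resolver(tid, sample, roffset);
            // copy the resolved sample into its slot in the shared output
            std::memcpy(&resvec[sample * samplesize], res + roffset,
                        samplesize * sizeof(double));
        }
    }
}
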
1877    
# Line 2459  std::string Line 1879  std::string
1879  DataLazy::toString() const  DataLazy::toString() const
1880  {  {
1881    ostringstream oss;    ostringstream oss;
1882    oss << "Lazy Data:";    oss << "Lazy Data: [depth=" << m_height<< "] ";
1883    if (escriptParams.getPRINT_LAZY_TREE()==0)    switch (escriptParams.getLAZY_STR_FMT())
   {  
       intoString(oss);  
   }  
   else  
1884    {    {
1885      oss << endl;    case 1:       // tree format
1886      intoTreeString(oss,"");          oss << endl;
1887            intoTreeString(oss,"");
1888            break;
1889      case 2:       // just the depth
1890            break;
1891      default:
1892            intoString(oss);
1893            break;
1894    }    }
1895    return oss.str();    return oss.str();
1896  }  }
# Line 2480  DataLazy::intoString(ostringstream& oss) Line 1903  DataLazy::intoString(ostringstream& oss)
1903    switch (getOpgroup(m_op))    switch (getOpgroup(m_op))
1904    {    {
1905    case G_IDENTITY:    case G_IDENTITY:
1906      if (m_id->isExpanded())          if (m_id->isExpanded())
1907      {          {
1908         oss << "E";             oss << "E";
1909      }          }
1910      else if (m_id->isTagged())          else if (m_id->isTagged())
1911      {          {
1912        oss << "T";            oss << "T";
1913      }          }
1914      else if (m_id->isConstant())          else if (m_id->isConstant())
1915      {          {
1916        oss << "C";            oss << "C";
1917      }          }
1918      else          else
1919      {          {
1920        oss << "?";            oss << "?";
1921      }          }
1922      oss << '@' << m_id.get();          oss << '@' << m_id.get();
1923      break;          break;
1924    case G_BINARY:    case G_BINARY:
1925      oss << '(';          oss << '(';
1926      m_left->intoString(oss);          m_left->intoString(oss);
1927      oss << ' ' << opToString(m_op) << ' ';          oss << ' ' << opToString(m_op) << ' ';
1928      m_right->intoString(oss);          m_right->intoString(oss);
1929      oss << ')';          oss << ')';
1930      break;          break;
1931    case G_UNARY:    case G_UNARY:
1932    case G_UNARY_P:    case G_UNARY_P:
1933    case G_NP1OUT:    case G_NP1OUT:
1934    case G_NP1OUT_P:    case G_NP1OUT_P:
1935    case G_REDUCTION:    case G_REDUCTION:
1936      oss << opToString(m_op) << '(';          oss << opToString(m_op) << '(';
1937      m_left->intoString(oss);          m_left->intoString(oss);
1938      oss << ')';          oss << ')';
1939      break;          break;
1940    case G_TENSORPROD:    case G_TENSORPROD:
1941      oss << opToString(m_op) << '(';          oss << opToString(m_op) << '(';
1942      m_left->intoString(oss);          m_left->intoString(oss);
1943      oss << ", ";          oss << ", ";
1944      m_right->intoString(oss);          m_right->intoString(oss);
1945      oss << ')';          oss << ')';
1946      break;          break;
1947    case G_NP1OUT_2P:    case G_NP1OUT_2P:
1948      oss << opToString(m_op) << '(';          oss << opToString(m_op) << '(';
1949      m_left->intoString(oss);          m_left->intoString(oss);
1950      oss << ", " << m_axis_offset << ", " << m_transpose;          oss << ", " << m_axis_offset << ", " << m_transpose;
1951      oss << ')';          oss << ')';
1952      break;          break;
1953      case G_CONDEVAL:
1954            oss << opToString(m_op)<< '(' ;
1955            m_mask->intoString(oss);
1956            oss << " ? ";
1957            m_left->intoString(oss);
1958            oss << " : ";
1959            m_right->intoString(oss);
1960            oss << ')';
1961            break;
1962    default:    default:
1963      oss << "UNKNOWN";          oss << "UNKNOWN";
1964    }    }
1965  }  }
1966    
# Line 2540  DataLazy::intoTreeString(ostringstream& Line 1972  DataLazy::intoTreeString(ostringstream&
1972    switch (getOpgroup(m_op))    switch (getOpgroup(m_op))
1973    {    {
1974    case G_IDENTITY:    case G_IDENTITY:
1975      if (m_id->isExpanded())          if (m_id->isExpanded())
1976      {          {
1977         oss << "E";             oss << "E";
1978      }          }
1979      else if (m_id->isTagged())          else if (m_id->isTagged())
1980      {          {
1981        oss << "T";            oss << "T";
1982      }          }
1983      else if (m_id->isConstant())          else if (m_id->isConstant())
1984      {          {
1985        oss << "C";            oss << "C";
1986      }          }
1987      else          else
1988      {          {
1989        oss << "?";            oss << "?";
1990      }          }
1991      oss << '@' << m_id.get() << endl;          oss << '@' << m_id.get() << endl;
1992      break;          break;
1993    case G_BINARY:    case G_BINARY:
1994      oss << opToString(m_op) << endl;          oss << opToString(m_op) << endl;
1995      indent+='.';          indent+='.';
1996      m_left->intoTreeString(oss, indent);          m_left->intoTreeString(oss, indent);
1997      m_right->intoTreeString(oss, indent);          m_right->intoTreeString(oss, indent);
1998      break;          break;
1999    case G_UNARY:    case G_UNARY:
2000    case G_UNARY_P:    case G_UNARY_P:
2001    case G_NP1OUT:    case G_NP1OUT:
2002    case G_NP1OUT_P:    case G_NP1OUT_P:
2003    case G_REDUCTION:    case G_REDUCTION:
2004      oss << opToString(m_op) << endl;          oss << opToString(m_op) << endl;
2005      indent+='.';          indent+='.';
2006      m_left->intoTreeString(oss, indent);          m_left->intoTreeString(oss, indent);
2007      break;          break;
2008    case G_TENSORPROD:    case G_TENSORPROD:
2009      oss << opToString(m_op) << endl;          oss << opToString(m_op) << endl;
2010      indent+='.';          indent+='.';
2011      m_left->intoTreeString(oss, indent);          m_left->intoTreeString(oss, indent);
2012      m_right->intoTreeString(oss, indent);          m_right->intoTreeString(oss, indent);
2013      break;          break;
2014    case G_NP1OUT_2P:    case G_NP1OUT_2P:
2015      oss << opToString(m_op) << ", " << m_axis_offset << ", " << m_transpose<< endl;          oss << opToString(m_op) << ", " << m_axis_offset << ", " << m_transpose<< endl;
2016      indent+='.';          indent+='.';
2017      m_left->intoTreeString(oss, indent);          m_left->intoTreeString(oss, indent);
2018      break;          break;
2019    default:    default:
2020      oss << "UNKNOWN";          oss << "UNKNOWN";
2021    }    }
2022  }  }
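
To make the two textual forms concrete: for a lazy expression along the lines of sin(E + C*C), the routines above would produce roughly the following (operator spellings, nesting and addresses are made up for illustration):

// Flat form from intoString (default LAZY_STR_FMT):
//   sin((E@0x1234 + (C@0x5678 * C@0x9abc)))
// Tree form from intoTreeString (one '.' of indent per level):
//   sin
//   .+
//   ..E@0x1234
//   ..*
//   ...C@0x5678
//   ...C@0x9abc
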
2023    
2024    
2025  DataAbstract*  DataAbstract*
2026  DataLazy::deepCopy()  DataLazy::deepCopy() const
2027  {  {
2028    switch (getOpgroup(m_op))    switch (getOpgroup(m_op))
2029    {    {
2030    case G_IDENTITY:  return new DataLazy(m_id->deepCopy()->getPtr());    case G_IDENTITY:  return new DataLazy(m_id->deepCopy()->getPtr());
2031    case G_UNARY:    case G_UNARY:
2032    case G_REDUCTION:      return new DataLazy(m_left->deepCopy()->getPtr(),m_op);    case G_REDUCTION:      return new DataLazy(m_left->deepCopy()->getPtr(),m_op);
2033    case G_UNARY_P:   return new DataLazy(m_left->deepCopy()->getPtr(), m_op, m_tol);    case G_UNARY_P:       return new DataLazy(m_left->deepCopy()->getPtr(), m_op, m_tol);
2034    case G_BINARY:    return new DataLazy(m_left->deepCopy()->getPtr(),m_right->deepCopy()->getPtr(),m_op);    case G_BINARY:        return new DataLazy(m_left->deepCopy()->getPtr(),m_right->deepCopy()->getPtr(),m_op);
2035    case G_NP1OUT: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(),m_op);    case G_NP1OUT: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(),m_op);
2036    case G_TENSORPROD: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);    case G_TENSORPROD: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
2037    case G_NP1OUT_P:   return new DataLazy(m_left->deepCopy()->getPtr(),m_op,  m_axis_offset);    case G_NP1OUT_P:   return new DataLazy(m_left->deepCopy()->getPtr(),m_op,  m_axis_offset);
2038    case G_NP1OUT_2P:  return new DataLazy(m_left->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);    case G_NP1OUT_2P:  return new DataLazy(m_left->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
2039    default:    default:
2040      throw DataException("Programmer error - do not know how to deepcopy operator "+opToString(m_op)+".");          throw DataException("Programmer error - do not know how to deepcopy operator "+opToString(m_op)+".");
2041    }    }
2042  }  }
2043    
# Line 2617  DataLazy::deepCopy() Line 2049  DataLazy::deepCopy()
2049  // or it could be some function of the lengths of the DataReady instances which  // or it could be some function of the lengths of the DataReady instances which
2050  // form part of the expression.  // form part of the expression.
2051  // Rather than have people making assumptions, I have disabled the method.  // Rather than have people making assumptions, I have disabled the method.
2052  DataTypes::ValueType::size_type  DataTypes::RealVectorType::size_type
2053  DataLazy::getLength() const  DataLazy::getLength() const
2054  {  {
2055    throw DataException("getLength() does not make sense for lazy data.");    throw DataException("getLength() does not make sense for lazy data.");
# Line 2632  DataLazy::getSlice(const DataTypes::Regi Line 2064  DataLazy::getSlice(const DataTypes::Regi
2064    
2065    
2066  // To do this we need to rely on our child nodes  // To do this we need to rely on our child nodes
2067  DataTypes::ValueType::size_type  DataTypes::RealVectorType::size_type
2068  DataLazy::getPointOffset(int sampleNo,  DataLazy::getPointOffset(int sampleNo,
2069                   int dataPointNo)                   int dataPointNo)
2070  {  {
2071    if (m_op==IDENTITY)    if (m_op==IDENTITY)
2072    {    {
2073      return m_id->getPointOffset(sampleNo,dataPointNo);          return m_id->getPointOffset(sampleNo,dataPointNo);
2074    }    }
2075    if (m_readytype!='E')    if (m_readytype!='E')
2076    {    {
2077      collapse();          collapse();
2078      return m_id->getPointOffset(sampleNo,dataPointNo);          return m_id->getPointOffset(sampleNo,dataPointNo);
2079    }    }
2080    // at this point we do not have an identity node and the expression will be Expanded    // at this point we do not have an identity node and the expression will be Expanded
2081    // so we only need to know which child to ask    // so we only need to know which child to ask
2082    if (m_left->m_readytype=='E')    if (m_left->m_readytype=='E')
2083    {    {
2084      return m_left->getPointOffset(sampleNo,dataPointNo);          return m_left->getPointOffset(sampleNo,dataPointNo);
2085    }    }
2086    else    else
2087    {    {
2088      return m_right->getPointOffset(sampleNo,dataPointNo);          return m_right->getPointOffset(sampleNo,dataPointNo);
2089    }    }
2090  }  }
2091    
2092  // To do this we need to rely on our child nodes  // To do this we need to rely on our child nodes
2093  DataTypes::ValueType::size_type  DataTypes::RealVectorType::size_type
2094  DataLazy::getPointOffset(int sampleNo,  DataLazy::getPointOffset(int sampleNo,
2095                   int dataPointNo) const                   int dataPointNo) const
2096  {  {
2097    if (m_op==IDENTITY)    if (m_op==IDENTITY)
2098    {    {
2099      return m_id->getPointOffset(sampleNo,dataPointNo);          return m_id->getPointOffset(sampleNo,dataPointNo);
2100    }    }
2101    if (m_readytype=='E')    if (m_readytype=='E')
2102    {    {
# Line 2672  DataLazy::getPointOffset(int sampleNo, Line 2104  DataLazy::getPointOffset(int sampleNo,
2104      // so we only need to know which child to ask      // so we only need to know which child to ask
2105      if (m_left->m_readytype=='E')      if (m_left->m_readytype=='E')
2106      {      {
2107      return m_left->getPointOffset(sampleNo,dataPointNo);          return m_left->getPointOffset(sampleNo,dataPointNo);
2108      }      }
2109      else      else
2110      {      {
2111      return m_right->getPointOffset(sampleNo,dataPointNo);          return m_right->getPointOffset(sampleNo,dataPointNo);
2112      }      }
2113    }    }
2114    if (m_readytype=='C')    if (m_readytype=='C')
2115    {    {
2116      return m_left->getPointOffset(sampleNo,dataPointNo); // which child doesn't matter          return m_left->getPointOffset(sampleNo,dataPointNo); // which child doesn't matter
2117    }    }
2118    throw DataException("Programmer error - getPointOffset on lazy data may require collapsing (but this object is marked const).");    throw DataException("Programmer error - getPointOffset on lazy data may require collapsing (but this object is marked const).");
2119  }  }
# Line 2691  DataLazy::getPointOffset(int sampleNo, Line 2123  DataLazy::getPointOffset(int sampleNo,
2123  void  void
2124  DataLazy::setToZero()  DataLazy::setToZero()
2125  {  {
2126  //   DataTypes::ValueType v(getNoValues(),0);  //   DataTypes::RealVectorType v(getNoValues(),0);
2127  //   m_id=DataReady_ptr(new DataConstant(getFunctionSpace(),getShape(),v));  //   m_id=DataReady_ptr(new DataConstant(getFunctionSpace(),getShape(),v));
2128  //   m_op=IDENTITY;  //   m_op=IDENTITY;
2129  //   m_right.reset();    //   m_right.reset();  
# Line 2699  DataLazy::setToZero() Line 2131  DataLazy::setToZero()
2131  //   m_readytype='C';  //   m_readytype='C';
2132  //   m_buffsRequired=1;  //   m_buffsRequired=1;
2133    
2134    privdebug=privdebug;  // to stop the compiler complaining about unused privdebug    (void)privdebug;  // to stop the compiler complaining about unused privdebug
2135    throw DataException("Programmer error - setToZero not supported for DataLazy (DataLazy objects should be read only).");    throw DataException("Programmer error - setToZero not supported for DataLazy (DataLazy objects should be read only).");
2136  }  }
2137    
2138  bool  bool
2139  DataLazy::actsExpanded() const  DataLazy::actsExpanded() const
2140  {  {
2141      return (m_readytype=='E');          return (m_readytype=='E');
2142  }  }
2143    
2144  }   // end namespace  } // end namespace
2145    
