/[escript]/branches/clazy/escriptcore/src/DataLazy.cpp
ViewVC logotype

Diff of /branches/clazy/escriptcore/src/DataLazy.cpp

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

trunk/escript/src/DataLazy.cpp revision 2496 by jfenwick, Fri Jun 26 06:09:47 2009 UTC trunk/escriptcore/src/DataLazy.cpp revision 6042 by jfenwick, Wed Mar 9 04:30:36 2016 UTC
# Line 1  Line 1 
1    
2  /*******************************************************  /*****************************************************************************
3  *  *
4  * Copyright (c) 2003-2008 by University of Queensland  * Copyright (c) 2003-2016 by The University of Queensland
5  * Earth Systems Science Computational Center (ESSCC)  * http://www.uq.edu.au
 * http://www.uq.edu.au/esscc  
6  *  *
7  * Primary Business: Queensland, Australia  * Primary Business: Queensland, Australia
8  * Licensed under the Open Software License version 3.0  * Licensed under the Open Software License version 3.0
9  * http://www.opensource.org/licenses/osl-3.0.php  * http://www.opensource.org/licenses/osl-3.0.php
10  *  *
11  *******************************************************/  * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12    * Development 2012-2013 by School of Earth Sciences
13    * Development from 2014 by Centre for Geoscience Computing (GeoComp)
14    *
15    *****************************************************************************/
16    
17  #include "DataLazy.h"  #include "DataLazy.h"
18    #include "Data.h"
19    #include "DataTypes.h"
20    #include "EscriptParams.h"
21    #include "FunctionSpace.h"
22    #include "UnaryFuncs.h"    // for escript::fsign
23    #include "Utils.h"
24    #include "DataMaths.h"
25    
26  #ifdef USE_NETCDF  #ifdef USE_NETCDF
27  #include <netcdfcpp.h>  #include <netcdfcpp.h>
28  #endif  #endif
 #ifdef PASO_MPI  
 #include <mpi.h>  
 #endif  
 #ifdef _OPENMP  
 #include <omp.h>  
 #endif  
 #include "FunctionSpace.h"  
 #include "DataTypes.h"  
 #include "Data.h"  
 #include "UnaryFuncs.h"     // for escript::fsign  
 #include "Utils.h"  
29    
30  #include "EscriptParams.h"  #include <iomanip> // for some fancy formatting in debug
31    
32  #include <iomanip>      // for some fancy formatting in debug  using namespace escript::DataTypes;
33    
34    #define NO_ARG
35    
36  // #define LAZYDEBUG(X) if (privdebug){X;}  // #define LAZYDEBUG(X) if (privdebug){X;}
37  #define LAZYDEBUG(X)  #define LAZYDEBUG(X)
# Line 42  bool privdebug=false; Line 43  bool privdebug=false;
43  #define DISABLEDEBUG privdebug=false;  #define DISABLEDEBUG privdebug=false;
44  }  }
45    
46  // #define SIZELIMIT  // #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {cerr << "\n!!!!!!! SIZE LIMIT EXCEEDED " << m_children << ";" << m_height << endl << toString() << endl;resolveToIdentity();}
47  // #define SIZELIMIT if ((m_height>7) || (m_children>127)) {cerr << "\n!!!!!!! SIZE LIMIT EXCEEDED " << m_children << ";" << m_height << endl << toString() << endl; resolveToIdentity();}  
48  //#define SIZELIMIT if ((m_height>7) || (m_children>127)) {resolveToIdentity();}  // #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {cerr << "SIZE LIMIT EXCEEDED " << m_height << endl;resolveToIdentity();}
49    
50  #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {resolveToIdentity();}  
51    #define SIZELIMIT if (m_height>escript::escriptParams.getTOO_MANY_LEVELS())  {if (escript::escriptParams.getLAZY_VERBOSE()){cerr << "SIZE LIMIT EXCEEDED height=" << m_height << endl;}resolveToIdentity();}
52    
53  /*  /*
54  How does DataLazy work?  How does DataLazy work?
# Line 59  A special operation, IDENTITY, stores an Line 61  A special operation, IDENTITY, stores an
61  This means that all "internal" nodes in the structure are instances of DataLazy.  This means that all "internal" nodes in the structure are instances of DataLazy.
62    
63  Each operation has a string representation as well as an opgroup - eg G_IDENTITY, G_BINARY, ...  Each operation has a string representation as well as an opgroup - eg G_IDENTITY, G_BINARY, ...
64  Note that IDENITY is not considered a unary operation.  Note that IDENTITY is not considered a unary operation.
65    
66  I am avoiding calling the structure formed a tree because it is not guaranteed to be one (eg c=a+a).  I am avoiding calling the structure formed a tree because it is not guaranteed to be one (eg c=a+a).
67  It must however form a DAG (directed acyclic graph).  It must however form a DAG (directed acyclic graph).
# Line 67  I will refer to individual DataLazy obje Line 69  I will refer to individual DataLazy obje
69    
70  Each node also stores:  Each node also stores:
71  - m_readytype \in {'E','T','C','?'} ~ indicates what sort of DataReady would be produced if the expression was  - m_readytype \in {'E','T','C','?'} ~ indicates what sort of DataReady would be produced if the expression was
72      evaluated.          evaluated.
73  - m_buffsrequired ~ the larged number of samples which would need to be kept simultaneously in order to  - m_buffsrequired ~ the large number of samples which would need to be kept simultaneously in order to
74      evaluate the expression.          evaluate the expression.
75  - m_samplesize ~ the number of doubles stored in a sample.  - m_samplesize ~ the number of doubles stored in a sample.
76    
77  When a new node is created, the above values are computed based on the values in the child nodes.  When a new node is created, the above values are computed based on the values in the child nodes.
# Line 110  namespace escript Line 112  namespace escript
112  namespace  namespace
113  {  {
114    
115    
116    // enabling this will print out when ever the maximum stacksize used by resolve increases
117    // it assumes _OPENMP is also in use
118    //#define LAZY_STACK_PROF
119    
120    
121    
122    #ifndef _OPENMP
123      #ifdef LAZY_STACK_PROF
124      #undef LAZY_STACK_PROF
125      #endif
126    #endif
127    
128    
129    #ifdef LAZY_STACK_PROF
130    std::vector<void*> stackstart(getNumberOfThreads());
131    std::vector<void*> stackend(getNumberOfThreads());
132    size_t maxstackuse=0;
133    #endif
134    
135  enum ES_opgroup  enum ES_opgroup
136  {  {
137     G_UNKNOWN,     G_UNKNOWN,
138     G_IDENTITY,     G_IDENTITY,
139     G_BINARY,        // pointwise operations with two arguments     G_BINARY,            // pointwise operations with two arguments
140     G_UNARY,     // pointwise operations with one argument     G_UNARY,             // pointwise operations with one argument
141     G_UNARY_P,       // pointwise operations with one argument, requiring a parameter     G_UNARY_P,           // pointwise operations with one argument, requiring a parameter
142     G_NP1OUT,        // non-pointwise op with one output     G_NP1OUT,            // non-pointwise op with one output
143     G_NP1OUT_P,      // non-pointwise op with one output requiring a parameter     G_NP1OUT_P,          // non-pointwise op with one output requiring a parameter
144     G_TENSORPROD,    // general tensor product     G_TENSORPROD,        // general tensor product
145     G_NP1OUT_2P      // non-pointwise op with one output requiring two params     G_NP1OUT_2P,         // non-pointwise op with one output requiring two params
146       G_REDUCTION,         // non-pointwise unary op with a scalar output
147       G_CONDEVAL
148  };  };
149    
150    
151    
152    
153  string ES_opstrings[]={"UNKNOWN","IDENTITY","+","-","*","/","^",  string ES_opstrings[]={"UNKNOWN","IDENTITY","+","-","*","/","^",
154              "sin","cos","tan",                          "sin","cos","tan",
155              "asin","acos","atan","sinh","cosh","tanh","erf",                          "asin","acos","atan","sinh","cosh","tanh","erf",
156              "asinh","acosh","atanh",                          "asinh","acosh","atanh",
157              "log10","log","sign","abs","neg","pos","exp","sqrt",                          "log10","log","sign","abs","neg","pos","exp","sqrt",
158              "1/","where>0","where<0","where>=0","where<=0", "where<>0","where=0",                          "1/","where>0","where<0","where>=0","where<=0", "where<>0","where=0",
159              "symmetric","nonsymmetric",                          "symmetric","nonsymmetric",
160              "prod",                          "prod",
161              "transpose", "trace",                          "transpose", "trace",
162              "swapaxes"};                          "swapaxes",
163  int ES_opcount=41;                          "minval", "maxval",
164                            "condEval"};
165    int ES_opcount=44;
166  ES_opgroup opgroups[]={G_UNKNOWN,G_IDENTITY,G_BINARY,G_BINARY,G_BINARY,G_BINARY, G_BINARY,  ES_opgroup opgroups[]={G_UNKNOWN,G_IDENTITY,G_BINARY,G_BINARY,G_BINARY,G_BINARY, G_BINARY,
167              G_UNARY,G_UNARY,G_UNARY, //10                          G_UNARY,G_UNARY,G_UNARY, //10
168              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,    // 17                          G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,        // 17
169              G_UNARY,G_UNARY,G_UNARY,                    // 20                          G_UNARY,G_UNARY,G_UNARY,                                        // 20
170              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,    // 28                          G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,        // 28
171              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY, G_UNARY_P, G_UNARY_P,      // 35                          G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY, G_UNARY_P, G_UNARY_P,          // 35
172              G_NP1OUT,G_NP1OUT,                          G_NP1OUT,G_NP1OUT,
173              G_TENSORPROD,                          G_TENSORPROD,
174              G_NP1OUT_P, G_NP1OUT_P,                          G_NP1OUT_P, G_NP1OUT_P,
175              G_NP1OUT_2P};                          G_NP1OUT_2P,
176                            G_REDUCTION, G_REDUCTION,
177                            G_CONDEVAL};
178  inline  inline
179  ES_opgroup  ES_opgroup
180  getOpgroup(ES_optype op)  getOpgroup(ES_optype op)
# Line 158  getOpgroup(ES_optype op) Line 186  getOpgroup(ES_optype op)
186  FunctionSpace  FunctionSpace
187  resultFS(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)  resultFS(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
188  {  {
189      // perhaps this should call interpolate and throw or something?          // perhaps this should call interpolate and throw or something?
190      // maybe we need an interpolate node -          // maybe we need an interpolate node -
191      // that way, if interpolate is required in any other op we can just throw a          // that way, if interpolate is required in any other op we can just throw a
192      // programming error exception.          // programming error exception.
193    
194    FunctionSpace l=left->getFunctionSpace();    FunctionSpace l=left->getFunctionSpace();
195    FunctionSpace r=right->getFunctionSpace();    FunctionSpace r=right->getFunctionSpace();
196    if (l!=r)    if (l!=r)
197    {    {
198      if (r.probeInterpolation(l))      signed char res=r.getDomain()->preferredInterpolationOnDomain(r.getTypeCode(), l.getTypeCode());
199        if (res==1)
200      {      {
201      return l;          return l;
202      }      }
203      if (l.probeInterpolation(r))      if (res==-1)
204      {      {
205      return r;          return r;
206      }      }
207      throw DataException("Cannot interpolate between the FunctionSpaces given for operation "+opToString(op)+".");      throw DataException("Cannot interpolate between the FunctionSpaces given for operation "+opToString(op)+".");
208    }    }
# Line 185  resultFS(DataAbstract_ptr left, DataAbst Line 214  resultFS(DataAbstract_ptr left, DataAbst
214  DataTypes::ShapeType  DataTypes::ShapeType
215  resultShape(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)  resultShape(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
216  {  {
217      if (left->getShape()!=right->getShape())          if (left->getShape()!=right->getShape())
218      {          {
219        if ((getOpgroup(op)!=G_BINARY) && (getOpgroup(op)!=G_NP1OUT))            if ((getOpgroup(op)!=G_BINARY) && (getOpgroup(op)!=G_NP1OUT))
220        {            {
221          throw DataException("Shapes not the name - shapes must match for (point)binary operations.");                  throw DataException("Shapes not the name - shapes must match for (point)binary operations.");
222        }            }
223        if (left->getRank()==0)   // we need to allow scalar * anything  
224        {            if (left->getRank()==0)       // we need to allow scalar * anything
225          return right->getShape();            {
226        }                  return right->getShape();
227        if (right->getRank()==0)            }
228        {            if (right->getRank()==0)
229          return left->getShape();            {
230        }                  return left->getShape();
231        throw DataException("Shapes not the same - arguments must have matching shapes (or be scalars) for (point)binary operations on lazy data.");            }
232      }            throw DataException("Shapes not the same - arguments must have matching shapes (or be scalars) for (point)binary operations on lazy data.");
233      return left->getShape();          }
234            return left->getShape();
235  }  }
236    
237  // return the shape for "op left"  // return the shape for "op left"
# Line 209  resultShape(DataAbstract_ptr left, DataA Line 239  resultShape(DataAbstract_ptr left, DataA
239  DataTypes::ShapeType  DataTypes::ShapeType
240  resultShape(DataAbstract_ptr left, ES_optype op, int axis_offset)  resultShape(DataAbstract_ptr left, ES_optype op, int axis_offset)
241  {  {
242      switch(op)          switch(op)
243      {          {
244          case TRANS:          case TRANS:
245         {            // for the scoping of variables             {                    // for the scoping of variables
246          const DataTypes::ShapeType& s=left->getShape();                  const DataTypes::ShapeType& s=left->getShape();
247          DataTypes::ShapeType sh;                  DataTypes::ShapeType sh;
248          int rank=left->getRank();                  int rank=left->getRank();
249          if (axis_offset<0 || axis_offset>rank)                  if (axis_offset<0 || axis_offset>rank)
250          {                  {
251                 throw DataException("Error - Data::transpose must have 0 <= axis_offset <= rank=" + rank);              stringstream e;
252              }              e << "Error - Data::transpose must have 0 <= axis_offset <= rank=" << rank;
253              for (int i=0; i<rank; i++)              throw DataException(e.str());
254          {          }
255             int index = (axis_offset+i)%rank;          for (int i=0; i<rank; i++)
256                 sh.push_back(s[index]); // Append to new shape                  {
257              }                     int index = (axis_offset+i)%rank;
258          return sh;             sh.push_back(s[index]); // Append to new shape
259         }          }
260      break;                  return sh;
261      case TRACE:             }
262         {          break;
263          int rank=left->getRank();          case TRACE:
264          if (rank<2)             {
265          {                  int rank=left->getRank();
266             throw DataException("Trace can only be computed for objects with rank 2 or greater.");                  if (rank<2)
267          }                  {
268          if ((axis_offset>rank-2) || (axis_offset<0))                     throw DataException("Trace can only be computed for objects with rank 2 or greater.");
269          {                  }
270             throw DataException("Trace: axis offset must lie between 0 and rank-2 inclusive.");                  if ((axis_offset>rank-2) || (axis_offset<0))
271          }                  {
272          if (rank==2)                     throw DataException("Trace: axis offset must lie between 0 and rank-2 inclusive.");
273          {                  }
274             return DataTypes::scalarShape;                  if (rank==2)
275          }                  {
276          else if (rank==3)                     return DataTypes::scalarShape;
277          {                  }
278             DataTypes::ShapeType sh;                  else if (rank==3)
279                 if (axis_offset==0)                  {
280             {                     DataTypes::ShapeType sh;
281                  sh.push_back(left->getShape()[2]);                     if (axis_offset==0)
282                 }                     {
283                 else     // offset==1                          sh.push_back(left->getShape()[2]);
284             {                     }
285              sh.push_back(left->getShape()[0]);                     else         // offset==1
286                 }                     {
287             return sh;                          sh.push_back(left->getShape()[0]);
288          }                     }
289          else if (rank==4)                     return sh;
290          {                  }
291             DataTypes::ShapeType sh;                  else if (rank==4)
292             const DataTypes::ShapeType& s=left->getShape();                  {
293                 if (axis_offset==0)                     DataTypes::ShapeType sh;
294             {                     const DataTypes::ShapeType& s=left->getShape();
295                  sh.push_back(s[2]);                     if (axis_offset==0)
296                  sh.push_back(s[3]);                     {
297                 }                          sh.push_back(s[2]);
298                 else if (axis_offset==1)                          sh.push_back(s[3]);
299             {                     }
300                  sh.push_back(s[0]);                     else if (axis_offset==1)
301                  sh.push_back(s[3]);                     {
302                 }                          sh.push_back(s[0]);
303             else     // offset==2                          sh.push_back(s[3]);
304             {                     }
305              sh.push_back(s[0]);                     else         // offset==2
306              sh.push_back(s[1]);                     {
307             }                          sh.push_back(s[0]);
308             return sh;                          sh.push_back(s[1]);
309          }                     }
310          else        // unknown rank                     return sh;
311          {                  }
312             throw DataException("Error - Data::trace can only be calculated for rank 2, 3 or 4 object.");                  else            // unknown rank
313          }                  {
314         }                     throw DataException("Error - Data::trace can only be calculated for rank 2, 3 or 4 object.");
315      break;                  }
316          default:             }
317      throw DataException("Programmer error - resultShape(left,op) can't compute shapes for operator "+opToString(op)+".");          break;
318      }          default:
319            throw DataException("Programmer error - resultShape(left,op) can't compute shapes for operator "+opToString(op)+".");
320            }
321  }  }
322    
323  DataTypes::ShapeType  DataTypes::ShapeType
# Line 303  SwapShape(DataAbstract_ptr left, const i Line 335  SwapShape(DataAbstract_ptr left, const i
335          throw DataException("Error - Data::swapaxes argument must have at least rank 2.");          throw DataException("Error - Data::swapaxes argument must have at least rank 2.");
336       }       }
337       if (axis0<0 || axis0>rank-1) {       if (axis0<0 || axis0>rank-1) {
338          throw DataException("Error - Data::swapaxes: axis0 must be between 0 and rank-1=" + rank-1);          stringstream e;
339            e << "Error - Data::swapaxes: axis0 must be between 0 and rank-1=" << (rank-1);
340            throw DataException(e.str());
341       }       }
342       if (axis1<0 || axis1>rank-1) {       if (axis1<0 || axis1>rank-1) {
343           throw DataException("Error - Data::swapaxes: axis1 must be between 0 and rank-1=" + rank-1);          stringstream e;
344            e << "Error - Data::swapaxes: axis1 must be between 0 and rank-1=" << (rank-1);
345            throw DataException(e.str());
346       }       }
347       if (axis0 == axis1) {       if (axis0 == axis1) {
348           throw DataException("Error - Data::swapaxes: axis indices must be different.");           throw DataException("Error - Data::swapaxes: axis indices must be different.");
# Line 337  SwapShape(DataAbstract_ptr left, const i Line 373  SwapShape(DataAbstract_ptr left, const i
373  DataTypes::ShapeType  DataTypes::ShapeType
374  GTPShape(DataAbstract_ptr left, DataAbstract_ptr right, int axis_offset, int transpose, int& SL, int& SM, int& SR)  GTPShape(DataAbstract_ptr left, DataAbstract_ptr right, int axis_offset, int transpose, int& SL, int& SM, int& SR)
375  {  {
376                
377    // Get rank and shape of inputs    // Get rank and shape of inputs
378    int rank0 = left->getRank();    int rank0 = left->getRank();
379    int rank1 = right->getRank();    int rank1 = right->getRank();
# Line 346  GTPShape(DataAbstract_ptr left, DataAbst Line 382  GTPShape(DataAbstract_ptr left, DataAbst
382    
383    // Prepare for the loops of the product and verify compatibility of shapes    // Prepare for the loops of the product and verify compatibility of shapes
384    int start0=0, start1=0;    int start0=0, start1=0;
385    if (transpose == 0)       {}    if (transpose == 0)           {}
386    else if (transpose == 1)  { start0 = axis_offset; }    else if (transpose == 1)      { start0 = axis_offset; }
387    else if (transpose == 2)  { start1 = rank1-axis_offset; }    else if (transpose == 2)      { start1 = rank1-axis_offset; }
388    else              { throw DataException("DataLazy GeneralTensorProduct Constructor: Error - transpose should be 0, 1 or 2"); }    else                          { throw DataException("DataLazy GeneralTensorProduct Constructor: Error - transpose should be 0, 1 or 2"); }
389    
390    if (rank0<axis_offset)    if (rank0<axis_offset)
391    {    {
392      throw DataException("DataLazy GeneralTensorProduct Constructor: Error - rank of left < axisoffset");          throw DataException("DataLazy GeneralTensorProduct Constructor: Error - rank of left < axisoffset");
393    }    }
394    
395    // Adjust the shapes for transpose    // Adjust the shapes for transpose
396    DataTypes::ShapeType tmpShape0(rank0);    // pre-sizing the vectors rather    DataTypes::ShapeType tmpShape0(rank0);        // pre-sizing the vectors rather
397    DataTypes::ShapeType tmpShape1(rank1);    // than using push_back    DataTypes::ShapeType tmpShape1(rank1);        // than using push_back
398    for (int i=0; i<rank0; i++)   { tmpShape0[i]=shape0[(i+start0)%rank0]; }    for (int i=0; i<rank0; i++)   { tmpShape0[i]=shape0[(i+start0)%rank0]; }
399    for (int i=0; i<rank1; i++)   { tmpShape1[i]=shape1[(i+start1)%rank1]; }    for (int i=0; i<rank1; i++)   { tmpShape1[i]=shape1[(i+start1)%rank1]; }
400    
401    // Prepare for the loops of the product    // Prepare for the loops of the product
402    SL=1, SM=1, SR=1;    SL=1, SM=1, SR=1;
403    for (int i=0; i<rank0-axis_offset; i++)   {    for (int i=0; i<rank0-axis_offset; i++)       {
404      SL *= tmpShape0[i];      SL *= tmpShape0[i];
405    }    }
406    for (int i=rank0-axis_offset; i<rank0; i++)   {    for (int i=rank0-axis_offset; i<rank0; i++)   {
407      if (tmpShape0[i] != tmpShape1[i-(rank0-axis_offset)]) {      if (tmpShape0[i] != tmpShape1[i-(rank0-axis_offset)]) {
408        throw DataException("C_GeneralTensorProduct: Error - incompatible shapes");        throw DataException("C_GeneralTensorProduct: Error - incompatible shapes");
409      }      }
410      SM *= tmpShape0[i];      SM *= tmpShape0[i];
411    }    }
412    for (int i=axis_offset; i<rank1; i++)     {    for (int i=axis_offset; i<rank1; i++)         {
413      SR *= tmpShape1[i];      SR *= tmpShape1[i];
414    }    }
415    
416    // Define the shape of the output (rank of shape is the sum of the loop ranges below)    // Define the shape of the output (rank of shape is the sum of the loop ranges below)
417    DataTypes::ShapeType shape2(rank0+rank1-2*axis_offset);      DataTypes::ShapeType shape2(rank0+rank1-2*axis_offset);      
418    {         // block to limit the scope of out_index    {                     // block to limit the scope of out_index
419       int out_index=0;       int out_index=0;
420       for (int i=0; i<rank0-axis_offset; i++, ++out_index) { shape2[out_index]=tmpShape0[i]; } // First part of arg_0_Z       for (int i=0; i<rank0-axis_offset; i++, ++out_index) { shape2[out_index]=tmpShape0[i]; } // First part of arg_0_Z
421       for (int i=axis_offset; i<rank1; i++, ++out_index)   { shape2[out_index]=tmpShape1[i]; } // Last part of arg_1_Z       for (int i=axis_offset; i<rank1; i++, ++out_index)   { shape2[out_index]=tmpShape1[i]; } // Last part of arg_1_Z
# Line 395  GTPShape(DataAbstract_ptr left, DataAbst Line 431  GTPShape(DataAbstract_ptr left, DataAbst
431    return shape2;    return shape2;
432  }  }
433    
434  // determine the number of samples requires to evaluate an expression combining left and right  }       // end anonymous namespace
 // NP1OUT needs an extra buffer because we can't write the answers over the top of the input.  
 // The same goes for G_TENSORPROD  
 // It might seem that pointwise binary ops (G_BINARY) could be written over the top of the lefts.  
 // This would be true were it not for the possibility that the LHS could be a scalar which needs to be examined  
 // multiple times  
 int  
 calcBuffs(const DataLazy_ptr& left, const DataLazy_ptr& right, ES_optype op)  
 {  
    switch(getOpgroup(op))  
    {  
    case G_IDENTITY: return 1;  
    case G_BINARY: return 1+max(left->getBuffsRequired(),right->getBuffsRequired()+1);  
    case G_UNARY:  
    case G_UNARY_P: return max(left->getBuffsRequired(),1);  
    case G_NP1OUT: return 1+max(left->getBuffsRequired(),1);  
    case G_NP1OUT_P: return 1+max(left->getBuffsRequired(),1);  
    case G_TENSORPROD: return 1+max(left->getBuffsRequired(),right->getBuffsRequired()+1);  
    case G_NP1OUT_2P: return 1+max(left->getBuffsRequired(),1);  
    default:  
     throw DataException("Programmer Error - attempt to calcBuffs() for operator "+opToString(op)+".");  
    }  
 }  
   
   
 }   // end anonymous namespace  
435    
436    
437    
# Line 435  opToString(ES_optype op) Line 446  opToString(ES_optype op)
446    return ES_opstrings[op];    return ES_opstrings[op];
447  }  }
448    
449    void DataLazy::LazyNodeSetup()
450    {
451    #ifdef _OPENMP
452        int numthreads=omp_get_max_threads();
453        m_samples.resize(numthreads*m_samplesize);
454        m_sampleids=new int[numthreads];
455        for (int i=0;i<numthreads;++i)
456        {
457            m_sampleids[i]=-1;  
458        }
459    #else
460        m_samples.resize(m_samplesize);
461        m_sampleids=new int[1];
462        m_sampleids[0]=-1;
463    #endif  // _OPENMP
464    }
465    
466    
467    // Creates an identity node
468  DataLazy::DataLazy(DataAbstract_ptr p)  DataLazy::DataLazy(DataAbstract_ptr p)
469      : parent(p->getFunctionSpace(),p->getShape())          : parent(p->getFunctionSpace(),p->getShape())
470            ,m_sampleids(0),
471            m_samples(1)
472  {  {
473     if (p->isLazy())     if (p->isLazy())
474     {     {
475      // I don't want identity of Lazy.          // I don't want identity of Lazy.
476      // Question: Why would that be so bad?          // Question: Why would that be so bad?
477      // Answer: We assume that the child of ID is something we can call getVector on          // Answer: We assume that the child of ID is something we can call getVector on
478      throw DataException("Programmer error - attempt to create identity from a DataLazy.");          throw DataException("Programmer error - attempt to create identity from a DataLazy.");
479     }     }
480     else     else
481     {     {
482      p->makeLazyShared();          p->makeLazyShared();
483      DataReady_ptr dr=dynamic_pointer_cast<DataReady>(p);          DataReady_ptr dr=dynamic_pointer_cast<DataReady>(p);
484      makeIdentity(dr);          makeIdentity(dr);
485  LAZYDEBUG(cout << "Wrapping " << dr.get() << " id=" << m_id.get() << endl;)  LAZYDEBUG(cout << "Wrapping " << dr.get() << " id=" << m_id.get() << endl;)
486     }     }
487  LAZYDEBUG(cout << "(1)Lazy created with " << m_samplesize << endl;)  LAZYDEBUG(cout << "(1)Lazy created with " << m_samplesize << endl;)
488  }  }
489    
   
   
   
490  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op)  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op)
491      : parent(left->getFunctionSpace(),left->getShape()),          : parent(left->getFunctionSpace(),(getOpgroup(op)!=G_REDUCTION)?left->getShape():DataTypes::scalarShape),
492      m_op(op),          m_op(op),
493      m_axis_offset(0),          m_axis_offset(0),
494      m_transpose(0),          m_transpose(0),
495      m_SL(0), m_SM(0), m_SR(0)          m_SL(0), m_SM(0), m_SR(0)
496  {  {
497     if ((getOpgroup(op)!=G_UNARY) && (getOpgroup(op)!=G_NP1OUT))     if ((getOpgroup(op)!=G_UNARY) && (getOpgroup(op)!=G_NP1OUT) && (getOpgroup(op)!=G_REDUCTION))
498     {     {
499      throw DataException("Programmer error - constructor DataLazy(left, op) will only process UNARY operations.");          throw DataException("Programmer error - constructor DataLazy(left, op) will only process UNARY operations.");
500     }     }
501    
502     DataLazy_ptr lleft;     DataLazy_ptr lleft;
503     if (!left->isLazy())     if (!left->isLazy())
504     {     {
505      lleft=DataLazy_ptr(new DataLazy(left));          lleft=DataLazy_ptr(new DataLazy(left));
506     }     }
507     else     else
508     {     {
509      lleft=dynamic_pointer_cast<DataLazy>(left);          lleft=dynamic_pointer_cast<DataLazy>(left);
510     }     }
511     m_readytype=lleft->m_readytype;     m_readytype=lleft->m_readytype;
512     m_left=lleft;     m_left=lleft;
    m_buffsRequired=calcBuffs(m_left, m_right,m_op); // yeah m_right will be null at this point  
513     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
    m_maxsamplesize=max(m_samplesize,m_left->getMaxSampleSize());  
514     m_children=m_left->m_children+1;     m_children=m_left->m_children+1;
515     m_height=m_left->m_height+1;     m_height=m_left->m_height+1;
516       LazyNodeSetup();
517     SIZELIMIT     SIZELIMIT
518  }  }
519    
520    
521  // In this constructor we need to consider interpolation  // In this constructor we need to consider interpolation
522  DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)  DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
523      : parent(resultFS(left,right,op), resultShape(left,right,op)),          : parent(resultFS(left,right,op), resultShape(left,right,op)),
524      m_op(op),          m_op(op),
525      m_SL(0), m_SM(0), m_SR(0)          m_SL(0), m_SM(0), m_SR(0)
526  {  {
527  LAZYDEBUG(cout << "Forming operator with " << left.get() << " " << right.get() << endl;)  LAZYDEBUG(cout << "Forming operator with " << left.get() << " " << right.get() << endl;)
528     if ((getOpgroup(op)!=G_BINARY))     if ((getOpgroup(op)!=G_BINARY))
529     {     {
530      throw DataException("Programmer error - constructor DataLazy(left, right, op) will only process BINARY operations.");          throw DataException("Programmer error - constructor DataLazy(left, right, op) will only process BINARY operations.");
531     }     }
532    
533     if (getFunctionSpace()!=left->getFunctionSpace())    // left needs to be interpolated     if (getFunctionSpace()!=left->getFunctionSpace())    // left needs to be interpolated
534     {     {
535      FunctionSpace fs=getFunctionSpace();          FunctionSpace fs=getFunctionSpace();
536      Data ltemp(left);          Data ltemp(left);
537      Data tmp(ltemp,fs);          Data tmp(ltemp,fs);
538      left=tmp.borrowDataPtr();          left=tmp.borrowDataPtr();
539     }     }
540     if (getFunctionSpace()!=right->getFunctionSpace())   // right needs to be interpolated     if (getFunctionSpace()!=right->getFunctionSpace())   // right needs to be interpolated
541     {     {
542      Data tmp(Data(right),getFunctionSpace());          Data tmp(Data(right),getFunctionSpace());
543      right=tmp.borrowDataPtr();          right=tmp.borrowDataPtr();
544  LAZYDEBUG(cout << "Right interpolation required " << right.get() << endl;)  LAZYDEBUG(cout << "Right interpolation required " << right.get() << endl;)
545     }     }
546     left->operandCheck(*right);     left->operandCheck(*right);
547    
548     if (left->isLazy())          // the children need to be DataLazy. Wrap them in IDENTITY if required     if (left->isLazy())                  // the children need to be DataLazy. Wrap them in IDENTITY if required
549     {     {
550      m_left=dynamic_pointer_cast<DataLazy>(left);          m_left=dynamic_pointer_cast<DataLazy>(left);
551  LAZYDEBUG(cout << "Left is " << m_left->toString() << endl;)  LAZYDEBUG(cout << "Left is " << m_left->toString() << endl;)
552     }     }
553     else     else
554     {     {
555      m_left=DataLazy_ptr(new DataLazy(left));          m_left=DataLazy_ptr(new DataLazy(left));
556  LAZYDEBUG(cout << "Left " << left.get() << " wrapped " << m_left->m_id.get() << endl;)  LAZYDEBUG(cout << "Left " << left.get() << " wrapped " << m_left->m_id.get() << endl;)
557     }     }
558     if (right->isLazy())     if (right->isLazy())
559     {     {
560      m_right=dynamic_pointer_cast<DataLazy>(right);          m_right=dynamic_pointer_cast<DataLazy>(right);
561  LAZYDEBUG(cout << "Right is " << m_right->toString() << endl;)  LAZYDEBUG(cout << "Right is " << m_right->toString() << endl;)
562     }     }
563     else     else
564     {     {
565      m_right=DataLazy_ptr(new DataLazy(right));          m_right=DataLazy_ptr(new DataLazy(right));
566  LAZYDEBUG(cout << "Right " << right.get() << " wrapped " << m_right->m_id.get() << endl;)  LAZYDEBUG(cout << "Right " << right.get() << " wrapped " << m_right->m_id.get() << endl;)
567     }     }
568     char lt=m_left->m_readytype;     char lt=m_left->m_readytype;
569     char rt=m_right->m_readytype;     char rt=m_right->m_readytype;
570     if (lt=='E' || rt=='E')     if (lt=='E' || rt=='E')
571     {     {
572      m_readytype='E';          m_readytype='E';
573     }     }
574     else if (lt=='T' || rt=='T')     else if (lt=='T' || rt=='T')
575     {     {
576      m_readytype='T';          m_readytype='T';
577     }     }
578     else     else
579     {     {
580      m_readytype='C';          m_readytype='C';
581     }     }
582     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
    m_maxsamplesize=max(max(m_samplesize,m_right->getMaxSampleSize()),m_left->getMaxSampleSize());    
    m_buffsRequired=calcBuffs(m_left, m_right,m_op);  
583     m_children=m_left->m_children+m_right->m_children+2;     m_children=m_left->m_children+m_right->m_children+2;
584     m_height=max(m_left->m_height,m_right->m_height)+1;     m_height=max(m_left->m_height,m_right->m_height)+1;
585       LazyNodeSetup();
586     SIZELIMIT     SIZELIMIT
587  LAZYDEBUG(cout << "(3)Lazy created with " << m_samplesize << endl;)  LAZYDEBUG(cout << "(3)Lazy created with " << m_samplesize << endl;)
588  }  }
589    
590  DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op, int axis_offset, int transpose)  DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op, int axis_offset, int transpose)
591      : parent(resultFS(left,right,op), GTPShape(left,right, axis_offset, transpose, m_SL,m_SM, m_SR)),          : parent(resultFS(left,right,op), GTPShape(left,right, axis_offset, transpose, m_SL,m_SM, m_SR)),
592      m_op(op),          m_op(op),
593      m_axis_offset(axis_offset),          m_axis_offset(axis_offset),
594      m_transpose(transpose)          m_transpose(transpose)
595  {  {
596     if ((getOpgroup(op)!=G_TENSORPROD))     if ((getOpgroup(op)!=G_TENSORPROD))
597     {     {
598      throw DataException("Programmer error - constructor DataLazy(left, right, op, ax, tr) will only process BINARY operations which require parameters.");          throw DataException("Programmer error - constructor DataLazy(left, right, op, ax, tr) will only process BINARY operations which require parameters.");
599     }     }
600     if ((transpose>2) || (transpose<0))     if ((transpose>2) || (transpose<0))
601     {     {
602      throw DataException("DataLazy GeneralTensorProduct constructor: Error - transpose should be 0, 1 or 2");          throw DataException("DataLazy GeneralTensorProduct constructor: Error - transpose should be 0, 1 or 2");
603     }     }
604     if (getFunctionSpace()!=left->getFunctionSpace())    // left needs to be interpolated     if (getFunctionSpace()!=left->getFunctionSpace())    // left needs to be interpolated
605     {     {
606      FunctionSpace fs=getFunctionSpace();          FunctionSpace fs=getFunctionSpace();
607      Data ltemp(left);          Data ltemp(left);
608      Data tmp(ltemp,fs);          Data tmp(ltemp,fs);
609      left=tmp.borrowDataPtr();          left=tmp.borrowDataPtr();
610     }     }
611     if (getFunctionSpace()!=right->getFunctionSpace())   // right needs to be interpolated     if (getFunctionSpace()!=right->getFunctionSpace())   // right needs to be interpolated
612     {     {
613      Data tmp(Data(right),getFunctionSpace());          Data tmp(Data(right),getFunctionSpace());
614      right=tmp.borrowDataPtr();          right=tmp.borrowDataPtr();
615     }     }
616  //    left->operandCheck(*right);  //    left->operandCheck(*right);
617    
618     if (left->isLazy())          // the children need to be DataLazy. Wrap them in IDENTITY if required     if (left->isLazy())                  // the children need to be DataLazy. Wrap them in IDENTITY if required
619     {     {
620      m_left=dynamic_pointer_cast<DataLazy>(left);          m_left=dynamic_pointer_cast<DataLazy>(left);
621     }     }
622     else     else
623     {     {
624      m_left=DataLazy_ptr(new DataLazy(left));          m_left=DataLazy_ptr(new DataLazy(left));
625     }     }
626     if (right->isLazy())     if (right->isLazy())
627     {     {
628      m_right=dynamic_pointer_cast<DataLazy>(right);          m_right=dynamic_pointer_cast<DataLazy>(right);
629     }     }
630     else     else
631     {     {
632      m_right=DataLazy_ptr(new DataLazy(right));          m_right=DataLazy_ptr(new DataLazy(right));
633     }     }
634     char lt=m_left->m_readytype;     char lt=m_left->m_readytype;
635     char rt=m_right->m_readytype;     char rt=m_right->m_readytype;
636     if (lt=='E' || rt=='E')     if (lt=='E' || rt=='E')
637     {     {
638      m_readytype='E';          m_readytype='E';
639     }     }
640     else if (lt=='T' || rt=='T')     else if (lt=='T' || rt=='T')
641     {     {
642      m_readytype='T';          m_readytype='T';
643     }     }
644     else     else
645     {     {
646      m_readytype='C';          m_readytype='C';
647     }     }
648     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
    m_maxsamplesize=max(max(m_samplesize,m_right->getMaxSampleSize()),m_left->getMaxSampleSize());    
    m_buffsRequired=calcBuffs(m_left, m_right,m_op);  
649     m_children=m_left->m_children+m_right->m_children+2;     m_children=m_left->m_children+m_right->m_children+2;
650     m_height=max(m_left->m_height,m_right->m_height)+1;     m_height=max(m_left->m_height,m_right->m_height)+1;
651       LazyNodeSetup();
652     SIZELIMIT     SIZELIMIT
653  LAZYDEBUG(cout << "(4)Lazy created with " << m_samplesize << endl;)  LAZYDEBUG(cout << "(4)Lazy created with " << m_samplesize << endl;)
654  }  }
655    
656    
657  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, int axis_offset)  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, int axis_offset)
658      : parent(left->getFunctionSpace(), resultShape(left,op, axis_offset)),          : parent(left->getFunctionSpace(), resultShape(left,op, axis_offset)),
659      m_op(op),          m_op(op),
660      m_axis_offset(axis_offset),          m_axis_offset(axis_offset),
661      m_transpose(0),          m_transpose(0),
662      m_tol(0)          m_tol(0)
663  {  {
664     if ((getOpgroup(op)!=G_NP1OUT_P))     if ((getOpgroup(op)!=G_NP1OUT_P))
665     {     {
666      throw DataException("Programmer error - constructor DataLazy(left, op, ax) will only process UNARY operations which require parameters.");          throw DataException("Programmer error - constructor DataLazy(left, op, ax) will only process UNARY operations which require parameters.");
667     }     }
668     DataLazy_ptr lleft;     DataLazy_ptr lleft;
669     if (!left->isLazy())     if (!left->isLazy())
670     {     {
671      lleft=DataLazy_ptr(new DataLazy(left));          lleft=DataLazy_ptr(new DataLazy(left));
672     }     }
673     else     else
674     {     {
675      lleft=dynamic_pointer_cast<DataLazy>(left);          lleft=dynamic_pointer_cast<DataLazy>(left);
676     }     }
677     m_readytype=lleft->m_readytype;     m_readytype=lleft->m_readytype;
678     m_left=lleft;     m_left=lleft;
    m_buffsRequired=calcBuffs(m_left, m_right,m_op); // yeah m_right will be null at this point  
679     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
    m_maxsamplesize=max(m_samplesize,m_left->getMaxSampleSize());  
680     m_children=m_left->m_children+1;     m_children=m_left->m_children+1;
681     m_height=m_left->m_height+1;     m_height=m_left->m_height+1;
682       LazyNodeSetup();
683     SIZELIMIT     SIZELIMIT
684  LAZYDEBUG(cout << "(5)Lazy created with " << m_samplesize << endl;)  LAZYDEBUG(cout << "(5)Lazy created with " << m_samplesize << endl;)
685  }  }
686    
687  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, double tol)  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, double tol)
688      : parent(left->getFunctionSpace(), left->getShape()),          : parent(left->getFunctionSpace(), left->getShape()),
689      m_op(op),          m_op(op),
690      m_axis_offset(0),          m_axis_offset(0),
691      m_transpose(0),          m_transpose(0),
692      m_tol(tol)          m_tol(tol)
693  {  {
694     if ((getOpgroup(op)!=G_UNARY_P))     if ((getOpgroup(op)!=G_UNARY_P))
695     {     {
696      throw DataException("Programmer error - constructor DataLazy(left, op, tol) will only process UNARY operations which require parameters.");          throw DataException("Programmer error - constructor DataLazy(left, op, tol) will only process UNARY operations which require parameters.");
697     }     }
698     DataLazy_ptr lleft;     DataLazy_ptr lleft;
699     if (!left->isLazy())     if (!left->isLazy())
700     {     {
701      lleft=DataLazy_ptr(new DataLazy(left));          lleft=DataLazy_ptr(new DataLazy(left));
702     }     }
703     else     else
704     {     {
705      lleft=dynamic_pointer_cast<DataLazy>(left);          lleft=dynamic_pointer_cast<DataLazy>(left);
706     }     }
707     m_readytype=lleft->m_readytype;     m_readytype=lleft->m_readytype;
708     m_left=lleft;     m_left=lleft;
    m_buffsRequired=calcBuffs(m_left, m_right,m_op); // yeah m_right will be null at this point  
709     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
    m_maxsamplesize=max(m_samplesize,m_left->getMaxSampleSize());  
710     m_children=m_left->m_children+1;     m_children=m_left->m_children+1;
711     m_height=m_left->m_height+1;     m_height=m_left->m_height+1;
712       LazyNodeSetup();
713     SIZELIMIT     SIZELIMIT
714  LAZYDEBUG(cout << "(6)Lazy created with " << m_samplesize << endl;)  LAZYDEBUG(cout << "(6)Lazy created with " << m_samplesize << endl;)
715  }  }
716    
717    
718  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, const int axis0, const int axis1)  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, const int axis0, const int axis1)
719      : parent(left->getFunctionSpace(), SwapShape(left,axis0,axis1)),          : parent(left->getFunctionSpace(), SwapShape(left,axis0,axis1)),
720      m_op(op),          m_op(op),
721      m_axis_offset(axis0),          m_axis_offset(axis0),
722      m_transpose(axis1),          m_transpose(axis1),
723      m_tol(0)          m_tol(0)
724  {  {
725     if ((getOpgroup(op)!=G_NP1OUT_2P))     if ((getOpgroup(op)!=G_NP1OUT_2P))
726     {     {
727      throw DataException("Programmer error - constructor DataLazy(left, op, tol) will only process UNARY operations which require two integer parameters.");          throw DataException("Programmer error - constructor DataLazy(left, op, tol) will only process UNARY operations which require two integer parameters.");
728     }     }
729     DataLazy_ptr lleft;     DataLazy_ptr lleft;
730     if (!left->isLazy())     if (!left->isLazy())
731     {     {
732      lleft=DataLazy_ptr(new DataLazy(left));          lleft=DataLazy_ptr(new DataLazy(left));
733     }     }
734     else     else
735     {     {
736      lleft=dynamic_pointer_cast<DataLazy>(left);          lleft=dynamic_pointer_cast<DataLazy>(left);
737     }     }
738     m_readytype=lleft->m_readytype;     m_readytype=lleft->m_readytype;
739     m_left=lleft;     m_left=lleft;
    m_buffsRequired=calcBuffs(m_left, m_right,m_op); // yeah m_right will be null at this point  
740     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
    m_maxsamplesize=max(m_samplesize,m_left->getMaxSampleSize());  
741     m_children=m_left->m_children+1;     m_children=m_left->m_children+1;
742     m_height=m_left->m_height+1;     m_height=m_left->m_height+1;
743       LazyNodeSetup();
744     SIZELIMIT     SIZELIMIT
745  LAZYDEBUG(cout << "(7)Lazy created with " << m_samplesize << endl;)  LAZYDEBUG(cout << "(7)Lazy created with " << m_samplesize << endl;)
746  }  }
747    
748  DataLazy::~DataLazy()  
749    namespace
750  {  {
 }  
751    
752        inline int max3(int a, int b, int c)
753        {
754            int t=(a>b?a:b);
755            return (t>c?t:c);
756    
757  int      }
 DataLazy::getBuffsRequired() const  
 {  
     return m_buffsRequired;  
758  }  }
759    
760    DataLazy::DataLazy(DataAbstract_ptr mask, DataAbstract_ptr left, DataAbstract_ptr right/*, double tol*/)
761  size_t          : parent(left->getFunctionSpace(), left->getShape()),
762  DataLazy::getMaxSampleSize() const          m_op(CONDEVAL),
763            m_axis_offset(0),
764            m_transpose(0),
765            m_tol(0)
766  {  {
767      return m_maxsamplesize;  
768       DataLazy_ptr lmask;
769       DataLazy_ptr lleft;
770       DataLazy_ptr lright;
771       if (!mask->isLazy())
772       {
773            lmask=DataLazy_ptr(new DataLazy(mask));
774       }
775       else
776       {
777            lmask=dynamic_pointer_cast<DataLazy>(mask);
778       }
779       if (!left->isLazy())
780       {
781            lleft=DataLazy_ptr(new DataLazy(left));
782       }
783       else
784       {
785            lleft=dynamic_pointer_cast<DataLazy>(left);
786       }
787       if (!right->isLazy())
788       {
789            lright=DataLazy_ptr(new DataLazy(right));
790       }
791       else
792       {
793            lright=dynamic_pointer_cast<DataLazy>(right);
794       }
795       m_readytype=lmask->m_readytype;
796       if ((lleft->m_readytype!=lright->m_readytype) || (lmask->m_readytype!=lleft->m_readytype))
797       {
798            throw DataException("Programmer Error - condEval arguments must have the same readytype");
799       }
800       m_left=lleft;
801       m_right=lright;
802       m_mask=lmask;
803       m_samplesize=getNumDPPSample()*getNoValues();
804       m_children=m_left->m_children+m_right->m_children+m_mask->m_children+1;
805       m_height=max3(m_left->m_height,m_right->m_height,m_mask->m_height)+1;
806       LazyNodeSetup();
807       SIZELIMIT
808    LAZYDEBUG(cout << "(8)Lazy created with " << m_samplesize << endl;)
809  }  }
810    
811    
812    
813  size_t  DataLazy::~DataLazy()
 DataLazy::getSampleBufferSize() const  
814  {  {
815      return m_maxsamplesize*(max(1,m_buffsRequired));     delete[] m_sampleids;
816  }  }
817    
818    
819  /*  /*
820    \brief Evaluates the expression using methods on Data.    \brief Evaluates the expression using methods on Data.
821    This does the work for the collapse method.    This does the work for the collapse method.
822    For reasons of efficiency do not call this method on DataExpanded nodes.    For reasons of efficiency do not call this method on DataExpanded nodes.
823  */  */
824  DataReady_ptr  DataReady_ptr
825  DataLazy::collapseToReady()  DataLazy::collapseToReady() const
826  {  {
827    if (m_readytype=='E')    if (m_readytype=='E')
828    { // this is more an efficiency concern than anything else    {     // this is more an efficiency concern than anything else
829      throw DataException("Programmer Error - do not use collapse on Expanded data.");      throw DataException("Programmer Error - do not use collapse on Expanded data.");
830    }    }
831    if (m_op==IDENTITY)    if (m_op==IDENTITY)
# Line 776  DataLazy::collapseToReady() Line 843  DataLazy::collapseToReady()
843    switch(m_op)    switch(m_op)
844    {    {
845      case ADD:      case ADD:
846      result=left+right;          result=left+right;
847      break;          break;
848      case SUB:            case SUB:          
849      result=left-right;          result=left-right;
850      break;          break;
851      case MUL:            case MUL:          
852      result=left*right;          result=left*right;
853      break;          break;
854      case DIV:            case DIV:          
855      result=left/right;          result=left/right;
856      break;          break;
857      case SIN:      case SIN:
858      result=left.sin();            result=left.sin();      
859      break;          break;
860      case COS:      case COS:
861      result=left.cos();          result=left.cos();
862      break;          break;
863      case TAN:      case TAN:
864      result=left.tan();          result=left.tan();
865      break;          break;
866      case ASIN:      case ASIN:
867      result=left.asin();          result=left.asin();
868      break;          break;
869      case ACOS:      case ACOS:
870      result=left.acos();          result=left.acos();
871      break;          break;
872      case ATAN:      case ATAN:
873      result=left.atan();          result=left.atan();
874      break;          break;
875      case SINH:      case SINH:
876      result=left.sinh();          result=left.sinh();
877      break;          break;
878      case COSH:      case COSH:
879      result=left.cosh();          result=left.cosh();
880      break;          break;
881      case TANH:      case TANH:
882      result=left.tanh();          result=left.tanh();
883      break;          break;
884      case ERF:      case ERF:
885      result=left.erf();          result=left.erf();
886      break;          break;
887     case ASINH:     case ASINH:
888      result=left.asinh();          result=left.asinh();
889      break;          break;
890     case ACOSH:     case ACOSH:
891      result=left.acosh();          result=left.acosh();
892      break;          break;
893     case ATANH:     case ATANH:
894      result=left.atanh();          result=left.atanh();
895      break;          break;
896      case LOG10:      case LOG10:
897      result=left.log10();          result=left.log10();
898      break;          break;
899      case LOG:      case LOG:
900      result=left.log();          result=left.log();
901      break;          break;
902      case SIGN:      case SIGN:
903      result=left.sign();          result=left.sign();
904      break;          break;
905      case ABS:      case ABS:
906      result=left.abs();          result=left.abs();
907      break;          break;
908      case NEG:      case NEG:
909      result=left.neg();          result=left.neg();
910      break;          break;
911      case POS:      case POS:
912      // it doesn't mean anything for delayed.          // it doesn't mean anything for delayed.
913      // it will just trigger a deep copy of the lazy object          // it will just trigger a deep copy of the lazy object
914      throw DataException("Programmer error - POS not supported for lazy data.");          throw DataException("Programmer error - POS not supported for lazy data.");
915      break;          break;
916      case EXP:      case EXP:
917      result=left.exp();          result=left.exp();
918      break;          break;
919      case SQRT:      case SQRT:
920      result=left.sqrt();          result=left.sqrt();
921      break;          break;
922      case RECIP:      case RECIP:
923      result=left.oneOver();          result=left.oneOver();
924      break;          break;
925      case GZ:      case GZ:
926      result=left.wherePositive();          result=left.wherePositive();
927      break;          break;
928      case LZ:      case LZ:
929      result=left.whereNegative();          result=left.whereNegative();
930      break;          break;
931      case GEZ:      case GEZ:
932      result=left.whereNonNegative();          result=left.whereNonNegative();
933      break;          break;
934      case LEZ:      case LEZ:
935      result=left.whereNonPositive();          result=left.whereNonPositive();
936      break;          break;
937      case NEZ:      case NEZ:
938      result=left.whereNonZero(m_tol);          result=left.whereNonZero(m_tol);
939      break;          break;
940      case EZ:      case EZ:
941      result=left.whereZero(m_tol);          result=left.whereZero(m_tol);
942      break;          break;
943      case SYM:      case SYM:
944      result=left.symmetric();          result=left.symmetric();
945      break;          break;
946      case NSYM:      case NSYM:
947      result=left.nonsymmetric();          result=left.nonsymmetric();
948      break;          break;
949      case PROD:      case PROD:
950      result=C_GeneralTensorProduct(left,right,m_axis_offset, m_transpose);          result=C_GeneralTensorProduct(left,right,m_axis_offset, m_transpose);
951      break;          break;
952      case TRANS:      case TRANS:
953      result=left.transpose(m_axis_offset);          result=left.transpose(m_axis_offset);
954      break;          break;
955      case TRACE:      case TRACE:
956      result=left.trace(m_axis_offset);          result=left.trace(m_axis_offset);
957      break;          break;
958      case SWAP:      case SWAP:
959      result=left.swapaxes(m_axis_offset, m_transpose);          result=left.swapaxes(m_axis_offset, m_transpose);
960      break;          break;
961        case MINVAL:
962            result=left.minval();
963            break;
964        case MAXVAL:
965            result=left.minval();
966            break;
967      default:      default:
968      throw DataException("Programmer error - collapseToReady does not know how to resolve operator "+opToString(m_op)+".");          throw DataException("Programmer error - collapseToReady does not know how to resolve operator "+opToString(m_op)+".");
969    }    }
970    return result.borrowReadyPtr();    return result.borrowReadyPtr();
971  }  }
# Line 904  DataLazy::collapseToReady() Line 977  DataLazy::collapseToReady()
977     the purpose of using DataLazy in the first place).     the purpose of using DataLazy in the first place).
978  */  */
979  void  void
980  DataLazy::collapse()  DataLazy::collapse() const
981  {  {
982    if (m_op==IDENTITY)    if (m_op==IDENTITY)
983    {    {
984      return;          return;
985    }    }
986    if (m_readytype=='E')    if (m_readytype=='E')
987    { // this is more an efficiency concern than anything else    {     // this is more an efficiency concern than anything else
988      throw DataException("Programmer Error - do not use collapse on Expanded data.");      throw DataException("Programmer Error - do not use collapse on Expanded data.");
989    }    }
990    m_id=collapseToReady();    m_id=collapseToReady();
991    m_op=IDENTITY;    m_op=IDENTITY;
992  }  }
993    
994  /*  
995    \brief Compute the value of the expression (unary operation) for the given sample.  
996    \return Vector which stores the value of the subexpression for the given sample.  
997    \param v A vector to store intermediate results.  
998    \param offset Index in v to begin storing results.  
999    \param sampleNo Sample number to evaluate.  #define PROC_OP(TYPE,X)                               \
1000    \param roffset (output parameter) the offset in the return vector where the result begins.          for (int j=0;j<onumsteps;++j)\
1001            {\
1002    The return value will be an existing vector so do not deallocate it.            for (int i=0;i<numsteps;++i,resultp+=resultStep) \
1003    If the result is stored in v it should be stored at the offset given.            { \
1004    Everything from offset to the end of v should be considered available for this method to use.  LAZYDEBUG(cout << "[left,right]=[" << lroffset << "," << rroffset << "]" << endl;)\
1005  */  LAZYDEBUG(cout << "{left,right}={" << (*left)[lroffset] << "," << (*right)[rroffset] << "}\n";)\
1006  DataTypes::ValueType*               tensor_binary_operation< TYPE >(chunksize, &((*left)[lroffset]), &((*right)[rroffset]), resultp, X); \
1007  DataLazy::resolveUnary(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const  LAZYDEBUG(cout << " result=      " << resultp[0] << endl;) \
1008                 lroffset+=leftstep; \
1009                 rroffset+=rightstep; \
1010              }\
1011              lroffset+=oleftstep;\
1012              rroffset+=orightstep;\
1013            }
1014    
1015    
1016    // The result will be stored in m_samples
1017    // The return value is a pointer to the DataVector, offset is the offset within the return value
1018    const DataTypes::RealVectorType*
1019    DataLazy::resolveNodeSample(int tid, int sampleNo, size_t& roffset) const
1020  {  {
1021      // we assume that any collapsing has been done before we get here  LAZYDEBUG(cout << "Resolve sample " << toString() << endl;)
1022      // since we only have one argument we don't need to think about only          // collapse so we have a 'E' node or an IDENTITY for some other type
1023      // processing single points.    if (m_readytype!='E' && m_op!=IDENTITY)
1024      {
1025            collapse();
1026      }
1027      if (m_op==IDENTITY)  
1028      {
1029        const RealVectorType& vec=m_id->getVectorRO();
1030        roffset=m_id->getPointOffset(sampleNo, 0);
1031    #ifdef LAZY_STACK_PROF
1032    int x;
1033    if (&x<stackend[omp_get_thread_num()])
1034    {
1035           stackend[omp_get_thread_num()]=&x;
1036    }
1037    #endif
1038        return &(vec);
1039      }
1040      if (m_readytype!='E')
1041      {
1042        throw DataException("Programmer Error - Collapse did not produce an expanded node.");
1043      }
1044      if (m_sampleids[tid]==sampleNo)
1045      {
1046            roffset=tid*m_samplesize;
1047            return &(m_samples);            // sample is already resolved
1048      }
1049      m_sampleids[tid]=sampleNo;
1050    
1051      switch (getOpgroup(m_op))
1052      {
1053      case G_UNARY:
1054      case G_UNARY_P: return resolveNodeUnary(tid, sampleNo, roffset);
1055      case G_BINARY: return resolveNodeBinary(tid, sampleNo, roffset);
1056      case G_NP1OUT: return resolveNodeNP1OUT(tid, sampleNo, roffset);
1057      case G_NP1OUT_P: return resolveNodeNP1OUT_P(tid, sampleNo, roffset);
1058      case G_TENSORPROD: return resolveNodeTProd(tid, sampleNo, roffset);
1059      case G_NP1OUT_2P: return resolveNodeNP1OUT_2P(tid, sampleNo, roffset);
1060      case G_REDUCTION: return resolveNodeReduction(tid, sampleNo, roffset);
1061      case G_CONDEVAL: return resolveNodeCondEval(tid, sampleNo, roffset);
1062      default:
1063        throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");
1064      }
1065    }
1066    
1067    const DataTypes::RealVectorType*
1068    DataLazy::resolveNodeUnary(int tid, int sampleNo, size_t& roffset) const
1069    {
1070            // we assume that any collapsing has been done before we get here
1071            // since we only have one argument we don't need to think about only
1072            // processing single points.
1073            // we will also know we won't get identity nodes
1074    if (m_readytype!='E')    if (m_readytype!='E')
1075    {    {
1076      throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");      throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
1077    }    }
1078    const ValueType* vleft=m_left->resolveSample(v,offset,sampleNo,roffset);    if (m_op==IDENTITY)
1079    const double* left=&((*vleft)[roffset]);    {
1080    double* result=&(v[offset]);      throw DataException("Programmer error - resolveNodeUnary should not be called on identity nodes.");
1081    roffset=offset;    }
1082      const DataTypes::RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, roffset);
1083      const double* left=&((*leftres)[roffset]);
1084      roffset=m_samplesize*tid;
1085      double* result=&(m_samples[roffset]);
1086    switch (m_op)    switch (m_op)
1087    {    {
1088      case SIN:        case SIN:  
1089      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sin);          tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sin);
1090      break;          break;
1091      case COS:      case COS:
1092      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cos);          tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cos);
1093      break;          break;
1094      case TAN:      case TAN:
1095      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tan);          tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tan);
1096      break;          break;
1097      case ASIN:      case ASIN:
1098      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::asin);          tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::asin);
1099      break;          break;
1100      case ACOS:      case ACOS:
1101      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::acos);          tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::acos);
1102      break;          break;
1103      case ATAN:      case ATAN:
1104      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::atan);          tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::atan);
1105      break;          break;
1106      case SINH:      case SINH:
1107      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sinh);          tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sinh);
1108      break;          break;
1109      case COSH:      case COSH:
1110      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cosh);          tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cosh);
1111      break;          break;
1112      case TANH:      case TANH:
1113      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tanh);          tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tanh);
1114      break;          break;
1115      case ERF:      case ERF:
1116  #if defined (_WIN32) && !defined(__INTEL_COMPILER)  #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1117      throw DataException("Error - Data:: erf function is not supported on _WIN32 platforms.");          throw DataException("Error - Data:: erf function is not supported on _WIN32 platforms.");
1118  #else  #else
1119      tensor_unary_operation(m_samplesize, left, result, ::erf);          tensor_unary_operation(m_samplesize, left, result, ::erf);
1120      break;          break;
1121  #endif  #endif
1122     case ASINH:     case ASINH:
1123  #if defined (_WIN32) && !defined(__INTEL_COMPILER)  #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1124      tensor_unary_operation(m_samplesize, left, result, escript::asinh_substitute);          tensor_unary_operation(m_samplesize, left, result, escript::asinh_substitute);
1125  #else  #else
1126      tensor_unary_operation(m_samplesize, left, result, ::asinh);          tensor_unary_operation(m_samplesize, left, result, ::asinh);
1127  #endif    #endif  
1128      break;          break;
1129     case ACOSH:     case ACOSH:
1130  #if defined (_WIN32) && !defined(__INTEL_COMPILER)  #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1131      tensor_unary_operation(m_samplesize, left, result, escript::acosh_substitute);          tensor_unary_operation(m_samplesize, left, result, escript::acosh_substitute);
1132  #else  #else
1133      tensor_unary_operation(m_samplesize, left, result, ::acosh);          tensor_unary_operation(m_samplesize, left, result, ::acosh);
1134  #endif    #endif  
1135      break;          break;
1136     case ATANH:     case ATANH:
1137  #if defined (_WIN32) && !defined(__INTEL_COMPILER)  #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1138      tensor_unary_operation(m_samplesize, left, result, escript::atanh_substitute);          tensor_unary_operation(m_samplesize, left, result, escript::atanh_substitute);
1139  #else  #else
1140      tensor_unary_operation(m_samplesize, left, result, ::atanh);          tensor_unary_operation(m_samplesize, left, result, ::atanh);
1141  #endif    #endif  
1142      break;          break;
1143      case LOG10:      case LOG10:
1144      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log10);          tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log10);
1145      break;          break;
1146      case LOG:      case LOG:
1147      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log);          tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log);
1148      break;          break;
1149      case SIGN:      case SIGN:
1150      tensor_unary_operation(m_samplesize, left, result, escript::fsign);          tensor_unary_operation(m_samplesize, left, result, escript::fsign);
1151      break;          break;
1152      case ABS:      case ABS:
1153      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::fabs);          tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::fabs);
1154      break;          break;
1155      case NEG:      case NEG:
1156      tensor_unary_operation(m_samplesize, left, result, negate<double>());          tensor_unary_operation(m_samplesize, left, result, negate<double>());
1157      break;          break;
1158      case POS:      case POS:
1159      // it doesn't mean anything for delayed.          // it doesn't mean anything for delayed.
1160      // it will just trigger a deep copy of the lazy object          // it will just trigger a deep copy of the lazy object
1161      throw DataException("Programmer error - POS not supported for lazy data.");          throw DataException("Programmer error - POS not supported for lazy data.");
1162      break;          break;
1163      case EXP:      case EXP:
1164      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::exp);          tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::exp);
1165      break;          break;
1166      case SQRT:      case SQRT:
1167      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sqrt);          tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sqrt);
1168      break;          break;
1169      case RECIP:      case RECIP:
1170      tensor_unary_operation(m_samplesize, left, result, bind1st(divides<double>(),1.));          tensor_unary_operation(m_samplesize, left, result, bind1st(divides<double>(),1.));
1171      break;          break;
1172      case GZ:      case GZ:
1173      tensor_unary_operation(m_samplesize, left, result, bind2nd(greater<double>(),0.0));          tensor_unary_operation(m_samplesize, left, result, bind2nd(greater<double>(),0.0));
1174      break;          break;
1175      case LZ:      case LZ:
1176      tensor_unary_operation(m_samplesize, left, result, bind2nd(less<double>(),0.0));          tensor_unary_operation(m_samplesize, left, result, bind2nd(less<double>(),0.0));
1177      break;          break;
1178      case GEZ:      case GEZ:
1179      tensor_unary_operation(m_samplesize, left, result, bind2nd(greater_equal<double>(),0.0));          tensor_unary_operation(m_samplesize, left, result, bind2nd(greater_equal<double>(),0.0));
1180      break;          break;
1181      case LEZ:      case LEZ:
1182      tensor_unary_operation(m_samplesize, left, result, bind2nd(less_equal<double>(),0.0));          tensor_unary_operation(m_samplesize, left, result, bind2nd(less_equal<double>(),0.0));
1183      break;          break;
1184  // There are actually G_UNARY_P but I don't see a compelling reason to treat them differently  // There are actually G_UNARY_P but I don't see a compelling reason to treat them differently
1185      case NEZ:      case NEZ:
1186      tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsGT(),m_tol));          tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsGT(),m_tol));
1187      break;          break;
1188      case EZ:      case EZ:
1189      tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsLTE(),m_tol));          tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsLTE(),m_tol));
1190      break;          break;
1191    
1192      default:      default:
1193      throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");          throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");
1194    }    }
1195    return &v;    return &(m_samples);
1196  }  }
1197    
1198    
1199    const DataTypes::RealVectorType*
1200    DataLazy::resolveNodeReduction(int tid, int sampleNo, size_t& roffset) const
1201    {
1202            // we assume that any collapsing has been done before we get here
1203            // since we only have one argument we don't need to think about only
1204            // processing single points.
1205            // we will also know we won't get identity nodes
1206      if (m_readytype!='E')
1207      {
1208        throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
1209      }
1210      if (m_op==IDENTITY)
1211      {
1212        throw DataException("Programmer error - resolveNodeUnary should not be called on identity nodes.");
1213      }
1214      size_t loffset=0;
1215      const DataTypes::RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, loffset);
1216    
1217      roffset=m_samplesize*tid;
1218      unsigned int ndpps=getNumDPPSample();
1219      unsigned int psize=DataTypes::noValues(m_left->getShape());
1220      double* result=&(m_samples[roffset]);
1221      switch (m_op)
1222      {
1223        case MINVAL:
1224            {
1225              for (unsigned int z=0;z<ndpps;++z)
1226              {
1227                FMin op;
1228                *result=DataMaths::reductionOp(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max());
1229                loffset+=psize;
1230                result++;
1231              }
1232            }
1233            break;
1234        case MAXVAL:
1235            {
1236              for (unsigned int z=0;z<ndpps;++z)
1237              {
1238              FMax op;
1239              *result=DataMaths::reductionOp(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max()*-1);
1240              loffset+=psize;
1241              result++;
1242              }
1243            }
1244            break;
1245        default:
1246            throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");
1247      }
1248      return &(m_samples);
1249    }
1250    
1251    const DataTypes::RealVectorType*
1252    DataLazy::resolveNodeNP1OUT(int tid, int sampleNo, size_t& roffset) const
 /*  
   \brief Compute the value of the expression (unary operation) for the given sample.  
   \return Vector which stores the value of the subexpression for the given sample.  
   \param v A vector to store intermediate results.  
   \param offset Index in v to begin storing results.  
   \param sampleNo Sample number to evaluate.  
   \param roffset (output parameter) the offset in the return vector where the result begins.  
   
   The return value will be an existing vector so do not deallocate it.  
   If the result is stored in v it should be stored at the offset given.  
   Everything from offset to the end of v should be considered available for this method to use.  
 */  
 DataTypes::ValueType*  
 DataLazy::resolveNP1OUT(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const  
1253  {  {
1254      // we assume that any collapsing has been done before we get here          // we assume that any collapsing has been done before we get here
1255      // since we only have one argument we don't need to think about only          // since we only have one argument we don't need to think about only
1256      // processing single points.          // processing single points.
1257    if (m_readytype!='E')    if (m_readytype!='E')
1258    {    {
1259      throw DataException("Programmer error - resolveNP1OUT should only be called on expanded Data.");      throw DataException("Programmer error - resolveNodeNP1OUT should only be called on expanded Data.");
1260    }    }
1261      // since we can't write the result over the input, we need a result offset further along    if (m_op==IDENTITY)
1262    size_t subroffset=roffset+m_samplesize;    {
1263  LAZYDEBUG(cerr << "subroffset=" << subroffset << endl;)      throw DataException("Programmer error - resolveNodeNP1OUT should not be called on identity nodes.");
1264    const ValueType* vleft=m_left->resolveSample(v,offset+m_left->m_samplesize,sampleNo,subroffset);    }
1265    roffset=offset;    size_t subroffset;
1266      const RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1267      roffset=m_samplesize*tid;
1268    size_t loop=0;    size_t loop=0;
1269    size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;    size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1270    size_t step=getNoValues();    size_t step=getNoValues();
1271      size_t offset=roffset;
1272    switch (m_op)    switch (m_op)
1273    {    {
1274      case SYM:      case SYM:
1275      for (loop=0;loop<numsteps;++loop)          for (loop=0;loop<numsteps;++loop)
1276      {          {
1277          DataMaths::symmetric(*vleft,m_left->getShape(),subroffset, v, getShape(), offset);              DataMaths::symmetric(*leftres,m_left->getShape(),subroffset, m_samples, getShape(), offset);
1278          subroffset+=step;              subroffset+=step;
1279          offset+=step;              offset+=step;
1280      }          }
1281      break;          break;
1282      case NSYM:      case NSYM:
1283      for (loop=0;loop<numsteps;++loop)          for (loop=0;loop<numsteps;++loop)
1284      {          {
1285          DataMaths::nonsymmetric(*vleft,m_left->getShape(),subroffset, v, getShape(), offset);              DataMaths::nonsymmetric(*leftres,m_left->getShape(),subroffset, m_samples, getShape(), offset);
1286          subroffset+=step;              subroffset+=step;
1287          offset+=step;              offset+=step;
1288      }          }
1289      break;          break;
1290      default:      default:
1291      throw DataException("Programmer error - resolveNP1OUT can not resolve operator "+opToString(m_op)+".");          throw DataException("Programmer error - resolveNP1OUT can not resolve operator "+opToString(m_op)+".");
1292    }    }
1293    return &v;    return &m_samples;
1294  }  }
1295    
1296  /*  const DataTypes::RealVectorType*
1297    \brief Compute the value of the expression (unary operation) for the given sample.  DataLazy::resolveNodeNP1OUT_P(int tid, int sampleNo, size_t& roffset) const
   \return Vector which stores the value of the subexpression for the given sample.  
   \param v A vector to store intermediate results.  
   \param offset Index in v to begin storing results.  
   \param sampleNo Sample number to evaluate.  
   \param roffset (output parameter) the offset in the return vector where the result begins.  
   
   The return value will be an existing vector so do not deallocate it.  
   If the result is stored in v it should be stored at the offset given.  
   Everything from offset to the end of v should be considered available for this method to use.  
 */  
 DataTypes::ValueType*  
 DataLazy::resolveNP1OUT_P(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const  
1298  {  {
1299      // we assume that any collapsing has been done before we get here          // we assume that any collapsing has been done before we get here
1300      // since we only have one argument we don't need to think about only          // since we only have one argument we don't need to think about only
1301      // processing single points.          // processing single points.
1302    if (m_readytype!='E')    if (m_readytype!='E')
1303    {    {
1304      throw DataException("Programmer error - resolveNP1OUT_P should only be called on expanded Data.");      throw DataException("Programmer error - resolveNodeNP1OUT_P should only be called on expanded Data.");
1305      }
1306      if (m_op==IDENTITY)
1307      {
1308        throw DataException("Programmer error - resolveNodeNP1OUT_P should not be called on identity nodes.");
1309    }    }
     // since we can't write the result over the input, we need a result offset further along  
1310    size_t subroffset;    size_t subroffset;
1311    const ValueType* vleft=m_left->resolveSample(v,offset+m_left->m_samplesize,sampleNo,subroffset);    size_t offset;
1312  LAZYDEBUG(cerr << "srcsamplesize=" << offset+m_left->m_samplesize << " beg=" << subroffset << endl;)    const RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1313  LAZYDEBUG(cerr << "Offset for 5800=" << getPointOffset(5800/getNumDPPSample(),5800%getNumDPPSample()) << endl;)    roffset=m_samplesize*tid;
1314    roffset=offset;    offset=roffset;
1315    size_t loop=0;    size_t loop=0;
1316    size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;    size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1317    size_t outstep=getNoValues();    size_t outstep=getNoValues();
1318    size_t instep=m_left->getNoValues();    size_t instep=m_left->getNoValues();
 LAZYDEBUG(cerr << "instep=" << instep << " outstep=" << outstep<< " numsteps=" << numsteps << endl;)  
1319    switch (m_op)    switch (m_op)
1320    {    {
1321      case TRACE:      case TRACE:
1322      for (loop=0;loop<numsteps;++loop)          for (loop=0;loop<numsteps;++loop)
1323      {          {
1324  size_t zz=sampleNo*getNumDPPSample()+loop;              DataMaths::trace(*leftres,m_left->getShape(),subroffset, m_samples ,getShape(),offset,m_axis_offset);
1325  if (zz==5800)              subroffset+=instep;
1326  {              offset+=outstep;
1327  LAZYDEBUG(cerr << "point=" <<  zz<< endl;)          }
1328  LAZYDEBUG(cerr << "Input to  trace=" << DataTypes::pointToString(*vleft,m_left->getShape(),subroffset,"") << endl;)          break;
 LAZYDEBUG(cerr << "Offset for point=" << getPointOffset(5800/getNumDPPSample(),5800%getNumDPPSample()) << " vs ";)  
 LAZYDEBUG(cerr << subroffset << endl;)  
 LAZYDEBUG(cerr << "output=" << offset << endl;)  
 }  
             DataMaths::trace(*vleft,m_left->getShape(),subroffset, v ,getShape(),offset,m_axis_offset);  
 if (zz==5800)  
 {  
 LAZYDEBUG(cerr << "Result of trace=" << DataTypes::pointToString(v,getShape(),offset,"") << endl;)  
 }  
         subroffset+=instep;  
         offset+=outstep;  
     }  
     break;  
1329      case TRANS:      case TRANS:
1330      for (loop=0;loop<numsteps;++loop)          for (loop=0;loop<numsteps;++loop)
1331      {          {
1332              DataMaths::transpose(*vleft,m_left->getShape(),subroffset, v,getShape(),offset,m_axis_offset);              DataMaths::transpose(*leftres,m_left->getShape(),subroffset, m_samples, getShape(),offset,m_axis_offset);
1333          subroffset+=instep;              subroffset+=instep;
1334          offset+=outstep;              offset+=outstep;
1335      }          }
1336      break;          break;
1337      default:      default:
1338      throw DataException("Programmer error - resolveNP1OUTP can not resolve operator "+opToString(m_op)+".");          throw DataException("Programmer error - resolveNP1OUTP can not resolve operator "+opToString(m_op)+".");
1339    }    }
1340    return &v;    return &m_samples;
1341  }  }
1342    
1343    
1344  /*  const DataTypes::RealVectorType*
1345    \brief Compute the value of the expression (unary operation with int params) for the given sample.  DataLazy::resolveNodeNP1OUT_2P(int tid, int sampleNo, size_t& roffset) const
   \return Vector which stores the value of the subexpression for the given sample.  
   \param v A vector to store intermediate results.  
   \param offset Index in v to begin storing results.  
   \param sampleNo Sample number to evaluate.  
   \param roffset (output parameter) the offset in the return vector where the result begins.  
   
   The return value will be an existing vector so do not deallocate it.  
   If the result is stored in v it should be stored at the offset given.  
   Everything from offset to the end of v should be considered available for this method to use.  
 */  
 DataTypes::ValueType*  
 DataLazy::resolveNP1OUT_2P(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const  
1346  {  {
     // we assume that any collapsing has been done before we get here  
     // since we only have one argument we don't need to think about only  
     // processing single points.  
1347    if (m_readytype!='E')    if (m_readytype!='E')
1348    {    {
1349      throw DataException("Programmer error - resolveNP1OUT_2P should only be called on expanded Data.");      throw DataException("Programmer error - resolveNodeNP1OUT_2P should only be called on expanded Data.");
1350      }
1351      if (m_op==IDENTITY)
1352      {
1353        throw DataException("Programmer error - resolveNodeNP1OUT_2P should not be called on identity nodes.");
1354    }    }
     // since we can't write the result over the input, we need a result offset further along  
1355    size_t subroffset;    size_t subroffset;
1356    const ValueType* vleft=m_left->resolveSample(v,offset+m_left->m_samplesize,sampleNo,subroffset);    size_t offset;
1357  LAZYDEBUG(cerr << "srcsamplesize=" << offset+m_left->m_samplesize << " beg=" << subroffset << endl;)    const RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1358  LAZYDEBUG(cerr << "Offset for 5800=" << getPointOffset(5800/getNumDPPSample(),5800%getNumDPPSample()) << endl;)    roffset=m_samplesize*tid;
1359    roffset=offset;    offset=roffset;
1360    size_t loop=0;    size_t loop=0;
1361    size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;    size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1362    size_t outstep=getNoValues();    size_t outstep=getNoValues();
1363    size_t instep=m_left->getNoValues();    size_t instep=m_left->getNoValues();
 LAZYDEBUG(cerr << "instep=" << instep << " outstep=" << outstep<< " numsteps=" << numsteps << endl;)  
1364    switch (m_op)    switch (m_op)
1365    {    {
1366      case SWAP:      case SWAP:
1367      for (loop=0;loop<numsteps;++loop)          for (loop=0;loop<numsteps;++loop)
1368      {          {
1369              DataMaths::swapaxes(*vleft,m_left->getShape(),subroffset, v,getShape(),offset,m_axis_offset, m_transpose);              DataMaths::swapaxes(*leftres,m_left->getShape(),subroffset, m_samples, getShape(),offset, m_axis_offset, m_transpose);
1370          subroffset+=instep;              subroffset+=instep;
1371          offset+=outstep;              offset+=outstep;
1372      }          }
1373      break;          break;
1374      default:      default:
1375      throw DataException("Programmer error - resolveNP1OUT2P can not resolve operator "+opToString(m_op)+".");          throw DataException("Programmer error - resolveNodeNP1OUT2P can not resolve operator "+opToString(m_op)+".");
1376    }    }
1377    return &v;    return &m_samples;
1378  }  }
1379    
1380    const DataTypes::RealVectorType*
1381    DataLazy::resolveNodeCondEval(int tid, int sampleNo, size_t& roffset) const
1382    {
1383      if (m_readytype!='E')
1384      {
1385        throw DataException("Programmer error - resolveNodeCondEval should only be called on expanded Data.");
1386      }
1387      if (m_op!=CONDEVAL)
1388      {
1389        throw DataException("Programmer error - resolveNodeCondEval should only be called on CONDEVAL nodes.");
1390      }
1391      size_t subroffset;
1392    
1393      const RealVectorType* maskres=m_mask->resolveNodeSample(tid, sampleNo, subroffset);
1394      const RealVectorType* srcres=0;
1395      if ((*maskres)[subroffset]>0)
1396      {
1397            srcres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1398      }
1399      else
1400      {
1401            srcres=m_right->resolveNodeSample(tid, sampleNo, subroffset);
1402      }
1403    
1404  #define PROC_OP(TYPE,X)                               \    // Now we need to copy the result
1405      for (int j=0;j<onumsteps;++j)\  
1406      {\    roffset=m_samplesize*tid;
1407        for (int i=0;i<numsteps;++i,resultp+=resultStep) \    for (int i=0;i<m_samplesize;++i)
1408        { \    {
1409  LAZYDEBUG(cout << "[left,right]=[" << lroffset << "," << rroffset << "]" << endl;)\          m_samples[roffset+i]=(*srcres)[subroffset+i];  
1410  LAZYDEBUG(cout << "{left,right}={" << (*left)[lroffset] << "," << (*right)[rroffset] << "}\n";)\    }
1411           tensor_binary_operation< TYPE >(chunksize, &((*left)[lroffset]), &((*right)[rroffset]), resultp, X); \  
1412  LAZYDEBUG(cout << " result=      " << resultp[0] << endl;) \    return &m_samples;
1413           lroffset+=leftstep; \  }
          rroffset+=rightstep; \  
       }\  
       lroffset+=oleftstep;\  
       rroffset+=orightstep;\  
     }  
1414    
 /*  
   \brief Compute the value of the expression (binary operation) for the given sample.  
   \return Vector which stores the value of the subexpression for the given sample.  
   \param v A vector to store intermediate results.  
   \param offset Index in v to begin storing results.  
   \param sampleNo Sample number to evaluate.  
   \param roffset (output parameter) the offset in the return vector where the result begins.  
   
   The return value will be an existing vector so do not deallocate it.  
   If the result is stored in v it should be stored at the offset given.  
   Everything from offset to the end of v should be considered available for this method to use.  
 */  
1415  // This method assumes that any subexpressions which evaluate to Constant or Tagged Data  // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1416  // have already been collapsed to IDENTITY. So we must have at least one expanded child.  // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1417  // If both children are expanded, then we can process them in a single operation (we treat  // If both children are expanded, then we can process them in a single operation (we treat
# Line 1274  LAZYDEBUG(cout << " result=      " << re Line 1421  LAZYDEBUG(cout << " result=      " << re
1421  // There is an additional complication when scalar operations are considered.  // There is an additional complication when scalar operations are considered.
1422  // For example, 2+Vector.  // For example, 2+Vector.
1423  // In this case each double within the point is treated individually  // In this case each double within the point is treated individually
1424  DataTypes::ValueType*  const DataTypes::RealVectorType*
1425  DataLazy::resolveBinary(ValueType& v,  size_t offset, int sampleNo, size_t& roffset) const  DataLazy::resolveNodeBinary(int tid, int sampleNo, size_t& roffset) const
1426  {  {
1427  LAZYDEBUG(cout << "Resolve binary: " << toString() << endl;)  LAZYDEBUG(cout << "Resolve binary: " << toString() << endl;)
1428    
1429    size_t lroffset=0, rroffset=0;    // offsets in the left and right result vectors    size_t lroffset=0, rroffset=0;        // offsets in the left and right result vectors
1430      // first work out which of the children are expanded          // first work out which of the children are expanded
1431    bool leftExp=(m_left->m_readytype=='E');    bool leftExp=(m_left->m_readytype=='E');
1432    bool rightExp=(m_right->m_readytype=='E');    bool rightExp=(m_right->m_readytype=='E');
1433    if (!leftExp && !rightExp)    if (!leftExp && !rightExp)
1434    {    {
1435      throw DataException("Programmer Error - please use collapse if neither argument has type 'E'.");          throw DataException("Programmer Error - please use collapse if neither argument has type 'E'.");
1436    }    }
1437    bool leftScalar=(m_left->getRank()==0);    bool leftScalar=(m_left->getRank()==0);
1438    bool rightScalar=(m_right->getRank()==0);    bool rightScalar=(m_right->getRank()==0);
1439    if ((m_left->getRank()!=m_right->getRank()) && (!leftScalar && !rightScalar))    if ((m_left->getRank()!=m_right->getRank()) && (!leftScalar && !rightScalar))
1440    {    {
1441      throw DataException("resolveBinary - ranks of arguments must match unless one of them is scalar.");          throw DataException("resolveBinary - ranks of arguments must match unless one of them is scalar.");
1442    }    }
1443    size_t leftsize=m_left->getNoValues();    size_t leftsize=m_left->getNoValues();
1444    size_t rightsize=m_right->getNoValues();    size_t rightsize=m_right->getNoValues();
1445    size_t chunksize=1;           // how many doubles will be processed in one go    size_t chunksize=1;                   // how many doubles will be processed in one go
1446    int leftstep=0;       // how far should the left offset advance after each step    int leftstep=0;               // how far should the left offset advance after each step
1447    int rightstep=0;    int rightstep=0;
1448    int numsteps=0;       // total number of steps for the inner loop    int numsteps=0;               // total number of steps for the inner loop
1449    int oleftstep=0;  // the o variables refer to the outer loop    int oleftstep=0;      // the o variables refer to the outer loop
1450    int orightstep=0; // The outer loop is only required in cases where there is an extended scalar    int orightstep=0;     // The outer loop is only required in cases where there is an extended scalar
1451    int onumsteps=1;    int onumsteps=1;
1452        
1453    bool LES=(leftExp && leftScalar); // Left is an expanded scalar    bool LES=(leftExp && leftScalar);     // Left is an expanded scalar
1454    bool RES=(rightExp && rightScalar);    bool RES=(rightExp && rightScalar);
1455    bool LS=(!leftExp && leftScalar); // left is a single scalar    bool LS=(!leftExp && leftScalar);     // left is a single scalar
1456    bool RS=(!rightExp && rightScalar);    bool RS=(!rightExp && rightScalar);
1457    bool LN=(!leftExp && !leftScalar);    // left is a single non-scalar    bool LN=(!leftExp && !leftScalar);    // left is a single non-scalar
1458    bool RN=(!rightExp && !rightScalar);    bool RN=(!rightExp && !rightScalar);
1459    bool LEN=(leftExp && !leftScalar);    // left is an expanded non-scalar    bool LEN=(leftExp && !leftScalar);    // left is an expanded non-scalar
1460    bool REN=(rightExp && !rightScalar);    bool REN=(rightExp && !rightScalar);
1461    
1462    if ((LES && RES) || (LEN && REN)) // both are Expanded scalars or both are expanded non-scalars    if ((LES && RES) || (LEN && REN))     // both are Expanded scalars or both are expanded non-scalars
1463    {    {
1464      chunksize=m_left->getNumDPPSample()*leftsize;          chunksize=m_left->getNumDPPSample()*leftsize;
1465      leftstep=0;          leftstep=0;
1466      rightstep=0;          rightstep=0;
1467      numsteps=1;          numsteps=1;
1468    }    }
1469    else if (LES || RES)    else if (LES || RES)
1470    {    {
1471      chunksize=1;          chunksize=1;
1472      if (LES)        // left is an expanded scalar          if (LES)                // left is an expanded scalar
1473      {          {
1474          if (RS)                  if (RS)
1475          {                  {
1476             leftstep=1;                     leftstep=1;
1477             rightstep=0;                     rightstep=0;
1478             numsteps=m_left->getNumDPPSample();                     numsteps=m_left->getNumDPPSample();
1479          }                  }
1480          else        // RN or REN                  else            // RN or REN
1481          {                  {
1482             leftstep=0;                     leftstep=0;
1483             oleftstep=1;                     oleftstep=1;
1484             rightstep=1;                     rightstep=1;
1485             orightstep=(RN ? -(int)rightsize : 0);                     orightstep=(RN ? -(int)rightsize : 0);
1486             numsteps=rightsize;                     numsteps=rightsize;
1487             onumsteps=m_left->getNumDPPSample();                     onumsteps=m_left->getNumDPPSample();
1488          }                  }
1489      }          }
1490      else        // right is an expanded scalar          else            // right is an expanded scalar
1491      {          {
1492          if (LS)                  if (LS)
1493          {                  {
1494             rightstep=1;                     rightstep=1;
1495             leftstep=0;                     leftstep=0;
1496             numsteps=m_right->getNumDPPSample();                     numsteps=m_right->getNumDPPSample();
1497          }                  }
1498          else                  else
1499          {                  {
1500             rightstep=0;                     rightstep=0;
1501             orightstep=1;                     orightstep=1;
1502             leftstep=1;                     leftstep=1;
1503             oleftstep=(LN ? -(int)leftsize : 0);                     oleftstep=(LN ? -(int)leftsize : 0);
1504             numsteps=leftsize;                     numsteps=leftsize;
1505             onumsteps=m_right->getNumDPPSample();                     onumsteps=m_right->getNumDPPSample();
1506          }                  }
1507      }          }
1508    }    }
1509    else  // this leaves (LEN, RS), (LEN, RN) and their transposes    else  // this leaves (LEN, RS), (LEN, RN) and their transposes
1510    {    {
1511      if (LEN)    // and Right will be a single value          if (LEN)        // and Right will be a single value
1512      {          {
1513          chunksize=rightsize;                  chunksize=rightsize;
1514          leftstep=rightsize;                  leftstep=rightsize;
1515          rightstep=0;                  rightstep=0;
1516          numsteps=m_left->getNumDPPSample();                  numsteps=m_left->getNumDPPSample();
1517          if (RS)                  if (RS)
1518          {                  {
1519             numsteps*=leftsize;                     numsteps*=leftsize;
1520          }                  }
1521      }          }
1522      else    // REN          else    // REN
1523      {          {
1524          chunksize=leftsize;                  chunksize=leftsize;
1525          rightstep=leftsize;                  rightstep=leftsize;
1526          leftstep=0;                  leftstep=0;
1527          numsteps=m_right->getNumDPPSample();                  numsteps=m_right->getNumDPPSample();
1528          if (LS)                  if (LS)
1529          {                  {
1530             numsteps*=rightsize;                     numsteps*=rightsize;
1531          }                  }
1532      }          }
1533    }    }
1534    
1535    int resultStep=max(leftstep,rightstep);   // only one (at most) should be !=0    int resultStep=max(leftstep,rightstep);       // only one (at most) should be !=0
1536      // Get the values of sub-expressions          // Get the values of sub-expressions
1537    const ValueType* left=m_left->resolveSample(v,offset+getMaxSampleSize(),sampleNo,lroffset);   // see note on    const RealVectorType* left=m_left->resolveNodeSample(tid,sampleNo,lroffset);      
1538      // calcBufss for why we can't put offset as the 2nd param above    const RealVectorType* right=m_right->resolveNodeSample(tid,sampleNo,rroffset);
   const ValueType* right=m_right->resolveSample(v,offset+2*getMaxSampleSize(),sampleNo,rroffset); // Note  
     // the right child starts further along.  
1539  LAZYDEBUG(cout << "Post sub calls in " << toString() << endl;)  LAZYDEBUG(cout << "Post sub calls in " << toString() << endl;)
1540  LAZYDEBUG(cout << "shapes=" << DataTypes::shapeToString(m_left->getShape()) << "," << DataTypes::shapeToString(m_right->getShape()) << endl;)  LAZYDEBUG(cout << "shapes=" << DataTypes::shapeToString(m_left->getShape()) << "," << DataTypes::shapeToString(m_right->getShape()) << endl;)
1541  LAZYDEBUG(cout << "chunksize=" << chunksize << endl << "leftstep=" << leftstep << " rightstep=" << rightstep;)  LAZYDEBUG(cout << "chunksize=" << chunksize << endl << "leftstep=" << leftstep << " rightstep=" << rightstep;)
# Line 1399  LAZYDEBUG(cout << "onumsteps=" << onumst Line 1544  LAZYDEBUG(cout << "onumsteps=" << onumst
1544  LAZYDEBUG(cout << " DPPS=" << m_left->getNumDPPSample() << "," <<m_right->getNumDPPSample() << endl;)  LAZYDEBUG(cout << " DPPS=" << m_left->getNumDPPSample() << "," <<m_right->getNumDPPSample() << endl;)
1545  LAZYDEBUG(cout << "" << LS << RS << LN << RN << LES << RES <<LEN << REN <<   endl;)  LAZYDEBUG(cout << "" << LS << RS << LN << RN << LES << RES <<LEN << REN <<   endl;)
1546    
1547    LAZYDEBUG(cout << "Left res["<< lroffset<< "]=" << (*left)[lroffset] << endl;)
1548    LAZYDEBUG(cout << "Right res["<< rroffset<< "]=" << (*right)[rroffset] << endl;)
1549    
1550    
1551    double* resultp=&(v[offset]);     // results are stored at the vector offset we recieved    roffset=m_samplesize*tid;
1552      double* resultp=&(m_samples[roffset]);                // results are stored at the vector offset we received
1553    switch(m_op)    switch(m_op)
1554    {    {
1555      case ADD:      case ADD:
1556          PROC_OP(NO_ARG,plus<double>());          //PROC_OP(NO_ARG,plus<double>());
1557      break;        DataMaths::binaryOpVectorLazyHelper<real_t, real_t, real_t>(resultp,
1558                 &(*left)[0],
1559                 &(*right)[0],
1560                 chunksize,
1561                 onumsteps,
1562                 numsteps,
1563                 resultStep,
1564                 leftstep,
1565                 rightstep,
1566                 oleftstep,
1567                 orightstep,
1568                 escript::ESFunction::PLUSF);  
1569            break;
1570      case SUB:      case SUB:
1571      PROC_OP(NO_ARG,minus<double>());          PROC_OP(NO_ARG,minus<double>());
1572      break;          break;
1573      case MUL:      case MUL:
1574      PROC_OP(NO_ARG,multiplies<double>());          PROC_OP(NO_ARG,multiplies<double>());
1575      break;          break;
1576      case DIV:      case DIV:
1577      PROC_OP(NO_ARG,divides<double>());          PROC_OP(NO_ARG,divides<double>());
1578      break;          break;
1579      case POW:      case POW:
1580         PROC_OP(double (double,double),::pow);         PROC_OP(double (double,double),::pow);
1581      break;          break;
1582      default:      default:
1583      throw DataException("Programmer error - resolveBinary can not resolve operator "+opToString(m_op)+".");          throw DataException("Programmer error - resolveBinary can not resolve operator "+opToString(m_op)+".");
1584    }    }
1585    roffset=offset;  LAZYDEBUG(cout << "Result res[" << roffset<< "]" << m_samples[roffset] << endl;)
1586    return &v;    return &m_samples;
1587  }  }
1588    
1589    
   
 /*  
   \brief Compute the value of the expression (tensor product) for the given sample.  
   \return Vector which stores the value of the subexpression for the given sample.  
   \param v A vector to store intermediate results.  
   \param offset Index in v to begin storing results.  
   \param sampleNo Sample number to evaluate.  
   \param roffset (output parameter) the offset in the return vector where the result begins.  
   
   The return value will be an existing vector so do not deallocate it.  
   If the result is stored in v it should be stored at the offset given.  
   Everything from offset to the end of v should be considered available for this method to use.  
 */  
1590  // This method assumes that any subexpressions which evaluate to Constant or Tagged Data  // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1591  // have already been collapsed to IDENTITY. So we must have at least one expanded child.  // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1592  // unlike the other resolve helpers, we must treat these datapoints separately.  // unlike the other resolve helpers, we must treat these datapoints separately.
1593  DataTypes::ValueType*  const DataTypes::RealVectorType*
1594  DataLazy::resolveTProd(ValueType& v,  size_t offset, int sampleNo, size_t& roffset) const  DataLazy::resolveNodeTProd(int tid, int sampleNo, size_t& roffset) const
1595  {  {
1596  LAZYDEBUG(cout << "Resolve TensorProduct: " << toString()  << " to offset " << offset<< endl;)  LAZYDEBUG(cout << "Resolve TensorProduct: " << toString() << endl;)
1597    
1598    size_t lroffset=0, rroffset=0;    // offsets in the left and right result vectors    size_t lroffset=0, rroffset=0;        // offsets in the left and right result vectors
1599      // first work out which of the children are expanded          // first work out which of the children are expanded
1600    bool leftExp=(m_left->m_readytype=='E');    bool leftExp=(m_left->m_readytype=='E');
1601    bool rightExp=(m_right->m_readytype=='E');    bool rightExp=(m_right->m_readytype=='E');
1602    int steps=getNumDPPSample();    int steps=getNumDPPSample();
1603  /*  int leftStep=((leftExp && !rightExp)? m_right->getNoValues() : 0);    int leftStep=(leftExp? m_left->getNoValues() : 0);            // do not have scalars as input to this method
   int rightStep=((rightExp && !leftExp)? m_left->getNoValues() : 0);*/  
   int leftStep=(leftExp? m_left->getNoValues() : 0);        // do not have scalars as input to this method  
1604    int rightStep=(rightExp?m_right->getNoValues() : 0);    int rightStep=(rightExp?m_right->getNoValues() : 0);
1605    
1606    int resultStep=getNoValues();    int resultStep=getNoValues();
1607      // Get the values of sub-expressions (leave a gap of one sample for the result).    roffset=m_samplesize*tid;
1608    int gap=offset+m_samplesize;      size_t offset=roffset;
   
 LAZYDEBUG(cout << "Query left with offset=" << gap << endl;)  
   
   const ValueType* left=m_left->resolveSample(v,gap,sampleNo,lroffset);  
   gap+=m_left->getMaxSampleSize();  
   
1609    
1610  LAZYDEBUG(cout << "Query right with offset=" << gap << endl;)    const RealVectorType* left=m_left->resolveNodeSample(tid, sampleNo, lroffset);
1611    
1612      const RealVectorType* right=m_right->resolveNodeSample(tid, sampleNo, rroffset);
   const ValueType* right=m_right->resolveSample(v,gap,sampleNo,rroffset);  
1613    
1614  LAZYDEBUG(cerr << "[Left shape]=" << DataTypes::shapeToString(m_left->getShape()) << "\n[Right shape]=" << DataTypes::shapeToString(m_right->getShape()) << " result=" <<DataTypes::shapeToString(getShape()) <<  endl;  LAZYDEBUG(cerr << "[Left shape]=" << DataTypes::shapeToString(m_left->getShape()) << "\n[Right shape]=" << DataTypes::shapeToString(m_right->getShape()) << " result=" <<DataTypes::shapeToString(getShape()) <<  endl;
1615  cout << getNoValues() << endl;)  cout << getNoValues() << endl;)
 LAZYDEBUG(cerr << "Result of left=";)  
 LAZYDEBUG(cerr << "[" << lroffset << " .. " << lroffset+m_left->getNoValues() << "]" << endl;  
1616    
1617  for (int i=lroffset, limit=lroffset+(leftExp?m_left->getNoValues()*m_left->getNumDPPSample():m_left->getNoValues());i<limit;++i)  
 {  
 cout << "[" << setw(2) << i-lroffset << "] " << setw(10) << (*left)[i] << " ";  
 if (i%4==0) cout << endl;  
 })  
 LAZYDEBUG(cerr << "\nResult of right=" << endl;)  
 LAZYDEBUG(  
 for (int i=rroffset, limit=rroffset+(rightExp?m_right->getNoValues()*m_right->getNumDPPSample():m_right->getNoValues());i<limit;++i)  
 {  
 cerr << "[" <<  setw(2)<< i-rroffset << "] " << setw(10) << (*right)[i] << " ";  
 if (i%4==0) cout << endl;  
 }  
 cerr << endl;  
 )  
1618  LAZYDEBUG(cerr << "Post sub calls: " << toString() << endl;)  LAZYDEBUG(cerr << "Post sub calls: " << toString() << endl;)
1619  LAZYDEBUG(cout << "LeftExp=" << leftExp << " rightExp=" << rightExp << endl;)  LAZYDEBUG(cout << "LeftExp=" << leftExp << " rightExp=" << rightExp << endl;)
1620  LAZYDEBUG(cout << "LeftR=" << m_left->getRank() << " rightExp=" << m_right->getRank() << endl;)  LAZYDEBUG(cout << "LeftR=" << m_left->getRank() << " rightExp=" << m_right->getRank() << endl;)
# Line 1499  LAZYDEBUG(cout << "m_samplesize=" << m_s Line 1623  LAZYDEBUG(cout << "m_samplesize=" << m_s
1623  LAZYDEBUG(cout << "outputshape=" << DataTypes::shapeToString(getShape()) << endl;)  LAZYDEBUG(cout << "outputshape=" << DataTypes::shapeToString(getShape()) << endl;)
1624  LAZYDEBUG(cout << "DPPS=" << m_right->getNumDPPSample() <<"."<<endl;)  LAZYDEBUG(cout << "DPPS=" << m_right->getNumDPPSample() <<"."<<endl;)
1625    
1626    double* resultp=&(v[offset]);     // results are stored at the vector offset we recieved    double* resultp=&(m_samples[offset]);         // results are stored at the vector offset we received
1627    switch(m_op)    switch(m_op)
1628    {    {
1629      case PROD:      case PROD:
1630      for (int i=0;i<steps;++i,resultp+=resultStep)          for (int i=0;i<steps;++i,resultp+=resultStep)
1631      {          {
1632              const double *ptr_0 = &((*left)[lroffset]);
1633  LAZYDEBUG(cout << "lroffset=" << lroffset << "rroffset=" << rroffset << endl;)            const double *ptr_1 = &((*right)[rroffset]);
 LAZYDEBUG(cout << "l*=" << left << " r*=" << right << endl;)  
 LAZYDEBUG(cout << "m_SL=" << m_SL << " m_SM=" << m_SM << " m_SR=" << m_SR << endl;)  
   
           const double *ptr_0 = &((*left)[lroffset]);  
           const double *ptr_1 = &((*right)[rroffset]);  
1634    
1635  LAZYDEBUG(cout << DataTypes::pointToString(*left, m_left->getShape(),lroffset,"LEFT") << endl;)  LAZYDEBUG(cout << DataTypes::pointToString(*left, m_left->getShape(),lroffset,"LEFT") << endl;)
1636  LAZYDEBUG(cout << DataTypes::pointToString(*right,m_right->getShape(),rroffset, "RIGHT") << endl;)  LAZYDEBUG(cout << DataTypes::pointToString(*right,m_right->getShape(),rroffset, "RIGHT") << endl;)
1637    
1638            matrix_matrix_product(m_SL, m_SM, m_SR, ptr_0, ptr_1, resultp, m_transpose);            matrix_matrix_product(m_SL, m_SM, m_SR, ptr_0, ptr_1, resultp, m_transpose);
1639    
1640  LAZYDEBUG(cout << "Results--\n";            lroffset+=leftStep;
1641  {            rroffset+=rightStep;
1642    DataVector dv(getNoValues());          }
1643  for (int z=0;z<getNoValues();++z)          break;
 {  
   cout << "[" << setw(2) << z<< "] " << setw(10) << resultp[z] << " ";  
   if (z%4==0) cout << endl;  
   dv[z]=resultp[z];  
 }  
 cout << endl << DataTypes::pointToString(dv,getShape(),0,"RESLT");  
 cout << "\nWritten to: " << resultp << " resultStep=" << resultStep << endl;  
 }  
 )  
       lroffset+=leftStep;  
       rroffset+=rightStep;  
     }  
     break;  
1644      default:      default:
1645      throw DataException("Programmer error - resolveTProduct can not resolve operator "+opToString(m_op)+".");          throw DataException("Programmer error - resolveTProduct can not resolve operator "+opToString(m_op)+".");
1646    }    }
1647    roffset=offset;    roffset=offset;
1648    return &v;    return &m_samples;
1649  }  }
1650    
1651    
1652    const DataTypes::RealVectorType*
1653  /*  DataLazy::resolveSample(int sampleNo, size_t& roffset) const
   \brief Compute the value of the expression for the given sample.  
   \return Vector which stores the value of the subexpression for the given sample.  
   \param v A vector to store intermediate results.  
   \param offset Index in v to begin storing results.  
   \param sampleNo Sample number to evaluate.  
   \param roffset (output parameter) the offset in the return vector where the result begins.  
   
   The return value will be an existing vector so do not deallocate it.  
 */  
 // the vector and the offset are a place where the method could write its data if it wishes  
 // it is not obligated to do so. For example, if it has its own storage already, it can use that.  
 // Hence the return value to indicate where the data is actually stored.  
 // Regardless, the storage should be assumed to be used, even if it isn't.  
   
 // the roffset is the offset within the returned vector where the data begins  
 const DataTypes::ValueType*  
 DataLazy::resolveSample(ValueType& v, size_t offset, int sampleNo, size_t& roffset)  
 {  
 LAZYDEBUG(cout << "Resolve sample " << toString() << endl;)  
     // collapse so we have a 'E' node or an IDENTITY for some other type  
   if (m_readytype!='E' && m_op!=IDENTITY)  
   {  
     collapse();  
   }  
   if (m_op==IDENTITY)    
   {  
     const ValueType& vec=m_id->getVectorRO();  
     if (m_readytype=='C')  
     {  
     roffset=0;  
 LAZYDEBUG(cout << "Finish  sample " << toString() << endl;)  
     return &(vec);  
     }  
     roffset=m_id->getPointOffset(sampleNo, 0);  
 LAZYDEBUG(cout << "Finish  sample " << toString() << endl;)  
     return &(vec);  
   }  
   if (m_readytype!='E')  
   {  
     throw DataException("Programmer Error - Collapse did not produce an expanded node.");  
   }  
   switch (getOpgroup(m_op))  
   {  
   case G_UNARY:  
   case G_UNARY_P: return resolveUnary(v, offset,sampleNo,roffset);  
   case G_BINARY: return resolveBinary(v, offset,sampleNo,roffset);  
   case G_NP1OUT: return resolveNP1OUT(v, offset, sampleNo,roffset);  
   case G_NP1OUT_P: return resolveNP1OUT_P(v, offset, sampleNo,roffset);  
   case G_TENSORPROD: return resolveTProd(v,offset, sampleNo,roffset);  
   case G_NP1OUT_2P: return resolveNP1OUT_2P(v, offset, sampleNo, roffset);  
   default:  
     throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");  
   }  
   
 }  
   
 const DataTypes::ValueType*  
 DataLazy::resolveSample(BufferGroup& bg, int sampleNo, size_t& roffset)  
1654  {  {
1655  #ifdef _OPENMP  #ifdef _OPENMP
1656      int tid=omp_get_thread_num();          int tid=omp_get_thread_num();
1657  #else  #else
1658      int tid=0;          int tid=0;
1659  #endif  #endif
1660      return resolveSample(bg.getBuffer(tid),bg.getOffset(tid),sampleNo,roffset);  
1661    #ifdef LAZY_STACK_PROF
1662            stackstart[tid]=&tid;
1663            stackend[tid]=&tid;
1664            const DataTypes::RealVectorType* r=resolveNodeSample(tid, sampleNo, roffset);
1665            size_t d=(size_t)stackstart[tid]-(size_t)stackend[tid];
1666            #pragma omp critical
1667            if (d>maxstackuse)
1668            {
1669    cout << "Max resolve Stack use " << d << endl;
1670                    maxstackuse=d;
1671            }
1672            return r;
1673    #else
1674            return resolveNodeSample(tid, sampleNo, roffset);
1675    #endif
1676  }  }
1677    
1678    
1679  // This needs to do the work of the idenity constructor  // This needs to do the work of the identity constructor
1680  void  void
1681  DataLazy::resolveToIdentity()  DataLazy::resolveToIdentity()
1682  {  {
1683     if (m_op==IDENTITY)     if (m_op==IDENTITY)
1684      return;          return;
1685     DataReady_ptr p=resolve();     DataReady_ptr p=resolveNodeWorker();
1686     makeIdentity(p);     makeIdentity(p);
1687  }  }
1688    
# Line 1635  void DataLazy::makeIdentity(const DataRe Line 1698  void DataLazy::makeIdentity(const DataRe
1698     else if(p->isExpanded()) {m_readytype='E';}     else if(p->isExpanded()) {m_readytype='E';}
1699     else if (p->isTagged()) {m_readytype='T';}     else if (p->isTagged()) {m_readytype='T';}
1700     else {throw DataException("Unknown DataReady instance in convertToIdentity constructor.");}     else {throw DataException("Unknown DataReady instance in convertToIdentity constructor.");}
    m_buffsRequired=1;  
1701     m_samplesize=p->getNumDPPSample()*p->getNoValues();     m_samplesize=p->getNumDPPSample()*p->getNoValues();
    m_maxsamplesize=m_samplesize;  
1702     m_left.reset();     m_left.reset();
1703     m_right.reset();     m_right.reset();
1704  }  }
1705    
1706  // To simplify the memory management, all threads operate on one large vector, rather than one each.  
 // Each sample is evaluated independently and copied into the result DataExpanded.  
1707  DataReady_ptr  DataReady_ptr
1708  DataLazy::resolve()  DataLazy::resolve()
1709  {  {
1710        resolveToIdentity();
1711        return m_id;
1712    }
1713    
1714    
1715  LAZYDEBUG(cout << "Sample size=" << m_samplesize << endl;)  /* This is really a static method but I think that caused problems in windows */
1716  LAZYDEBUG(cout << "Buffers=" << m_buffsRequired << endl;)  void
1717    if (m_readytype!='E')     // if the whole sub-expression is Constant or Tagged, then evaluate it normally  DataLazy::resolveGroupWorker(std::vector<DataLazy*>& dats)
1718    {
1719      if (dats.empty())
1720      {
1721            return;
1722      }
1723      vector<DataLazy*> work;
1724      FunctionSpace fs=dats[0]->getFunctionSpace();
1725      bool match=true;
1726      for (int i=dats.size()-1;i>=0;--i)
1727      {
1728            if (dats[i]->m_readytype!='E')
1729            {
1730                    dats[i]->collapse();
1731            }
1732            if (dats[i]->m_op!=IDENTITY)
1733            {
1734                    work.push_back(dats[i]);
1735                    if (fs!=dats[i]->getFunctionSpace())
1736                    {
1737                            match=false;
1738                    }
1739            }
1740      }
1741      if (work.empty())
1742      {
1743            return;         // no work to do
1744      }
1745      if (match)    // all functionspaces match.  Yes I realise this is overly strict
1746      {             // it is possible that dats[0] is one of the objects which we discarded and
1747                    // all the other functionspaces match.
1748            vector<DataExpanded*> dep;
1749            vector<RealVectorType*> vecs;
1750            for (int i=0;i<work.size();++i)
1751            {
1752                    dep.push_back(new DataExpanded(fs,work[i]->getShape(), RealVectorType(work[i]->getNoValues())));
1753                    vecs.push_back(&(dep[i]->getVectorRW()));
1754            }
1755            int totalsamples=work[0]->getNumSamples();
1756            const RealVectorType* res=0; // Storage for answer
1757            int sample;
1758            #pragma omp parallel private(sample, res)
1759            {
1760                size_t roffset=0;
1761                #pragma omp for schedule(static)
1762                for (sample=0;sample<totalsamples;++sample)
1763                {
1764                    roffset=0;
1765                    int j;
1766                    for (j=work.size()-1;j>=0;--j)
1767                    {
1768    #ifdef _OPENMP
1769                        res=work[j]->resolveNodeSample(omp_get_thread_num(),sample,roffset);
1770    #else
1771                        res=work[j]->resolveNodeSample(0,sample,roffset);
1772    #endif
1773                        RealVectorType::size_type outoffset=dep[j]->getPointOffset(sample,0);
1774                        memcpy(&((*vecs[j])[outoffset]),&((*res)[roffset]),work[j]->m_samplesize*sizeof(RealVectorType::ElementType));
1775                    }
1776                }
1777            }
1778            // Now we need to load the new results as identity ops into the lazy nodes
1779            for (int i=work.size()-1;i>=0;--i)
1780            {
1781                work[i]->makeIdentity(REFCOUNTNS::dynamic_pointer_cast<DataReady>(dep[i]->getPtr()));
1782            }
1783      }
1784      else  // functionspaces do not match
1785      {
1786            for (int i=0;i<work.size();++i)
1787            {
1788                    work[i]->resolveToIdentity();
1789            }
1790      }
1791    }
1792    
1793    
1794    
1795    // This version of resolve uses storage in each node to hold results
1796    DataReady_ptr
1797    DataLazy::resolveNodeWorker()
1798    {
1799      if (m_readytype!='E')         // if the whole sub-expression is Constant or Tagged, then evaluate it normally
1800    {    {
1801      collapse();      collapse();
1802    }    }
1803    if (m_op==IDENTITY)       // So a lazy expression of Constant or Tagged data will be returned here.    if (m_op==IDENTITY)           // So a lazy expression of Constant or Tagged data will be returned here.
1804    {    {
1805      return m_id;      return m_id;
1806    }    }
1807      // from this point on we must have m_op!=IDENTITY and m_readytype=='E'          // from this point on we must have m_op!=IDENTITY and m_readytype=='E'
1808    size_t threadbuffersize=m_maxsamplesize*(max(1,m_buffsRequired)); // Each thread needs to have enough    DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(),  RealVectorType(getNoValues()));
1809      // storage to evaluate its expression    RealVectorType& resvec=result->getVectorRW();
   int numthreads=1;  
 #ifdef _OPENMP  
   numthreads=omp_get_max_threads();  
 #endif  
   ValueType v(numthreads*threadbuffersize);  
 LAZYDEBUG(cout << "Buffer created with size=" << v.size() << endl;)  
   DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(),  ValueType(getNoValues()));  
   ValueType& resvec=result->getVectorRW();  
1810    DataReady_ptr resptr=DataReady_ptr(result);    DataReady_ptr resptr=DataReady_ptr(result);
1811    
1812    int sample;    int sample;
   size_t outoffset;     // offset in the output data  
1813    int totalsamples=getNumSamples();    int totalsamples=getNumSamples();
1814    const ValueType* res=0;   // Vector storing the answer    const RealVectorType* res=0;       // Storage for answer
   size_t resoffset=0;       // where in the vector to find the answer  
1815  LAZYDEBUG(cout << "Total number of samples=" <<totalsamples << endl;)  LAZYDEBUG(cout << "Total number of samples=" <<totalsamples << endl;)
1816    #pragma omp parallel for private(sample,resoffset,outoffset,res) schedule(static)    #pragma omp parallel private(sample,res)
   for (sample=0;sample<totalsamples;++sample)  
1817    {    {
1818        if (sample==0)  {ENABLEDEBUG}          size_t roffset=0;
1819    #ifdef LAZY_STACK_PROF
1820  //       if (sample==5800/getNumDPPSample())  {ENABLEDEBUG}          stackstart[omp_get_thread_num()]=&roffset;
1821  LAZYDEBUG(cout << "################################# " << sample << endl;)          stackend[omp_get_thread_num()]=&roffset;
1822    #endif
1823            #pragma omp for schedule(static)
1824            for (sample=0;sample<totalsamples;++sample)
1825            {
1826                    roffset=0;
1827  #ifdef _OPENMP  #ifdef _OPENMP
1828      res=resolveSample(v,threadbuffersize*omp_get_thread_num(),sample,resoffset);                  res=resolveNodeSample(omp_get_thread_num(),sample,roffset);
1829  #else  #else
1830      res=resolveSample(v,0,sample,resoffset);   // res would normally be v, but not if its a single IDENTITY op.                  res=resolveNodeSample(0,sample,roffset);
1831  #endif  #endif
1832  LAZYDEBUG(cerr << "-------------------------------- " << endl;)  LAZYDEBUG(cout << "Sample #" << sample << endl;)
1833  LAZYDEBUG(cerr<< "Copying sample#" << sample << endl;)  LAZYDEBUG(cout << "Final res[" << roffset<< "]=" << (*res)[roffset] << (*res)[roffset]<< endl; )
1834      outoffset=result->getPointOffset(sample,0);                  RealVectorType::size_type outoffset=result->getPointOffset(sample,0);
1835  LAZYDEBUG(cerr << "offset=" << outoffset << " from offset=" << resoffset << " " << m_samplesize << " doubles" << endl;)                  memcpy(&(resvec[outoffset]),&((*res)[roffset]),m_samplesize*sizeof(RealVectorType::ElementType));
1836      for (unsigned int i=0;i<m_samplesize;++i,++outoffset,++resoffset)   // copy values into the output vector          }
1837      {    }
1838  LAZYDEBUG(cerr << "outoffset=" << outoffset << " resoffset=" << resoffset << " " << (*res)[resoffset]<< endl;)  #ifdef LAZY_STACK_PROF
1839      resvec[outoffset]=(*res)[resoffset];    for (int i=0;i<getNumberOfThreads();++i)
1840      }    {
1841  LAZYDEBUG(cerr << DataTypes::pointToString(resvec,getShape(),outoffset-m_samplesize+DataTypes::noValues(getShape()),"Final result:") << endl;)          size_t r=((size_t)stackstart[i] - (size_t)stackend[i]);
1842  LAZYDEBUG(cerr << "*********************************" << endl;)  //      cout << i << " " << stackstart[i] << " .. " << stackend[i] << " = " <<  r << endl;
1843      DISABLEDEBUG          if (r>maxstackuse)
1844            {
1845                    maxstackuse=r;
1846            }
1847    }    }
1848      cout << "Max resolve Stack use=" << maxstackuse << endl;
1849    #endif
1850    return resptr;    return resptr;
1851  }  }
1852    
# Line 1708  std::string Line 1854  std::string
1854  DataLazy::toString() const  DataLazy::toString() const
1855  {  {
1856    ostringstream oss;    ostringstream oss;
1857    oss << "Lazy Data:";    oss << "Lazy Data: [depth=" << m_height<< "] ";
1858    intoString(oss);    switch (escriptParams.getLAZY_STR_FMT())
1859      {
1860      case 1:       // tree format
1861            oss << endl;
1862            intoTreeString(oss,"");
1863            break;
1864      case 2:       // just the depth
1865            break;
1866      default:
1867            intoString(oss);
1868            break;
1869      }
1870    return oss.str();    return oss.str();
1871  }  }
1872    
# Line 1721  DataLazy::intoString(ostringstream& oss) Line 1878  DataLazy::intoString(ostringstream& oss)
1878    switch (getOpgroup(m_op))    switch (getOpgroup(m_op))
1879    {    {
1880    case G_IDENTITY:    case G_IDENTITY:
1881      if (m_id->isExpanded())          if (m_id->isExpanded())
1882      {          {
1883         oss << "E";             oss << "E";
1884      }          }
1885      else if (m_id->isTagged())          else if (m_id->isTagged())
1886      {          {
1887        oss << "T";            oss << "T";
1888      }          }
1889      else if (m_id->isConstant())          else if (m_id->isConstant())
1890      {          {
1891        oss << "C";            oss << "C";
1892      }          }
1893      else          else
1894      {          {
1895        oss << "?";            oss << "?";
1896      }          }
1897      oss << '@' << m_id.get();          oss << '@' << m_id.get();
1898      break;          break;
1899      case G_BINARY:
1900            oss << '(';
1901            m_left->intoString(oss);
1902            oss << ' ' << opToString(m_op) << ' ';
1903            m_right->intoString(oss);
1904            oss << ')';
1905            break;
1906      case G_UNARY:
1907      case G_UNARY_P:
1908      case G_NP1OUT:
1909      case G_NP1OUT_P:
1910      case G_REDUCTION:
1911            oss << opToString(m_op) << '(';
1912            m_left->intoString(oss);
1913            oss << ')';
1914            break;
1915      case G_TENSORPROD:
1916            oss << opToString(m_op) << '(';
1917            m_left->intoString(oss);
1918            oss << ", ";
1919            m_right->intoString(oss);
1920            oss << ')';
1921            break;
1922      case G_NP1OUT_2P:
1923            oss << opToString(m_op) << '(';
1924            m_left->intoString(oss);
1925            oss << ", " << m_axis_offset << ", " << m_transpose;
1926            oss << ')';
1927            break;
1928      case G_CONDEVAL:
1929            oss << opToString(m_op)<< '(' ;
1930            m_mask->intoString(oss);
1931            oss << " ? ";
1932            m_left->intoString(oss);
1933            oss << " : ";
1934            m_right->intoString(oss);
1935            oss << ')';
1936            break;
1937      default:
1938            oss << "UNKNOWN";
1939      }
1940    }
1941    
1942    
1943    void
1944    DataLazy::intoTreeString(ostringstream& oss, string indent) const
1945    {
1946      oss << '[' << m_rank << ':' << setw(3) << m_samplesize << "] " << indent;
1947      switch (getOpgroup(m_op))
1948      {
1949      case G_IDENTITY:
1950            if (m_id->isExpanded())
1951            {
1952               oss << "E";
1953            }
1954            else if (m_id->isTagged())
1955            {
1956              oss << "T";
1957            }
1958            else if (m_id->isConstant())
1959            {
1960              oss << "C";
1961            }
1962            else
1963            {
1964              oss << "?";
1965            }
1966            oss << '@' << m_id.get() << endl;
1967            break;
1968    case G_BINARY:    case G_BINARY:
1969      oss << '(';          oss << opToString(m_op) << endl;
1970      m_left->intoString(oss);          indent+='.';
1971      oss << ' ' << opToString(m_op) << ' ';          m_left->intoTreeString(oss, indent);
1972      m_right->intoString(oss);          m_right->intoTreeString(oss, indent);
1973      oss << ')';          break;
     break;  
1974    case G_UNARY:    case G_UNARY:
1975    case G_UNARY_P:    case G_UNARY_P:
1976    case G_NP1OUT:    case G_NP1OUT:
1977    case G_NP1OUT_P:    case G_NP1OUT_P:
1978      oss << opToString(m_op) << '(';    case G_REDUCTION:
1979      m_left->intoString(oss);          oss << opToString(m_op) << endl;
1980      oss << ')';          indent+='.';
1981      break;          m_left->intoTreeString(oss, indent);
1982            break;
1983    case G_TENSORPROD:    case G_TENSORPROD:
1984      oss << opToString(m_op) << '(';          oss << opToString(m_op) << endl;
1985      m_left->intoString(oss);          indent+='.';
1986      oss << ", ";          m_left->intoTreeString(oss, indent);
1987      m_right->intoString(oss);          m_right->intoTreeString(oss, indent);
1988      oss << ')';          break;
     break;  
1989    case G_NP1OUT_2P:    case G_NP1OUT_2P:
1990      oss << opToString(m_op) << '(';          oss << opToString(m_op) << ", " << m_axis_offset << ", " << m_transpose<< endl;
1991      m_left->intoString(oss);          indent+='.';
1992      oss << ", " << m_axis_offset << ", " << m_transpose;          m_left->intoTreeString(oss, indent);
1993      oss << ')';          break;
     break;  
1994    default:    default:
1995      oss << "UNKNOWN";          oss << "UNKNOWN";
1996    }    }
1997  }  }
1998    
1999    
2000  DataAbstract*  DataAbstract*
2001  DataLazy::deepCopy()  DataLazy::deepCopy() const
2002  {  {
2003    switch (getOpgroup(m_op))    switch (getOpgroup(m_op))
2004    {    {
2005    case G_IDENTITY:  return new DataLazy(m_id->deepCopy()->getPtr());    case G_IDENTITY:  return new DataLazy(m_id->deepCopy()->getPtr());
2006    case G_UNARY: return new DataLazy(m_left->deepCopy()->getPtr(),m_op);    case G_UNARY:
2007    case G_BINARY:    return new DataLazy(m_left->deepCopy()->getPtr(),m_right->deepCopy()->getPtr(),m_op);    case G_REDUCTION:      return new DataLazy(m_left->deepCopy()->getPtr(),m_op);
2008      case G_UNARY_P:       return new DataLazy(m_left->deepCopy()->getPtr(), m_op, m_tol);
2009      case G_BINARY:        return new DataLazy(m_left->deepCopy()->getPtr(),m_right->deepCopy()->getPtr(),m_op);
2010    case G_NP1OUT: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(),m_op);    case G_NP1OUT: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(),m_op);
2011    case G_TENSORPROD: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);    case G_TENSORPROD: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
2012      case G_NP1OUT_P:   return new DataLazy(m_left->deepCopy()->getPtr(),m_op,  m_axis_offset);
2013      case G_NP1OUT_2P:  return new DataLazy(m_left->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
2014    default:    default:
2015      throw DataException("Programmer error - do not know how to deepcopy operator "+opToString(m_op)+".");          throw DataException("Programmer error - do not know how to deepcopy operator "+opToString(m_op)+".");
2016    }    }
2017  }  }
2018    
2019    
2020    
2021  // There is no single, natural interpretation of getLength on DataLazy.  // There is no single, natural interpretation of getLength on DataLazy.
2022  // Instances of DataReady can look at the size of their vectors.  // Instances of DataReady can look at the size of their vectors.
2023  // For lazy though, it could be the size the data would be if it were resolved;  // For lazy though, it could be the size the data would be if it were resolved;
2024  // or it could be some function of the lengths of the DataReady instances which  // or it could be some function of the lengths of the DataReady instances which
2025  // form part of the expression.  // form part of the expression.
2026  // Rather than have people making assumptions, I have disabled the method.  // Rather than have people making assumptions, I have disabled the method.
2027  DataTypes::ValueType::size_type  DataTypes::RealVectorType::size_type
2028  DataLazy::getLength() const  DataLazy::getLength() const
2029  {  {
2030    throw DataException("getLength() does not make sense for lazy data.");    throw DataException("getLength() does not make sense for lazy data.");
# Line 1809  DataLazy::getSlice(const DataTypes::Regi Line 2039  DataLazy::getSlice(const DataTypes::Regi
2039    
2040    
2041  // To do this we need to rely on our child nodes  // To do this we need to rely on our child nodes
2042  DataTypes::ValueType::size_type  DataTypes::RealVectorType::size_type
2043  DataLazy::getPointOffset(int sampleNo,  DataLazy::getPointOffset(int sampleNo,
2044                   int dataPointNo)                   int dataPointNo)
2045  {  {
2046    if (m_op==IDENTITY)    if (m_op==IDENTITY)
2047    {    {
2048      return m_id->getPointOffset(sampleNo,dataPointNo);          return m_id->getPointOffset(sampleNo,dataPointNo);
2049    }    }
2050    if (m_readytype!='E')    if (m_readytype!='E')
2051    {    {
2052      collapse();          collapse();
2053      return m_id->getPointOffset(sampleNo,dataPointNo);          return m_id->getPointOffset(sampleNo,dataPointNo);
2054    }    }
2055    // at this point we do not have an identity node and the expression will be Expanded    // at this point we do not have an identity node and the expression will be Expanded
2056    // so we only need to know which child to ask    // so we only need to know which child to ask
2057    if (m_left->m_readytype=='E')    if (m_left->m_readytype=='E')
2058    {    {
2059      return m_left->getPointOffset(sampleNo,dataPointNo);          return m_left->getPointOffset(sampleNo,dataPointNo);
2060    }    }
2061    else    else
2062    {    {
2063      return m_right->getPointOffset(sampleNo,dataPointNo);          return m_right->getPointOffset(sampleNo,dataPointNo);
2064    }    }
2065  }  }
2066    
2067  // To do this we need to rely on our child nodes  // To do this we need to rely on our child nodes
2068  DataTypes::ValueType::size_type  DataTypes::RealVectorType::size_type
2069  DataLazy::getPointOffset(int sampleNo,  DataLazy::getPointOffset(int sampleNo,
2070                   int dataPointNo) const                   int dataPointNo) const
2071  {  {
2072    if (m_op==IDENTITY)    if (m_op==IDENTITY)
2073    {    {
2074      return m_id->getPointOffset(sampleNo,dataPointNo);          return m_id->getPointOffset(sampleNo,dataPointNo);
2075    }    }
2076    if (m_readytype=='E')    if (m_readytype=='E')
2077    {    {
# Line 1849  DataLazy::getPointOffset(int sampleNo, Line 2079  DataLazy::getPointOffset(int sampleNo,
2079      // so we only need to know which child to ask      // so we only need to know which child to ask
2080      if (m_left->m_readytype=='E')      if (m_left->m_readytype=='E')
2081      {      {
2082      return m_left->getPointOffset(sampleNo,dataPointNo);          return m_left->getPointOffset(sampleNo,dataPointNo);
2083      }      }
2084      else      else
2085      {      {
2086      return m_right->getPointOffset(sampleNo,dataPointNo);          return m_right->getPointOffset(sampleNo,dataPointNo);
2087      }      }
2088    }    }
2089    if (m_readytype=='C')    if (m_readytype=='C')
2090    {    {
2091      return m_left->getPointOffset(sampleNo,dataPointNo); // which child doesn't matter          return m_left->getPointOffset(sampleNo,dataPointNo); // which child doesn't matter
2092    }    }
2093    throw DataException("Programmer error - getPointOffset on lazy data may require collapsing (but this object is marked const).");    throw DataException("Programmer error - getPointOffset on lazy data may require collapsing (but this object is marked const).");
2094  }  }
# Line 1868  DataLazy::getPointOffset(int sampleNo, Line 2098  DataLazy::getPointOffset(int sampleNo,
2098  void  void
2099  DataLazy::setToZero()  DataLazy::setToZero()
2100  {  {
2101  //   DataTypes::ValueType v(getNoValues(),0);  //   DataTypes::RealVectorType v(getNoValues(),0);
2102  //   m_id=DataReady_ptr(new DataConstant(getFunctionSpace(),getShape(),v));  //   m_id=DataReady_ptr(new DataConstant(getFunctionSpace(),getShape(),v));
2103  //   m_op=IDENTITY;  //   m_op=IDENTITY;
2104  //   m_right.reset();    //   m_right.reset();  
# Line 1876  DataLazy::setToZero() Line 2106  DataLazy::setToZero()
2106  //   m_readytype='C';  //   m_readytype='C';
2107  //   m_buffsRequired=1;  //   m_buffsRequired=1;
2108    
2109      (void)privdebug;  // to stop the compiler complaining about unused privdebug
2110    throw DataException("Programmer error - setToZero not supported for DataLazy (DataLazy objects should be read only).");    throw DataException("Programmer error - setToZero not supported for DataLazy (DataLazy objects should be read only).");
   
2111  }  }
2112    
2113  bool  bool
2114  DataLazy::actsExpanded() const  DataLazy::actsExpanded() const
2115  {  {
2116      return (m_readytype=='E');          return (m_readytype=='E');
2117  }  }
2118    
2119  }   // end namespace  } // end namespace
2120    

Legend:
Removed from v.2496  
changed lines
  Added in v.6042

  ViewVC Help
Powered by ViewVC 1.1.26