/[escript]/branches/clazy/escriptcore/src/DataLazy.cpp
ViewVC logotype

Diff of /branches/clazy/escriptcore/src/DataLazy.cpp

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

trunk/escript/src/DataLazy.cpp revision 2770 by jfenwick, Wed Nov 25 01:24:51 2009 UTC trunk/escriptcore/src/DataLazy.cpp revision 6125 by jfenwick, Tue Apr 5 04:12:13 2016 UTC
# Line 1  Line 1 
1    
2  /*******************************************************  /*****************************************************************************
3  *  *
4  * Copyright (c) 2003-2009 by University of Queensland  * Copyright (c) 2003-2016 by The University of Queensland
5  * Earth Systems Science Computational Center (ESSCC)  * http://www.uq.edu.au
 * http://www.uq.edu.au/esscc  
6  *  *
7  * Primary Business: Queensland, Australia  * Primary Business: Queensland, Australia
8  * Licensed under the Open Software License version 3.0  * Licensed under the Apache License, version 2.0
9  * http://www.opensource.org/licenses/osl-3.0.php  * http://www.apache.org/licenses/LICENSE-2.0
10  *  *
11  *******************************************************/  * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12    * Development 2012-2013 by School of Earth Sciences
13    * Development from 2014 by Centre for Geoscience Computing (GeoComp)
14    *
15    *****************************************************************************/
16    
17  #include "DataLazy.h"  #include "DataLazy.h"
18    #include "Data.h"
19    #include "DataTypes.h"
20    #include "EscriptParams.h"
21    #include "FunctionSpace.h"
22    #include "Utils.h"
23    #include "DataVectorOps.h"
24    
25  #ifdef USE_NETCDF  #ifdef USE_NETCDF
26  #include <netcdfcpp.h>  #include <netcdfcpp.h>
27  #endif  #endif
 #ifdef PASO_MPI  
 #include <mpi.h>  
 #endif  
 #ifdef _OPENMP  
 #include <omp.h>  
 #endif  
 #include "FunctionSpace.h"  
 #include "DataTypes.h"  
 #include "Data.h"  
 #include "UnaryFuncs.h"     // for escript::fsign  
 #include "Utils.h"  
28    
29  #include "EscriptParams.h"  #include <iomanip> // for some fancy formatting in debug
30    
31    using namespace escript::DataTypes;
32    
33  #include <iomanip>      // for some fancy formatting in debug  #define NO_ARG
34    
35  // #define LAZYDEBUG(X) if (privdebug){X;}  // #define LAZYDEBUG(X) if (privdebug){X;}
36  #define LAZYDEBUG(X)  #define LAZYDEBUG(X)
# Line 44  bool privdebug=false; Line 44  bool privdebug=false;
44    
45  // #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {cerr << "\n!!!!!!! SIZE LIMIT EXCEEDED " << m_children << ";" << m_height << endl << toString() << endl;resolveToIdentity();}  // #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {cerr << "\n!!!!!!! SIZE LIMIT EXCEEDED " << m_children << ";" << m_height << endl << toString() << endl;resolveToIdentity();}
46    
47  #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {resolveToIdentity();}  // #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {cerr << "SIZE LIMIT EXCEEDED " << m_height << endl;resolveToIdentity();}
48    
49    
50    #define SIZELIMIT if (m_height>escript::escriptParams.getTOO_MANY_LEVELS())  {if (escript::escriptParams.getLAZY_VERBOSE()){cerr << "SIZE LIMIT EXCEEDED height=" << m_height << endl;}resolveToIdentity();}
51    
52  /*  /*
53  How does DataLazy work?  How does DataLazy work?
# Line 58  A special operation, IDENTITY, stores an Line 60  A special operation, IDENTITY, stores an
60  This means that all "internal" nodes in the structure are instances of DataLazy.  This means that all "internal" nodes in the structure are instances of DataLazy.
61    
62  Each operation has a string representation as well as an opgroup - eg G_IDENTITY, G_BINARY, ...  Each operation has a string representation as well as an opgroup - eg G_IDENTITY, G_BINARY, ...
63  Note that IDENITY is not considered a unary operation.  Note that IDENTITY is not considered a unary operation.
64    
65  I am avoiding calling the structure formed a tree because it is not guaranteed to be one (eg c=a+a).  I am avoiding calling the structure formed a tree because it is not guaranteed to be one (eg c=a+a).
66  It must however form a DAG (directed acyclic graph).  It must however form a DAG (directed acyclic graph).
# Line 66  I will refer to individual DataLazy obje Line 68  I will refer to individual DataLazy obje
68    
69  Each node also stores:  Each node also stores:
70  - m_readytype \in {'E','T','C','?'} ~ indicates what sort of DataReady would be produced if the expression was  - m_readytype \in {'E','T','C','?'} ~ indicates what sort of DataReady would be produced if the expression was
71      evaluated.          evaluated.
72  - m_buffsrequired ~ the larged number of samples which would need to be kept simultaneously in order to  - m_buffsrequired ~ the large number of samples which would need to be kept simultaneously in order to
73      evaluate the expression.          evaluate the expression.
74  - m_samplesize ~ the number of doubles stored in a sample.  - m_samplesize ~ the number of doubles stored in a sample.
75    
76  When a new node is created, the above values are computed based on the values in the child nodes.  When a new node is created, the above values are computed based on the values in the child nodes.
# Line 109  namespace escript Line 111  namespace escript
111  namespace  namespace
112  {  {
113    
114    
115    // enabling this will print out when ever the maximum stacksize used by resolve increases
116    // it assumes _OPENMP is also in use
117    //#define LAZY_STACK_PROF
118    
119    
120    
121    #ifndef _OPENMP
122      #ifdef LAZY_STACK_PROF
123      #undef LAZY_STACK_PROF
124      #endif
125    #endif
126    
127    
128    #ifdef LAZY_STACK_PROF
129    std::vector<void*> stackstart(getNumberOfThreads());
130    std::vector<void*> stackend(getNumberOfThreads());
131    size_t maxstackuse=0;
132    #endif
133    
134  enum ES_opgroup  enum ES_opgroup
135  {  {
136     G_UNKNOWN,     G_UNKNOWN,
137     G_IDENTITY,     G_IDENTITY,
138     G_BINARY,        // pointwise operations with two arguments     G_BINARY,            // pointwise operations with two arguments
139     G_UNARY,     // pointwise operations with one argument     G_UNARY,             // pointwise operations with one argument
140     G_UNARY_P,       // pointwise operations with one argument, requiring a parameter     G_UNARY_P,           // pointwise operations with one argument, requiring a parameter
141     G_NP1OUT,        // non-pointwise op with one output     G_NP1OUT,            // non-pointwise op with one output
142     G_NP1OUT_P,      // non-pointwise op with one output requiring a parameter     G_NP1OUT_P,          // non-pointwise op with one output requiring a parameter
143     G_TENSORPROD,    // general tensor product     G_TENSORPROD,        // general tensor product
144     G_NP1OUT_2P,     // non-pointwise op with one output requiring two params     G_NP1OUT_2P,         // non-pointwise op with one output requiring two params
145     G_REDUCTION      // non-pointwise unary op with a scalar output     G_REDUCTION,         // non-pointwise unary op with a scalar output
146       G_CONDEVAL
147  };  };
148    
149    
150    
151    
152  string ES_opstrings[]={"UNKNOWN","IDENTITY","+","-","*","/","^",  string ES_opstrings[]={"UNKNOWN","IDENTITY","+","-","*","/","^",
153              "sin","cos","tan",                          "sin","cos","tan",
154              "asin","acos","atan","sinh","cosh","tanh","erf",                          "asin","acos","atan","sinh","cosh","tanh","erf",
155              "asinh","acosh","atanh",                          "asinh","acosh","atanh",
156              "log10","log","sign","abs","neg","pos","exp","sqrt",                          "log10","log","sign","abs","neg","pos","exp","sqrt",
157              "1/","where>0","where<0","where>=0","where<=0", "where<>0","where=0",                          "1/","where>0","where<0","where>=0","where<=0", "where<>0","where=0",
158              "symmetric","nonsymmetric",                          "symmetric","antisymmetric",
159              "prod",                          "prod",
160              "transpose", "trace",                          "transpose", "trace",
161              "swapaxes",                          "swapaxes",
162              "minval", "maxval"};                          "minval", "maxval",
163  int ES_opcount=43;                          "condEval",
164                            "hermitian","antihermitian"
165    };
166    int ES_opcount=46;
167  ES_opgroup opgroups[]={G_UNKNOWN,G_IDENTITY,G_BINARY,G_BINARY,G_BINARY,G_BINARY, G_BINARY,  ES_opgroup opgroups[]={G_UNKNOWN,G_IDENTITY,G_BINARY,G_BINARY,G_BINARY,G_BINARY, G_BINARY,
168              G_UNARY,G_UNARY,G_UNARY, //10                          G_UNARY,G_UNARY,G_UNARY, //10
169              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,    // 17                          G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,        // 17
170              G_UNARY,G_UNARY,G_UNARY,                    // 20                          G_UNARY,G_UNARY,G_UNARY,                                        // 20
171              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,    // 28                          G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,        // 28
172              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY, G_UNARY_P, G_UNARY_P,      // 35                          G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY, G_UNARY_P, G_UNARY_P,          // 35
173              G_NP1OUT,G_NP1OUT,                          G_NP1OUT,G_NP1OUT,
174              G_TENSORPROD,                          G_TENSORPROD,
175              G_NP1OUT_P, G_NP1OUT_P,                          G_NP1OUT_P, G_NP1OUT_P,
176              G_NP1OUT_2P,                          G_NP1OUT_2P,
177              G_REDUCTION, G_REDUCTION};                          G_REDUCTION, G_REDUCTION,
178                            G_CONDEVAL,
179                            G_UNARY,G_UNARY
180    };
181  inline  inline
182  ES_opgroup  ES_opgroup
183  getOpgroup(ES_optype op)  getOpgroup(ES_optype op)
# Line 160  getOpgroup(ES_optype op) Line 189  getOpgroup(ES_optype op)
189  FunctionSpace  FunctionSpace
190  resultFS(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)  resultFS(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
191  {  {
192      // perhaps this should call interpolate and throw or something?          // perhaps this should call interpolate and throw or something?
193      // maybe we need an interpolate node -          // maybe we need an interpolate node -
194      // that way, if interpolate is required in any other op we can just throw a          // that way, if interpolate is required in any other op we can just throw a
195      // programming error exception.          // programming error exception.
196    
197    FunctionSpace l=left->getFunctionSpace();    FunctionSpace l=left->getFunctionSpace();
198    FunctionSpace r=right->getFunctionSpace();    FunctionSpace r=right->getFunctionSpace();
199    if (l!=r)    if (l!=r)
200    {    {
201      if (r.probeInterpolation(l))      signed char res=r.getDomain()->preferredInterpolationOnDomain(r.getTypeCode(), l.getTypeCode());
202        if (res==1)
203      {      {
204      return l;          return l;
205      }      }
206      if (l.probeInterpolation(r))      if (res==-1)
207      {      {
208      return r;          return r;
209      }      }
210      throw DataException("Cannot interpolate between the FunctionSpaces given for operation "+opToString(op)+".");      throw DataException("Cannot interpolate between the FunctionSpaces given for operation "+opToString(op)+".");
211    }    }
# Line 187  resultFS(DataAbstract_ptr left, DataAbst Line 217  resultFS(DataAbstract_ptr left, DataAbst
217  DataTypes::ShapeType  DataTypes::ShapeType
218  resultShape(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)  resultShape(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
219  {  {
220      if (left->getShape()!=right->getShape())          if (left->getShape()!=right->getShape())
221      {          {
222        if ((getOpgroup(op)!=G_BINARY) && (getOpgroup(op)!=G_NP1OUT))            if ((getOpgroup(op)!=G_BINARY) && (getOpgroup(op)!=G_NP1OUT))
223        {            {
224          throw DataException("Shapes not the name - shapes must match for (point)binary operations.");                  throw DataException("Shapes not the name - shapes must match for (point)binary operations.");
225        }            }
226    
227        if (left->getRank()==0)   // we need to allow scalar * anything            if (left->getRank()==0)       // we need to allow scalar * anything
228        {            {
229          return right->getShape();                  return right->getShape();
230        }            }
231        if (right->getRank()==0)            if (right->getRank()==0)
232        {            {
233          return left->getShape();                  return left->getShape();
234        }            }
235        throw DataException("Shapes not the same - arguments must have matching shapes (or be scalars) for (point)binary operations on lazy data.");            throw DataException("Shapes not the same - arguments must have matching shapes (or be scalars) for (point)binary operations on lazy data.");
236      }          }
237      return left->getShape();          return left->getShape();
238  }  }
239    
240  // return the shape for "op left"  // return the shape for "op left"
# Line 212  resultShape(DataAbstract_ptr left, DataA Line 242  resultShape(DataAbstract_ptr left, DataA
242  DataTypes::ShapeType  DataTypes::ShapeType
243  resultShape(DataAbstract_ptr left, ES_optype op, int axis_offset)  resultShape(DataAbstract_ptr left, ES_optype op, int axis_offset)
244  {  {
245      switch(op)          switch(op)
246      {          {
247          case TRANS:          case TRANS:
248         {            // for the scoping of variables             {                    // for the scoping of variables
249          const DataTypes::ShapeType& s=left->getShape();                  const DataTypes::ShapeType& s=left->getShape();
250          DataTypes::ShapeType sh;                  DataTypes::ShapeType sh;
251          int rank=left->getRank();                  int rank=left->getRank();
252          if (axis_offset<0 || axis_offset>rank)                  if (axis_offset<0 || axis_offset>rank)
253          {                  {
254                 throw DataException("Error - Data::transpose must have 0 <= axis_offset <= rank=" + rank);              stringstream e;
255              }              e << "Error - Data::transpose must have 0 <= axis_offset <= rank=" << rank;
256              for (int i=0; i<rank; i++)              throw DataException(e.str());
257          {          }
258             int index = (axis_offset+i)%rank;          for (int i=0; i<rank; i++)
259                 sh.push_back(s[index]); // Append to new shape                  {
260              }                     int index = (axis_offset+i)%rank;
261          return sh;             sh.push_back(s[index]); // Append to new shape
262         }          }
263      break;                  return sh;
264      case TRACE:             }
265         {          break;
266          int rank=left->getRank();          case TRACE:
267          if (rank<2)             {
268          {                  int rank=left->getRank();
269             throw DataException("Trace can only be computed for objects with rank 2 or greater.");                  if (rank<2)
270          }                  {
271          if ((axis_offset>rank-2) || (axis_offset<0))                     throw DataException("Trace can only be computed for objects with rank 2 or greater.");
272          {                  }
273             throw DataException("Trace: axis offset must lie between 0 and rank-2 inclusive.");                  if ((axis_offset>rank-2) || (axis_offset<0))
274          }                  {
275          if (rank==2)                     throw DataException("Trace: axis offset must lie between 0 and rank-2 inclusive.");
276          {                  }
277             return DataTypes::scalarShape;                  if (rank==2)
278          }                  {
279          else if (rank==3)                     return DataTypes::scalarShape;
280          {                  }
281             DataTypes::ShapeType sh;                  else if (rank==3)
282                 if (axis_offset==0)                  {
283             {                     DataTypes::ShapeType sh;
284                  sh.push_back(left->getShape()[2]);                     if (axis_offset==0)
285                 }                     {
286                 else     // offset==1                          sh.push_back(left->getShape()[2]);
287             {                     }
288              sh.push_back(left->getShape()[0]);                     else         // offset==1
289                 }                     {
290             return sh;                          sh.push_back(left->getShape()[0]);
291          }                     }
292          else if (rank==4)                     return sh;
293          {                  }
294             DataTypes::ShapeType sh;                  else if (rank==4)
295             const DataTypes::ShapeType& s=left->getShape();                  {
296                 if (axis_offset==0)                     DataTypes::ShapeType sh;
297             {                     const DataTypes::ShapeType& s=left->getShape();
298                  sh.push_back(s[2]);                     if (axis_offset==0)
299                  sh.push_back(s[3]);                     {
300                 }                          sh.push_back(s[2]);
301                 else if (axis_offset==1)                          sh.push_back(s[3]);
302             {                     }
303                  sh.push_back(s[0]);                     else if (axis_offset==1)
304                  sh.push_back(s[3]);                     {
305                 }                          sh.push_back(s[0]);
306             else     // offset==2                          sh.push_back(s[3]);
307             {                     }
308              sh.push_back(s[0]);                     else         // offset==2
309              sh.push_back(s[1]);                     {
310             }                          sh.push_back(s[0]);
311             return sh;                          sh.push_back(s[1]);
312          }                     }
313          else        // unknown rank                     return sh;
314          {                  }
315             throw DataException("Error - Data::trace can only be calculated for rank 2, 3 or 4 object.");                  else            // unknown rank
316          }                  {
317         }                     throw DataException("Error - Data::trace can only be calculated for rank 2, 3 or 4 object.");
318      break;                  }
319          default:             }
320      throw DataException("Programmer error - resultShape(left,op) can't compute shapes for operator "+opToString(op)+".");          break;
321      }          default:
322            throw DataException("Programmer error - resultShape(left,op) can't compute shapes for operator "+opToString(op)+".");
323            }
324  }  }
325    
326  DataTypes::ShapeType  DataTypes::ShapeType
# Line 306  SwapShape(DataAbstract_ptr left, const i Line 338  SwapShape(DataAbstract_ptr left, const i
338          throw DataException("Error - Data::swapaxes argument must have at least rank 2.");          throw DataException("Error - Data::swapaxes argument must have at least rank 2.");
339       }       }
340       if (axis0<0 || axis0>rank-1) {       if (axis0<0 || axis0>rank-1) {
341          throw DataException("Error - Data::swapaxes: axis0 must be between 0 and rank-1=" + rank-1);          stringstream e;
342            e << "Error - Data::swapaxes: axis0 must be between 0 and rank-1=" << (rank-1);
343            throw DataException(e.str());
344       }       }
345       if (axis1<0 || axis1>rank-1) {       if (axis1<0 || axis1>rank-1) {
346           throw DataException("Error - Data::swapaxes: axis1 must be between 0 and rank-1=" + rank-1);          stringstream e;
347            e << "Error - Data::swapaxes: axis1 must be between 0 and rank-1=" << (rank-1);
348            throw DataException(e.str());
349       }       }
350       if (axis0 == axis1) {       if (axis0 == axis1) {
351           throw DataException("Error - Data::swapaxes: axis indices must be different.");           throw DataException("Error - Data::swapaxes: axis indices must be different.");
# Line 340  SwapShape(DataAbstract_ptr left, const i Line 376  SwapShape(DataAbstract_ptr left, const i
376  DataTypes::ShapeType  DataTypes::ShapeType
377  GTPShape(DataAbstract_ptr left, DataAbstract_ptr right, int axis_offset, int transpose, int& SL, int& SM, int& SR)  GTPShape(DataAbstract_ptr left, DataAbstract_ptr right, int axis_offset, int transpose, int& SL, int& SM, int& SR)
378  {  {
379                
380    // Get rank and shape of inputs    // Get rank and shape of inputs
381    int rank0 = left->getRank();    int rank0 = left->getRank();
382    int rank1 = right->getRank();    int rank1 = right->getRank();
# Line 349  GTPShape(DataAbstract_ptr left, DataAbst Line 385  GTPShape(DataAbstract_ptr left, DataAbst
385    
386    // Prepare for the loops of the product and verify compatibility of shapes    // Prepare for the loops of the product and verify compatibility of shapes
387    int start0=0, start1=0;    int start0=0, start1=0;
388    if (transpose == 0)       {}    if (transpose == 0)           {}
389    else if (transpose == 1)  { start0 = axis_offset; }    else if (transpose == 1)      { start0 = axis_offset; }
390    else if (transpose == 2)  { start1 = rank1-axis_offset; }    else if (transpose == 2)      { start1 = rank1-axis_offset; }
391    else              { throw DataException("DataLazy GeneralTensorProduct Constructor: Error - transpose should be 0, 1 or 2"); }    else                          { throw DataException("DataLazy GeneralTensorProduct Constructor: Error - transpose should be 0, 1 or 2"); }
392    
393    if (rank0<axis_offset)    if (rank0<axis_offset)
394    {    {
395      throw DataException("DataLazy GeneralTensorProduct Constructor: Error - rank of left < axisoffset");          throw DataException("DataLazy GeneralTensorProduct Constructor: Error - rank of left < axisoffset");
396    }    }
397    
398    // Adjust the shapes for transpose    // Adjust the shapes for transpose
399    DataTypes::ShapeType tmpShape0(rank0);    // pre-sizing the vectors rather    DataTypes::ShapeType tmpShape0(rank0);        // pre-sizing the vectors rather
400    DataTypes::ShapeType tmpShape1(rank1);    // than using push_back    DataTypes::ShapeType tmpShape1(rank1);        // than using push_back
401    for (int i=0; i<rank0; i++)   { tmpShape0[i]=shape0[(i+start0)%rank0]; }    for (int i=0; i<rank0; i++)   { tmpShape0[i]=shape0[(i+start0)%rank0]; }
402    for (int i=0; i<rank1; i++)   { tmpShape1[i]=shape1[(i+start1)%rank1]; }    for (int i=0; i<rank1; i++)   { tmpShape1[i]=shape1[(i+start1)%rank1]; }
403    
404    // Prepare for the loops of the product    // Prepare for the loops of the product
405    SL=1, SM=1, SR=1;    SL=1, SM=1, SR=1;
406    for (int i=0; i<rank0-axis_offset; i++)   {    for (int i=0; i<rank0-axis_offset; i++)       {
407      SL *= tmpShape0[i];      SL *= tmpShape0[i];
408    }    }
409    for (int i=rank0-axis_offset; i<rank0; i++)   {    for (int i=rank0-axis_offset; i<rank0; i++)   {
410      if (tmpShape0[i] != tmpShape1[i-(rank0-axis_offset)]) {      if (tmpShape0[i] != tmpShape1[i-(rank0-axis_offset)]) {
411        throw DataException("C_GeneralTensorProduct: Error - incompatible shapes");        throw DataException("C_GeneralTensorProduct: Error - incompatible shapes");
412      }      }
413      SM *= tmpShape0[i];      SM *= tmpShape0[i];
414    }    }
415    for (int i=axis_offset; i<rank1; i++)     {    for (int i=axis_offset; i<rank1; i++)         {
416      SR *= tmpShape1[i];      SR *= tmpShape1[i];
417    }    }
418    
419    // Define the shape of the output (rank of shape is the sum of the loop ranges below)    // Define the shape of the output (rank of shape is the sum of the loop ranges below)
420    DataTypes::ShapeType shape2(rank0+rank1-2*axis_offset);      DataTypes::ShapeType shape2(rank0+rank1-2*axis_offset);      
421    {         // block to limit the scope of out_index    {                     // block to limit the scope of out_index
422       int out_index=0;       int out_index=0;
423       for (int i=0; i<rank0-axis_offset; i++, ++out_index) { shape2[out_index]=tmpShape0[i]; } // First part of arg_0_Z       for (int i=0; i<rank0-axis_offset; i++, ++out_index) { shape2[out_index]=tmpShape0[i]; } // First part of arg_0_Z
424       for (int i=axis_offset; i<rank1; i++, ++out_index)   { shape2[out_index]=tmpShape1[i]; } // Last part of arg_1_Z       for (int i=axis_offset; i<rank1; i++, ++out_index)   { shape2[out_index]=tmpShape1[i]; } // Last part of arg_1_Z
# Line 398  GTPShape(DataAbstract_ptr left, DataAbst Line 434  GTPShape(DataAbstract_ptr left, DataAbst
434    return shape2;    return shape2;
435  }  }
436    
437  }   // end anonymous namespace  }       // end anonymous namespace
438    
439    
440    
# Line 433  void DataLazy::LazyNodeSetup() Line 469  void DataLazy::LazyNodeSetup()
469    
470  // Creates an identity node  // Creates an identity node
471  DataLazy::DataLazy(DataAbstract_ptr p)  DataLazy::DataLazy(DataAbstract_ptr p)
472      : parent(p->getFunctionSpace(),p->getShape())          : parent(p->getFunctionSpace(),p->getShape())
473      ,m_sampleids(0),          ,m_sampleids(0),
474      m_samples(1)          m_samples(1)
475  {  {
476     if (p->isLazy())     if (p->isLazy())
477     {     {
478      // I don't want identity of Lazy.          // I don't want identity of Lazy.
479      // Question: Why would that be so bad?          // Question: Why would that be so bad?
480      // Answer: We assume that the child of ID is something we can call getVector on          // Answer: We assume that the child of ID is something we can call getVector on
481      throw DataException("Programmer error - attempt to create identity from a DataLazy.");          throw DataException("Programmer error - attempt to create identity from a DataLazy.");
482     }     }
483     else     else
484     {     {
485      p->makeLazyShared();          DataReady_ptr dr=dynamic_pointer_cast<DataReady>(p);
486      DataReady_ptr dr=dynamic_pointer_cast<DataReady>(p);          makeIdentity(dr);
     makeIdentity(dr);  
487  LAZYDEBUG(cout << "Wrapping " << dr.get() << " id=" << m_id.get() << endl;)  LAZYDEBUG(cout << "Wrapping " << dr.get() << " id=" << m_id.get() << endl;)
488     }     }
489  LAZYDEBUG(cout << "(1)Lazy created with " << m_samplesize << endl;)  LAZYDEBUG(cout << "(1)Lazy created with " << m_samplesize << endl;)
490  }  }
491    
492  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op)  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op)
493      : parent(left->getFunctionSpace(),(getOpgroup(op)!=G_REDUCTION)?left->getShape():DataTypes::scalarShape),          : parent(left->getFunctionSpace(),(getOpgroup(op)!=G_REDUCTION)?left->getShape():DataTypes::scalarShape),
494      m_op(op),          m_op(op),
495      m_axis_offset(0),          m_axis_offset(0),
496      m_transpose(0),          m_transpose(0),
497      m_SL(0), m_SM(0), m_SR(0)          m_SL(0), m_SM(0), m_SR(0)
498  {  {
499     if ((getOpgroup(op)!=G_UNARY) && (getOpgroup(op)!=G_NP1OUT) && (getOpgroup(op)!=G_REDUCTION))     if ((getOpgroup(op)!=G_UNARY) && (getOpgroup(op)!=G_NP1OUT) && (getOpgroup(op)!=G_REDUCTION))
500     {     {
501      throw DataException("Programmer error - constructor DataLazy(left, op) will only process UNARY operations.");          throw DataException("Programmer error - constructor DataLazy(left, op) will only process UNARY operations.");
502     }     }
503    
504     DataLazy_ptr lleft;     DataLazy_ptr lleft;
505     if (!left->isLazy())     if (!left->isLazy())
506     {     {
507      lleft=DataLazy_ptr(new DataLazy(left));          lleft=DataLazy_ptr(new DataLazy(left));
508     }     }
509     else     else
510     {     {
511      lleft=dynamic_pointer_cast<DataLazy>(left);          lleft=dynamic_pointer_cast<DataLazy>(left);
512     }     }
513     m_readytype=lleft->m_readytype;     m_readytype=lleft->m_readytype;
514     m_left=lleft;     m_left=lleft;
# Line 487  DataLazy::DataLazy(DataAbstract_ptr left Line 522  DataLazy::DataLazy(DataAbstract_ptr left
522    
523  // In this constructor we need to consider interpolation  // In this constructor we need to consider interpolation
524  DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)  DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
525      : parent(resultFS(left,right,op), resultShape(left,right,op)),          : parent(resultFS(left,right,op), resultShape(left,right,op)),
526      m_op(op),          m_op(op),
527      m_SL(0), m_SM(0), m_SR(0)          m_SL(0), m_SM(0), m_SR(0)
528  {  {
529  LAZYDEBUG(cout << "Forming operator with " << left.get() << " " << right.get() << endl;)  LAZYDEBUG(cout << "Forming operator with " << left.get() << " " << right.get() << endl;)
530     if ((getOpgroup(op)!=G_BINARY))     if ((getOpgroup(op)!=G_BINARY))
531     {     {
532      throw DataException("Programmer error - constructor DataLazy(left, right, op) will only process BINARY operations.");          throw DataException("Programmer error - constructor DataLazy(left, right, op) will only process BINARY operations.");
533     }     }
534    
535     if (getFunctionSpace()!=left->getFunctionSpace())    // left needs to be interpolated     if (getFunctionSpace()!=left->getFunctionSpace())    // left needs to be interpolated
536     {     {
537      FunctionSpace fs=getFunctionSpace();          FunctionSpace fs=getFunctionSpace();
538      Data ltemp(left);          Data ltemp(left);
539      Data tmp(ltemp,fs);          Data tmp(ltemp,fs);
540      left=tmp.borrowDataPtr();          left=tmp.borrowDataPtr();
541     }     }
542     if (getFunctionSpace()!=right->getFunctionSpace())   // right needs to be interpolated     if (getFunctionSpace()!=right->getFunctionSpace())   // right needs to be interpolated
543     {     {
544      Data tmp(Data(right),getFunctionSpace());          Data tmp(Data(right),getFunctionSpace());
545      right=tmp.borrowDataPtr();          right=tmp.borrowDataPtr();
546  LAZYDEBUG(cout << "Right interpolation required " << right.get() << endl;)  LAZYDEBUG(cout << "Right interpolation required " << right.get() << endl;)
547     }     }
548     left->operandCheck(*right);     left->operandCheck(*right);
549    
550     if (left->isLazy())          // the children need to be DataLazy. Wrap them in IDENTITY if required     if (left->isLazy())                  // the children need to be DataLazy. Wrap them in IDENTITY if required
551     {     {
552      m_left=dynamic_pointer_cast<DataLazy>(left);          m_left=dynamic_pointer_cast<DataLazy>(left);
553  LAZYDEBUG(cout << "Left is " << m_left->toString() << endl;)  LAZYDEBUG(cout << "Left is " << m_left->toString() << endl;)
554     }     }
555     else     else
556     {     {
557      m_left=DataLazy_ptr(new DataLazy(left));          m_left=DataLazy_ptr(new DataLazy(left));
558  LAZYDEBUG(cout << "Left " << left.get() << " wrapped " << m_left->m_id.get() << endl;)  LAZYDEBUG(cout << "Left " << left.get() << " wrapped " << m_left->m_id.get() << endl;)
559     }     }
560     if (right->isLazy())     if (right->isLazy())
561     {     {
562      m_right=dynamic_pointer_cast<DataLazy>(right);          m_right=dynamic_pointer_cast<DataLazy>(right);
563  LAZYDEBUG(cout << "Right is " << m_right->toString() << endl;)  LAZYDEBUG(cout << "Right is " << m_right->toString() << endl;)
564     }     }
565     else     else
566     {     {
567      m_right=DataLazy_ptr(new DataLazy(right));          m_right=DataLazy_ptr(new DataLazy(right));
568  LAZYDEBUG(cout << "Right " << right.get() << " wrapped " << m_right->m_id.get() << endl;)  LAZYDEBUG(cout << "Right " << right.get() << " wrapped " << m_right->m_id.get() << endl;)
569     }     }
570     char lt=m_left->m_readytype;     char lt=m_left->m_readytype;
571     char rt=m_right->m_readytype;     char rt=m_right->m_readytype;
572     if (lt=='E' || rt=='E')     if (lt=='E' || rt=='E')
573     {     {
574      m_readytype='E';          m_readytype='E';
575     }     }
576     else if (lt=='T' || rt=='T')     else if (lt=='T' || rt=='T')
577     {     {
578      m_readytype='T';          m_readytype='T';
579     }     }
580     else     else
581     {     {
582      m_readytype='C';          m_readytype='C';
583     }     }
584     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
585     m_children=m_left->m_children+m_right->m_children+2;     m_children=m_left->m_children+m_right->m_children+2;
# Line 555  LAZYDEBUG(cout << "(3)Lazy created with Line 590  LAZYDEBUG(cout << "(3)Lazy created with
590  }  }
591    
592  DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op, int axis_offset, int transpose)  DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op, int axis_offset, int transpose)
593      : parent(resultFS(left,right,op), GTPShape(left,right, axis_offset, transpose, m_SL,m_SM, m_SR)),          : parent(resultFS(left,right,op), GTPShape(left,right, axis_offset, transpose, m_SL,m_SM, m_SR)),
594      m_op(op),          m_op(op),
595      m_axis_offset(axis_offset),          m_axis_offset(axis_offset),
596      m_transpose(transpose)          m_transpose(transpose)
597  {  {
598     if ((getOpgroup(op)!=G_TENSORPROD))     if ((getOpgroup(op)!=G_TENSORPROD))
599     {     {
600      throw DataException("Programmer error - constructor DataLazy(left, right, op, ax, tr) will only process BINARY operations which require parameters.");          throw DataException("Programmer error - constructor DataLazy(left, right, op, ax, tr) will only process BINARY operations which require parameters.");
601     }     }
602     if ((transpose>2) || (transpose<0))     if ((transpose>2) || (transpose<0))
603     {     {
604      throw DataException("DataLazy GeneralTensorProduct constructor: Error - transpose should be 0, 1 or 2");          throw DataException("DataLazy GeneralTensorProduct constructor: Error - transpose should be 0, 1 or 2");
605     }     }
606     if (getFunctionSpace()!=left->getFunctionSpace())    // left needs to be interpolated     if (getFunctionSpace()!=left->getFunctionSpace())    // left needs to be interpolated
607     {     {
608      FunctionSpace fs=getFunctionSpace();          FunctionSpace fs=getFunctionSpace();
609      Data ltemp(left);          Data ltemp(left);
610      Data tmp(ltemp,fs);          Data tmp(ltemp,fs);
611      left=tmp.borrowDataPtr();          left=tmp.borrowDataPtr();
612     }     }
613     if (getFunctionSpace()!=right->getFunctionSpace())   // right needs to be interpolated     if (getFunctionSpace()!=right->getFunctionSpace())   // right needs to be interpolated
614     {     {
615      Data tmp(Data(right),getFunctionSpace());          Data tmp(Data(right),getFunctionSpace());
616      right=tmp.borrowDataPtr();          right=tmp.borrowDataPtr();
617     }     }
618  //    left->operandCheck(*right);  //    left->operandCheck(*right);
619    
620     if (left->isLazy())          // the children need to be DataLazy. Wrap them in IDENTITY if required     if (left->isLazy())                  // the children need to be DataLazy. Wrap them in IDENTITY if required
621     {     {
622      m_left=dynamic_pointer_cast<DataLazy>(left);          m_left=dynamic_pointer_cast<DataLazy>(left);
623     }     }
624     else     else
625     {     {
626      m_left=DataLazy_ptr(new DataLazy(left));          m_left=DataLazy_ptr(new DataLazy(left));
627     }     }
628     if (right->isLazy())     if (right->isLazy())
629     {     {
630      m_right=dynamic_pointer_cast<DataLazy>(right);          m_right=dynamic_pointer_cast<DataLazy>(right);
631     }     }
632     else     else
633     {     {
634      m_right=DataLazy_ptr(new DataLazy(right));          m_right=DataLazy_ptr(new DataLazy(right));
635     }     }
636     char lt=m_left->m_readytype;     char lt=m_left->m_readytype;
637     char rt=m_right->m_readytype;     char rt=m_right->m_readytype;
638     if (lt=='E' || rt=='E')     if (lt=='E' || rt=='E')
639     {     {
640      m_readytype='E';          m_readytype='E';
641     }     }
642     else if (lt=='T' || rt=='T')     else if (lt=='T' || rt=='T')
643     {     {
644      m_readytype='T';          m_readytype='T';
645     }     }
646     else     else
647     {     {
648      m_readytype='C';          m_readytype='C';
649     }     }
650     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
651     m_children=m_left->m_children+m_right->m_children+2;     m_children=m_left->m_children+m_right->m_children+2;
# Line 622  LAZYDEBUG(cout << "(4)Lazy created with Line 657  LAZYDEBUG(cout << "(4)Lazy created with
657    
658    
659  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, int axis_offset)  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, int axis_offset)
660      : parent(left->getFunctionSpace(), resultShape(left,op, axis_offset)),          : parent(left->getFunctionSpace(), resultShape(left,op, axis_offset)),
661      m_op(op),          m_op(op),
662      m_axis_offset(axis_offset),          m_axis_offset(axis_offset),
663      m_transpose(0),          m_transpose(0),
664      m_tol(0)          m_tol(0)
665  {  {
666     if ((getOpgroup(op)!=G_NP1OUT_P))     if ((getOpgroup(op)!=G_NP1OUT_P))
667     {     {
668      throw DataException("Programmer error - constructor DataLazy(left, op, ax) will only process UNARY operations which require parameters.");          throw DataException("Programmer error - constructor DataLazy(left, op, ax) will only process UNARY operations which require parameters.");
669     }     }
670     DataLazy_ptr lleft;     DataLazy_ptr lleft;
671     if (!left->isLazy())     if (!left->isLazy())
672     {     {
673      lleft=DataLazy_ptr(new DataLazy(left));          lleft=DataLazy_ptr(new DataLazy(left));
674     }     }
675     else     else
676     {     {
677      lleft=dynamic_pointer_cast<DataLazy>(left);          lleft=dynamic_pointer_cast<DataLazy>(left);
678     }     }
679     m_readytype=lleft->m_readytype;     m_readytype=lleft->m_readytype;
680     m_left=lleft;     m_left=lleft;
# Line 652  LAZYDEBUG(cout << "(5)Lazy created with Line 687  LAZYDEBUG(cout << "(5)Lazy created with
687  }  }
688    
689  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, double tol)  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, double tol)
690      : parent(left->getFunctionSpace(), left->getShape()),          : parent(left->getFunctionSpace(), left->getShape()),
691      m_op(op),          m_op(op),
692      m_axis_offset(0),          m_axis_offset(0),
693      m_transpose(0),          m_transpose(0),
694      m_tol(tol)          m_tol(tol)
695  {  {
696     if ((getOpgroup(op)!=G_UNARY_P))     if ((getOpgroup(op)!=G_UNARY_P))
697     {     {
698      throw DataException("Programmer error - constructor DataLazy(left, op, tol) will only process UNARY operations which require parameters.");          throw DataException("Programmer error - constructor DataLazy(left, op, tol) will only process UNARY operations which require parameters.");
699     }     }
700     DataLazy_ptr lleft;     DataLazy_ptr lleft;
701     if (!left->isLazy())     if (!left->isLazy())
702     {     {
703      lleft=DataLazy_ptr(new DataLazy(left));          lleft=DataLazy_ptr(new DataLazy(left));
704     }     }
705     else     else
706     {     {
707      lleft=dynamic_pointer_cast<DataLazy>(left);          lleft=dynamic_pointer_cast<DataLazy>(left);
708     }     }
709     m_readytype=lleft->m_readytype;     m_readytype=lleft->m_readytype;
710     m_left=lleft;     m_left=lleft;
# Line 683  LAZYDEBUG(cout << "(6)Lazy created with Line 718  LAZYDEBUG(cout << "(6)Lazy created with
718    
719    
720  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, const int axis0, const int axis1)  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, const int axis0, const int axis1)
721      : parent(left->getFunctionSpace(), SwapShape(left,axis0,axis1)),          : parent(left->getFunctionSpace(), SwapShape(left,axis0,axis1)),
722      m_op(op),          m_op(op),
723      m_axis_offset(axis0),          m_axis_offset(axis0),
724      m_transpose(axis1),          m_transpose(axis1),
725      m_tol(0)          m_tol(0)
726  {  {
727     if ((getOpgroup(op)!=G_NP1OUT_2P))     if ((getOpgroup(op)!=G_NP1OUT_2P))
728     {     {
729      throw DataException("Programmer error - constructor DataLazy(left, op, tol) will only process UNARY operations which require two integer parameters.");          throw DataException("Programmer error - constructor DataLazy(left, op, tol) will only process UNARY operations which require two integer parameters.");
730     }     }
731     DataLazy_ptr lleft;     DataLazy_ptr lleft;
732     if (!left->isLazy())     if (!left->isLazy())
733     {     {
734      lleft=DataLazy_ptr(new DataLazy(left));          lleft=DataLazy_ptr(new DataLazy(left));
735     }     }
736     else     else
737     {     {
738      lleft=dynamic_pointer_cast<DataLazy>(left);          lleft=dynamic_pointer_cast<DataLazy>(left);
739     }     }
740     m_readytype=lleft->m_readytype;     m_readytype=lleft->m_readytype;
741     m_left=lleft;     m_left=lleft;
# Line 712  DataLazy::DataLazy(DataAbstract_ptr left Line 747  DataLazy::DataLazy(DataAbstract_ptr left
747  LAZYDEBUG(cout << "(7)Lazy created with " << m_samplesize << endl;)  LAZYDEBUG(cout << "(7)Lazy created with " << m_samplesize << endl;)
748  }  }
749    
750    
751    namespace
752    {
753    
754        inline int max3(int a, int b, int c)
755        {
756            int t=(a>b?a:b);
757            return (t>c?t:c);
758    
759        }
760    }
761    
762    DataLazy::DataLazy(DataAbstract_ptr mask, DataAbstract_ptr left, DataAbstract_ptr right/*, double tol*/)
763            : parent(left->getFunctionSpace(), left->getShape()),
764            m_op(CONDEVAL),
765            m_axis_offset(0),
766            m_transpose(0),
767            m_tol(0)
768    {
769    
770       DataLazy_ptr lmask;
771       DataLazy_ptr lleft;
772       DataLazy_ptr lright;
773       if (!mask->isLazy())
774       {
775            lmask=DataLazy_ptr(new DataLazy(mask));
776       }
777       else
778       {
779            lmask=dynamic_pointer_cast<DataLazy>(mask);
780       }
781       if (!left->isLazy())
782       {
783            lleft=DataLazy_ptr(new DataLazy(left));
784       }
785       else
786       {
787            lleft=dynamic_pointer_cast<DataLazy>(left);
788       }
789       if (!right->isLazy())
790       {
791            lright=DataLazy_ptr(new DataLazy(right));
792       }
793       else
794       {
795            lright=dynamic_pointer_cast<DataLazy>(right);
796       }
797       m_readytype=lmask->m_readytype;
798       if ((lleft->m_readytype!=lright->m_readytype) || (lmask->m_readytype!=lleft->m_readytype))
799       {
800            throw DataException("Programmer Error - condEval arguments must have the same readytype");
801       }
802       m_left=lleft;
803       m_right=lright;
804       m_mask=lmask;
805       m_samplesize=getNumDPPSample()*getNoValues();
806       m_children=m_left->m_children+m_right->m_children+m_mask->m_children+1;
807       m_height=max3(m_left->m_height,m_right->m_height,m_mask->m_height)+1;
808       LazyNodeSetup();
809       SIZELIMIT
810    LAZYDEBUG(cout << "(8)Lazy created with " << m_samplesize << endl;)
811    }
812    
813    
814    
815  DataLazy::~DataLazy()  DataLazy::~DataLazy()
816  {  {
817     delete[] m_sampleids;     delete[] m_sampleids;
# Line 724  DataLazy::~DataLazy() Line 824  DataLazy::~DataLazy()
824    For reasons of efficiency do not call this method on DataExpanded nodes.    For reasons of efficiency do not call this method on DataExpanded nodes.
825  */  */
826  DataReady_ptr  DataReady_ptr
827  DataLazy::collapseToReady()  DataLazy::collapseToReady() const
828  {  {
829    if (m_readytype=='E')    if (m_readytype=='E')
830    { // this is more an efficiency concern than anything else    {     // this is more an efficiency concern than anything else
831      throw DataException("Programmer Error - do not use collapse on Expanded data.");      throw DataException("Programmer Error - do not use collapse on Expanded data.");
832    }    }
833    if (m_op==IDENTITY)    if (m_op==IDENTITY)
# Line 745  DataLazy::collapseToReady() Line 845  DataLazy::collapseToReady()
845    switch(m_op)    switch(m_op)
846    {    {
847      case ADD:      case ADD:
848      result=left+right;          result=left+right;
849      break;          break;
850      case SUB:            case SUB:          
851      result=left-right;          result=left-right;
852      break;          break;
853      case MUL:            case MUL:          
854      result=left*right;          result=left*right;
855      break;          break;
856      case DIV:            case DIV:          
857      result=left/right;          result=left/right;
858      break;          break;
859      case SIN:      case SIN:
860      result=left.sin();            result=left.sin();      
861      break;          break;
862      case COS:      case COS:
863      result=left.cos();          result=left.cos();
864      break;          break;
865      case TAN:      case TAN:
866      result=left.tan();          result=left.tan();
867      break;          break;
868      case ASIN:      case ASIN:
869      result=left.asin();          result=left.asin();
870      break;          break;
871      case ACOS:      case ACOS:
872      result=left.acos();          result=left.acos();
873      break;          break;
874      case ATAN:      case ATAN:
875      result=left.atan();          result=left.atan();
876      break;          break;
877      case SINH:      case SINH:
878      result=left.sinh();          result=left.sinh();
879      break;          break;
880      case COSH:      case COSH:
881      result=left.cosh();          result=left.cosh();
882      break;          break;
883      case TANH:      case TANH:
884      result=left.tanh();          result=left.tanh();
885      break;          break;
886      case ERF:      case ERF:
887      result=left.erf();          result=left.erf();
888      break;          break;
889     case ASINH:     case ASINH:
890      result=left.asinh();          result=left.asinh();
891      break;          break;
892     case ACOSH:     case ACOSH:
893      result=left.acosh();          result=left.acosh();
894      break;          break;
895     case ATANH:     case ATANH:
896      result=left.atanh();          result=left.atanh();
897      break;          break;
898      case LOG10:      case LOG10:
899      result=left.log10();          result=left.log10();
900      break;          break;
901      case LOG:      case LOG:
902      result=left.log();          result=left.log();
903      break;          break;
904      case SIGN:      case SIGN:
905      result=left.sign();          result=left.sign();
906      break;          break;
907      case ABS:      case ABS:
908      result=left.abs();          result=left.abs();
909      break;          break;
910      case NEG:      case NEG:
911      result=left.neg();          result=left.neg();
912      break;          break;
913      case POS:      case POS:
914      // it doesn't mean anything for delayed.          // it doesn't mean anything for delayed.
915      // it will just trigger a deep copy of the lazy object          // it will just trigger a deep copy of the lazy object
916      throw DataException("Programmer error - POS not supported for lazy data.");          throw DataException("Programmer error - POS not supported for lazy data.");
917      break;          break;
918      case EXP:      case EXP:
919      result=left.exp();          result=left.exp();
920      break;          break;
921      case SQRT:      case SQRT:
922      result=left.sqrt();          result=left.sqrt();
923      break;          break;
924      case RECIP:      case RECIP:
925      result=left.oneOver();          result=left.oneOver();
926      break;          break;
927      case GZ:      case GZ:
928      result=left.wherePositive();          result=left.wherePositive();
929      break;          break;
930      case LZ:      case LZ:
931      result=left.whereNegative();          result=left.whereNegative();
932      break;          break;
933      case GEZ:      case GEZ:
934      result=left.whereNonNegative();          result=left.whereNonNegative();
935      break;          break;
936      case LEZ:      case LEZ:
937      result=left.whereNonPositive();          result=left.whereNonPositive();
938      break;          break;
939      case NEZ:      case NEZ:
940      result=left.whereNonZero(m_tol);          result=left.whereNonZero(m_tol);
941      break;          break;
942      case EZ:      case EZ:
943      result=left.whereZero(m_tol);          result=left.whereZero(m_tol);
944      break;          break;
945      case SYM:      case SYM:
946      result=left.symmetric();          result=left.symmetric();
947      break;          break;
948      case NSYM:      case NSYM:
949      result=left.nonsymmetric();          result=left.antisymmetric();
950      break;          break;
951      case PROD:      case PROD:
952      result=C_GeneralTensorProduct(left,right,m_axis_offset, m_transpose);          result=C_GeneralTensorProduct(left,right,m_axis_offset, m_transpose);
953      break;          break;
954      case TRANS:      case TRANS:
955      result=left.transpose(m_axis_offset);          result=left.transpose(m_axis_offset);
956      break;          break;
957      case TRACE:      case TRACE:
958      result=left.trace(m_axis_offset);          result=left.trace(m_axis_offset);
959      break;          break;
960      case SWAP:      case SWAP:
961      result=left.swapaxes(m_axis_offset, m_transpose);          result=left.swapaxes(m_axis_offset, m_transpose);
962      break;          break;
963      case MINVAL:      case MINVAL:
964      result=left.minval();          result=left.minval();
965      break;          break;
966      case MAXVAL:      case MAXVAL:
967      result=left.minval();          result=left.minval();
968            break;
969        case HER:
970        result=left.hermitian();
971      break;      break;
972      default:      default:
973      throw DataException("Programmer error - collapseToReady does not know how to resolve operator "+opToString(m_op)+".");          throw DataException("Programmer error - collapseToReady does not know how to resolve operator "+opToString(m_op)+".");
974    }    }
975    return result.borrowReadyPtr();    return result.borrowReadyPtr();
976  }  }
# Line 879  DataLazy::collapseToReady() Line 982  DataLazy::collapseToReady()
982     the purpose of using DataLazy in the first place).     the purpose of using DataLazy in the first place).
983  */  */
984  void  void
985  DataLazy::collapse()  DataLazy::collapse() const
986  {  {
987    if (m_op==IDENTITY)    if (m_op==IDENTITY)
988    {    {
989      return;          return;
990    }    }
991    if (m_readytype=='E')    if (m_readytype=='E')
992    { // this is more an efficiency concern than anything else    {     // this is more an efficiency concern than anything else
993      throw DataException("Programmer Error - do not use collapse on Expanded data.");      throw DataException("Programmer Error - do not use collapse on Expanded data.");
994    }    }
995    m_id=collapseToReady();    m_id=collapseToReady();
996    m_op=IDENTITY;    m_op=IDENTITY;
997  }  }
998    
   
   
   
   
   
 #define PROC_OP(TYPE,X)                               \  
     for (int j=0;j<onumsteps;++j)\  
     {\  
       for (int i=0;i<numsteps;++i,resultp+=resultStep) \  
       { \  
 LAZYDEBUG(cout << "[left,right]=[" << lroffset << "," << rroffset << "]" << endl;)\  
 LAZYDEBUG(cout << "{left,right}={" << (*left)[lroffset] << "," << (*right)[rroffset] << "}\n";)\  
          tensor_binary_operation< TYPE >(chunksize, &((*left)[lroffset]), &((*right)[rroffset]), resultp, X); \  
 LAZYDEBUG(cout << " result=      " << resultp[0] << endl;) \  
          lroffset+=leftstep; \  
          rroffset+=rightstep; \  
       }\  
       lroffset+=oleftstep;\  
       rroffset+=orightstep;\  
     }  
   
   
999  // The result will be stored in m_samples  // The result will be stored in m_samples
1000  // The return value is a pointer to the DataVector, offset is the offset within the return value  // The return value is a pointer to the DataVector, offset is the offset within the return value
1001  const DataTypes::ValueType*  const DataTypes::RealVectorType*
1002  DataLazy::resolveNodeSample(int tid, int sampleNo, size_t& roffset)  DataLazy::resolveNodeSample(int tid, int sampleNo, size_t& roffset) const
1003  {  {
1004  LAZYDEBUG(cout << "Resolve sample " << toString() << endl;)  LAZYDEBUG(cout << "Resolve sample " << toString() << endl;)
1005      // collapse so we have a 'E' node or an IDENTITY for some other type          // collapse so we have a 'E' node or an IDENTITY for some other type
1006    if (m_readytype!='E' && m_op!=IDENTITY)    if (m_readytype!='E' && m_op!=IDENTITY)
1007    {    {
1008      collapse();          collapse();
1009    }    }
1010    if (m_op==IDENTITY)      if (m_op==IDENTITY)  
1011    {    {
1012      const ValueType& vec=m_id->getVectorRO();      const RealVectorType& vec=m_id->getVectorRO();
1013      roffset=m_id->getPointOffset(sampleNo, 0);      roffset=m_id->getPointOffset(sampleNo, 0);
1014    #ifdef LAZY_STACK_PROF
1015    int x;
1016    if (&x<stackend[omp_get_thread_num()])
1017    {
1018           stackend[omp_get_thread_num()]=&x;
1019    }
1020    #endif
1021      return &(vec);      return &(vec);
1022    }    }
1023    if (m_readytype!='E')    if (m_readytype!='E')
# Line 938  LAZYDEBUG(cout << "Resolve sample " << t Line 1026  LAZYDEBUG(cout << "Resolve sample " << t
1026    }    }
1027    if (m_sampleids[tid]==sampleNo)    if (m_sampleids[tid]==sampleNo)
1028    {    {
1029      roffset=tid*m_samplesize;          roffset=tid*m_samplesize;
1030      return &(m_samples);        // sample is already resolved          return &(m_samples);            // sample is already resolved
1031    }    }
1032    m_sampleids[tid]=sampleNo;    m_sampleids[tid]=sampleNo;
1033    
1034    switch (getOpgroup(m_op))    switch (getOpgroup(m_op))
1035    {    {
1036    case G_UNARY:    case G_UNARY:
# Line 952  LAZYDEBUG(cout << "Resolve sample " << t Line 1041  LAZYDEBUG(cout << "Resolve sample " << t
1041    case G_TENSORPROD: return resolveNodeTProd(tid, sampleNo, roffset);    case G_TENSORPROD: return resolveNodeTProd(tid, sampleNo, roffset);
1042    case G_NP1OUT_2P: return resolveNodeNP1OUT_2P(tid, sampleNo, roffset);    case G_NP1OUT_2P: return resolveNodeNP1OUT_2P(tid, sampleNo, roffset);
1043    case G_REDUCTION: return resolveNodeReduction(tid, sampleNo, roffset);    case G_REDUCTION: return resolveNodeReduction(tid, sampleNo, roffset);
1044      case G_CONDEVAL: return resolveNodeCondEval(tid, sampleNo, roffset);
1045    default:    default:
1046      throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");      throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");
1047    }    }
1048  }  }
1049    
1050  const DataTypes::ValueType*  const DataTypes::RealVectorType*
1051  DataLazy::resolveNodeUnary(int tid, int sampleNo, size_t& roffset)  DataLazy::resolveNodeUnary(int tid, int sampleNo, size_t& roffset) const
1052  {  {
1053      // we assume that any collapsing has been done before we get here          // we assume that any collapsing has been done before we get here
1054      // since we only have one argument we don't need to think about only          // since we only have one argument we don't need to think about only
1055      // processing single points.          // processing single points.
1056      // we will also know we won't get identity nodes          // we will also know we won't get identity nodes
1057    if (m_readytype!='E')    if (m_readytype!='E')
1058    {    {
1059      throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");      throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
# Line 972  DataLazy::resolveNodeUnary(int tid, int Line 1062  DataLazy::resolveNodeUnary(int tid, int
1062    {    {
1063      throw DataException("Programmer error - resolveNodeUnary should not be called on identity nodes.");      throw DataException("Programmer error - resolveNodeUnary should not be called on identity nodes.");
1064    }    }
1065    const DataTypes::ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, roffset);    const DataTypes::RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, roffset);
1066    const double* left=&((*leftres)[roffset]);    const double* left=&((*leftres)[roffset]);
1067    roffset=m_samplesize*tid;    roffset=m_samplesize*tid;
1068    double* result=&(m_samples[roffset]);    double* result=&(m_samples[roffset]);
1069      escript::ESFunction operation=SINF;
1070    switch (m_op)    switch (m_op)
1071    {    {
1072      case SIN:        case SIN:
1073      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sin);      operation=SINF;
1074      break;      break;
1075      case COS:      case COS:
1076      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cos);          operation=COSF;
1077      break;      break;
1078      case TAN:      case TAN:
1079      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tan);          operation=TANF;
1080      break;      break;
1081      case ASIN:      case ASIN:
1082      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::asin);          operation=ASINF;
1083      break;      break;
1084      case ACOS:      case ACOS:
1085      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::acos);          operation=ACOSF;
1086      break;      break;
1087      case ATAN:      case ATAN:
1088      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::atan);          operation=ATANF;
1089      break;      break;
1090      case SINH:      case SINH:
1091      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sinh);          operation=SINHF;
1092      break;      break;
1093      case COSH:      case COSH:
1094      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cosh);          operation=COSHF;
1095      break;      break;
1096      case TANH:      case TANH:
1097      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tanh);          operation=TANHF;
1098      break;      break;
1099      case ERF:      case ERF:
1100  #if defined (_WIN32) && !defined(__INTEL_COMPILER)          operation=ERFF;
     throw DataException("Error - Data:: erf function is not supported on _WIN32 platforms.");  
 #else  
     tensor_unary_operation(m_samplesize, left, result, ::erf);  
1101      break;      break;
 #endif  
1102     case ASINH:     case ASINH:
1103  #if defined (_WIN32) && !defined(__INTEL_COMPILER)          operation=ASINHF;
     tensor_unary_operation(m_samplesize, left, result, escript::asinh_substitute);  
 #else  
     tensor_unary_operation(m_samplesize, left, result, ::asinh);  
 #endif    
1104      break;      break;
1105     case ACOSH:     case ACOSH:
1106  #if defined (_WIN32) && !defined(__INTEL_COMPILER)          operation=ACOSHF;
     tensor_unary_operation(m_samplesize, left, result, escript::acosh_substitute);  
 #else  
     tensor_unary_operation(m_samplesize, left, result, ::acosh);  
 #endif    
1107      break;      break;
1108     case ATANH:     case ATANH:
1109  #if defined (_WIN32) && !defined(__INTEL_COMPILER)          operation=ATANHF;
     tensor_unary_operation(m_samplesize, left, result, escript::atanh_substitute);  
 #else  
     tensor_unary_operation(m_samplesize, left, result, ::atanh);  
 #endif    
1110      break;      break;
1111      case LOG10:      case LOG10:
1112      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log10);          operation=LOG10F;
1113      break;      break;
1114      case LOG:      case LOG:
1115      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log);          operation=LOGF;
1116      break;      break;
1117      case SIGN:      case SIGN:
1118      tensor_unary_operation(m_samplesize, left, result, escript::fsign);          operation=SIGNF;
1119      break;      break;
1120      case ABS:      case ABS:
1121      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::fabs);          operation=ABSF;
1122      break;      break;
1123      case NEG:      case NEG:
1124      tensor_unary_operation(m_samplesize, left, result, negate<double>());          operation=NEGF;
1125      break;      break;
1126      case POS:      case POS:
1127      // it doesn't mean anything for delayed.          // it doesn't mean anything for delayed.
1128      // it will just trigger a deep copy of the lazy object          // it will just trigger a deep copy of the lazy object
1129      throw DataException("Programmer error - POS not supported for lazy data.");          throw DataException("Programmer error - POS not supported for lazy data.");
1130      break;          break;
1131      case EXP:      case EXP:
1132      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::exp);          operation=EXPF;
1133      break;      break;
1134      case SQRT:      case SQRT:
1135      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sqrt);          operation=SQRTF;
1136      break;      break;
1137      case RECIP:      case RECIP:
1138      tensor_unary_operation(m_samplesize, left, result, bind1st(divides<double>(),1.));          operation=INVF;
1139      break;      break;
1140      case GZ:      case GZ:
1141      tensor_unary_operation(m_samplesize, left, result, bind2nd(greater<double>(),0.0));          operation=GTZEROF;
1142      break;      break;
1143      case LZ:      case LZ:
1144      tensor_unary_operation(m_samplesize, left, result, bind2nd(less<double>(),0.0));          operation=LTZEROF;
1145      break;      break;
1146      case GEZ:      case GEZ:
1147      tensor_unary_operation(m_samplesize, left, result, bind2nd(greater_equal<double>(),0.0));          operation=GEZEROF;
1148      break;      break;
1149      case LEZ:      case LEZ:
1150      tensor_unary_operation(m_samplesize, left, result, bind2nd(less_equal<double>(),0.0));          operation=LEZEROF;
1151      break;      break;
1152  // There are actually G_UNARY_P but I don't see a compelling reason to treat them differently  // There are actually G_UNARY_P but I don't see a compelling reason to treat them differently
1153      case NEZ:      case NEZ:
1154      tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsGT(),m_tol));          operation=NEQZEROF;
1155      break;      break;
1156      case EZ:      case EZ:
1157      tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsLTE(),m_tol));          operation=EQZEROF;
1158      break;      break;
   
1159      default:      default:
1160      throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");          throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");
1161    }    }
1162      tensor_unary_array_operation(m_samplesize,
1163                                 left,
1164                                 result,
1165                                 operation,
1166                                 m_tol);  
1167    return &(m_samples);    return &(m_samples);
1168  }  }
1169    
1170    
1171  const DataTypes::ValueType*  const DataTypes::RealVectorType*
1172  DataLazy::resolveNodeReduction(int tid, int sampleNo, size_t& roffset)  DataLazy::resolveNodeReduction(int tid, int sampleNo, size_t& roffset) const
1173  {  {
1174      // we assume that any collapsing has been done before we get here          // we assume that any collapsing has been done before we get here
1175      // since we only have one argument we don't need to think about only          // since we only have one argument we don't need to think about only
1176      // processing single points.          // processing single points.
1177      // we will also know we won't get identity nodes          // we will also know we won't get identity nodes
1178    if (m_readytype!='E')    if (m_readytype!='E')
1179    {    {
1180      throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");      throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
# Line 1105  DataLazy::resolveNodeReduction(int tid, Line 1184  DataLazy::resolveNodeReduction(int tid,
1184      throw DataException("Programmer error - resolveNodeUnary should not be called on identity nodes.");      throw DataException("Programmer error - resolveNodeUnary should not be called on identity nodes.");
1185    }    }
1186    size_t loffset=0;    size_t loffset=0;
1187    const DataTypes::ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, loffset);    const DataTypes::RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, loffset);
1188    
1189    roffset=m_samplesize*tid;    roffset=m_samplesize*tid;
1190    unsigned int ndpps=getNumDPPSample();    unsigned int ndpps=getNumDPPSample();
1191    unsigned int psize=DataTypes::noValues(getShape());    unsigned int psize=DataTypes::noValues(m_left->getShape());
1192    double* result=&(m_samples[roffset]);    double* result=&(m_samples[roffset]);
1193    switch (m_op)    switch (m_op)
1194    {    {
1195      case MINVAL:      case MINVAL:
1196      {          {
1197        for (unsigned int z=0;z<ndpps;++z)            for (unsigned int z=0;z<ndpps;++z)
1198        {            {
1199          FMin op;              FMin op;
1200          *result=DataMaths::reductionOp(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max());              *result=escript::reductionOpVector(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max());
1201          loffset+=psize;              loffset+=psize;
1202          result++;              result++;
1203        }            }
1204      }          }
1205      break;          break;
1206      case MAXVAL:      case MAXVAL:
1207      {          {
1208        for (unsigned int z=0;z<ndpps;++z)            for (unsigned int z=0;z<ndpps;++z)
1209        {            {
1210        FMax op;            FMax op;
1211        *result=DataMaths::reductionOp(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max()*-1);            *result=escript::reductionOpVector(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max()*-1);
1212        loffset+=psize;            loffset+=psize;
1213        result++;            result++;
1214        }            }
1215      }          }
1216      break;          break;
1217      default:      default:
1218      throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");          throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");
1219    }    }
1220    return &(m_samples);    return &(m_samples);
1221  }  }
1222    
1223  const DataTypes::ValueType*  const DataTypes::RealVectorType*
1224  DataLazy::resolveNodeNP1OUT(int tid, int sampleNo, size_t& roffset)  DataLazy::resolveNodeNP1OUT(int tid, int sampleNo, size_t& roffset) const
1225  {  {
1226      // we assume that any collapsing has been done before we get here          // we assume that any collapsing has been done before we get here
1227      // since we only have one argument we don't need to think about only          // since we only have one argument we don't need to think about only
1228      // processing single points.          // processing single points.
1229    if (m_readytype!='E')    if (m_readytype!='E')
1230    {    {
1231      throw DataException("Programmer error - resolveNodeNP1OUT should only be called on expanded Data.");      throw DataException("Programmer error - resolveNodeNP1OUT should only be called on expanded Data.");
# Line 1156  DataLazy::resolveNodeNP1OUT(int tid, int Line 1235  DataLazy::resolveNodeNP1OUT(int tid, int
1235      throw DataException("Programmer error - resolveNodeNP1OUT should not be called on identity nodes.");      throw DataException("Programmer error - resolveNodeNP1OUT should not be called on identity nodes.");
1236    }    }
1237    size_t subroffset;    size_t subroffset;
1238    const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);    const RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1239    roffset=m_samplesize*tid;    roffset=m_samplesize*tid;
1240    size_t loop=0;    size_t loop=0;
1241    size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;    size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
# Line 1165  DataLazy::resolveNodeNP1OUT(int tid, int Line 1244  DataLazy::resolveNodeNP1OUT(int tid, int
1244    switch (m_op)    switch (m_op)
1245    {    {
1246      case SYM:      case SYM:
1247      for (loop=0;loop<numsteps;++loop)          for (loop=0;loop<numsteps;++loop)
1248      {          {
1249          DataMaths::symmetric(*leftres,m_left->getShape(),subroffset, m_samples, getShape(), offset);              escript::symmetric(*leftres,m_left->getShape(),subroffset, m_samples, getShape(), offset);
1250          subroffset+=step;              subroffset+=step;
1251          offset+=step;              offset+=step;
1252      }          }
1253      break;          break;
1254      case NSYM:      case NSYM:
1255      for (loop=0;loop<numsteps;++loop)          for (loop=0;loop<numsteps;++loop)
1256      {          {
1257          DataMaths::nonsymmetric(*leftres,m_left->getShape(),subroffset, m_samples, getShape(), offset);              escript::antisymmetric(*leftres,m_left->getShape(),subroffset, m_samples, getShape(), offset);
1258          subroffset+=step;              subroffset+=step;
1259          offset+=step;              offset+=step;
1260      }          }
1261      break;          break;
1262      default:      default:
1263      throw DataException("Programmer error - resolveNP1OUT can not resolve operator "+opToString(m_op)+".");          throw DataException("Programmer error - resolveNP1OUT can not resolve operator "+opToString(m_op)+".");
1264    }    }
1265    return &m_samples;    return &m_samples;
1266  }  }
1267    
1268  const DataTypes::ValueType*  const DataTypes::RealVectorType*
1269  DataLazy::resolveNodeNP1OUT_P(int tid, int sampleNo, size_t& roffset)  DataLazy::resolveNodeNP1OUT_P(int tid, int sampleNo, size_t& roffset) const
1270  {  {
1271      // we assume that any collapsing has been done before we get here          // we assume that any collapsing has been done before we get here
1272      // since we only have one argument we don't need to think about only          // since we only have one argument we don't need to think about only
1273      // processing single points.          // processing single points.
1274    if (m_readytype!='E')    if (m_readytype!='E')
1275    {    {
1276      throw DataException("Programmer error - resolveNodeNP1OUT_P should only be called on expanded Data.");      throw DataException("Programmer error - resolveNodeNP1OUT_P should only be called on expanded Data.");
# Line 1202  DataLazy::resolveNodeNP1OUT_P(int tid, i Line 1281  DataLazy::resolveNodeNP1OUT_P(int tid, i
1281    }    }
1282    size_t subroffset;    size_t subroffset;
1283    size_t offset;    size_t offset;
1284    const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);    const RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1285    roffset=m_samplesize*tid;    roffset=m_samplesize*tid;
1286    offset=roffset;    offset=roffset;
1287    size_t loop=0;    size_t loop=0;
# Line 1212  DataLazy::resolveNodeNP1OUT_P(int tid, i Line 1291  DataLazy::resolveNodeNP1OUT_P(int tid, i
1291    switch (m_op)    switch (m_op)
1292    {    {
1293      case TRACE:      case TRACE:
1294      for (loop=0;loop<numsteps;++loop)          for (loop=0;loop<numsteps;++loop)
1295      {          {
1296              DataMaths::trace(*leftres,m_left->getShape(),subroffset, m_samples ,getShape(),offset,m_axis_offset);              escript::trace(*leftres,m_left->getShape(),subroffset, m_samples ,getShape(),offset,m_axis_offset);
1297          subroffset+=instep;              subroffset+=instep;
1298          offset+=outstep;              offset+=outstep;
1299      }          }
1300      break;          break;
1301      case TRANS:      case TRANS:
1302      for (loop=0;loop<numsteps;++loop)          for (loop=0;loop<numsteps;++loop)
1303      {          {
1304              DataMaths::transpose(*leftres,m_left->getShape(),subroffset, m_samples, getShape(),offset,m_axis_offset);              escript::transpose(*leftres,m_left->getShape(),subroffset, m_samples, getShape(),offset,m_axis_offset);
1305          subroffset+=instep;              subroffset+=instep;
1306          offset+=outstep;              offset+=outstep;
1307      }          }
1308      break;          break;
1309      default:      default:
1310      throw DataException("Programmer error - resolveNP1OUTP can not resolve operator "+opToString(m_op)+".");          throw DataException("Programmer error - resolveNP1OUTP can not resolve operator "+opToString(m_op)+".");
1311    }    }
1312    return &m_samples;    return &m_samples;
1313  }  }
1314    
1315    
1316  const DataTypes::ValueType*  const DataTypes::RealVectorType*
1317  DataLazy::resolveNodeNP1OUT_2P(int tid, int sampleNo, size_t& roffset)  DataLazy::resolveNodeNP1OUT_2P(int tid, int sampleNo, size_t& roffset) const
1318  {  {
1319    if (m_readytype!='E')    if (m_readytype!='E')
1320    {    {
# Line 1247  DataLazy::resolveNodeNP1OUT_2P(int tid, Line 1326  DataLazy::resolveNodeNP1OUT_2P(int tid,
1326    }    }
1327    size_t subroffset;    size_t subroffset;
1328    size_t offset;    size_t offset;
1329    const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);    const RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1330    roffset=m_samplesize*tid;    roffset=m_samplesize*tid;
1331    offset=roffset;    offset=roffset;
1332    size_t loop=0;    size_t loop=0;
# Line 1257  DataLazy::resolveNodeNP1OUT_2P(int tid, Line 1336  DataLazy::resolveNodeNP1OUT_2P(int tid,
1336    switch (m_op)    switch (m_op)
1337    {    {
1338      case SWAP:      case SWAP:
1339      for (loop=0;loop<numsteps;++loop)          for (loop=0;loop<numsteps;++loop)
1340      {          {
1341              DataMaths::swapaxes(*leftres,m_left->getShape(),subroffset, m_samples, getShape(),offset, m_axis_offset, m_transpose);              escript::swapaxes(*leftres,m_left->getShape(),subroffset, m_samples, getShape(),offset, m_axis_offset, m_transpose);
1342          subroffset+=instep;              subroffset+=instep;
1343          offset+=outstep;              offset+=outstep;
1344      }          }
1345      break;          break;
1346      default:      default:
1347      throw DataException("Programmer error - resolveNodeNP1OUT2P can not resolve operator "+opToString(m_op)+".");          throw DataException("Programmer error - resolveNodeNP1OUT2P can not resolve operator "+opToString(m_op)+".");
1348    }    }
1349    return &m_samples;    return &m_samples;
1350  }  }
1351    
1352    const DataTypes::RealVectorType*
1353    DataLazy::resolveNodeCondEval(int tid, int sampleNo, size_t& roffset) const
1354    {
1355      if (m_readytype!='E')
1356      {
1357        throw DataException("Programmer error - resolveNodeCondEval should only be called on expanded Data.");
1358      }
1359      if (m_op!=CONDEVAL)
1360      {
1361        throw DataException("Programmer error - resolveNodeCondEval should only be called on CONDEVAL nodes.");
1362      }
1363      size_t subroffset;
1364    
1365      const RealVectorType* maskres=m_mask->resolveNodeSample(tid, sampleNo, subroffset);
1366      const RealVectorType* srcres=0;
1367      if ((*maskres)[subroffset]>0)
1368      {
1369            srcres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1370      }
1371      else
1372      {
1373            srcres=m_right->resolveNodeSample(tid, sampleNo, subroffset);
1374      }
1375    
1376      // Now we need to copy the result
1377    
1378      roffset=m_samplesize*tid;
1379      for (int i=0;i<m_samplesize;++i)
1380      {
1381            m_samples[roffset+i]=(*srcres)[subroffset+i];  
1382      }
1383    
1384      return &m_samples;
1385    }
1386    
1387  // This method assumes that any subexpressions which evaluate to Constant or Tagged Data  // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1388  // have already been collapsed to IDENTITY. So we must have at least one expanded child.  // have already been collapsed to IDENTITY. So we must have at least one expanded child.
# Line 1281  DataLazy::resolveNodeNP1OUT_2P(int tid, Line 1393  DataLazy::resolveNodeNP1OUT_2P(int tid,
1393  // There is an additional complication when scalar operations are considered.  // There is an additional complication when scalar operations are considered.
1394  // For example, 2+Vector.  // For example, 2+Vector.
1395  // In this case each double within the point is treated individually  // In this case each double within the point is treated individually
1396  const DataTypes::ValueType*  const DataTypes::RealVectorType*
1397  DataLazy::resolveNodeBinary(int tid, int sampleNo, size_t& roffset)  DataLazy::resolveNodeBinary(int tid, int sampleNo, size_t& roffset) const
1398  {  {
1399  LAZYDEBUG(cout << "Resolve binary: " << toString() << endl;)  LAZYDEBUG(cout << "Resolve binary: " << toString() << endl;)
1400    
1401    size_t lroffset=0, rroffset=0;    // offsets in the left and right result vectors    size_t lroffset=0, rroffset=0;        // offsets in the left and right result vectors
1402      // first work out which of the children are expanded          // first work out which of the children are expanded
1403    bool leftExp=(m_left->m_readytype=='E');    bool leftExp=(m_left->m_readytype=='E');
1404    bool rightExp=(m_right->m_readytype=='E');    bool rightExp=(m_right->m_readytype=='E');
1405    if (!leftExp && !rightExp)    if (!leftExp && !rightExp)
1406    {    {
1407      throw DataException("Programmer Error - please use collapse if neither argument has type 'E'.");          throw DataException("Programmer Error - please use collapse if neither argument has type 'E'.");
1408    }    }
1409    bool leftScalar=(m_left->getRank()==0);    bool leftScalar=(m_left->getRank()==0);
1410    bool rightScalar=(m_right->getRank()==0);    bool rightScalar=(m_right->getRank()==0);
1411    if ((m_left->getRank()!=m_right->getRank()) && (!leftScalar && !rightScalar))    if ((m_left->getRank()!=m_right->getRank()) && (!leftScalar && !rightScalar))
1412    {    {
1413      throw DataException("resolveBinary - ranks of arguments must match unless one of them is scalar.");          throw DataException("resolveBinary - ranks of arguments must match unless one of them is scalar.");
1414    }    }
1415    size_t leftsize=m_left->getNoValues();    size_t leftsize=m_left->getNoValues();
1416    size_t rightsize=m_right->getNoValues();    size_t rightsize=m_right->getNoValues();
1417    size_t chunksize=1;           // how many doubles will be processed in one go    size_t chunksize=1;                   // how many doubles will be processed in one go
1418    int leftstep=0;       // how far should the left offset advance after each step    int leftstep=0;               // how far should the left offset advance after each step
1419    int rightstep=0;    int rightstep=0;
1420    int numsteps=0;       // total number of steps for the inner loop    int numsteps=0;               // total number of steps for the inner loop
1421    int oleftstep=0;  // the o variables refer to the outer loop    int oleftstep=0;      // the o variables refer to the outer loop
1422    int orightstep=0; // The outer loop is only required in cases where there is an extended scalar    int orightstep=0;     // The outer loop is only required in cases where there is an extended scalar
1423    int onumsteps=1;    int onumsteps=1;
1424        
1425    bool LES=(leftExp && leftScalar); // Left is an expanded scalar    bool LES=(leftExp && leftScalar);     // Left is an expanded scalar
1426    bool RES=(rightExp && rightScalar);    bool RES=(rightExp && rightScalar);
1427    bool LS=(!leftExp && leftScalar); // left is a single scalar    bool LS=(!leftExp && leftScalar);     // left is a single scalar
1428    bool RS=(!rightExp && rightScalar);    bool RS=(!rightExp && rightScalar);
1429    bool LN=(!leftExp && !leftScalar);    // left is a single non-scalar    bool LN=(!leftExp && !leftScalar);    // left is a single non-scalar
1430    bool RN=(!rightExp && !rightScalar);    bool RN=(!rightExp && !rightScalar);
1431    bool LEN=(leftExp && !leftScalar);    // left is an expanded non-scalar    bool LEN=(leftExp && !leftScalar);    // left is an expanded non-scalar
1432    bool REN=(rightExp && !rightScalar);    bool REN=(rightExp && !rightScalar);
1433    
1434    if ((LES && RES) || (LEN && REN)) // both are Expanded scalars or both are expanded non-scalars    if ((LES && RES) || (LEN && REN))     // both are Expanded scalars or both are expanded non-scalars
1435    {    {
1436      chunksize=m_left->getNumDPPSample()*leftsize;          chunksize=m_left->getNumDPPSample()*leftsize;
1437      leftstep=0;          leftstep=0;
1438      rightstep=0;          rightstep=0;
1439      numsteps=1;          numsteps=1;
1440    }    }
1441    else if (LES || RES)    else if (LES || RES)
1442    {    {
1443      chunksize=1;          chunksize=1;
1444      if (LES)        // left is an expanded scalar          if (LES)                // left is an expanded scalar
1445      {          {
1446          if (RS)                  if (RS)
1447          {                  {
1448             leftstep=1;                     leftstep=1;
1449             rightstep=0;                     rightstep=0;
1450             numsteps=m_left->getNumDPPSample();                     numsteps=m_left->getNumDPPSample();
1451          }                  }
1452          else        // RN or REN                  else            // RN or REN
1453          {                  {
1454             leftstep=0;                     leftstep=0;
1455             oleftstep=1;                     oleftstep=1;
1456             rightstep=1;                     rightstep=1;
1457             orightstep=(RN ? -(int)rightsize : 0);                     orightstep=(RN ? -(int)rightsize : 0);
1458             numsteps=rightsize;                     numsteps=rightsize;
1459             onumsteps=m_left->getNumDPPSample();                     onumsteps=m_left->getNumDPPSample();
1460          }                  }
1461      }          }
1462      else        // right is an expanded scalar          else            // right is an expanded scalar
1463      {          {
1464          if (LS)                  if (LS)
1465          {                  {
1466             rightstep=1;                     rightstep=1;
1467             leftstep=0;                     leftstep=0;
1468             numsteps=m_right->getNumDPPSample();                     numsteps=m_right->getNumDPPSample();
1469          }                  }
1470          else                  else
1471          {                  {
1472             rightstep=0;                     rightstep=0;
1473             orightstep=1;                     orightstep=1;
1474             leftstep=1;                     leftstep=1;
1475             oleftstep=(LN ? -(int)leftsize : 0);                     oleftstep=(LN ? -(int)leftsize : 0);
1476             numsteps=leftsize;                     numsteps=leftsize;
1477             onumsteps=m_right->getNumDPPSample();                     onumsteps=m_right->getNumDPPSample();
1478          }                  }
1479      }          }
1480    }    }
1481    else  // this leaves (LEN, RS), (LEN, RN) and their transposes    else  // this leaves (LEN, RS), (LEN, RN) and their transposes
1482    {    {
1483      if (LEN)    // and Right will be a single value          if (LEN)        // and Right will be a single value
1484      {          {
1485          chunksize=rightsize;                  chunksize=rightsize;
1486          leftstep=rightsize;                  leftstep=rightsize;
1487          rightstep=0;                  rightstep=0;
1488          numsteps=m_left->getNumDPPSample();                  numsteps=m_left->getNumDPPSample();
1489          if (RS)                  if (RS)
1490          {                  {
1491             numsteps*=leftsize;                     numsteps*=leftsize;
1492          }                  }
1493      }          }
1494      else    // REN          else    // REN
1495      {          {
1496          chunksize=leftsize;                  chunksize=leftsize;
1497          rightstep=leftsize;                  rightstep=leftsize;
1498          leftstep=0;                  leftstep=0;
1499          numsteps=m_right->getNumDPPSample();                  numsteps=m_right->getNumDPPSample();
1500          if (LS)                  if (LS)
1501          {                  {
1502             numsteps*=rightsize;                     numsteps*=rightsize;
1503          }                  }
1504      }          }
1505    }    }
1506    
1507    int resultStep=max(leftstep,rightstep);   // only one (at most) should be !=0    int resultStep=max(leftstep,rightstep);       // only one (at most) should be !=0
1508      // Get the values of sub-expressions          // Get the values of sub-expressions
1509    const ValueType* left=m_left->resolveNodeSample(tid,sampleNo,lroffset);      const RealVectorType* left=m_left->resolveNodeSample(tid,sampleNo,lroffset);      
1510    const ValueType* right=m_right->resolveNodeSample(tid,sampleNo,rroffset);    const RealVectorType* right=m_right->resolveNodeSample(tid,sampleNo,rroffset);
1511  LAZYDEBUG(cout << "Post sub calls in " << toString() << endl;)  LAZYDEBUG(cout << "Post sub calls in " << toString() << endl;)
1512  LAZYDEBUG(cout << "shapes=" << DataTypes::shapeToString(m_left->getShape()) << "," << DataTypes::shapeToString(m_right->getShape()) << endl;)  LAZYDEBUG(cout << "shapes=" << DataTypes::shapeToString(m_left->getShape()) << "," << DataTypes::shapeToString(m_right->getShape()) << endl;)
1513  LAZYDEBUG(cout << "chunksize=" << chunksize << endl << "leftstep=" << leftstep << " rightstep=" << rightstep;)  LAZYDEBUG(cout << "chunksize=" << chunksize << endl << "leftstep=" << leftstep << " rightstep=" << rightstep;)
# Line 1409  LAZYDEBUG(cout << "Right res["<< rroffse Line 1521  LAZYDEBUG(cout << "Right res["<< rroffse
1521    
1522    
1523    roffset=m_samplesize*tid;    roffset=m_samplesize*tid;
1524    double* resultp=&(m_samples[roffset]);        // results are stored at the vector offset we recieved    double* resultp=&(m_samples[roffset]);                // results are stored at the vector offset we received
1525    switch(m_op)    switch(m_op)
1526    {    {
1527      case ADD:      case ADD:
1528          PROC_OP(NO_ARG,plus<double>());          //PROC_OP(NO_ARG,plus<double>());
1529      break;        escript::binaryOpVectorLazyHelper<real_t, real_t, real_t>(resultp,
1530                 &(*left)[0],
1531                 &(*right)[0],
1532                 chunksize,
1533                 onumsteps,
1534                 numsteps,
1535                 resultStep,
1536                 leftstep,
1537                 rightstep,
1538                 oleftstep,
1539                 orightstep,
1540                 lroffset,
1541                 rroffset,
1542                 escript::ESFunction::PLUSF);  
1543            break;
1544      case SUB:      case SUB:
1545      PROC_OP(NO_ARG,minus<double>());        escript::binaryOpVectorLazyHelper<real_t, real_t, real_t>(resultp,
1546      break;               &(*left)[0],
1547                 &(*right)[0],
1548                 chunksize,
1549                 onumsteps,
1550                 numsteps,
1551                 resultStep,
1552                 leftstep,
1553                 rightstep,
1554                 oleftstep,
1555                 orightstep,
1556                 lroffset,
1557                 rroffset,
1558                 escript::ESFunction::MINUSF);        
1559            //PROC_OP(NO_ARG,minus<double>());
1560            break;
1561      case MUL:      case MUL:
1562      PROC_OP(NO_ARG,multiplies<double>());          //PROC_OP(NO_ARG,multiplies<double>());
1563      break;        escript::binaryOpVectorLazyHelper<real_t, real_t, real_t>(resultp,
1564                 &(*left)[0],
1565                 &(*right)[0],
1566                 chunksize,
1567                 onumsteps,
1568                 numsteps,
1569                 resultStep,
1570                 leftstep,
1571                 rightstep,
1572                 oleftstep,
1573                 orightstep,
1574                 lroffset,
1575                 rroffset,
1576                 escript::ESFunction::MULTIPLIESF);      
1577            break;
1578      case DIV:      case DIV:
1579      PROC_OP(NO_ARG,divides<double>());          //PROC_OP(NO_ARG,divides<double>());
1580      break;        escript::binaryOpVectorLazyHelper<real_t, real_t, real_t>(resultp,
1581                 &(*left)[0],
1582                 &(*right)[0],
1583                 chunksize,
1584                 onumsteps,
1585                 numsteps,
1586                 resultStep,
1587                 leftstep,
1588                 rightstep,
1589                 oleftstep,
1590                 orightstep,
1591                 lroffset,
1592                 rroffset,
1593                 escript::ESFunction::DIVIDESF);          
1594            break;
1595      case POW:      case POW:
1596         PROC_OP(double (double,double),::pow);         //PROC_OP(double (double,double),::pow);
1597      break;        escript::binaryOpVectorLazyHelper<real_t, real_t, real_t>(resultp,
1598                 &(*left)[0],
1599                 &(*right)[0],
1600                 chunksize,
1601                 onumsteps,
1602                 numsteps,
1603                 resultStep,
1604                 leftstep,
1605                 rightstep,
1606                 oleftstep,
1607                 orightstep,
1608                 lroffset,
1609                 rroffset,
1610                 escript::ESFunction::POWF);          
1611            break;
1612      default:      default:
1613      throw DataException("Programmer error - resolveBinary can not resolve operator "+opToString(m_op)+".");          throw DataException("Programmer error - resolveBinary can not resolve operator "+opToString(m_op)+".");
1614    }    }
1615  LAZYDEBUG(cout << "Result res[" << roffset<< "]" << m_samples[roffset] << endl;)  LAZYDEBUG(cout << "Result res[" << roffset<< "]" << m_samples[roffset] << endl;)
1616    return &m_samples;    return &m_samples;
# Line 1438  LAZYDEBUG(cout << "Result res[" << roffs Line 1620  LAZYDEBUG(cout << "Result res[" << roffs
1620  // This method assumes that any subexpressions which evaluate to Constant or Tagged Data  // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1621  // have already been collapsed to IDENTITY. So we must have at least one expanded child.  // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1622  // unlike the other resolve helpers, we must treat these datapoints separately.  // unlike the other resolve helpers, we must treat these datapoints separately.
1623  const DataTypes::ValueType*  const DataTypes::RealVectorType*
1624  DataLazy::resolveNodeTProd(int tid, int sampleNo, size_t& roffset)  DataLazy::resolveNodeTProd(int tid, int sampleNo, size_t& roffset) const
1625  {  {
1626  LAZYDEBUG(cout << "Resolve TensorProduct: " << toString() << endl;)  LAZYDEBUG(cout << "Resolve TensorProduct: " << toString() << endl;)
1627    
1628    size_t lroffset=0, rroffset=0;    // offsets in the left and right result vectors    size_t lroffset=0, rroffset=0;        // offsets in the left and right result vectors
1629      // first work out which of the children are expanded          // first work out which of the children are expanded
1630    bool leftExp=(m_left->m_readytype=='E');    bool leftExp=(m_left->m_readytype=='E');
1631    bool rightExp=(m_right->m_readytype=='E');    bool rightExp=(m_right->m_readytype=='E');
1632    int steps=getNumDPPSample();    int steps=getNumDPPSample();
1633    int leftStep=(leftExp? m_left->getNoValues() : 0);        // do not have scalars as input to this method    int leftStep=(leftExp? m_left->getNoValues() : 0);            // do not have scalars as input to this method
1634    int rightStep=(rightExp?m_right->getNoValues() : 0);    int rightStep=(rightExp?m_right->getNoValues() : 0);
1635    
1636    int resultStep=getNoValues();    int resultStep=getNoValues();
1637    roffset=m_samplesize*tid;    roffset=m_samplesize*tid;
1638    size_t offset=roffset;    size_t offset=roffset;
1639    
1640    const ValueType* left=m_left->resolveNodeSample(tid, sampleNo, lroffset);    const RealVectorType* left=m_left->resolveNodeSample(tid, sampleNo, lroffset);
1641    
1642    const ValueType* right=m_right->resolveNodeSample(tid, sampleNo, rroffset);    const RealVectorType* right=m_right->resolveNodeSample(tid, sampleNo, rroffset);
1643    
1644  LAZYDEBUG(cerr << "[Left shape]=" << DataTypes::shapeToString(m_left->getShape()) << "\n[Right shape]=" << DataTypes::shapeToString(m_right->getShape()) << " result=" <<DataTypes::shapeToString(getShape()) <<  endl;  LAZYDEBUG(cerr << "[Left shape]=" << DataTypes::shapeToString(m_left->getShape()) << "\n[Right shape]=" << DataTypes::shapeToString(m_right->getShape()) << " result=" <<DataTypes::shapeToString(getShape()) <<  endl;
1645  cout << getNoValues() << endl;)  cout << getNoValues() << endl;)
# Line 1471  LAZYDEBUG(cout << "m_samplesize=" << m_s Line 1653  LAZYDEBUG(cout << "m_samplesize=" << m_s
1653  LAZYDEBUG(cout << "outputshape=" << DataTypes::shapeToString(getShape()) << endl;)  LAZYDEBUG(cout << "outputshape=" << DataTypes::shapeToString(getShape()) << endl;)
1654  LAZYDEBUG(cout << "DPPS=" << m_right->getNumDPPSample() <<"."<<endl;)  LAZYDEBUG(cout << "DPPS=" << m_right->getNumDPPSample() <<"."<<endl;)
1655    
1656    double* resultp=&(m_samples[offset]);     // results are stored at the vector offset we recieved    double* resultp=&(m_samples[offset]);         // results are stored at the vector offset we received
1657    switch(m_op)    switch(m_op)
1658    {    {
1659      case PROD:      case PROD:
1660      for (int i=0;i<steps;++i,resultp+=resultStep)          for (int i=0;i<steps;++i,resultp+=resultStep)
1661      {          {
1662            const double *ptr_0 = &((*left)[lroffset]);            const double *ptr_0 = &((*left)[lroffset]);
1663            const double *ptr_1 = &((*right)[rroffset]);            const double *ptr_1 = &((*right)[rroffset]);
1664    
1665  LAZYDEBUG(cout << DataTypes::pointToString(*left, m_left->getShape(),lroffset,"LEFT") << endl;)  LAZYDEBUG(cout << DataTypes::pointToString(*left, m_left->getShape(),lroffset,"LEFT") << endl;)
1666  LAZYDEBUG(cout << DataTypes::pointToString(*right,m_right->getShape(),rroffset, "RIGHT") << endl;)  LAZYDEBUG(cout << DataTypes::pointToString(*right,m_right->getShape(),rroffset, "RIGHT") << endl;)
1667    
1668            matrix_matrix_product(m_SL, m_SM, m_SR, ptr_0, ptr_1, resultp, m_transpose);            matrix_matrix_product(m_SL, m_SM, m_SR, ptr_0, ptr_1, resultp, m_transpose);
1669    
1670        lroffset+=leftStep;            lroffset+=leftStep;
1671        rroffset+=rightStep;            rroffset+=rightStep;
1672      }          }
1673      break;          break;
1674      default:      default:
1675      throw DataException("Programmer error - resolveTProduct can not resolve operator "+opToString(m_op)+".");          throw DataException("Programmer error - resolveTProduct can not resolve operator "+opToString(m_op)+".");
1676    }    }
1677    roffset=offset;    roffset=offset;
1678    return &m_samples;    return &m_samples;
1679  }  }
1680    
1681    
1682  const DataTypes::ValueType*  const DataTypes::RealVectorType*
1683  DataLazy::resolveSample(int sampleNo, size_t& roffset)  DataLazy::resolveSample(int sampleNo, size_t& roffset) const
1684  {  {
1685  #ifdef _OPENMP  #ifdef _OPENMP
1686      int tid=omp_get_thread_num();          int tid=omp_get_thread_num();
1687  #else  #else
1688      int tid=0;          int tid=0;
1689  #endif  #endif
1690      return resolveNodeSample(tid, sampleNo, roffset);  
1691    #ifdef LAZY_STACK_PROF
1692            stackstart[tid]=&tid;
1693            stackend[tid]=&tid;
1694            const DataTypes::RealVectorType* r=resolveNodeSample(tid, sampleNo, roffset);
1695            size_t d=(size_t)stackstart[tid]-(size_t)stackend[tid];
1696            #pragma omp critical
1697            if (d>maxstackuse)
1698            {
1699    cout << "Max resolve Stack use " << d << endl;
1700                    maxstackuse=d;
1701            }
1702            return r;
1703    #else
1704            return resolveNodeSample(tid, sampleNo, roffset);
1705    #endif
1706  }  }
1707    
1708    
# Line 1514  void Line 1711  void
1711  DataLazy::resolveToIdentity()  DataLazy::resolveToIdentity()
1712  {  {
1713     if (m_op==IDENTITY)     if (m_op==IDENTITY)
1714      return;          return;
1715     DataReady_ptr p=resolveNodeWorker();     DataReady_ptr p=resolveNodeWorker();
1716     makeIdentity(p);     makeIdentity(p);
1717  }  }
# Line 1544  DataLazy::resolve() Line 1741  DataLazy::resolve()
1741      return m_id;      return m_id;
1742  }  }
1743    
1744    
1745    /* This is really a static method but I think that caused problems in windows */
1746    void
1747    DataLazy::resolveGroupWorker(std::vector<DataLazy*>& dats)
1748    {
1749      if (dats.empty())
1750      {
1751            return;
1752      }
1753      vector<DataLazy*> work;
1754      FunctionSpace fs=dats[0]->getFunctionSpace();
1755      bool match=true;
1756      for (int i=dats.size()-1;i>=0;--i)
1757      {
1758            if (dats[i]->m_readytype!='E')
1759            {
1760                    dats[i]->collapse();
1761            }
1762            if (dats[i]->m_op!=IDENTITY)
1763            {
1764                    work.push_back(dats[i]);
1765                    if (fs!=dats[i]->getFunctionSpace())
1766                    {
1767                            match=false;
1768                    }
1769            }
1770      }
1771      if (work.empty())
1772      {
1773            return;         // no work to do
1774      }
1775      if (match)    // all functionspaces match.  Yes I realise this is overly strict
1776      {             // it is possible that dats[0] is one of the objects which we discarded and
1777                    // all the other functionspaces match.
1778            vector<DataExpanded*> dep;
1779            vector<RealVectorType*> vecs;
1780            for (int i=0;i<work.size();++i)
1781            {
1782                    dep.push_back(new DataExpanded(fs,work[i]->getShape(), RealVectorType(work[i]->getNoValues())));
1783                    vecs.push_back(&(dep[i]->getVectorRW()));
1784            }
1785            int totalsamples=work[0]->getNumSamples();
1786            const RealVectorType* res=0; // Storage for answer
1787            int sample;
1788            #pragma omp parallel private(sample, res)
1789            {
1790                size_t roffset=0;
1791                #pragma omp for schedule(static)
1792                for (sample=0;sample<totalsamples;++sample)
1793                {
1794                    roffset=0;
1795                    int j;
1796                    for (j=work.size()-1;j>=0;--j)
1797                    {
1798    #ifdef _OPENMP
1799                        res=work[j]->resolveNodeSample(omp_get_thread_num(),sample,roffset);
1800    #else
1801                        res=work[j]->resolveNodeSample(0,sample,roffset);
1802    #endif
1803                        RealVectorType::size_type outoffset=dep[j]->getPointOffset(sample,0);
1804                        memcpy(&((*vecs[j])[outoffset]),&((*res)[roffset]),work[j]->m_samplesize*sizeof(RealVectorType::ElementType));
1805                    }
1806                }
1807            }
1808            // Now we need to load the new results as identity ops into the lazy nodes
1809            for (int i=work.size()-1;i>=0;--i)
1810            {
1811                work[i]->makeIdentity(REFCOUNTNS::dynamic_pointer_cast<DataReady>(dep[i]->getPtr()));
1812            }
1813      }
1814      else  // functionspaces do not match
1815      {
1816            for (int i=0;i<work.size();++i)
1817            {
1818                    work[i]->resolveToIdentity();
1819            }
1820      }
1821    }
1822    
1823    
1824    
1825  // This version of resolve uses storage in each node to hold results  // This version of resolve uses storage in each node to hold results
1826  DataReady_ptr  DataReady_ptr
1827  DataLazy::resolveNodeWorker()  DataLazy::resolveNodeWorker()
1828  {  {
1829    if (m_readytype!='E')     // if the whole sub-expression is Constant or Tagged, then evaluate it normally    if (m_readytype!='E')         // if the whole sub-expression is Constant or Tagged, then evaluate it normally
1830    {    {
1831      collapse();      collapse();
1832    }    }
1833    if (m_op==IDENTITY)       // So a lazy expression of Constant or Tagged data will be returned here.    if (m_op==IDENTITY)           // So a lazy expression of Constant or Tagged data will be returned here.
1834    {    {
1835      return m_id;      return m_id;
1836    }    }
1837      // from this point on we must have m_op!=IDENTITY and m_readytype=='E'          // from this point on we must have m_op!=IDENTITY and m_readytype=='E'
1838    DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(),  ValueType(getNoValues()));    DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(),  RealVectorType(getNoValues()));
1839    ValueType& resvec=result->getVectorRW();    RealVectorType& resvec=result->getVectorRW();
1840    DataReady_ptr resptr=DataReady_ptr(result);    DataReady_ptr resptr=DataReady_ptr(result);
1841    
1842    int sample;    int sample;
1843    int totalsamples=getNumSamples();    int totalsamples=getNumSamples();
1844    const ValueType* res=0;   // Storage for answer    const RealVectorType* res=0;       // Storage for answer
1845  LAZYDEBUG(cout << "Total number of samples=" <<totalsamples << endl;)  LAZYDEBUG(cout << "Total number of samples=" <<totalsamples << endl;)
1846    #pragma omp parallel for private(sample,res) schedule(static)    #pragma omp parallel private(sample,res)
   for (sample=0;sample<totalsamples;++sample)  
1847    {    {
1848      size_t roffset=0;          size_t roffset=0;
1849    #ifdef LAZY_STACK_PROF
1850            stackstart[omp_get_thread_num()]=&roffset;
1851            stackend[omp_get_thread_num()]=&roffset;
1852    #endif
1853            #pragma omp for schedule(static)
1854            for (sample=0;sample<totalsamples;++sample)
1855            {
1856                    roffset=0;
1857  #ifdef _OPENMP  #ifdef _OPENMP
1858      res=resolveNodeSample(omp_get_thread_num(),sample,roffset);                  res=resolveNodeSample(omp_get_thread_num(),sample,roffset);
1859  #else  #else
1860      res=resolveNodeSample(0,sample,roffset);                  res=resolveNodeSample(0,sample,roffset);
1861  #endif  #endif
1862  LAZYDEBUG(cout << "Sample #" << sample << endl;)  LAZYDEBUG(cout << "Sample #" << sample << endl;)
1863  LAZYDEBUG(cout << "Final res[" << roffset<< "]=" << (*res)[roffset] << (*res)[roffset]<< endl; )  LAZYDEBUG(cout << "Final res[" << roffset<< "]=" << (*res)[roffset] << (*res)[roffset]<< endl; )
1864      DataVector::size_type outoffset=result->getPointOffset(sample,0);                  RealVectorType::size_type outoffset=result->getPointOffset(sample,0);
1865      memcpy(&(resvec[outoffset]),&((*res)[roffset]),m_samplesize*sizeof(DataVector::ElementType));                  memcpy(&(resvec[outoffset]),&((*res)[roffset]),m_samplesize*sizeof(RealVectorType::ElementType));
1866            }
1867      }
1868    #ifdef LAZY_STACK_PROF
1869      for (int i=0;i<getNumberOfThreads();++i)
1870      {
1871            size_t r=((size_t)stackstart[i] - (size_t)stackend[i]);
1872    //      cout << i << " " << stackstart[i] << " .. " << stackend[i] << " = " <<  r << endl;
1873            if (r>maxstackuse)
1874            {
1875                    maxstackuse=r;
1876            }
1877    }    }
1878      cout << "Max resolve Stack use=" << maxstackuse << endl;
1879    #endif
1880    return resptr;    return resptr;
1881  }  }
1882    
# Line 1586  std::string Line 1884  std::string
1884  DataLazy::toString() const  DataLazy::toString() const
1885  {  {
1886    ostringstream oss;    ostringstream oss;
1887    oss << "Lazy Data:";    oss << "Lazy Data: [depth=" << m_height<< "] ";
1888    if (escriptParams.getPRINT_LAZY_TREE()==0)    switch (escriptParams.getLAZY_STR_FMT())
   {  
       intoString(oss);  
   }  
   else  
1889    {    {
1890      oss << endl;    case 1:       // tree format
1891      intoTreeString(oss,"");          oss << endl;
1892            intoTreeString(oss,"");
1893            break;
1894      case 2:       // just the depth
1895            break;
1896      default:
1897            intoString(oss);
1898            break;
1899    }    }
1900    return oss.str();    return oss.str();
1901  }  }
# Line 1607  DataLazy::intoString(ostringstream& oss) Line 1908  DataLazy::intoString(ostringstream& oss)
1908    switch (getOpgroup(m_op))    switch (getOpgroup(m_op))
1909    {    {
1910    case G_IDENTITY:    case G_IDENTITY:
1911      if (m_id->isExpanded())          if (m_id->isExpanded())
1912      {          {
1913         oss << "E";             oss << "E";
1914      }          }
1915      else if (m_id->isTagged())          else if (m_id->isTagged())
1916      {          {
1917        oss << "T";            oss << "T";
1918      }          }
1919      else if (m_id->isConstant())          else if (m_id->isConstant())
1920      {          {
1921        oss << "C";            oss << "C";
1922      }          }
1923      else          else
1924      {          {
1925        oss << "?";            oss << "?";
1926      }          }
1927      oss << '@' << m_id.get();          oss << '@' << m_id.get();
1928      break;          break;
1929    case G_BINARY:    case G_BINARY:
1930      oss << '(';          oss << '(';
1931      m_left->intoString(oss);          m_left->intoString(oss);
1932      oss << ' ' << opToString(m_op) << ' ';          oss << ' ' << opToString(m_op) << ' ';
1933      m_right->intoString(oss);          m_right->intoString(oss);
1934      oss << ')';          oss << ')';
1935      break;          break;
1936    case G_UNARY:    case G_UNARY:
1937    case G_UNARY_P:    case G_UNARY_P:
1938    case G_NP1OUT:    case G_NP1OUT:
1939    case G_NP1OUT_P:    case G_NP1OUT_P:
1940    case G_REDUCTION:    case G_REDUCTION:
1941      oss << opToString(m_op) << '(';          oss << opToString(m_op) << '(';
1942      m_left->intoString(oss);          m_left->intoString(oss);
1943      oss << ')';          oss << ')';
1944      break;          break;
1945    case G_TENSORPROD:    case G_TENSORPROD:
1946      oss << opToString(m_op) << '(';          oss << opToString(m_op) << '(';
1947      m_left->intoString(oss);          m_left->intoString(oss);
1948      oss << ", ";          oss << ", ";
1949      m_right->intoString(oss);          m_right->intoString(oss);
1950      oss << ')';          oss << ')';
1951      break;          break;
1952    case G_NP1OUT_2P:    case G_NP1OUT_2P:
1953      oss << opToString(m_op) << '(';          oss << opToString(m_op) << '(';
1954      m_left->intoString(oss);          m_left->intoString(oss);
1955      oss << ", " << m_axis_offset << ", " << m_transpose;          oss << ", " << m_axis_offset << ", " << m_transpose;
1956      oss << ')';          oss << ')';
1957      break;          break;
1958      case G_CONDEVAL:
1959            oss << opToString(m_op)<< '(' ;
1960            m_mask->intoString(oss);
1961            oss << " ? ";
1962            m_left->intoString(oss);
1963            oss << " : ";
1964            m_right->intoString(oss);
1965            oss << ')';
1966            break;
1967    default:    default:
1968      oss << "UNKNOWN";          oss << "UNKNOWN";
1969    }    }
1970  }  }
1971    
# Line 1667  DataLazy::intoTreeString(ostringstream& Line 1977  DataLazy::intoTreeString(ostringstream&
1977    switch (getOpgroup(m_op))    switch (getOpgroup(m_op))
1978    {    {
1979    case G_IDENTITY:    case G_IDENTITY:
1980      if (m_id->isExpanded())          if (m_id->isExpanded())
1981      {          {
1982         oss << "E";             oss << "E";
1983      }          }
1984      else if (m_id->isTagged())          else if (m_id->isTagged())
1985      {          {
1986        oss << "T";            oss << "T";
1987      }          }
1988      else if (m_id->isConstant())          else if (m_id->isConstant())
1989      {          {
1990        oss << "C";            oss << "C";
1991      }          }
1992      else          else
1993      {          {
1994        oss << "?";            oss << "?";
1995      }          }
1996      oss << '@' << m_id.get() << endl;          oss << '@' << m_id.get() << endl;
1997      break;          break;
1998    case G_BINARY:    case G_BINARY:
1999      oss << opToString(m_op) << endl;          oss << opToString(m_op) << endl;
2000      indent+='.';          indent+='.';
2001      m_left->intoTreeString(oss, indent);          m_left->intoTreeString(oss, indent);
2002      m_right->intoTreeString(oss, indent);          m_right->intoTreeString(oss, indent);
2003      break;          break;
2004    case G_UNARY:    case G_UNARY:
2005    case G_UNARY_P:    case G_UNARY_P:
2006    case G_NP1OUT:    case G_NP1OUT:
2007    case G_NP1OUT_P:    case G_NP1OUT_P:
2008    case G_REDUCTION:    case G_REDUCTION:
2009      oss << opToString(m_op) << endl;          oss << opToString(m_op) << endl;
2010      indent+='.';          indent+='.';
2011      m_left->intoTreeString(oss, indent);          m_left->intoTreeString(oss, indent);
2012      break;          break;
2013    case G_TENSORPROD:    case G_TENSORPROD:
2014      oss << opToString(m_op) << endl;          oss << opToString(m_op) << endl;
2015      indent+='.';          indent+='.';
2016      m_left->intoTreeString(oss, indent);          m_left->intoTreeString(oss, indent);
2017      m_right->intoTreeString(oss, indent);          m_right->intoTreeString(oss, indent);
2018      break;          break;
2019    case G_NP1OUT_2P:    case G_NP1OUT_2P:
2020      oss << opToString(m_op) << ", " << m_axis_offset << ", " << m_transpose<< endl;          oss << opToString(m_op) << ", " << m_axis_offset << ", " << m_transpose<< endl;
2021      indent+='.';          indent+='.';
2022      m_left->intoTreeString(oss, indent);          m_left->intoTreeString(oss, indent);
2023      break;          break;
2024    default:    default:
2025      oss << "UNKNOWN";          oss << "UNKNOWN";
2026    }    }
2027  }  }
2028    
2029    
2030  DataAbstract*  DataAbstract*
2031  DataLazy::deepCopy()  DataLazy::deepCopy() const
2032  {  {
2033    switch (getOpgroup(m_op))    switch (getOpgroup(m_op))
2034    {    {
2035    case G_IDENTITY:  return new DataLazy(m_id->deepCopy()->getPtr());    case G_IDENTITY:  return new DataLazy(m_id->deepCopy()->getPtr());
2036    case G_UNARY:    case G_UNARY:
2037    case G_REDUCTION:      return new DataLazy(m_left->deepCopy()->getPtr(),m_op);    case G_REDUCTION:      return new DataLazy(m_left->deepCopy()->getPtr(),m_op);
2038    case G_UNARY_P:   return new DataLazy(m_left->deepCopy()->getPtr(), m_op, m_tol);    case G_UNARY_P:       return new DataLazy(m_left->deepCopy()->getPtr(), m_op, m_tol);
2039    case G_BINARY:    return new DataLazy(m_left->deepCopy()->getPtr(),m_right->deepCopy()->getPtr(),m_op);    case G_BINARY:        return new DataLazy(m_left->deepCopy()->getPtr(),m_right->deepCopy()->getPtr(),m_op);
2040    case G_NP1OUT: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(),m_op);    case G_NP1OUT: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(),m_op);
2041    case G_TENSORPROD: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);    case G_TENSORPROD: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
2042    case G_NP1OUT_P:   return new DataLazy(m_left->deepCopy()->getPtr(),m_op,  m_axis_offset);    case G_NP1OUT_P:   return new DataLazy(m_left->deepCopy()->getPtr(),m_op,  m_axis_offset);
2043    case G_NP1OUT_2P:  return new DataLazy(m_left->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);    case G_NP1OUT_2P:  return new DataLazy(m_left->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
2044    default:    default:
2045      throw DataException("Programmer error - do not know how to deepcopy operator "+opToString(m_op)+".");          throw DataException("Programmer error - do not know how to deepcopy operator "+opToString(m_op)+".");
2046    }    }
2047  }  }
2048    
# Line 1744  DataLazy::deepCopy() Line 2054  DataLazy::deepCopy()
2054  // or it could be some function of the lengths of the DataReady instances which  // or it could be some function of the lengths of the DataReady instances which
2055  // form part of the expression.  // form part of the expression.
2056  // Rather than have people making assumptions, I have disabled the method.  // Rather than have people making assumptions, I have disabled the method.
2057  DataTypes::ValueType::size_type  DataTypes::RealVectorType::size_type
2058  DataLazy::getLength() const  DataLazy::getLength() const
2059  {  {
2060    throw DataException("getLength() does not make sense for lazy data.");    throw DataException("getLength() does not make sense for lazy data.");
# Line 1759  DataLazy::getSlice(const DataTypes::Regi Line 2069  DataLazy::getSlice(const DataTypes::Regi
2069    
2070    
2071  // To do this we need to rely on our child nodes  // To do this we need to rely on our child nodes
2072  DataTypes::ValueType::size_type  DataTypes::RealVectorType::size_type
2073  DataLazy::getPointOffset(int sampleNo,  DataLazy::getPointOffset(int sampleNo,
2074                   int dataPointNo)                   int dataPointNo)
2075  {  {
2076    if (m_op==IDENTITY)    if (m_op==IDENTITY)
2077    {    {
2078      return m_id->getPointOffset(sampleNo,dataPointNo);          return m_id->getPointOffset(sampleNo,dataPointNo);
2079    }    }
2080    if (m_readytype!='E')    if (m_readytype!='E')
2081    {    {
2082      collapse();          collapse();
2083      return m_id->getPointOffset(sampleNo,dataPointNo);          return m_id->getPointOffset(sampleNo,dataPointNo);
2084    }    }
2085    // at this point we do not have an identity node and the expression will be Expanded    // at this point we do not have an identity node and the expression will be Expanded
2086    // so we only need to know which child to ask    // so we only need to know which child to ask
2087    if (m_left->m_readytype=='E')    if (m_left->m_readytype=='E')
2088    {    {
2089      return m_left->getPointOffset(sampleNo,dataPointNo);          return m_left->getPointOffset(sampleNo,dataPointNo);
2090    }    }
2091    else    else
2092    {    {
2093      return m_right->getPointOffset(sampleNo,dataPointNo);          return m_right->getPointOffset(sampleNo,dataPointNo);
2094    }    }
2095  }  }
2096    
2097  // To do this we need to rely on our child nodes  // To do this we need to rely on our child nodes
2098  DataTypes::ValueType::size_type  DataTypes::RealVectorType::size_type
2099  DataLazy::getPointOffset(int sampleNo,  DataLazy::getPointOffset(int sampleNo,
2100                   int dataPointNo) const                   int dataPointNo) const
2101  {  {
2102    if (m_op==IDENTITY)    if (m_op==IDENTITY)
2103    {    {
2104      return m_id->getPointOffset(sampleNo,dataPointNo);          return m_id->getPointOffset(sampleNo,dataPointNo);
2105    }    }
2106    if (m_readytype=='E')    if (m_readytype=='E')
2107    {    {
# Line 1799  DataLazy::getPointOffset(int sampleNo, Line 2109  DataLazy::getPointOffset(int sampleNo,
2109      // so we only need to know which child to ask      // so we only need to know which child to ask
2110      if (m_left->m_readytype=='E')      if (m_left->m_readytype=='E')
2111      {      {
2112      return m_left->getPointOffset(sampleNo,dataPointNo);          return m_left->getPointOffset(sampleNo,dataPointNo);
2113      }      }
2114      else      else
2115      {      {
2116      return m_right->getPointOffset(sampleNo,dataPointNo);          return m_right->getPointOffset(sampleNo,dataPointNo);
2117      }      }
2118    }    }
2119    if (m_readytype=='C')    if (m_readytype=='C')
2120    {    {
2121      return m_left->getPointOffset(sampleNo,dataPointNo); // which child doesn't matter          return m_left->getPointOffset(sampleNo,dataPointNo); // which child doesn't matter
2122    }    }
2123    throw DataException("Programmer error - getPointOffset on lazy data may require collapsing (but this object is marked const).");    throw DataException("Programmer error - getPointOffset on lazy data may require collapsing (but this object is marked const).");
2124  }  }
# Line 1818  DataLazy::getPointOffset(int sampleNo, Line 2128  DataLazy::getPointOffset(int sampleNo,
2128  void  void
2129  DataLazy::setToZero()  DataLazy::setToZero()
2130  {  {
2131  //   DataTypes::ValueType v(getNoValues(),0);  //   DataTypes::RealVectorType v(getNoValues(),0);
2132  //   m_id=DataReady_ptr(new DataConstant(getFunctionSpace(),getShape(),v));  //   m_id=DataReady_ptr(new DataConstant(getFunctionSpace(),getShape(),v));
2133  //   m_op=IDENTITY;  //   m_op=IDENTITY;
2134  //   m_right.reset();    //   m_right.reset();  
# Line 1826  DataLazy::setToZero() Line 2136  DataLazy::setToZero()
2136  //   m_readytype='C';  //   m_readytype='C';
2137  //   m_buffsRequired=1;  //   m_buffsRequired=1;
2138    
2139    privdebug=privdebug;  // to stop the compiler complaining about unused privdebug    (void)privdebug;  // to stop the compiler complaining about unused privdebug
2140    throw DataException("Programmer error - setToZero not supported for DataLazy (DataLazy objects should be read only).");    throw DataException("Programmer error - setToZero not supported for DataLazy (DataLazy objects should be read only).");
2141  }  }
2142    
2143  bool  bool
2144  DataLazy::actsExpanded() const  DataLazy::actsExpanded() const
2145  {  {
2146      return (m_readytype=='E');          return (m_readytype=='E');
2147  }  }
2148    
2149  }   // end namespace  } // end namespace
2150    

Legend:
Removed from v.2770  
changed lines
  Added in v.6125

  ViewVC Help
Powered by ViewVC 1.1.26