/[escript]/branches/clazy/escriptcore/src/DataLazy.cpp

Diff of /branches/clazy/escriptcore/src/DataLazy.cpp


Old: trunk/escript/src/DataLazy.cpp, revision 2092 by jfenwick, Tue Nov 25 04:18:17 2008 UTC
New: trunk/escriptcore/src/DataLazy.cpp, revision 6083 by jfenwick, Tue Mar 22 03:30:18 2016 UTC
# Line 1  Line 1 

/*****************************************************************************
*
* Copyright (c) 2003-2016 by The University of Queensland
* http://www.uq.edu.au
*
* Primary Business: Queensland, Australia
* Licensed under the Open Software License version 3.0
* http://www.opensource.org/licenses/osl-3.0.php
*
* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
* Development 2012-2013 by School of Earth Sciences
* Development from 2014 by Centre for Geoscience Computing (GeoComp)
*
*****************************************************************************/

#include "DataLazy.h"
18    #include "Data.h"
19    #include "DataTypes.h"
20    #include "EscriptParams.h"
21    #include "FunctionSpace.h"
22    #include "Utils.h"
23    #include "DataMaths.h"
24    
#ifdef USE_NETCDF
#include <netcdfcpp.h>
#endif
 #ifdef PASO_MPI  
 #include <mpi.h>  
 #endif  
 #ifdef _OPENMP  
 #include <omp.h>  
 #endif  
 #include "FunctionSpace.h"  
 #include "DataTypes.h"  
 #include "Data.h"  
 #include "UnaryFuncs.h"     // for escript::fsign  
 #include "Utils.h"  
28    
#include <iomanip> // for some fancy formatting in debug
30    
31    using namespace escript::DataTypes;
32    
33    #define NO_ARG
34    
35    // #define LAZYDEBUG(X) if (privdebug){X;}
#define LAZYDEBUG(X)
37    namespace
38    {
39    bool privdebug=false;
40    
41    #define ENABLEDEBUG privdebug=true;
42    #define DISABLEDEBUG privdebug=false;
43    }
44    
45    // #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {cerr << "\n!!!!!!! SIZE LIMIT EXCEEDED " << m_children << ";" << m_height << endl << toString() << endl;resolveToIdentity();}
46    
47    // #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {cerr << "SIZE LIMIT EXCEEDED " << m_height << endl;resolveToIdentity();}
48    
49    
50    #define SIZELIMIT if (m_height>escript::escriptParams.getTOO_MANY_LEVELS())  {if (escript::escriptParams.getLAZY_VERBOSE()){cerr << "SIZE LIMIT EXCEEDED height=" << m_height << endl;}resolveToIdentity();}
51    
/*
How does DataLazy work?
# Line 42  A special operation, IDENTITY, stores an Line 60  A special operation, IDENTITY, stores an
This means that all "internal" nodes in the structure are instances of DataLazy.

Each operation has a string representation as well as an opgroup - eg G_IDENTITY, G_BINARY, ...
Note that IDENTITY is not considered a unary operation.

I am avoiding calling the structure formed a tree because it is not guaranteed to be one (eg c=a+a).
It must however form a DAG (directed acyclic graph).
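To make the DAG point concrete, here is a standalone sketch (plain C++ with a hypothetical Node type, not the escript classes): when the same operand appears on both sides of an operation, the new node simply holds two pointers to the one child instead of two copies.

// Standalone sketch (not escript code): node sharing makes the structure a DAG, not a tree.
#include <cassert>
#include <memory>

struct Node
{
    std::shared_ptr<Node> left, right;   // children (null for leaves)
};

int main()
{
    auto a = std::make_shared<Node>();   // leaf, like an IDENTITY node
    auto c = std::make_shared<Node>();   // models c = a + a
    c->left = a;
    c->right = a;                        // same child twice - shared, not copied
    assert(c->left == c->right);         // one node, two incoming edges: a DAG
    return 0;
}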
# Line 50  I will refer to individual DataLazy obje Line 68  I will refer to individual DataLazy obje
68    
Each node also stores:
- m_readytype \in {'E','T','C','?'} ~ indicates what sort of DataReady would be produced if the expression was
        evaluated.
- m_buffsrequired ~ the largest number of samples which would need to be kept simultaneously in order to
        evaluate the expression.
- m_samplesize ~ the number of doubles stored in a sample.

When a new node is created, the above values are computed based on the values in the child nodes.
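A standalone sketch of that propagation (hypothetical names, not the escript classes), mirroring what the binary constructors later in this file do with m_readytype, m_children and m_height:

// Standalone sketch (not escript code): deriving per-node values from the children.
// 'E' (expanded) dominates 'T' (tagged), which dominates 'C' (constant).
#include <algorithm>

struct NodeInfo
{
    char readytype;      // 'E', 'T' or 'C'
    int  children;       // number of nodes below this one
    int  height;         // longest path down to a leaf
};

NodeInfo combine(const NodeInfo& l, const NodeInfo& r)
{
    NodeInfo n;
    if (l.readytype=='E' || r.readytype=='E')      { n.readytype='E'; }
    else if (l.readytype=='T' || r.readytype=='T') { n.readytype='T'; }
    else                                           { n.readytype='C'; }
    n.children = l.children + r.children + 2;      // both subtrees plus the two child roots
    n.height   = std::max(l.height, r.height) + 1; // what the SIZELIMIT check looks at
    return n;
}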
# Line 73  The convention that I use, is that the r Line 91  The convention that I use, is that the r
For expressions which evaluate to Constant or Tagged, there is a different evaluation method.
The collapse method invokes the (non-lazy) operations on the Data class to evaluate the expression.

To add a new operator you need to do the following (plus anything I might have forgotten - adding a new group for example):
1) Add to the ES_optype.
2) determine what opgroup your operation belongs to (X)
3) add a string for the op to the end of ES_opstrings
# Line 93  namespace escript Line 111  namespace escript
111  namespace  namespace
112  {  {
113    
114    
115    // enabling this will print out when ever the maximum stacksize used by resolve increases
116    // it assumes _OPENMP is also in use
117    //#define LAZY_STACK_PROF
118    
119    
120    
121    #ifndef _OPENMP
122      #ifdef LAZY_STACK_PROF
123      #undef LAZY_STACK_PROF
124      #endif
125    #endif
126    
127    
128    #ifdef LAZY_STACK_PROF
129    std::vector<void*> stackstart(getNumberOfThreads());
130    std::vector<void*> stackend(getNumberOfThreads());
131    size_t maxstackuse=0;
132    #endif
133    
enum ES_opgroup
{
   G_UNKNOWN,
   G_IDENTITY,
   G_BINARY,            // pointwise operations with two arguments
   G_UNARY,             // pointwise operations with one argument
   G_UNARY_P,           // pointwise operations with one argument, requiring a parameter
   G_NP1OUT,            // non-pointwise op with one output
   G_NP1OUT_P,          // non-pointwise op with one output requiring a parameter
   G_TENSORPROD,        // general tensor product
   G_NP1OUT_2P,         // non-pointwise op with one output requiring two params
   G_REDUCTION,         // non-pointwise unary op with a scalar output
   G_CONDEVAL
};


string ES_opstrings[]={"UNKNOWN","IDENTITY","+","-","*","/","^",
                        "sin","cos","tan",
                        "asin","acos","atan","sinh","cosh","tanh","erf",
                        "asinh","acosh","atanh",
                        "log10","log","sign","abs","neg","pos","exp","sqrt",
                        "1/","where>0","where<0","where>=0","where<=0", "where<>0","where=0",
                        "symmetric","nonsymmetric",
                        "prod",
                        "transpose", "trace",
                        "swapaxes",
                        "minval", "maxval",
                        "condEval"};
int ES_opcount=44;
ES_opgroup opgroups[]={G_UNKNOWN,G_IDENTITY,G_BINARY,G_BINARY,G_BINARY,G_BINARY, G_BINARY,
                        G_UNARY,G_UNARY,G_UNARY, //10
                        G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,        // 17
                        G_UNARY,G_UNARY,G_UNARY,                                        // 20
                        G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,        // 28
                        G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY, G_UNARY_P, G_UNARY_P,          // 35
                        G_NP1OUT,G_NP1OUT,
                        G_TENSORPROD,
                        G_NP1OUT_P, G_NP1OUT_P,
                        G_NP1OUT_2P,
                        G_REDUCTION, G_REDUCTION,
                        G_CONDEVAL};
inline
ES_opgroup
getOpgroup(ES_optype op)
# Line 138  getOpgroup(ES_optype op) Line 185  getOpgroup(ES_optype op)
FunctionSpace
resultFS(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
{
        // perhaps this should call interpolate and throw or something?
        // maybe we need an interpolate node -
        // that way, if interpolate is required in any other op we can just throw a
        // programming error exception.

  FunctionSpace l=left->getFunctionSpace();
  FunctionSpace r=right->getFunctionSpace();
  if (l!=r)
  {
    signed char res=r.getDomain()->preferredInterpolationOnDomain(r.getTypeCode(), l.getTypeCode());
    if (res==1)
    {
        return l;
    }
    if (res==-1)
    {
        return r;
    }
    throw DataException("Cannot interpolate between the FunctionSpaces given for operation "+opToString(op)+".");
  }
# Line 165  resultFS(DataAbstract_ptr left, DataAbst Line 213  resultFS(DataAbstract_ptr left, DataAbst
DataTypes::ShapeType
resultShape(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
{
        if (left->getShape()!=right->getShape())
        {
          if ((getOpgroup(op)!=G_BINARY) && (getOpgroup(op)!=G_NP1OUT))
          {
                throw DataException("Shapes not the same - shapes must match for (point)binary operations.");
          }

          if (left->getRank()==0)       // we need to allow scalar * anything
          {
                return right->getShape();
          }
          if (right->getRank()==0)
          {
                return left->getShape();
          }
          throw DataException("Shapes not the same - arguments must have matching shapes (or be scalars) for (point)binary operations on lazy data.");
        }
        return left->getShape();
}
235    
// return the shape for "op left"

DataTypes::ShapeType
resultShape(DataAbstract_ptr left, ES_optype op, int axis_offset)
{
        switch(op)
        {
        case TRANS:
           {                    // for the scoping of variables
                const DataTypes::ShapeType& s=left->getShape();
                DataTypes::ShapeType sh;
                int rank=left->getRank();
                if (axis_offset<0 || axis_offset>rank)
                {
                    stringstream e;
                    e << "Error - Data::transpose must have 0 <= axis_offset <= rank=" << rank;
252                throw DataException(e.str());
253            }
254            for (int i=0; i<rank; i++)
255                    {
256                       int index = (axis_offset+i)%rank;
257               sh.push_back(s[index]); // Append to new shape
258            }
259                    return sh;
260               }
261            break;
262            case TRACE:
263               {
264                    int rank=left->getRank();
265                    if (rank<2)
266                    {
267                       throw DataException("Trace can only be computed for objects with rank 2 or greater.");
268                    }
269                    if ((axis_offset>rank-2) || (axis_offset<0))
270                    {
271                       throw DataException("Trace: axis offset must lie between 0 and rank-2 inclusive.");
272                    }
273                    if (rank==2)
274                    {
275                       return DataTypes::scalarShape;
276                    }
277                    else if (rank==3)
278                    {
279                       DataTypes::ShapeType sh;
280                       if (axis_offset==0)
281                       {
282                            sh.push_back(left->getShape()[2]);
283                       }
284                       else         // offset==1
285                       {
286                            sh.push_back(left->getShape()[0]);
287                       }
288                       return sh;
289                    }
290                    else if (rank==4)
291                    {
292                       DataTypes::ShapeType sh;
293                       const DataTypes::ShapeType& s=left->getShape();
294                       if (axis_offset==0)
295                       {
296                            sh.push_back(s[2]);
297                            sh.push_back(s[3]);
298                       }
299                       else if (axis_offset==1)
300                       {
301                            sh.push_back(s[0]);
302                            sh.push_back(s[3]);
303                       }
304                       else         // offset==2
305                       {
306                            sh.push_back(s[0]);
307                            sh.push_back(s[1]);
308                       }
309                       return sh;
310                    }
311                    else            // unknown rank
312                    {
313                       throw DataException("Error - Data::trace can only be calculated for rank 2, 3 or 4 object.");
314                    }
315               }
316            break;
317            default:
318            throw DataException("Programmer error - resultShape(left,op) can't compute shapes for operator "+opToString(op)+".");
319            }
}
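Two worked examples of the shape rules implemented above, written as a standalone check (plain C++, not calling the escript functions):

// Standalone sketch (not escript code) checking the transpose and trace shape rules.
#include <cassert>
#include <vector>

int main()
{
    typedef std::vector<int> Shape;

    // transpose: result[i] = s[(axis_offset+i) % rank], so (2,3,4) with axis_offset=1 -> (3,4,2)
    Shape s = {2, 3, 4};
    int axis_offset = 1;
    Shape t;
    for (size_t i = 0; i < s.size(); ++i)
    {
        t.push_back(s[(axis_offset + i) % s.size()]);
    }
    assert((t == Shape{3, 4, 2}));

    // trace on a rank-4 object with axis_offset==1 keeps dimensions 0 and 3,
    // so shape (2,3,3,5) reduces to (2,5).
    Shape u = {2, 3, 3, 5};
    Shape traced = {u[0], u[3]};
    assert((traced == Shape{2, 5}));
    return 0;
}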
321    
322    DataTypes::ShapeType
323    SwapShape(DataAbstract_ptr left, const int axis0, const int axis1)
324    {
325         // This code taken from the Data.cpp swapaxes() method
326         // Some of the checks are probably redundant here
327         int axis0_tmp,axis1_tmp;
328         const DataTypes::ShapeType& s=left->getShape();
329         DataTypes::ShapeType out_shape;
330         // Here's the equivalent of python s_out=s[axis_offset:]+s[:axis_offset]
331         // which goes thru all shape vector elements starting with axis_offset (at index=rank wrap around to 0)
332         int rank=left->getRank();
333         if (rank<2) {
334            throw DataException("Error - Data::swapaxes argument must have at least rank 2.");
335         }
336         if (axis0<0 || axis0>rank-1) {
337            stringstream e;
338            e << "Error - Data::swapaxes: axis0 must be between 0 and rank-1=" << (rank-1);
339            throw DataException(e.str());
340         }
341         if (axis1<0 || axis1>rank-1) {
342            stringstream e;
343            e << "Error - Data::swapaxes: axis1 must be between 0 and rank-1=" << (rank-1);
344            throw DataException(e.str());
345         }
346         if (axis0 == axis1) {
347             throw DataException("Error - Data::swapaxes: axis indices must be different.");
348         }
349         if (axis0 > axis1) {
350             axis0_tmp=axis1;
351             axis1_tmp=axis0;
352         } else {
353             axis0_tmp=axis0;
354             axis1_tmp=axis1;
355         }
356         for (int i=0; i<rank; i++) {
357           if (i == axis0_tmp) {
358              out_shape.push_back(s[axis1_tmp]);
359           } else if (i == axis1_tmp) {
360              out_shape.push_back(s[axis0_tmp]);
361           } else {
362              out_shape.push_back(s[i]);
363           }
364         }
365        return out_shape;
366    }
367    
368    
369  // determine the output shape for the general tensor product operation  // determine the output shape for the general tensor product operation
370  // the additional parameters return information required later for the product  // the additional parameters return information required later for the product
371  // the majority of this code is copy pasted from C_General_Tensor_Product  // the majority of this code is copy pasted from C_General_Tensor_Product
372  DataTypes::ShapeType  DataTypes::ShapeType
373  GTPShape(DataAbstract_ptr left, DataAbstract_ptr right, int axis_offset, int transpose, int& SL, int& SM, int& SR)  GTPShape(DataAbstract_ptr left, DataAbstract_ptr right, int axis_offset, int transpose, int& SL, int& SM, int& SR)
374  {  {
375                
376    // Get rank and shape of inputs    // Get rank and shape of inputs
377    int rank0 = left->getRank();    int rank0 = left->getRank();
378    int rank1 = right->getRank();    int rank1 = right->getRank();
# Line 217  GTPShape(DataAbstract_ptr left, DataAbst Line 381  GTPShape(DataAbstract_ptr left, DataAbst
381    
382    // Prepare for the loops of the product and verify compatibility of shapes    // Prepare for the loops of the product and verify compatibility of shapes
383    int start0=0, start1=0;    int start0=0, start1=0;
384    if (transpose == 0)       {}    if (transpose == 0)           {}
385    else if (transpose == 1)  { start0 = axis_offset; }    else if (transpose == 1)      { start0 = axis_offset; }
386    else if (transpose == 2)  { start1 = rank1-axis_offset; }    else if (transpose == 2)      { start1 = rank1-axis_offset; }
387    else              { throw DataException("DataLazy GeneralTensorProduct Constructor: Error - transpose should be 0, 1 or 2"); }    else                          { throw DataException("DataLazy GeneralTensorProduct Constructor: Error - transpose should be 0, 1 or 2"); }
388    
389    if (rank0<axis_offset)    if (rank0<axis_offset)
390    {    {
391      throw DataException("DataLazy GeneralTensorProduct Constructor: Error - rank of left < axisoffset");          throw DataException("DataLazy GeneralTensorProduct Constructor: Error - rank of left < axisoffset");
392    }    }
393    
394    // Adjust the shapes for transpose    // Adjust the shapes for transpose
395    DataTypes::ShapeType tmpShape0(rank0);    // pre-sizing the vectors rather    DataTypes::ShapeType tmpShape0(rank0);        // pre-sizing the vectors rather
396    DataTypes::ShapeType tmpShape1(rank1);    // than using push_back    DataTypes::ShapeType tmpShape1(rank1);        // than using push_back
397    for (int i=0; i<rank0; i++)   { tmpShape0[i]=shape0[(i+start0)%rank0]; }    for (int i=0; i<rank0; i++)   { tmpShape0[i]=shape0[(i+start0)%rank0]; }
398    for (int i=0; i<rank1; i++)   { tmpShape1[i]=shape1[(i+start1)%rank1]; }    for (int i=0; i<rank1; i++)   { tmpShape1[i]=shape1[(i+start1)%rank1]; }
399    
400    // Prepare for the loops of the product    // Prepare for the loops of the product
401    SL=1, SM=1, SR=1;    SL=1, SM=1, SR=1;
402    for (int i=0; i<rank0-axis_offset; i++)   {    for (int i=0; i<rank0-axis_offset; i++)       {
403      SL *= tmpShape0[i];      SL *= tmpShape0[i];
404    }    }
405    for (int i=rank0-axis_offset; i<rank0; i++)   {    for (int i=rank0-axis_offset; i<rank0; i++)   {
406      if (tmpShape0[i] != tmpShape1[i-(rank0-axis_offset)]) {      if (tmpShape0[i] != tmpShape1[i-(rank0-axis_offset)]) {
407        throw DataException("C_GeneralTensorProduct: Error - incompatible shapes");        throw DataException("C_GeneralTensorProduct: Error - incompatible shapes");
408      }      }
409      SM *= tmpShape0[i];      SM *= tmpShape0[i];
410    }    }
411    for (int i=axis_offset; i<rank1; i++)     {    for (int i=axis_offset; i<rank1; i++)         {
412      SR *= tmpShape1[i];      SR *= tmpShape1[i];
413    }    }
414    
415    // Define the shape of the output (rank of shape is the sum of the loop ranges below)    // Define the shape of the output (rank of shape is the sum of the loop ranges below)
416    DataTypes::ShapeType shape2(rank0+rank1-2*axis_offset);      DataTypes::ShapeType shape2(rank0+rank1-2*axis_offset);      
417    {         // block to limit the scope of out_index    {                     // block to limit the scope of out_index
418       int out_index=0;       int out_index=0;
419       for (int i=0; i<rank0-axis_offset; i++, ++out_index) { shape2[out_index]=tmpShape0[i]; } // First part of arg_0_Z       for (int i=0; i<rank0-axis_offset; i++, ++out_index) { shape2[out_index]=tmpShape0[i]; } // First part of arg_0_Z
420       for (int i=axis_offset; i<rank1; i++, ++out_index)   { shape2[out_index]=tmpShape1[i]; } // Last part of arg_1_Z       for (int i=axis_offset; i<rank1; i++, ++out_index)   { shape2[out_index]=tmpShape1[i]; } // Last part of arg_1_Z
# Line 266  GTPShape(DataAbstract_ptr left, DataAbst Line 430  GTPShape(DataAbstract_ptr left, DataAbst
430    return shape2;    return shape2;
431  }  }
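A worked example of the bookkeeping above, as a standalone sketch (not escript code): with left shape (2,3), right shape (3,4), axis_offset=1 and transpose=0, the contraction is an ordinary matrix-matrix product, giving SL=2, SM=3, SR=4 and output shape (2,4).

// Standalone sketch (not escript code) of the GTPShape loop-size bookkeeping.
#include <cassert>
#include <vector>

int main()
{
    std::vector<int> left = {2, 3}, right = {3, 4};
    int axis_offset = 1;
    int SL = 1, SM = 1, SR = 1;
    for (int i = 0; i < (int)left.size() - axis_offset; ++i)                  SL *= left[i];   // non-contracted left dims
    for (int i = (int)left.size() - axis_offset; i < (int)left.size(); ++i)   SM *= left[i];   // contracted dims
    for (int i = axis_offset; i < (int)right.size(); ++i)                     SR *= right[i];  // non-contracted right dims
    assert(SL == 2 && SM == 3 && SR == 4);   // output shape would be (2,4)
    return 0;
}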
432    
433    }       // end anonymous namespace
 // determine the number of points in the result of "left op right"  
 // note that determining the resultLength for G_TENSORPROD is more complex and will not be processed here  
 // size_t  
 // resultLength(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)  
 // {  
 //    switch (getOpgroup(op))  
 //    {  
 //    case G_BINARY: return left->getLength();  
 //    case G_UNARY: return left->getLength();  
 //    case G_NP1OUT: return left->getLength();  
 //    default:  
 //  throw DataException("Programmer Error - attempt to getLength() for operator "+opToString(op)+".");  
 //    }  
 // }  
   
 // determine the number of samples requires to evaluate an expression combining left and right  
 // NP1OUT needs an extra buffer because we can't write the answers over the top of the input.  
 // The same goes for G_TENSORPROD  
 int  
 calcBuffs(const DataLazy_ptr& left, const DataLazy_ptr& right, ES_optype op)  
 {  
    switch(getOpgroup(op))  
    {  
    case G_IDENTITY: return 1;  
    case G_BINARY: return max(left->getBuffsRequired(),right->getBuffsRequired()+1);  
    case G_UNARY: return max(left->getBuffsRequired(),1);  
    case G_NP1OUT: return 1+max(left->getBuffsRequired(),1);  
    case G_NP1OUT_P: return 1+max(left->getBuffsRequired(),1);  
    case G_TENSORPROD: return 1+max(left->getBuffsRequired(),right->getBuffsRequired()+1);  
    default:  
     throw DataException("Programmer Error - attempt to calcBuffs() for operator "+opToString(op)+".");  
    }  
 }  
   
   
 }   // end anonymous namespace  
434    
435    
436    
# Line 317  opToString(ES_optype op) Line 445  opToString(ES_optype op)
445    return ES_opstrings[op];    return ES_opstrings[op];
446  }  }
447    
448    void DataLazy::LazyNodeSetup()
449    {
450    #ifdef _OPENMP
451        int numthreads=omp_get_max_threads();
452        m_samples.resize(numthreads*m_samplesize);
453        m_sampleids=new int[numthreads];
454        for (int i=0;i<numthreads;++i)
455        {
456            m_sampleids[i]=-1;  
457        }
458    #else
459        m_samples.resize(m_samplesize);
460        m_sampleids=new int[1];
461        m_sampleids[0]=-1;
462    #endif  // _OPENMP
463    }
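The layout established here is what the resolve methods below rely on: thread tid owns the window of m_samples starting at tid*m_samplesize, and m_sampleids[tid] remembers which sample currently sits in that window so a shared node is only evaluated once per sample per thread. A standalone sketch of that indexing (hypothetical names, not the escript classes):

// Standalone sketch (not escript code) of the per-thread slot arithmetic used during resolve.
#include <vector>

int main()
{
    const int numthreads = 4;
    const int samplesize = 6;                          // doubles per sample
    std::vector<double> samples(numthreads * samplesize);
    std::vector<int> sampleids(numthreads, -1);        // -1: slot holds nothing yet

    int tid = 2, sampleNo = 17;
    if (sampleids[tid] != sampleNo)                    // not cached for this thread yet
    {
        sampleids[tid] = sampleNo;
        double* result = &samples[tid * samplesize];   // this thread's window
        for (int i = 0; i < samplesize; ++i) result[i] = 0.0;   // stand-in for real work
    }
    return 0;
}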
464    
465    
466    // Creates an identity node
467  DataLazy::DataLazy(DataAbstract_ptr p)  DataLazy::DataLazy(DataAbstract_ptr p)
468      : parent(p->getFunctionSpace(),p->getShape()),          : parent(p->getFunctionSpace(),p->getShape())
469      m_op(IDENTITY),          ,m_sampleids(0),
470      m_axis_offset(0),          m_samples(1)
     m_transpose(0),  
     m_SL(0), m_SM(0), m_SR(0)  
471  {  {
472     if (p->isLazy())     if (p->isLazy())
473     {     {
474      // I don't want identity of Lazy.          // I don't want identity of Lazy.
475      // Question: Why would that be so bad?          // Question: Why would that be so bad?
476      // Answer: We assume that the child of ID is something we can call getVector on          // Answer: We assume that the child of ID is something we can call getVector on
477      throw DataException("Programmer error - attempt to create identity from a DataLazy.");          throw DataException("Programmer error - attempt to create identity from a DataLazy.");
478     }     }
479     else     else
480     {     {
481      m_id=dynamic_pointer_cast<DataReady>(p);          p->makeLazyShared();
482      if(p->isConstant()) {m_readytype='C';}          DataReady_ptr dr=dynamic_pointer_cast<DataReady>(p);
483      else if(p->isExpanded()) {m_readytype='E';}          makeIdentity(dr);
484      else if (p->isTagged()) {m_readytype='T';}  LAZYDEBUG(cout << "Wrapping " << dr.get() << " id=" << m_id.get() << endl;)
     else {throw DataException("Unknown DataReady instance in DataLazy constructor.");}  
485     }     }
    m_buffsRequired=1;  
    m_samplesize=getNumDPPSample()*getNoValues();  
    m_maxsamplesize=m_samplesize;  
486  LAZYDEBUG(cout << "(1)Lazy created with " << m_samplesize << endl;)  LAZYDEBUG(cout << "(1)Lazy created with " << m_samplesize << endl;)
487  }  }
488    
   
   
   
489  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op)  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op)
490      : parent(left->getFunctionSpace(),left->getShape()),          : parent(left->getFunctionSpace(),(getOpgroup(op)!=G_REDUCTION)?left->getShape():DataTypes::scalarShape),
491      m_op(op),          m_op(op),
492      m_axis_offset(0),          m_axis_offset(0),
493      m_transpose(0),          m_transpose(0),
494      m_SL(0), m_SM(0), m_SR(0)          m_SL(0), m_SM(0), m_SR(0)
495  {  {
496     if ((getOpgroup(op)!=G_UNARY) && (getOpgroup(op)!=G_NP1OUT))     if ((getOpgroup(op)!=G_UNARY) && (getOpgroup(op)!=G_NP1OUT) && (getOpgroup(op)!=G_REDUCTION))
497     {     {
498      throw DataException("Programmer error - constructor DataLazy(left, op) will only process UNARY operations.");          throw DataException("Programmer error - constructor DataLazy(left, op) will only process UNARY operations.");
499     }     }
500    
501     DataLazy_ptr lleft;     DataLazy_ptr lleft;
502     if (!left->isLazy())     if (!left->isLazy())
503     {     {
504      lleft=DataLazy_ptr(new DataLazy(left));          lleft=DataLazy_ptr(new DataLazy(left));
505     }     }
506     else     else
507     {     {
508      lleft=dynamic_pointer_cast<DataLazy>(left);          lleft=dynamic_pointer_cast<DataLazy>(left);
509     }     }
510     m_readytype=lleft->m_readytype;     m_readytype=lleft->m_readytype;
511     m_left=lleft;     m_left=lleft;
    m_buffsRequired=calcBuffs(m_left, m_right,m_op); // yeah m_right will be null at this point  
512     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
513     m_maxsamplesize=max(m_samplesize,m_left->getMaxSampleSize());     m_children=m_left->m_children+1;
514       m_height=m_left->m_height+1;
515       LazyNodeSetup();
516       SIZELIMIT
517  }  }
518    
519    
520  // In this constructor we need to consider interpolation  // In this constructor we need to consider interpolation
521  DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)  DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
522      : parent(resultFS(left,right,op), resultShape(left,right,op)),          : parent(resultFS(left,right,op), resultShape(left,right,op)),
523      m_op(op),          m_op(op),
524      m_SL(0), m_SM(0), m_SR(0)          m_SL(0), m_SM(0), m_SR(0)
525  {  {
526    LAZYDEBUG(cout << "Forming operator with " << left.get() << " " << right.get() << endl;)
527     if ((getOpgroup(op)!=G_BINARY))     if ((getOpgroup(op)!=G_BINARY))
528     {     {
529      throw DataException("Programmer error - constructor DataLazy(left, right, op) will only process BINARY operations.");          throw DataException("Programmer error - constructor DataLazy(left, right, op) will only process BINARY operations.");
530     }     }
531    
532     if (getFunctionSpace()!=left->getFunctionSpace())    // left needs to be interpolated     if (getFunctionSpace()!=left->getFunctionSpace())    // left needs to be interpolated
533     {     {
534      FunctionSpace fs=getFunctionSpace();          FunctionSpace fs=getFunctionSpace();
535      Data ltemp(left);          Data ltemp(left);
536      Data tmp(ltemp,fs);          Data tmp(ltemp,fs);
537      left=tmp.borrowDataPtr();          left=tmp.borrowDataPtr();
538     }     }
539     if (getFunctionSpace()!=right->getFunctionSpace())   // right needs to be interpolated     if (getFunctionSpace()!=right->getFunctionSpace())   // right needs to be interpolated
540     {     {
541      Data tmp(Data(right),getFunctionSpace());          Data tmp(Data(right),getFunctionSpace());
542      right=tmp.borrowDataPtr();          right=tmp.borrowDataPtr();
543    LAZYDEBUG(cout << "Right interpolation required " << right.get() << endl;)
544     }     }
545     left->operandCheck(*right);     left->operandCheck(*right);
546    
547     if (left->isLazy())          // the children need to be DataLazy. Wrap them in IDENTITY if required     if (left->isLazy())                  // the children need to be DataLazy. Wrap them in IDENTITY if required
548     {     {
549      m_left=dynamic_pointer_cast<DataLazy>(left);          m_left=dynamic_pointer_cast<DataLazy>(left);
550    LAZYDEBUG(cout << "Left is " << m_left->toString() << endl;)
551     }     }
552     else     else
553     {     {
554      m_left=DataLazy_ptr(new DataLazy(left));          m_left=DataLazy_ptr(new DataLazy(left));
555    LAZYDEBUG(cout << "Left " << left.get() << " wrapped " << m_left->m_id.get() << endl;)
556     }     }
557     if (right->isLazy())     if (right->isLazy())
558     {     {
559      m_right=dynamic_pointer_cast<DataLazy>(right);          m_right=dynamic_pointer_cast<DataLazy>(right);
560    LAZYDEBUG(cout << "Right is " << m_right->toString() << endl;)
561     }     }
562     else     else
563     {     {
564      m_right=DataLazy_ptr(new DataLazy(right));          m_right=DataLazy_ptr(new DataLazy(right));
565    LAZYDEBUG(cout << "Right " << right.get() << " wrapped " << m_right->m_id.get() << endl;)
566     }     }
567     char lt=m_left->m_readytype;     char lt=m_left->m_readytype;
568     char rt=m_right->m_readytype;     char rt=m_right->m_readytype;
569     if (lt=='E' || rt=='E')     if (lt=='E' || rt=='E')
570     {     {
571      m_readytype='E';          m_readytype='E';
572     }     }
573     else if (lt=='T' || rt=='T')     else if (lt=='T' || rt=='T')
574     {     {
575      m_readytype='T';          m_readytype='T';
576     }     }
577     else     else
578     {     {
579      m_readytype='C';          m_readytype='C';
580     }     }
581     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
582     m_maxsamplesize=max(max(m_samplesize,m_right->getMaxSampleSize()),m_left->getMaxSampleSize());       m_children=m_left->m_children+m_right->m_children+2;
583     m_buffsRequired=calcBuffs(m_left, m_right,m_op);     m_height=max(m_left->m_height,m_right->m_height)+1;
584       LazyNodeSetup();
585       SIZELIMIT
586  LAZYDEBUG(cout << "(3)Lazy created with " << m_samplesize << endl;)  LAZYDEBUG(cout << "(3)Lazy created with " << m_samplesize << endl;)
587  }  }
588    
589  DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op, int axis_offset, int transpose)  DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op, int axis_offset, int transpose)
590      : parent(resultFS(left,right,op), GTPShape(left,right, axis_offset, transpose, m_SL,m_SM, m_SR)),          : parent(resultFS(left,right,op), GTPShape(left,right, axis_offset, transpose, m_SL,m_SM, m_SR)),
591      m_op(op),          m_op(op),
592      m_axis_offset(axis_offset),          m_axis_offset(axis_offset),
593      m_transpose(transpose)          m_transpose(transpose)
594  {  {
595     if ((getOpgroup(op)!=G_TENSORPROD))     if ((getOpgroup(op)!=G_TENSORPROD))
596     {     {
597      throw DataException("Programmer error - constructor DataLazy(left, right, op, ax, tr) will only process BINARY operations which require parameters.");          throw DataException("Programmer error - constructor DataLazy(left, right, op, ax, tr) will only process BINARY operations which require parameters.");
598     }     }
599     if ((transpose>2) || (transpose<0))     if ((transpose>2) || (transpose<0))
600     {     {
601      throw DataException("DataLazy GeneralTensorProduct constructor: Error - transpose should be 0, 1 or 2");          throw DataException("DataLazy GeneralTensorProduct constructor: Error - transpose should be 0, 1 or 2");
602     }     }
603     if (getFunctionSpace()!=left->getFunctionSpace())    // left needs to be interpolated     if (getFunctionSpace()!=left->getFunctionSpace())    // left needs to be interpolated
604     {     {
605      FunctionSpace fs=getFunctionSpace();          FunctionSpace fs=getFunctionSpace();
606      Data ltemp(left);          Data ltemp(left);
607      Data tmp(ltemp,fs);          Data tmp(ltemp,fs);
608      left=tmp.borrowDataPtr();          left=tmp.borrowDataPtr();
609     }     }
610     if (getFunctionSpace()!=right->getFunctionSpace())   // right needs to be interpolated     if (getFunctionSpace()!=right->getFunctionSpace())   // right needs to be interpolated
611     {     {
612      Data tmp(Data(right),getFunctionSpace());          Data tmp(Data(right),getFunctionSpace());
613      right=tmp.borrowDataPtr();          right=tmp.borrowDataPtr();
614     }     }
615     left->operandCheck(*right);  //    left->operandCheck(*right);
616    
617     if (left->isLazy())          // the children need to be DataLazy. Wrap them in IDENTITY if required     if (left->isLazy())                  // the children need to be DataLazy. Wrap them in IDENTITY if required
618     {     {
619      m_left=dynamic_pointer_cast<DataLazy>(left);          m_left=dynamic_pointer_cast<DataLazy>(left);
620     }     }
621     else     else
622     {     {
623      m_left=DataLazy_ptr(new DataLazy(left));          m_left=DataLazy_ptr(new DataLazy(left));
624     }     }
625     if (right->isLazy())     if (right->isLazy())
626     {     {
627      m_right=dynamic_pointer_cast<DataLazy>(right);          m_right=dynamic_pointer_cast<DataLazy>(right);
628     }     }
629     else     else
630     {     {
631      m_right=DataLazy_ptr(new DataLazy(right));          m_right=DataLazy_ptr(new DataLazy(right));
632     }     }
633     char lt=m_left->m_readytype;     char lt=m_left->m_readytype;
634     char rt=m_right->m_readytype;     char rt=m_right->m_readytype;
635     if (lt=='E' || rt=='E')     if (lt=='E' || rt=='E')
636     {     {
637      m_readytype='E';          m_readytype='E';
638     }     }
639     else if (lt=='T' || rt=='T')     else if (lt=='T' || rt=='T')
640     {     {
641      m_readytype='T';          m_readytype='T';
642     }     }
643     else     else
644     {     {
645      m_readytype='C';          m_readytype='C';
646     }     }
647     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
648     m_maxsamplesize=max(max(m_samplesize,m_right->getMaxSampleSize()),m_left->getMaxSampleSize());       m_children=m_left->m_children+m_right->m_children+2;
649     m_buffsRequired=calcBuffs(m_left, m_right,m_op);     m_height=max(m_left->m_height,m_right->m_height)+1;
650       LazyNodeSetup();
651       SIZELIMIT
652  LAZYDEBUG(cout << "(4)Lazy created with " << m_samplesize << endl;)  LAZYDEBUG(cout << "(4)Lazy created with " << m_samplesize << endl;)
653  }  }
654    
655    
656  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, int axis_offset)  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, int axis_offset)
657      : parent(left->getFunctionSpace(), resultShape(left,op)),          : parent(left->getFunctionSpace(), resultShape(left,op, axis_offset)),
658      m_op(op),          m_op(op),
659      m_axis_offset(axis_offset),          m_axis_offset(axis_offset),
660      m_transpose(0)          m_transpose(0),
661            m_tol(0)
662  {  {
663     if ((getOpgroup(op)!=G_NP1OUT_P))     if ((getOpgroup(op)!=G_NP1OUT_P))
664     {     {
665      throw DataException("Programmer error - constructor DataLazy(left, op, ax) will only process UNARY operations which require parameters.");          throw DataException("Programmer error - constructor DataLazy(left, op, ax) will only process UNARY operations which require parameters.");
666     }     }
667     DataLazy_ptr lleft;     DataLazy_ptr lleft;
668     if (!left->isLazy())     if (!left->isLazy())
669     {     {
670      lleft=DataLazy_ptr(new DataLazy(left));          lleft=DataLazy_ptr(new DataLazy(left));
671     }     }
672     else     else
673     {     {
674      lleft=dynamic_pointer_cast<DataLazy>(left);          lleft=dynamic_pointer_cast<DataLazy>(left);
675     }     }
676     m_readytype=lleft->m_readytype;     m_readytype=lleft->m_readytype;
677     m_left=lleft;     m_left=lleft;
    m_buffsRequired=calcBuffs(m_left, m_right,m_op); // yeah m_right will be null at this point  
678     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
679     m_maxsamplesize=max(m_samplesize,m_left->getMaxSampleSize());     m_children=m_left->m_children+1;
680       m_height=m_left->m_height+1;
681       LazyNodeSetup();
682       SIZELIMIT
683  LAZYDEBUG(cout << "(5)Lazy created with " << m_samplesize << endl;)  LAZYDEBUG(cout << "(5)Lazy created with " << m_samplesize << endl;)
684  }  }
685    
686    DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, double tol)
687            : parent(left->getFunctionSpace(), left->getShape()),
688            m_op(op),
689            m_axis_offset(0),
690            m_transpose(0),
691            m_tol(tol)
692    {
693       if ((getOpgroup(op)!=G_UNARY_P))
694       {
695            throw DataException("Programmer error - constructor DataLazy(left, op, tol) will only process UNARY operations which require parameters.");
696       }
697       DataLazy_ptr lleft;
698       if (!left->isLazy())
699       {
700            lleft=DataLazy_ptr(new DataLazy(left));
701       }
702       else
703       {
704            lleft=dynamic_pointer_cast<DataLazy>(left);
705       }
706       m_readytype=lleft->m_readytype;
707       m_left=lleft;
708       m_samplesize=getNumDPPSample()*getNoValues();
709       m_children=m_left->m_children+1;
710       m_height=m_left->m_height+1;
711       LazyNodeSetup();
712       SIZELIMIT
713    LAZYDEBUG(cout << "(6)Lazy created with " << m_samplesize << endl;)
714    }
715    
716  DataLazy::~DataLazy()  
717    DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, const int axis0, const int axis1)
718            : parent(left->getFunctionSpace(), SwapShape(left,axis0,axis1)),
719            m_op(op),
720            m_axis_offset(axis0),
721            m_transpose(axis1),
722            m_tol(0)
723  {  {
724       if ((getOpgroup(op)!=G_NP1OUT_2P))
725       {
726            throw DataException("Programmer error - constructor DataLazy(left, op, axis0, axis1) will only process UNARY operations which require two integer parameters.");
727       }
728       DataLazy_ptr lleft;
729       if (!left->isLazy())
730       {
731            lleft=DataLazy_ptr(new DataLazy(left));
732       }
733       else
734       {
735            lleft=dynamic_pointer_cast<DataLazy>(left);
736       }
737       m_readytype=lleft->m_readytype;
738       m_left=lleft;
739       m_samplesize=getNumDPPSample()*getNoValues();
740       m_children=m_left->m_children+1;
741       m_height=m_left->m_height+1;
742       LazyNodeSetup();
743       SIZELIMIT
744    LAZYDEBUG(cout << "(7)Lazy created with " << m_samplesize << endl;)
745  }  }
746    
747    
748  int  namespace
 DataLazy::getBuffsRequired() const  
749  {  {
750      return m_buffsRequired;  
751        inline int max3(int a, int b, int c)
752        {
753            int t=(a>b?a:b);
754            return (t>c?t:c);
755    
756        }
757  }  }
758    
759    DataLazy::DataLazy(DataAbstract_ptr mask, DataAbstract_ptr left, DataAbstract_ptr right/*, double tol*/)
760            : parent(left->getFunctionSpace(), left->getShape()),
761            m_op(CONDEVAL),
762            m_axis_offset(0),
763            m_transpose(0),
764            m_tol(0)
765    {
766    
767  size_t     DataLazy_ptr lmask;
768  DataLazy::getMaxSampleSize() const     DataLazy_ptr lleft;
769       DataLazy_ptr lright;
770       if (!mask->isLazy())
771       {
772            lmask=DataLazy_ptr(new DataLazy(mask));
773       }
774       else
775       {
776            lmask=dynamic_pointer_cast<DataLazy>(mask);
777       }
778       if (!left->isLazy())
779       {
780            lleft=DataLazy_ptr(new DataLazy(left));
781       }
782       else
783       {
784            lleft=dynamic_pointer_cast<DataLazy>(left);
785       }
786       if (!right->isLazy())
787       {
788            lright=DataLazy_ptr(new DataLazy(right));
789       }
790       else
791       {
792            lright=dynamic_pointer_cast<DataLazy>(right);
793       }
794       m_readytype=lmask->m_readytype;
795       if ((lleft->m_readytype!=lright->m_readytype) || (lmask->m_readytype!=lleft->m_readytype))
796       {
797            throw DataException("Programmer Error - condEval arguments must have the same readytype");
798       }
799       m_left=lleft;
800       m_right=lright;
801       m_mask=lmask;
802       m_samplesize=getNumDPPSample()*getNoValues();
803       m_children=m_left->m_children+m_right->m_children+m_mask->m_children+1;
804       m_height=max3(m_left->m_height,m_right->m_height,m_mask->m_height)+1;
805       LazyNodeSetup();
806       SIZELIMIT
807    LAZYDEBUG(cout << "(8)Lazy created with " << m_samplesize << endl;)
808    }
809    
810    
811    
812    DataLazy::~DataLazy()
813  {  {
814      return m_maxsamplesize;     delete[] m_sampleids;
815  }  }
816    
817    
818  /*  /*
819    \brief Evaluates the expression using methods on Data.    \brief Evaluates the expression using methods on Data.
820    This does the work for the collapse method.    This does the work for the collapse method.
821    For reasons of efficiency do not call this method on DataExpanded nodes.    For reasons of efficiency do not call this method on DataExpanded nodes.
822  */  */
823  DataReady_ptr  DataReady_ptr
824  DataLazy::collapseToReady()  DataLazy::collapseToReady() const
825  {  {
826    if (m_readytype=='E')    if (m_readytype=='E')
827    { // this is more an efficiency concern than anything else    {     // this is more an efficiency concern than anything else
828      throw DataException("Programmer Error - do not use collapse on Expanded data.");      throw DataException("Programmer Error - do not use collapse on Expanded data.");
829    }    }
830    if (m_op==IDENTITY)    if (m_op==IDENTITY)
# Line 577  DataLazy::collapseToReady() Line 842  DataLazy::collapseToReady()
842    switch(m_op)    switch(m_op)
843    {    {
844      case ADD:      case ADD:
845      result=left+right;          result=left+right;
846      break;          break;
847      case SUB:            case SUB:          
848      result=left-right;          result=left-right;
849      break;          break;
850      case MUL:            case MUL:          
851      result=left*right;          result=left*right;
852      break;          break;
853      case DIV:            case DIV:          
854      result=left/right;          result=left/right;
855      break;          break;
856      case SIN:      case SIN:
857      result=left.sin();            result=left.sin();      
858      break;          break;
859      case COS:      case COS:
860      result=left.cos();          result=left.cos();
861      break;          break;
862      case TAN:      case TAN:
863      result=left.tan();          result=left.tan();
864      break;          break;
865      case ASIN:      case ASIN:
866      result=left.asin();          result=left.asin();
867      break;          break;
868      case ACOS:      case ACOS:
869      result=left.acos();          result=left.acos();
870      break;          break;
871      case ATAN:      case ATAN:
872      result=left.atan();          result=left.atan();
873      break;          break;
874      case SINH:      case SINH:
875      result=left.sinh();          result=left.sinh();
876      break;          break;
877      case COSH:      case COSH:
878      result=left.cosh();          result=left.cosh();
879      break;          break;
880      case TANH:      case TANH:
881      result=left.tanh();          result=left.tanh();
882      break;          break;
883      case ERF:      case ERF:
884      result=left.erf();          result=left.erf();
885      break;          break;
886     case ASINH:     case ASINH:
887      result=left.asinh();          result=left.asinh();
888      break;          break;
889     case ACOSH:     case ACOSH:
890      result=left.acosh();          result=left.acosh();
891      break;          break;
892     case ATANH:     case ATANH:
893      result=left.atanh();          result=left.atanh();
894      break;          break;
895      case LOG10:      case LOG10:
896      result=left.log10();          result=left.log10();
897      break;          break;
898      case LOG:      case LOG:
899      result=left.log();          result=left.log();
900      break;          break;
901      case SIGN:      case SIGN:
902      result=left.sign();          result=left.sign();
903      break;          break;
904      case ABS:      case ABS:
905      result=left.abs();          result=left.abs();
906      break;          break;
907      case NEG:      case NEG:
908      result=left.neg();          result=left.neg();
909      break;          break;
910      case POS:      case POS:
911      // it doesn't mean anything for delayed.          // it doesn't mean anything for delayed.
912      // it will just trigger a deep copy of the lazy object          // it will just trigger a deep copy of the lazy object
913      throw DataException("Programmer error - POS not supported for lazy data.");          throw DataException("Programmer error - POS not supported for lazy data.");
914      break;          break;
915      case EXP:      case EXP:
916      result=left.exp();          result=left.exp();
917      break;          break;
918      case SQRT:      case SQRT:
919      result=left.sqrt();          result=left.sqrt();
920      break;          break;
921      case RECIP:      case RECIP:
922      result=left.oneOver();          result=left.oneOver();
923      break;          break;
924      case GZ:      case GZ:
925      result=left.wherePositive();          result=left.wherePositive();
926      break;          break;
927      case LZ:      case LZ:
928      result=left.whereNegative();          result=left.whereNegative();
929      break;          break;
930      case GEZ:      case GEZ:
931      result=left.whereNonNegative();          result=left.whereNonNegative();
932      break;          break;
933      case LEZ:      case LEZ:
934      result=left.whereNonPositive();          result=left.whereNonPositive();
935      break;          break;
936        case NEZ:
937            result=left.whereNonZero(m_tol);
938            break;
939        case EZ:
940            result=left.whereZero(m_tol);
941            break;
942      case SYM:      case SYM:
943      result=left.symmetric();          result=left.symmetric();
944      break;          break;
945      case NSYM:      case NSYM:
946      result=left.nonsymmetric();          result=left.nonsymmetric();
947      break;          break;
948      case PROD:      case PROD:
949      result=C_GeneralTensorProduct(left,right,m_axis_offset, m_transpose);          result=C_GeneralTensorProduct(left,right,m_axis_offset, m_transpose);
950      break;          break;
951      case TRANS:      case TRANS:
952      result=left.transpose(m_axis_offset);          result=left.transpose(m_axis_offset);
953      break;          break;
954      case TRACE:      case TRACE:
955      result=left.trace(m_axis_offset);          result=left.trace(m_axis_offset);
956      break;          break;
957        case SWAP:
958            result=left.swapaxes(m_axis_offset, m_transpose);
959            break;
960        case MINVAL:
961            result=left.minval();
962            break;
963        case MAXVAL:
964          result=left.maxval();
965            break;
966      default:      default:
967      throw DataException("Programmer error - collapseToReady does not know how to resolve operator "+opToString(m_op)+".");          throw DataException("Programmer error - collapseToReady does not know how to resolve operator "+opToString(m_op)+".");
968    }    }
969    return result.borrowReadyPtr();    return result.borrowReadyPtr();
970  }  }
# Line 696  DataLazy::collapseToReady() Line 976  DataLazy::collapseToReady()
976     the purpose of using DataLazy in the first place).     the purpose of using DataLazy in the first place).
977  */  */
978  void  void
979  DataLazy::collapse()  DataLazy::collapse() const
980  {  {
981    if (m_op==IDENTITY)    if (m_op==IDENTITY)
982    {    {
983      return;          return;
984    }    }
985    if (m_readytype=='E')    if (m_readytype=='E')
986    { // this is more an efficiency concern than anything else    {     // this is more an efficiency concern than anything else
987      throw DataException("Programmer Error - do not use collapse on Expanded data.");      throw DataException("Programmer Error - do not use collapse on Expanded data.");
988    }    }
989    m_id=collapseToReady();    m_id=collapseToReady();
990    m_op=IDENTITY;    m_op=IDENTITY;
991  }  }
992    
Removed in revision 6083:

/*
  \brief Compute the value of the expression (unary operation) for the given sample.
  \return Vector which stores the value of the subexpression for the given sample.
  \param v A vector to store intermediate results.
  \param offset Index in v to begin storing results.
  \param sampleNo Sample number to evaluate.
  \param roffset (output parameter) the offset in the return vector where the result begins.

  The return value will be an existing vector so do not deallocate it.
  If the result is stored in v it should be stored at the offset given.
  Everything from offset to the end of v should be considered available for this method to use.
*/
DataTypes::ValueType*
DataLazy::resolveUnary(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const

Added in revision 6083:

// The result will be stored in m_samples
// The return value is a pointer to the DataVector, offset is the offset within the return value
const DataTypes::RealVectorType*
DataLazy::resolveNodeSample(int tid, int sampleNo, size_t& roffset) const
{
LAZYDEBUG(cout << "Resolve sample " << toString() << endl;)
        // collapse so we have a 'E' node or an IDENTITY for some other type
  if (m_readytype!='E' && m_op!=IDENTITY)
1001      {
1002            collapse();
1003      }
1004      if (m_op==IDENTITY)  
1005      {
1006        const RealVectorType& vec=m_id->getVectorRO();
1007        roffset=m_id->getPointOffset(sampleNo, 0);
1008    #ifdef LAZY_STACK_PROF
1009    int x;
1010    if (&x<stackend[omp_get_thread_num()])
1011    {
1012           stackend[omp_get_thread_num()]=&x;
1013    }
1014    #endif
1015        return &(vec);
1016      }
1017      if (m_readytype!='E')
1018      {
1019        throw DataException("Programmer Error - Collapse did not produce an expanded node.");
1020      }
1021      if (m_sampleids[tid]==sampleNo)
1022      {
1023            roffset=tid*m_samplesize;
1024            return &(m_samples);            // sample is already resolved
1025      }
1026      m_sampleids[tid]=sampleNo;
1027    
1028      switch (getOpgroup(m_op))
1029      {
1030      case G_UNARY:
1031      case G_UNARY_P: return resolveNodeUnary(tid, sampleNo, roffset);
1032      case G_BINARY: return resolveNodeBinary(tid, sampleNo, roffset);
1033      case G_NP1OUT: return resolveNodeNP1OUT(tid, sampleNo, roffset);
1034      case G_NP1OUT_P: return resolveNodeNP1OUT_P(tid, sampleNo, roffset);
1035      case G_TENSORPROD: return resolveNodeTProd(tid, sampleNo, roffset);
1036      case G_NP1OUT_2P: return resolveNodeNP1OUT_2P(tid, sampleNo, roffset);
1037      case G_REDUCTION: return resolveNodeReduction(tid, sampleNo, roffset);
1038      case G_CONDEVAL: return resolveNodeCondEval(tid, sampleNo, roffset);
1039      default:
1040        throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");
1041      }
1042    }
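The dispatch above relies on per-thread sample caching: m_sampleids[tid] records the last sample this node resolved for thread tid, so a node that appears several times in an expression is only evaluated once per sample. A simplified, self-contained sketch of that memo follows (illustrative names, zero-fill in place of real evaluation).

#include <cstddef>
#include <vector>

class CachedNode {                       // illustrative stand-in for a lazy node
    std::vector<int>    lastSample;      // one slot per thread, like m_sampleids
    std::vector<double> samples;         // one samplesize chunk per thread, like m_samples
    std::size_t samplesize;
public:
    CachedNode(int nthreads, std::size_t ssize)
      : lastSample(nthreads, -1), samples(nthreads * ssize), samplesize(ssize) {}

    const double* resolve(int tid, int sampleNo)
    {
        double* chunk = &samples[tid * samplesize];
        if (lastSample[tid] == sampleNo)
            return chunk;                // this thread already resolved this sample
        lastSample[tid] = sampleNo;
        for (std::size_t i = 0; i < samplesize; ++i)
            chunk[i] = 0.0;              // a real node would evaluate its operator here
        return chunk;
    }
};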
1043    
1044    const DataTypes::RealVectorType*
1045    DataLazy::resolveNodeUnary(int tid, int sampleNo, size_t& roffset) const
1046    {
1047            // we assume that any collapsing has been done before we get here
1048            // since we only have one argument we don't need to think about only
1049            // processing single points.
1050            // we will also know we won't get identity nodes
1051    if (m_readytype!='E')    if (m_readytype!='E')
1052    {    {
1053      throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");      throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
1054    }    }
  if (m_op==IDENTITY)
  {
    throw DataException("Programmer error - resolveNodeUnary should not be called on identity nodes.");
  }
1059      const DataTypes::RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, roffset);
1060      const double* left=&((*leftres)[roffset]);
1061      roffset=m_samplesize*tid;
1062      double* result=&(m_samples[roffset]);
1063      escript::ESFunction operation=SINF;
1064    switch (m_op)    switch (m_op)
1065    {    {
1066      case SIN:        case SIN:
1067      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sin);      operation=SINF;
1068      break;      break;
1069      case COS:      case COS:
1070      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cos);          operation=COSF;
1071      break;      break;
1072      case TAN:      case TAN:
1073      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tan);          operation=TANF;
1074      break;      break;
1075      case ASIN:      case ASIN:
1076      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::asin);          operation=ASINF;
1077      break;      break;
1078      case ACOS:      case ACOS:
1079      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::acos);          operation=ACOSF;
1080      break;      break;
1081      case ATAN:      case ATAN:
1082      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::atan);          operation=ATANF;
1083      break;      break;
1084      case SINH:      case SINH:
1085      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sinh);          operation=SINHF;
1086      break;      break;
1087      case COSH:      case COSH:
1088      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cosh);          operation=COSHF;
1089      break;      break;
1090      case TANH:      case TANH:
1091      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tanh);          operation=TANHF;
1092      break;      break;
    case ERF:
        operation=ERFF;
        break;
    case ASINH:
        operation=ASINHF;
        break;
    case ACOSH:
        operation=ACOSHF;
        break;
    case ATANH:
        operation=ATANHF;
        break;
1105      case LOG10:      case LOG10:
1106      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log10);          operation=LOG10F;
1107      break;      break;
1108      case LOG:      case LOG:
1109      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log);          operation=LOGF;
1110      break;      break;
1111      case SIGN:      case SIGN:
1112      tensor_unary_operation(m_samplesize, left, result, escript::fsign);          operation=SIGNF;
1113      break;      break;
1114      case ABS:      case ABS:
1115      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::fabs);          operation=ABSF;
1116      break;      break;
1117      case NEG:      case NEG:
1118      tensor_unary_operation(m_samplesize, left, result, negate<double>());          operation=NEGF;
1119      break;      break;
1120      case POS:      case POS:
1121      // it doesn't mean anything for delayed.          // it doesn't mean anything for delayed.
1122      // it will just trigger a deep copy of the lazy object          // it will just trigger a deep copy of the lazy object
1123      throw DataException("Programmer error - POS not supported for lazy data.");          throw DataException("Programmer error - POS not supported for lazy data.");
1124      break;          break;
1125      case EXP:      case EXP:
1126      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::exp);          operation=EXPF;
1127      break;      break;
1128      case SQRT:      case SQRT:
1129      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sqrt);          operation=SQRTF;
1130      break;      break;
1131      case RECIP:      case RECIP:
1132      tensor_unary_operation(m_samplesize, left, result, bind1st(divides<double>(),1.));          operation=INVF;
1133      break;      break;
1134      case GZ:      case GZ:
1135      tensor_unary_operation(m_samplesize, left, result, bind2nd(greater<double>(),0.0));          operation=GTZEROF;
1136      break;      break;
1137      case LZ:      case LZ:
1138      tensor_unary_operation(m_samplesize, left, result, bind2nd(less<double>(),0.0));          operation=LTZEROF;
1139      break;      break;
1140      case GEZ:      case GEZ:
1141      tensor_unary_operation(m_samplesize, left, result, bind2nd(greater_equal<double>(),0.0));          operation=GEZEROF;
1142      break;      break;
1143      case LEZ:      case LEZ:
1144      tensor_unary_operation(m_samplesize, left, result, bind2nd(less_equal<double>(),0.0));          operation=LEZEROF;
1145        break;
1146    // There are actually G_UNARY_P but I don't see a compelling reason to treat them differently
1147        case NEZ:
1148            operation=NEQZEROF;
1149        break;
1150        case EZ:
1151            operation=EQZEROF;
1152      break;      break;
   
1153      default:      default:
1154      throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");          throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");
1155    }    }
  tensor_unary_array_operation(m_samplesize,
1157                                 left,
1158                                 result,
1159                                 operation,
1160                                 m_tol);  
1161      return &(m_samples);
1162  }  }
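Each unary case above only selects an ESFunction tag; the actual arithmetic is then applied elementwise over the sample by tensor_unary_array_operation. A standalone sketch of such an elementwise pass is given below, using a plain switch on an illustrative enum rather than the real helper.

#include <cmath>
#include <cstddef>

enum class UnaryOp { Sin, Abs, Recip };   // illustrative subset of the operators above

inline void apply_unary(std::size_t n, const double* in, double* out, UnaryOp op)
{
    for (std::size_t i = 0; i < n; ++i)   // one pass over the whole sample
    {
        switch (op)
        {
            case UnaryOp::Sin:   out[i] = std::sin(in[i]);  break;
            case UnaryOp::Abs:   out[i] = std::fabs(in[i]); break;
            case UnaryOp::Recip: out[i] = 1.0 / in[i];      break;
        }
    }
}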
1163    
1164    
const DataTypes::RealVectorType*
DataLazy::resolveNodeReduction(int tid, int sampleNo, size_t& roffset) const
1167  {  {
1168      // we assume that any collapsing has been done before we get here          // we assume that any collapsing has been done before we get here
1169      // since we only have one argument we don't need to think about only          // since we only have one argument we don't need to think about only
1170      // processing single points.          // processing single points.
1171            // we will also know we won't get identity nodes
1172    if (m_readytype!='E')    if (m_readytype!='E')
1173    {    {
    throw DataException("Programmer error - resolveNodeReduction should only be called on expanded Data.");
1175    }    }
  if (m_op==IDENTITY)
  {
    throw DataException("Programmer error - resolveNodeReduction should not be called on identity nodes.");
  }
1180      size_t loffset=0;
1181      const DataTypes::RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, loffset);
1182    
1183      roffset=m_samplesize*tid;
1184      unsigned int ndpps=getNumDPPSample();
1185      unsigned int psize=DataTypes::noValues(m_left->getShape());
1186      double* result=&(m_samples[roffset]);
1187      switch (m_op)
1188      {
1189        case MINVAL:
1190            {
1191              for (unsigned int z=0;z<ndpps;++z)
1192              {
1193                FMin op;
1194                *result=escript::reductionOpVector(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max());
1195                loffset+=psize;
1196                result++;
1197              }
1198            }
1199            break;
1200        case MAXVAL:
1201            {
1202              for (unsigned int z=0;z<ndpps;++z)
1203              {
1204              FMax op;
1205              *result=escript::reductionOpVector(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max()*-1);
1206              loffset+=psize;
1207              result++;
1208              }
1209            }
1210            break;
1211        default:
1212            throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");
1213      }
1214      return &(m_samples);
1215    }
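MINVAL and MAXVAL collapse each datapoint (a block of psize values) of the sample down to a single number. A self-contained sketch of the same idea for the minimum follows; it is illustrative only and does not use the escript reductionOpVector helper.

#include <algorithm>
#include <cstddef>
#include <limits>

inline void reduce_min_per_point(const double* sample, double* out,
                                 std::size_t ndpps, std::size_t psize)
{
    for (std::size_t p = 0; p < ndpps; ++p)        // one result per datapoint
    {
        double m = std::numeric_limits<double>::max();
        for (std::size_t i = 0; i < psize; ++i)
            m = std::min(m, sample[p*psize + i]);
        out[p] = m;
    }
}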
1216    
1217    const DataTypes::RealVectorType*
1218    DataLazy::resolveNodeNP1OUT(int tid, int sampleNo, size_t& roffset) const
1219    {
1220            // we assume that any collapsing has been done before we get here
1221            // since we only have one argument we don't need to think about only
1222            // processing single points.
1223      if (m_readytype!='E')
1224      {
1225        throw DataException("Programmer error - resolveNodeNP1OUT should only be called on expanded Data.");
1226      }
1227      if (m_op==IDENTITY)
1228      {
1229        throw DataException("Programmer error - resolveNodeNP1OUT should not be called on identity nodes.");
1230      }
1231      size_t subroffset;
1232      const RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1233      roffset=m_samplesize*tid;
1234      size_t loop=0;
1235      size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1236      size_t step=getNoValues();
1237      size_t offset=roffset;
1238    switch (m_op)    switch (m_op)
1239    {    {
    case SYM:
        for (loop=0;loop<numsteps;++loop)
        {
            escript::symmetric(*leftres,m_left->getShape(),subroffset, m_samples, getShape(), offset);
            subroffset+=step;
            offset+=step;
        }
        break;
    case NSYM:
        for (loop=0;loop<numsteps;++loop)
        {
            escript::nonsymmetric(*leftres,m_left->getShape(),subroffset, m_samples, getShape(), offset);
            subroffset+=step;
            offset+=step;
        }
        break;
    default:
        throw DataException("Programmer error - resolveNP1OUT can not resolve operator "+opToString(m_op)+".");
  }
  return &m_samples;
1260  }  }
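SYM and NSYM take the symmetric and antisymmetric parts of each rank-2 datapoint, i.e. (A + A^T)/2 and (A - A^T)/2. The sketch below works through one square datapoint; the row-major storage convention is an assumption of the sketch, not something taken from the escript helpers.

#include <cstddef>

inline void sym_parts(const double* A, double* S, double* N, std::size_t n)
{
    for (std::size_t i = 0; i < n; ++i)
        for (std::size_t j = 0; j < n; ++j)
        {
            double a = A[i*n + j], at = A[j*n + i];
            S[i*n + j] = 0.5*(a + at);   // symmetric part, as SYM produces
            N[i*n + j] = 0.5*(a - at);   // antisymmetric part, as NSYM appears to produce
        }
}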
1261    
const DataTypes::RealVectorType*
DataLazy::resolveNodeNP1OUT_P(int tid, int sampleNo, size_t& roffset) const
1264  {  {
1265      // we assume that any collapsing has been done before we get here          // we assume that any collapsing has been done before we get here
1266      // since we only have one argument we don't need to think about only          // since we only have one argument we don't need to think about only
1267      // processing single points.          // processing single points.
1268    if (m_readytype!='E')    if (m_readytype!='E')
1269    {    {
1270      throw DataException("Programmer error - resolveNP1OUT_P should only be called on expanded Data.");      throw DataException("Programmer error - resolveNodeNP1OUT_P should only be called on expanded Data.");
1271    }    }
  if (m_op==IDENTITY)
  {
    throw DataException("Programmer error - resolveNodeNP1OUT_P should not be called on identity nodes.");
  }
1276      size_t subroffset;
1277      size_t offset;
1278      const RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1279      roffset=m_samplesize*tid;
1280      offset=roffset;
1281      size_t loop=0;
1282      size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1283      size_t outstep=getNoValues();
1284      size_t instep=m_left->getNoValues();
1285    switch (m_op)    switch (m_op)
1286    {    {
1287      case TRACE:      case TRACE:
        for (loop=0;loop<numsteps;++loop)
        {
1290                escript::trace(*leftres,m_left->getShape(),subroffset, m_samples ,getShape(),offset,m_axis_offset);
1291                subroffset+=instep;
1292                offset+=outstep;
1293            }
1294            break;
1295      case TRANS:      case TRANS:
        for (loop=0;loop<numsteps;++loop)
        {
1298                escript::transpose(*leftres,m_left->getShape(),subroffset, m_samples, getShape(),offset,m_axis_offset);
1299                subroffset+=instep;
1300                offset+=outstep;
1301            }
1302            break;
1303        default:
1304            throw DataException("Programmer error - resolveNP1OUTP can not resolve operator "+opToString(m_op)+".");
1305      }
1306      return &m_samples;
1307    }
1308    
1309    
1310    const DataTypes::RealVectorType*
1311    DataLazy::resolveNodeNP1OUT_2P(int tid, int sampleNo, size_t& roffset) const
1312    {
1313      if (m_readytype!='E')
1314      {
1315        throw DataException("Programmer error - resolveNodeNP1OUT_2P should only be called on expanded Data.");
1316      }
1317      if (m_op==IDENTITY)
1318      {
1319        throw DataException("Programmer error - resolveNodeNP1OUT_2P should not be called on identity nodes.");
1320      }
1321      size_t subroffset;
1322      size_t offset;
1323      const RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1324      roffset=m_samplesize*tid;
1325      offset=roffset;
1326      size_t loop=0;
1327      size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1328      size_t outstep=getNoValues();
1329      size_t instep=m_left->getNoValues();
1330      switch (m_op)
1331      {
1332        case SWAP:
1333            for (loop=0;loop<numsteps;++loop)
1334            {
1335                escript::swapaxes(*leftres,m_left->getShape(),subroffset, m_samples, getShape(),offset, m_axis_offset, m_transpose);
1336                subroffset+=instep;
1337                offset+=outstep;
1338            }
1339            break;
1340      default:      default:
        throw DataException("Programmer error - resolveNodeNP1OUT2P can not resolve operator "+opToString(m_op)+".");
1342    }    }
  return &m_samples;
1344  }  }
1345    
1346    const DataTypes::RealVectorType*
1347    DataLazy::resolveNodeCondEval(int tid, int sampleNo, size_t& roffset) const
1348    {
1349      if (m_readytype!='E')
1350      {
1351        throw DataException("Programmer error - resolveNodeCondEval should only be called on expanded Data.");
1352      }
1353      if (m_op!=CONDEVAL)
1354      {
1355        throw DataException("Programmer error - resolveNodeCondEval should only be called on CONDEVAL nodes.");
1356      }
1357      size_t subroffset;
1358    
  const RealVectorType* maskres=m_mask->resolveNodeSample(tid, sampleNo, subroffset);
  const RealVectorType* srcres=0;
  if ((*maskres)[subroffset]>0)
  {
        srcres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
  }
  else
  {
        srcres=m_right->resolveNodeSample(tid, sampleNo, subroffset);
  }
1369    
1370      // Now we need to copy the result
1371    
1372      roffset=m_samplesize*tid;
1373      for (int i=0;i<m_samplesize;++i)
1374      {
1375            m_samples[roffset+i]=(*srcres)[subroffset+i];  
1376      }
1377    
1378      return &m_samples;
1379    }
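CONDEVAL looks at the mask value for the sample and then copies the whole sample from either the left or the right child, as the copy loop above shows. A minimal sketch of that selection (illustrative names only):

#include <cstddef>

inline void cond_eval_sample(const double* mask,
                             const double* when_true,
                             const double* when_false,
                             double* out, std::size_t samplesize)
{
    // a positive mask entry selects the "true" branch for the whole sample
    const double* src = (mask[0] > 0) ? when_true : when_false;
    for (std::size_t i = 0; i < samplesize; ++i)
        out[i] = src[i];
}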
1380    
 /*  
   \brief Compute the value of the expression (binary operation) for the given sample.  
   \return Vector which stores the value of the subexpression for the given sample.  
   \param v A vector to store intermediate results.  
   \param offset Index in v to begin storing results.  
   \param sampleNo Sample number to evaluate.  
   \param roffset (output parameter) the offset in the return vector where the result begins.  
   
   The return value will be an existing vector so do not deallocate it.  
   If the result is stored in v it should be stored at the offset given.  
   Everything from offset to the end of v should be considered available for this method to use.  
 */  
1381  // This method assumes that any subexpressions which evaluate to Constant or Tagged Data  // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1382  // have already been collapsed to IDENTITY. So we must have at least one expanded child.  // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1383  // If both children are expanded, then we can process them in a single operation (we treat  // If both children are expanded, then we can process them in a single operation (we treat
# Line 952  DataLazy::resolveNP1OUT_P(ValueType& v, Line 1387  DataLazy::resolveNP1OUT_P(ValueType& v,
1387  // There is an additional complication when scalar operations are considered.  // There is an additional complication when scalar operations are considered.
1388  // For example, 2+Vector.  // For example, 2+Vector.
1389  // In this case each double within the point is treated individually  // In this case each double within the point is treated individually
const DataTypes::RealVectorType*
DataLazy::resolveNodeBinary(int tid, int sampleNo, size_t& roffset) const
1392  {  {
1393  LAZYDEBUG(cout << "Resolve binary: " << toString() << endl;)  LAZYDEBUG(cout << "Resolve binary: " << toString() << endl;)
1394    
1395    size_t lroffset=0, rroffset=0;    // offsets in the left and right result vectors    size_t lroffset=0, rroffset=0;        // offsets in the left and right result vectors
1396      // first work out which of the children are expanded          // first work out which of the children are expanded
1397    bool leftExp=(m_left->m_readytype=='E');    bool leftExp=(m_left->m_readytype=='E');
1398    bool rightExp=(m_right->m_readytype=='E');    bool rightExp=(m_right->m_readytype=='E');
1399    if (!leftExp && !rightExp)    if (!leftExp && !rightExp)
1400    {    {
1401      throw DataException("Programmer Error - please use collapse if neither argument has type 'E'.");          throw DataException("Programmer Error - please use collapse if neither argument has type 'E'.");
1402    }    }
1403    bool leftScalar=(m_left->getRank()==0);    bool leftScalar=(m_left->getRank()==0);
1404    bool rightScalar=(m_right->getRank()==0);    bool rightScalar=(m_right->getRank()==0);
  if ((m_left->getRank()!=m_right->getRank()) && (!leftScalar && !rightScalar))
  {
        throw DataException("resolveBinary - ranks of arguments must match unless one of them is scalar.");
  }
  size_t leftsize=m_left->getNoValues();
  size_t rightsize=m_right->getNoValues();
  size_t chunksize=1;                   // how many doubles will be processed in one go
  int leftstep=0;               // how far should the left offset advance after each step
  int rightstep=0;
  int numsteps=0;               // total number of steps for the inner loop
  int oleftstep=0;      // the o variables refer to the outer loop
  int orightstep=0;     // The outer loop is only required in cases where there is an extended scalar
  int onumsteps=1;

  bool LES=(leftExp && leftScalar);     // Left is an expanded scalar
  bool RES=(rightExp && rightScalar);
  bool LS=(!leftExp && leftScalar);     // left is a single scalar
  bool RS=(!rightExp && rightScalar);
  bool LN=(!leftExp && !leftScalar);    // left is a single non-scalar
  bool RN=(!rightExp && !rightScalar);
1425      bool LEN=(leftExp && !leftScalar);    // left is an expanded non-scalar
1426      bool REN=(rightExp && !rightScalar);
1427    
1428      if ((LES && RES) || (LEN && REN))     // both are Expanded scalars or both are expanded non-scalars
1429      {
1430            chunksize=m_left->getNumDPPSample()*leftsize;
1431            leftstep=0;
1432            rightstep=0;
1433            numsteps=1;
1434      }
1435      else if (LES || RES)
1436      {
1437            chunksize=1;
1438            if (LES)                // left is an expanded scalar
1439            {
1440                    if (RS)
1441                    {
1442                       leftstep=1;
1443                       rightstep=0;
1444                       numsteps=m_left->getNumDPPSample();
1445                    }
1446                    else            // RN or REN
1447                    {
1448                       leftstep=0;
1449                       oleftstep=1;
1450                       rightstep=1;
1451                       orightstep=(RN ? -(int)rightsize : 0);
1452                       numsteps=rightsize;
1453                       onumsteps=m_left->getNumDPPSample();
1454                    }
1455            }
1456            else            // right is an expanded scalar
1457            {
1458                    if (LS)
1459                    {
1460                       rightstep=1;
1461                       leftstep=0;
1462                       numsteps=m_right->getNumDPPSample();
1463                    }
1464                    else
1465                    {
1466                       rightstep=0;
1467                       orightstep=1;
1468                       leftstep=1;
1469                       oleftstep=(LN ? -(int)leftsize : 0);
1470                       numsteps=leftsize;
1471                       onumsteps=m_right->getNumDPPSample();
1472                    }
1473            }
1474      }
1475      else  // this leaves (LEN, RS), (LEN, RN) and their transposes
1476      {
1477            if (LEN)        // and Right will be a single value
1478            {
1479                    chunksize=rightsize;
1480                    leftstep=rightsize;
1481                    rightstep=0;
1482                    numsteps=m_left->getNumDPPSample();
1483                    if (RS)
1484                    {
1485                       numsteps*=leftsize;
1486                    }
1487            }
1488            else    // REN
1489            {
1490                    chunksize=leftsize;
1491                    rightstep=leftsize;
1492                    leftstep=0;
1493                    numsteps=m_right->getNumDPPSample();
1494                    if (LS)
1495                    {
1496                       numsteps*=rightsize;
1497                    }
1498            }
1499      }
1500    
1501      int resultStep=max(leftstep,rightstep);       // only one (at most) should be !=0
1502            // Get the values of sub-expressions
1503      const RealVectorType* left=m_left->resolveNodeSample(tid,sampleNo,lroffset);      
1504      const RealVectorType* right=m_right->resolveNodeSample(tid,sampleNo,rroffset);
1505    LAZYDEBUG(cout << "Post sub calls in " << toString() << endl;)
1506    LAZYDEBUG(cout << "shapes=" << DataTypes::shapeToString(m_left->getShape()) << "," << DataTypes::shapeToString(m_right->getShape()) << endl;)
1507    LAZYDEBUG(cout << "chunksize=" << chunksize << endl << "leftstep=" << leftstep << " rightstep=" << rightstep;)
1508    LAZYDEBUG(cout << " numsteps=" << numsteps << endl << "oleftstep=" << oleftstep << " orightstep=" << orightstep;)
1509    LAZYDEBUG(cout << "onumsteps=" << onumsteps << endl;)
1510    LAZYDEBUG(cout << " DPPS=" << m_left->getNumDPPSample() << "," <<m_right->getNumDPPSample() << endl;)
1511    LAZYDEBUG(cout << "" << LS << RS << LN << RN << LES << RES <<LEN << REN <<   endl;)
1512    
1513    LAZYDEBUG(cout << "Left res["<< lroffset<< "]=" << (*left)[lroffset] << endl;)
1514    LAZYDEBUG(cout << "Right res["<< rroffset<< "]=" << (*right)[rroffset] << endl;)
1515    
1516    
1517      roffset=m_samplesize*tid;
1518      double* resultp=&(m_samples[roffset]);                // results are stored at the vector offset we received
1519    switch(m_op)    switch(m_op)
1520    {    {
1521      case ADD:      case ADD:
1522          PROC_OP(NO_ARG,plus<double>());          //PROC_OP(NO_ARG,plus<double>());
1523      break;        escript::binaryOpVectorLazyHelper<real_t, real_t, real_t>(resultp,
1524                 &(*left)[0],
1525                 &(*right)[0],
1526                 chunksize,
1527                 onumsteps,
1528                 numsteps,
1529                 resultStep,
1530                 leftstep,
1531                 rightstep,
1532                 oleftstep,
1533                 orightstep,
1534                 lroffset,
1535                 rroffset,
1536                 escript::ESFunction::PLUSF);  
1537            break;
1538      case SUB:      case SUB:
1539      PROC_OP(NO_ARG,minus<double>());        escript::binaryOpVectorLazyHelper<real_t, real_t, real_t>(resultp,
1540      break;               &(*left)[0],
1541                 &(*right)[0],
1542                 chunksize,
1543                 onumsteps,
1544                 numsteps,
1545                 resultStep,
1546                 leftstep,
1547                 rightstep,
1548                 oleftstep,
1549                 orightstep,
1550                 lroffset,
1551                 rroffset,
1552                 escript::ESFunction::MINUSF);        
1553            //PROC_OP(NO_ARG,minus<double>());
1554            break;
1555      case MUL:      case MUL:
1556      PROC_OP(NO_ARG,multiplies<double>());          //PROC_OP(NO_ARG,multiplies<double>());
1557      break;        escript::binaryOpVectorLazyHelper<real_t, real_t, real_t>(resultp,
1558                 &(*left)[0],
1559                 &(*right)[0],
1560                 chunksize,
1561                 onumsteps,
1562                 numsteps,
1563                 resultStep,
1564                 leftstep,
1565                 rightstep,
1566                 oleftstep,
1567                 orightstep,
1568                 lroffset,
1569                 rroffset,
1570                 escript::ESFunction::MULTIPLIESF);      
1571            break;
1572      case DIV:      case DIV:
1573      PROC_OP(NO_ARG,divides<double>());          //PROC_OP(NO_ARG,divides<double>());
1574      break;        escript::binaryOpVectorLazyHelper<real_t, real_t, real_t>(resultp,
1575                 &(*left)[0],
1576                 &(*right)[0],
1577                 chunksize,
1578                 onumsteps,
1579                 numsteps,
1580                 resultStep,
1581                 leftstep,
1582                 rightstep,
1583                 oleftstep,
1584                 orightstep,
1585                 lroffset,
1586                 rroffset,
1587                 escript::ESFunction::DIVIDESF);          
1588            break;
1589      case POW:      case POW:
1590         PROC_OP(double (double,double),::pow);         //PROC_OP(double (double,double),::pow);
1591      break;        escript::binaryOpVectorLazyHelper<real_t, real_t, real_t>(resultp,
1592                 &(*left)[0],
1593                 &(*right)[0],
1594                 chunksize,
1595                 onumsteps,
1596                 numsteps,
1597                 resultStep,
1598                 leftstep,
1599                 rightstep,
1600                 oleftstep,
1601                 orightstep,
1602                 lroffset,
1603                 rroffset,
1604                 escript::ESFunction::POWF);          
1605            break;
1606      default:      default:
1607      throw DataException("Programmer error - resolveBinary can not resolve operator "+opToString(m_op)+".");          throw DataException("Programmer error - resolveBinary can not resolve operator "+opToString(m_op)+".");
1608    }    }
LAZYDEBUG(cout << "Result res[" << roffset<< "]" << m_samples[roffset] << endl;)
  return &m_samples;
1611  }  }
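Most of the bookkeeping above (LES, RES, chunksize, the inner and outer steps) exists so that a scalar operand can be broadcast against a non-scalar one, the "2+Vector" case mentioned in the comment before this function. Reduced to a single datapoint, the broadcast is simply the loop sketched below (hedged, illustrative names; the real code drives this with the step and offset counters instead).

#include <cstddef>

inline void add_scalar_to_point(double scalar, const double* point,
                                double* out, std::size_t novalues)
{
    for (std::size_t i = 0; i < novalues; ++i)
        out[i] = scalar + point[i];       // one result per component of the datapoint
}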
1612    
1613    
 /*  
   \brief Compute the value of the expression (tensor product) for the given sample.  
   \return Vector which stores the value of the subexpression for the given sample.  
   \param v A vector to store intermediate results.  
   \param offset Index in v to begin storing results.  
   \param sampleNo Sample number to evaluate.  
   \param roffset (output parameter) the offset in the return vector where the result begins.  
   
   The return value will be an existing vector so do not deallocate it.  
   If the result is stored in v it should be stored at the offset given.  
   Everything from offset to the end of v should be considered available for this method to use.  
 */  
1614  // This method assumes that any subexpressions which evaluate to Constant or Tagged Data  // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1615  // have already been collapsed to IDENTITY. So we must have at least one expanded child.  // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1616  // unlike the other resolve helpers, we must treat these datapoints separately.  // unlike the other resolve helpers, we must treat these datapoints separately.
const DataTypes::RealVectorType*
DataLazy::resolveNodeTProd(int tid, int sampleNo, size_t& roffset) const
1619  {  {
1620  LAZYDEBUG(cout << "Resolve TensorProduct: " << toString() << endl;)  LAZYDEBUG(cout << "Resolve TensorProduct: " << toString() << endl;)
1621    
1622    size_t lroffset=0, rroffset=0;    // offsets in the left and right result vectors    size_t lroffset=0, rroffset=0;        // offsets in the left and right result vectors
1623      // first work out which of the children are expanded          // first work out which of the children are expanded
1624    bool leftExp=(m_left->m_readytype=='E');    bool leftExp=(m_left->m_readytype=='E');
1625    bool rightExp=(m_right->m_readytype=='E');    bool rightExp=(m_right->m_readytype=='E');
1626    int steps=getNumDPPSample();    int steps=getNumDPPSample();
  int leftStep=(leftExp? m_left->getNoValues() : 0);            // do not have scalars as input to this method
  int rightStep=(rightExp?m_right->getNoValues() : 0);

  int resultStep=getNoValues();
  roffset=m_samplesize*tid;
  size_t offset=roffset;

  const RealVectorType* left=m_left->resolveNodeSample(tid, sampleNo, lroffset);

  const RealVectorType* right=m_right->resolveNodeSample(tid, sampleNo, rroffset);
1637    
1638    LAZYDEBUG(cerr << "[Left shape]=" << DataTypes::shapeToString(m_left->getShape()) << "\n[Right shape]=" << DataTypes::shapeToString(m_right->getShape()) << " result=" <<DataTypes::shapeToString(getShape()) <<  endl;
1639    cout << getNoValues() << endl;)
1640    
1641    
1642    LAZYDEBUG(cerr << "Post sub calls: " << toString() << endl;)
1643    LAZYDEBUG(cout << "LeftExp=" << leftExp << " rightExp=" << rightExp << endl;)
1644    LAZYDEBUG(cout << "LeftR=" << m_left->getRank() << " rightExp=" << m_right->getRank() << endl;)
1645    LAZYDEBUG(cout << "LeftSize=" << m_left->getNoValues() << " RightSize=" << m_right->getNoValues() << endl;)
1646    LAZYDEBUG(cout << "m_samplesize=" << m_samplesize << endl;)
1647    LAZYDEBUG(cout << "outputshape=" << DataTypes::shapeToString(getShape()) << endl;)
1648    LAZYDEBUG(cout << "DPPS=" << m_right->getNumDPPSample() <<"."<<endl;)
1649    
1650      double* resultp=&(m_samples[offset]);         // results are stored at the vector offset we received
1651    switch(m_op)    switch(m_op)
1652    {    {
1653      case PROD:      case PROD:
1654      for (int i=0;i<steps;++i,resultp+=resultStep)          for (int i=0;i<steps;++i,resultp+=resultStep)
1655      {          {
1656            const double *ptr_0 = &((*left)[lroffset]);            const double *ptr_0 = &((*left)[lroffset]);
1657            const double *ptr_1 = &((*right)[rroffset]);            const double *ptr_1 = &((*right)[rroffset]);
LAZYDEBUG(cout << DataTypes::pointToString(*left, m_left->getShape(),lroffset,"LEFT") << endl;)
LAZYDEBUG(cout << DataTypes::pointToString(*right,m_right->getShape(),rroffset, "RIGHT") << endl;)

          matrix_matrix_product(m_SL, m_SM, m_SR, ptr_0, ptr_1, resultp, m_transpose);

          lroffset+=leftStep;
          rroffset+=rightStep;
        }
        break;
1668      default:      default:
1669      throw DataException("Programmer error - resolveTProduct can not resolve operator "+opToString(m_op)+".");          throw DataException("Programmer error - resolveTProduct can not resolve operator "+opToString(m_op)+".");
1670    }    }
1671    roffset=offset;    roffset=offset;
  return &m_samples;
1673  }  }
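For PROD, each pair of datapoints is contracted over the shared dimension SM. Written for plain row-major rank-2 operands, the contraction is the sketch below; the general matrix_matrix_product also handles higher ranks and the transpose flags, so this is only an illustration of the core loop.

#include <cstddef>

inline void matmul(std::size_t SL, std::size_t SM, std::size_t SR,
                   const double* A, const double* B, double* C)
{
    // C (SL x SR) = A (SL x SM) * B (SM x SR), all stored row-major
    for (std::size_t i = 0; i < SL; ++i)
        for (std::size_t k = 0; k < SR; ++k)
        {
            double acc = 0.0;
            for (std::size_t j = 0; j < SM; ++j)
                acc += A[i*SM + j] * B[j*SR + k];
            C[i*SR + k] = acc;
        }
}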
1674    
1675    
1676    const DataTypes::RealVectorType*
1677    DataLazy::resolveSample(int sampleNo, size_t& roffset) const
1678    {
1679    #ifdef _OPENMP
1680            int tid=omp_get_thread_num();
1681    #else
1682            int tid=0;
1683    #endif
1684    
#ifdef LAZY_STACK_PROF
        stackstart[tid]=&tid;
        stackend[tid]=&tid;
        const DataTypes::RealVectorType* r=resolveNodeSample(tid, sampleNo, roffset);
        size_t d=(size_t)stackstart[tid]-(size_t)stackend[tid];
        #pragma omp critical
        if (d>maxstackuse)
        {
cout << "Max resolve Stack use " << d << endl;
                maxstackuse=d;
        }
        return r;
#else
        return resolveNodeSample(tid, sampleNo, roffset);
#endif
}


// This needs to do the work of the identity constructor
void
DataLazy::resolveToIdentity()
1706  {  {
   if (m_op==IDENTITY)
        return;
   DataReady_ptr p=resolveNodeWorker();
   makeIdentity(p);
}

void DataLazy::makeIdentity(const DataReady_ptr& p)
1714    {
1715       m_op=IDENTITY;
1716       m_axis_offset=0;
1717       m_transpose=0;
1718       m_SL=m_SM=m_SR=0;
1719       m_children=m_height=0;
1720       m_id=p;
1721       if(p->isConstant()) {m_readytype='C';}
1722       else if(p->isExpanded()) {m_readytype='E';}
1723       else if (p->isTagged()) {m_readytype='T';}
1724       else {throw DataException("Unknown DataReady instance in convertToIdentity constructor.");}
1725       m_samplesize=p->getNumDPPSample()*p->getNoValues();
1726       m_left.reset();
1727       m_right.reset();
1728    }
1729    
1730    
1731    DataReady_ptr
1732    DataLazy::resolve()
1733    {
1734        resolveToIdentity();
1735        return m_id;
1736    }
1737    
1738    
1739    /* This is really a static method but I think that caused problems in windows */
1740    void
1741    DataLazy::resolveGroupWorker(std::vector<DataLazy*>& dats)
1742    {
1743      if (dats.empty())
  {
        return;
  }
  vector<DataLazy*> work;
  FunctionSpace fs=dats[0]->getFunctionSpace();
  bool match=true;
1750      for (int i=dats.size()-1;i>=0;--i)
1751      {
1752            if (dats[i]->m_readytype!='E')
1753            {
1754                    dats[i]->collapse();
1755            }
1756            if (dats[i]->m_op!=IDENTITY)
1757            {
1758                    work.push_back(dats[i]);
1759                    if (fs!=dats[i]->getFunctionSpace())
1760                    {
1761                            match=false;
1762                    }
1763            }
1764      }
1765      if (work.empty())
1766      {
1767            return;         // no work to do
1768      }
1769      if (match)    // all functionspaces match.  Yes I realise this is overly strict
1770      {             // it is possible that dats[0] is one of the objects which we discarded and
1771                    // all the other functionspaces match.
1772            vector<DataExpanded*> dep;
1773            vector<RealVectorType*> vecs;
1774            for (int i=0;i<work.size();++i)
1775            {
1776                    dep.push_back(new DataExpanded(fs,work[i]->getShape(), RealVectorType(work[i]->getNoValues())));
1777                    vecs.push_back(&(dep[i]->getVectorRW()));
1778            }
1779            int totalsamples=work[0]->getNumSamples();
1780            const RealVectorType* res=0; // Storage for answer
1781            int sample;
1782            #pragma omp parallel private(sample, res)
1783            {
1784                size_t roffset=0;
1785                #pragma omp for schedule(static)
1786                for (sample=0;sample<totalsamples;++sample)
1787                {
1788                    roffset=0;
1789                    int j;
1790                    for (j=work.size()-1;j>=0;--j)
1791                    {
1792    #ifdef _OPENMP
1793                        res=work[j]->resolveNodeSample(omp_get_thread_num(),sample,roffset);
1794    #else
1795                        res=work[j]->resolveNodeSample(0,sample,roffset);
1796    #endif
1797                        RealVectorType::size_type outoffset=dep[j]->getPointOffset(sample,0);
1798                        memcpy(&((*vecs[j])[outoffset]),&((*res)[roffset]),work[j]->m_samplesize*sizeof(RealVectorType::ElementType));
1799                    }
1800                }
1801            }
1802            // Now we need to load the new results as identity ops into the lazy nodes
1803            for (int i=work.size()-1;i>=0;--i)
1804            {
1805                work[i]->makeIdentity(REFCOUNTNS::dynamic_pointer_cast<DataReady>(dep[i]->getPtr()));
1806            }
1807    }    }
  else  // functionspaces do not match
  {
        for (int i=0;i<work.size();++i)
        {
                work[i]->resolveToIdentity();
        }
1814    }    }
1815  }  }
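resolveGroupWorker resolves several lazy objects inside one parallel sweep over the samples, so subtrees shared between them can benefit from the per-thread sample cache. A self-contained sketch of that calling pattern is given below; the type and method names are illustrative, not escript entry points.

#ifdef _OPENMP
#include <omp.h>
#endif
#include <cstddef>
#include <vector>

struct LazyLike {                         // stand-in for a DataLazy-style node
    int samplesize = 4;
    void resolveSampleInto(int /*tid*/, int /*sample*/, double* out) const {
        for (int i = 0; i < samplesize; ++i) out[i] = 0.0;   // placeholder evaluation
    }
};

inline void resolve_group(std::vector<LazyLike*>& work,
                          std::vector<double*>& outputs,
                          int totalsamples, int samplesize)
{
    #pragma omp parallel for schedule(static)
    for (int sample = 0; sample < totalsamples; ++sample)
    {
        int tid = 0;
#ifdef _OPENMP
        tid = omp_get_thread_num();
#endif
        // resolve every pending object at this sample; shared subtrees stay warm per thread
        for (std::size_t j = 0; j < work.size(); ++j)
            work[j]->resolveSampleInto(tid, sample,
                                       outputs[j] + (std::size_t)sample * samplesize);
    }
}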
1816    
1817    
// This version of resolve uses storage in each node to hold results
DataReady_ptr
DataLazy::resolveNodeWorker()
{
  if (m_readytype!='E')         // if the whole sub-expression is Constant or Tagged, then evaluate it normally
  {
    collapse();
  }
  if (m_op==IDENTITY)           // So a lazy expression of Constant or Tagged data will be returned here.
  {
    return m_id;
  }
        // from this point on we must have m_op!=IDENTITY and m_readytype=='E'
  DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(),  RealVectorType(getNoValues()));
  RealVectorType& resvec=result->getVectorRW();
  DataReady_ptr resptr=DataReady_ptr(result);

  int sample;
  int totalsamples=getNumSamples();
  const RealVectorType* res=0;       // Storage for answer
LAZYDEBUG(cout << "Total number of samples=" <<totalsamples << endl;)
  #pragma omp parallel private(sample,res)
  {
        size_t roffset=0;
#ifdef LAZY_STACK_PROF
        stackstart[omp_get_thread_num()]=&roffset;
        stackend[omp_get_thread_num()]=&roffset;
#endif
        #pragma omp for schedule(static)
        for (sample=0;sample<totalsamples;++sample)
        {
                roffset=0;
#ifdef _OPENMP
                res=resolveNodeSample(omp_get_thread_num(),sample,roffset);
#else
                res=resolveNodeSample(0,sample,roffset);
#endif
LAZYDEBUG(cout << "Sample #" << sample << endl;)
LAZYDEBUG(cout << "Final res[" << roffset<< "]=" << (*res)[roffset] << (*res)[roffset]<< endl; )
                RealVectorType::size_type outoffset=result->getPointOffset(sample,0);
                memcpy(&(resvec[outoffset]),&((*res)[roffset]),m_samplesize*sizeof(RealVectorType::ElementType));
        }
  }
#ifdef LAZY_STACK_PROF
  for (int i=0;i<getNumberOfThreads();++i)
  {
        size_t r=((size_t)stackstart[i] - (size_t)stackend[i]);
//      cout << i << " " << stackstart[i] << " .. " << stackend[i] << " = " <<  r << endl;
        if (r>maxstackuse)
        {
                maxstackuse=r;
        }
  }
  cout << "Max resolve Stack use=" << maxstackuse << endl;
#endif
1874    return resptr;    return resptr;
1875  }  }
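Inside the worker loop, each thread's resolver returns a pointer into its own chunk of m_samples plus an offset, and that chunk is then copied to the sample's offset in the flat output vector. The sketch below shows just that copy step, assuming contiguous samples; the real code asks the result object for getPointOffset instead.

#include <cstddef>
#include <cstring>
#include <vector>

inline void store_sample(std::vector<double>& output,
                         const double* resolved, std::size_t roffset,
                         int sampleNo, std::size_t samplesize)
{
    // contiguous-sample stand-in for result->getPointOffset(sampleNo, 0)
    std::size_t outoffset = (std::size_t)sampleNo * samplesize;
    std::memcpy(&output[outoffset], resolved + roffset,
                samplesize * sizeof(double));
}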
1876    
# Line 1177  std::string Line 1878  std::string
1878  DataLazy::toString() const  DataLazy::toString() const
1879  {  {
1880    ostringstream oss;    ostringstream oss;
  oss << "Lazy Data: [depth=" << m_height<< "] ";
  switch (escriptParams.getLAZY_STR_FMT())
1883      {
1884      case 1:       // tree format
1885            oss << endl;
1886            intoTreeString(oss,"");
1887            break;
1888      case 2:       // just the depth
1889            break;
1890      default:
1891            intoString(oss);
1892            break;
1893      }
1894    return oss.str();    return oss.str();
1895  }  }
1896    
# Line 1186  DataLazy::toString() const Line 1898  DataLazy::toString() const
1898  void  void
1899  DataLazy::intoString(ostringstream& oss) const  DataLazy::intoString(ostringstream& oss) const
1900  {  {
1901    //    oss << "[" << m_children <<";"<<m_height <<"]";
1902    switch (getOpgroup(m_op))    switch (getOpgroup(m_op))
1903    {    {
1904    case G_IDENTITY:    case G_IDENTITY:
1905      if (m_id->isExpanded())          if (m_id->isExpanded())
1906      {          {
1907         oss << "E";             oss << "E";
1908      }          }
1909      else if (m_id->isTagged())          else if (m_id->isTagged())
1910      {          {
1911        oss << "T";            oss << "T";
1912      }          }
1913      else if (m_id->isConstant())          else if (m_id->isConstant())
1914      {          {
1915        oss << "C";            oss << "C";
1916      }          }
1917      else          else
1918      {          {
1919        oss << "?";            oss << "?";
1920      }          }
1921      oss << '@' << m_id.get();          oss << '@' << m_id.get();
1922      break;          break;
1923    case G_BINARY:    case G_BINARY:
1924      oss << '(';          oss << '(';
1925      m_left->intoString(oss);          m_left->intoString(oss);
1926      oss << ' ' << opToString(m_op) << ' ';          oss << ' ' << opToString(m_op) << ' ';
1927      m_right->intoString(oss);          m_right->intoString(oss);
1928      oss << ')';          oss << ')';
1929      break;          break;
1930    case G_UNARY:    case G_UNARY:
1931      case G_UNARY_P:
1932    case G_NP1OUT:    case G_NP1OUT:
1933    case G_NP1OUT_P:    case G_NP1OUT_P:
  case G_REDUCTION:
        oss << opToString(m_op) << '(';
        m_left->intoString(oss);
        oss << ')';
1938            break;
1939    case G_TENSORPROD:    case G_TENSORPROD:
1940      oss << opToString(m_op) << '(';          oss << opToString(m_op) << '(';
1941      m_left->intoString(oss);          m_left->intoString(oss);
1942      oss << ", ";          oss << ", ";
1943      m_right->intoString(oss);          m_right->intoString(oss);
1944      oss << ')';          oss << ')';
1945      break;          break;
1946      case G_NP1OUT_2P:
1947            oss << opToString(m_op) << '(';
1948            m_left->intoString(oss);
1949            oss << ", " << m_axis_offset << ", " << m_transpose;
1950            oss << ')';
1951            break;
1952      case G_CONDEVAL:
1953            oss << opToString(m_op)<< '(' ;
1954            m_mask->intoString(oss);
1955            oss << " ? ";
1956            m_left->intoString(oss);
1957            oss << " : ";
1958            m_right->intoString(oss);
1959            oss << ')';
1960            break;
1961    default:    default:
1962      oss << "UNKNOWN";          oss << "UNKNOWN";
1963    }    }
1964  }  }
1965    
1966    
void
DataLazy::intoTreeString(ostringstream& oss, string indent) const
{
  oss << '[' << m_rank << ':' << setw(3) << m_samplesize << "] " << indent;
  switch (getOpgroup(m_op))
  {
  case G_IDENTITY:
        if (m_id->isExpanded())
        {
           oss << "E";
        }
        else if (m_id->isTagged())
        {
          oss << "T";
        }
        else if (m_id->isConstant())
        {
          oss << "C";
        }
        else
        {
          oss << "?";
        }
        oss << '@' << m_id.get() << endl;
        break;
  case G_BINARY:
        oss << opToString(m_op) << endl;
        indent+='.';
        m_left->intoTreeString(oss, indent);
        m_right->intoTreeString(oss, indent);
        break;
  case G_UNARY:
  case G_UNARY_P:
  case G_NP1OUT:
  case G_NP1OUT_P:
  case G_REDUCTION:
        oss << opToString(m_op) << endl;
        indent+='.';
        m_left->intoTreeString(oss, indent);
        break;
  case G_TENSORPROD:
        oss << opToString(m_op) << endl;
        indent+='.';
        m_left->intoTreeString(oss, indent);
        m_right->intoTreeString(oss, indent);
        break;
  case G_NP1OUT_2P:
        oss << opToString(m_op) << ", " << m_axis_offset << ", " << m_transpose << endl;
        indent+='.';
        m_left->intoTreeString(oss, indent);
        break;
  default:
        oss << "UNKNOWN";
  }
}

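// A sketch of the layout intoTreeString produces (the rank/samplesize values
// and addresses below are invented for illustration; each level of nesting
// adds one '.' to the indent): for the same  E + C  expression the tree view
// would look roughly like
//
//     [1:  8] +
//     [1:  8] .E@0x1a2b3c0
//     [1:  8] .C@0x1a2b400
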
DataAbstract*
DataLazy::deepCopy() const
{
  switch (getOpgroup(m_op))
  {
  case G_IDENTITY:      return new DataLazy(m_id->deepCopy()->getPtr());
  case G_UNARY:
  case G_REDUCTION:     return new DataLazy(m_left->deepCopy()->getPtr(), m_op);
  case G_UNARY_P:       return new DataLazy(m_left->deepCopy()->getPtr(), m_op, m_tol);
  case G_BINARY:        return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(), m_op);
  case G_NP1OUT:        return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(), m_op);
  case G_TENSORPROD:    return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
  case G_NP1OUT_P:      return new DataLazy(m_left->deepCopy()->getPtr(), m_op, m_axis_offset);
  case G_NP1OUT_2P:     return new DataLazy(m_left->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
  default:
        throw DataException("Programmer error - do not know how to deepcopy operator "+opToString(m_op)+".");
  }
}

// There is no single, natural interpretation of getLength on DataLazy.
// Instances of DataReady can look at the size of their vectors.
// For lazy though, it could be the size the data would be if it were resolved;
// or it could be some function of the lengths of the DataReady instances which
// form part of the expression.
// Rather than have people making assumptions, I have disabled the method.
DataTypes::RealVectorType::size_type
DataLazy::getLength() const
{
  throw DataException("getLength() does not make sense for lazy data.");
}
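
// A hypothetical caller-side sketch (kept out of the build): if a length is
// really needed, resolve to a concrete DataReady first and ask that object,
// rather than asking the lazy node.
#if 0
        DataLazy_ptr lz;        // assume this points at some lazy expression
        DataReady_ptr ready = lz->resolve();                    // forces evaluation
        DataTypes::RealVectorType::size_type n = ready->getLength();
#endif
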
# Line 1270  DataLazy::getSlice(const DataTypes::Regi...  Line 2063

// To compute a point offset we need to rely on our child nodes.
DataTypes::RealVectorType::size_type
DataLazy::getPointOffset(int sampleNo,
                 int dataPointNo)
{
  if (m_op==IDENTITY)
  {
        return m_id->getPointOffset(sampleNo,dataPointNo);
  }
  if (m_readytype!='E')
  {
        collapse();
        return m_id->getPointOffset(sampleNo,dataPointNo);
  }
  // at this point we do not have an identity node and the expression will be Expanded
  // so we only need to know which child to ask
  if (m_left->m_readytype=='E')
  {
        return m_left->getPointOffset(sampleNo,dataPointNo);
  }
  else
  {
        return m_right->getPointOffset(sampleNo,dataPointNo);
  }
}

// To compute a point offset we need to rely on our child nodes.
DataTypes::RealVectorType::size_type
DataLazy::getPointOffset(int sampleNo,
                 int dataPointNo) const
{
  if (m_op==IDENTITY)
  {
        return m_id->getPointOffset(sampleNo,dataPointNo);
  }
  if (m_readytype=='E')
  {
    // at this point we do not have an identity node and the expression will be Expanded
    // so we only need to know which child to ask
    if (m_left->m_readytype=='E')
    {
        return m_left->getPointOffset(sampleNo,dataPointNo);
    }
    else
    {
        return m_right->getPointOffset(sampleNo,dataPointNo);
    }
  }
  if (m_readytype=='C')
  {
        return m_left->getPointOffset(sampleNo,dataPointNo); // which child doesn't matter
  }
  throw DataException("Programmer error - getPointOffset on lazy data may require collapsing (but this object is marked const).");
}

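// Worked note on the dispatch in the two getPointOffset overloads above: when
// the result acts expanded ('E'), at least one child is expanded and its
// per-sample offsets are the ones the result uses, so we forward to that
// child; for a constant result ('C') every data point shares the same values,
// so which child we ask does not matter; anything else (e.g. tagged data)
// would have to be collapsed first, which only the non-const overload can do,
// hence the exception above.
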
// I have decided to let Data:: handle this issue.
void
DataLazy::setToZero()
{
//   DataTypes::RealVectorType v(getNoValues(),0);
//   m_id=DataReady_ptr(new DataConstant(getFunctionSpace(),getShape(),v));
//   m_op=IDENTITY;
//   m_right.reset();
//   m_left.reset();
//   m_readytype='C';
//   m_buffsRequired=1;

  (void)privdebug;  // to stop the compiler complaining about unused privdebug
  throw DataException("Programmer error - setToZero not supported for DataLazy (DataLazy objects should be read only).");
}
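
// A hypothetical caller-side sketch (kept out of the build): zeroing is meant
// to be requested through the Data wrapper, which presumably replaces its
// lazy representation with a zero-valued one rather than mutating this node.
#if 0
        Data d;                 // assume d currently holds a lazy expression
        d.setToZero();          // Data:: decides how to honour this; the lazy
                                // node itself never changes
#endif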

bool
DataLazy::actsExpanded() const
{
        return (m_readytype=='E');
}

} // end namespace

