/[escript]/branches/clazy/escriptcore/src/DataLazy.cpp
ViewVC logotype

Diff of /branches/clazy/escriptcore/src/DataLazy.cpp

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

trunk/escript/src/DataLazy.cpp revision 2147 by jfenwick, Wed Dec 10 04:41:26 2008 UTC trunk/escriptcore/src/DataLazy.cpp revision 5997 by caltinay, Mon Feb 29 07:24:47 2016 UTC
# Line 1  Line 1 
1    
2  /*******************************************************  /*****************************************************************************
3  *  *
4  * Copyright (c) 2003-2008 by University of Queensland  * Copyright (c) 2003-2016 by The University of Queensland
5  * Earth Systems Science Computational Center (ESSCC)  * http://www.uq.edu.au
 * http://www.uq.edu.au/esscc  
6  *  *
7  * Primary Business: Queensland, Australia  * Primary Business: Queensland, Australia
8  * Licensed under the Open Software License version 3.0  * Licensed under the Open Software License version 3.0
9  * http://www.opensource.org/licenses/osl-3.0.php  * http://www.opensource.org/licenses/osl-3.0.php
10  *  *
11  *******************************************************/  * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12    * Development 2012-2013 by School of Earth Sciences
13    * Development from 2014 by Centre for Geoscience Computing (GeoComp)
14    *
15    *****************************************************************************/
16    
17    #define ESNEEDPYTHON
18    #include "esysUtils/first.h"
19    
20  #include "DataLazy.h"  #include "DataLazy.h"
21    #include "Data.h"
22    #include "DataTypes.h"
23    #include "EscriptParams.h"
24    #include "FunctionSpace.h"
25    #include "UnaryFuncs.h"    // for escript::fsign
26    #include "Utils.h"
27    
28  #ifdef USE_NETCDF  #ifdef USE_NETCDF
29  #include <netcdfcpp.h>  #include <netcdfcpp.h>
30  #endif  #endif
 #ifdef PASO_MPI  
 #include <mpi.h>  
 #endif  
 #ifdef _OPENMP  
 #include <omp.h>  
 #endif  
 #include "FunctionSpace.h"  
 #include "DataTypes.h"  
 #include "Data.h"  
 #include "UnaryFuncs.h"     // for escript::fsign  
 #include "Utils.h"  
31    
32  // #define LAZYDEBUG(X) X;  #include <iomanip> // for some fancy formatting in debug
33    
34    using namespace escript::DataTypes;
35    
36    #define NO_ARG
37    
38    // #define LAZYDEBUG(X) if (privdebug){X;}
39  #define LAZYDEBUG(X)  #define LAZYDEBUG(X)
40    namespace
41    {
42    bool privdebug=false;
43    
44    #define ENABLEDEBUG privdebug=true;
45    #define DISABLEDEBUG privdebug=false;
46    }
47    
48    // #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {cerr << "\n!!!!!!! SIZE LIMIT EXCEEDED " << m_children << ";" << m_height << endl << toString() << endl;resolveToIdentity();}
49    
50    // #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {cerr << "SIZE LIMIT EXCEEDED " << m_height << endl;resolveToIdentity();}
51    
52    
53    #define SIZELIMIT if (m_height>escript::escriptParams.getTOO_MANY_LEVELS())  {if (escript::escriptParams.getLAZY_VERBOSE()){cerr << "SIZE LIMIT EXCEEDED height=" << m_height << endl;}resolveToIdentity();}
54    
55  /*  /*
56  How does DataLazy work?  How does DataLazy work?
# Line 42  A special operation, IDENTITY, stores an Line 63  A special operation, IDENTITY, stores an
63  This means that all "internal" nodes in the structure are instances of DataLazy.  This means that all "internal" nodes in the structure are instances of DataLazy.
64    
65  Each operation has a string representation as well as an opgroup - eg G_IDENTITY, G_BINARY, ...  Each operation has a string representation as well as an opgroup - eg G_IDENTITY, G_BINARY, ...
66  Note that IDENITY is not considered a unary operation.  Note that IDENTITY is not considered a unary operation.
67    
68  I am avoiding calling the structure formed a tree because it is not guaranteed to be one (eg c=a+a).  I am avoiding calling the structure formed a tree because it is not guaranteed to be one (eg c=a+a).
69  It must however form a DAG (directed acyclic graph).  It must however form a DAG (directed acyclic graph).
# Line 50  I will refer to individual DataLazy obje Line 71  I will refer to individual DataLazy obje
71    
72  Each node also stores:  Each node also stores:
73  - m_readytype \in {'E','T','C','?'} ~ indicates what sort of DataReady would be produced if the expression was  - m_readytype \in {'E','T','C','?'} ~ indicates what sort of DataReady would be produced if the expression was
74      evaluated.          evaluated.
75  - m_buffsrequired ~ the larged number of samples which would need to be kept simultaneously in order to  - m_buffsrequired ~ the large number of samples which would need to be kept simultaneously in order to
76      evaluate the expression.          evaluate the expression.
77  - m_samplesize ~ the number of doubles stored in a sample.  - m_samplesize ~ the number of doubles stored in a sample.
78    
79  When a new node is created, the above values are computed based on the values in the child nodes.  When a new node is created, the above values are computed based on the values in the child nodes.
# Line 93  namespace escript Line 114  namespace escript
114  namespace  namespace
115  {  {
116    
117    
118    // enabling this will print out when ever the maximum stacksize used by resolve increases
119    // it assumes _OPENMP is also in use
120    //#define LAZY_STACK_PROF
121    
122    
123    
124    #ifndef _OPENMP
125      #ifdef LAZY_STACK_PROF
126      #undef LAZY_STACK_PROF
127      #endif
128    #endif
129    
130    
131    #ifdef LAZY_STACK_PROF
132    std::vector<void*> stackstart(getNumberOfThreads());
133    std::vector<void*> stackend(getNumberOfThreads());
134    size_t maxstackuse=0;
135    #endif
136    
137  enum ES_opgroup  enum ES_opgroup
138  {  {
139     G_UNKNOWN,     G_UNKNOWN,
140     G_IDENTITY,     G_IDENTITY,
141     G_BINARY,        // pointwise operations with two arguments     G_BINARY,            // pointwise operations with two arguments
142     G_UNARY,     // pointwise operations with one argument     G_UNARY,             // pointwise operations with one argument
143     G_UNARY_P,       // pointwise operations with one argument, requiring a parameter     G_UNARY_P,           // pointwise operations with one argument, requiring a parameter
144     G_NP1OUT,        // non-pointwise op with one output     G_NP1OUT,            // non-pointwise op with one output
145     G_NP1OUT_P,      // non-pointwise op with one output requiring a parameter     G_NP1OUT_P,          // non-pointwise op with one output requiring a parameter
146     G_TENSORPROD     // general tensor product     G_TENSORPROD,        // general tensor product
147       G_NP1OUT_2P,         // non-pointwise op with one output requiring two params
148       G_REDUCTION,         // non-pointwise unary op with a scalar output
149       G_CONDEVAL
150  };  };
151    
152    
153    
154    
155  string ES_opstrings[]={"UNKNOWN","IDENTITY","+","-","*","/","^",  string ES_opstrings[]={"UNKNOWN","IDENTITY","+","-","*","/","^",
156              "sin","cos","tan",                          "sin","cos","tan",
157              "asin","acos","atan","sinh","cosh","tanh","erf",                          "asin","acos","atan","sinh","cosh","tanh","erf",
158              "asinh","acosh","atanh",                          "asinh","acosh","atanh",
159              "log10","log","sign","abs","neg","pos","exp","sqrt",                          "log10","log","sign","abs","neg","pos","exp","sqrt",
160              "1/","where>0","where<0","where>=0","where<=0", "where<>0","where=0",                          "1/","where>0","where<0","where>=0","where<=0", "where<>0","where=0",
161              "symmetric","nonsymmetric",                          "symmetric","nonsymmetric",
162              "prod",                          "prod",
163              "transpose", "trace"};                          "transpose", "trace",
164  int ES_opcount=40;                          "swapaxes",
165                            "minval", "maxval",
166                            "condEval"};
167    int ES_opcount=44;
168  ES_opgroup opgroups[]={G_UNKNOWN,G_IDENTITY,G_BINARY,G_BINARY,G_BINARY,G_BINARY, G_BINARY,  ES_opgroup opgroups[]={G_UNKNOWN,G_IDENTITY,G_BINARY,G_BINARY,G_BINARY,G_BINARY, G_BINARY,
169              G_UNARY,G_UNARY,G_UNARY, //10                          G_UNARY,G_UNARY,G_UNARY, //10
170              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,    // 17                          G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,        // 17
171              G_UNARY,G_UNARY,G_UNARY,                    // 20                          G_UNARY,G_UNARY,G_UNARY,                                        // 20
172              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,    // 28                          G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,        // 28
173              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY, G_UNARY_P, G_UNARY_P,      // 35                          G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY, G_UNARY_P, G_UNARY_P,          // 35
174              G_NP1OUT,G_NP1OUT,                          G_NP1OUT,G_NP1OUT,
175              G_TENSORPROD,                          G_TENSORPROD,
176              G_NP1OUT_P, G_NP1OUT_P};                          G_NP1OUT_P, G_NP1OUT_P,
177                            G_NP1OUT_2P,
178                            G_REDUCTION, G_REDUCTION,
179                            G_CONDEVAL};
180  inline  inline
181  ES_opgroup  ES_opgroup
182  getOpgroup(ES_optype op)  getOpgroup(ES_optype op)
# Line 138  getOpgroup(ES_optype op) Line 188  getOpgroup(ES_optype op)
188  FunctionSpace  FunctionSpace
189  resultFS(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)  resultFS(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
190  {  {
191      // perhaps this should call interpolate and throw or something?          // perhaps this should call interpolate and throw or something?
192      // maybe we need an interpolate node -          // maybe we need an interpolate node -
193      // that way, if interpolate is required in any other op we can just throw a          // that way, if interpolate is required in any other op we can just throw a
194      // programming error exception.          // programming error exception.
195    
196    FunctionSpace l=left->getFunctionSpace();    FunctionSpace l=left->getFunctionSpace();
197    FunctionSpace r=right->getFunctionSpace();    FunctionSpace r=right->getFunctionSpace();
198    if (l!=r)    if (l!=r)
199    {    {
200      if (r.probeInterpolation(l))      signed char res=r.getDomain()->preferredInterpolationOnDomain(r.getTypeCode(), l.getTypeCode());
201        if (res==1)
202      {      {
203      return l;          return l;
204      }      }
205      if (l.probeInterpolation(r))      if (res==-1)
206      {      {
207      return r;          return r;
208      }      }
209      throw DataException("Cannot interpolate between the FunctionSpaces given for operation "+opToString(op)+".");      throw DataException("Cannot interpolate between the FunctionSpaces given for operation "+opToString(op)+".");
210    }    }
# Line 165  resultFS(DataAbstract_ptr left, DataAbst Line 216  resultFS(DataAbstract_ptr left, DataAbst
216  DataTypes::ShapeType  DataTypes::ShapeType
217  resultShape(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)  resultShape(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
218  {  {
219      if (left->getShape()!=right->getShape())          if (left->getShape()!=right->getShape())
220      {          {
221        if ((getOpgroup(op)!=G_BINARY) && (getOpgroup(op)!=G_NP1OUT))            if ((getOpgroup(op)!=G_BINARY) && (getOpgroup(op)!=G_NP1OUT))
222        {            {
223          throw DataException("Shapes not the name - shapes must match for (point)binary operations.");                  throw DataException("Shapes not the name - shapes must match for (point)binary operations.");
224        }            }
225        if (left->getRank()==0)   // we need to allow scalar * anything  
226        {            if (left->getRank()==0)       // we need to allow scalar * anything
227          return right->getShape();            {
228        }                  return right->getShape();
229        if (right->getRank()==0)            }
230        {            if (right->getRank()==0)
231          return left->getShape();            {
232        }                  return left->getShape();
233        throw DataException("Shapes not the same - arguments must have matching shapes (or be scalars) for (point)binary operations on lazy data.");            }
234      }            throw DataException("Shapes not the same - arguments must have matching shapes (or be scalars) for (point)binary operations on lazy data.");
235      return left->getShape();          }
236            return left->getShape();
237  }  }
238    
239  // return the shape for "op left"  // return the shape for "op left"
240    
241  DataTypes::ShapeType  DataTypes::ShapeType
242  resultShape(DataAbstract_ptr left, ES_optype op)  resultShape(DataAbstract_ptr left, ES_optype op, int axis_offset)
243    {
244            switch(op)
245            {
246            case TRANS:
247               {                    // for the scoping of variables
248                    const DataTypes::ShapeType& s=left->getShape();
249                    DataTypes::ShapeType sh;
250                    int rank=left->getRank();
251                    if (axis_offset<0 || axis_offset>rank)
252                    {
253                stringstream e;
254                e << "Error - Data::transpose must have 0 <= axis_offset <= rank=" << rank;
255                throw DataException(e.str());
256            }
257            for (int i=0; i<rank; i++)
258                    {
259                       int index = (axis_offset+i)%rank;
260               sh.push_back(s[index]); // Append to new shape
261            }
262                    return sh;
263               }
264            break;
265            case TRACE:
266               {
267                    int rank=left->getRank();
268                    if (rank<2)
269                    {
270                       throw DataException("Trace can only be computed for objects with rank 2 or greater.");
271                    }
272                    if ((axis_offset>rank-2) || (axis_offset<0))
273                    {
274                       throw DataException("Trace: axis offset must lie between 0 and rank-2 inclusive.");
275                    }
276                    if (rank==2)
277                    {
278                       return DataTypes::scalarShape;
279                    }
280                    else if (rank==3)
281                    {
282                       DataTypes::ShapeType sh;
283                       if (axis_offset==0)
284                       {
285                            sh.push_back(left->getShape()[2]);
286                       }
287                       else         // offset==1
288                       {
289                            sh.push_back(left->getShape()[0]);
290                       }
291                       return sh;
292                    }
293                    else if (rank==4)
294                    {
295                       DataTypes::ShapeType sh;
296                       const DataTypes::ShapeType& s=left->getShape();
297                       if (axis_offset==0)
298                       {
299                            sh.push_back(s[2]);
300                            sh.push_back(s[3]);
301                       }
302                       else if (axis_offset==1)
303                       {
304                            sh.push_back(s[0]);
305                            sh.push_back(s[3]);
306                       }
307                       else         // offset==2
308                       {
309                            sh.push_back(s[0]);
310                            sh.push_back(s[1]);
311                       }
312                       return sh;
313                    }
314                    else            // unknown rank
315                    {
316                       throw DataException("Error - Data::trace can only be calculated for rank 2, 3 or 4 object.");
317                    }
318               }
319            break;
320            default:
321            throw DataException("Programmer error - resultShape(left,op) can't compute shapes for operator "+opToString(op)+".");
322            }
323    }
324    
325    DataTypes::ShapeType
326    SwapShape(DataAbstract_ptr left, const int axis0, const int axis1)
327  {  {
328      switch(op)       // This code taken from the Data.cpp swapaxes() method
329      {       // Some of the checks are probably redundant here
330          case TRANS:       int axis0_tmp,axis1_tmp;
331          return left->getShape();       const DataTypes::ShapeType& s=left->getShape();
332      break;       DataTypes::ShapeType out_shape;
333      case TRACE:       // Here's the equivalent of python s_out=s[axis_offset:]+s[:axis_offset]
334          return DataTypes::scalarShape;       // which goes thru all shape vector elements starting with axis_offset (at index=rank wrap around to 0)
335      break;       int rank=left->getRank();
336          default:       if (rank<2) {
337      throw DataException("Programmer error - resultShape(left,op) can't compute shapes for operator "+opToString(op)+".");          throw DataException("Error - Data::swapaxes argument must have at least rank 2.");
338      }       }
339         if (axis0<0 || axis0>rank-1) {
340            stringstream e;
341            e << "Error - Data::swapaxes: axis0 must be between 0 and rank-1=" << (rank-1);
342            throw DataException(e.str());
343         }
344         if (axis1<0 || axis1>rank-1) {
345            stringstream e;
346            e << "Error - Data::swapaxes: axis1 must be between 0 and rank-1=" << (rank-1);
347            throw DataException(e.str());
348         }
349         if (axis0 == axis1) {
350             throw DataException("Error - Data::swapaxes: axis indices must be different.");
351         }
352         if (axis0 > axis1) {
353             axis0_tmp=axis1;
354             axis1_tmp=axis0;
355         } else {
356             axis0_tmp=axis0;
357             axis1_tmp=axis1;
358         }
359         for (int i=0; i<rank; i++) {
360           if (i == axis0_tmp) {
361              out_shape.push_back(s[axis1_tmp]);
362           } else if (i == axis1_tmp) {
363              out_shape.push_back(s[axis0_tmp]);
364           } else {
365              out_shape.push_back(s[i]);
366           }
367         }
368        return out_shape;
369  }  }
370    
371    
372  // determine the output shape for the general tensor product operation  // determine the output shape for the general tensor product operation
373  // the additional parameters return information required later for the product  // the additional parameters return information required later for the product
374  // the majority of this code is copy pasted from C_General_Tensor_Product  // the majority of this code is copy pasted from C_General_Tensor_Product
375  DataTypes::ShapeType  DataTypes::ShapeType
376  GTPShape(DataAbstract_ptr left, DataAbstract_ptr right, int axis_offset, int transpose, int& SL, int& SM, int& SR)  GTPShape(DataAbstract_ptr left, DataAbstract_ptr right, int axis_offset, int transpose, int& SL, int& SM, int& SR)
377  {  {
378                
379    // Get rank and shape of inputs    // Get rank and shape of inputs
380    int rank0 = left->getRank();    int rank0 = left->getRank();
381    int rank1 = right->getRank();    int rank1 = right->getRank();
# Line 217  GTPShape(DataAbstract_ptr left, DataAbst Line 384  GTPShape(DataAbstract_ptr left, DataAbst
384    
385    // Prepare for the loops of the product and verify compatibility of shapes    // Prepare for the loops of the product and verify compatibility of shapes
386    int start0=0, start1=0;    int start0=0, start1=0;
387    if (transpose == 0)       {}    if (transpose == 0)           {}
388    else if (transpose == 1)  { start0 = axis_offset; }    else if (transpose == 1)      { start0 = axis_offset; }
389    else if (transpose == 2)  { start1 = rank1-axis_offset; }    else if (transpose == 2)      { start1 = rank1-axis_offset; }
390    else              { throw DataException("DataLazy GeneralTensorProduct Constructor: Error - transpose should be 0, 1 or 2"); }    else                          { throw DataException("DataLazy GeneralTensorProduct Constructor: Error - transpose should be 0, 1 or 2"); }
391    
392    if (rank0<axis_offset)    if (rank0<axis_offset)
393    {    {
394      throw DataException("DataLazy GeneralTensorProduct Constructor: Error - rank of left < axisoffset");          throw DataException("DataLazy GeneralTensorProduct Constructor: Error - rank of left < axisoffset");
395    }    }
396    
397    // Adjust the shapes for transpose    // Adjust the shapes for transpose
398    DataTypes::ShapeType tmpShape0(rank0);    // pre-sizing the vectors rather    DataTypes::ShapeType tmpShape0(rank0);        // pre-sizing the vectors rather
399    DataTypes::ShapeType tmpShape1(rank1);    // than using push_back    DataTypes::ShapeType tmpShape1(rank1);        // than using push_back
400    for (int i=0; i<rank0; i++)   { tmpShape0[i]=shape0[(i+start0)%rank0]; }    for (int i=0; i<rank0; i++)   { tmpShape0[i]=shape0[(i+start0)%rank0]; }
401    for (int i=0; i<rank1; i++)   { tmpShape1[i]=shape1[(i+start1)%rank1]; }    for (int i=0; i<rank1; i++)   { tmpShape1[i]=shape1[(i+start1)%rank1]; }
402    
403    // Prepare for the loops of the product    // Prepare for the loops of the product
404    SL=1, SM=1, SR=1;    SL=1, SM=1, SR=1;
405    for (int i=0; i<rank0-axis_offset; i++)   {    for (int i=0; i<rank0-axis_offset; i++)       {
406      SL *= tmpShape0[i];      SL *= tmpShape0[i];
407    }    }
408    for (int i=rank0-axis_offset; i<rank0; i++)   {    for (int i=rank0-axis_offset; i<rank0; i++)   {
409      if (tmpShape0[i] != tmpShape1[i-(rank0-axis_offset)]) {      if (tmpShape0[i] != tmpShape1[i-(rank0-axis_offset)]) {
410        throw DataException("C_GeneralTensorProduct: Error - incompatible shapes");        throw DataException("C_GeneralTensorProduct: Error - incompatible shapes");
411      }      }
412      SM *= tmpShape0[i];      SM *= tmpShape0[i];
413    }    }
414    for (int i=axis_offset; i<rank1; i++)     {    for (int i=axis_offset; i<rank1; i++)         {
415      SR *= tmpShape1[i];      SR *= tmpShape1[i];
416    }    }
417    
418    // Define the shape of the output (rank of shape is the sum of the loop ranges below)    // Define the shape of the output (rank of shape is the sum of the loop ranges below)
419    DataTypes::ShapeType shape2(rank0+rank1-2*axis_offset);      DataTypes::ShapeType shape2(rank0+rank1-2*axis_offset);      
420    {         // block to limit the scope of out_index    {                     // block to limit the scope of out_index
421       int out_index=0;       int out_index=0;
422       for (int i=0; i<rank0-axis_offset; i++, ++out_index) { shape2[out_index]=tmpShape0[i]; } // First part of arg_0_Z       for (int i=0; i<rank0-axis_offset; i++, ++out_index) { shape2[out_index]=tmpShape0[i]; } // First part of arg_0_Z
423       for (int i=axis_offset; i<rank1; i++, ++out_index)   { shape2[out_index]=tmpShape1[i]; } // Last part of arg_1_Z       for (int i=axis_offset; i<rank1; i++, ++out_index)   { shape2[out_index]=tmpShape1[i]; } // Last part of arg_1_Z
# Line 266  GTPShape(DataAbstract_ptr left, DataAbst Line 433  GTPShape(DataAbstract_ptr left, DataAbst
433    return shape2;    return shape2;
434  }  }
435    
436    }       // end anonymous namespace
 // determine the number of points in the result of "left op right"  
 // note that determining the resultLength for G_TENSORPROD is more complex and will not be processed here  
 // size_t  
 // resultLength(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)  
 // {  
 //    switch (getOpgroup(op))  
 //    {  
 //    case G_BINARY: return left->getLength();  
 //    case G_UNARY: return left->getLength();  
 //    case G_NP1OUT: return left->getLength();  
 //    default:  
 //  throw DataException("Programmer Error - attempt to getLength() for operator "+opToString(op)+".");  
 //    }  
 // }  
   
 // determine the number of samples requires to evaluate an expression combining left and right  
 // NP1OUT needs an extra buffer because we can't write the answers over the top of the input.  
 // The same goes for G_TENSORPROD  
 int  
 calcBuffs(const DataLazy_ptr& left, const DataLazy_ptr& right, ES_optype op)  
 {  
    switch(getOpgroup(op))  
    {  
    case G_IDENTITY: return 1;  
    case G_BINARY: return max(left->getBuffsRequired(),right->getBuffsRequired()+1);  
    case G_UNARY:  
    case G_UNARY_P: return max(left->getBuffsRequired(),1);  
    case G_NP1OUT: return 1+max(left->getBuffsRequired(),1);  
    case G_NP1OUT_P: return 1+max(left->getBuffsRequired(),1);  
    case G_TENSORPROD: return 1+max(left->getBuffsRequired(),right->getBuffsRequired()+1);  
    default:  
     throw DataException("Programmer Error - attempt to calcBuffs() for operator "+opToString(op)+".");  
    }  
 }  
   
   
 }   // end anonymous namespace  
437    
438    
439    
# Line 318  opToString(ES_optype op) Line 448  opToString(ES_optype op)
448    return ES_opstrings[op];    return ES_opstrings[op];
449  }  }
450    
451    void DataLazy::LazyNodeSetup()
452    {
453    #ifdef _OPENMP
454        int numthreads=omp_get_max_threads();
455        m_samples.resize(numthreads*m_samplesize);
456        m_sampleids=new int[numthreads];
457        for (int i=0;i<numthreads;++i)
458        {
459            m_sampleids[i]=-1;  
460        }
461    #else
462        m_samples.resize(m_samplesize);
463        m_sampleids=new int[1];
464        m_sampleids[0]=-1;
465    #endif  // _OPENMP
466    }
467    
468    
469    // Creates an identity node
470  DataLazy::DataLazy(DataAbstract_ptr p)  DataLazy::DataLazy(DataAbstract_ptr p)
471      : parent(p->getFunctionSpace(),p->getShape()),          : parent(p->getFunctionSpace(),p->getShape())
472      m_op(IDENTITY),          ,m_sampleids(0),
473      m_axis_offset(0),          m_samples(1)
     m_transpose(0),  
     m_SL(0), m_SM(0), m_SR(0)  
474  {  {
475     if (p->isLazy())     if (p->isLazy())
476     {     {
477      // I don't want identity of Lazy.          // I don't want identity of Lazy.
478      // Question: Why would that be so bad?          // Question: Why would that be so bad?
479      // Answer: We assume that the child of ID is something we can call getVector on          // Answer: We assume that the child of ID is something we can call getVector on
480      throw DataException("Programmer error - attempt to create identity from a DataLazy.");          throw DataException("Programmer error - attempt to create identity from a DataLazy.");
481     }     }
482     else     else
483     {     {
484      m_id=dynamic_pointer_cast<DataReady>(p);          p->makeLazyShared();
485      if(p->isConstant()) {m_readytype='C';}          DataReady_ptr dr=dynamic_pointer_cast<DataReady>(p);
486      else if(p->isExpanded()) {m_readytype='E';}          makeIdentity(dr);
487      else if (p->isTagged()) {m_readytype='T';}  LAZYDEBUG(cout << "Wrapping " << dr.get() << " id=" << m_id.get() << endl;)
     else {throw DataException("Unknown DataReady instance in DataLazy constructor.");}  
488     }     }
    m_buffsRequired=1;  
    m_samplesize=getNumDPPSample()*getNoValues();  
    m_maxsamplesize=m_samplesize;  
489  LAZYDEBUG(cout << "(1)Lazy created with " << m_samplesize << endl;)  LAZYDEBUG(cout << "(1)Lazy created with " << m_samplesize << endl;)
490  }  }
491    
   
   
   
492  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op)  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op)
493      : parent(left->getFunctionSpace(),left->getShape()),          : parent(left->getFunctionSpace(),(getOpgroup(op)!=G_REDUCTION)?left->getShape():DataTypes::scalarShape),
494      m_op(op),          m_op(op),
495      m_axis_offset(0),          m_axis_offset(0),
496      m_transpose(0),          m_transpose(0),
497      m_SL(0), m_SM(0), m_SR(0)          m_SL(0), m_SM(0), m_SR(0)
498  {  {
499     if ((getOpgroup(op)!=G_UNARY) && (getOpgroup(op)!=G_NP1OUT))     if ((getOpgroup(op)!=G_UNARY) && (getOpgroup(op)!=G_NP1OUT) && (getOpgroup(op)!=G_REDUCTION))
500     {     {
501      throw DataException("Programmer error - constructor DataLazy(left, op) will only process UNARY operations.");          throw DataException("Programmer error - constructor DataLazy(left, op) will only process UNARY operations.");
502     }     }
503    
504     DataLazy_ptr lleft;     DataLazy_ptr lleft;
505     if (!left->isLazy())     if (!left->isLazy())
506     {     {
507      lleft=DataLazy_ptr(new DataLazy(left));          lleft=DataLazy_ptr(new DataLazy(left));
508     }     }
509     else     else
510     {     {
511      lleft=dynamic_pointer_cast<DataLazy>(left);          lleft=dynamic_pointer_cast<DataLazy>(left);
512     }     }
513     m_readytype=lleft->m_readytype;     m_readytype=lleft->m_readytype;
514     m_left=lleft;     m_left=lleft;
    m_buffsRequired=calcBuffs(m_left, m_right,m_op); // yeah m_right will be null at this point  
515     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
516     m_maxsamplesize=max(m_samplesize,m_left->getMaxSampleSize());     m_children=m_left->m_children+1;
517       m_height=m_left->m_height+1;
518       LazyNodeSetup();
519       SIZELIMIT
520  }  }
521    
522    
523  // In this constructor we need to consider interpolation  // In this constructor we need to consider interpolation
524  DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)  DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
525      : parent(resultFS(left,right,op), resultShape(left,right,op)),          : parent(resultFS(left,right,op), resultShape(left,right,op)),
526      m_op(op),          m_op(op),
527      m_SL(0), m_SM(0), m_SR(0)          m_SL(0), m_SM(0), m_SR(0)
528  {  {
529    LAZYDEBUG(cout << "Forming operator with " << left.get() << " " << right.get() << endl;)
530     if ((getOpgroup(op)!=G_BINARY))     if ((getOpgroup(op)!=G_BINARY))
531     {     {
532      throw DataException("Programmer error - constructor DataLazy(left, right, op) will only process BINARY operations.");          throw DataException("Programmer error - constructor DataLazy(left, right, op) will only process BINARY operations.");
533     }     }
534    
535     if (getFunctionSpace()!=left->getFunctionSpace())    // left needs to be interpolated     if (getFunctionSpace()!=left->getFunctionSpace())    // left needs to be interpolated
536     {     {
537      FunctionSpace fs=getFunctionSpace();          FunctionSpace fs=getFunctionSpace();
538      Data ltemp(left);          Data ltemp(left);
539      Data tmp(ltemp,fs);          Data tmp(ltemp,fs);
540      left=tmp.borrowDataPtr();          left=tmp.borrowDataPtr();
541     }     }
542     if (getFunctionSpace()!=right->getFunctionSpace())   // right needs to be interpolated     if (getFunctionSpace()!=right->getFunctionSpace())   // right needs to be interpolated
543     {     {
544      Data tmp(Data(right),getFunctionSpace());          Data tmp(Data(right),getFunctionSpace());
545      right=tmp.borrowDataPtr();          right=tmp.borrowDataPtr();
546    LAZYDEBUG(cout << "Right interpolation required " << right.get() << endl;)
547     }     }
548     left->operandCheck(*right);     left->operandCheck(*right);
549    
550     if (left->isLazy())          // the children need to be DataLazy. Wrap them in IDENTITY if required     if (left->isLazy())                  // the children need to be DataLazy. Wrap them in IDENTITY if required
551     {     {
552      m_left=dynamic_pointer_cast<DataLazy>(left);          m_left=dynamic_pointer_cast<DataLazy>(left);
553    LAZYDEBUG(cout << "Left is " << m_left->toString() << endl;)
554     }     }
555     else     else
556     {     {
557      m_left=DataLazy_ptr(new DataLazy(left));          m_left=DataLazy_ptr(new DataLazy(left));
558    LAZYDEBUG(cout << "Left " << left.get() << " wrapped " << m_left->m_id.get() << endl;)
559     }     }
560     if (right->isLazy())     if (right->isLazy())
561     {     {
562      m_right=dynamic_pointer_cast<DataLazy>(right);          m_right=dynamic_pointer_cast<DataLazy>(right);
563    LAZYDEBUG(cout << "Right is " << m_right->toString() << endl;)
564     }     }
565     else     else
566     {     {
567      m_right=DataLazy_ptr(new DataLazy(right));          m_right=DataLazy_ptr(new DataLazy(right));
568    LAZYDEBUG(cout << "Right " << right.get() << " wrapped " << m_right->m_id.get() << endl;)
569     }     }
570     char lt=m_left->m_readytype;     char lt=m_left->m_readytype;
571     char rt=m_right->m_readytype;     char rt=m_right->m_readytype;
572     if (lt=='E' || rt=='E')     if (lt=='E' || rt=='E')
573     {     {
574      m_readytype='E';          m_readytype='E';
575     }     }
576     else if (lt=='T' || rt=='T')     else if (lt=='T' || rt=='T')
577     {     {
578      m_readytype='T';          m_readytype='T';
579     }     }
580     else     else
581     {     {
582      m_readytype='C';          m_readytype='C';
583     }     }
584     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
585     m_maxsamplesize=max(max(m_samplesize,m_right->getMaxSampleSize()),m_left->getMaxSampleSize());       m_children=m_left->m_children+m_right->m_children+2;
586     m_buffsRequired=calcBuffs(m_left, m_right,m_op);     m_height=max(m_left->m_height,m_right->m_height)+1;
587       LazyNodeSetup();
588       SIZELIMIT
589  LAZYDEBUG(cout << "(3)Lazy created with " << m_samplesize << endl;)  LAZYDEBUG(cout << "(3)Lazy created with " << m_samplesize << endl;)
590  }  }
591    
592  DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op, int axis_offset, int transpose)  DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op, int axis_offset, int transpose)
593      : parent(resultFS(left,right,op), GTPShape(left,right, axis_offset, transpose, m_SL,m_SM, m_SR)),          : parent(resultFS(left,right,op), GTPShape(left,right, axis_offset, transpose, m_SL,m_SM, m_SR)),
594      m_op(op),          m_op(op),
595      m_axis_offset(axis_offset),          m_axis_offset(axis_offset),
596      m_transpose(transpose)          m_transpose(transpose)
597  {  {
598     if ((getOpgroup(op)!=G_TENSORPROD))     if ((getOpgroup(op)!=G_TENSORPROD))
599     {     {
600      throw DataException("Programmer error - constructor DataLazy(left, right, op, ax, tr) will only process BINARY operations which require parameters.");          throw DataException("Programmer error - constructor DataLazy(left, right, op, ax, tr) will only process BINARY operations which require parameters.");
601     }     }
602     if ((transpose>2) || (transpose<0))     if ((transpose>2) || (transpose<0))
603     {     {
604      throw DataException("DataLazy GeneralTensorProduct constructor: Error - transpose should be 0, 1 or 2");          throw DataException("DataLazy GeneralTensorProduct constructor: Error - transpose should be 0, 1 or 2");
605     }     }
606     if (getFunctionSpace()!=left->getFunctionSpace())    // left needs to be interpolated     if (getFunctionSpace()!=left->getFunctionSpace())    // left needs to be interpolated
607     {     {
608      FunctionSpace fs=getFunctionSpace();          FunctionSpace fs=getFunctionSpace();
609      Data ltemp(left);          Data ltemp(left);
610      Data tmp(ltemp,fs);          Data tmp(ltemp,fs);
611      left=tmp.borrowDataPtr();          left=tmp.borrowDataPtr();
612     }     }
613     if (getFunctionSpace()!=right->getFunctionSpace())   // right needs to be interpolated     if (getFunctionSpace()!=right->getFunctionSpace())   // right needs to be interpolated
614     {     {
615      Data tmp(Data(right),getFunctionSpace());          Data tmp(Data(right),getFunctionSpace());
616      right=tmp.borrowDataPtr();          right=tmp.borrowDataPtr();
617     }     }
618     left->operandCheck(*right);  //    left->operandCheck(*right);
619    
620     if (left->isLazy())          // the children need to be DataLazy. Wrap them in IDENTITY if required     if (left->isLazy())                  // the children need to be DataLazy. Wrap them in IDENTITY if required
621     {     {
622      m_left=dynamic_pointer_cast<DataLazy>(left);          m_left=dynamic_pointer_cast<DataLazy>(left);
623     }     }
624     else     else
625     {     {
626      m_left=DataLazy_ptr(new DataLazy(left));          m_left=DataLazy_ptr(new DataLazy(left));
627     }     }
628     if (right->isLazy())     if (right->isLazy())
629     {     {
630      m_right=dynamic_pointer_cast<DataLazy>(right);          m_right=dynamic_pointer_cast<DataLazy>(right);
631     }     }
632     else     else
633     {     {
634      m_right=DataLazy_ptr(new DataLazy(right));          m_right=DataLazy_ptr(new DataLazy(right));
635     }     }
636     char lt=m_left->m_readytype;     char lt=m_left->m_readytype;
637     char rt=m_right->m_readytype;     char rt=m_right->m_readytype;
638     if (lt=='E' || rt=='E')     if (lt=='E' || rt=='E')
639     {     {
640      m_readytype='E';          m_readytype='E';
641     }     }
642     else if (lt=='T' || rt=='T')     else if (lt=='T' || rt=='T')
643     {     {
644      m_readytype='T';          m_readytype='T';
645     }     }
646     else     else
647     {     {
648      m_readytype='C';          m_readytype='C';
649     }     }
650     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
651     m_maxsamplesize=max(max(m_samplesize,m_right->getMaxSampleSize()),m_left->getMaxSampleSize());       m_children=m_left->m_children+m_right->m_children+2;
652     m_buffsRequired=calcBuffs(m_left, m_right,m_op);     m_height=max(m_left->m_height,m_right->m_height)+1;
653       LazyNodeSetup();
654       SIZELIMIT
655  LAZYDEBUG(cout << "(4)Lazy created with " << m_samplesize << endl;)  LAZYDEBUG(cout << "(4)Lazy created with " << m_samplesize << endl;)
656  }  }
657    
658    
659  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, int axis_offset)  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, int axis_offset)
660      : parent(left->getFunctionSpace(), resultShape(left,op)),          : parent(left->getFunctionSpace(), resultShape(left,op, axis_offset)),
661      m_op(op),          m_op(op),
662      m_axis_offset(axis_offset),          m_axis_offset(axis_offset),
663      m_transpose(0),          m_transpose(0),
664      m_tol(0)          m_tol(0)
665  {  {
666     if ((getOpgroup(op)!=G_NP1OUT_P))     if ((getOpgroup(op)!=G_NP1OUT_P))
667     {     {
668      throw DataException("Programmer error - constructor DataLazy(left, op, ax) will only process UNARY operations which require parameters.");          throw DataException("Programmer error - constructor DataLazy(left, op, ax) will only process UNARY operations which require parameters.");
669     }     }
670     DataLazy_ptr lleft;     DataLazy_ptr lleft;
671     if (!left->isLazy())     if (!left->isLazy())
672     {     {
673      lleft=DataLazy_ptr(new DataLazy(left));          lleft=DataLazy_ptr(new DataLazy(left));
674     }     }
675     else     else
676     {     {
677      lleft=dynamic_pointer_cast<DataLazy>(left);          lleft=dynamic_pointer_cast<DataLazy>(left);
678     }     }
679     m_readytype=lleft->m_readytype;     m_readytype=lleft->m_readytype;
680     m_left=lleft;     m_left=lleft;
    m_buffsRequired=calcBuffs(m_left, m_right,m_op); // yeah m_right will be null at this point  
681     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
682     m_maxsamplesize=max(m_samplesize,m_left->getMaxSampleSize());     m_children=m_left->m_children+1;
683       m_height=m_left->m_height+1;
684       LazyNodeSetup();
685       SIZELIMIT
686  LAZYDEBUG(cout << "(5)Lazy created with " << m_samplesize << endl;)  LAZYDEBUG(cout << "(5)Lazy created with " << m_samplesize << endl;)
687  }  }
688    
689  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, double tol)  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, double tol)
690      : parent(left->getFunctionSpace(), left->getShape()),          : parent(left->getFunctionSpace(), left->getShape()),
691      m_op(op),          m_op(op),
692      m_axis_offset(0),          m_axis_offset(0),
693      m_transpose(0),          m_transpose(0),
694      m_tol(tol)          m_tol(tol)
695  {  {
696     if ((getOpgroup(op)!=G_UNARY_P))     if ((getOpgroup(op)!=G_UNARY_P))
697     {     {
698      throw DataException("Programmer error - constructor DataLazy(left, op, tol) will only process UNARY operations which require parameters.");          throw DataException("Programmer error - constructor DataLazy(left, op, tol) will only process UNARY operations which require parameters.");
699     }     }
700     DataLazy_ptr lleft;     DataLazy_ptr lleft;
701     if (!left->isLazy())     if (!left->isLazy())
702     {     {
703      lleft=DataLazy_ptr(new DataLazy(left));          lleft=DataLazy_ptr(new DataLazy(left));
704     }     }
705     else     else
706     {     {
707      lleft=dynamic_pointer_cast<DataLazy>(left);          lleft=dynamic_pointer_cast<DataLazy>(left);
708     }     }
709     m_readytype=lleft->m_readytype;     m_readytype=lleft->m_readytype;
710     m_left=lleft;     m_left=lleft;
    m_buffsRequired=calcBuffs(m_left, m_right,m_op); // yeah m_right will be null at this point  
711     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
712     m_maxsamplesize=max(m_samplesize,m_left->getMaxSampleSize());     m_children=m_left->m_children+1;
713       m_height=m_left->m_height+1;
714       LazyNodeSetup();
715       SIZELIMIT
716  LAZYDEBUG(cout << "(6)Lazy created with " << m_samplesize << endl;)  LAZYDEBUG(cout << "(6)Lazy created with " << m_samplesize << endl;)
717  }  }
718    
719  DataLazy::~DataLazy()  
720    DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, const int axis0, const int axis1)
721            : parent(left->getFunctionSpace(), SwapShape(left,axis0,axis1)),
722            m_op(op),
723            m_axis_offset(axis0),
724            m_transpose(axis1),
725            m_tol(0)
726  {  {
727       if ((getOpgroup(op)!=G_NP1OUT_2P))
728       {
729            throw DataException("Programmer error - constructor DataLazy(left, op, tol) will only process UNARY operations which require two integer parameters.");
730       }
731       DataLazy_ptr lleft;
732       if (!left->isLazy())
733       {
734            lleft=DataLazy_ptr(new DataLazy(left));
735       }
736       else
737       {
738            lleft=dynamic_pointer_cast<DataLazy>(left);
739       }
740       m_readytype=lleft->m_readytype;
741       m_left=lleft;
742       m_samplesize=getNumDPPSample()*getNoValues();
743       m_children=m_left->m_children+1;
744       m_height=m_left->m_height+1;
745       LazyNodeSetup();
746       SIZELIMIT
747    LAZYDEBUG(cout << "(7)Lazy created with " << m_samplesize << endl;)
748  }  }
749    
750    
751  int  namespace
752  DataLazy::getBuffsRequired() const  {
753    
754        inline int max3(int a, int b, int c)
755        {
756            int t=(a>b?a:b);
757            return (t>c?t:c);
758    
759        }
760    }
761    
762    DataLazy::DataLazy(DataAbstract_ptr mask, DataAbstract_ptr left, DataAbstract_ptr right/*, double tol*/)
763            : parent(left->getFunctionSpace(), left->getShape()),
764            m_op(CONDEVAL),
765            m_axis_offset(0),
766            m_transpose(0),
767            m_tol(0)
768  {  {
769      return m_buffsRequired;  
770       DataLazy_ptr lmask;
771       DataLazy_ptr lleft;
772       DataLazy_ptr lright;
773       if (!mask->isLazy())
774       {
775            lmask=DataLazy_ptr(new DataLazy(mask));
776       }
777       else
778       {
779            lmask=dynamic_pointer_cast<DataLazy>(mask);
780       }
781       if (!left->isLazy())
782       {
783            lleft=DataLazy_ptr(new DataLazy(left));
784       }
785       else
786       {
787            lleft=dynamic_pointer_cast<DataLazy>(left);
788       }
789       if (!right->isLazy())
790       {
791            lright=DataLazy_ptr(new DataLazy(right));
792       }
793       else
794       {
795            lright=dynamic_pointer_cast<DataLazy>(right);
796       }
797       m_readytype=lmask->m_readytype;
798       if ((lleft->m_readytype!=lright->m_readytype) || (lmask->m_readytype!=lleft->m_readytype))
799       {
800            throw DataException("Programmer Error - condEval arguments must have the same readytype");
801       }
802       m_left=lleft;
803       m_right=lright;
804       m_mask=lmask;
805       m_samplesize=getNumDPPSample()*getNoValues();
806       m_children=m_left->m_children+m_right->m_children+m_mask->m_children+1;
807       m_height=max3(m_left->m_height,m_right->m_height,m_mask->m_height)+1;
808       LazyNodeSetup();
809       SIZELIMIT
810    LAZYDEBUG(cout << "(8)Lazy created with " << m_samplesize << endl;)
811  }  }
812    
813    
814  size_t  
815  DataLazy::getMaxSampleSize() const  DataLazy::~DataLazy()
816  {  {
817      return m_maxsamplesize;     delete[] m_sampleids;
818  }  }
819    
820    
821  /*  /*
822    \brief Evaluates the expression using methods on Data.    \brief Evaluates the expression using methods on Data.
823    This does the work for the collapse method.    This does the work for the collapse method.
824    For reasons of efficiency do not call this method on DataExpanded nodes.    For reasons of efficiency do not call this method on DataExpanded nodes.
825  */  */
826  DataReady_ptr  DataReady_ptr
827  DataLazy::collapseToReady()  DataLazy::collapseToReady() const
828  {  {
829    if (m_readytype=='E')    if (m_readytype=='E')
830    { // this is more an efficiency concern than anything else    {     // this is more an efficiency concern than anything else
831      throw DataException("Programmer Error - do not use collapse on Expanded data.");      throw DataException("Programmer Error - do not use collapse on Expanded data.");
832    }    }
833    if (m_op==IDENTITY)    if (m_op==IDENTITY)
# Line 606  DataLazy::collapseToReady() Line 845  DataLazy::collapseToReady()
845    switch(m_op)    switch(m_op)
846    {    {
847      case ADD:      case ADD:
848      result=left+right;          result=left+right;
849      break;          break;
850      case SUB:            case SUB:          
851      result=left-right;          result=left-right;
852      break;          break;
853      case MUL:            case MUL:          
854      result=left*right;          result=left*right;
855      break;          break;
856      case DIV:            case DIV:          
857      result=left/right;          result=left/right;
858      break;          break;
859      case SIN:      case SIN:
860      result=left.sin();            result=left.sin();      
861      break;          break;
862      case COS:      case COS:
863      result=left.cos();          result=left.cos();
864      break;          break;
865      case TAN:      case TAN:
866      result=left.tan();          result=left.tan();
867      break;          break;
868      case ASIN:      case ASIN:
869      result=left.asin();          result=left.asin();
870      break;          break;
871      case ACOS:      case ACOS:
872      result=left.acos();          result=left.acos();
873      break;          break;
874      case ATAN:      case ATAN:
875      result=left.atan();          result=left.atan();
876      break;          break;
877      case SINH:      case SINH:
878      result=left.sinh();          result=left.sinh();
879      break;          break;
880      case COSH:      case COSH:
881      result=left.cosh();          result=left.cosh();
882      break;          break;
883      case TANH:      case TANH:
884      result=left.tanh();          result=left.tanh();
885      break;          break;
886      case ERF:      case ERF:
887      result=left.erf();          result=left.erf();
888      break;          break;
889     case ASINH:     case ASINH:
890      result=left.asinh();          result=left.asinh();
891      break;          break;
892     case ACOSH:     case ACOSH:
893      result=left.acosh();          result=left.acosh();
894      break;          break;
895     case ATANH:     case ATANH:
896      result=left.atanh();          result=left.atanh();
897      break;          break;
898      case LOG10:      case LOG10:
899      result=left.log10();          result=left.log10();
900      break;          break;
901      case LOG:      case LOG:
902      result=left.log();          result=left.log();
903      break;          break;
904      case SIGN:      case SIGN:
905      result=left.sign();          result=left.sign();
906      break;          break;
907      case ABS:      case ABS:
908      result=left.abs();          result=left.abs();
909      break;          break;
910      case NEG:      case NEG:
911      result=left.neg();          result=left.neg();
912      break;          break;
913      case POS:      case POS:
914      // it doesn't mean anything for delayed.          // it doesn't mean anything for delayed.
915      // it will just trigger a deep copy of the lazy object          // it will just trigger a deep copy of the lazy object
916      throw DataException("Programmer error - POS not supported for lazy data.");          throw DataException("Programmer error - POS not supported for lazy data.");
917      break;          break;
918      case EXP:      case EXP:
919      result=left.exp();          result=left.exp();
920      break;          break;
921      case SQRT:      case SQRT:
922      result=left.sqrt();          result=left.sqrt();
923      break;          break;
924      case RECIP:      case RECIP:
925      result=left.oneOver();          result=left.oneOver();
926      break;          break;
927      case GZ:      case GZ:
928      result=left.wherePositive();          result=left.wherePositive();
929      break;          break;
930      case LZ:      case LZ:
931      result=left.whereNegative();          result=left.whereNegative();
932      break;          break;
933      case GEZ:      case GEZ:
934      result=left.whereNonNegative();          result=left.whereNonNegative();
935      break;          break;
936      case LEZ:      case LEZ:
937      result=left.whereNonPositive();          result=left.whereNonPositive();
938      break;          break;
939      case NEZ:      case NEZ:
940      result=left.whereNonZero(m_tol);          result=left.whereNonZero(m_tol);
941      break;          break;
942      case EZ:      case EZ:
943      result=left.whereZero(m_tol);          result=left.whereZero(m_tol);
944      break;          break;
945      case SYM:      case SYM:
946      result=left.symmetric();          result=left.symmetric();
947      break;          break;
948      case NSYM:      case NSYM:
949      result=left.nonsymmetric();          result=left.nonsymmetric();
950      break;          break;
951      case PROD:      case PROD:
952      result=C_GeneralTensorProduct(left,right,m_axis_offset, m_transpose);          result=C_GeneralTensorProduct(left,right,m_axis_offset, m_transpose);
953      break;          break;
954      case TRANS:      case TRANS:
955      result=left.transpose(m_axis_offset);          result=left.transpose(m_axis_offset);
956      break;          break;
957      case TRACE:      case TRACE:
958      result=left.trace(m_axis_offset);          result=left.trace(m_axis_offset);
959      break;          break;
960        case SWAP:
961            result=left.swapaxes(m_axis_offset, m_transpose);
962            break;
963        case MINVAL:
964            result=left.minval();
965            break;
966        case MAXVAL:
967            result=left.minval();
968            break;
969      default:      default:
970      throw DataException("Programmer error - collapseToReady does not know how to resolve operator "+opToString(m_op)+".");          throw DataException("Programmer error - collapseToReady does not know how to resolve operator "+opToString(m_op)+".");
971    }    }
972    return result.borrowReadyPtr();    return result.borrowReadyPtr();
973  }  }
# Line 731  DataLazy::collapseToReady() Line 979  DataLazy::collapseToReady()
979     the purpose of using DataLazy in the first place).     the purpose of using DataLazy in the first place).
980  */  */
981  void  void
982  DataLazy::collapse()  DataLazy::collapse() const
983  {  {
984    if (m_op==IDENTITY)    if (m_op==IDENTITY)
985    {    {
986      return;          return;
987    }    }
988    if (m_readytype=='E')    if (m_readytype=='E')
989    { // this is more an efficiency concern than anything else    {     // this is more an efficiency concern than anything else
990      throw DataException("Programmer Error - do not use collapse on Expanded data.");      throw DataException("Programmer Error - do not use collapse on Expanded data.");
991    }    }
992    m_id=collapseToReady();    m_id=collapseToReady();
993    m_op=IDENTITY;    m_op=IDENTITY;
994  }  }
995    
996  /*  
997    \brief Compute the value of the expression (unary operation) for the given sample.  
998    \return Vector which stores the value of the subexpression for the given sample.  
999    \param v A vector to store intermediate results.  
1000    \param offset Index in v to begin storing results.  
1001    \param sampleNo Sample number to evaluate.  #define PROC_OP(TYPE,X)                               \
1002    \param roffset (output parameter) the offset in the return vector where the result begins.          for (int j=0;j<onumsteps;++j)\
1003            {\
1004    The return value will be an existing vector so do not deallocate it.            for (int i=0;i<numsteps;++i,resultp+=resultStep) \
1005    If the result is stored in v it should be stored at the offset given.            { \
1006    Everything from offset to the end of v should be considered available for this method to use.  LAZYDEBUG(cout << "[left,right]=[" << lroffset << "," << rroffset << "]" << endl;)\
1007  */  LAZYDEBUG(cout << "{left,right}={" << (*left)[lroffset] << "," << (*right)[rroffset] << "}\n";)\
1008  DataTypes::ValueType*               tensor_binary_operation< TYPE >(chunksize, &((*left)[lroffset]), &((*right)[rroffset]), resultp, X); \
1009  DataLazy::resolveUnary(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const  LAZYDEBUG(cout << " result=      " << resultp[0] << endl;) \
1010                 lroffset+=leftstep; \
1011                 rroffset+=rightstep; \
1012              }\
1013              lroffset+=oleftstep;\
1014              rroffset+=orightstep;\
1015            }
1016    
1017    
1018    // The result will be stored in m_samples
1019    // The return value is a pointer to the DataVector, offset is the offset within the return value
1020    const DataTypes::RealVectorType*
1021    DataLazy::resolveNodeSample(int tid, int sampleNo, size_t& roffset) const
1022    {
1023    LAZYDEBUG(cout << "Resolve sample " << toString() << endl;)
1024            // collapse so we have a 'E' node or an IDENTITY for some other type
1025      if (m_readytype!='E' && m_op!=IDENTITY)
1026      {
1027            collapse();
1028      }
1029      if (m_op==IDENTITY)  
1030      {
1031        const ValueType& vec=m_id->getVectorRO();
1032        roffset=m_id->getPointOffset(sampleNo, 0);
1033    #ifdef LAZY_STACK_PROF
1034    int x;
1035    if (&x<stackend[omp_get_thread_num()])
1036  {  {
1037      // we assume that any collapsing has been done before we get here         stackend[omp_get_thread_num()]=&x;
1038      // since we only have one argument we don't need to think about only  }
1039      // processing single points.  #endif
1040        return &(vec);
1041      }
1042      if (m_readytype!='E')
1043      {
1044        throw DataException("Programmer Error - Collapse did not produce an expanded node.");
1045      }
1046      if (m_sampleids[tid]==sampleNo)
1047      {
1048            roffset=tid*m_samplesize;
1049            return &(m_samples);            // sample is already resolved
1050      }
1051      m_sampleids[tid]=sampleNo;
1052    
1053      switch (getOpgroup(m_op))
1054      {
1055      case G_UNARY:
1056      case G_UNARY_P: return resolveNodeUnary(tid, sampleNo, roffset);
1057      case G_BINARY: return resolveNodeBinary(tid, sampleNo, roffset);
1058      case G_NP1OUT: return resolveNodeNP1OUT(tid, sampleNo, roffset);
1059      case G_NP1OUT_P: return resolveNodeNP1OUT_P(tid, sampleNo, roffset);
1060      case G_TENSORPROD: return resolveNodeTProd(tid, sampleNo, roffset);
1061      case G_NP1OUT_2P: return resolveNodeNP1OUT_2P(tid, sampleNo, roffset);
1062      case G_REDUCTION: return resolveNodeReduction(tid, sampleNo, roffset);
1063      case G_CONDEVAL: return resolveNodeCondEval(tid, sampleNo, roffset);
1064      default:
1065        throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");
1066      }
1067    }
1068    
1069    const DataTypes::RealVectorType*
1070    DataLazy::resolveNodeUnary(int tid, int sampleNo, size_t& roffset) const
1071    {
1072            // we assume that any collapsing has been done before we get here
1073            // since we only have one argument we don't need to think about only
1074            // processing single points.
1075            // we will also know we won't get identity nodes
1076    if (m_readytype!='E')    if (m_readytype!='E')
1077    {    {
1078      throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");      throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
1079    }    }
1080    const ValueType* vleft=m_left->resolveSample(v,offset,sampleNo,roffset);    if (m_op==IDENTITY)
1081    const double* left=&((*vleft)[roffset]);    {
1082    double* result=&(v[offset]);      throw DataException("Programmer error - resolveNodeUnary should not be called on identity nodes.");
1083    roffset=offset;    }
1084      const DataTypes::RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, roffset);
1085      const double* left=&((*leftres)[roffset]);
1086      roffset=m_samplesize*tid;
1087      double* result=&(m_samples[roffset]);
1088    switch (m_op)    switch (m_op)
1089    {    {
1090      case SIN:        case SIN:  
1091      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sin);          tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sin);
1092      break;          break;
1093      case COS:      case COS:
1094      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cos);          tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cos);
1095      break;          break;
1096      case TAN:      case TAN:
1097      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tan);          tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tan);
1098      break;          break;
1099      case ASIN:      case ASIN:
1100      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::asin);          tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::asin);
1101      break;          break;
1102      case ACOS:      case ACOS:
1103      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::acos);          tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::acos);
1104      break;          break;
1105      case ATAN:      case ATAN:
1106      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::atan);          tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::atan);
1107      break;          break;
1108      case SINH:      case SINH:
1109      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sinh);          tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sinh);
1110      break;          break;
1111      case COSH:      case COSH:
1112      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cosh);          tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cosh);
1113      break;          break;
1114      case TANH:      case TANH:
1115      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tanh);          tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tanh);
1116      break;          break;
1117      case ERF:      case ERF:
1118  #if defined (_WIN32) && !defined(__INTEL_COMPILER)  #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1119      throw DataException("Error - Data:: erf function is not supported on _WIN32 platforms.");          throw DataException("Error - Data:: erf function is not supported on _WIN32 platforms.");
1120  #else  #else
1121      tensor_unary_operation(m_samplesize, left, result, ::erf);          tensor_unary_operation(m_samplesize, left, result, ::erf);
1122      break;          break;
1123  #endif  #endif
1124     case ASINH:     case ASINH:
1125  #if defined (_WIN32) && !defined(__INTEL_COMPILER)  #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1126      tensor_unary_operation(m_samplesize, left, result, escript::asinh_substitute);          tensor_unary_operation(m_samplesize, left, result, escript::asinh_substitute);
1127  #else  #else
1128      tensor_unary_operation(m_samplesize, left, result, ::asinh);          tensor_unary_operation(m_samplesize, left, result, ::asinh);
1129  #endif    #endif  
1130      break;          break;
1131     case ACOSH:     case ACOSH:
1132  #if defined (_WIN32) && !defined(__INTEL_COMPILER)  #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1133      tensor_unary_operation(m_samplesize, left, result, escript::acosh_substitute);          tensor_unary_operation(m_samplesize, left, result, escript::acosh_substitute);
1134  #else  #else
1135      tensor_unary_operation(m_samplesize, left, result, ::acosh);          tensor_unary_operation(m_samplesize, left, result, ::acosh);
1136  #endif    #endif  
1137      break;          break;
1138     case ATANH:     case ATANH:
1139  #if defined (_WIN32) && !defined(__INTEL_COMPILER)  #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1140      tensor_unary_operation(m_samplesize, left, result, escript::atanh_substitute);          tensor_unary_operation(m_samplesize, left, result, escript::atanh_substitute);
1141  #else  #else
1142      tensor_unary_operation(m_samplesize, left, result, ::atanh);          tensor_unary_operation(m_samplesize, left, result, ::atanh);
1143  #endif    #endif  
1144      break;          break;
1145      case LOG10:      case LOG10:
1146      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log10);          tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log10);
1147      break;          break;
1148      case LOG:      case LOG:
1149      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log);          tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log);
1150      break;          break;
1151      case SIGN:      case SIGN:
1152      tensor_unary_operation(m_samplesize, left, result, escript::fsign);          tensor_unary_operation(m_samplesize, left, result, escript::fsign);
1153      break;          break;
1154      case ABS:      case ABS:
1155      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::fabs);          tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::fabs);
1156      break;          break;
1157      case NEG:      case NEG:
1158      tensor_unary_operation(m_samplesize, left, result, negate<double>());          tensor_unary_operation(m_samplesize, left, result, negate<double>());
1159      break;          break;
1160      case POS:      case POS:
1161      // it doesn't mean anything for delayed.          // it doesn't mean anything for delayed.
1162      // it will just trigger a deep copy of the lazy object          // it will just trigger a deep copy of the lazy object
1163      throw DataException("Programmer error - POS not supported for lazy data.");          throw DataException("Programmer error - POS not supported for lazy data.");
1164      break;          break;
1165      case EXP:      case EXP:
1166      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::exp);          tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::exp);
1167      break;          break;
1168      case SQRT:      case SQRT:
1169      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sqrt);          tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sqrt);
1170      break;          break;
1171      case RECIP:      case RECIP:
1172      tensor_unary_operation(m_samplesize, left, result, bind1st(divides<double>(),1.));          tensor_unary_operation(m_samplesize, left, result, bind1st(divides<double>(),1.));
1173      break;          break;
1174      case GZ:      case GZ:
1175      tensor_unary_operation(m_samplesize, left, result, bind2nd(greater<double>(),0.0));          tensor_unary_operation(m_samplesize, left, result, bind2nd(greater<double>(),0.0));
1176      break;          break;
1177      case LZ:      case LZ:
1178      tensor_unary_operation(m_samplesize, left, result, bind2nd(less<double>(),0.0));          tensor_unary_operation(m_samplesize, left, result, bind2nd(less<double>(),0.0));
1179      break;          break;
1180      case GEZ:      case GEZ:
1181      tensor_unary_operation(m_samplesize, left, result, bind2nd(greater_equal<double>(),0.0));          tensor_unary_operation(m_samplesize, left, result, bind2nd(greater_equal<double>(),0.0));
1182      break;          break;
1183      case LEZ:      case LEZ:
1184      tensor_unary_operation(m_samplesize, left, result, bind2nd(less_equal<double>(),0.0));          tensor_unary_operation(m_samplesize, left, result, bind2nd(less_equal<double>(),0.0));
1185      break;          break;
1186  // There are actually G_UNARY_P but I don't see a compelling reason to treat them differently  // There are actually G_UNARY_P but I don't see a compelling reason to treat them differently
1187      case NEZ:      case NEZ:
1188      tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsGT(),m_tol));          tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsGT(),m_tol));
1189      break;          break;
1190      case EZ:      case EZ:
1191      tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsLTE(),m_tol));          tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsLTE(),m_tol));
1192      break;          break;
1193    
1194      default:      default:
1195      throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");          throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");
1196    }    }
1197    return &v;    return &(m_samples);
1198  }  }
1199    
1200    
1201    const DataTypes::RealVectorType*
1202    DataLazy::resolveNodeReduction(int tid, int sampleNo, size_t& roffset) const
1203    {
1204            // we assume that any collapsing has been done before we get here
1205            // since we only have one argument we don't need to think about only
1206            // processing single points.
1207            // we will also know we won't get identity nodes
1208      if (m_readytype!='E')
1209      {
1210        throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
1211      }
1212      if (m_op==IDENTITY)
1213      {
1214        throw DataException("Programmer error - resolveNodeUnary should not be called on identity nodes.");
1215      }
1216      size_t loffset=0;
1217      const DataTypes::RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, loffset);
1218    
1219      roffset=m_samplesize*tid;
1220      unsigned int ndpps=getNumDPPSample();
1221      unsigned int psize=DataTypes::noValues(m_left->getShape());
1222      double* result=&(m_samples[roffset]);
1223      switch (m_op)
1224      {
1225        case MINVAL:
1226            {
1227              for (unsigned int z=0;z<ndpps;++z)
1228              {
1229                FMin op;
1230                *result=DataMaths::reductionOp(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max());
1231                loffset+=psize;
1232                result++;
1233              }
1234            }
1235            break;
1236        case MAXVAL:
1237            {
1238              for (unsigned int z=0;z<ndpps;++z)
1239              {
1240              FMax op;
1241              *result=DataMaths::reductionOp(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max()*-1);
1242              loffset+=psize;
1243              result++;
1244              }
1245            }
1246            break;
1247        default:
1248            throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");
1249      }
1250      return &(m_samples);
1251    }
1252    
1253    const DataTypes::RealVectorType*
1254    DataLazy::resolveNodeNP1OUT(int tid, int sampleNo, size_t& roffset) const
 /*  
   \brief Compute the value of the expression (unary operation) for the given sample.  
   \return Vector which stores the value of the subexpression for the given sample.  
   \param v A vector to store intermediate results.  
   \param offset Index in v to begin storing results.  
   \param sampleNo Sample number to evaluate.  
   \param roffset (output parameter) the offset in the return vector where the result begins.  
   
   The return value will be an existing vector so do not deallocate it.  
   If the result is stored in v it should be stored at the offset given.  
   Everything from offset to the end of v should be considered available for this method to use.  
 */  
 DataTypes::ValueType*  
 DataLazy::resolveNP1OUT(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const  
1255  {  {
1256      // we assume that any collapsing has been done before we get here          // we assume that any collapsing has been done before we get here
1257      // since we only have one argument we don't need to think about only          // since we only have one argument we don't need to think about only
1258      // processing single points.          // processing single points.
1259    if (m_readytype!='E')    if (m_readytype!='E')
1260    {    {
1261      throw DataException("Programmer error - resolveNP1OUT should only be called on expanded Data.");      throw DataException("Programmer error - resolveNodeNP1OUT should only be called on expanded Data.");
1262    }    }
1263      // since we can't write the result over the input, we need a result offset further along    if (m_op==IDENTITY)
1264    size_t subroffset=roffset+m_samplesize;    {
1265    const ValueType* vleft=m_left->resolveSample(v,offset,sampleNo,subroffset);      throw DataException("Programmer error - resolveNodeNP1OUT should not be called on identity nodes.");
1266    roffset=offset;    }
1267      size_t subroffset;
1268      const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1269      roffset=m_samplesize*tid;
1270      size_t loop=0;
1271      size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1272      size_t step=getNoValues();
1273      size_t offset=roffset;
1274    switch (m_op)    switch (m_op)
1275    {    {
1276      case SYM:      case SYM:
1277      DataMaths::symmetric(*vleft,m_left->getShape(),subroffset, v, getShape(), offset);          for (loop=0;loop<numsteps;++loop)
1278      break;          {
1279                DataMaths::symmetric(*leftres,m_left->getShape(),subroffset, m_samples, getShape(), offset);
1280                subroffset+=step;
1281                offset+=step;
1282            }
1283            break;
1284      case NSYM:      case NSYM:
1285      DataMaths::nonsymmetric(*vleft,m_left->getShape(),subroffset, v, getShape(), offset);          for (loop=0;loop<numsteps;++loop)
1286      break;          {
1287                DataMaths::nonsymmetric(*leftres,m_left->getShape(),subroffset, m_samples, getShape(), offset);
1288                subroffset+=step;
1289                offset+=step;
1290            }
1291            break;
1292      default:      default:
1293      throw DataException("Programmer error - resolveNP1OUT can not resolve operator "+opToString(m_op)+".");          throw DataException("Programmer error - resolveNP1OUT can not resolve operator "+opToString(m_op)+".");
1294    }    }
1295    return &v;    return &m_samples;
1296  }  }
1297    
1298  /*  const DataTypes::RealVectorType*
1299    \brief Compute the value of the expression (unary operation) for the given sample.  DataLazy::resolveNodeNP1OUT_P(int tid, int sampleNo, size_t& roffset) const
   \return Vector which stores the value of the subexpression for the given sample.  
   \param v A vector to store intermediate results.  
   \param offset Index in v to begin storing results.  
   \param sampleNo Sample number to evaluate.  
   \param roffset (output parameter) the offset in the return vector where the result begins.  
   
   The return value will be an existing vector so do not deallocate it.  
   If the result is stored in v it should be stored at the offset given.  
   Everything from offset to the end of v should be considered available for this method to use.  
 */  
 DataTypes::ValueType*  
 DataLazy::resolveNP1OUT_P(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const  
1300  {  {
1301      // we assume that any collapsing has been done before we get here          // we assume that any collapsing has been done before we get here
1302      // since we only have one argument we don't need to think about only          // since we only have one argument we don't need to think about only
1303      // processing single points.          // processing single points.
1304    if (m_readytype!='E')    if (m_readytype!='E')
1305    {    {
1306      throw DataException("Programmer error - resolveNP1OUT_P should only be called on expanded Data.");      throw DataException("Programmer error - resolveNodeNP1OUT_P should only be called on expanded Data.");
1307    }    }
1308      // since we can't write the result over the input, we need a result offset further along    if (m_op==IDENTITY)
1309    size_t subroffset=roffset+m_samplesize;    {
1310    const ValueType* vleft=m_left->resolveSample(v,offset,sampleNo,subroffset);      throw DataException("Programmer error - resolveNodeNP1OUT_P should not be called on identity nodes.");
1311    roffset=offset;    }
1312      size_t subroffset;
1313      size_t offset;
1314      const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1315      roffset=m_samplesize*tid;
1316      offset=roffset;
1317      size_t loop=0;
1318      size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1319      size_t outstep=getNoValues();
1320      size_t instep=m_left->getNoValues();
1321    switch (m_op)    switch (m_op)
1322    {    {
1323      case TRACE:      case TRACE:
1324           DataMaths::trace(*vleft,m_left->getShape(),subroffset, v,getShape(),offset,m_axis_offset);          for (loop=0;loop<numsteps;++loop)
1325      break;          {
1326                DataMaths::trace(*leftres,m_left->getShape(),subroffset, m_samples ,getShape(),offset,m_axis_offset);
1327                subroffset+=instep;
1328                offset+=outstep;
1329            }
1330            break;
1331      case TRANS:      case TRANS:
1332           DataMaths::transpose(*vleft,m_left->getShape(),subroffset, v,getShape(),offset,m_axis_offset);          for (loop=0;loop<numsteps;++loop)
1333      break;          {
1334                DataMaths::transpose(*leftres,m_left->getShape(),subroffset, m_samples, getShape(),offset,m_axis_offset);
1335                subroffset+=instep;
1336                offset+=outstep;
1337            }
1338            break;
1339      default:      default:
1340      throw DataException("Programmer error - resolveNP1OUTP can not resolve operator "+opToString(m_op)+".");          throw DataException("Programmer error - resolveNP1OUTP can not resolve operator "+opToString(m_op)+".");
1341    }    }
1342    return &v;    return &m_samples;
1343  }  }
1344    
1345    
1346  #define PROC_OP(TYPE,X)                               \  const DataTypes::RealVectorType*
1347      for (int i=0;i<steps;++i,resultp+=resultStep) \  DataLazy::resolveNodeNP1OUT_2P(int tid, int sampleNo, size_t& roffset) const
1348      { \  {
1349         tensor_binary_operation< TYPE >(chunksize, &((*left)[lroffset]), &((*right)[rroffset]), resultp, X); \    if (m_readytype!='E')
1350         lroffset+=leftStep; \    {
1351         rroffset+=rightStep; \      throw DataException("Programmer error - resolveNodeNP1OUT_2P should only be called on expanded Data.");
1352      }    }
1353      if (m_op==IDENTITY)
1354      {
1355        throw DataException("Programmer error - resolveNodeNP1OUT_2P should not be called on identity nodes.");
1356      }
1357      size_t subroffset;
1358      size_t offset;
1359      const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1360      roffset=m_samplesize*tid;
1361      offset=roffset;
1362      size_t loop=0;
1363      size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1364      size_t outstep=getNoValues();
1365      size_t instep=m_left->getNoValues();
1366      switch (m_op)
1367      {
1368        case SWAP:
1369            for (loop=0;loop<numsteps;++loop)
1370            {
1371                DataMaths::swapaxes(*leftres,m_left->getShape(),subroffset, m_samples, getShape(),offset, m_axis_offset, m_transpose);
1372                subroffset+=instep;
1373                offset+=outstep;
1374            }
1375            break;
1376        default:
1377            throw DataException("Programmer error - resolveNodeNP1OUT2P can not resolve operator "+opToString(m_op)+".");
1378      }
1379      return &m_samples;
1380    }
1381    
1382    const DataTypes::RealVectorType*
1383    DataLazy::resolveNodeCondEval(int tid, int sampleNo, size_t& roffset) const
1384    {
1385      if (m_readytype!='E')
1386      {
1387        throw DataException("Programmer error - resolveNodeCondEval should only be called on expanded Data.");
1388      }
1389      if (m_op!=CONDEVAL)
1390      {
1391        throw DataException("Programmer error - resolveNodeCondEval should only be called on CONDEVAL nodes.");
1392      }
1393      size_t subroffset;
1394    
1395      const ValueType* maskres=m_mask->resolveNodeSample(tid, sampleNo, subroffset);
1396      const ValueType* srcres=0;
1397      if ((*maskres)[subroffset]>0)
1398      {
1399            srcres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1400      }
1401      else
1402      {
1403            srcres=m_right->resolveNodeSample(tid, sampleNo, subroffset);
1404      }
1405    
1406      // Now we need to copy the result
1407    
1408      roffset=m_samplesize*tid;
1409      for (int i=0;i<m_samplesize;++i)
1410      {
1411            m_samples[roffset+i]=(*srcres)[subroffset+i];  
1412      }
1413    
1414      return &m_samples;
1415    }
1416    
 /*  
   \brief Compute the value of the expression (binary operation) for the given sample.  
   \return Vector which stores the value of the subexpression for the given sample.  
   \param v A vector to store intermediate results.  
   \param offset Index in v to begin storing results.  
   \param sampleNo Sample number to evaluate.  
   \param roffset (output parameter) the offset in the return vector where the result begins.  
   
   The return value will be an existing vector so do not deallocate it.  
   If the result is stored in v it should be stored at the offset given.  
   Everything from offset to the end of v should be considered available for this method to use.  
 */  
1417  // This method assumes that any subexpressions which evaluate to Constant or Tagged Data  // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1418  // have already been collapsed to IDENTITY. So we must have at least one expanded child.  // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1419  // If both children are expanded, then we can process them in a single operation (we treat  // If both children are expanded, then we can process them in a single operation (we treat
# Line 998  DataLazy::resolveNP1OUT_P(ValueType& v, Line 1423  DataLazy::resolveNP1OUT_P(ValueType& v,
1423  // There is an additional complication when scalar operations are considered.  // There is an additional complication when scalar operations are considered.
1424  // For example, 2+Vector.  // For example, 2+Vector.
1425  // In this case each double within the point is treated individually  // In this case each double within the point is treated individually
1426  DataTypes::ValueType*  const DataTypes::RealVectorType*
1427  DataLazy::resolveBinary(ValueType& v,  size_t offset, int sampleNo, size_t& roffset) const  DataLazy::resolveNodeBinary(int tid, int sampleNo, size_t& roffset) const
1428  {  {
1429  LAZYDEBUG(cout << "Resolve binary: " << toString() << endl;)  LAZYDEBUG(cout << "Resolve binary: " << toString() << endl;)
1430    
1431    size_t lroffset=0, rroffset=0;    // offsets in the left and right result vectors    size_t lroffset=0, rroffset=0;        // offsets in the left and right result vectors
1432      // first work out which of the children are expanded          // first work out which of the children are expanded
1433    bool leftExp=(m_left->m_readytype=='E');    bool leftExp=(m_left->m_readytype=='E');
1434    bool rightExp=(m_right->m_readytype=='E');    bool rightExp=(m_right->m_readytype=='E');
1435    if (!leftExp && !rightExp)    if (!leftExp && !rightExp)
1436    {    {
1437      throw DataException("Programmer Error - please use collapse if neither argument has type 'E'.");          throw DataException("Programmer Error - please use collapse if neither argument has type 'E'.");
1438    }    }
1439    bool leftScalar=(m_left->getRank()==0);    bool leftScalar=(m_left->getRank()==0);
1440    bool rightScalar=(m_right->getRank()==0);    bool rightScalar=(m_right->getRank()==0);
1441    bool bigloops=((leftExp && rightExp) || (!leftExp && !rightExp)); // is processing in single step?    if ((m_left->getRank()!=m_right->getRank()) && (!leftScalar && !rightScalar))
1442    int steps=(bigloops?1:getNumDPPSample());    {
1443    size_t chunksize=(bigloops? m_samplesize : getNoValues());    // if bigloops, pretend the whole sample is a datapoint          throw DataException("resolveBinary - ranks of arguments must match unless one of them is scalar.");
1444    if (m_left->getRank()!=m_right->getRank())    // need to deal with scalar * ? ops    }
1445    {    size_t leftsize=m_left->getNoValues();
1446      if (!leftScalar && !rightScalar)    size_t rightsize=m_right->getNoValues();
1447      {    size_t chunksize=1;                   // how many doubles will be processed in one go
1448         throw DataException("resolveBinary - ranks of arguments must match unless one of them is scalar.");    int leftstep=0;               // how far should the left offset advance after each step
1449      }    int rightstep=0;
1450      steps=getNumDPPSample()*max(m_left->getNoValues(),m_right->getNoValues());    int numsteps=0;               // total number of steps for the inner loop
1451      chunksize=1;    // for scalar    int oleftstep=0;      // the o variables refer to the outer loop
1452    }        int orightstep=0;     // The outer loop is only required in cases where there is an extended scalar
1453    int leftStep=((leftExp && (!rightExp || rightScalar))? m_right->getNoValues() : 0);    int onumsteps=1;
1454    int rightStep=((rightExp && (!leftExp || leftScalar))? m_left->getNoValues() : 0);    
1455    int resultStep=max(leftStep,rightStep);   // only one (at most) should be !=0    bool LES=(leftExp && leftScalar);     // Left is an expanded scalar
1456      // Get the values of sub-expressions    bool RES=(rightExp && rightScalar);
1457    const ValueType* left=m_left->resolveSample(v,offset,sampleNo,lroffset);    bool LS=(!leftExp && leftScalar);     // left is a single scalar
1458    const ValueType* right=m_right->resolveSample(v,offset+m_samplesize,sampleNo,rroffset); // Note    bool RS=(!rightExp && rightScalar);
1459      // the right child starts further along.    bool LN=(!leftExp && !leftScalar);    // left is a single non-scalar
1460    double* resultp=&(v[offset]);     // results are stored at the vector offset we recieved    bool RN=(!rightExp && !rightScalar);
1461      bool LEN=(leftExp && !leftScalar);    // left is an expanded non-scalar
1462      bool REN=(rightExp && !rightScalar);
1463    
1464      if ((LES && RES) || (LEN && REN))     // both are Expanded scalars or both are expanded non-scalars
1465      {
1466            chunksize=m_left->getNumDPPSample()*leftsize;
1467            leftstep=0;
1468            rightstep=0;
1469            numsteps=1;
1470      }
1471      else if (LES || RES)
1472      {
1473            chunksize=1;
1474            if (LES)                // left is an expanded scalar
1475            {
1476                    if (RS)
1477                    {
1478                       leftstep=1;
1479                       rightstep=0;
1480                       numsteps=m_left->getNumDPPSample();
1481                    }
1482                    else            // RN or REN
1483                    {
1484                       leftstep=0;
1485                       oleftstep=1;
1486                       rightstep=1;
1487                       orightstep=(RN ? -(int)rightsize : 0);
1488                       numsteps=rightsize;
1489                       onumsteps=m_left->getNumDPPSample();
1490                    }
1491            }
1492            else            // right is an expanded scalar
1493            {
1494                    if (LS)
1495                    {
1496                       rightstep=1;
1497                       leftstep=0;
1498                       numsteps=m_right->getNumDPPSample();
1499                    }
1500                    else
1501                    {
1502                       rightstep=0;
1503                       orightstep=1;
1504                       leftstep=1;
1505                       oleftstep=(LN ? -(int)leftsize : 0);
1506                       numsteps=leftsize;
1507                       onumsteps=m_right->getNumDPPSample();
1508                    }
1509            }
1510      }
1511      else  // this leaves (LEN, RS), (LEN, RN) and their transposes
1512      {
1513            if (LEN)        // and Right will be a single value
1514            {
1515                    chunksize=rightsize;
1516                    leftstep=rightsize;
1517                    rightstep=0;
1518                    numsteps=m_left->getNumDPPSample();
1519                    if (RS)
1520                    {
1521                       numsteps*=leftsize;
1522                    }
1523            }
1524            else    // REN
1525            {
1526                    chunksize=leftsize;
1527                    rightstep=leftsize;
1528                    leftstep=0;
1529                    numsteps=m_right->getNumDPPSample();
1530                    if (LS)
1531                    {
1532                       numsteps*=rightsize;
1533                    }
1534            }
1535      }
1536    
1537      int resultStep=max(leftstep,rightstep);       // only one (at most) should be !=0
1538            // Get the values of sub-expressions
1539      const ValueType* left=m_left->resolveNodeSample(tid,sampleNo,lroffset);      
1540      const ValueType* right=m_right->resolveNodeSample(tid,sampleNo,rroffset);
1541    LAZYDEBUG(cout << "Post sub calls in " << toString() << endl;)
1542    LAZYDEBUG(cout << "shapes=" << DataTypes::shapeToString(m_left->getShape()) << "," << DataTypes::shapeToString(m_right->getShape()) << endl;)
1543    LAZYDEBUG(cout << "chunksize=" << chunksize << endl << "leftstep=" << leftstep << " rightstep=" << rightstep;)
1544    LAZYDEBUG(cout << " numsteps=" << numsteps << endl << "oleftstep=" << oleftstep << " orightstep=" << orightstep;)
1545    LAZYDEBUG(cout << "onumsteps=" << onumsteps << endl;)
1546    LAZYDEBUG(cout << " DPPS=" << m_left->getNumDPPSample() << "," <<m_right->getNumDPPSample() << endl;)
1547    LAZYDEBUG(cout << "" << LS << RS << LN << RN << LES << RES <<LEN << REN <<   endl;)
1548    
1549    LAZYDEBUG(cout << "Left res["<< lroffset<< "]=" << (*left)[lroffset] << endl;)
1550    LAZYDEBUG(cout << "Right res["<< rroffset<< "]=" << (*right)[rroffset] << endl;)
1551    
1552    
1553      roffset=m_samplesize*tid;
1554      double* resultp=&(m_samples[roffset]);                // results are stored at the vector offset we received
1555    switch(m_op)    switch(m_op)
1556    {    {
1557      case ADD:      case ADD:
1558          PROC_OP(NO_ARG,plus<double>());          PROC_OP(NO_ARG,plus<double>());
1559      break;          break;
1560      case SUB:      case SUB:
1561      PROC_OP(NO_ARG,minus<double>());          PROC_OP(NO_ARG,minus<double>());
1562      break;          break;
1563      case MUL:      case MUL:
1564      PROC_OP(NO_ARG,multiplies<double>());          PROC_OP(NO_ARG,multiplies<double>());
1565      break;          break;
1566      case DIV:      case DIV:
1567      PROC_OP(NO_ARG,divides<double>());          PROC_OP(NO_ARG,divides<double>());
1568      break;          break;
1569      case POW:      case POW:
1570         PROC_OP(double (double,double),::pow);         PROC_OP(double (double,double),::pow);
1571      break;          break;
1572      default:      default:
1573      throw DataException("Programmer error - resolveBinary can not resolve operator "+opToString(m_op)+".");          throw DataException("Programmer error - resolveBinary can not resolve operator "+opToString(m_op)+".");
1574    }    }
1575    roffset=offset;    LAZYDEBUG(cout << "Result res[" << roffset<< "]" << m_samples[roffset] << endl;)
1576    return &v;    return &m_samples;
1577  }  }
1578    
1579    
 /*  
   \brief Compute the value of the expression (tensor product) for the given sample.  
   \return Vector which stores the value of the subexpression for the given sample.  
   \param v A vector to store intermediate results.  
   \param offset Index in v to begin storing results.  
   \param sampleNo Sample number to evaluate.  
   \param roffset (output parameter) the offset in the return vector where the result begins.  
   
   The return value will be an existing vector so do not deallocate it.  
   If the result is stored in v it should be stored at the offset given.  
   Everything from offset to the end of v should be considered available for this method to use.  
 */  
1580  // This method assumes that any subexpressions which evaluate to Constant or Tagged Data  // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1581  // have already been collapsed to IDENTITY. So we must have at least one expanded child.  // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1582  // unlike the other resolve helpers, we must treat these datapoints separately.  // unlike the other resolve helpers, we must treat these datapoints separately.
1583  DataTypes::ValueType*  const DataTypes::RealVectorType*
1584  DataLazy::resolveTProd(ValueType& v,  size_t offset, int sampleNo, size_t& roffset) const  DataLazy::resolveNodeTProd(int tid, int sampleNo, size_t& roffset) const
1585  {  {
1586  LAZYDEBUG(cout << "Resolve TensorProduct: " << toString() << endl;)  LAZYDEBUG(cout << "Resolve TensorProduct: " << toString() << endl;)
1587    
1588    size_t lroffset=0, rroffset=0;    // offsets in the left and right result vectors    size_t lroffset=0, rroffset=0;        // offsets in the left and right result vectors
1589      // first work out which of the children are expanded          // first work out which of the children are expanded
1590    bool leftExp=(m_left->m_readytype=='E');    bool leftExp=(m_left->m_readytype=='E');
1591    bool rightExp=(m_right->m_readytype=='E');    bool rightExp=(m_right->m_readytype=='E');
1592    int steps=getNumDPPSample();    int steps=getNumDPPSample();
1593    int leftStep=((leftExp && !rightExp)? m_right->getNoValues() : 0);    int leftStep=(leftExp? m_left->getNoValues() : 0);            // do not have scalars as input to this method
1594    int rightStep=((rightExp && !leftExp)? m_left->getNoValues() : 0);    int rightStep=(rightExp?m_right->getNoValues() : 0);
1595    int resultStep=max(leftStep,rightStep);   // only one (at most) should be !=0  
1596      // Get the values of sub-expressions (leave a gap of one sample for the result).    int resultStep=getNoValues();
1597    const ValueType* left=m_left->resolveSample(v,offset+m_samplesize,sampleNo,lroffset);    roffset=m_samplesize*tid;
1598    const ValueType* right=m_right->resolveSample(v,offset+2*m_samplesize,sampleNo,rroffset);    size_t offset=roffset;
1599    double* resultp=&(v[offset]);     // results are stored at the vector offset we recieved  
1600      const ValueType* left=m_left->resolveNodeSample(tid, sampleNo, lroffset);
1601    
1602      const ValueType* right=m_right->resolveNodeSample(tid, sampleNo, rroffset);
1603    
1604    LAZYDEBUG(cerr << "[Left shape]=" << DataTypes::shapeToString(m_left->getShape()) << "\n[Right shape]=" << DataTypes::shapeToString(m_right->getShape()) << " result=" <<DataTypes::shapeToString(getShape()) <<  endl;
1605    cout << getNoValues() << endl;)
1606    
1607    
1608    LAZYDEBUG(cerr << "Post sub calls: " << toString() << endl;)
1609    LAZYDEBUG(cout << "LeftExp=" << leftExp << " rightExp=" << rightExp << endl;)
1610    LAZYDEBUG(cout << "LeftR=" << m_left->getRank() << " rightExp=" << m_right->getRank() << endl;)
1611    LAZYDEBUG(cout << "LeftSize=" << m_left->getNoValues() << " RightSize=" << m_right->getNoValues() << endl;)
1612    LAZYDEBUG(cout << "m_samplesize=" << m_samplesize << endl;)
1613    LAZYDEBUG(cout << "outputshape=" << DataTypes::shapeToString(getShape()) << endl;)
1614    LAZYDEBUG(cout << "DPPS=" << m_right->getNumDPPSample() <<"."<<endl;)
1615    
1616      double* resultp=&(m_samples[offset]);         // results are stored at the vector offset we received
1617    switch(m_op)    switch(m_op)
1618    {    {
1619      case PROD:      case PROD:
1620      for (int i=0;i<steps;++i,resultp+=resultStep)          for (int i=0;i<steps;++i,resultp+=resultStep)
1621      {          {
1622            const double *ptr_0 = &((*left)[lroffset]);            const double *ptr_0 = &((*left)[lroffset]);
1623            const double *ptr_1 = &((*right)[rroffset]);            const double *ptr_1 = &((*right)[rroffset]);
1624            matrix_matrix_product(m_SL, m_SM, m_SR, ptr_0, ptr_1, resultp, m_transpose);  
1625        lroffset+=leftStep;  LAZYDEBUG(cout << DataTypes::pointToString(*left, m_left->getShape(),lroffset,"LEFT") << endl;)
1626        rroffset+=rightStep;  LAZYDEBUG(cout << DataTypes::pointToString(*right,m_right->getShape(),rroffset, "RIGHT") << endl;)
1627      }  
1628      break;            matrix_matrix_product(m_SL, m_SM, m_SR, ptr_0, ptr_1, resultp, m_transpose);
1629    
1630              lroffset+=leftStep;
1631              rroffset+=rightStep;
1632            }
1633            break;
1634      default:      default:
1635      throw DataException("Programmer error - resolveTProduct can not resolve operator "+opToString(m_op)+".");          throw DataException("Programmer error - resolveTProduct can not resolve operator "+opToString(m_op)+".");
1636    }    }
1637    roffset=offset;    roffset=offset;
1638    return &v;    return &m_samples;
1639  }  }
1640    
1641    
1642    const DataTypes::RealVectorType*
1643    DataLazy::resolveSample(int sampleNo, size_t& roffset) const
1644    {
1645    #ifdef _OPENMP
1646            int tid=omp_get_thread_num();
1647    #else
1648            int tid=0;
1649    #endif
1650    
1651  /*  #ifdef LAZY_STACK_PROF
1652    \brief Compute the value of the expression for the given sample.          stackstart[tid]=&tid;
1653    \return Vector which stores the value of the subexpression for the given sample.          stackend[tid]=&tid;
1654    \param v A vector to store intermediate results.          const DataTypes::RealVectorType* r=resolveNodeSample(tid, sampleNo, roffset);
1655    \param offset Index in v to begin storing results.          size_t d=(size_t)stackstart[tid]-(size_t)stackend[tid];
1656    \param sampleNo Sample number to evaluate.          #pragma omp critical
1657    \param roffset (output parameter) the offset in the return vector where the result begins.          if (d>maxstackuse)
1658            {
1659    cout << "Max resolve Stack use " << d << endl;
1660                    maxstackuse=d;
1661            }
1662            return r;
1663    #else
1664            return resolveNodeSample(tid, sampleNo, roffset);
1665    #endif
1666    }
1667    
1668    The return value will be an existing vector so do not deallocate it.  
1669  */  // This needs to do the work of the identity constructor
1670  // the vector and the offset are a place where the method could write its data if it wishes  void
1671  // it is not obligated to do so. For example, if it has its own storage already, it can use that.  DataLazy::resolveToIdentity()
 // Hence the return value to indicate where the data is actually stored.  
 // Regardless, the storage should be assumed to be used, even if it isn't.  
   
 // the roffset is the offset within the returned vector where the data begins  
 const DataTypes::ValueType*  
 DataLazy::resolveSample(ValueType& v, size_t offset, int sampleNo, size_t& roffset)  
1672  {  {
1673  LAZYDEBUG(cout << "Resolve sample " << toString() << endl;)     if (m_op==IDENTITY)
1674      // collapse so we have a 'E' node or an IDENTITY for some other type          return;
1675    if (m_readytype!='E' && m_op!=IDENTITY)     DataReady_ptr p=resolveNodeWorker();
1676    {     makeIdentity(p);
1677      collapse();  }
1678    }  
1679    if (m_op==IDENTITY)    void DataLazy::makeIdentity(const DataReady_ptr& p)
1680    {
1681       m_op=IDENTITY;
1682       m_axis_offset=0;
1683       m_transpose=0;
1684       m_SL=m_SM=m_SR=0;
1685       m_children=m_height=0;
1686       m_id=p;
1687       if(p->isConstant()) {m_readytype='C';}
1688       else if(p->isExpanded()) {m_readytype='E';}
1689       else if (p->isTagged()) {m_readytype='T';}
1690       else {throw DataException("Unknown DataReady instance in convertToIdentity constructor.");}
1691       m_samplesize=p->getNumDPPSample()*p->getNoValues();
1692       m_left.reset();
1693       m_right.reset();
1694    }
1695    
1696    
1697    DataReady_ptr
1698    DataLazy::resolve()
1699    {
1700        resolveToIdentity();
1701        return m_id;
1702    }
1703    
1704    
1705    /* This is really a static method but I think that caused problems in windows */
1706    void
1707    DataLazy::resolveGroupWorker(std::vector<DataLazy*>& dats)
1708    {
1709      if (dats.empty())
1710    {    {
1711      const ValueType& vec=m_id->getVector();          return;
     if (m_readytype=='C')  
     {  
     roffset=0;  
     return &(vec);  
     }  
     roffset=m_id->getPointOffset(sampleNo, 0);  
     return &(vec);  
1712    }    }
1713    if (m_readytype!='E')    vector<DataLazy*> work;
1714    {    FunctionSpace fs=dats[0]->getFunctionSpace();
1715      throw DataException("Programmer Error - Collapse did not produce an expanded node.");    bool match=true;
1716      for (int i=dats.size()-1;i>=0;--i)
1717      {
1718            if (dats[i]->m_readytype!='E')
1719            {
1720                    dats[i]->collapse();
1721            }
1722            if (dats[i]->m_op!=IDENTITY)
1723            {
1724                    work.push_back(dats[i]);
1725                    if (fs!=dats[i]->getFunctionSpace())
1726                    {
1727                            match=false;
1728                    }
1729            }
1730      }
1731      if (work.empty())
1732      {
1733            return;         // no work to do
1734      }
1735      if (match)    // all functionspaces match.  Yes I realise this is overly strict
1736      {             // it is possible that dats[0] is one of the objects which we discarded and
1737                    // all the other functionspaces match.
1738            vector<DataExpanded*> dep;
1739            vector<ValueType*> vecs;
1740            for (int i=0;i<work.size();++i)
1741            {
1742                    dep.push_back(new DataExpanded(fs,work[i]->getShape(), ValueType(work[i]->getNoValues())));
1743                    vecs.push_back(&(dep[i]->getVectorRW()));
1744            }
1745            int totalsamples=work[0]->getNumSamples();
1746            const ValueType* res=0; // Storage for answer
1747            int sample;
1748            #pragma omp parallel private(sample, res)
1749            {
1750                size_t roffset=0;
1751                #pragma omp for schedule(static)
1752                for (sample=0;sample<totalsamples;++sample)
1753                {
1754                    roffset=0;
1755                    int j;
1756                    for (j=work.size()-1;j>=0;--j)
1757                    {
1758    #ifdef _OPENMP
1759                        res=work[j]->resolveNodeSample(omp_get_thread_num(),sample,roffset);
1760    #else
1761                        res=work[j]->resolveNodeSample(0,sample,roffset);
1762    #endif
1763                        RealVectorType::size_type outoffset=dep[j]->getPointOffset(sample,0);
1764                        memcpy(&((*vecs[j])[outoffset]),&((*res)[roffset]),work[j]->m_samplesize*sizeof(RealVectorType::ElementType));
1765                    }
1766                }
1767            }
1768            // Now we need to load the new results as identity ops into the lazy nodes
1769            for (int i=work.size()-1;i>=0;--i)
1770            {
1771                work[i]->makeIdentity(REFCOUNTNS::dynamic_pointer_cast<DataReady>(dep[i]->getPtr()));
1772            }
1773    }    }
1774    switch (getOpgroup(m_op))    else  // functionspaces do not match
1775    {    {
1776    case G_UNARY:          for (int i=0;i<work.size();++i)
1777    case G_UNARY_P: return resolveUnary(v, offset,sampleNo,roffset);          {
1778    case G_BINARY: return resolveBinary(v, offset,sampleNo,roffset);                  work[i]->resolveToIdentity();
1779    case G_NP1OUT: return resolveNP1OUT(v, offset, sampleNo,roffset);          }
   case G_NP1OUT_P: return resolveNP1OUT_P(v, offset, sampleNo,roffset);  
   case G_TENSORPROD: return resolveTProd(v,offset, sampleNo,roffset);  
   default:  
     throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");  
1780    }    }
1781  }  }
1782    
1783    
1784  // To simplify the memory management, all threads operate on one large vector, rather than one each.  
1785  // Each sample is evaluated independently and copied into the result DataExpanded.  // This version of resolve uses storage in each node to hold results
1786  DataReady_ptr  DataReady_ptr
1787  DataLazy::resolve()  DataLazy::resolveNodeWorker()
1788  {  {
1789      if (m_readytype!='E')         // if the whole sub-expression is Constant or Tagged, then evaluate it normally
 LAZYDEBUG(cout << "Sample size=" << m_samplesize << endl;)  
 LAZYDEBUG(cout << "Buffers=" << m_buffsRequired << endl;)  
   
   if (m_readytype!='E')     // if the whole sub-expression is Constant or Tagged, then evaluate it normally  
1790    {    {
1791      collapse();      collapse();
1792    }    }
1793    if (m_op==IDENTITY)       // So a lazy expression of Constant or Tagged data will be returned here.    if (m_op==IDENTITY)           // So a lazy expression of Constant or Tagged data will be returned here.
1794    {    {
1795      return m_id;      return m_id;
1796    }    }
1797      // from this point on we must have m_op!=IDENTITY and m_readytype=='E'          // from this point on we must have m_op!=IDENTITY and m_readytype=='E'
   size_t threadbuffersize=m_maxsamplesize*(max(1,m_buffsRequired)); // Each thread needs to have enough  
     // storage to evaluate its expression  
   int numthreads=1;  
 #ifdef _OPENMP  
   numthreads=getNumberOfThreads();  
 #endif  
   ValueType v(numthreads*threadbuffersize);  
 LAZYDEBUG(cout << "Buffer created with size=" << v.size() << endl;)  
1798    DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(),  ValueType(getNoValues()));    DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(),  ValueType(getNoValues()));
1799    ValueType& resvec=result->getVector();    ValueType& resvec=result->getVectorRW();
1800    DataReady_ptr resptr=DataReady_ptr(result);    DataReady_ptr resptr=DataReady_ptr(result);
1801    
1802    int sample;    int sample;
   size_t outoffset;     // offset in the output data  
1803    int totalsamples=getNumSamples();    int totalsamples=getNumSamples();
1804    const ValueType* res=0;   // Vector storing the answer    const ValueType* res=0;       // Storage for answer
1805    size_t resoffset=0;       // where in the vector to find the answer  LAZYDEBUG(cout << "Total number of samples=" <<totalsamples << endl;)
1806    #pragma omp parallel for private(sample,resoffset,outoffset,res) schedule(static)    #pragma omp parallel private(sample,res)
1807    for (sample=0;sample<totalsamples;++sample)    {
1808    {          size_t roffset=0;
1809  LAZYDEBUG(cout << "################################# " << sample << endl;)  #ifdef LAZY_STACK_PROF
1810            stackstart[omp_get_thread_num()]=&roffset;
1811            stackend[omp_get_thread_num()]=&roffset;
1812    #endif
1813            #pragma omp for schedule(static)
1814            for (sample=0;sample<totalsamples;++sample)
1815            {
1816                    roffset=0;
1817  #ifdef _OPENMP  #ifdef _OPENMP
1818      res=resolveSample(v,threadbuffersize*omp_get_thread_num(),sample,resoffset);                  res=resolveNodeSample(omp_get_thread_num(),sample,roffset);
1819  #else  #else
1820      res=resolveSample(v,0,sample,resoffset);   // res would normally be v, but not if its a single IDENTITY op.                  res=resolveNodeSample(0,sample,roffset);
1821  #endif  #endif
1822  LAZYDEBUG(cerr << "-------------------------------- " << endl;)  LAZYDEBUG(cout << "Sample #" << sample << endl;)
1823      outoffset=result->getPointOffset(sample,0);  LAZYDEBUG(cout << "Final res[" << roffset<< "]=" << (*res)[roffset] << (*res)[roffset]<< endl; )
1824  LAZYDEBUG(cerr << "offset=" << outoffset << endl;)                  RealVectorType::size_type outoffset=result->getPointOffset(sample,0);
1825      for (unsigned int i=0;i<m_samplesize;++i,++outoffset,++resoffset)   // copy values into the output vector                  memcpy(&(resvec[outoffset]),&((*res)[roffset]),m_samplesize*sizeof(RealVectorType::ElementType));
1826      {          }
1827      resvec[outoffset]=(*res)[resoffset];    }
1828      }  #ifdef LAZY_STACK_PROF
1829  LAZYDEBUG(cerr << "*********************************" << endl;)    for (int i=0;i<getNumberOfThreads();++i)
1830      {
1831            size_t r=((size_t)stackstart[i] - (size_t)stackend[i]);
1832    //      cout << i << " " << stackstart[i] << " .. " << stackend[i] << " = " <<  r << endl;
1833            if (r>maxstackuse)
1834            {
1835                    maxstackuse=r;
1836            }
1837    }    }
1838      cout << "Max resolve Stack use=" << maxstackuse << endl;
1839    #endif
1840    return resptr;    return resptr;
1841  }  }
1842    
# Line 1224  std::string Line 1844  std::string
1844  DataLazy::toString() const  DataLazy::toString() const
1845  {  {
1846    ostringstream oss;    ostringstream oss;
1847    oss << "Lazy Data:";    oss << "Lazy Data: [depth=" << m_height<< "] ";
1848    intoString(oss);    switch (escriptParams.getLAZY_STR_FMT())
1849      {
1850      case 1:       // tree format
1851            oss << endl;
1852            intoTreeString(oss,"");
1853            break;
1854      case 2:       // just the depth
1855            break;
1856      default:
1857            intoString(oss);
1858            break;
1859      }
1860    return oss.str();    return oss.str();
1861  }  }
1862    
# Line 1233  DataLazy::toString() const Line 1864  DataLazy::toString() const
1864  void  void
1865  DataLazy::intoString(ostringstream& oss) const  DataLazy::intoString(ostringstream& oss) const
1866  {  {
1867    //    oss << "[" << m_children <<";"<<m_height <<"]";
1868    switch (getOpgroup(m_op))    switch (getOpgroup(m_op))
1869    {    {
1870    case G_IDENTITY:    case G_IDENTITY:
1871      if (m_id->isExpanded())          if (m_id->isExpanded())
1872      {          {
1873         oss << "E";             oss << "E";
1874      }          }
1875      else if (m_id->isTagged())          else if (m_id->isTagged())
1876      {          {
1877        oss << "T";            oss << "T";
1878      }          }
1879      else if (m_id->isConstant())          else if (m_id->isConstant())
1880      {          {
1881        oss << "C";            oss << "C";
1882      }          }
1883      else          else
1884      {          {
1885        oss << "?";            oss << "?";
1886      }          }
1887      oss << '@' << m_id.get();          oss << '@' << m_id.get();
1888      break;          break;
1889    case G_BINARY:    case G_BINARY:
1890      oss << '(';          oss << '(';
1891      m_left->intoString(oss);          m_left->intoString(oss);
1892      oss << ' ' << opToString(m_op) << ' ';          oss << ' ' << opToString(m_op) << ' ';
1893      m_right->intoString(oss);          m_right->intoString(oss);
1894      oss << ')';          oss << ')';
1895      break;          break;
1896    case G_UNARY:    case G_UNARY:
1897    case G_UNARY_P:    case G_UNARY_P:
1898    case G_NP1OUT:    case G_NP1OUT:
1899    case G_NP1OUT_P:    case G_NP1OUT_P:
1900      oss << opToString(m_op) << '(';    case G_REDUCTION:
1901      m_left->intoString(oss);          oss << opToString(m_op) << '(';
1902      oss << ')';          m_left->intoString(oss);
1903      break;          oss << ')';
1904            break;
1905    case G_TENSORPROD:    case G_TENSORPROD:
1906      oss << opToString(m_op) << '(';          oss << opToString(m_op) << '(';
1907      m_left->intoString(oss);          m_left->intoString(oss);
1908      oss << ", ";          oss << ", ";
1909      m_right->intoString(oss);          m_right->intoString(oss);
1910      oss << ')';          oss << ')';
1911      break;          break;
1912      case G_NP1OUT_2P:
1913            oss << opToString(m_op) << '(';
1914            m_left->intoString(oss);
1915            oss << ", " << m_axis_offset << ", " << m_transpose;
1916            oss << ')';
1917            break;
1918      case G_CONDEVAL:
1919            oss << opToString(m_op)<< '(' ;
1920            m_mask->intoString(oss);
1921            oss << " ? ";
1922            m_left->intoString(oss);
1923            oss << " : ";
1924            m_right->intoString(oss);
1925            oss << ')';
1926            break;
1927    default:    default:
1928      oss << "UNKNOWN";          oss << "UNKNOWN";
1929    }    }
1930  }  }
1931    
1932    
1933    void
1934    DataLazy::intoTreeString(ostringstream& oss, string indent) const
1935    {
1936      oss << '[' << m_rank << ':' << setw(3) << m_samplesize << "] " << indent;
1937      switch (getOpgroup(m_op))
1938      {
1939      case G_IDENTITY:
1940            if (m_id->isExpanded())
1941            {
1942               oss << "E";
1943            }
1944            else if (m_id->isTagged())
1945            {
1946              oss << "T";
1947            }
1948            else if (m_id->isConstant())
1949            {
1950              oss << "C";
1951            }
1952            else
1953            {
1954              oss << "?";
1955            }
1956            oss << '@' << m_id.get() << endl;
1957            break;
1958      case G_BINARY:
1959            oss << opToString(m_op) << endl;
1960            indent+='.';
1961            m_left->intoTreeString(oss, indent);
1962            m_right->intoTreeString(oss, indent);
1963            break;
1964      case G_UNARY:
1965      case G_UNARY_P:
1966      case G_NP1OUT:
1967      case G_NP1OUT_P:
1968      case G_REDUCTION:
1969            oss << opToString(m_op) << endl;
1970            indent+='.';
1971            m_left->intoTreeString(oss, indent);
1972            break;
1973      case G_TENSORPROD:
1974            oss << opToString(m_op) << endl;
1975            indent+='.';
1976            m_left->intoTreeString(oss, indent);
1977            m_right->intoTreeString(oss, indent);
1978            break;
1979      case G_NP1OUT_2P:
1980            oss << opToString(m_op) << ", " << m_axis_offset << ", " << m_transpose<< endl;
1981            indent+='.';
1982            m_left->intoTreeString(oss, indent);
1983            break;
1984      default:
1985            oss << "UNKNOWN";
1986      }
1987    }
1988    
1989    
1990  DataAbstract*  DataAbstract*
1991  DataLazy::deepCopy()  DataLazy::deepCopy() const
1992  {  {
1993    switch (getOpgroup(m_op))    switch (getOpgroup(m_op))
1994    {    {
1995    case G_IDENTITY:  return new DataLazy(m_id->deepCopy()->getPtr());    case G_IDENTITY:  return new DataLazy(m_id->deepCopy()->getPtr());
1996    case G_UNARY: return new DataLazy(m_left->deepCopy()->getPtr(),m_op);    case G_UNARY:
1997    case G_BINARY:    return new DataLazy(m_left->deepCopy()->getPtr(),m_right->deepCopy()->getPtr(),m_op);    case G_REDUCTION:      return new DataLazy(m_left->deepCopy()->getPtr(),m_op);
1998      case G_UNARY_P:       return new DataLazy(m_left->deepCopy()->getPtr(), m_op, m_tol);
1999      case G_BINARY:        return new DataLazy(m_left->deepCopy()->getPtr(),m_right->deepCopy()->getPtr(),m_op);
2000    case G_NP1OUT: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(),m_op);    case G_NP1OUT: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(),m_op);
2001    case G_TENSORPROD: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);    case G_TENSORPROD: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
2002      case G_NP1OUT_P:   return new DataLazy(m_left->deepCopy()->getPtr(),m_op,  m_axis_offset);
2003      case G_NP1OUT_2P:  return new DataLazy(m_left->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
2004    default:    default:
2005      throw DataException("Programmer error - do not know how to deepcopy operator "+opToString(m_op)+".");          throw DataException("Programmer error - do not know how to deepcopy operator "+opToString(m_op)+".");
2006    }    }
2007  }  }
2008    
2009    
2010    
2011  // There is no single, natural interpretation of getLength on DataLazy.  // There is no single, natural interpretation of getLength on DataLazy.
2012  // Instances of DataReady can look at the size of their vectors.  // Instances of DataReady can look at the size of their vectors.
2013  // For lazy though, it could be the size the data would be if it were resolved;  // For lazy though, it could be the size the data would be if it were resolved;
2014  // or it could be some function of the lengths of the DataReady instances which  // or it could be some function of the lengths of the DataReady instances which
2015  // form part of the expression.  // form part of the expression.
2016  // Rather than have people making assumptions, I have disabled the method.  // Rather than have people making assumptions, I have disabled the method.
2017  DataTypes::ValueType::size_type  DataTypes::RealVectorType::size_type
2018  DataLazy::getLength() const  DataLazy::getLength() const
2019  {  {
2020    throw DataException("getLength() does not make sense for lazy data.");    throw DataException("getLength() does not make sense for lazy data.");
# Line 1318  DataLazy::getSlice(const DataTypes::Regi Line 2029  DataLazy::getSlice(const DataTypes::Regi
2029    
2030    
2031  // To do this we need to rely on our child nodes  // To do this we need to rely on our child nodes
2032  DataTypes::ValueType::size_type  DataTypes::RealVectorType::size_type
2033  DataLazy::getPointOffset(int sampleNo,  DataLazy::getPointOffset(int sampleNo,
2034                   int dataPointNo)                   int dataPointNo)
2035  {  {
2036    if (m_op==IDENTITY)    if (m_op==IDENTITY)
2037    {    {
2038      return m_id->getPointOffset(sampleNo,dataPointNo);          return m_id->getPointOffset(sampleNo,dataPointNo);
2039    }    }
2040    if (m_readytype!='E')    if (m_readytype!='E')
2041    {    {
2042      collapse();          collapse();
2043      return m_id->getPointOffset(sampleNo,dataPointNo);          return m_id->getPointOffset(sampleNo,dataPointNo);
2044    }    }
2045    // at this point we do not have an identity node and the expression will be Expanded    // at this point we do not have an identity node and the expression will be Expanded
2046    // so we only need to know which child to ask    // so we only need to know which child to ask
2047    if (m_left->m_readytype=='E')    if (m_left->m_readytype=='E')
2048    {    {
2049      return m_left->getPointOffset(sampleNo,dataPointNo);          return m_left->getPointOffset(sampleNo,dataPointNo);
2050    }    }
2051    else    else
2052    {    {
2053      return m_right->getPointOffset(sampleNo,dataPointNo);          return m_right->getPointOffset(sampleNo,dataPointNo);
2054    }    }
2055  }  }
2056    
2057  // To do this we need to rely on our child nodes  // To do this we need to rely on our child nodes
2058  DataTypes::ValueType::size_type  DataTypes::RealVectorType::size_type
2059  DataLazy::getPointOffset(int sampleNo,  DataLazy::getPointOffset(int sampleNo,
2060                   int dataPointNo) const                   int dataPointNo) const
2061  {  {
2062    if (m_op==IDENTITY)    if (m_op==IDENTITY)
2063    {    {
2064      return m_id->getPointOffset(sampleNo,dataPointNo);          return m_id->getPointOffset(sampleNo,dataPointNo);
2065    }    }
2066    if (m_readytype=='E')    if (m_readytype=='E')
2067    {    {
# Line 1358  DataLazy::getPointOffset(int sampleNo, Line 2069  DataLazy::getPointOffset(int sampleNo,
2069      // so we only need to know which child to ask      // so we only need to know which child to ask
2070      if (m_left->m_readytype=='E')      if (m_left->m_readytype=='E')
2071      {      {
2072      return m_left->getPointOffset(sampleNo,dataPointNo);          return m_left->getPointOffset(sampleNo,dataPointNo);
2073      }      }
2074      else      else
2075      {      {
2076      return m_right->getPointOffset(sampleNo,dataPointNo);          return m_right->getPointOffset(sampleNo,dataPointNo);
2077      }      }
2078    }    }
2079    if (m_readytype=='C')    if (m_readytype=='C')
2080    {    {
2081      return m_left->getPointOffset(sampleNo,dataPointNo); // which child doesn't matter          return m_left->getPointOffset(sampleNo,dataPointNo); // which child doesn't matter
2082    }    }
2083    throw DataException("Programmer error - getPointOffset on lazy data may require collapsing (but this object is marked const).");    throw DataException("Programmer error - getPointOffset on lazy data may require collapsing (but this object is marked const).");
2084  }  }
2085    
2086  // It would seem that DataTagged will need to be treated differently since even after setting all tags  
2087  // to zero, all the tags from all the DataTags would be in the result.  // I have decided to let Data:: handle this issue.
 // However since they all have the same value (0) whether they are there or not should not matter.  
 // So I have decided that for all types this method will create a constant 0.  
 // It can be promoted up as required.  
 // A possible efficiency concern might be expanded->constant->expanded which has an extra memory management  
 // but we can deal with that if it arrises.  
2088  void  void
2089  DataLazy::setToZero()  DataLazy::setToZero()
2090  {  {
2091    DataTypes::ValueType v(getNoValues(),0);  //   DataTypes::RealVectorType v(getNoValues(),0);
2092    m_id=DataReady_ptr(new DataConstant(getFunctionSpace(),getShape(),v));  //   m_id=DataReady_ptr(new DataConstant(getFunctionSpace(),getShape(),v));
2093    m_op=IDENTITY;  //   m_op=IDENTITY;
2094    m_right.reset();    //   m_right.reset();  
2095    m_left.reset();  //   m_left.reset();
2096    m_readytype='C';  //   m_readytype='C';
2097    m_buffsRequired=1;  //   m_buffsRequired=1;
2098    
2099      (void)privdebug;  // to stop the compiler complaining about unused privdebug
2100      throw DataException("Programmer error - setToZero not supported for DataLazy (DataLazy objects should be read only).");
2101  }  }
2102    
2103  }   // end namespace  bool
2104    DataLazy::actsExpanded() const
2105    {
2106            return (m_readytype=='E');
2107    }
2108    
2109    } // end namespace
2110    

Legend:
Removed from v.2147  
changed lines
  Added in v.5997

  ViewVC Help
Powered by ViewVC 1.1.26