/[escript]/branches/clazy/escriptcore/src/DataLazy.cpp
ViewVC logotype

Diff of /branches/clazy/escriptcore/src/DataLazy.cpp

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

trunk/escript/src/DataLazy.cpp revision 2082 by caltinay, Fri Nov 21 01:46:05 2008 UTC trunk/escriptcore/src/DataLazy.cpp revision 6001 by caltinay, Tue Mar 1 05:01:49 2016 UTC
# Line 1  Line 1 
1    
2  /*******************************************************  /*****************************************************************************
3  *  *
4  * Copyright (c) 2003-2008 by University of Queensland  * Copyright (c) 2003-2016 by The University of Queensland
5  * Earth Systems Science Computational Center (ESSCC)  * http://www.uq.edu.au
 * http://www.uq.edu.au/esscc  
6  *  *
7  * Primary Business: Queensland, Australia  * Primary Business: Queensland, Australia
8  * Licensed under the Open Software License version 3.0  * Licensed under the Open Software License version 3.0
9  * http://www.opensource.org/licenses/osl-3.0.php  * http://www.opensource.org/licenses/osl-3.0.php
10  *  *
11  *******************************************************/  * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12    * Development 2012-2013 by School of Earth Sciences
13    * Development from 2014 by Centre for Geoscience Computing (GeoComp)
14    *
15    *****************************************************************************/
16    
17  #include "DataLazy.h"  #include "DataLazy.h"
18    #include "Data.h"
19    #include "DataTypes.h"
20    #include "EscriptParams.h"
21    #include "FunctionSpace.h"
22    #include "UnaryFuncs.h"    // for escript::fsign
23    #include "Utils.h"
24    
25  #ifdef USE_NETCDF  #ifdef USE_NETCDF
26  #include <netcdfcpp.h>  #include <netcdfcpp.h>
27  #endif  #endif
28  #ifdef PASO_MPI  
29  #include <mpi.h>  #include <iomanip> // for some fancy formatting in debug
30  #endif  
31  #ifdef _OPENMP  using namespace escript::DataTypes;
32  #include <omp.h>  
33  #endif  #define NO_ARG
34  #include "FunctionSpace.h"  
35  #include "DataTypes.h"  // #define LAZYDEBUG(X) if (privdebug){X;}
36  #include "Data.h"  #define LAZYDEBUG(X)
37  #include "UnaryFuncs.h"     // for escript::fsign  namespace
38  #include "Utils.h"  {
39    bool privdebug=false;
40    
41    #define ENABLEDEBUG privdebug=true;
42    #define DISABLEDEBUG privdebug=false;
43    }
44    
45    // #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {cerr << "\n!!!!!!! SIZE LIMIT EXCEEDED " << m_children << ";" << m_height << endl << toString() << endl;resolveToIdentity();}
46    
47    // #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {cerr << "SIZE LIMIT EXCEEDED " << m_height << endl;resolveToIdentity();}
48    
49    
50    #define SIZELIMIT if (m_height>escript::escriptParams.getTOO_MANY_LEVELS())  {if (escript::escriptParams.getLAZY_VERBOSE()){cerr << "SIZE LIMIT EXCEEDED height=" << m_height << endl;}resolveToIdentity();}
51    
52  /*  /*
53  How does DataLazy work?  How does DataLazy work?
# Line 39  A special operation, IDENTITY, stores an Line 60  A special operation, IDENTITY, stores an
60  This means that all "internal" nodes in the structure are instances of DataLazy.  This means that all "internal" nodes in the structure are instances of DataLazy.
61    
62  Each operation has a string representation as well as an opgroup - eg G_IDENTITY, G_BINARY, ...  Each operation has a string representation as well as an opgroup - eg G_IDENTITY, G_BINARY, ...
63  Note that IDENITY is not considered a unary operation.  Note that IDENTITY is not considered a unary operation.
64    
65  I am avoiding calling the structure formed a tree because it is not guaranteed to be one (eg c=a+a).  I am avoiding calling the structure formed a tree because it is not guaranteed to be one (eg c=a+a).
66  It must however form a DAG (directed acyclic graph).  It must however form a DAG (directed acyclic graph).
# Line 47  I will refer to individual DataLazy obje Line 68  I will refer to individual DataLazy obje
68    
69  Each node also stores:  Each node also stores:
70  - m_readytype \in {'E','T','C','?'} ~ indicates what sort of DataReady would be produced if the expression was  - m_readytype \in {'E','T','C','?'} ~ indicates what sort of DataReady would be produced if the expression was
71      evaluated.          evaluated.
72  - m_buffsrequired ~ the larged number of samples which would need to be kept simultaneously in order to  - m_buffsrequired ~ the large number of samples which would need to be kept simultaneously in order to
73      evaluate the expression.          evaluate the expression.
74  - m_samplesize ~ the number of doubles stored in a sample.  - m_samplesize ~ the number of doubles stored in a sample.
75    
76  When a new node is created, the above values are computed based on the values in the child nodes.  When a new node is created, the above values are computed based on the values in the child nodes.
# Line 70  The convention that I use, is that the r Line 91  The convention that I use, is that the r
91  For expressions which evaluate to Constant or Tagged, there is a different evaluation method.  For expressions which evaluate to Constant or Tagged, there is a different evaluation method.
92  The collapse method invokes the (non-lazy) operations on the Data class to evaluate the expression.  The collapse method invokes the (non-lazy) operations on the Data class to evaluate the expression.
93    
94  To add a new operator you need to do the following (plus anything I might have forgotten):  To add a new operator you need to do the following (plus anything I might have forgotten - adding a new group for example):
95  1) Add to the ES_optype.  1) Add to the ES_optype.
96  2) determine what opgroup your operation belongs to (X)  2) determine what opgroup your operation belongs to (X)
97  3) add a string for the op to the end of ES_opstrings  3) add a string for the op to the end of ES_opstrings
# Line 90  namespace escript Line 111  namespace escript
111  namespace  namespace
112  {  {
113    
114    
115    // enabling this will print out when ever the maximum stacksize used by resolve increases
116    // it assumes _OPENMP is also in use
117    //#define LAZY_STACK_PROF
118    
119    
120    
121    #ifndef _OPENMP
122      #ifdef LAZY_STACK_PROF
123      #undef LAZY_STACK_PROF
124      #endif
125    #endif
126    
127    
128    #ifdef LAZY_STACK_PROF
129    std::vector<void*> stackstart(getNumberOfThreads());
130    std::vector<void*> stackend(getNumberOfThreads());
131    size_t maxstackuse=0;
132    #endif
133    
134  enum ES_opgroup  enum ES_opgroup
135  {  {
136     G_UNKNOWN,     G_UNKNOWN,
137     G_IDENTITY,     G_IDENTITY,
138     G_BINARY,        // pointwise operations with two arguments     G_BINARY,            // pointwise operations with two arguments
139     G_UNARY,     // pointwise operations with one argument     G_UNARY,             // pointwise operations with one argument
140     G_NP1OUT,        // non-pointwise op with one output     G_UNARY_P,           // pointwise operations with one argument, requiring a parameter
141     G_TENSORPROD     // general tensor product     G_NP1OUT,            // non-pointwise op with one output
142       G_NP1OUT_P,          // non-pointwise op with one output requiring a parameter
143       G_TENSORPROD,        // general tensor product
144       G_NP1OUT_2P,         // non-pointwise op with one output requiring two params
145       G_REDUCTION,         // non-pointwise unary op with a scalar output
146       G_CONDEVAL
147  };  };
148    
149    
150    
151    
152  string ES_opstrings[]={"UNKNOWN","IDENTITY","+","-","*","/","^",  string ES_opstrings[]={"UNKNOWN","IDENTITY","+","-","*","/","^",
153              "sin","cos","tan",                          "sin","cos","tan",
154              "asin","acos","atan","sinh","cosh","tanh","erf",                          "asin","acos","atan","sinh","cosh","tanh","erf",
155              "asinh","acosh","atanh",                          "asinh","acosh","atanh",
156              "log10","log","sign","abs","neg","pos","exp","sqrt",                          "log10","log","sign","abs","neg","pos","exp","sqrt",
157              "1/","where>0","where<0","where>=0","where<=0",                          "1/","where>0","where<0","where>=0","where<=0", "where<>0","where=0",
158              "symmetric","nonsymmetric",                          "symmetric","nonsymmetric",
159              "prod"};                          "prod",
160  int ES_opcount=36;                          "transpose", "trace",
161                            "swapaxes",
162                            "minval", "maxval",
163                            "condEval"};
164    int ES_opcount=44;
165  ES_opgroup opgroups[]={G_UNKNOWN,G_IDENTITY,G_BINARY,G_BINARY,G_BINARY,G_BINARY, G_BINARY,  ES_opgroup opgroups[]={G_UNKNOWN,G_IDENTITY,G_BINARY,G_BINARY,G_BINARY,G_BINARY, G_BINARY,
166              G_UNARY,G_UNARY,G_UNARY, //10                          G_UNARY,G_UNARY,G_UNARY, //10
167              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,    // 17                          G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,        // 17
168              G_UNARY,G_UNARY,G_UNARY,                    // 20                          G_UNARY,G_UNARY,G_UNARY,                                        // 20
169              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,        // 28                          G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,        // 28
170              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,            // 33                          G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY, G_UNARY_P, G_UNARY_P,          // 35
171              G_NP1OUT,G_NP1OUT,                          G_NP1OUT,G_NP1OUT,
172              G_TENSORPROD};                          G_TENSORPROD,
173                            G_NP1OUT_P, G_NP1OUT_P,
174                            G_NP1OUT_2P,
175                            G_REDUCTION, G_REDUCTION,
176                            G_CONDEVAL};
177  inline  inline
178  ES_opgroup  ES_opgroup
179  getOpgroup(ES_optype op)  getOpgroup(ES_optype op)
# Line 131  getOpgroup(ES_optype op) Line 185  getOpgroup(ES_optype op)
185  FunctionSpace  FunctionSpace
186  resultFS(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)  resultFS(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
187  {  {
188      // perhaps this should call interpolate and throw or something?          // perhaps this should call interpolate and throw or something?
189      // maybe we need an interpolate node -          // maybe we need an interpolate node -
190      // that way, if interpolate is required in any other op we can just throw a          // that way, if interpolate is required in any other op we can just throw a
191      // programming error exception.          // programming error exception.
192    
193    FunctionSpace l=left->getFunctionSpace();    FunctionSpace l=left->getFunctionSpace();
194    FunctionSpace r=right->getFunctionSpace();    FunctionSpace r=right->getFunctionSpace();
195    if (l!=r)    if (l!=r)
196    {    {
197      if (r.probeInterpolation(l))      signed char res=r.getDomain()->preferredInterpolationOnDomain(r.getTypeCode(), l.getTypeCode());
198        if (res==1)
199      {      {
200      return l;          return l;
201      }      }
202      if (l.probeInterpolation(r))      if (res==-1)
203      {      {
204      return r;          return r;
205      }      }
206      throw DataException("Cannot interpolate between the FunctionSpaces given for operation "+opToString(op)+".");      throw DataException("Cannot interpolate between the FunctionSpaces given for operation "+opToString(op)+".");
207    }    }
# Line 158  resultFS(DataAbstract_ptr left, DataAbst Line 213  resultFS(DataAbstract_ptr left, DataAbst
213  DataTypes::ShapeType  DataTypes::ShapeType
214  resultShape(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)  resultShape(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
215  {  {
216      if (left->getShape()!=right->getShape())          if (left->getShape()!=right->getShape())
217      {          {
218        if ((getOpgroup(op)!=G_BINARY) && (getOpgroup(op)!=G_NP1OUT))            if ((getOpgroup(op)!=G_BINARY) && (getOpgroup(op)!=G_NP1OUT))
219        {            {
220          throw DataException("Shapes not the name - shapes must match for (point)binary operations.");                  throw DataException("Shapes not the name - shapes must match for (point)binary operations.");
221        }            }
222        if (left->getRank()==0)   // we need to allow scalar * anything  
223        {            if (left->getRank()==0)       // we need to allow scalar * anything
224          return right->getShape();            {
225        }                  return right->getShape();
226        if (right->getRank()==0)            }
227        {            if (right->getRank()==0)
228          return left->getShape();            {
229        }                  return left->getShape();
230        throw DataException("Shapes not the same - arguments must have matching shapes (or be scalars) for (point)binary operations on lazy data.");            }
231      }            throw DataException("Shapes not the same - arguments must have matching shapes (or be scalars) for (point)binary operations on lazy data.");
232      return left->getShape();          }
233            return left->getShape();
234    }
235    
236    // return the shape for "op left"
237    
238    DataTypes::ShapeType
239    resultShape(DataAbstract_ptr left, ES_optype op, int axis_offset)
240    {
241            switch(op)
242            {
243            case TRANS:
244               {                    // for the scoping of variables
245                    const DataTypes::ShapeType& s=left->getShape();
246                    DataTypes::ShapeType sh;
247                    int rank=left->getRank();
248                    if (axis_offset<0 || axis_offset>rank)
249                    {
250                stringstream e;
251                e << "Error - Data::transpose must have 0 <= axis_offset <= rank=" << rank;
252                throw DataException(e.str());
253            }
254            for (int i=0; i<rank; i++)
255                    {
256                       int index = (axis_offset+i)%rank;
257               sh.push_back(s[index]); // Append to new shape
258            }
259                    return sh;
260               }
261            break;
262            case TRACE:
263               {
264                    int rank=left->getRank();
265                    if (rank<2)
266                    {
267                       throw DataException("Trace can only be computed for objects with rank 2 or greater.");
268                    }
269                    if ((axis_offset>rank-2) || (axis_offset<0))
270                    {
271                       throw DataException("Trace: axis offset must lie between 0 and rank-2 inclusive.");
272                    }
273                    if (rank==2)
274                    {
275                       return DataTypes::scalarShape;
276                    }
277                    else if (rank==3)
278                    {
279                       DataTypes::ShapeType sh;
280                       if (axis_offset==0)
281                       {
282                            sh.push_back(left->getShape()[2]);
283                       }
284                       else         // offset==1
285                       {
286                            sh.push_back(left->getShape()[0]);
287                       }
288                       return sh;
289                    }
290                    else if (rank==4)
291                    {
292                       DataTypes::ShapeType sh;
293                       const DataTypes::ShapeType& s=left->getShape();
294                       if (axis_offset==0)
295                       {
296                            sh.push_back(s[2]);
297                            sh.push_back(s[3]);
298                       }
299                       else if (axis_offset==1)
300                       {
301                            sh.push_back(s[0]);
302                            sh.push_back(s[3]);
303                       }
304                       else         // offset==2
305                       {
306                            sh.push_back(s[0]);
307                            sh.push_back(s[1]);
308                       }
309                       return sh;
310                    }
311                    else            // unknown rank
312                    {
313                       throw DataException("Error - Data::trace can only be calculated for rank 2, 3 or 4 object.");
314                    }
315               }
316            break;
317            default:
318            throw DataException("Programmer error - resultShape(left,op) can't compute shapes for operator "+opToString(op)+".");
319            }
320    }
321    
322    DataTypes::ShapeType
323    SwapShape(DataAbstract_ptr left, const int axis0, const int axis1)
324    {
325         // This code taken from the Data.cpp swapaxes() method
326         // Some of the checks are probably redundant here
327         int axis0_tmp,axis1_tmp;
328         const DataTypes::ShapeType& s=left->getShape();
329         DataTypes::ShapeType out_shape;
330         // Here's the equivalent of python s_out=s[axis_offset:]+s[:axis_offset]
331         // which goes thru all shape vector elements starting with axis_offset (at index=rank wrap around to 0)
332         int rank=left->getRank();
333         if (rank<2) {
334            throw DataException("Error - Data::swapaxes argument must have at least rank 2.");
335         }
336         if (axis0<0 || axis0>rank-1) {
337            stringstream e;
338            e << "Error - Data::swapaxes: axis0 must be between 0 and rank-1=" << (rank-1);
339            throw DataException(e.str());
340         }
341         if (axis1<0 || axis1>rank-1) {
342            stringstream e;
343            e << "Error - Data::swapaxes: axis1 must be between 0 and rank-1=" << (rank-1);
344            throw DataException(e.str());
345         }
346         if (axis0 == axis1) {
347             throw DataException("Error - Data::swapaxes: axis indices must be different.");
348         }
349         if (axis0 > axis1) {
350             axis0_tmp=axis1;
351             axis1_tmp=axis0;
352         } else {
353             axis0_tmp=axis0;
354             axis1_tmp=axis1;
355         }
356         for (int i=0; i<rank; i++) {
357           if (i == axis0_tmp) {
358              out_shape.push_back(s[axis1_tmp]);
359           } else if (i == axis1_tmp) {
360              out_shape.push_back(s[axis0_tmp]);
361           } else {
362              out_shape.push_back(s[i]);
363           }
364         }
365        return out_shape;
366  }  }
367    
368    
369  // determine the output shape for the general tensor product operation  // determine the output shape for the general tensor product operation
370  // the additional parameters return information required later for the product  // the additional parameters return information required later for the product
371  // the majority of this code is copy pasted from C_General_Tensor_Product  // the majority of this code is copy pasted from C_General_Tensor_Product
372  DataTypes::ShapeType  DataTypes::ShapeType
373  GTPShape(DataAbstract_ptr left, DataAbstract_ptr right, int axis_offset, int transpose, int& SL, int& SM, int& SR)  GTPShape(DataAbstract_ptr left, DataAbstract_ptr right, int axis_offset, int transpose, int& SL, int& SM, int& SR)
374  {  {
375                
376    // Get rank and shape of inputs    // Get rank and shape of inputs
377    int rank0 = left->getRank();    int rank0 = left->getRank();
378    int rank1 = right->getRank();    int rank1 = right->getRank();
# Line 192  GTPShape(DataAbstract_ptr left, DataAbst Line 381  GTPShape(DataAbstract_ptr left, DataAbst
381    
382    // Prepare for the loops of the product and verify compatibility of shapes    // Prepare for the loops of the product and verify compatibility of shapes
383    int start0=0, start1=0;    int start0=0, start1=0;
384    if (transpose == 0)       {}    if (transpose == 0)           {}
385    else if (transpose == 1)  { start0 = axis_offset; }    else if (transpose == 1)      { start0 = axis_offset; }
386    else if (transpose == 2)  { start1 = rank1-axis_offset; }    else if (transpose == 2)      { start1 = rank1-axis_offset; }
387    else              { throw DataException("DataLazy GeneralTensorProduct Constructor: Error - transpose should be 0, 1 or 2"); }    else                          { throw DataException("DataLazy GeneralTensorProduct Constructor: Error - transpose should be 0, 1 or 2"); }
388    
389      if (rank0<axis_offset)
390      {
391            throw DataException("DataLazy GeneralTensorProduct Constructor: Error - rank of left < axisoffset");
392      }
393    
394    // Adjust the shapes for transpose    // Adjust the shapes for transpose
395    DataTypes::ShapeType tmpShape0(rank0);    // pre-sizing the vectors rather    DataTypes::ShapeType tmpShape0(rank0);        // pre-sizing the vectors rather
396    DataTypes::ShapeType tmpShape1(rank1);    // than using push_back    DataTypes::ShapeType tmpShape1(rank1);        // than using push_back
397    for (int i=0; i<rank0; i++)   { tmpShape0[i]=shape0[(i+start0)%rank0]; }    for (int i=0; i<rank0; i++)   { tmpShape0[i]=shape0[(i+start0)%rank0]; }
398    for (int i=0; i<rank1; i++)   { tmpShape1[i]=shape1[(i+start1)%rank1]; }    for (int i=0; i<rank1; i++)   { tmpShape1[i]=shape1[(i+start1)%rank1]; }
399    
400    // Prepare for the loops of the product    // Prepare for the loops of the product
401    SL=1, SM=1, SR=1;    SL=1, SM=1, SR=1;
402    for (int i=0; i<rank0-axis_offset; i++)   {    for (int i=0; i<rank0-axis_offset; i++)       {
403      SL *= tmpShape0[i];      SL *= tmpShape0[i];
404    }    }
405    for (int i=rank0-axis_offset; i<rank0; i++)   {    for (int i=rank0-axis_offset; i<rank0; i++)   {
406      if (tmpShape0[i] != tmpShape1[i-(rank0-axis_offset)]) {      if (tmpShape0[i] != tmpShape1[i-(rank0-axis_offset)]) {
407        throw DataException("C_GeneralTensorProduct: Error - incompatible shapes");        throw DataException("C_GeneralTensorProduct: Error - incompatible shapes");
408      }      }
409      SM *= tmpShape0[i];      SM *= tmpShape0[i];
410    }    }
411    for (int i=axis_offset; i<rank1; i++)     {    for (int i=axis_offset; i<rank1; i++)         {
412      SR *= tmpShape1[i];      SR *= tmpShape1[i];
413    }    }
414    
415    // Define the shape of the output (rank of shape is the sum of the loop ranges below)    // Define the shape of the output (rank of shape is the sum of the loop ranges below)
416    DataTypes::ShapeType shape2(rank0+rank1-2*axis_offset);      DataTypes::ShapeType shape2(rank0+rank1-2*axis_offset);      
417    {         // block to limit the scope of out_index    {                     // block to limit the scope of out_index
418       int out_index=0;       int out_index=0;
419       for (int i=0; i<rank0-axis_offset; i++, ++out_index) { shape2[out_index]=tmpShape0[i]; } // First part of arg_0_Z       for (int i=0; i<rank0-axis_offset; i++, ++out_index) { shape2[out_index]=tmpShape0[i]; } // First part of arg_0_Z
420       for (int i=axis_offset; i<rank1; i++, ++out_index)   { shape2[out_index]=tmpShape1[i]; } // Last part of arg_1_Z       for (int i=axis_offset; i<rank1; i++, ++out_index)   { shape2[out_index]=tmpShape1[i]; } // Last part of arg_1_Z
421    }    }
   return shape2;  
 }  
422    
423      if (shape2.size()>ESCRIPT_MAX_DATA_RANK)
424      {
425         ostringstream os;
426         os << "C_GeneralTensorProduct: Error - Attempt to create a rank " << shape2.size() << " object. The maximum rank is " << ESCRIPT_MAX_DATA_RANK << ".";
427         throw DataException(os.str());
428      }
429    
430  // determine the number of points in the result of "left op right"    return shape2;
 // note that determining the resultLength for G_TENSORPROD is more complex and will not be processed here  
 // size_t  
 // resultLength(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)  
 // {  
 //    switch (getOpgroup(op))  
 //    {  
 //    case G_BINARY: return left->getLength();  
 //    case G_UNARY: return left->getLength();  
 //    case G_NP1OUT: return left->getLength();  
 //    default:  
 //  throw DataException("Programmer Error - attempt to getLength() for operator "+opToString(op)+".");  
 //    }  
 // }  
   
 // determine the number of samples requires to evaluate an expression combining left and right  
 // NP1OUT needs an extra buffer because we can't write the answers over the top of the input.  
 // The same goes for G_TENSORPROD  
 int  
 calcBuffs(const DataLazy_ptr& left, const DataLazy_ptr& right, ES_optype op)  
 {  
    switch(getOpgroup(op))  
    {  
    case G_IDENTITY: return 1;  
    case G_BINARY: return max(left->getBuffsRequired(),right->getBuffsRequired()+1);  
    case G_UNARY: return max(left->getBuffsRequired(),1);  
    case G_NP1OUT: return 1+max(left->getBuffsRequired(),1);  
    case G_TENSORPROD: return 1+max(left->getBuffsRequired(),right->getBuffsRequired()+1);  
    default:  
     throw DataException("Programmer Error - attempt to calcBuffs() for operator "+opToString(op)+".");  
    }  
431  }  }
432    
433    }       // end anonymous namespace
 }   // end anonymous namespace  
434    
435    
436    
# Line 279  opToString(ES_optype op) Line 445  opToString(ES_optype op)
445    return ES_opstrings[op];    return ES_opstrings[op];
446  }  }
447    
448    void DataLazy::LazyNodeSetup()
449    {
450    #ifdef _OPENMP
451        int numthreads=omp_get_max_threads();
452        m_samples.resize(numthreads*m_samplesize);
453        m_sampleids=new int[numthreads];
454        for (int i=0;i<numthreads;++i)
455        {
456            m_sampleids[i]=-1;  
457        }
458    #else
459        m_samples.resize(m_samplesize);
460        m_sampleids=new int[1];
461        m_sampleids[0]=-1;
462    #endif  // _OPENMP
463    }
464    
465    
466    // Creates an identity node
467  DataLazy::DataLazy(DataAbstract_ptr p)  DataLazy::DataLazy(DataAbstract_ptr p)
468      : parent(p->getFunctionSpace(),p->getShape()),          : parent(p->getFunctionSpace(),p->getShape())
469      m_op(IDENTITY),          ,m_sampleids(0),
470      m_axis_offset(0),          m_samples(1)
     m_transpose(0),  
     m_SL(0), m_SM(0), m_SR(0)  
471  {  {
472     if (p->isLazy())     if (p->isLazy())
473     {     {
474      // I don't want identity of Lazy.          // I don't want identity of Lazy.
475      // Question: Why would that be so bad?          // Question: Why would that be so bad?
476      // Answer: We assume that the child of ID is something we can call getVector on          // Answer: We assume that the child of ID is something we can call getVector on
477      throw DataException("Programmer error - attempt to create identity from a DataLazy.");          throw DataException("Programmer error - attempt to create identity from a DataLazy.");
478     }     }
479     else     else
480     {     {
481      m_id=dynamic_pointer_cast<DataReady>(p);          p->makeLazyShared();
482      if(p->isConstant()) {m_readytype='C';}          DataReady_ptr dr=dynamic_pointer_cast<DataReady>(p);
483      else if(p->isExpanded()) {m_readytype='E';}          makeIdentity(dr);
484      else if (p->isTagged()) {m_readytype='T';}  LAZYDEBUG(cout << "Wrapping " << dr.get() << " id=" << m_id.get() << endl;)
     else {throw DataException("Unknown DataReady instance in DataLazy constructor.");}  
485     }     }
486     m_buffsRequired=1;  LAZYDEBUG(cout << "(1)Lazy created with " << m_samplesize << endl;)
    m_samplesize=getNumDPPSample()*getNoValues();  
    m_maxsamplesize=m_samplesize;  
 cout << "(1)Lazy created with " << m_samplesize << endl;  
487  }  }
488    
   
   
   
489  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op)  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op)
490      : parent(left->getFunctionSpace(),left->getShape()),          : parent(left->getFunctionSpace(),(getOpgroup(op)!=G_REDUCTION)?left->getShape():DataTypes::scalarShape),
491      m_op(op),          m_op(op),
492      m_axis_offset(0),          m_axis_offset(0),
493      m_transpose(0),          m_transpose(0),
494      m_SL(0), m_SM(0), m_SR(0)          m_SL(0), m_SM(0), m_SR(0)
495  {  {
496     if ((getOpgroup(op)!=G_UNARY) && (getOpgroup(op)!=G_NP1OUT))     if ((getOpgroup(op)!=G_UNARY) && (getOpgroup(op)!=G_NP1OUT) && (getOpgroup(op)!=G_REDUCTION))
497     {     {
498      throw DataException("Programmer error - constructor DataLazy(left, op) will only process UNARY operations.");          throw DataException("Programmer error - constructor DataLazy(left, op) will only process UNARY operations.");
499     }     }
500    
501     DataLazy_ptr lleft;     DataLazy_ptr lleft;
502     if (!left->isLazy())     if (!left->isLazy())
503     {     {
504      lleft=DataLazy_ptr(new DataLazy(left));          lleft=DataLazy_ptr(new DataLazy(left));
505     }     }
506     else     else
507     {     {
508      lleft=dynamic_pointer_cast<DataLazy>(left);          lleft=dynamic_pointer_cast<DataLazy>(left);
509     }     }
510     m_readytype=lleft->m_readytype;     m_readytype=lleft->m_readytype;
511     m_left=lleft;     m_left=lleft;
    m_buffsRequired=calcBuffs(m_left, m_right,m_op); // yeah m_right will be null at this point  
512     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
513     m_maxsamplesize=max(m_samplesize,m_left->getMaxSampleSize());     m_children=m_left->m_children+1;
514       m_height=m_left->m_height+1;
515       LazyNodeSetup();
516       SIZELIMIT
517  }  }
518    
519    
520  // In this constructor we need to consider interpolation  // In this constructor we need to consider interpolation
521  DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)  DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
522      : parent(resultFS(left,right,op), resultShape(left,right,op)),          : parent(resultFS(left,right,op), resultShape(left,right,op)),
523      m_op(op),          m_op(op),
524      m_SL(0), m_SM(0), m_SR(0)          m_SL(0), m_SM(0), m_SR(0)
525  {  {
526    LAZYDEBUG(cout << "Forming operator with " << left.get() << " " << right.get() << endl;)
527     if ((getOpgroup(op)!=G_BINARY))     if ((getOpgroup(op)!=G_BINARY))
528     {     {
529      throw DataException("Programmer error - constructor DataLazy(left, right, op) will only process BINARY operations.");          throw DataException("Programmer error - constructor DataLazy(left, right, op) will only process BINARY operations.");
530     }     }
531    
532     if (getFunctionSpace()!=left->getFunctionSpace())    // left needs to be interpolated     if (getFunctionSpace()!=left->getFunctionSpace())    // left needs to be interpolated
533     {     {
534      FunctionSpace fs=getFunctionSpace();          FunctionSpace fs=getFunctionSpace();
535      Data ltemp(left);          Data ltemp(left);
536      Data tmp(ltemp,fs);          Data tmp(ltemp,fs);
537      left=tmp.borrowDataPtr();          left=tmp.borrowDataPtr();
538     }     }
539     if (getFunctionSpace()!=right->getFunctionSpace())   // right needs to be interpolated     if (getFunctionSpace()!=right->getFunctionSpace())   // right needs to be interpolated
540     {     {
541      Data tmp(Data(right),getFunctionSpace());          Data tmp(Data(right),getFunctionSpace());
542      right=tmp.borrowDataPtr();          right=tmp.borrowDataPtr();
543    LAZYDEBUG(cout << "Right interpolation required " << right.get() << endl;)
544     }     }
545     left->operandCheck(*right);     left->operandCheck(*right);
546    
547     if (left->isLazy())          // the children need to be DataLazy. Wrap them in IDENTITY if required     if (left->isLazy())                  // the children need to be DataLazy. Wrap them in IDENTITY if required
548     {     {
549      m_left=dynamic_pointer_cast<DataLazy>(left);          m_left=dynamic_pointer_cast<DataLazy>(left);
550    LAZYDEBUG(cout << "Left is " << m_left->toString() << endl;)
551     }     }
552     else     else
553     {     {
554      m_left=DataLazy_ptr(new DataLazy(left));          m_left=DataLazy_ptr(new DataLazy(left));
555    LAZYDEBUG(cout << "Left " << left.get() << " wrapped " << m_left->m_id.get() << endl;)
556     }     }
557     if (right->isLazy())     if (right->isLazy())
558     {     {
559      m_right=dynamic_pointer_cast<DataLazy>(right);          m_right=dynamic_pointer_cast<DataLazy>(right);
560    LAZYDEBUG(cout << "Right is " << m_right->toString() << endl;)
561     }     }
562     else     else
563     {     {
564      m_right=DataLazy_ptr(new DataLazy(right));          m_right=DataLazy_ptr(new DataLazy(right));
565    LAZYDEBUG(cout << "Right " << right.get() << " wrapped " << m_right->m_id.get() << endl;)
566     }     }
567     char lt=m_left->m_readytype;     char lt=m_left->m_readytype;
568     char rt=m_right->m_readytype;     char rt=m_right->m_readytype;
569     if (lt=='E' || rt=='E')     if (lt=='E' || rt=='E')
570     {     {
571      m_readytype='E';          m_readytype='E';
572     }     }
573     else if (lt=='T' || rt=='T')     else if (lt=='T' || rt=='T')
574     {     {
575      m_readytype='T';          m_readytype='T';
576     }     }
577     else     else
578     {     {
579      m_readytype='C';          m_readytype='C';
580     }     }
581     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
582     m_maxsamplesize=max(max(m_samplesize,m_right->getMaxSampleSize()),m_left->getMaxSampleSize());       m_children=m_left->m_children+m_right->m_children+2;
583     m_buffsRequired=calcBuffs(m_left, m_right,m_op);     m_height=max(m_left->m_height,m_right->m_height)+1;
584  cout << "(3)Lazy created with " << m_samplesize << endl;     LazyNodeSetup();
585       SIZELIMIT
586    LAZYDEBUG(cout << "(3)Lazy created with " << m_samplesize << endl;)
587  }  }
588    
589  DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op, int axis_offset, int transpose)  DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op, int axis_offset, int transpose)
590      : parent(resultFS(left,right,op), GTPShape(left,right, axis_offset, transpose, m_SL,m_SM, m_SR)),          : parent(resultFS(left,right,op), GTPShape(left,right, axis_offset, transpose, m_SL,m_SM, m_SR)),
591      m_op(op),          m_op(op),
592      m_axis_offset(axis_offset),          m_axis_offset(axis_offset),
593      m_transpose(transpose)          m_transpose(transpose)
594  {  {
595     if ((getOpgroup(op)!=G_TENSORPROD))     if ((getOpgroup(op)!=G_TENSORPROD))
596     {     {
597      throw DataException("Programmer error - constructor DataLazy(left, right, op, ax, tr) will only process BINARY operations which require parameters.");          throw DataException("Programmer error - constructor DataLazy(left, right, op, ax, tr) will only process BINARY operations which require parameters.");
598     }     }
599     if ((transpose>2) || (transpose<0))     if ((transpose>2) || (transpose<0))
600     {     {
601      throw DataException("DataLazy GeneralTensorProduct constructor: Error - transpose should be 0, 1 or 2");          throw DataException("DataLazy GeneralTensorProduct constructor: Error - transpose should be 0, 1 or 2");
602     }     }
603     if (getFunctionSpace()!=left->getFunctionSpace())    // left needs to be interpolated     if (getFunctionSpace()!=left->getFunctionSpace())    // left needs to be interpolated
604     {     {
605      FunctionSpace fs=getFunctionSpace();          FunctionSpace fs=getFunctionSpace();
606      Data ltemp(left);          Data ltemp(left);
607      Data tmp(ltemp,fs);          Data tmp(ltemp,fs);
608      left=tmp.borrowDataPtr();          left=tmp.borrowDataPtr();
609     }     }
610     if (getFunctionSpace()!=right->getFunctionSpace())   // right needs to be interpolated     if (getFunctionSpace()!=right->getFunctionSpace())   // right needs to be interpolated
611     {     {
612      Data tmp(Data(right),getFunctionSpace());          Data tmp(Data(right),getFunctionSpace());
613      right=tmp.borrowDataPtr();          right=tmp.borrowDataPtr();
614     }     }
615     left->operandCheck(*right);  //    left->operandCheck(*right);
616    
617     if (left->isLazy())          // the children need to be DataLazy. Wrap them in IDENTITY if required     if (left->isLazy())                  // the children need to be DataLazy. Wrap them in IDENTITY if required
618     {     {
619      m_left=dynamic_pointer_cast<DataLazy>(left);          m_left=dynamic_pointer_cast<DataLazy>(left);
620     }     }
621     else     else
622     {     {
623      m_left=DataLazy_ptr(new DataLazy(left));          m_left=DataLazy_ptr(new DataLazy(left));
624     }     }
625     if (right->isLazy())     if (right->isLazy())
626     {     {
627      m_right=dynamic_pointer_cast<DataLazy>(right);          m_right=dynamic_pointer_cast<DataLazy>(right);
628     }     }
629     else     else
630     {     {
631      m_right=DataLazy_ptr(new DataLazy(right));          m_right=DataLazy_ptr(new DataLazy(right));
632     }     }
633     char lt=m_left->m_readytype;     char lt=m_left->m_readytype;
634     char rt=m_right->m_readytype;     char rt=m_right->m_readytype;
635     if (lt=='E' || rt=='E')     if (lt=='E' || rt=='E')
636     {     {
637      m_readytype='E';          m_readytype='E';
638     }     }
639     else if (lt=='T' || rt=='T')     else if (lt=='T' || rt=='T')
640     {     {
641      m_readytype='T';          m_readytype='T';
642     }     }
643     else     else
644     {     {
645      m_readytype='C';          m_readytype='C';
646     }     }
647     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
648     m_maxsamplesize=max(max(m_samplesize,m_right->getMaxSampleSize()),m_left->getMaxSampleSize());       m_children=m_left->m_children+m_right->m_children+2;
649     m_buffsRequired=calcBuffs(m_left, m_right,m_op);     m_height=max(m_left->m_height,m_right->m_height)+1;
650  cout << "(4)Lazy created with " << m_samplesize << endl;     LazyNodeSetup();
651       SIZELIMIT
652    LAZYDEBUG(cout << "(4)Lazy created with " << m_samplesize << endl;)
653  }  }
654    
655    
656  DataLazy::~DataLazy()  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, int axis_offset)
657            : parent(left->getFunctionSpace(), resultShape(left,op, axis_offset)),
658            m_op(op),
659            m_axis_offset(axis_offset),
660            m_transpose(0),
661            m_tol(0)
662    {
663       if ((getOpgroup(op)!=G_NP1OUT_P))
664       {
665            throw DataException("Programmer error - constructor DataLazy(left, op, ax) will only process UNARY operations which require parameters.");
666       }
667       DataLazy_ptr lleft;
668       if (!left->isLazy())
669       {
670            lleft=DataLazy_ptr(new DataLazy(left));
671       }
672       else
673       {
674            lleft=dynamic_pointer_cast<DataLazy>(left);
675       }
676       m_readytype=lleft->m_readytype;
677       m_left=lleft;
678       m_samplesize=getNumDPPSample()*getNoValues();
679       m_children=m_left->m_children+1;
680       m_height=m_left->m_height+1;
681       LazyNodeSetup();
682       SIZELIMIT
683    LAZYDEBUG(cout << "(5)Lazy created with " << m_samplesize << endl;)
684    }
685    
686    DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, double tol)
687            : parent(left->getFunctionSpace(), left->getShape()),
688            m_op(op),
689            m_axis_offset(0),
690            m_transpose(0),
691            m_tol(tol)
692    {
693       if ((getOpgroup(op)!=G_UNARY_P))
694       {
695            throw DataException("Programmer error - constructor DataLazy(left, op, tol) will only process UNARY operations which require parameters.");
696       }
697       DataLazy_ptr lleft;
698       if (!left->isLazy())
699       {
700            lleft=DataLazy_ptr(new DataLazy(left));
701       }
702       else
703       {
704            lleft=dynamic_pointer_cast<DataLazy>(left);
705       }
706       m_readytype=lleft->m_readytype;
707       m_left=lleft;
708       m_samplesize=getNumDPPSample()*getNoValues();
709       m_children=m_left->m_children+1;
710       m_height=m_left->m_height+1;
711       LazyNodeSetup();
712       SIZELIMIT
713    LAZYDEBUG(cout << "(6)Lazy created with " << m_samplesize << endl;)
714    }
715    
716    
717    DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, const int axis0, const int axis1)
718            : parent(left->getFunctionSpace(), SwapShape(left,axis0,axis1)),
719            m_op(op),
720            m_axis_offset(axis0),
721            m_transpose(axis1),
722            m_tol(0)
723  {  {
724       if ((getOpgroup(op)!=G_NP1OUT_2P))
725       {
726            throw DataException("Programmer error - constructor DataLazy(left, op, tol) will only process UNARY operations which require two integer parameters.");
727       }
728       DataLazy_ptr lleft;
729       if (!left->isLazy())
730       {
731            lleft=DataLazy_ptr(new DataLazy(left));
732       }
733       else
734       {
735            lleft=dynamic_pointer_cast<DataLazy>(left);
736       }
737       m_readytype=lleft->m_readytype;
738       m_left=lleft;
739       m_samplesize=getNumDPPSample()*getNoValues();
740       m_children=m_left->m_children+1;
741       m_height=m_left->m_height+1;
742       LazyNodeSetup();
743       SIZELIMIT
744    LAZYDEBUG(cout << "(7)Lazy created with " << m_samplesize << endl;)
745  }  }
746    
747    
namespace
{

    // Return the largest of three ints (used for expression-tree heights).
    inline int max3(int a, int b, int c)
    {
        const int ab = (a > b) ? a : b;
        return (ab > c) ? ab : c;
    }

}
758    
759    DataLazy::DataLazy(DataAbstract_ptr mask, DataAbstract_ptr left, DataAbstract_ptr right/*, double tol*/)
760            : parent(left->getFunctionSpace(), left->getShape()),
761            m_op(CONDEVAL),
762            m_axis_offset(0),
763            m_transpose(0),
764            m_tol(0)
765    {
766    
767       DataLazy_ptr lmask;
768       DataLazy_ptr lleft;
769       DataLazy_ptr lright;
770       if (!mask->isLazy())
771       {
772            lmask=DataLazy_ptr(new DataLazy(mask));
773       }
774       else
775       {
776            lmask=dynamic_pointer_cast<DataLazy>(mask);
777       }
778       if (!left->isLazy())
779       {
780            lleft=DataLazy_ptr(new DataLazy(left));
781       }
782       else
783       {
784            lleft=dynamic_pointer_cast<DataLazy>(left);
785       }
786       if (!right->isLazy())
787       {
788            lright=DataLazy_ptr(new DataLazy(right));
789       }
790       else
791       {
792            lright=dynamic_pointer_cast<DataLazy>(right);
793       }
794       m_readytype=lmask->m_readytype;
795       if ((lleft->m_readytype!=lright->m_readytype) || (lmask->m_readytype!=lleft->m_readytype))
796       {
797            throw DataException("Programmer Error - condEval arguments must have the same readytype");
798       }
799       m_left=lleft;
800       m_right=lright;
801       m_mask=lmask;
802       m_samplesize=getNumDPPSample()*getNoValues();
803       m_children=m_left->m_children+m_right->m_children+m_mask->m_children+1;
804       m_height=max3(m_left->m_height,m_right->m_height,m_mask->m_height)+1;
805       LazyNodeSetup();
806       SIZELIMIT
807    LAZYDEBUG(cout << "(8)Lazy created with " << m_samplesize << endl;)
808  }  }
809    
810    
811  size_t  
812  DataLazy::getMaxSampleSize() const  DataLazy::~DataLazy()
813  {  {
814      return m_maxsamplesize;     delete[] m_sampleids;
815  }  }
816    
817    
818  /*  /*
819    \brief Evaluates the expression using methods on Data.    \brief Evaluates the expression using methods on Data.
820    This does the work for the collapse method.    This does the work for the collapse method.
821    For reasons of efficiency do not call this method on DataExpanded nodes.    For reasons of efficiency do not call this method on DataExpanded nodes.
822  */  */
823  DataReady_ptr  DataReady_ptr
824  DataLazy::collapseToReady()  DataLazy::collapseToReady() const
825  {  {
826    if (m_readytype=='E')    if (m_readytype=='E')
827    { // this is more an efficiency concern than anything else    {     // this is more an efficiency concern than anything else
828      throw DataException("Programmer Error - do not use collapse on Expanded data.");      throw DataException("Programmer Error - do not use collapse on Expanded data.");
829    }    }
830    if (m_op==IDENTITY)    if (m_op==IDENTITY)
# Line 511  DataLazy::collapseToReady() Line 842  DataLazy::collapseToReady()
842    switch(m_op)    switch(m_op)
843    {    {
844      case ADD:      case ADD:
845      result=left+right;          result=left+right;
846      break;          break;
847      case SUB:            case SUB:          
848      result=left-right;          result=left-right;
849      break;          break;
850      case MUL:            case MUL:          
851      result=left*right;          result=left*right;
852      break;          break;
853      case DIV:            case DIV:          
854      result=left/right;          result=left/right;
855      break;          break;
856      case SIN:      case SIN:
857      result=left.sin();            result=left.sin();      
858      break;          break;
859      case COS:      case COS:
860      result=left.cos();          result=left.cos();
861      break;          break;
862      case TAN:      case TAN:
863      result=left.tan();          result=left.tan();
864      break;          break;
865      case ASIN:      case ASIN:
866      result=left.asin();          result=left.asin();
867      break;          break;
868      case ACOS:      case ACOS:
869      result=left.acos();          result=left.acos();
870      break;          break;
871      case ATAN:      case ATAN:
872      result=left.atan();          result=left.atan();
873      break;          break;
874      case SINH:      case SINH:
875      result=left.sinh();          result=left.sinh();
876      break;          break;
877      case COSH:      case COSH:
878      result=left.cosh();          result=left.cosh();
879      break;          break;
880      case TANH:      case TANH:
881      result=left.tanh();          result=left.tanh();
882      break;          break;
883      case ERF:      case ERF:
884      result=left.erf();          result=left.erf();
885      break;          break;
886     case ASINH:     case ASINH:
887      result=left.asinh();          result=left.asinh();
888      break;          break;
889     case ACOSH:     case ACOSH:
890      result=left.acosh();          result=left.acosh();
891      break;          break;
892     case ATANH:     case ATANH:
893      result=left.atanh();          result=left.atanh();
894      break;          break;
895      case LOG10:      case LOG10:
896      result=left.log10();          result=left.log10();
897      break;          break;
898      case LOG:      case LOG:
899      result=left.log();          result=left.log();
900      break;          break;
901      case SIGN:      case SIGN:
902      result=left.sign();          result=left.sign();
903      break;          break;
904      case ABS:      case ABS:
905      result=left.abs();          result=left.abs();
906      break;          break;
907      case NEG:      case NEG:
908      result=left.neg();          result=left.neg();
909      break;          break;
910      case POS:      case POS:
911      // it doesn't mean anything for delayed.          // it doesn't mean anything for delayed.
912      // it will just trigger a deep copy of the lazy object          // it will just trigger a deep copy of the lazy object
913      throw DataException("Programmer error - POS not supported for lazy data.");          throw DataException("Programmer error - POS not supported for lazy data.");
914      break;          break;
915      case EXP:      case EXP:
916      result=left.exp();          result=left.exp();
917      break;          break;
918      case SQRT:      case SQRT:
919      result=left.sqrt();          result=left.sqrt();
920      break;          break;
921      case RECIP:      case RECIP:
922      result=left.oneOver();          result=left.oneOver();
923      break;          break;
924      case GZ:      case GZ:
925      result=left.wherePositive();          result=left.wherePositive();
926      break;          break;
927      case LZ:      case LZ:
928      result=left.whereNegative();          result=left.whereNegative();
929      break;          break;
930      case GEZ:      case GEZ:
931      result=left.whereNonNegative();          result=left.whereNonNegative();
932      break;          break;
933      case LEZ:      case LEZ:
934      result=left.whereNonPositive();          result=left.whereNonPositive();
935      break;          break;
936        case NEZ:
937            result=left.whereNonZero(m_tol);
938            break;
939        case EZ:
940            result=left.whereZero(m_tol);
941            break;
942      case SYM:      case SYM:
943      result=left.symmetric();          result=left.symmetric();
944      break;          break;
945      case NSYM:      case NSYM:
946      result=left.nonsymmetric();          result=left.nonsymmetric();
947      break;          break;
948      case PROD:      case PROD:
949      result=C_GeneralTensorProduct(left,right,m_axis_offset, m_transpose);          result=C_GeneralTensorProduct(left,right,m_axis_offset, m_transpose);
950      break;          break;
951        case TRANS:
952            result=left.transpose(m_axis_offset);
953            break;
954        case TRACE:
955            result=left.trace(m_axis_offset);
956            break;
957        case SWAP:
958            result=left.swapaxes(m_axis_offset, m_transpose);
959            break;
960        case MINVAL:
961            result=left.minval();
962            break;
963        case MAXVAL:
964            result=left.minval();
965            break;
966      default:      default:
967      throw DataException("Programmer error - collapseToReady does not know how to resolve operator "+opToString(m_op)+".");          throw DataException("Programmer error - collapseToReady does not know how to resolve operator "+opToString(m_op)+".");
968    }    }
969    return result.borrowReadyPtr();    return result.borrowReadyPtr();
970  }  }
# Line 624  DataLazy::collapseToReady() Line 976  DataLazy::collapseToReady()
976     the purpose of using DataLazy in the first place).     the purpose of using DataLazy in the first place).
977  */  */
978  void  void
979  DataLazy::collapse()  DataLazy::collapse() const
980  {  {
981    if (m_op==IDENTITY)    if (m_op==IDENTITY)
982    {    {
983      return;          return;
984    }    }
985    if (m_readytype=='E')    if (m_readytype=='E')
986    { // this is more an efficiency concern than anything else    {     // this is more an efficiency concern than anything else
987      throw DataException("Programmer Error - do not use collapse on Expanded data.");      throw DataException("Programmer Error - do not use collapse on Expanded data.");
988    }    }
989    m_id=collapseToReady();    m_id=collapseToReady();
990    m_op=IDENTITY;    m_op=IDENTITY;
991  }  }
992    
993  /*  
994    \brief Compute the value of the expression (unary operation) for the given sample.  
995    \return Vector which stores the value of the subexpression for the given sample.  
996    \param v A vector to store intermediate results.  
997    \param offset Index in v to begin storing results.  
// Expand a binary elementwise operation X over two strided operand buffers.
// Outer loop runs onumsteps blocks; inner loop runs numsteps chunks of
// `chunksize` values each, writing into resultp (advanced by resultStep per
// chunk).  The left/right read cursors advance by leftstep/rightstep per
// chunk and additionally by oleftstep/orightstep per block — these step
// patterns let the one macro cover scalar-vs-tensor and tensor-vs-tensor
// operand combinations.  All loop variables are expected in the caller's
// scope.
#define PROC_OP(TYPE,X)                               \
        for (int j=0;j<onumsteps;++j)\
        {\
          for (int i=0;i<numsteps;++i,resultp+=resultStep) \
          { \
LAZYDEBUG(cout << "[left,right]=[" << lroffset << "," << rroffset << "]" << endl;)\
LAZYDEBUG(cout << "{left,right}={" << (*left)[lroffset] << "," << (*right)[rroffset] << "}\n";)\
             tensor_binary_operation< TYPE >(chunksize, &((*left)[lroffset]), &((*right)[rroffset]), resultp, X); \
LAZYDEBUG(cout << " result=      " << resultp[0] << endl;) \
             lroffset+=leftstep; \
             rroffset+=rightstep; \
          }\
          lroffset+=oleftstep;\
          rroffset+=orightstep;\
        }
1013    
1014    
1015    // The result will be stored in m_samples
1016    // The return value is a pointer to the DataVector, offset is the offset within the return value
1017    const DataTypes::RealVectorType*
1018    DataLazy::resolveNodeSample(int tid, int sampleNo, size_t& roffset) const
1019  {  {
1020      // we assume that any collapsing has been done before we get here  LAZYDEBUG(cout << "Resolve sample " << toString() << endl;)
1021      // since we only have one argument we don't need to think about only          // collapse so we have a 'E' node or an IDENTITY for some other type
1022      // processing single points.    if (m_readytype!='E' && m_op!=IDENTITY)
1023      {
1024            collapse();
1025      }
1026      if (m_op==IDENTITY)  
1027      {
1028        const ValueType& vec=m_id->getVectorRO();
1029        roffset=m_id->getPointOffset(sampleNo, 0);
1030    #ifdef LAZY_STACK_PROF
1031    int x;
1032    if (&x<stackend[omp_get_thread_num()])
1033    {
1034           stackend[omp_get_thread_num()]=&x;
1035    }
1036    #endif
1037        return &(vec);
1038      }
1039      if (m_readytype!='E')
1040      {
1041        throw DataException("Programmer Error - Collapse did not produce an expanded node.");
1042      }
1043      if (m_sampleids[tid]==sampleNo)
1044      {
1045            roffset=tid*m_samplesize;
1046            return &(m_samples);            // sample is already resolved
1047      }
1048      m_sampleids[tid]=sampleNo;
1049    
1050      switch (getOpgroup(m_op))
1051      {
1052      case G_UNARY:
1053      case G_UNARY_P: return resolveNodeUnary(tid, sampleNo, roffset);
1054      case G_BINARY: return resolveNodeBinary(tid, sampleNo, roffset);
1055      case G_NP1OUT: return resolveNodeNP1OUT(tid, sampleNo, roffset);
1056      case G_NP1OUT_P: return resolveNodeNP1OUT_P(tid, sampleNo, roffset);
1057      case G_TENSORPROD: return resolveNodeTProd(tid, sampleNo, roffset);
1058      case G_NP1OUT_2P: return resolveNodeNP1OUT_2P(tid, sampleNo, roffset);
1059      case G_REDUCTION: return resolveNodeReduction(tid, sampleNo, roffset);
1060      case G_CONDEVAL: return resolveNodeCondEval(tid, sampleNo, roffset);
1061      default:
1062        throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");
1063      }
1064    }
1065    
1066    const DataTypes::RealVectorType*
1067    DataLazy::resolveNodeUnary(int tid, int sampleNo, size_t& roffset) const
1068    {
1069            // we assume that any collapsing has been done before we get here
1070            // since we only have one argument we don't need to think about only
1071            // processing single points.
1072            // we will also know we won't get identity nodes
1073    if (m_readytype!='E')    if (m_readytype!='E')
1074    {    {
1075      throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");      throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
1076    }    }
1077    const ValueType* vleft=m_left->resolveSample(v,offset,sampleNo,roffset);    if (m_op==IDENTITY)
1078    const double* left=&((*vleft)[roffset]);    {
1079    double* result=&(v[offset]);      throw DataException("Programmer error - resolveNodeUnary should not be called on identity nodes.");
1080    roffset=offset;    }
1081      const DataTypes::RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, roffset);
1082      const double* left=&((*leftres)[roffset]);
1083      roffset=m_samplesize*tid;
1084      double* result=&(m_samples[roffset]);
1085    switch (m_op)    switch (m_op)
1086    {    {
1087      case SIN:        case SIN:  
1088      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sin);          tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sin);
1089      break;          break;
1090      case COS:      case COS:
1091      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cos);          tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cos);
1092      break;          break;
1093      case TAN:      case TAN:
1094      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tan);          tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tan);
1095      break;          break;
1096      case ASIN:      case ASIN:
1097      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::asin);          tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::asin);
1098      break;          break;
1099      case ACOS:      case ACOS:
1100      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::acos);          tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::acos);
1101      break;          break;
1102      case ATAN:      case ATAN:
1103      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::atan);          tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::atan);
1104      break;          break;
1105      case SINH:      case SINH:
1106      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sinh);          tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sinh);
1107      break;          break;
1108      case COSH:      case COSH:
1109      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cosh);          tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cosh);
1110      break;          break;
1111      case TANH:      case TANH:
1112      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tanh);          tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tanh);
1113      break;          break;
1114      case ERF:      case ERF:
1115  #if defined (_WIN32) && !defined(__INTEL_COMPILER)  #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1116      throw DataException("Error - Data:: erf function is not supported on _WIN32 platforms.");          throw DataException("Error - Data:: erf function is not supported on _WIN32 platforms.");
1117  #else  #else
1118      tensor_unary_operation(m_samplesize, left, result, ::erf);          tensor_unary_operation(m_samplesize, left, result, ::erf);
1119      break;          break;
1120  #endif  #endif
1121     case ASINH:     case ASINH:
1122  #if defined (_WIN32) && !defined(__INTEL_COMPILER)  #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1123      tensor_unary_operation(m_samplesize, left, result, escript::asinh_substitute);          tensor_unary_operation(m_samplesize, left, result, escript::asinh_substitute);
1124  #else  #else
1125      tensor_unary_operation(m_samplesize, left, result, ::asinh);          tensor_unary_operation(m_samplesize, left, result, ::asinh);
1126  #endif    #endif  
1127      break;          break;
1128     case ACOSH:     case ACOSH:
1129  #if defined (_WIN32) && !defined(__INTEL_COMPILER)  #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1130      tensor_unary_operation(m_samplesize, left, result, escript::acosh_substitute);          tensor_unary_operation(m_samplesize, left, result, escript::acosh_substitute);
1131  #else  #else
1132      tensor_unary_operation(m_samplesize, left, result, ::acosh);          tensor_unary_operation(m_samplesize, left, result, ::acosh);
1133  #endif    #endif  
1134      break;          break;
1135     case ATANH:     case ATANH:
1136  #if defined (_WIN32) && !defined(__INTEL_COMPILER)  #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1137      tensor_unary_operation(m_samplesize, left, result, escript::atanh_substitute);          tensor_unary_operation(m_samplesize, left, result, escript::atanh_substitute);
1138  #else  #else
1139      tensor_unary_operation(m_samplesize, left, result, ::atanh);          tensor_unary_operation(m_samplesize, left, result, ::atanh);
1140  #endif    #endif  
1141      break;          break;
1142      case LOG10:      case LOG10:
1143      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log10);          tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log10);
1144      break;          break;
1145      case LOG:      case LOG:
1146      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log);          tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log);
1147      break;          break;
1148      case SIGN:      case SIGN:
1149      tensor_unary_operation(m_samplesize, left, result, escript::fsign);          tensor_unary_operation(m_samplesize, left, result, escript::fsign);
1150      break;          break;
1151      case ABS:      case ABS:
1152      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::fabs);          tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::fabs);
1153      break;          break;
1154      case NEG:      case NEG:
1155      tensor_unary_operation(m_samplesize, left, result, negate<double>());          tensor_unary_operation(m_samplesize, left, result, negate<double>());
1156      break;          break;
1157      case POS:      case POS:
1158      // it doesn't mean anything for delayed.          // it doesn't mean anything for delayed.
1159      // it will just trigger a deep copy of the lazy object          // it will just trigger a deep copy of the lazy object
1160      throw DataException("Programmer error - POS not supported for lazy data.");          throw DataException("Programmer error - POS not supported for lazy data.");
1161      break;          break;
1162      case EXP:      case EXP:
1163      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::exp);          tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::exp);
1164      break;          break;
1165      case SQRT:      case SQRT:
1166      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sqrt);          tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sqrt);
1167      break;          break;
1168      case RECIP:      case RECIP:
1169      tensor_unary_operation(m_samplesize, left, result, bind1st(divides<double>(),1.));          tensor_unary_operation(m_samplesize, left, result, bind1st(divides<double>(),1.));
1170      break;          break;
1171      case GZ:      case GZ:
1172      tensor_unary_operation(m_samplesize, left, result, bind2nd(greater<double>(),0.0));          tensor_unary_operation(m_samplesize, left, result, bind2nd(greater<double>(),0.0));
1173      break;          break;
1174      case LZ:      case LZ:
1175      tensor_unary_operation(m_samplesize, left, result, bind2nd(less<double>(),0.0));          tensor_unary_operation(m_samplesize, left, result, bind2nd(less<double>(),0.0));
1176      break;          break;
1177      case GEZ:      case GEZ:
1178      tensor_unary_operation(m_samplesize, left, result, bind2nd(greater_equal<double>(),0.0));          tensor_unary_operation(m_samplesize, left, result, bind2nd(greater_equal<double>(),0.0));
1179      break;          break;
1180      case LEZ:      case LEZ:
1181      tensor_unary_operation(m_samplesize, left, result, bind2nd(less_equal<double>(),0.0));          tensor_unary_operation(m_samplesize, left, result, bind2nd(less_equal<double>(),0.0));
1182      break;          break;
1183    // There are actually G_UNARY_P but I don't see a compelling reason to treat them differently
1184        case NEZ:
1185            tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsGT(),m_tol));
1186            break;
1187        case EZ:
1188            tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsLTE(),m_tol));
1189            break;
1190    
1191      default:      default:
1192      throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");          throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");
1193    }    }
1194    return &v;    return &(m_samples);
1195  }  }
1196    
1197    
1198  /*  const DataTypes::RealVectorType*
1199    \brief Compute the value of the expression (unary operation) for the given sample.  DataLazy::resolveNodeReduction(int tid, int sampleNo, size_t& roffset) const
   \return Vector which stores the value of the subexpression for the given sample.  
   \param v A vector to store intermediate results.  
   \param offset Index in v to begin storing results.  
   \param sampleNo Sample number to evaluate.  
   \param roffset (output parameter) the offset in the return vector where the result begins.  
   
   The return value will be an existing vector so do not deallocate it.  
   If the result is stored in v it should be stored at the offset given.  
   Everything from offset to the end of v should be considered available for this method to use.  
 */  
 DataTypes::ValueType*  
 DataLazy::resolveNP1OUT(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const  
1200  {  {
1201      // we assume that any collapsing has been done before we get here          // we assume that any collapsing has been done before we get here
1202      // since we only have one argument we don't need to think about only          // since we only have one argument we don't need to think about only
1203      // processing single points.          // processing single points.
1204            // we will also know we won't get identity nodes
1205    if (m_readytype!='E')    if (m_readytype!='E')
1206    {    {
1207      throw DataException("Programmer error - resolveNP1OUT should only be called on expanded Data.");      throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
1208    }    }
1209      // since we can't write the result over the input, we need a result offset further along    if (m_op==IDENTITY)
1210    size_t subroffset=roffset+m_samplesize;    {
1211    const ValueType* vleft=m_left->resolveSample(v,offset,sampleNo,subroffset);      throw DataException("Programmer error - resolveNodeUnary should not be called on identity nodes.");
1212    roffset=offset;    }
1213      size_t loffset=0;
1214      const DataTypes::RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, loffset);
1215    
1216      roffset=m_samplesize*tid;
1217      unsigned int ndpps=getNumDPPSample();
1218      unsigned int psize=DataTypes::noValues(m_left->getShape());
1219      double* result=&(m_samples[roffset]);
1220      switch (m_op)
1221      {
1222        case MINVAL:
1223            {
1224              for (unsigned int z=0;z<ndpps;++z)
1225              {
1226                FMin op;
1227                *result=DataMaths::reductionOp(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max());
1228                loffset+=psize;
1229                result++;
1230              }
1231            }
1232            break;
1233        case MAXVAL:
1234            {
1235              for (unsigned int z=0;z<ndpps;++z)
1236              {
1237              FMax op;
1238              *result=DataMaths::reductionOp(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max()*-1);
1239              loffset+=psize;
1240              result++;
1241              }
1242            }
1243            break;
1244        default:
1245            throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");
1246      }
1247      return &(m_samples);
1248    }
1249    
1250    const DataTypes::RealVectorType*
1251    DataLazy::resolveNodeNP1OUT(int tid, int sampleNo, size_t& roffset) const
1252    {
1253            // we assume that any collapsing has been done before we get here
1254            // since we only have one argument we don't need to think about only
1255            // processing single points.
1256      if (m_readytype!='E')
1257      {
1258        throw DataException("Programmer error - resolveNodeNP1OUT should only be called on expanded Data.");
1259      }
1260      if (m_op==IDENTITY)
1261      {
1262        throw DataException("Programmer error - resolveNodeNP1OUT should not be called on identity nodes.");
1263      }
1264      size_t subroffset;
1265      const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1266      roffset=m_samplesize*tid;
1267      size_t loop=0;
1268      size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1269      size_t step=getNoValues();
1270      size_t offset=roffset;
1271    switch (m_op)    switch (m_op)
1272    {    {
1273      case SYM:      case SYM:
1274      DataMaths::symmetric(*vleft,m_left->getShape(),subroffset, v, getShape(), offset);          for (loop=0;loop<numsteps;++loop)
1275      break;          {
1276                DataMaths::symmetric(*leftres,m_left->getShape(),subroffset, m_samples, getShape(), offset);
1277                subroffset+=step;
1278                offset+=step;
1279            }
1280            break;
1281      case NSYM:      case NSYM:
1282      DataMaths::nonsymmetric(*vleft,m_left->getShape(),subroffset, v, getShape(), offset);          for (loop=0;loop<numsteps;++loop)
1283      break;          {
1284                DataMaths::nonsymmetric(*leftres,m_left->getShape(),subroffset, m_samples, getShape(), offset);
1285                subroffset+=step;
1286                offset+=step;
1287            }
1288            break;
1289        default:
1290            throw DataException("Programmer error - resolveNP1OUT can not resolve operator "+opToString(m_op)+".");
1291      }
1292      return &m_samples;
1293    }
1294    
1295    const DataTypes::RealVectorType*
1296    DataLazy::resolveNodeNP1OUT_P(int tid, int sampleNo, size_t& roffset) const
1297    {
1298            // we assume that any collapsing has been done before we get here
1299            // since we only have one argument we don't need to think about only
1300            // processing single points.
1301      if (m_readytype!='E')
1302      {
1303        throw DataException("Programmer error - resolveNodeNP1OUT_P should only be called on expanded Data.");
1304      }
1305      if (m_op==IDENTITY)
1306      {
1307        throw DataException("Programmer error - resolveNodeNP1OUT_P should not be called on identity nodes.");
1308      }
1309      size_t subroffset;
1310      size_t offset;
1311      const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1312      roffset=m_samplesize*tid;
1313      offset=roffset;
1314      size_t loop=0;
1315      size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1316      size_t outstep=getNoValues();
1317      size_t instep=m_left->getNoValues();
1318      switch (m_op)
1319      {
1320        case TRACE:
1321            for (loop=0;loop<numsteps;++loop)
1322            {
1323                DataMaths::trace(*leftres,m_left->getShape(),subroffset, m_samples ,getShape(),offset,m_axis_offset);
1324                subroffset+=instep;
1325                offset+=outstep;
1326            }
1327            break;
1328        case TRANS:
1329            for (loop=0;loop<numsteps;++loop)
1330            {
1331                DataMaths::transpose(*leftres,m_left->getShape(),subroffset, m_samples, getShape(),offset,m_axis_offset);
1332                subroffset+=instep;
1333                offset+=outstep;
1334            }
1335            break;
1336      default:      default:
1337      throw DataException("Programmer error - resolveNP1OUT can not resolve operator "+opToString(m_op)+".");          throw DataException("Programmer error - resolveNP1OUTP can not resolve operator "+opToString(m_op)+".");
1338    }    }
1339    return &v;    return &m_samples;
1340  }  }
1341    
1342    
1343    const DataTypes::RealVectorType*
1344    DataLazy::resolveNodeNP1OUT_2P(int tid, int sampleNo, size_t& roffset) const
1345    {
1346      if (m_readytype!='E')
1347      {
1348        throw DataException("Programmer error - resolveNodeNP1OUT_2P should only be called on expanded Data.");
1349      }
1350      if (m_op==IDENTITY)
1351      {
1352        throw DataException("Programmer error - resolveNodeNP1OUT_2P should not be called on identity nodes.");
1353      }
1354      size_t subroffset;
1355      size_t offset;
1356      const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1357      roffset=m_samplesize*tid;
1358      offset=roffset;
1359      size_t loop=0;
1360      size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1361      size_t outstep=getNoValues();
1362      size_t instep=m_left->getNoValues();
1363      switch (m_op)
1364      {
1365        case SWAP:
1366            for (loop=0;loop<numsteps;++loop)
1367            {
1368                DataMaths::swapaxes(*leftres,m_left->getShape(),subroffset, m_samples, getShape(),offset, m_axis_offset, m_transpose);
1369                subroffset+=instep;
1370                offset+=outstep;
1371            }
1372            break;
1373        default:
1374            throw DataException("Programmer error - resolveNodeNP1OUT2P can not resolve operator "+opToString(m_op)+".");
1375      }
1376      return &m_samples;
1377    }
1378    
1379    const DataTypes::RealVectorType*
1380    DataLazy::resolveNodeCondEval(int tid, int sampleNo, size_t& roffset) const
1381    {
1382      if (m_readytype!='E')
1383      {
1384        throw DataException("Programmer error - resolveNodeCondEval should only be called on expanded Data.");
1385      }
1386      if (m_op!=CONDEVAL)
1387      {
1388        throw DataException("Programmer error - resolveNodeCondEval should only be called on CONDEVAL nodes.");
1389      }
1390      size_t subroffset;
1391    
1392  #define PROC_OP(TYPE,X)                               \    const ValueType* maskres=m_mask->resolveNodeSample(tid, sampleNo, subroffset);
1393      for (int i=0;i<steps;++i,resultp+=resultStep) \    const ValueType* srcres=0;
1394      { \    if ((*maskres)[subroffset]>0)
1395         tensor_binary_operation< TYPE >(chunksize, &((*left)[lroffset]), &((*right)[rroffset]), resultp, X); \    {
1396         lroffset+=leftStep; \          srcres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1397         rroffset+=rightStep; \    }
1398      }    else
1399      {
1400            srcres=m_right->resolveNodeSample(tid, sampleNo, subroffset);
1401      }
1402    
1403      // Now we need to copy the result
1404    
1405      roffset=m_samplesize*tid;
1406      for (int i=0;i<m_samplesize;++i)
1407      {
1408            m_samples[roffset+i]=(*srcres)[subroffset+i];  
1409      }
1410    
1411      return &m_samples;
1412    }
1413    
 /*  
   \brief Compute the value of the expression (binary operation) for the given sample.  
   \return Vector which stores the value of the subexpression for the given sample.  
   \param v A vector to store intermediate results.  
   \param offset Index in v to begin storing results.  
   \param sampleNo Sample number to evaluate.  
   \param roffset (output parameter) the offset in the return vector where the result begins.  
   
   The return value will be an existing vector so do not deallocate it.  
   If the result is stored in v it should be stored at the offset given.  
   Everything from offset to the end of v should be considered available for this method to use.  
 */  
1414  // This method assumes that any subexpressions which evaluate to Constant or Tagged Data  // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1415  // have already been collapsed to IDENTITY. So we must have at least one expanded child.  // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1416  // If both children are expanded, then we can process them in a single operation (we treat  // If both children are expanded, then we can process them in a single operation (we treat
# Line 842  DataLazy::resolveNP1OUT(ValueType& v, si Line 1420  DataLazy::resolveNP1OUT(ValueType& v, si
1420  // There is an additional complication when scalar operations are considered.  // There is an additional complication when scalar operations are considered.
1421  // For example, 2+Vector.  // For example, 2+Vector.
1422  // In this case each double within the point is treated individually  // In this case each double within the point is treated individually
1423  DataTypes::ValueType*  const DataTypes::RealVectorType*
1424  DataLazy::resolveBinary(ValueType& v,  size_t offset, int sampleNo, size_t& roffset) const  DataLazy::resolveNodeBinary(int tid, int sampleNo, size_t& roffset) const
1425  {  {
1426  cout << "Resolve binary: " << toString() << endl;  LAZYDEBUG(cout << "Resolve binary: " << toString() << endl;)
1427    
1428    size_t lroffset=0, rroffset=0;    // offsets in the left and right result vectors    size_t lroffset=0, rroffset=0;        // offsets in the left and right result vectors
1429      // first work out which of the children are expanded          // first work out which of the children are expanded
1430    bool leftExp=(m_left->m_readytype=='E');    bool leftExp=(m_left->m_readytype=='E');
1431    bool rightExp=(m_right->m_readytype=='E');    bool rightExp=(m_right->m_readytype=='E');
1432    bool bigloops=((leftExp && rightExp) || (!leftExp && !rightExp)); // is processing in single step?    if (!leftExp && !rightExp)
1433    int steps=(bigloops?1:getNumDPPSample());    {
1434    size_t chunksize=(bigloops? m_samplesize : getNoValues());    // if bigloops, pretend the whole sample is a datapoint          throw DataException("Programmer Error - please use collapse if neither argument has type 'E'.");
1435    if (m_left->getRank()!=m_right->getRank())    // need to deal with scalar * ? ops    }
1436    {    bool leftScalar=(m_left->getRank()==0);
1437      EsysAssert((m_left->getRank()==0) || (m_right->getRank()==0), "Error - Ranks must match unless one is 0.");    bool rightScalar=(m_right->getRank()==0);
1438      steps=getNumDPPSample()*max(m_left->getNoValues(),m_right->getNoValues());    if ((m_left->getRank()!=m_right->getRank()) && (!leftScalar && !rightScalar))
1439      chunksize=1;    // for scalar    {
1440    }              throw DataException("resolveBinary - ranks of arguments must match unless one of them is scalar.");
1441    int leftStep=((leftExp && !rightExp)? m_right->getNoValues() : 0);    }
1442    int rightStep=((rightExp && !leftExp)? m_left->getNoValues() : 0);    size_t leftsize=m_left->getNoValues();
1443    int resultStep=max(leftStep,rightStep);   // only one (at most) should be !=0    size_t rightsize=m_right->getNoValues();
1444      // Get the values of sub-expressions    size_t chunksize=1;                   // how many doubles will be processed in one go
1445    const ValueType* left=m_left->resolveSample(v,offset,sampleNo,lroffset);    int leftstep=0;               // how far should the left offset advance after each step
1446    const ValueType* right=m_right->resolveSample(v,offset+m_samplesize,sampleNo,rroffset); // Note    int rightstep=0;
1447      // the right child starts further along.    int numsteps=0;               // total number of steps for the inner loop
1448    double* resultp=&(v[offset]);     // results are stored at the vector offset we recieved    int oleftstep=0;      // the o variables refer to the outer loop
1449      int orightstep=0;     // The outer loop is only required in cases where there is an extended scalar
1450      int onumsteps=1;
1451      
1452      bool LES=(leftExp && leftScalar);     // Left is an expanded scalar
1453      bool RES=(rightExp && rightScalar);
1454      bool LS=(!leftExp && leftScalar);     // left is a single scalar
1455      bool RS=(!rightExp && rightScalar);
1456      bool LN=(!leftExp && !leftScalar);    // left is a single non-scalar
1457      bool RN=(!rightExp && !rightScalar);
1458      bool LEN=(leftExp && !leftScalar);    // left is an expanded non-scalar
1459      bool REN=(rightExp && !rightScalar);
1460    
1461      if ((LES && RES) || (LEN && REN))     // both are Expanded scalars or both are expanded non-scalars
1462      {
1463            chunksize=m_left->getNumDPPSample()*leftsize;
1464            leftstep=0;
1465            rightstep=0;
1466            numsteps=1;
1467      }
1468      else if (LES || RES)
1469      {
1470            chunksize=1;
1471            if (LES)                // left is an expanded scalar
1472            {
1473                    if (RS)
1474                    {
1475                       leftstep=1;
1476                       rightstep=0;
1477                       numsteps=m_left->getNumDPPSample();
1478                    }
1479                    else            // RN or REN
1480                    {
1481                       leftstep=0;
1482                       oleftstep=1;
1483                       rightstep=1;
1484                       orightstep=(RN ? -(int)rightsize : 0);
1485                       numsteps=rightsize;
1486                       onumsteps=m_left->getNumDPPSample();
1487                    }
1488            }
1489            else            // right is an expanded scalar
1490            {
1491                    if (LS)
1492                    {
1493                       rightstep=1;
1494                       leftstep=0;
1495                       numsteps=m_right->getNumDPPSample();
1496                    }
1497                    else
1498                    {
1499                       rightstep=0;
1500                       orightstep=1;
1501                       leftstep=1;
1502                       oleftstep=(LN ? -(int)leftsize : 0);
1503                       numsteps=leftsize;
1504                       onumsteps=m_right->getNumDPPSample();
1505                    }
1506            }
1507      }
1508      else  // this leaves (LEN, RS), (LEN, RN) and their transposes
1509      {
1510            if (LEN)        // and Right will be a single value
1511            {
1512                    chunksize=rightsize;
1513                    leftstep=rightsize;
1514                    rightstep=0;
1515                    numsteps=m_left->getNumDPPSample();
1516                    if (RS)
1517                    {
1518                       numsteps*=leftsize;
1519                    }
1520            }
1521            else    // REN
1522            {
1523                    chunksize=leftsize;
1524                    rightstep=leftsize;
1525                    leftstep=0;
1526                    numsteps=m_right->getNumDPPSample();
1527                    if (LS)
1528                    {
1529                       numsteps*=rightsize;
1530                    }
1531            }
1532      }
1533    
1534      int resultStep=max(leftstep,rightstep);       // only one (at most) should be !=0
1535            // Get the values of sub-expressions
1536      const ValueType* left=m_left->resolveNodeSample(tid,sampleNo,lroffset);      
1537      const ValueType* right=m_right->resolveNodeSample(tid,sampleNo,rroffset);
1538    LAZYDEBUG(cout << "Post sub calls in " << toString() << endl;)
1539    LAZYDEBUG(cout << "shapes=" << DataTypes::shapeToString(m_left->getShape()) << "," << DataTypes::shapeToString(m_right->getShape()) << endl;)
1540    LAZYDEBUG(cout << "chunksize=" << chunksize << endl << "leftstep=" << leftstep << " rightstep=" << rightstep;)
1541    LAZYDEBUG(cout << " numsteps=" << numsteps << endl << "oleftstep=" << oleftstep << " orightstep=" << orightstep;)
1542    LAZYDEBUG(cout << "onumsteps=" << onumsteps << endl;)
1543    LAZYDEBUG(cout << " DPPS=" << m_left->getNumDPPSample() << "," <<m_right->getNumDPPSample() << endl;)
1544    LAZYDEBUG(cout << "" << LS << RS << LN << RN << LES << RES <<LEN << REN <<   endl;)
1545    
1546    LAZYDEBUG(cout << "Left res["<< lroffset<< "]=" << (*left)[lroffset] << endl;)
1547    LAZYDEBUG(cout << "Right res["<< rroffset<< "]=" << (*right)[rroffset] << endl;)
1548    
1549    
1550      roffset=m_samplesize*tid;
1551      double* resultp=&(m_samples[roffset]);                // results are stored at the vector offset we received
1552    switch(m_op)    switch(m_op)
1553    {    {
1554      case ADD:      case ADD:
1555          PROC_OP(NO_ARG,plus<double>());          PROC_OP(NO_ARG,plus<double>());
1556      break;          break;
1557      case SUB:      case SUB:
1558      PROC_OP(NO_ARG,minus<double>());          PROC_OP(NO_ARG,minus<double>());
1559      break;          break;
1560      case MUL:      case MUL:
1561      PROC_OP(NO_ARG,multiplies<double>());          PROC_OP(NO_ARG,multiplies<double>());
1562      break;          break;
1563      case DIV:      case DIV:
1564      PROC_OP(NO_ARG,divides<double>());          PROC_OP(NO_ARG,divides<double>());
1565      break;          break;
1566      case POW:      case POW:
1567         PROC_OP(double (double,double),::pow);         PROC_OP(double (double,double),::pow);
1568      break;          break;
1569      default:      default:
1570      throw DataException("Programmer error - resolveBinary can not resolve operator "+opToString(m_op)+".");          throw DataException("Programmer error - resolveBinary can not resolve operator "+opToString(m_op)+".");
1571    }    }
1572    roffset=offset;    LAZYDEBUG(cout << "Result res[" << roffset<< "]" << m_samples[roffset] << endl;)
1573    return &v;    return &m_samples;
1574  }  }
1575    
1576    
 /*  
   \brief Compute the value of the expression (tensor product) for the given sample.  
   \return Vector which stores the value of the subexpression for the given sample.  
   \param v A vector to store intermediate results.  
   \param offset Index in v to begin storing results.  
   \param sampleNo Sample number to evaluate.  
   \param roffset (output parameter) the offset in the return vector where the result begins.  
   
   The return value will be an existing vector so do not deallocate it.  
   If the result is stored in v it should be stored at the offset given.  
   Everything from offset to the end of v should be considered available for this method to use.  
 */  
1577  // This method assumes that any subexpressions which evaluate to Constant or Tagged Data  // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1578  // have already been collapsed to IDENTITY. So we must have at least one expanded child.  // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1579  // unlike the other resolve helpers, we must treat these datapoints separately.  // unlike the other resolve helpers, we must treat these datapoints separately.
1580  DataTypes::ValueType*  const DataTypes::RealVectorType*
1581  DataLazy::resolveTProd(ValueType& v,  size_t offset, int sampleNo, size_t& roffset) const  DataLazy::resolveNodeTProd(int tid, int sampleNo, size_t& roffset) const
1582  {  {
1583  cout << "Resolve TensorProduct: " << toString() << endl;  LAZYDEBUG(cout << "Resolve TensorProduct: " << toString() << endl;)
1584    
1585    size_t lroffset=0, rroffset=0;    // offsets in the left and right result vectors    size_t lroffset=0, rroffset=0;        // offsets in the left and right result vectors
1586      // first work out which of the children are expanded          // first work out which of the children are expanded
1587    bool leftExp=(m_left->m_readytype=='E');    bool leftExp=(m_left->m_readytype=='E');
1588    bool rightExp=(m_right->m_readytype=='E');    bool rightExp=(m_right->m_readytype=='E');
1589    int steps=getNumDPPSample();    int steps=getNumDPPSample();
1590    int leftStep=((leftExp && !rightExp)? m_right->getNoValues() : 0);    int leftStep=(leftExp? m_left->getNoValues() : 0);            // do not have scalars as input to this method
1591    int rightStep=((rightExp && !leftExp)? m_left->getNoValues() : 0);    int rightStep=(rightExp?m_right->getNoValues() : 0);
1592    int resultStep=max(leftStep,rightStep);   // only one (at most) should be !=0  
1593      // Get the values of sub-expressions (leave a gap of one sample for the result).    int resultStep=getNoValues();
1594    const ValueType* left=m_left->resolveSample(v,offset+m_samplesize,sampleNo,lroffset);    roffset=m_samplesize*tid;
1595    const ValueType* right=m_right->resolveSample(v,offset+2*m_samplesize,sampleNo,rroffset);    size_t offset=roffset;
1596    double* resultp=&(v[offset]);     // results are stored at the vector offset we recieved  
1597      const ValueType* left=m_left->resolveNodeSample(tid, sampleNo, lroffset);
1598    
1599      const ValueType* right=m_right->resolveNodeSample(tid, sampleNo, rroffset);
1600    
1601    LAZYDEBUG(cerr << "[Left shape]=" << DataTypes::shapeToString(m_left->getShape()) << "\n[Right shape]=" << DataTypes::shapeToString(m_right->getShape()) << " result=" <<DataTypes::shapeToString(getShape()) <<  endl;
1602    cout << getNoValues() << endl;)
1603    
1604    
1605    LAZYDEBUG(cerr << "Post sub calls: " << toString() << endl;)
1606    LAZYDEBUG(cout << "LeftExp=" << leftExp << " rightExp=" << rightExp << endl;)
1607    LAZYDEBUG(cout << "LeftR=" << m_left->getRank() << " rightExp=" << m_right->getRank() << endl;)
1608    LAZYDEBUG(cout << "LeftSize=" << m_left->getNoValues() << " RightSize=" << m_right->getNoValues() << endl;)
1609    LAZYDEBUG(cout << "m_samplesize=" << m_samplesize << endl;)
1610    LAZYDEBUG(cout << "outputshape=" << DataTypes::shapeToString(getShape()) << endl;)
1611    LAZYDEBUG(cout << "DPPS=" << m_right->getNumDPPSample() <<"."<<endl;)
1612    
1613      double* resultp=&(m_samples[offset]);         // results are stored at the vector offset we received
1614    switch(m_op)    switch(m_op)
1615    {    {
1616      case PROD:      case PROD:
1617      for (int i=0;i<steps;++i,resultp+=resultStep)          for (int i=0;i<steps;++i,resultp+=resultStep)
1618      {          {
1619            const double *ptr_0 = &((*left)[lroffset]);            const double *ptr_0 = &((*left)[lroffset]);
1620            const double *ptr_1 = &((*right)[rroffset]);            const double *ptr_1 = &((*right)[rroffset]);
1621            matrix_matrix_product(m_SL, m_SM, m_SR, ptr_0, ptr_1, resultp, m_transpose);  
1622        lroffset+=leftStep;  LAZYDEBUG(cout << DataTypes::pointToString(*left, m_left->getShape(),lroffset,"LEFT") << endl;)
1623        rroffset+=rightStep;  LAZYDEBUG(cout << DataTypes::pointToString(*right,m_right->getShape(),rroffset, "RIGHT") << endl;)
1624      }  
1625      break;            matrix_matrix_product(m_SL, m_SM, m_SR, ptr_0, ptr_1, resultp, m_transpose);
1626    
1627              lroffset+=leftStep;
1628              rroffset+=rightStep;
1629            }
1630            break;
1631      default:      default:
1632      throw DataException("Programmer error - resolveTProduct can not resolve operator "+opToString(m_op)+".");          throw DataException("Programmer error - resolveTProduct can not resolve operator "+opToString(m_op)+".");
1633    }    }
1634    roffset=offset;    roffset=offset;
1635    return &v;    return &m_samples;
1636  }  }
1637    
1638    
1639    const DataTypes::RealVectorType*
1640    DataLazy::resolveSample(int sampleNo, size_t& roffset) const
1641    {
1642    #ifdef _OPENMP
1643            int tid=omp_get_thread_num();
1644    #else
1645            int tid=0;
1646    #endif
1647    
1648  /*  #ifdef LAZY_STACK_PROF
1649    \brief Compute the value of the expression for the given sample.          stackstart[tid]=&tid;
1650    \return Vector which stores the value of the subexpression for the given sample.          stackend[tid]=&tid;
1651    \param v A vector to store intermediate results.          const DataTypes::RealVectorType* r=resolveNodeSample(tid, sampleNo, roffset);
1652    \param offset Index in v to begin storing results.          size_t d=(size_t)stackstart[tid]-(size_t)stackend[tid];
1653    \param sampleNo Sample number to evaluate.          #pragma omp critical
1654    \param roffset (output parameter) the offset in the return vector where the result begins.          if (d>maxstackuse)
1655            {
1656    cout << "Max resolve Stack use " << d << endl;
1657                    maxstackuse=d;
1658            }
1659            return r;
1660    #else
1661            return resolveNodeSample(tid, sampleNo, roffset);
1662    #endif
1663    }
1664    
1665    The return value will be an existing vector so do not deallocate it.  
1666  */  // This needs to do the work of the identity constructor
1667  // the vector and the offset are a place where the method could write its data if it wishes  void
1668  // it is not obligated to do so. For example, if it has its own storage already, it can use that.  DataLazy::resolveToIdentity()
 // Hence the return value to indicate where the data is actually stored.  
 // Regardless, the storage should be assumed to be used, even if it isn't.  
   
 // the roffset is the offset within the returned vector where the data begins  
 const DataTypes::ValueType*  
 DataLazy::resolveSample(ValueType& v, size_t offset, int sampleNo, size_t& roffset)  
1669  {  {
1670  cout << "Resolve sample " << toString() << endl;     if (m_op==IDENTITY)
1671      // collapse so we have a 'E' node or an IDENTITY for some other type          return;
1672    if (m_readytype!='E' && m_op!=IDENTITY)     DataReady_ptr p=resolveNodeWorker();
1673       makeIdentity(p);
1674    }
1675    
1676    void DataLazy::makeIdentity(const DataReady_ptr& p)
1677    {
1678       m_op=IDENTITY;
1679       m_axis_offset=0;
1680       m_transpose=0;
1681       m_SL=m_SM=m_SR=0;
1682       m_children=m_height=0;
1683       m_id=p;
1684       if(p->isConstant()) {m_readytype='C';}
1685       else if(p->isExpanded()) {m_readytype='E';}
1686       else if (p->isTagged()) {m_readytype='T';}
1687       else {throw DataException("Unknown DataReady instance in convertToIdentity constructor.");}
1688       m_samplesize=p->getNumDPPSample()*p->getNoValues();
1689       m_left.reset();
1690       m_right.reset();
1691    }
1692    
1693    
1694    DataReady_ptr
1695    DataLazy::resolve()
1696    {
1697        resolveToIdentity();
1698        return m_id;
1699    }
1700    
1701    
1702    /* This is really a static method but I think that caused problems in windows */
1703    void
1704    DataLazy::resolveGroupWorker(std::vector<DataLazy*>& dats)
1705    {
1706      if (dats.empty())
1707    {    {
1708      collapse();          return;
1709    }    }
1710    if (m_op==IDENTITY)      vector<DataLazy*> work;
1711    {    FunctionSpace fs=dats[0]->getFunctionSpace();
1712      const ValueType& vec=m_id->getVector();    bool match=true;
1713      if (m_readytype=='C')    for (int i=dats.size()-1;i>=0;--i)
1714      {    {
1715      roffset=0;          if (dats[i]->m_readytype!='E')
1716      return &(vec);          {
1717      }                  dats[i]->collapse();
1718      roffset=m_id->getPointOffset(sampleNo, 0);          }
1719      return &(vec);          if (dats[i]->m_op!=IDENTITY)
1720            {
1721                    work.push_back(dats[i]);
1722                    if (fs!=dats[i]->getFunctionSpace())
1723                    {
1724                            match=false;
1725                    }
1726            }
1727      }
1728      if (work.empty())
1729      {
1730            return;         // no work to do
1731      }
1732      if (match)    // all functionspaces match.  Yes I realise this is overly strict
1733      {             // it is possible that dats[0] is one of the objects which we discarded and
1734                    // all the other functionspaces match.
1735            vector<DataExpanded*> dep;
1736            vector<ValueType*> vecs;
1737            for (int i=0;i<work.size();++i)
1738            {
1739                    dep.push_back(new DataExpanded(fs,work[i]->getShape(), ValueType(work[i]->getNoValues())));
1740                    vecs.push_back(&(dep[i]->getVectorRW()));
1741            }
1742            int totalsamples=work[0]->getNumSamples();
1743            const ValueType* res=0; // Storage for answer
1744            int sample;
1745            #pragma omp parallel private(sample, res)
1746            {
1747                size_t roffset=0;
1748                #pragma omp for schedule(static)
1749                for (sample=0;sample<totalsamples;++sample)
1750                {
1751                    roffset=0;
1752                    int j;
1753                    for (j=work.size()-1;j>=0;--j)
1754                    {
1755    #ifdef _OPENMP
1756                        res=work[j]->resolveNodeSample(omp_get_thread_num(),sample,roffset);
1757    #else
1758                        res=work[j]->resolveNodeSample(0,sample,roffset);
1759    #endif
1760                        RealVectorType::size_type outoffset=dep[j]->getPointOffset(sample,0);
1761                        memcpy(&((*vecs[j])[outoffset]),&((*res)[roffset]),work[j]->m_samplesize*sizeof(RealVectorType::ElementType));
1762                    }
1763                }
1764            }
1765            // Now we need to load the new results as identity ops into the lazy nodes
1766            for (int i=work.size()-1;i>=0;--i)
1767            {
1768                work[i]->makeIdentity(REFCOUNTNS::dynamic_pointer_cast<DataReady>(dep[i]->getPtr()));
1769            }
1770    }    }
1771    if (m_readytype!='E')    else  // functionspaces do not match
1772    {    {
1773      throw DataException("Programmer Error - Collapse did not produce an expanded node.");          for (int i=0;i<work.size();++i)
1774    }          {
1775    switch (getOpgroup(m_op))                  work[i]->resolveToIdentity();
1776    {          }
   case G_UNARY: return resolveUnary(v, offset,sampleNo,roffset);  
   case G_BINARY: return resolveBinary(v, offset,sampleNo,roffset);  
   case G_NP1OUT: return resolveNP1OUT(v, offset, sampleNo,roffset);  
   case G_TENSORPROD: return resolveTProd(v,offset, sampleNo,roffset);  
   default:  
     throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");  
1777    }    }
1778  }  }
1779    
1780    
1781  // To simplify the memory management, all threads operate on one large vector, rather than one each.  
1782  // Each sample is evaluated independently and copied into the result DataExpanded.  // This version of resolve uses storage in each node to hold results
1783  DataReady_ptr  DataReady_ptr
1784  DataLazy::resolve()  DataLazy::resolveNodeWorker()
1785  {  {
1786      if (m_readytype!='E')         // if the whole sub-expression is Constant or Tagged, then evaluate it normally
 cout << "Sample size=" << m_samplesize << endl;  
 cout << "Buffers=" << m_buffsRequired << endl;  
   
   if (m_readytype!='E')     // if the whole sub-expression is Constant or Tagged, then evaluate it normally  
1787    {    {
1788      collapse();      collapse();
1789    }    }
1790    if (m_op==IDENTITY)       // So a lazy expression of Constant or Tagged data will be returned here.    if (m_op==IDENTITY)           // So a lazy expression of Constant or Tagged data will be returned here.
1791    {    {
1792      return m_id;      return m_id;
1793    }    }
1794      // from this point on we must have m_op!=IDENTITY and m_readytype=='E'          // from this point on we must have m_op!=IDENTITY and m_readytype=='E'
   size_t threadbuffersize=m_maxsamplesize*(max(1,m_buffsRequired)); // Each thread needs to have enough  
     // storage to evaluate its expression  
   int numthreads=1;  
 #ifdef _OPENMP  
   numthreads=getNumberOfThreads();  
 #endif  
   ValueType v(numthreads*threadbuffersize);  
 cout << "Buffer created with size=" << v.size() << endl;  
1795    DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(),  ValueType(getNoValues()));    DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(),  ValueType(getNoValues()));
1796    ValueType& resvec=result->getVector();    ValueType& resvec=result->getVectorRW();
1797    DataReady_ptr resptr=DataReady_ptr(result);    DataReady_ptr resptr=DataReady_ptr(result);
1798    
1799    int sample;    int sample;
   size_t outoffset;     // offset in the output data  
1800    int totalsamples=getNumSamples();    int totalsamples=getNumSamples();
1801    const ValueType* res=0;   // Vector storing the answer    const ValueType* res=0;       // Storage for answer
1802    size_t resoffset=0;       // where in the vector to find the answer  LAZYDEBUG(cout << "Total number of samples=" <<totalsamples << endl;)
1803    #pragma omp parallel for private(sample,resoffset,outoffset,res) schedule(static)    #pragma omp parallel private(sample,res)
1804    for (sample=0;sample<totalsamples;++sample)    {
1805    {          size_t roffset=0;
1806  cout << "################################# " << sample << endl;  #ifdef LAZY_STACK_PROF
1807            stackstart[omp_get_thread_num()]=&roffset;
1808            stackend[omp_get_thread_num()]=&roffset;
1809    #endif
1810            #pragma omp for schedule(static)
1811            for (sample=0;sample<totalsamples;++sample)
1812            {
1813                    roffset=0;
1814  #ifdef _OPENMP  #ifdef _OPENMP
1815      res=resolveSample(v,threadbuffersize*omp_get_thread_num(),sample,resoffset);                  res=resolveNodeSample(omp_get_thread_num(),sample,roffset);
1816  #else  #else
1817      res=resolveSample(v,0,sample,resoffset);   // res would normally be v, but not if its a single IDENTITY op.                  res=resolveNodeSample(0,sample,roffset);
1818  #endif  #endif
1819  cerr << "-------------------------------- " << endl;  LAZYDEBUG(cout << "Sample #" << sample << endl;)
1820      outoffset=result->getPointOffset(sample,0);  LAZYDEBUG(cout << "Final res[" << roffset<< "]=" << (*res)[roffset] << (*res)[roffset]<< endl; )
1821  cerr << "offset=" << outoffset << endl;                  RealVectorType::size_type outoffset=result->getPointOffset(sample,0);
1822      for (unsigned int i=0;i<m_samplesize;++i,++outoffset,++resoffset)   // copy values into the output vector                  memcpy(&(resvec[outoffset]),&((*res)[roffset]),m_samplesize*sizeof(RealVectorType::ElementType));
1823      {          }
1824      resvec[outoffset]=(*res)[resoffset];    }
1825      }  #ifdef LAZY_STACK_PROF
1826  cerr << "*********************************" << endl;    for (int i=0;i<getNumberOfThreads();++i)
1827      {
1828            size_t r=((size_t)stackstart[i] - (size_t)stackend[i]);
1829    //      cout << i << " " << stackstart[i] << " .. " << stackend[i] << " = " <<  r << endl;
1830            if (r>maxstackuse)
1831            {
1832                    maxstackuse=r;
1833            }
1834    }    }
1835      cout << "Max resolve Stack use=" << maxstackuse << endl;
1836    #endif
1837    return resptr;    return resptr;
1838  }  }
1839    
# Line 1057  std::string Line 1841  std::string
1841  DataLazy::toString() const  DataLazy::toString() const
1842  {  {
1843    ostringstream oss;    ostringstream oss;
1844    oss << "Lazy Data:";    oss << "Lazy Data: [depth=" << m_height<< "] ";
1845    intoString(oss);    switch (escriptParams.getLAZY_STR_FMT())
1846      {
1847      case 1:       // tree format
1848            oss << endl;
1849            intoTreeString(oss,"");
1850            break;
1851      case 2:       // just the depth
1852            break;
1853      default:
1854            intoString(oss);
1855            break;
1856      }
1857    return oss.str();    return oss.str();
1858  }  }
1859    
# Line 1066  DataLazy::toString() const Line 1861  DataLazy::toString() const
1861  void  void
1862  DataLazy::intoString(ostringstream& oss) const  DataLazy::intoString(ostringstream& oss) const
1863  {  {
1864    //    oss << "[" << m_children <<";"<<m_height <<"]";
1865    switch (getOpgroup(m_op))    switch (getOpgroup(m_op))
1866    {    {
1867    case G_IDENTITY:    case G_IDENTITY:
1868      if (m_id->isExpanded())          if (m_id->isExpanded())
1869      {          {
1870         oss << "E";             oss << "E";
1871      }          }
1872      else if (m_id->isTagged())          else if (m_id->isTagged())
1873      {          {
1874        oss << "T";            oss << "T";
1875      }          }
1876      else if (m_id->isConstant())          else if (m_id->isConstant())
1877      {          {
1878        oss << "C";            oss << "C";
1879      }          }
1880      else          else
1881      {          {
1882        oss << "?";            oss << "?";
1883      }          }
1884      oss << '@' << m_id.get();          oss << '@' << m_id.get();
1885      break;          break;
1886    case G_BINARY:    case G_BINARY:
1887      oss << '(';          oss << '(';
1888      m_left->intoString(oss);          m_left->intoString(oss);
1889      oss << ' ' << opToString(m_op) << ' ';          oss << ' ' << opToString(m_op) << ' ';
1890      m_right->intoString(oss);          m_right->intoString(oss);
1891      oss << ')';          oss << ')';
1892      break;          break;
1893    case G_UNARY:    case G_UNARY:
1894      case G_UNARY_P:
1895    case G_NP1OUT:    case G_NP1OUT:
1896      oss << opToString(m_op) << '(';    case G_NP1OUT_P:
1897      m_left->intoString(oss);    case G_REDUCTION:
1898      oss << ')';          oss << opToString(m_op) << '(';
1899      break;          m_left->intoString(oss);
1900            oss << ')';
1901            break;
1902    case G_TENSORPROD:    case G_TENSORPROD:
1903      oss << opToString(m_op) << '(';          oss << opToString(m_op) << '(';
1904      m_left->intoString(oss);          m_left->intoString(oss);
1905      oss << ", ";          oss << ", ";
1906      m_right->intoString(oss);          m_right->intoString(oss);
1907      oss << ')';          oss << ')';
1908      break;          break;
1909      case G_NP1OUT_2P:
1910            oss << opToString(m_op) << '(';
1911            m_left->intoString(oss);
1912            oss << ", " << m_axis_offset << ", " << m_transpose;
1913            oss << ')';
1914            break;
1915      case G_CONDEVAL:
1916            oss << opToString(m_op)<< '(' ;
1917            m_mask->intoString(oss);
1918            oss << " ? ";
1919            m_left->intoString(oss);
1920            oss << " : ";
1921            m_right->intoString(oss);
1922            oss << ')';
1923            break;
1924    default:    default:
1925      oss << "UNKNOWN";          oss << "UNKNOWN";
1926    }    }
1927  }  }
1928    
1929    
1930    void
1931    DataLazy::intoTreeString(ostringstream& oss, string indent) const
1932    {
1933      oss << '[' << m_rank << ':' << setw(3) << m_samplesize << "] " << indent;
1934      switch (getOpgroup(m_op))
1935      {
1936      case G_IDENTITY:
1937            if (m_id->isExpanded())
1938            {
1939               oss << "E";
1940            }
1941            else if (m_id->isTagged())
1942            {
1943              oss << "T";
1944            }
1945            else if (m_id->isConstant())
1946            {
1947              oss << "C";
1948            }
1949            else
1950            {
1951              oss << "?";
1952            }
1953            oss << '@' << m_id.get() << endl;
1954            break;
1955      case G_BINARY:
1956            oss << opToString(m_op) << endl;
1957            indent+='.';
1958            m_left->intoTreeString(oss, indent);
1959            m_right->intoTreeString(oss, indent);
1960            break;
1961      case G_UNARY:
1962      case G_UNARY_P:
1963      case G_NP1OUT:
1964      case G_NP1OUT_P:
1965      case G_REDUCTION:
1966            oss << opToString(m_op) << endl;
1967            indent+='.';
1968            m_left->intoTreeString(oss, indent);
1969            break;
1970      case G_TENSORPROD:
1971            oss << opToString(m_op) << endl;
1972            indent+='.';
1973            m_left->intoTreeString(oss, indent);
1974            m_right->intoTreeString(oss, indent);
1975            break;
1976      case G_NP1OUT_2P:
1977            oss << opToString(m_op) << ", " << m_axis_offset << ", " << m_transpose<< endl;
1978            indent+='.';
1979            m_left->intoTreeString(oss, indent);
1980            break;
1981      default:
1982            oss << "UNKNOWN";
1983      }
1984    }
1985    
1986    
1987  DataAbstract*  DataAbstract*
1988  DataLazy::deepCopy()  DataLazy::deepCopy() const
1989  {  {
1990    switch (getOpgroup(m_op))    switch (getOpgroup(m_op))
1991    {    {
1992    case G_IDENTITY:  return new DataLazy(m_id->deepCopy()->getPtr());    case G_IDENTITY:  return new DataLazy(m_id->deepCopy()->getPtr());
1993    case G_UNARY: return new DataLazy(m_left->deepCopy()->getPtr(),m_op);    case G_UNARY:
1994    case G_BINARY:    return new DataLazy(m_left->deepCopy()->getPtr(),m_right->deepCopy()->getPtr(),m_op);    case G_REDUCTION:      return new DataLazy(m_left->deepCopy()->getPtr(),m_op);
1995      case G_UNARY_P:       return new DataLazy(m_left->deepCopy()->getPtr(), m_op, m_tol);
1996      case G_BINARY:        return new DataLazy(m_left->deepCopy()->getPtr(),m_right->deepCopy()->getPtr(),m_op);
1997    case G_NP1OUT: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(),m_op);    case G_NP1OUT: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(),m_op);
1998    case G_TENSORPROD: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);    case G_TENSORPROD: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
1999      case G_NP1OUT_P:   return new DataLazy(m_left->deepCopy()->getPtr(),m_op,  m_axis_offset);
2000      case G_NP1OUT_2P:  return new DataLazy(m_left->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
2001    default:    default:
2002      throw DataException("Programmer error - do not know how to deepcopy operator "+opToString(m_op)+".");          throw DataException("Programmer error - do not know how to deepcopy operator "+opToString(m_op)+".");
2003    }    }
2004  }  }
2005    
2006    
2007    
2008  // There is no single, natural interpretation of getLength on DataLazy.  // There is no single, natural interpretation of getLength on DataLazy.
2009  // Instances of DataReady can look at the size of their vectors.  // Instances of DataReady can look at the size of their vectors.
2010  // For lazy though, it could be the size the data would be if it were resolved;  // For lazy though, it could be the size the data would be if it were resolved;
2011  // or it could be some function of the lengths of the DataReady instances which  // or it could be some function of the lengths of the DataReady instances which
2012  // form part of the expression.  // form part of the expression.
2013  // Rather than have people making assumptions, I have disabled the method.  // Rather than have people making assumptions, I have disabled the method.
2014  DataTypes::ValueType::size_type  DataTypes::RealVectorType::size_type
2015  DataLazy::getLength() const  DataLazy::getLength() const
2016  {  {
2017    throw DataException("getLength() does not make sense for lazy data.");    throw DataException("getLength() does not make sense for lazy data.");
# Line 1149  DataLazy::getSlice(const DataTypes::Regi Line 2026  DataLazy::getSlice(const DataTypes::Regi
2026    
2027    
2028  // To do this we need to rely on our child nodes  // To do this we need to rely on our child nodes
2029  DataTypes::ValueType::size_type  DataTypes::RealVectorType::size_type
2030  DataLazy::getPointOffset(int sampleNo,  DataLazy::getPointOffset(int sampleNo,
2031                   int dataPointNo)                   int dataPointNo)
2032  {  {
2033    if (m_op==IDENTITY)    if (m_op==IDENTITY)
2034    {    {
2035      return m_id->getPointOffset(sampleNo,dataPointNo);          return m_id->getPointOffset(sampleNo,dataPointNo);
2036    }    }
2037    if (m_readytype!='E')    if (m_readytype!='E')
2038    {    {
2039      collapse();          collapse();
2040      return m_id->getPointOffset(sampleNo,dataPointNo);          return m_id->getPointOffset(sampleNo,dataPointNo);
2041    }    }
2042    // at this point we do not have an identity node and the expression will be Expanded    // at this point we do not have an identity node and the expression will be Expanded
2043    // so we only need to know which child to ask    // so we only need to know which child to ask
2044    if (m_left->m_readytype=='E')    if (m_left->m_readytype=='E')
2045    {    {
2046      return m_left->getPointOffset(sampleNo,dataPointNo);          return m_left->getPointOffset(sampleNo,dataPointNo);
2047    }    }
2048    else    else
2049    {    {
2050      return m_right->getPointOffset(sampleNo,dataPointNo);          return m_right->getPointOffset(sampleNo,dataPointNo);
2051    }    }
2052  }  }
2053    
2054  // To do this we need to rely on our child nodes  // To do this we need to rely on our child nodes
2055  DataTypes::ValueType::size_type  DataTypes::RealVectorType::size_type
2056  DataLazy::getPointOffset(int sampleNo,  DataLazy::getPointOffset(int sampleNo,
2057                   int dataPointNo) const                   int dataPointNo) const
2058  {  {
2059    if (m_op==IDENTITY)    if (m_op==IDENTITY)
2060    {    {
2061      return m_id->getPointOffset(sampleNo,dataPointNo);          return m_id->getPointOffset(sampleNo,dataPointNo);
2062    }    }
2063    if (m_readytype=='E')    if (m_readytype=='E')
2064    {    {
# Line 1189  DataLazy::getPointOffset(int sampleNo, Line 2066  DataLazy::getPointOffset(int sampleNo,
2066      // so we only need to know which child to ask      // so we only need to know which child to ask
2067      if (m_left->m_readytype=='E')      if (m_left->m_readytype=='E')
2068      {      {
2069      return m_left->getPointOffset(sampleNo,dataPointNo);          return m_left->getPointOffset(sampleNo,dataPointNo);
2070      }      }
2071      else      else
2072      {      {
2073      return m_right->getPointOffset(sampleNo,dataPointNo);          return m_right->getPointOffset(sampleNo,dataPointNo);
2074      }      }
2075    }    }
2076    if (m_readytype=='C')    if (m_readytype=='C')
2077    {    {
2078      return m_left->getPointOffset(sampleNo,dataPointNo); // which child doesn't matter          return m_left->getPointOffset(sampleNo,dataPointNo); // which child doesn't matter
2079    }    }
2080    throw DataException("Programmer error - getPointOffset on lazy data may require collapsing (but this object is marked const).");    throw DataException("Programmer error - getPointOffset on lazy data may require collapsing (but this object is marked const).");
2081  }  }
2082    
2083  // It would seem that DataTagged will need to be treated differently since even after setting all tags  
2084  // to zero, all the tags from all the DataTags would be in the result.  // I have decided to let Data:: handle this issue.
 // However since they all have the same value (0) whether they are there or not should not matter.  
 // So I have decided that for all types this method will create a constant 0.  
 // It can be promoted up as required.  
 // A possible efficiency concern might be expanded->constant->expanded which has an extra memory management  
 // but we can deal with that if it arrises.  
2085  void  void
2086  DataLazy::setToZero()  DataLazy::setToZero()
2087  {  {
2088    DataTypes::ValueType v(getNoValues(),0);  //   DataTypes::RealVectorType v(getNoValues(),0);
2089    m_id=DataReady_ptr(new DataConstant(getFunctionSpace(),getShape(),v));  //   m_id=DataReady_ptr(new DataConstant(getFunctionSpace(),getShape(),v));
2090    m_op=IDENTITY;  //   m_op=IDENTITY;
2091    m_right.reset();    //   m_right.reset();  
2092    m_left.reset();  //   m_left.reset();
2093    m_readytype='C';  //   m_readytype='C';
2094    m_buffsRequired=1;  //   m_buffsRequired=1;
2095    
2096      (void)privdebug;  // to stop the compiler complaining about unused privdebug
2097      throw DataException("Programmer error - setToZero not supported for DataLazy (DataLazy objects should be read only).");
2098  }  }
2099    
2100  }   // end namespace  bool
2101    DataLazy::actsExpanded() const
2102    {
2103            return (m_readytype=='E');
2104    }
2105    
2106    } // end namespace
2107    

Legend:
Removed from v.2082  
changed lines
  Added in v.6001

  ViewVC Help
Powered by ViewVC 1.1.26