/[escript]/branches/clazy/escriptcore/src/DataLazy.cpp
ViewVC logotype

Diff of /branches/clazy/escriptcore/src/DataLazy.cpp

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

trunk/escript/src/DataLazy.cpp revision 2082 by caltinay, Fri Nov 21 01:46:05 2008 UTC trunk/escriptcore/src/DataLazy.cpp revision 5985 by jfenwick, Fri Feb 26 02:21:44 2016 UTC
# Line 1  Line 1 
1    
2  /*******************************************************  /*****************************************************************************
3  *  *
4  * Copyright (c) 2003-2008 by University of Queensland  * Copyright (c) 2003-2016 by The University of Queensland
5  * Earth Systems Science Computational Center (ESSCC)  * http://www.uq.edu.au
 * http://www.uq.edu.au/esscc  
6  *  *
7  * Primary Business: Queensland, Australia  * Primary Business: Queensland, Australia
8  * Licensed under the Open Software License version 3.0  * Licensed under the Open Software License version 3.0
9  * http://www.opensource.org/licenses/osl-3.0.php  * http://www.opensource.org/licenses/osl-3.0.php
10  *  *
11  *******************************************************/  * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12    * Development 2012-2013 by School of Earth Sciences
13    * Development from 2014 by Centre for Geoscience Computing (GeoComp)
14    *
15    *****************************************************************************/
16    
17    #define ESNEEDPYTHON
18    #include "esysUtils/first.h"
19    
20  #include "DataLazy.h"  #include "DataLazy.h"
21  #ifdef USE_NETCDF  #include "Data.h"
22  #include <netcdfcpp.h>  #include "DataTypes.h"
23  #endif  #include "EscriptParams.h"
24  #ifdef PASO_MPI  #include "FunctionSpace.h"
25  #include <mpi.h>  #include "UnaryFuncs.h"    // for escript::fsign
26  #endif  #include "Utils.h"
27    
28    #include "esysUtils/Esys_MPI.h"
29    
30  #ifdef _OPENMP  #ifdef _OPENMP
31  #include <omp.h>  #include <omp.h>
32  #endif  #endif
33  #include "FunctionSpace.h"  #ifdef USE_NETCDF
34  #include "DataTypes.h"  #include <netcdfcpp.h>
35  #include "Data.h"  #endif
36  #include "UnaryFuncs.h"     // for escript::fsign  
37  #include "Utils.h"  #include <iomanip>              // for some fancy formatting in debug
38    
39    using namespace escript::DataTypes;
40    
41    #define NO_ARG
42    
43    // #define LAZYDEBUG(X) if (privdebug){X;}
44    #define LAZYDEBUG(X)
45    namespace
46    {
47    bool privdebug=false;
48    
49    #define ENABLEDEBUG privdebug=true;
50    #define DISABLEDEBUG privdebug=false;
51    }
52    
53    // #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {cerr << "\n!!!!!!! SIZE LIMIT EXCEEDED " << m_children << ";" << m_height << endl << toString() << endl;resolveToIdentity();}
54    
55    // #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {cerr << "SIZE LIMIT EXCEEDED " << m_height << endl;resolveToIdentity();}
56    
57    
58    #define SIZELIMIT if (m_height>escript::escriptParams.getTOO_MANY_LEVELS())  {if (escript::escriptParams.getLAZY_VERBOSE()){cerr << "SIZE LIMIT EXCEEDED height=" << m_height << endl;}resolveToIdentity();}
59    
60  /*  /*
61  How does DataLazy work?  How does DataLazy work?
# Line 39  A special operation, IDENTITY, stores an Line 68  A special operation, IDENTITY, stores an
68  This means that all "internal" nodes in the structure are instances of DataLazy.  This means that all "internal" nodes in the structure are instances of DataLazy.
69    
70  Each operation has a string representation as well as an opgroup - eg G_IDENTITY, G_BINARY, ...  Each operation has a string representation as well as an opgroup - eg G_IDENTITY, G_BINARY, ...
71  Note that IDENITY is not considered a unary operation.  Note that IDENTITY is not considered a unary operation.
72    
73  I am avoiding calling the structure formed a tree because it is not guaranteed to be one (eg c=a+a).  I am avoiding calling the structure formed a tree because it is not guaranteed to be one (eg c=a+a).
74  It must however form a DAG (directed acyclic graph).  It must however form a DAG (directed acyclic graph).
# Line 47  I will refer to individual DataLazy obje Line 76  I will refer to individual DataLazy obje
76    
77  Each node also stores:  Each node also stores:
78  - m_readytype \in {'E','T','C','?'} ~ indicates what sort of DataReady would be produced if the expression was  - m_readytype \in {'E','T','C','?'} ~ indicates what sort of DataReady would be produced if the expression was
79      evaluated.          evaluated.
80  - m_buffsrequired ~ the larged number of samples which would need to be kept simultaneously in order to  - m_buffsrequired ~ the large number of samples which would need to be kept simultaneously in order to
81      evaluate the expression.          evaluate the expression.
82  - m_samplesize ~ the number of doubles stored in a sample.  - m_samplesize ~ the number of doubles stored in a sample.
83    
84  When a new node is created, the above values are computed based on the values in the child nodes.  When a new node is created, the above values are computed based on the values in the child nodes.
# Line 70  The convention that I use, is that the r Line 99  The convention that I use, is that the r
99  For expressions which evaluate to Constant or Tagged, there is a different evaluation method.  For expressions which evaluate to Constant or Tagged, there is a different evaluation method.
100  The collapse method invokes the (non-lazy) operations on the Data class to evaluate the expression.  The collapse method invokes the (non-lazy) operations on the Data class to evaluate the expression.
101    
102  To add a new operator you need to do the following (plus anything I might have forgotten):  To add a new operator you need to do the following (plus anything I might have forgotten - adding a new group for example):
103  1) Add to the ES_optype.  1) Add to the ES_optype.
104  2) determine what opgroup your operation belongs to (X)  2) determine what opgroup your operation belongs to (X)
105  3) add a string for the op to the end of ES_opstrings  3) add a string for the op to the end of ES_opstrings
# Line 90  namespace escript Line 119  namespace escript
119  namespace  namespace
120  {  {
121    
122    
123    // enabling this will print out when ever the maximum stacksize used by resolve increases
124    // it assumes _OPENMP is also in use
125    //#define LAZY_STACK_PROF
126    
127    
128    
129    #ifndef _OPENMP
130      #ifdef LAZY_STACK_PROF
131      #undef LAZY_STACK_PROF
132      #endif
133    #endif
134    
135    
136    #ifdef LAZY_STACK_PROF
137    std::vector<void*> stackstart(getNumberOfThreads());
138    std::vector<void*> stackend(getNumberOfThreads());
139    size_t maxstackuse=0;
140    #endif
141    
142  enum ES_opgroup  enum ES_opgroup
143  {  {
144     G_UNKNOWN,     G_UNKNOWN,
145     G_IDENTITY,     G_IDENTITY,
146     G_BINARY,        // pointwise operations with two arguments     G_BINARY,            // pointwise operations with two arguments
147     G_UNARY,     // pointwise operations with one argument     G_UNARY,             // pointwise operations with one argument
148     G_NP1OUT,        // non-pointwise op with one output     G_UNARY_P,           // pointwise operations with one argument, requiring a parameter
149     G_TENSORPROD     // general tensor product     G_NP1OUT,            // non-pointwise op with one output
150       G_NP1OUT_P,          // non-pointwise op with one output requiring a parameter
151       G_TENSORPROD,        // general tensor product
152       G_NP1OUT_2P,         // non-pointwise op with one output requiring two params
153       G_REDUCTION,         // non-pointwise unary op with a scalar output
154       G_CONDEVAL
155  };  };
156    
157    
158    
159    
160  string ES_opstrings[]={"UNKNOWN","IDENTITY","+","-","*","/","^",  string ES_opstrings[]={"UNKNOWN","IDENTITY","+","-","*","/","^",
161              "sin","cos","tan",                          "sin","cos","tan",
162              "asin","acos","atan","sinh","cosh","tanh","erf",                          "asin","acos","atan","sinh","cosh","tanh","erf",
163              "asinh","acosh","atanh",                          "asinh","acosh","atanh",
164              "log10","log","sign","abs","neg","pos","exp","sqrt",                          "log10","log","sign","abs","neg","pos","exp","sqrt",
165              "1/","where>0","where<0","where>=0","where<=0",                          "1/","where>0","where<0","where>=0","where<=0", "where<>0","where=0",
166              "symmetric","nonsymmetric",                          "symmetric","nonsymmetric",
167              "prod"};                          "prod",
168  int ES_opcount=36;                          "transpose", "trace",
169                            "swapaxes",
170                            "minval", "maxval",
171                            "condEval"};
172    int ES_opcount=44;
173  ES_opgroup opgroups[]={G_UNKNOWN,G_IDENTITY,G_BINARY,G_BINARY,G_BINARY,G_BINARY, G_BINARY,  ES_opgroup opgroups[]={G_UNKNOWN,G_IDENTITY,G_BINARY,G_BINARY,G_BINARY,G_BINARY, G_BINARY,
174              G_UNARY,G_UNARY,G_UNARY, //10                          G_UNARY,G_UNARY,G_UNARY, //10
175              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,    // 17                          G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,        // 17
176              G_UNARY,G_UNARY,G_UNARY,                    // 20                          G_UNARY,G_UNARY,G_UNARY,                                        // 20
177              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,        // 28                          G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,        // 28
178              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,            // 33                          G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY, G_UNARY_P, G_UNARY_P,          // 35
179              G_NP1OUT,G_NP1OUT,                          G_NP1OUT,G_NP1OUT,
180              G_TENSORPROD};                          G_TENSORPROD,
181                            G_NP1OUT_P, G_NP1OUT_P,
182                            G_NP1OUT_2P,
183                            G_REDUCTION, G_REDUCTION,
184                            G_CONDEVAL};
185  inline  inline
186  ES_opgroup  ES_opgroup
187  getOpgroup(ES_optype op)  getOpgroup(ES_optype op)
# Line 131  getOpgroup(ES_optype op) Line 193  getOpgroup(ES_optype op)
193  FunctionSpace  FunctionSpace
194  resultFS(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)  resultFS(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
195  {  {
196      // perhaps this should call interpolate and throw or something?          // perhaps this should call interpolate and throw or something?
197      // maybe we need an interpolate node -          // maybe we need an interpolate node -
198      // that way, if interpolate is required in any other op we can just throw a          // that way, if interpolate is required in any other op we can just throw a
199      // programming error exception.          // programming error exception.
200    
201    FunctionSpace l=left->getFunctionSpace();    FunctionSpace l=left->getFunctionSpace();
202    FunctionSpace r=right->getFunctionSpace();    FunctionSpace r=right->getFunctionSpace();
203    if (l!=r)    if (l!=r)
204    {    {
205      if (r.probeInterpolation(l))      signed char res=r.getDomain()->preferredInterpolationOnDomain(r.getTypeCode(), l.getTypeCode());
206        if (res==1)
207      {      {
208      return l;          return l;
209      }      }
210      if (l.probeInterpolation(r))      if (res==-1)
211      {      {
212      return r;          return r;
213      }      }
214      throw DataException("Cannot interpolate between the FunctionSpaces given for operation "+opToString(op)+".");      throw DataException("Cannot interpolate between the FunctionSpaces given for operation "+opToString(op)+".");
215    }    }
# Line 158  resultFS(DataAbstract_ptr left, DataAbst Line 221  resultFS(DataAbstract_ptr left, DataAbst
221  DataTypes::ShapeType  DataTypes::ShapeType
222  resultShape(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)  resultShape(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
223  {  {
224      if (left->getShape()!=right->getShape())          if (left->getShape()!=right->getShape())
225      {          {
226        if ((getOpgroup(op)!=G_BINARY) && (getOpgroup(op)!=G_NP1OUT))            if ((getOpgroup(op)!=G_BINARY) && (getOpgroup(op)!=G_NP1OUT))
227        {            {
228          throw DataException("Shapes not the name - shapes must match for (point)binary operations.");                  throw DataException("Shapes not the name - shapes must match for (point)binary operations.");
229        }            }
230        if (left->getRank()==0)   // we need to allow scalar * anything  
231        {            if (left->getRank()==0)       // we need to allow scalar * anything
232          return right->getShape();            {
233        }                  return right->getShape();
234        if (right->getRank()==0)            }
235        {            if (right->getRank()==0)
236          return left->getShape();            {
237        }                  return left->getShape();
238        throw DataException("Shapes not the same - arguments must have matching shapes (or be scalars) for (point)binary operations on lazy data.");            }
239      }            throw DataException("Shapes not the same - arguments must have matching shapes (or be scalars) for (point)binary operations on lazy data.");
240      return left->getShape();          }
241            return left->getShape();
242  }  }
243    
244    // return the shape for "op left"
245    
246    DataTypes::ShapeType
247    resultShape(DataAbstract_ptr left, ES_optype op, int axis_offset)
248    {
249            switch(op)
250            {
251            case TRANS:
252               {                    // for the scoping of variables
253                    const DataTypes::ShapeType& s=left->getShape();
254                    DataTypes::ShapeType sh;
255                    int rank=left->getRank();
256                    if (axis_offset<0 || axis_offset>rank)
257                    {
258                stringstream e;
259                e << "Error - Data::transpose must have 0 <= axis_offset <= rank=" << rank;
260                throw DataException(e.str());
261            }
262            for (int i=0; i<rank; i++)
263                    {
264                       int index = (axis_offset+i)%rank;
265               sh.push_back(s[index]); // Append to new shape
266            }
267                    return sh;
268               }
269            break;
270            case TRACE:
271               {
272                    int rank=left->getRank();
273                    if (rank<2)
274                    {
275                       throw DataException("Trace can only be computed for objects with rank 2 or greater.");
276                    }
277                    if ((axis_offset>rank-2) || (axis_offset<0))
278                    {
279                       throw DataException("Trace: axis offset must lie between 0 and rank-2 inclusive.");
280                    }
281                    if (rank==2)
282                    {
283                       return DataTypes::scalarShape;
284                    }
285                    else if (rank==3)
286                    {
287                       DataTypes::ShapeType sh;
288                       if (axis_offset==0)
289                       {
290                            sh.push_back(left->getShape()[2]);
291                       }
292                       else         // offset==1
293                       {
294                            sh.push_back(left->getShape()[0]);
295                       }
296                       return sh;
297                    }
298                    else if (rank==4)
299                    {
300                       DataTypes::ShapeType sh;
301                       const DataTypes::ShapeType& s=left->getShape();
302                       if (axis_offset==0)
303                       {
304                            sh.push_back(s[2]);
305                            sh.push_back(s[3]);
306                       }
307                       else if (axis_offset==1)
308                       {
309                            sh.push_back(s[0]);
310                            sh.push_back(s[3]);
311                       }
312                       else         // offset==2
313                       {
314                            sh.push_back(s[0]);
315                            sh.push_back(s[1]);
316                       }
317                       return sh;
318                    }
319                    else            // unknown rank
320                    {
321                       throw DataException("Error - Data::trace can only be calculated for rank 2, 3 or 4 object.");
322                    }
323               }
324            break;
325            default:
326            throw DataException("Programmer error - resultShape(left,op) can't compute shapes for operator "+opToString(op)+".");
327            }
328    }
329    
330    DataTypes::ShapeType
331    SwapShape(DataAbstract_ptr left, const int axis0, const int axis1)
332    {
333         // This code taken from the Data.cpp swapaxes() method
334         // Some of the checks are probably redundant here
335         int axis0_tmp,axis1_tmp;
336         const DataTypes::ShapeType& s=left->getShape();
337         DataTypes::ShapeType out_shape;
338         // Here's the equivalent of python s_out=s[axis_offset:]+s[:axis_offset]
339         // which goes thru all shape vector elements starting with axis_offset (at index=rank wrap around to 0)
340         int rank=left->getRank();
341         if (rank<2) {
342            throw DataException("Error - Data::swapaxes argument must have at least rank 2.");
343         }
344         if (axis0<0 || axis0>rank-1) {
345            stringstream e;
346            e << "Error - Data::swapaxes: axis0 must be between 0 and rank-1=" << (rank-1);
347            throw DataException(e.str());
348         }
349         if (axis1<0 || axis1>rank-1) {
350            stringstream e;
351            e << "Error - Data::swapaxes: axis1 must be between 0 and rank-1=" << (rank-1);
352            throw DataException(e.str());
353         }
354         if (axis0 == axis1) {
355             throw DataException("Error - Data::swapaxes: axis indices must be different.");
356         }
357         if (axis0 > axis1) {
358             axis0_tmp=axis1;
359             axis1_tmp=axis0;
360         } else {
361             axis0_tmp=axis0;
362             axis1_tmp=axis1;
363         }
364         for (int i=0; i<rank; i++) {
365           if (i == axis0_tmp) {
366              out_shape.push_back(s[axis1_tmp]);
367           } else if (i == axis1_tmp) {
368              out_shape.push_back(s[axis0_tmp]);
369           } else {
370              out_shape.push_back(s[i]);
371           }
372         }
373        return out_shape;
374    }
375    
376    
377  // determine the output shape for the general tensor product operation  // determine the output shape for the general tensor product operation
378  // the additional parameters return information required later for the product  // the additional parameters return information required later for the product
379  // the majority of this code is copy pasted from C_General_Tensor_Product  // the majority of this code is copy pasted from C_General_Tensor_Product
380  DataTypes::ShapeType  DataTypes::ShapeType
381  GTPShape(DataAbstract_ptr left, DataAbstract_ptr right, int axis_offset, int transpose, int& SL, int& SM, int& SR)  GTPShape(DataAbstract_ptr left, DataAbstract_ptr right, int axis_offset, int transpose, int& SL, int& SM, int& SR)
382  {  {
383                
384    // Get rank and shape of inputs    // Get rank and shape of inputs
385    int rank0 = left->getRank();    int rank0 = left->getRank();
386    int rank1 = right->getRank();    int rank1 = right->getRank();
# Line 192  GTPShape(DataAbstract_ptr left, DataAbst Line 389  GTPShape(DataAbstract_ptr left, DataAbst
389    
390    // Prepare for the loops of the product and verify compatibility of shapes    // Prepare for the loops of the product and verify compatibility of shapes
391    int start0=0, start1=0;    int start0=0, start1=0;
392    if (transpose == 0)       {}    if (transpose == 0)           {}
393    else if (transpose == 1)  { start0 = axis_offset; }    else if (transpose == 1)      { start0 = axis_offset; }
394    else if (transpose == 2)  { start1 = rank1-axis_offset; }    else if (transpose == 2)      { start1 = rank1-axis_offset; }
395    else              { throw DataException("DataLazy GeneralTensorProduct Constructor: Error - transpose should be 0, 1 or 2"); }    else                          { throw DataException("DataLazy GeneralTensorProduct Constructor: Error - transpose should be 0, 1 or 2"); }
396    
397      if (rank0<axis_offset)
398      {
399            throw DataException("DataLazy GeneralTensorProduct Constructor: Error - rank of left < axisoffset");
400      }
401    
402    // Adjust the shapes for transpose    // Adjust the shapes for transpose
403    DataTypes::ShapeType tmpShape0(rank0);    // pre-sizing the vectors rather    DataTypes::ShapeType tmpShape0(rank0);        // pre-sizing the vectors rather
404    DataTypes::ShapeType tmpShape1(rank1);    // than using push_back    DataTypes::ShapeType tmpShape1(rank1);        // than using push_back
405    for (int i=0; i<rank0; i++)   { tmpShape0[i]=shape0[(i+start0)%rank0]; }    for (int i=0; i<rank0; i++)   { tmpShape0[i]=shape0[(i+start0)%rank0]; }
406    for (int i=0; i<rank1; i++)   { tmpShape1[i]=shape1[(i+start1)%rank1]; }    for (int i=0; i<rank1; i++)   { tmpShape1[i]=shape1[(i+start1)%rank1]; }
407    
408    // Prepare for the loops of the product    // Prepare for the loops of the product
409    SL=1, SM=1, SR=1;    SL=1, SM=1, SR=1;
410    for (int i=0; i<rank0-axis_offset; i++)   {    for (int i=0; i<rank0-axis_offset; i++)       {
411      SL *= tmpShape0[i];      SL *= tmpShape0[i];
412    }    }
413    for (int i=rank0-axis_offset; i<rank0; i++)   {    for (int i=rank0-axis_offset; i<rank0; i++)   {
414      if (tmpShape0[i] != tmpShape1[i-(rank0-axis_offset)]) {      if (tmpShape0[i] != tmpShape1[i-(rank0-axis_offset)]) {
415        throw DataException("C_GeneralTensorProduct: Error - incompatible shapes");        throw DataException("C_GeneralTensorProduct: Error - incompatible shapes");
416      }      }
417      SM *= tmpShape0[i];      SM *= tmpShape0[i];
418    }    }
419    for (int i=axis_offset; i<rank1; i++)     {    for (int i=axis_offset; i<rank1; i++)         {
420      SR *= tmpShape1[i];      SR *= tmpShape1[i];
421    }    }
422    
423    // Define the shape of the output (rank of shape is the sum of the loop ranges below)    // Define the shape of the output (rank of shape is the sum of the loop ranges below)
424    DataTypes::ShapeType shape2(rank0+rank1-2*axis_offset);      DataTypes::ShapeType shape2(rank0+rank1-2*axis_offset);      
425    {         // block to limit the scope of out_index    {                     // block to limit the scope of out_index
426       int out_index=0;       int out_index=0;
427       for (int i=0; i<rank0-axis_offset; i++, ++out_index) { shape2[out_index]=tmpShape0[i]; } // First part of arg_0_Z       for (int i=0; i<rank0-axis_offset; i++, ++out_index) { shape2[out_index]=tmpShape0[i]; } // First part of arg_0_Z
428       for (int i=axis_offset; i<rank1; i++, ++out_index)   { shape2[out_index]=tmpShape1[i]; } // Last part of arg_1_Z       for (int i=axis_offset; i<rank1; i++, ++out_index)   { shape2[out_index]=tmpShape1[i]; } // Last part of arg_1_Z
429    }    }
   return shape2;  
 }  
430    
431      if (shape2.size()>ESCRIPT_MAX_DATA_RANK)
432      {
433         ostringstream os;
434         os << "C_GeneralTensorProduct: Error - Attempt to create a rank " << shape2.size() << " object. The maximum rank is " << ESCRIPT_MAX_DATA_RANK << ".";
435         throw DataException(os.str());
436      }
437    
438  // determine the number of points in the result of "left op right"    return shape2;
 // note that determining the resultLength for G_TENSORPROD is more complex and will not be processed here  
 // size_t  
 // resultLength(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)  
 // {  
 //    switch (getOpgroup(op))  
 //    {  
 //    case G_BINARY: return left->getLength();  
 //    case G_UNARY: return left->getLength();  
 //    case G_NP1OUT: return left->getLength();  
 //    default:  
 //  throw DataException("Programmer Error - attempt to getLength() for operator "+opToString(op)+".");  
 //    }  
 // }  
   
 // determine the number of samples requires to evaluate an expression combining left and right  
 // NP1OUT needs an extra buffer because we can't write the answers over the top of the input.  
 // The same goes for G_TENSORPROD  
 int  
 calcBuffs(const DataLazy_ptr& left, const DataLazy_ptr& right, ES_optype op)  
 {  
    switch(getOpgroup(op))  
    {  
    case G_IDENTITY: return 1;  
    case G_BINARY: return max(left->getBuffsRequired(),right->getBuffsRequired()+1);  
    case G_UNARY: return max(left->getBuffsRequired(),1);  
    case G_NP1OUT: return 1+max(left->getBuffsRequired(),1);  
    case G_TENSORPROD: return 1+max(left->getBuffsRequired(),right->getBuffsRequired()+1);  
    default:  
     throw DataException("Programmer Error - attempt to calcBuffs() for operator "+opToString(op)+".");  
    }  
439  }  }
440    
441    }       // end anonymous namespace
 }   // end anonymous namespace  
442    
443    
444    
# Line 279  opToString(ES_optype op) Line 453  opToString(ES_optype op)
453    return ES_opstrings[op];    return ES_opstrings[op];
454  }  }
455    
456    void DataLazy::LazyNodeSetup()
457    {
458    #ifdef _OPENMP
459        int numthreads=omp_get_max_threads();
460        m_samples.resize(numthreads*m_samplesize);
461        m_sampleids=new int[numthreads];
462        for (int i=0;i<numthreads;++i)
463        {
464            m_sampleids[i]=-1;  
465        }
466    #else
467        m_samples.resize(m_samplesize);
468        m_sampleids=new int[1];
469        m_sampleids[0]=-1;
470    #endif  // _OPENMP
471    }
472    
473    
474    // Creates an identity node
475  DataLazy::DataLazy(DataAbstract_ptr p)  DataLazy::DataLazy(DataAbstract_ptr p)
476      : parent(p->getFunctionSpace(),p->getShape()),          : parent(p->getFunctionSpace(),p->getShape())
477      m_op(IDENTITY),          ,m_sampleids(0),
478      m_axis_offset(0),          m_samples(1)
     m_transpose(0),  
     m_SL(0), m_SM(0), m_SR(0)  
479  {  {
480     if (p->isLazy())     if (p->isLazy())
481     {     {
482      // I don't want identity of Lazy.          // I don't want identity of Lazy.
483      // Question: Why would that be so bad?          // Question: Why would that be so bad?
484      // Answer: We assume that the child of ID is something we can call getVector on          // Answer: We assume that the child of ID is something we can call getVector on
485      throw DataException("Programmer error - attempt to create identity from a DataLazy.");          throw DataException("Programmer error - attempt to create identity from a DataLazy.");
486     }     }
487     else     else
488     {     {
489      m_id=dynamic_pointer_cast<DataReady>(p);          p->makeLazyShared();
490      if(p->isConstant()) {m_readytype='C';}          DataReady_ptr dr=dynamic_pointer_cast<DataReady>(p);
491      else if(p->isExpanded()) {m_readytype='E';}          makeIdentity(dr);
492      else if (p->isTagged()) {m_readytype='T';}  LAZYDEBUG(cout << "Wrapping " << dr.get() << " id=" << m_id.get() << endl;)
     else {throw DataException("Unknown DataReady instance in DataLazy constructor.");}  
493     }     }
494     m_buffsRequired=1;  LAZYDEBUG(cout << "(1)Lazy created with " << m_samplesize << endl;)
    m_samplesize=getNumDPPSample()*getNoValues();  
    m_maxsamplesize=m_samplesize;  
 cout << "(1)Lazy created with " << m_samplesize << endl;  
495  }  }
496    
   
   
   
497  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op)  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op)
498      : parent(left->getFunctionSpace(),left->getShape()),          : parent(left->getFunctionSpace(),(getOpgroup(op)!=G_REDUCTION)?left->getShape():DataTypes::scalarShape),
499      m_op(op),          m_op(op),
500      m_axis_offset(0),          m_axis_offset(0),
501      m_transpose(0),          m_transpose(0),
502      m_SL(0), m_SM(0), m_SR(0)          m_SL(0), m_SM(0), m_SR(0)
503  {  {
504     if ((getOpgroup(op)!=G_UNARY) && (getOpgroup(op)!=G_NP1OUT))     if ((getOpgroup(op)!=G_UNARY) && (getOpgroup(op)!=G_NP1OUT) && (getOpgroup(op)!=G_REDUCTION))
505     {     {
506      throw DataException("Programmer error - constructor DataLazy(left, op) will only process UNARY operations.");          throw DataException("Programmer error - constructor DataLazy(left, op) will only process UNARY operations.");
507     }     }
508    
509     DataLazy_ptr lleft;     DataLazy_ptr lleft;
510     if (!left->isLazy())     if (!left->isLazy())
511     {     {
512      lleft=DataLazy_ptr(new DataLazy(left));          lleft=DataLazy_ptr(new DataLazy(left));
513     }     }
514     else     else
515     {     {
516      lleft=dynamic_pointer_cast<DataLazy>(left);          lleft=dynamic_pointer_cast<DataLazy>(left);
517     }     }
518     m_readytype=lleft->m_readytype;     m_readytype=lleft->m_readytype;
519     m_left=lleft;     m_left=lleft;
    m_buffsRequired=calcBuffs(m_left, m_right,m_op); // yeah m_right will be null at this point  
520     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
521     m_maxsamplesize=max(m_samplesize,m_left->getMaxSampleSize());     m_children=m_left->m_children+1;
522       m_height=m_left->m_height+1;
523       LazyNodeSetup();
524       SIZELIMIT
525  }  }
526    
527    
// In this constructor we need to consider interpolation.
// Builds a binary-operation node (left OP right).  Operands are first
// interpolated onto the result FunctionSpace if needed, then wrapped in
// IDENTITY DataLazy nodes when they are not already lazy.
DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
    : parent(resultFS(left,right,op), resultShape(left,right,op)),
    m_op(op),
    m_SL(0), m_SM(0), m_SR(0)
{
LAZYDEBUG(cout << "Forming operator with " << left.get() << " " << right.get() << endl;)
    if ((getOpgroup(op)!=G_BINARY))
    {
        throw DataException("Programmer error - constructor DataLazy(left, right, op) will only process BINARY operations.");
    }

    if (getFunctionSpace()!=left->getFunctionSpace())   // left needs to be interpolated
    {
        FunctionSpace fs=getFunctionSpace();
        Data ltemp(left);
        Data tmp(ltemp,fs);
        left=tmp.borrowDataPtr();
    }
    if (getFunctionSpace()!=right->getFunctionSpace())  // right needs to be interpolated
    {
        Data tmp(Data(right),getFunctionSpace());
        right=tmp.borrowDataPtr();
LAZYDEBUG(cout << "Right interpolation required " << right.get() << endl;)
    }
    left->operandCheck(*right);

    if (left->isLazy())         // the children need to be DataLazy. Wrap them in IDENTITY if required
    {
        m_left=dynamic_pointer_cast<DataLazy>(left);
LAZYDEBUG(cout << "Left is " << m_left->toString() << endl;)
    }
    else
    {
        m_left=DataLazy_ptr(new DataLazy(left));
LAZYDEBUG(cout << "Left " << left.get() << " wrapped " << m_left->m_id.get() << endl;)
    }
    if (right->isLazy())
    {
        m_right=dynamic_pointer_cast<DataLazy>(right);
LAZYDEBUG(cout << "Right is " << m_right->toString() << endl;)
    }
    else
    {
        m_right=DataLazy_ptr(new DataLazy(right));
LAZYDEBUG(cout << "Right " << right.get() << " wrapped " << m_right->m_id.get() << endl;)
    }
    // readytype: 'E' (expanded) dominates, then 'T' (tagged), else 'C' (constant)
    char lt=m_left->m_readytype;
    char rt=m_right->m_readytype;
    if (lt=='E' || rt=='E')
    {
        m_readytype='E';
    }
    else if (lt=='T' || rt=='T')
    {
        m_readytype='T';
    }
    else
    {
        m_readytype='C';
    }
    m_samplesize=getNumDPPSample()*getNoValues();
    m_children=m_left->m_children+m_right->m_children+2;       // node counts of both subtrees plus the two of us? NOTE(review): "+2" presumably counts this node and one wrapper - confirm against LazyNodeSetup
    m_height=max(m_left->m_height,m_right->m_height)+1;
    LazyNodeSetup();
    SIZELIMIT
LAZYDEBUG(cout << "(3)Lazy created with " << m_samplesize << endl;)
}
596    
// General tensor product node (G_TENSORPROD).
// axis_offset and transpose parameterise C_GeneralTensorProduct; the result
// shape is computed by GTPShape, which also fills m_SL/m_SM/m_SR.
DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op, int axis_offset, int transpose)
    : parent(resultFS(left,right,op), GTPShape(left,right, axis_offset, transpose, m_SL,m_SM, m_SR)),
    m_op(op),
    m_axis_offset(axis_offset),
    m_transpose(transpose)
{
    if ((getOpgroup(op)!=G_TENSORPROD))
    {
        throw DataException("Programmer error - constructor DataLazy(left, right, op, ax, tr) will only process BINARY operations which require parameters.");
    }
    if ((transpose>2) || (transpose<0))
    {
        throw DataException("DataLazy GeneralTensorProduct constructor: Error - transpose should be 0, 1 or 2");
    }
    if (getFunctionSpace()!=left->getFunctionSpace())   // left needs to be interpolated
    {
        FunctionSpace fs=getFunctionSpace();
        Data ltemp(left);
        Data tmp(ltemp,fs);
        left=tmp.borrowDataPtr();
    }
    if (getFunctionSpace()!=right->getFunctionSpace())  // right needs to be interpolated
    {
        Data tmp(Data(right),getFunctionSpace());
        right=tmp.borrowDataPtr();
    }
    // operand shapes legitimately differ for a tensor product, so the generic
    // binary-op check is disabled in this revision (kept for reference):
//    left->operandCheck(*right);

    if (left->isLazy())         // the children need to be DataLazy. Wrap them in IDENTITY if required
    {
        m_left=dynamic_pointer_cast<DataLazy>(left);
    }
    else
    {
        m_left=DataLazy_ptr(new DataLazy(left));
    }
    if (right->isLazy())
    {
        m_right=dynamic_pointer_cast<DataLazy>(right);
    }
    else
    {
        m_right=DataLazy_ptr(new DataLazy(right));
    }
    // readytype: 'E' (expanded) dominates, then 'T' (tagged), else 'C' (constant)
    char lt=m_left->m_readytype;
    char rt=m_right->m_readytype;
    if (lt=='E' || rt=='E')
    {
        m_readytype='E';
    }
    else if (lt=='T' || rt=='T')
    {
        m_readytype='T';
    }
    else
    {
        m_readytype='C';
    }
    m_samplesize=getNumDPPSample()*getNoValues();
    m_children=m_left->m_children+m_right->m_children+2;
    m_height=max(m_left->m_height,m_right->m_height)+1;
    LazyNodeSetup();
    SIZELIMIT
LAZYDEBUG(cout << "(4)Lazy created with " << m_samplesize << endl;)
}
662    
663    
// Unary operation that takes one integer parameter (G_NP1OUT_P),
// e.g. trace/transpose with an axis offset.
DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, int axis_offset)
    : parent(left->getFunctionSpace(), resultShape(left,op, axis_offset)),
    m_op(op),
    m_axis_offset(axis_offset),
    m_transpose(0),
    m_tol(0)
{
    if ((getOpgroup(op)!=G_NP1OUT_P))
    {
        throw DataException("Programmer error - constructor DataLazy(left, op, ax) will only process UNARY operations which require parameters.");
    }
    DataLazy_ptr lleft;
    if (!left->isLazy())
    {
        lleft=DataLazy_ptr(new DataLazy(left));    // wrap ready data in an IDENTITY node
    }
    else
    {
        lleft=dynamic_pointer_cast<DataLazy>(left);
    }
    m_readytype=lleft->m_readytype;
    m_left=lleft;
    m_samplesize=getNumDPPSample()*getNoValues();
    m_children=m_left->m_children+1;
    m_height=m_left->m_height+1;
    LazyNodeSetup();
    SIZELIMIT
LAZYDEBUG(cout << "(5)Lazy created with " << m_samplesize << endl;)
}
693    
// Unary operation that takes a tolerance parameter (G_UNARY_P),
// e.g. whereZero/whereNonZero with tolerance tol.
DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, double tol)
    : parent(left->getFunctionSpace(), left->getShape()),
    m_op(op),
    m_axis_offset(0),
    m_transpose(0),
    m_tol(tol)
{
    if ((getOpgroup(op)!=G_UNARY_P))
    {
        throw DataException("Programmer error - constructor DataLazy(left, op, tol) will only process UNARY operations which require parameters.");
    }
    DataLazy_ptr lleft;
    if (!left->isLazy())
    {
        lleft=DataLazy_ptr(new DataLazy(left));    // wrap ready data in an IDENTITY node
    }
    else
    {
        lleft=dynamic_pointer_cast<DataLazy>(left);
    }
    m_readytype=lleft->m_readytype;
    m_left=lleft;
    m_samplesize=getNumDPPSample()*getNoValues();
    m_children=m_left->m_children+1;
    m_height=m_left->m_height+1;
    LazyNodeSetup();
    SIZELIMIT
LAZYDEBUG(cout << "(6)Lazy created with " << m_samplesize << endl;)
}
723    
724    
725    DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, const int axis0, const int axis1)
726            : parent(left->getFunctionSpace(), SwapShape(left,axis0,axis1)),
727            m_op(op),
728            m_axis_offset(axis0),
729            m_transpose(axis1),
730            m_tol(0)
731  {  {
732       if ((getOpgroup(op)!=G_NP1OUT_2P))
733       {
734            throw DataException("Programmer error - constructor DataLazy(left, op, tol) will only process UNARY operations which require two integer parameters.");
735       }
736       DataLazy_ptr lleft;
737       if (!left->isLazy())
738       {
739            lleft=DataLazy_ptr(new DataLazy(left));
740       }
741       else
742       {
743            lleft=dynamic_pointer_cast<DataLazy>(left);
744       }
745       m_readytype=lleft->m_readytype;
746       m_left=lleft;
747       m_samplesize=getNumDPPSample()*getNoValues();
748       m_children=m_left->m_children+1;
749       m_height=m_left->m_height+1;
750       LazyNodeSetup();
751       SIZELIMIT
752    LAZYDEBUG(cout << "(7)Lazy created with " << m_samplesize << endl;)
753  }  }
754    
755    
namespace
{

    // Return the largest of the three arguments.
    inline int max3(int a, int b, int c)
    {
        int best = (a > b) ? a : b;
        if (c > best)
        {
            best = c;
        }
        return best;
    }

}
766    
// Ternary condEval node: per sample, selects values from left or right
// according to mask.  All three operands must share the same readytype.
DataLazy::DataLazy(DataAbstract_ptr mask, DataAbstract_ptr left, DataAbstract_ptr right/*, double tol*/)
    : parent(left->getFunctionSpace(), left->getShape()),
    m_op(CONDEVAL),
    m_axis_offset(0),
    m_transpose(0),
    m_tol(0)
{
    DataLazy_ptr lmask;
    DataLazy_ptr lleft;
    DataLazy_ptr lright;
    // wrap any non-lazy operand in an IDENTITY node
    if (!mask->isLazy())
    {
        lmask=DataLazy_ptr(new DataLazy(mask));
    }
    else
    {
        lmask=dynamic_pointer_cast<DataLazy>(mask);
    }
    if (!left->isLazy())
    {
        lleft=DataLazy_ptr(new DataLazy(left));
    }
    else
    {
        lleft=dynamic_pointer_cast<DataLazy>(left);
    }
    if (!right->isLazy())
    {
        lright=DataLazy_ptr(new DataLazy(right));
    }
    else
    {
        lright=dynamic_pointer_cast<DataLazy>(right);
    }
    m_readytype=lmask->m_readytype;
    if ((lleft->m_readytype!=lright->m_readytype) || (lmask->m_readytype!=lleft->m_readytype))
    {
        throw DataException("Programmer Error - condEval arguments must have the same readytype");
    }
    m_left=lleft;
    m_right=lright;
    m_mask=lmask;
    m_samplesize=getNumDPPSample()*getNoValues();
    m_children=m_left->m_children+m_right->m_children+m_mask->m_children+1;
    m_height=max3(m_left->m_height,m_right->m_height,m_mask->m_height)+1;
    LazyNodeSetup();
    SIZELIMIT
LAZYDEBUG(cout << "(8)Lazy created with " << m_samplesize << endl;)
}
817    
818    
819    
DataLazy::~DataLazy()
{
    // per-thread sample-id array; presumably allocated in LazyNodeSetup() - confirm
    delete[] m_sampleids;
}
824    
825    
826  /*  /*
827    \brief Evaluates the expression using methods on Data.    \brief Evaluates the expression using methods on Data.
828    This does the work for the collapse method.    This does the work for the collapse method.
829    For reasons of efficiency do not call this method on DataExpanded nodes.    For reasons of efficiency do not call this method on DataExpanded nodes.
830  */  */
831  DataReady_ptr  DataReady_ptr
832  DataLazy::collapseToReady()  DataLazy::collapseToReady() const
833  {  {
834    if (m_readytype=='E')    if (m_readytype=='E')
835    { // this is more an efficiency concern than anything else    {     // this is more an efficiency concern than anything else
836      throw DataException("Programmer Error - do not use collapse on Expanded data.");      throw DataException("Programmer Error - do not use collapse on Expanded data.");
837    }    }
838    if (m_op==IDENTITY)    if (m_op==IDENTITY)
# Line 511  DataLazy::collapseToReady() Line 850  DataLazy::collapseToReady()
850    switch(m_op)    switch(m_op)
851    {    {
852      case ADD:      case ADD:
853      result=left+right;          result=left+right;
854      break;          break;
855      case SUB:            case SUB:          
856      result=left-right;          result=left-right;
857      break;          break;
858      case MUL:            case MUL:          
859      result=left*right;          result=left*right;
860      break;          break;
861      case DIV:            case DIV:          
862      result=left/right;          result=left/right;
863      break;          break;
864      case SIN:      case SIN:
865      result=left.sin();            result=left.sin();      
866      break;          break;
867      case COS:      case COS:
868      result=left.cos();          result=left.cos();
869      break;          break;
870      case TAN:      case TAN:
871      result=left.tan();          result=left.tan();
872      break;          break;
873      case ASIN:      case ASIN:
874      result=left.asin();          result=left.asin();
875      break;          break;
876      case ACOS:      case ACOS:
877      result=left.acos();          result=left.acos();
878      break;          break;
879      case ATAN:      case ATAN:
880      result=left.atan();          result=left.atan();
881      break;          break;
882      case SINH:      case SINH:
883      result=left.sinh();          result=left.sinh();
884      break;          break;
885      case COSH:      case COSH:
886      result=left.cosh();          result=left.cosh();
887      break;          break;
888      case TANH:      case TANH:
889      result=left.tanh();          result=left.tanh();
890      break;          break;
891      case ERF:      case ERF:
892      result=left.erf();          result=left.erf();
893      break;          break;
894     case ASINH:     case ASINH:
895      result=left.asinh();          result=left.asinh();
896      break;          break;
897     case ACOSH:     case ACOSH:
898      result=left.acosh();          result=left.acosh();
899      break;          break;
900     case ATANH:     case ATANH:
901      result=left.atanh();          result=left.atanh();
902      break;          break;
903      case LOG10:      case LOG10:
904      result=left.log10();          result=left.log10();
905      break;          break;
906      case LOG:      case LOG:
907      result=left.log();          result=left.log();
908      break;          break;
909      case SIGN:      case SIGN:
910      result=left.sign();          result=left.sign();
911      break;          break;
912      case ABS:      case ABS:
913      result=left.abs();          result=left.abs();
914      break;          break;
915      case NEG:      case NEG:
916      result=left.neg();          result=left.neg();
917      break;          break;
918      case POS:      case POS:
919      // it doesn't mean anything for delayed.          // it doesn't mean anything for delayed.
920      // it will just trigger a deep copy of the lazy object          // it will just trigger a deep copy of the lazy object
921      throw DataException("Programmer error - POS not supported for lazy data.");          throw DataException("Programmer error - POS not supported for lazy data.");
922      break;          break;
923      case EXP:      case EXP:
924      result=left.exp();          result=left.exp();
925      break;          break;
926      case SQRT:      case SQRT:
927      result=left.sqrt();          result=left.sqrt();
928      break;          break;
929      case RECIP:      case RECIP:
930      result=left.oneOver();          result=left.oneOver();
931      break;          break;
932      case GZ:      case GZ:
933      result=left.wherePositive();          result=left.wherePositive();
934      break;          break;
935      case LZ:      case LZ:
936      result=left.whereNegative();          result=left.whereNegative();
937      break;          break;
938      case GEZ:      case GEZ:
939      result=left.whereNonNegative();          result=left.whereNonNegative();
940      break;          break;
941      case LEZ:      case LEZ:
942      result=left.whereNonPositive();          result=left.whereNonPositive();
943      break;          break;
944        case NEZ:
945            result=left.whereNonZero(m_tol);
946            break;
947        case EZ:
948            result=left.whereZero(m_tol);
949            break;
950      case SYM:      case SYM:
951      result=left.symmetric();          result=left.symmetric();
952      break;          break;
953      case NSYM:      case NSYM:
954      result=left.nonsymmetric();          result=left.nonsymmetric();
955      break;          break;
956      case PROD:      case PROD:
957      result=C_GeneralTensorProduct(left,right,m_axis_offset, m_transpose);          result=C_GeneralTensorProduct(left,right,m_axis_offset, m_transpose);
958      break;          break;
959        case TRANS:
960            result=left.transpose(m_axis_offset);
961            break;
962        case TRACE:
963            result=left.trace(m_axis_offset);
964            break;
965        case SWAP:
966            result=left.swapaxes(m_axis_offset, m_transpose);
967            break;
968        case MINVAL:
969            result=left.minval();
970            break;
971        case MAXVAL:
972            result=left.minval();
973            break;
974      default:      default:
975      throw DataException("Programmer error - collapseToReady does not know how to resolve operator "+opToString(m_op)+".");          throw DataException("Programmer error - collapseToReady does not know how to resolve operator "+opToString(m_op)+".");
976    }    }
977    return result.borrowReadyPtr();    return result.borrowReadyPtr();
978  }  }
# Line 624  DataLazy::collapseToReady() Line 984  DataLazy::collapseToReady()
984     the purpose of using DataLazy in the first place).     the purpose of using DataLazy in the first place).
985  */  */
// Collapse this node into a ready value: evaluates the subtree via
// collapseToReady(), stores the result in m_id and turns this node into an
// IDENTITY.  NOTE(review): declared const yet assigns m_id/m_op, so those
// members are presumably mutable - confirm in DataLazy.h.
void
DataLazy::collapse() const
{
  if (m_op==IDENTITY)
  {
        return;     // already collapsed
  }
  if (m_readytype=='E')
  {     // this is more an efficiency concern than anything else
    throw DataException("Programmer Error - do not use collapse on Expanded data.");
  }
  m_id=collapseToReady();
  m_op=IDENTITY;
}
1000    
1001  /*  
1002    \brief Compute the value of the expression (unary operation) for the given sample.  
1003    \return Vector which stores the value of the subexpression for the given sample.  
1004    \param v A vector to store intermediate results.  
1005    \param offset Index in v to begin storing results.  
1006    \param sampleNo Sample number to evaluate.  #define PROC_OP(TYPE,X)                               \
1007    \param roffset (output parameter) the offset in the return vector where the result begins.          for (int j=0;j<onumsteps;++j)\
1008            {\
1009    The return value will be an existing vector so do not deallocate it.            for (int i=0;i<numsteps;++i,resultp+=resultStep) \
1010    If the result is stored in v it should be stored at the offset given.            { \
1011    Everything from offset to the end of v should be considered available for this method to use.  LAZYDEBUG(cout << "[left,right]=[" << lroffset << "," << rroffset << "]" << endl;)\
1012  */  LAZYDEBUG(cout << "{left,right}={" << (*left)[lroffset] << "," << (*right)[rroffset] << "}\n";)\
1013  DataTypes::ValueType*               tensor_binary_operation< TYPE >(chunksize, &((*left)[lroffset]), &((*right)[rroffset]), resultp, X); \
1014  DataLazy::resolveUnary(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const  LAZYDEBUG(cout << " result=      " << resultp[0] << endl;) \
1015                 lroffset+=leftstep; \
1016                 rroffset+=rightstep; \
1017              }\
1018              lroffset+=oleftstep;\
1019              rroffset+=orightstep;\
1020            }
1021    
1022    
1023    // The result will be stored in m_samples
1024    // The return value is a pointer to the DataVector, offset is the offset within the return value
// Resolve one sample of this node for thread tid.
// The result will be stored in m_samples (one slot per thread);
// the return value is a pointer to the DataVector holding it and roffset is
// the offset of the sample within that vector.
const DataTypes::RealVectorType*
DataLazy::resolveNodeSample(int tid, int sampleNo, size_t& roffset) const
{
LAZYDEBUG(cout << "Resolve sample " << toString() << endl;)
        // collapse so we have a 'E' node or an IDENTITY for some other type
  if (m_readytype!='E' && m_op!=IDENTITY)
  {
        collapse();
  }
  if (m_op==IDENTITY)
  {
    // leaf node: hand back the ready data's own vector directly
    const ValueType& vec=m_id->getVectorRO();
    roffset=m_id->getPointOffset(sampleNo, 0);
#ifdef LAZY_STACK_PROF
// record the deepest stack address seen by this thread (stack usage profiling)
int x;
if (&x<stackend[omp_get_thread_num()])
{
       stackend[omp_get_thread_num()]=&x;
}
#endif
    return &(vec);
  }
  if (m_readytype!='E')
  {
    throw DataException("Programmer Error - Collapse did not produce an expanded node.");
  }
  if (m_sampleids[tid]==sampleNo)
  {
        roffset=tid*m_samplesize;
        return &(m_samples);            // sample is already resolved
  }
  m_sampleids[tid]=sampleNo;    // mark this thread's slot as holding sampleNo

  // dispatch on the operation group to the specialised resolver
  switch (getOpgroup(m_op))
  {
  case G_UNARY:
  case G_UNARY_P: return resolveNodeUnary(tid, sampleNo, roffset);
  case G_BINARY: return resolveNodeBinary(tid, sampleNo, roffset);
  case G_NP1OUT: return resolveNodeNP1OUT(tid, sampleNo, roffset);
  case G_NP1OUT_P: return resolveNodeNP1OUT_P(tid, sampleNo, roffset);
  case G_TENSORPROD: return resolveNodeTProd(tid, sampleNo, roffset);
  case G_NP1OUT_2P: return resolveNodeNP1OUT_2P(tid, sampleNo, roffset);
  case G_REDUCTION: return resolveNodeReduction(tid, sampleNo, roffset);
  case G_CONDEVAL: return resolveNodeCondEval(tid, sampleNo, roffset);
  default:
    throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");
  }
}
1073    
1074    const DataTypes::RealVectorType*
1075    DataLazy::resolveNodeUnary(int tid, int sampleNo, size_t& roffset) const
1076    {
1077            // we assume that any collapsing has been done before we get here
1078            // since we only have one argument we don't need to think about only
1079            // processing single points.
1080            // we will also know we won't get identity nodes
1081    if (m_readytype!='E')    if (m_readytype!='E')
1082    {    {
1083      throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");      throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
1084    }    }
1085    const ValueType* vleft=m_left->resolveSample(v,offset,sampleNo,roffset);    if (m_op==IDENTITY)
1086    const double* left=&((*vleft)[roffset]);    {
1087    double* result=&(v[offset]);      throw DataException("Programmer error - resolveNodeUnary should not be called on identity nodes.");
1088    roffset=offset;    }
1089      const DataTypes::RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, roffset);
1090      const double* left=&((*leftres)[roffset]);
1091      roffset=m_samplesize*tid;
1092      double* result=&(m_samples[roffset]);
1093    switch (m_op)    switch (m_op)
1094    {    {
1095      case SIN:        case SIN:  
1096      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sin);          tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sin);
1097      break;          break;
1098      case COS:      case COS:
1099      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cos);          tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cos);
1100      break;          break;
1101      case TAN:      case TAN:
1102      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tan);          tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tan);
1103      break;          break;
1104      case ASIN:      case ASIN:
1105      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::asin);          tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::asin);
1106      break;          break;
1107      case ACOS:      case ACOS:
1108      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::acos);          tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::acos);
1109      break;          break;
1110      case ATAN:      case ATAN:
1111      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::atan);          tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::atan);
1112      break;          break;
1113      case SINH:      case SINH:
1114      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sinh);          tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sinh);
1115      break;          break;
1116      case COSH:      case COSH:
1117      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cosh);          tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cosh);
1118      break;          break;
1119      case TANH:      case TANH:
1120      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tanh);          tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tanh);
1121      break;          break;
1122      case ERF:      case ERF:
1123  #if defined (_WIN32) && !defined(__INTEL_COMPILER)  #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1124      throw DataException("Error - Data:: erf function is not supported on _WIN32 platforms.");          throw DataException("Error - Data:: erf function is not supported on _WIN32 platforms.");
1125  #else  #else
1126      tensor_unary_operation(m_samplesize, left, result, ::erf);          tensor_unary_operation(m_samplesize, left, result, ::erf);
1127      break;          break;
1128  #endif  #endif
1129     case ASINH:     case ASINH:
1130  #if defined (_WIN32) && !defined(__INTEL_COMPILER)  #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1131      tensor_unary_operation(m_samplesize, left, result, escript::asinh_substitute);          tensor_unary_operation(m_samplesize, left, result, escript::asinh_substitute);
1132  #else  #else
1133      tensor_unary_operation(m_samplesize, left, result, ::asinh);          tensor_unary_operation(m_samplesize, left, result, ::asinh);
1134  #endif    #endif  
1135      break;          break;
1136     case ACOSH:     case ACOSH:
1137  #if defined (_WIN32) && !defined(__INTEL_COMPILER)  #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1138      tensor_unary_operation(m_samplesize, left, result, escript::acosh_substitute);          tensor_unary_operation(m_samplesize, left, result, escript::acosh_substitute);
1139  #else  #else
1140      tensor_unary_operation(m_samplesize, left, result, ::acosh);          tensor_unary_operation(m_samplesize, left, result, ::acosh);
1141  #endif    #endif  
1142      break;          break;
1143     case ATANH:     case ATANH:
1144  #if defined (_WIN32) && !defined(__INTEL_COMPILER)  #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1145      tensor_unary_operation(m_samplesize, left, result, escript::atanh_substitute);          tensor_unary_operation(m_samplesize, left, result, escript::atanh_substitute);
1146  #else  #else
1147      tensor_unary_operation(m_samplesize, left, result, ::atanh);          tensor_unary_operation(m_samplesize, left, result, ::atanh);
1148  #endif    #endif  
1149      break;          break;
1150      case LOG10:      case LOG10:
1151      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log10);          tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log10);
1152      break;          break;
1153      case LOG:      case LOG:
1154      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log);          tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log);
1155      break;          break;
1156      case SIGN:      case SIGN:
1157      tensor_unary_operation(m_samplesize, left, result, escript::fsign);          tensor_unary_operation(m_samplesize, left, result, escript::fsign);
1158      break;          break;
1159      case ABS:      case ABS:
1160      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::fabs);          tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::fabs);
1161      break;          break;
1162      case NEG:      case NEG:
1163      tensor_unary_operation(m_samplesize, left, result, negate<double>());          tensor_unary_operation(m_samplesize, left, result, negate<double>());
1164      break;          break;
1165      case POS:      case POS:
1166      // it doesn't mean anything for delayed.          // it doesn't mean anything for delayed.
1167      // it will just trigger a deep copy of the lazy object          // it will just trigger a deep copy of the lazy object
1168      throw DataException("Programmer error - POS not supported for lazy data.");          throw DataException("Programmer error - POS not supported for lazy data.");
1169      break;          break;
1170      case EXP:      case EXP:
1171      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::exp);          tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::exp);
1172      break;          break;
1173      case SQRT:      case SQRT:
1174      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sqrt);          tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sqrt);
1175      break;          break;
1176      case RECIP:      case RECIP:
1177      tensor_unary_operation(m_samplesize, left, result, bind1st(divides<double>(),1.));          tensor_unary_operation(m_samplesize, left, result, bind1st(divides<double>(),1.));
1178      break;          break;
1179      case GZ:      case GZ:
1180      tensor_unary_operation(m_samplesize, left, result, bind2nd(greater<double>(),0.0));          tensor_unary_operation(m_samplesize, left, result, bind2nd(greater<double>(),0.0));
1181      break;          break;
1182      case LZ:      case LZ:
1183      tensor_unary_operation(m_samplesize, left, result, bind2nd(less<double>(),0.0));          tensor_unary_operation(m_samplesize, left, result, bind2nd(less<double>(),0.0));
1184      break;          break;
1185      case GEZ:      case GEZ:
1186      tensor_unary_operation(m_samplesize, left, result, bind2nd(greater_equal<double>(),0.0));          tensor_unary_operation(m_samplesize, left, result, bind2nd(greater_equal<double>(),0.0));
1187      break;          break;
1188      case LEZ:      case LEZ:
1189      tensor_unary_operation(m_samplesize, left, result, bind2nd(less_equal<double>(),0.0));          tensor_unary_operation(m_samplesize, left, result, bind2nd(less_equal<double>(),0.0));
1190      break;          break;
1191    // There are actually G_UNARY_P but I don't see a compelling reason to treat them differently
1192        case NEZ:
1193            tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsGT(),m_tol));
1194            break;
1195        case EZ:
1196            tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsLTE(),m_tol));
1197            break;
1198    
1199      default:      default:
1200      throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");          throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");
1201    }    }
1202    return &v;    return &(m_samples);
1203  }  }
1204    
1205    
1206  /*  const DataTypes::RealVectorType*
1207    \brief Compute the value of the expression (unary operation) for the given sample.  DataLazy::resolveNodeReduction(int tid, int sampleNo, size_t& roffset) const
   \return Vector which stores the value of the subexpression for the given sample.  
   \param v A vector to store intermediate results.  
   \param offset Index in v to begin storing results.  
   \param sampleNo Sample number to evaluate.  
   \param roffset (output parameter) the offset in the return vector where the result begins.  
   
   The return value will be an existing vector so do not deallocate it.  
   If the result is stored in v it should be stored at the offset given.  
   Everything from offset to the end of v should be considered available for this method to use.  
 */  
 DataTypes::ValueType*  
 DataLazy::resolveNP1OUT(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const  
1208  {  {
1209      // we assume that any collapsing has been done before we get here          // we assume that any collapsing has been done before we get here
1210      // since we only have one argument we don't need to think about only          // since we only have one argument we don't need to think about only
1211      // processing single points.          // processing single points.
1212            // we will also know we won't get identity nodes
1213    if (m_readytype!='E')    if (m_readytype!='E')
1214    {    {
1215      throw DataException("Programmer error - resolveNP1OUT should only be called on expanded Data.");      throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
1216    }    }
1217      // since we can't write the result over the input, we need a result offset further along    if (m_op==IDENTITY)
1218    size_t subroffset=roffset+m_samplesize;    {
1219    const ValueType* vleft=m_left->resolveSample(v,offset,sampleNo,subroffset);      throw DataException("Programmer error - resolveNodeUnary should not be called on identity nodes.");
1220    roffset=offset;    }
1221      size_t loffset=0;
1222      const DataTypes::RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, loffset);
1223    
1224      roffset=m_samplesize*tid;
1225      unsigned int ndpps=getNumDPPSample();
1226      unsigned int psize=DataTypes::noValues(m_left->getShape());
1227      double* result=&(m_samples[roffset]);
1228      switch (m_op)
1229      {
1230        case MINVAL:
1231            {
1232              for (unsigned int z=0;z<ndpps;++z)
1233              {
1234                FMin op;
1235                *result=DataMaths::reductionOp(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max());
1236                loffset+=psize;
1237                result++;
1238              }
1239            }
1240            break;
1241        case MAXVAL:
1242            {
1243              for (unsigned int z=0;z<ndpps;++z)
1244              {
1245              FMax op;
1246              *result=DataMaths::reductionOp(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max()*-1);
1247              loffset+=psize;
1248              result++;
1249              }
1250            }
1251            break;
1252        default:
1253            throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");
1254      }
1255      return &(m_samples);
1256    }
1257    
1258    const DataTypes::RealVectorType*
1259    DataLazy::resolveNodeNP1OUT(int tid, int sampleNo, size_t& roffset) const
1260    {
1261            // we assume that any collapsing has been done before we get here
1262            // since we only have one argument we don't need to think about only
1263            // processing single points.
1264      if (m_readytype!='E')
1265      {
1266        throw DataException("Programmer error - resolveNodeNP1OUT should only be called on expanded Data.");
1267      }
1268      if (m_op==IDENTITY)
1269      {
1270        throw DataException("Programmer error - resolveNodeNP1OUT should not be called on identity nodes.");
1271      }
1272      size_t subroffset;
1273      const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1274      roffset=m_samplesize*tid;
1275      size_t loop=0;
1276      size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1277      size_t step=getNoValues();
1278      size_t offset=roffset;
1279    switch (m_op)    switch (m_op)
1280    {    {
1281      case SYM:      case SYM:
1282      DataMaths::symmetric(*vleft,m_left->getShape(),subroffset, v, getShape(), offset);          for (loop=0;loop<numsteps;++loop)
1283      break;          {
1284                DataMaths::symmetric(*leftres,m_left->getShape(),subroffset, m_samples, getShape(), offset);
1285                subroffset+=step;
1286                offset+=step;
1287            }
1288            break;
1289      case NSYM:      case NSYM:
1290      DataMaths::nonsymmetric(*vleft,m_left->getShape(),subroffset, v, getShape(), offset);          for (loop=0;loop<numsteps;++loop)
1291      break;          {
1292                DataMaths::nonsymmetric(*leftres,m_left->getShape(),subroffset, m_samples, getShape(), offset);
1293                subroffset+=step;
1294                offset+=step;
1295            }
1296            break;
1297      default:      default:
1298      throw DataException("Programmer error - resolveNP1OUT can not resolve operator "+opToString(m_op)+".");          throw DataException("Programmer error - resolveNP1OUT can not resolve operator "+opToString(m_op)+".");
1299    }    }
1300    return &v;    return &m_samples;
1301  }  }
1302    
1303    const DataTypes::RealVectorType*
1304    DataLazy::resolveNodeNP1OUT_P(int tid, int sampleNo, size_t& roffset) const
1305    {
1306            // we assume that any collapsing has been done before we get here
1307            // since we only have one argument we don't need to think about only
1308            // processing single points.
1309      if (m_readytype!='E')
1310      {
1311        throw DataException("Programmer error - resolveNodeNP1OUT_P should only be called on expanded Data.");
1312      }
1313      if (m_op==IDENTITY)
1314      {
1315        throw DataException("Programmer error - resolveNodeNP1OUT_P should not be called on identity nodes.");
1316      }
1317      size_t subroffset;
1318      size_t offset;
1319      const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1320      roffset=m_samplesize*tid;
1321      offset=roffset;
1322      size_t loop=0;
1323      size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1324      size_t outstep=getNoValues();
1325      size_t instep=m_left->getNoValues();
1326      switch (m_op)
1327      {
1328        case TRACE:
1329            for (loop=0;loop<numsteps;++loop)
1330            {
1331                DataMaths::trace(*leftres,m_left->getShape(),subroffset, m_samples ,getShape(),offset,m_axis_offset);
1332                subroffset+=instep;
1333                offset+=outstep;
1334            }
1335            break;
1336        case TRANS:
1337            for (loop=0;loop<numsteps;++loop)
1338            {
1339                DataMaths::transpose(*leftres,m_left->getShape(),subroffset, m_samples, getShape(),offset,m_axis_offset);
1340                subroffset+=instep;
1341                offset+=outstep;
1342            }
1343            break;
1344        default:
1345            throw DataException("Programmer error - resolveNP1OUTP can not resolve operator "+opToString(m_op)+".");
1346      }
1347      return &m_samples;
1348    }
1349    
1350    
1351    const DataTypes::RealVectorType*
1352    DataLazy::resolveNodeNP1OUT_2P(int tid, int sampleNo, size_t& roffset) const
1353    {
1354      if (m_readytype!='E')
1355      {
1356        throw DataException("Programmer error - resolveNodeNP1OUT_2P should only be called on expanded Data.");
1357      }
1358      if (m_op==IDENTITY)
1359      {
1360        throw DataException("Programmer error - resolveNodeNP1OUT_2P should not be called on identity nodes.");
1361      }
1362      size_t subroffset;
1363      size_t offset;
1364      const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1365      roffset=m_samplesize*tid;
1366      offset=roffset;
1367      size_t loop=0;
1368      size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1369      size_t outstep=getNoValues();
1370      size_t instep=m_left->getNoValues();
1371      switch (m_op)
1372      {
1373        case SWAP:
1374            for (loop=0;loop<numsteps;++loop)
1375            {
1376                DataMaths::swapaxes(*leftres,m_left->getShape(),subroffset, m_samples, getShape(),offset, m_axis_offset, m_transpose);
1377                subroffset+=instep;
1378                offset+=outstep;
1379            }
1380            break;
1381        default:
1382            throw DataException("Programmer error - resolveNodeNP1OUT2P can not resolve operator "+opToString(m_op)+".");
1383      }
1384      return &m_samples;
1385    }
1386    
1387  #define PROC_OP(TYPE,X)                               \  const DataTypes::RealVectorType*
1388      for (int i=0;i<steps;++i,resultp+=resultStep) \  DataLazy::resolveNodeCondEval(int tid, int sampleNo, size_t& roffset) const
1389      { \  {
1390         tensor_binary_operation< TYPE >(chunksize, &((*left)[lroffset]), &((*right)[rroffset]), resultp, X); \    if (m_readytype!='E')
1391         lroffset+=leftStep; \    {
1392         rroffset+=rightStep; \      throw DataException("Programmer error - resolveNodeCondEval should only be called on expanded Data.");
1393      }    }
1394      if (m_op!=CONDEVAL)
1395      {
1396        throw DataException("Programmer error - resolveNodeCondEval should only be called on CONDEVAL nodes.");
1397      }
1398      size_t subroffset;
1399    
1400      const ValueType* maskres=m_mask->resolveNodeSample(tid, sampleNo, subroffset);
1401      const ValueType* srcres=0;
1402      if ((*maskres)[subroffset]>0)
1403      {
1404            srcres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1405      }
1406      else
1407      {
1408            srcres=m_right->resolveNodeSample(tid, sampleNo, subroffset);
1409      }
1410    
1411      // Now we need to copy the result
1412    
1413      roffset=m_samplesize*tid;
1414      for (int i=0;i<m_samplesize;++i)
1415      {
1416            m_samples[roffset+i]=(*srcres)[subroffset+i];  
1417      }
1418    
1419      return &m_samples;
1420    }
1421    
 /*  
   \brief Compute the value of the expression (binary operation) for the given sample.  
   \return Vector which stores the value of the subexpression for the given sample.  
   \param v A vector to store intermediate results.  
   \param offset Index in v to begin storing results.  
   \param sampleNo Sample number to evaluate.  
   \param roffset (output parameter) the offset in the return vector where the result begins.  
   
   The return value will be an existing vector so do not deallocate it.  
   If the result is stored in v it should be stored at the offset given.  
   Everything from offset to the end of v should be considered available for this method to use.  
 */  
1422  // This method assumes that any subexpressions which evaluate to Constant or Tagged Data  // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1423  // have already been collapsed to IDENTITY. So we must have at least one expanded child.  // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1424  // If both children are expanded, then we can process them in a single operation (we treat  // If both children are expanded, then we can process them in a single operation (we treat
# Line 842  DataLazy::resolveNP1OUT(ValueType& v, si Line 1428  DataLazy::resolveNP1OUT(ValueType& v, si
1428  // There is an additional complication when scalar operations are considered.  // There is an additional complication when scalar operations are considered.
1429  // For example, 2+Vector.  // For example, 2+Vector.
1430  // In this case each double within the point is treated individually  // In this case each double within the point is treated individually
1431  DataTypes::ValueType*  const DataTypes::RealVectorType*
1432  DataLazy::resolveBinary(ValueType& v,  size_t offset, int sampleNo, size_t& roffset) const  DataLazy::resolveNodeBinary(int tid, int sampleNo, size_t& roffset) const
1433  {  {
1434  cout << "Resolve binary: " << toString() << endl;  LAZYDEBUG(cout << "Resolve binary: " << toString() << endl;)
1435    
1436    size_t lroffset=0, rroffset=0;    // offsets in the left and right result vectors    size_t lroffset=0, rroffset=0;        // offsets in the left and right result vectors
1437      // first work out which of the children are expanded          // first work out which of the children are expanded
1438    bool leftExp=(m_left->m_readytype=='E');    bool leftExp=(m_left->m_readytype=='E');
1439    bool rightExp=(m_right->m_readytype=='E');    bool rightExp=(m_right->m_readytype=='E');
1440    bool bigloops=((leftExp && rightExp) || (!leftExp && !rightExp)); // is processing in single step?    if (!leftExp && !rightExp)
1441    int steps=(bigloops?1:getNumDPPSample());    {
1442    size_t chunksize=(bigloops? m_samplesize : getNoValues());    // if bigloops, pretend the whole sample is a datapoint          throw DataException("Programmer Error - please use collapse if neither argument has type 'E'.");
1443    if (m_left->getRank()!=m_right->getRank())    // need to deal with scalar * ? ops    }
1444    {    bool leftScalar=(m_left->getRank()==0);
1445      EsysAssert((m_left->getRank()==0) || (m_right->getRank()==0), "Error - Ranks must match unless one is 0.");    bool rightScalar=(m_right->getRank()==0);
1446      steps=getNumDPPSample()*max(m_left->getNoValues(),m_right->getNoValues());    if ((m_left->getRank()!=m_right->getRank()) && (!leftScalar && !rightScalar))
1447      chunksize=1;    // for scalar    {
1448    }              throw DataException("resolveBinary - ranks of arguments must match unless one of them is scalar.");
1449    int leftStep=((leftExp && !rightExp)? m_right->getNoValues() : 0);    }
1450    int rightStep=((rightExp && !leftExp)? m_left->getNoValues() : 0);    size_t leftsize=m_left->getNoValues();
1451    int resultStep=max(leftStep,rightStep);   // only one (at most) should be !=0    size_t rightsize=m_right->getNoValues();
1452      // Get the values of sub-expressions    size_t chunksize=1;                   // how many doubles will be processed in one go
1453    const ValueType* left=m_left->resolveSample(v,offset,sampleNo,lroffset);    int leftstep=0;               // how far should the left offset advance after each step
1454    const ValueType* right=m_right->resolveSample(v,offset+m_samplesize,sampleNo,rroffset); // Note    int rightstep=0;
1455      // the right child starts further along.    int numsteps=0;               // total number of steps for the inner loop
1456    double* resultp=&(v[offset]);     // results are stored at the vector offset we recieved    int oleftstep=0;      // the o variables refer to the outer loop
1457      int orightstep=0;     // The outer loop is only required in cases where there is an extended scalar
1458      int onumsteps=1;
1459      
1460      bool LES=(leftExp && leftScalar);     // Left is an expanded scalar
1461      bool RES=(rightExp && rightScalar);
1462      bool LS=(!leftExp && leftScalar);     // left is a single scalar
1463      bool RS=(!rightExp && rightScalar);
1464      bool LN=(!leftExp && !leftScalar);    // left is a single non-scalar
1465      bool RN=(!rightExp && !rightScalar);
1466      bool LEN=(leftExp && !leftScalar);    // left is an expanded non-scalar
1467      bool REN=(rightExp && !rightScalar);
1468    
1469      if ((LES && RES) || (LEN && REN))     // both are Expanded scalars or both are expanded non-scalars
1470      {
1471            chunksize=m_left->getNumDPPSample()*leftsize;
1472            leftstep=0;
1473            rightstep=0;
1474            numsteps=1;
1475      }
1476      else if (LES || RES)
1477      {
1478            chunksize=1;
1479            if (LES)                // left is an expanded scalar
1480            {
1481                    if (RS)
1482                    {
1483                       leftstep=1;
1484                       rightstep=0;
1485                       numsteps=m_left->getNumDPPSample();
1486                    }
1487                    else            // RN or REN
1488                    {
1489                       leftstep=0;
1490                       oleftstep=1;
1491                       rightstep=1;
1492                       orightstep=(RN ? -(int)rightsize : 0);
1493                       numsteps=rightsize;
1494                       onumsteps=m_left->getNumDPPSample();
1495                    }
1496            }
1497            else            // right is an expanded scalar
1498            {
1499                    if (LS)
1500                    {
1501                       rightstep=1;
1502                       leftstep=0;
1503                       numsteps=m_right->getNumDPPSample();
1504                    }
1505                    else
1506                    {
1507                       rightstep=0;
1508                       orightstep=1;
1509                       leftstep=1;
1510                       oleftstep=(LN ? -(int)leftsize : 0);
1511                       numsteps=leftsize;
1512                       onumsteps=m_right->getNumDPPSample();
1513                    }
1514            }
1515      }
1516      else  // this leaves (LEN, RS), (LEN, RN) and their transposes
1517      {
1518            if (LEN)        // and Right will be a single value
1519            {
1520                    chunksize=rightsize;
1521                    leftstep=rightsize;
1522                    rightstep=0;
1523                    numsteps=m_left->getNumDPPSample();
1524                    if (RS)
1525                    {
1526                       numsteps*=leftsize;
1527                    }
1528            }
1529            else    // REN
1530            {
1531                    chunksize=leftsize;
1532                    rightstep=leftsize;
1533                    leftstep=0;
1534                    numsteps=m_right->getNumDPPSample();
1535                    if (LS)
1536                    {
1537                       numsteps*=rightsize;
1538                    }
1539            }
1540      }
1541    
1542      int resultStep=max(leftstep,rightstep);       // only one (at most) should be !=0
1543            // Get the values of sub-expressions
1544      const ValueType* left=m_left->resolveNodeSample(tid,sampleNo,lroffset);      
1545      const ValueType* right=m_right->resolveNodeSample(tid,sampleNo,rroffset);
1546    LAZYDEBUG(cout << "Post sub calls in " << toString() << endl;)
1547    LAZYDEBUG(cout << "shapes=" << DataTypes::shapeToString(m_left->getShape()) << "," << DataTypes::shapeToString(m_right->getShape()) << endl;)
1548    LAZYDEBUG(cout << "chunksize=" << chunksize << endl << "leftstep=" << leftstep << " rightstep=" << rightstep;)
1549    LAZYDEBUG(cout << " numsteps=" << numsteps << endl << "oleftstep=" << oleftstep << " orightstep=" << orightstep;)
1550    LAZYDEBUG(cout << "onumsteps=" << onumsteps << endl;)
1551    LAZYDEBUG(cout << " DPPS=" << m_left->getNumDPPSample() << "," <<m_right->getNumDPPSample() << endl;)
1552    LAZYDEBUG(cout << "" << LS << RS << LN << RN << LES << RES <<LEN << REN <<   endl;)
1553    
1554    LAZYDEBUG(cout << "Left res["<< lroffset<< "]=" << (*left)[lroffset] << endl;)
1555    LAZYDEBUG(cout << "Right res["<< rroffset<< "]=" << (*right)[rroffset] << endl;)
1556    
1557    
1558      roffset=m_samplesize*tid;
1559      double* resultp=&(m_samples[roffset]);                // results are stored at the vector offset we received
1560    switch(m_op)    switch(m_op)
1561    {    {
1562      case ADD:      case ADD:
1563          PROC_OP(NO_ARG,plus<double>());          PROC_OP(NO_ARG,plus<double>());
1564      break;          break;
1565      case SUB:      case SUB:
1566      PROC_OP(NO_ARG,minus<double>());          PROC_OP(NO_ARG,minus<double>());
1567      break;          break;
1568      case MUL:      case MUL:
1569      PROC_OP(NO_ARG,multiplies<double>());          PROC_OP(NO_ARG,multiplies<double>());
1570      break;          break;
1571      case DIV:      case DIV:
1572      PROC_OP(NO_ARG,divides<double>());          PROC_OP(NO_ARG,divides<double>());
1573      break;          break;
1574      case POW:      case POW:
1575         PROC_OP(double (double,double),::pow);         PROC_OP(double (double,double),::pow);
1576      break;          break;
1577      default:      default:
1578      throw DataException("Programmer error - resolveBinary can not resolve operator "+opToString(m_op)+".");          throw DataException("Programmer error - resolveBinary can not resolve operator "+opToString(m_op)+".");
1579    }    }
1580    roffset=offset;    LAZYDEBUG(cout << "Result res[" << roffset<< "]" << m_samples[roffset] << endl;)
1581    return &v;    return &m_samples;
1582  }  }
1583    
1584    
 /*  
   \brief Compute the value of the expression (tensor product) for the given sample.  
   \return Vector which stores the value of the subexpression for the given sample.  
   \param v A vector to store intermediate results.  
   \param offset Index in v to begin storing results.  
   \param sampleNo Sample number to evaluate.  
   \param roffset (output parameter) the offset in the return vector where the result begins.  
   
   The return value will be an existing vector so do not deallocate it.  
   If the result is stored in v it should be stored at the offset given.  
   Everything from offset to the end of v should be considered available for this method to use.  
 */  
1585  // This method assumes that any subexpressions which evaluate to Constant or Tagged Data  // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1586  // have already been collapsed to IDENTITY. So we must have at least one expanded child.  // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1587  // unlike the other resolve helpers, we must treat these datapoints separately.  // unlike the other resolve helpers, we must treat these datapoints separately.
1588  DataTypes::ValueType*  const DataTypes::RealVectorType*
1589  DataLazy::resolveTProd(ValueType& v,  size_t offset, int sampleNo, size_t& roffset) const  DataLazy::resolveNodeTProd(int tid, int sampleNo, size_t& roffset) const
1590  {  {
1591  cout << "Resolve TensorProduct: " << toString() << endl;  LAZYDEBUG(cout << "Resolve TensorProduct: " << toString() << endl;)
1592    
1593    size_t lroffset=0, rroffset=0;    // offsets in the left and right result vectors    size_t lroffset=0, rroffset=0;        // offsets in the left and right result vectors
1594      // first work out which of the children are expanded          // first work out which of the children are expanded
1595    bool leftExp=(m_left->m_readytype=='E');    bool leftExp=(m_left->m_readytype=='E');
1596    bool rightExp=(m_right->m_readytype=='E');    bool rightExp=(m_right->m_readytype=='E');
1597    int steps=getNumDPPSample();    int steps=getNumDPPSample();
1598    int leftStep=((leftExp && !rightExp)? m_right->getNoValues() : 0);    int leftStep=(leftExp? m_left->getNoValues() : 0);            // do not have scalars as input to this method
1599    int rightStep=((rightExp && !leftExp)? m_left->getNoValues() : 0);    int rightStep=(rightExp?m_right->getNoValues() : 0);
1600    int resultStep=max(leftStep,rightStep);   // only one (at most) should be !=0  
1601      // Get the values of sub-expressions (leave a gap of one sample for the result).    int resultStep=getNoValues();
1602    const ValueType* left=m_left->resolveSample(v,offset+m_samplesize,sampleNo,lroffset);    roffset=m_samplesize*tid;
1603    const ValueType* right=m_right->resolveSample(v,offset+2*m_samplesize,sampleNo,rroffset);    size_t offset=roffset;
1604    double* resultp=&(v[offset]);     // results are stored at the vector offset we recieved  
1605      const ValueType* left=m_left->resolveNodeSample(tid, sampleNo, lroffset);
1606    
1607      const ValueType* right=m_right->resolveNodeSample(tid, sampleNo, rroffset);
1608    
1609    LAZYDEBUG(cerr << "[Left shape]=" << DataTypes::shapeToString(m_left->getShape()) << "\n[Right shape]=" << DataTypes::shapeToString(m_right->getShape()) << " result=" <<DataTypes::shapeToString(getShape()) <<  endl;
1610    cout << getNoValues() << endl;)
1611    
1612    
1613    LAZYDEBUG(cerr << "Post sub calls: " << toString() << endl;)
1614    LAZYDEBUG(cout << "LeftExp=" << leftExp << " rightExp=" << rightExp << endl;)
1615    LAZYDEBUG(cout << "LeftR=" << m_left->getRank() << " rightExp=" << m_right->getRank() << endl;)
1616    LAZYDEBUG(cout << "LeftSize=" << m_left->getNoValues() << " RightSize=" << m_right->getNoValues() << endl;)
1617    LAZYDEBUG(cout << "m_samplesize=" << m_samplesize << endl;)
1618    LAZYDEBUG(cout << "outputshape=" << DataTypes::shapeToString(getShape()) << endl;)
1619    LAZYDEBUG(cout << "DPPS=" << m_right->getNumDPPSample() <<"."<<endl;)
1620    
1621      double* resultp=&(m_samples[offset]);         // results are stored at the vector offset we received
1622    switch(m_op)    switch(m_op)
1623    {    {
1624      case PROD:      case PROD:
1625      for (int i=0;i<steps;++i,resultp+=resultStep)          for (int i=0;i<steps;++i,resultp+=resultStep)
1626      {          {
1627            const double *ptr_0 = &((*left)[lroffset]);            const double *ptr_0 = &((*left)[lroffset]);
1628            const double *ptr_1 = &((*right)[rroffset]);            const double *ptr_1 = &((*right)[rroffset]);
1629            matrix_matrix_product(m_SL, m_SM, m_SR, ptr_0, ptr_1, resultp, m_transpose);  
1630        lroffset+=leftStep;  LAZYDEBUG(cout << DataTypes::pointToString(*left, m_left->getShape(),lroffset,"LEFT") << endl;)
1631        rroffset+=rightStep;  LAZYDEBUG(cout << DataTypes::pointToString(*right,m_right->getShape(),rroffset, "RIGHT") << endl;)
1632      }  
1633      break;            matrix_matrix_product(m_SL, m_SM, m_SR, ptr_0, ptr_1, resultp, m_transpose);
1634    
1635              lroffset+=leftStep;
1636              rroffset+=rightStep;
1637            }
1638            break;
1639      default:      default:
1640      throw DataException("Programmer error - resolveTProduct can not resolve operator "+opToString(m_op)+".");          throw DataException("Programmer error - resolveTProduct can not resolve operator "+opToString(m_op)+".");
1641    }    }
1642    roffset=offset;    roffset=offset;
1643    return &v;    return &m_samples;
1644  }  }
1645    
1646    
1647    const DataTypes::RealVectorType*
1648    DataLazy::resolveSample(int sampleNo, size_t& roffset) const
1649    {
1650    #ifdef _OPENMP
1651            int tid=omp_get_thread_num();
1652    #else
1653            int tid=0;
1654    #endif
1655    
1656  /*  #ifdef LAZY_STACK_PROF
1657    \brief Compute the value of the expression for the given sample.          stackstart[tid]=&tid;
1658    \return Vector which stores the value of the subexpression for the given sample.          stackend[tid]=&tid;
1659    \param v A vector to store intermediate results.          const DataTypes::RealVectorType* r=resolveNodeSample(tid, sampleNo, roffset);
1660    \param offset Index in v to begin storing results.          size_t d=(size_t)stackstart[tid]-(size_t)stackend[tid];
1661    \param sampleNo Sample number to evaluate.          #pragma omp critical
1662    \param roffset (output parameter) the offset in the return vector where the result begins.          if (d>maxstackuse)
1663            {
1664    cout << "Max resolve Stack use " << d << endl;
1665                    maxstackuse=d;
1666            }
1667            return r;
1668    #else
1669            return resolveNodeSample(tid, sampleNo, roffset);
1670    #endif
1671    }
1672    
1673    The return value will be an existing vector so do not deallocate it.  
1674  */  // This needs to do the work of the identity constructor
1675  // the vector and the offset are a place where the method could write its data if it wishes  void
1676  // it is not obligated to do so. For example, if it has its own storage already, it can use that.  DataLazy::resolveToIdentity()
 // Hence the return value to indicate where the data is actually stored.  
 // Regardless, the storage should be assumed to be used, even if it isn't.  
   
 // the roffset is the offset within the returned vector where the data begins  
 const DataTypes::ValueType*  
 DataLazy::resolveSample(ValueType& v, size_t offset, int sampleNo, size_t& roffset)  
1677  {  {
1678  cout << "Resolve sample " << toString() << endl;     if (m_op==IDENTITY)
1679      // collapse so we have a 'E' node or an IDENTITY for some other type          return;
1680    if (m_readytype!='E' && m_op!=IDENTITY)     DataReady_ptr p=resolveNodeWorker();
1681    {     makeIdentity(p);
1682      collapse();  }
1683    }  
1684    if (m_op==IDENTITY)    void DataLazy::makeIdentity(const DataReady_ptr& p)
1685    {
1686       m_op=IDENTITY;
1687       m_axis_offset=0;
1688       m_transpose=0;
1689       m_SL=m_SM=m_SR=0;
1690       m_children=m_height=0;
1691       m_id=p;
1692       if(p->isConstant()) {m_readytype='C';}
1693       else if(p->isExpanded()) {m_readytype='E';}
1694       else if (p->isTagged()) {m_readytype='T';}
1695       else {throw DataException("Unknown DataReady instance in convertToIdentity constructor.");}
1696       m_samplesize=p->getNumDPPSample()*p->getNoValues();
1697       m_left.reset();
1698       m_right.reset();
1699    }
1700    
1701    
1702    DataReady_ptr
1703    DataLazy::resolve()
1704    {
1705        resolveToIdentity();
1706        return m_id;
1707    }
1708    
1709    
1710    /* This is really a static method but I think that caused problems in windows */
1711    void
1712    DataLazy::resolveGroupWorker(std::vector<DataLazy*>& dats)
1713    {
1714      if (dats.empty())
1715    {    {
1716      const ValueType& vec=m_id->getVector();          return;
     if (m_readytype=='C')  
     {  
     roffset=0;  
     return &(vec);  
     }  
     roffset=m_id->getPointOffset(sampleNo, 0);  
     return &(vec);  
1717    }    }
1718    if (m_readytype!='E')    vector<DataLazy*> work;
1719    {    FunctionSpace fs=dats[0]->getFunctionSpace();
1720      throw DataException("Programmer Error - Collapse did not produce an expanded node.");    bool match=true;
1721      for (int i=dats.size()-1;i>=0;--i)
1722      {
1723            if (dats[i]->m_readytype!='E')
1724            {
1725                    dats[i]->collapse();
1726            }
1727            if (dats[i]->m_op!=IDENTITY)
1728            {
1729                    work.push_back(dats[i]);
1730                    if (fs!=dats[i]->getFunctionSpace())
1731                    {
1732                            match=false;
1733                    }
1734            }
1735      }
1736      if (work.empty())
1737      {
1738            return;         // no work to do
1739      }
1740      if (match)    // all functionspaces match.  Yes I realise this is overly strict
1741      {             // it is possible that dats[0] is one of the objects which we discarded and
1742                    // all the other functionspaces match.
1743            vector<DataExpanded*> dep;
1744            vector<ValueType*> vecs;
1745            for (int i=0;i<work.size();++i)
1746            {
1747                    dep.push_back(new DataExpanded(fs,work[i]->getShape(), ValueType(work[i]->getNoValues())));
1748                    vecs.push_back(&(dep[i]->getVectorRW()));
1749            }
1750            int totalsamples=work[0]->getNumSamples();
1751            const ValueType* res=0; // Storage for answer
1752            int sample;
1753            #pragma omp parallel private(sample, res)
1754            {
1755                size_t roffset=0;
1756                #pragma omp for schedule(static)
1757                for (sample=0;sample<totalsamples;++sample)
1758                {
1759                    roffset=0;
1760                    int j;
1761                    for (j=work.size()-1;j>=0;--j)
1762                    {
1763    #ifdef _OPENMP
1764                        res=work[j]->resolveNodeSample(omp_get_thread_num(),sample,roffset);
1765    #else
1766                        res=work[j]->resolveNodeSample(0,sample,roffset);
1767    #endif
1768                        RealVectorType::size_type outoffset=dep[j]->getPointOffset(sample,0);
1769                        memcpy(&((*vecs[j])[outoffset]),&((*res)[roffset]),work[j]->m_samplesize*sizeof(RealVectorType::ElementType));
1770                    }
1771                }
1772            }
1773            // Now we need to load the new results as identity ops into the lazy nodes
1774            for (int i=work.size()-1;i>=0;--i)
1775            {
1776                work[i]->makeIdentity(REFCOUNTNS::dynamic_pointer_cast<DataReady>(dep[i]->getPtr()));
1777            }
1778    }    }
1779    switch (getOpgroup(m_op))    else  // functionspaces do not match
1780    {    {
1781    case G_UNARY: return resolveUnary(v, offset,sampleNo,roffset);          for (int i=0;i<work.size();++i)
1782    case G_BINARY: return resolveBinary(v, offset,sampleNo,roffset);          {
1783    case G_NP1OUT: return resolveNP1OUT(v, offset, sampleNo,roffset);                  work[i]->resolveToIdentity();
1784    case G_TENSORPROD: return resolveTProd(v,offset, sampleNo,roffset);          }
   default:  
     throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");  
1785    }    }
1786  }  }
1787    
1788    
1789  // To simplify the memory management, all threads operate on one large vector, rather than one each.  
1790  // Each sample is evaluated independently and copied into the result DataExpanded.  // This version of resolve uses storage in each node to hold results
1791  DataReady_ptr  DataReady_ptr
1792  DataLazy::resolve()  DataLazy::resolveNodeWorker()
1793  {  {
1794      if (m_readytype!='E')         // if the whole sub-expression is Constant or Tagged, then evaluate it normally
 cout << "Sample size=" << m_samplesize << endl;  
 cout << "Buffers=" << m_buffsRequired << endl;  
   
   if (m_readytype!='E')     // if the whole sub-expression is Constant or Tagged, then evaluate it normally  
1795    {    {
1796      collapse();      collapse();
1797    }    }
1798    if (m_op==IDENTITY)       // So a lazy expression of Constant or Tagged data will be returned here.    if (m_op==IDENTITY)           // So a lazy expression of Constant or Tagged data will be returned here.
1799    {    {
1800      return m_id;      return m_id;
1801    }    }
1802      // from this point on we must have m_op!=IDENTITY and m_readytype=='E'          // from this point on we must have m_op!=IDENTITY and m_readytype=='E'
   size_t threadbuffersize=m_maxsamplesize*(max(1,m_buffsRequired)); // Each thread needs to have enough  
     // storage to evaluate its expression  
   int numthreads=1;  
 #ifdef _OPENMP  
   numthreads=getNumberOfThreads();  
 #endif  
   ValueType v(numthreads*threadbuffersize);  
 cout << "Buffer created with size=" << v.size() << endl;  
1803    DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(),  ValueType(getNoValues()));    DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(),  ValueType(getNoValues()));
1804    ValueType& resvec=result->getVector();    ValueType& resvec=result->getVectorRW();
1805    DataReady_ptr resptr=DataReady_ptr(result);    DataReady_ptr resptr=DataReady_ptr(result);
1806    
1807    int sample;    int sample;
   size_t outoffset;     // offset in the output data  
1808    int totalsamples=getNumSamples();    int totalsamples=getNumSamples();
1809    const ValueType* res=0;   // Vector storing the answer    const ValueType* res=0;       // Storage for answer
1810    size_t resoffset=0;       // where in the vector to find the answer  LAZYDEBUG(cout << "Total number of samples=" <<totalsamples << endl;)
1811    #pragma omp parallel for private(sample,resoffset,outoffset,res) schedule(static)    #pragma omp parallel private(sample,res)
1812    for (sample=0;sample<totalsamples;++sample)    {
1813    {          size_t roffset=0;
1814  cout << "################################# " << sample << endl;  #ifdef LAZY_STACK_PROF
1815            stackstart[omp_get_thread_num()]=&roffset;
1816            stackend[omp_get_thread_num()]=&roffset;
1817    #endif
1818            #pragma omp for schedule(static)
1819            for (sample=0;sample<totalsamples;++sample)
1820            {
1821                    roffset=0;
1822  #ifdef _OPENMP  #ifdef _OPENMP
1823      res=resolveSample(v,threadbuffersize*omp_get_thread_num(),sample,resoffset);                  res=resolveNodeSample(omp_get_thread_num(),sample,roffset);
1824  #else  #else
1825      res=resolveSample(v,0,sample,resoffset);   // res would normally be v, but not if its a single IDENTITY op.                  res=resolveNodeSample(0,sample,roffset);
1826  #endif  #endif
1827  cerr << "-------------------------------- " << endl;  LAZYDEBUG(cout << "Sample #" << sample << endl;)
1828      outoffset=result->getPointOffset(sample,0);  LAZYDEBUG(cout << "Final res[" << roffset<< "]=" << (*res)[roffset] << (*res)[roffset]<< endl; )
1829  cerr << "offset=" << outoffset << endl;                  RealVectorType::size_type outoffset=result->getPointOffset(sample,0);
1830      for (unsigned int i=0;i<m_samplesize;++i,++outoffset,++resoffset)   // copy values into the output vector                  memcpy(&(resvec[outoffset]),&((*res)[roffset]),m_samplesize*sizeof(RealVectorType::ElementType));
1831      {          }
1832      resvec[outoffset]=(*res)[resoffset];    }
1833      }  #ifdef LAZY_STACK_PROF
1834  cerr << "*********************************" << endl;    for (int i=0;i<getNumberOfThreads();++i)
1835      {
1836            size_t r=((size_t)stackstart[i] - (size_t)stackend[i]);
1837    //      cout << i << " " << stackstart[i] << " .. " << stackend[i] << " = " <<  r << endl;
1838            if (r>maxstackuse)
1839            {
1840                    maxstackuse=r;
1841            }
1842    }    }
1843      cout << "Max resolve Stack use=" << maxstackuse << endl;
1844    #endif
1845    return resptr;    return resptr;
1846  }  }
1847    
# Line 1057  std::string Line 1849  std::string
1849  DataLazy::toString() const  DataLazy::toString() const
1850  {  {
1851    ostringstream oss;    ostringstream oss;
1852    oss << "Lazy Data:";    oss << "Lazy Data: [depth=" << m_height<< "] ";
1853    intoString(oss);    switch (escriptParams.getLAZY_STR_FMT())
1854      {
1855      case 1:       // tree format
1856            oss << endl;
1857            intoTreeString(oss,"");
1858            break;
1859      case 2:       // just the depth
1860            break;
1861      default:
1862            intoString(oss);
1863            break;
1864      }
1865    return oss.str();    return oss.str();
1866  }  }
1867    
# Line 1066  DataLazy::toString() const Line 1869  DataLazy::toString() const
1869  void  void
1870  DataLazy::intoString(ostringstream& oss) const  DataLazy::intoString(ostringstream& oss) const
1871  {  {
1872    //    oss << "[" << m_children <<";"<<m_height <<"]";
1873      switch (getOpgroup(m_op))
1874      {
1875      case G_IDENTITY:
1876            if (m_id->isExpanded())
1877            {
1878               oss << "E";
1879            }
1880            else if (m_id->isTagged())
1881            {
1882              oss << "T";
1883            }
1884            else if (m_id->isConstant())
1885            {
1886              oss << "C";
1887            }
1888            else
1889            {
1890              oss << "?";
1891            }
1892            oss << '@' << m_id.get();
1893            break;
1894      case G_BINARY:
1895            oss << '(';
1896            m_left->intoString(oss);
1897            oss << ' ' << opToString(m_op) << ' ';
1898            m_right->intoString(oss);
1899            oss << ')';
1900            break;
1901      case G_UNARY:
1902      case G_UNARY_P:
1903      case G_NP1OUT:
1904      case G_NP1OUT_P:
1905      case G_REDUCTION:
1906            oss << opToString(m_op) << '(';
1907            m_left->intoString(oss);
1908            oss << ')';
1909            break;
1910      case G_TENSORPROD:
1911            oss << opToString(m_op) << '(';
1912            m_left->intoString(oss);
1913            oss << ", ";
1914            m_right->intoString(oss);
1915            oss << ')';
1916            break;
1917      case G_NP1OUT_2P:
1918            oss << opToString(m_op) << '(';
1919            m_left->intoString(oss);
1920            oss << ", " << m_axis_offset << ", " << m_transpose;
1921            oss << ')';
1922            break;
1923      case G_CONDEVAL:
1924            oss << opToString(m_op)<< '(' ;
1925            m_mask->intoString(oss);
1926            oss << " ? ";
1927            m_left->intoString(oss);
1928            oss << " : ";
1929            m_right->intoString(oss);
1930            oss << ')';
1931            break;
1932      default:
1933            oss << "UNKNOWN";
1934      }
1935    }
1936    
1937    
1938    void
1939    DataLazy::intoTreeString(ostringstream& oss, string indent) const
1940    {
1941      oss << '[' << m_rank << ':' << setw(3) << m_samplesize << "] " << indent;
1942    switch (getOpgroup(m_op))    switch (getOpgroup(m_op))
1943    {    {
1944    case G_IDENTITY:    case G_IDENTITY:
1945      if (m_id->isExpanded())          if (m_id->isExpanded())
1946      {          {
1947         oss << "E";             oss << "E";
1948      }          }
1949      else if (m_id->isTagged())          else if (m_id->isTagged())
1950      {          {
1951        oss << "T";            oss << "T";
1952      }          }
1953      else if (m_id->isConstant())          else if (m_id->isConstant())
1954      {          {
1955        oss << "C";            oss << "C";
1956      }          }
1957      else          else
1958      {          {
1959        oss << "?";            oss << "?";
1960      }          }
1961      oss << '@' << m_id.get();          oss << '@' << m_id.get() << endl;
1962      break;          break;
1963    case G_BINARY:    case G_BINARY:
1964      oss << '(';          oss << opToString(m_op) << endl;
1965      m_left->intoString(oss);          indent+='.';
1966      oss << ' ' << opToString(m_op) << ' ';          m_left->intoTreeString(oss, indent);
1967      m_right->intoString(oss);          m_right->intoTreeString(oss, indent);
1968      oss << ')';          break;
     break;  
1969    case G_UNARY:    case G_UNARY:
1970      case G_UNARY_P:
1971    case G_NP1OUT:    case G_NP1OUT:
1972      oss << opToString(m_op) << '(';    case G_NP1OUT_P:
1973      m_left->intoString(oss);    case G_REDUCTION:
1974      oss << ')';          oss << opToString(m_op) << endl;
1975      break;          indent+='.';
1976            m_left->intoTreeString(oss, indent);
1977            break;
1978    case G_TENSORPROD:    case G_TENSORPROD:
1979      oss << opToString(m_op) << '(';          oss << opToString(m_op) << endl;
1980      m_left->intoString(oss);          indent+='.';
1981      oss << ", ";          m_left->intoTreeString(oss, indent);
1982      m_right->intoString(oss);          m_right->intoTreeString(oss, indent);
1983      oss << ')';          break;
1984      break;    case G_NP1OUT_2P:
1985            oss << opToString(m_op) << ", " << m_axis_offset << ", " << m_transpose<< endl;
1986            indent+='.';
1987            m_left->intoTreeString(oss, indent);
1988            break;
1989    default:    default:
1990      oss << "UNKNOWN";          oss << "UNKNOWN";
1991    }    }
1992  }  }
1993    
1994    
1995  DataAbstract*  DataAbstract*
1996  DataLazy::deepCopy()  DataLazy::deepCopy() const
1997  {  {
1998    switch (getOpgroup(m_op))    switch (getOpgroup(m_op))
1999    {    {
2000    case G_IDENTITY:  return new DataLazy(m_id->deepCopy()->getPtr());    case G_IDENTITY:  return new DataLazy(m_id->deepCopy()->getPtr());
2001    case G_UNARY: return new DataLazy(m_left->deepCopy()->getPtr(),m_op);    case G_UNARY:
2002    case G_BINARY:    return new DataLazy(m_left->deepCopy()->getPtr(),m_right->deepCopy()->getPtr(),m_op);    case G_REDUCTION:      return new DataLazy(m_left->deepCopy()->getPtr(),m_op);
2003      case G_UNARY_P:       return new DataLazy(m_left->deepCopy()->getPtr(), m_op, m_tol);
2004      case G_BINARY:        return new DataLazy(m_left->deepCopy()->getPtr(),m_right->deepCopy()->getPtr(),m_op);
2005    case G_NP1OUT: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(),m_op);    case G_NP1OUT: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(),m_op);
2006    case G_TENSORPROD: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);    case G_TENSORPROD: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
2007      case G_NP1OUT_P:   return new DataLazy(m_left->deepCopy()->getPtr(),m_op,  m_axis_offset);
2008      case G_NP1OUT_2P:  return new DataLazy(m_left->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
2009    default:    default:
2010      throw DataException("Programmer error - do not know how to deepcopy operator "+opToString(m_op)+".");          throw DataException("Programmer error - do not know how to deepcopy operator "+opToString(m_op)+".");
2011    }    }
2012  }  }
2013    
2014    
2015    
2016  // There is no single, natural interpretation of getLength on DataLazy.  // There is no single, natural interpretation of getLength on DataLazy.
2017  // Instances of DataReady can look at the size of their vectors.  // Instances of DataReady can look at the size of their vectors.
2018  // For lazy though, it could be the size the data would be if it were resolved;  // For lazy though, it could be the size the data would be if it were resolved;
2019  // or it could be some function of the lengths of the DataReady instances which  // or it could be some function of the lengths of the DataReady instances which
2020  // form part of the expression.  // form part of the expression.
2021  // Rather than have people making assumptions, I have disabled the method.  // Rather than have people making assumptions, I have disabled the method.
2022  DataTypes::ValueType::size_type  DataTypes::RealVectorType::size_type
2023  DataLazy::getLength() const  DataLazy::getLength() const
2024  {  {
2025    throw DataException("getLength() does not make sense for lazy data.");    throw DataException("getLength() does not make sense for lazy data.");
# Line 1149  DataLazy::getSlice(const DataTypes::Regi Line 2034  DataLazy::getSlice(const DataTypes::Regi
2034    
2035    
2036  // To do this we need to rely on our child nodes  // To do this we need to rely on our child nodes
2037  DataTypes::ValueType::size_type  DataTypes::RealVectorType::size_type
2038  DataLazy::getPointOffset(int sampleNo,  DataLazy::getPointOffset(int sampleNo,
2039                   int dataPointNo)                   int dataPointNo)
2040  {  {
2041    if (m_op==IDENTITY)    if (m_op==IDENTITY)
2042    {    {
2043      return m_id->getPointOffset(sampleNo,dataPointNo);          return m_id->getPointOffset(sampleNo,dataPointNo);
2044    }    }
2045    if (m_readytype!='E')    if (m_readytype!='E')
2046    {    {
2047      collapse();          collapse();
2048      return m_id->getPointOffset(sampleNo,dataPointNo);          return m_id->getPointOffset(sampleNo,dataPointNo);
2049    }    }
2050    // at this point we do not have an identity node and the expression will be Expanded    // at this point we do not have an identity node and the expression will be Expanded
2051    // so we only need to know which child to ask    // so we only need to know which child to ask
2052    if (m_left->m_readytype=='E')    if (m_left->m_readytype=='E')
2053    {    {
2054      return m_left->getPointOffset(sampleNo,dataPointNo);          return m_left->getPointOffset(sampleNo,dataPointNo);
2055    }    }
2056    else    else
2057    {    {
2058      return m_right->getPointOffset(sampleNo,dataPointNo);          return m_right->getPointOffset(sampleNo,dataPointNo);
2059    }    }
2060  }  }
2061    
2062  // To do this we need to rely on our child nodes  // To do this we need to rely on our child nodes
2063  DataTypes::ValueType::size_type  DataTypes::RealVectorType::size_type
2064  DataLazy::getPointOffset(int sampleNo,  DataLazy::getPointOffset(int sampleNo,
2065                   int dataPointNo) const                   int dataPointNo) const
2066  {  {
2067    if (m_op==IDENTITY)    if (m_op==IDENTITY)
2068    {    {
2069      return m_id->getPointOffset(sampleNo,dataPointNo);          return m_id->getPointOffset(sampleNo,dataPointNo);
2070    }    }
2071    if (m_readytype=='E')    if (m_readytype=='E')
2072    {    {
# Line 1189  DataLazy::getPointOffset(int sampleNo, Line 2074  DataLazy::getPointOffset(int sampleNo,
2074      // so we only need to know which child to ask      // so we only need to know which child to ask
2075      if (m_left->m_readytype=='E')      if (m_left->m_readytype=='E')
2076      {      {
2077      return m_left->getPointOffset(sampleNo,dataPointNo);          return m_left->getPointOffset(sampleNo,dataPointNo);
2078      }      }
2079      else      else
2080      {      {
2081      return m_right->getPointOffset(sampleNo,dataPointNo);          return m_right->getPointOffset(sampleNo,dataPointNo);
2082      }      }
2083    }    }
2084    if (m_readytype=='C')    if (m_readytype=='C')
2085    {    {
2086      return m_left->getPointOffset(sampleNo,dataPointNo); // which child doesn't matter          return m_left->getPointOffset(sampleNo,dataPointNo); // which child doesn't matter
2087    }    }
2088    throw DataException("Programmer error - getPointOffset on lazy data may require collapsing (but this object is marked const).");    throw DataException("Programmer error - getPointOffset on lazy data may require collapsing (but this object is marked const).");
2089  }  }
2090    
2091  // It would seem that DataTagged will need to be treated differently since even after setting all tags  
2092  // to zero, all the tags from all the DataTags would be in the result.  // I have decided to let Data:: handle this issue.
 // However since they all have the same value (0) whether they are there or not should not matter.  
 // So I have decided that for all types this method will create a constant 0.  
 // It can be promoted up as required.  
 // A possible efficiency concern might be expanded->constant->expanded which has an extra memory management  
 // but we can deal with that if it arrises.  
2093  void  void
2094  DataLazy::setToZero()  DataLazy::setToZero()
2095  {  {
2096    DataTypes::ValueType v(getNoValues(),0);  //   DataTypes::RealVectorType v(getNoValues(),0);
2097    m_id=DataReady_ptr(new DataConstant(getFunctionSpace(),getShape(),v));  //   m_id=DataReady_ptr(new DataConstant(getFunctionSpace(),getShape(),v));
2098    m_op=IDENTITY;  //   m_op=IDENTITY;
2099    m_right.reset();    //   m_right.reset();  
2100    m_left.reset();  //   m_left.reset();
2101    m_readytype='C';  //   m_readytype='C';
2102    m_buffsRequired=1;  //   m_buffsRequired=1;
2103    
2104      (void)privdebug;  // to stop the compiler complaining about unused privdebug
2105      throw DataException("Programmer error - setToZero not supported for DataLazy (DataLazy objects should be read only).");
2106  }  }
2107    
2108  }   // end namespace  bool
2109    DataLazy::actsExpanded() const
2110    {
2111            return (m_readytype=='E');
2112    }
2113    
2114    }       // end namespace
2115    

Legend:
Removed from v.2082  
changed lines
  Added in v.5985

  ViewVC Help
Powered by ViewVC 1.1.26