/[escript]/branches/clazy/escriptcore/src/DataLazy.cpp

Diff of /branches/clazy/escriptcore/src/DataLazy.cpp


Old: trunk/escript/src/DataLazy.cpp revision 2082 by caltinay, Fri Nov 21 01:46:05 2008 UTC
New: trunk/escriptcore/src/DataLazy.cpp revision 6125 by jfenwick, Tue Apr 5 04:12:13 2016 UTC
# Line 1  Line 1 
1    
2  /*******************************************************  /*****************************************************************************
3  *  *
4  * Copyright (c) 2003-2008 by University of Queensland  * Copyright (c) 2003-2016 by The University of Queensland
5  * Earth Systems Science Computational Center (ESSCC)  * http://www.uq.edu.au
 * http://www.uq.edu.au/esscc  
6  *  *
7  * Primary Business: Queensland, Australia  * Primary Business: Queensland, Australia
8  * Licensed under the Open Software License version 3.0  * Licensed under the Apache License, version 2.0
9  * http://www.opensource.org/licenses/osl-3.0.php  * http://www.apache.org/licenses/LICENSE-2.0
10  *  *
11  *******************************************************/  * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12    * Development 2012-2013 by School of Earth Sciences
13    * Development from 2014 by Centre for Geoscience Computing (GeoComp)
14    *
15    *****************************************************************************/
16    
17  #include "DataLazy.h"  #include "DataLazy.h"
18    #include "Data.h"
19    #include "DataTypes.h"
20    #include "EscriptParams.h"
21    #include "FunctionSpace.h"
22    #include "Utils.h"
23    #include "DataVectorOps.h"
24    
25  #ifdef USE_NETCDF  #ifdef USE_NETCDF
26  #include <netcdfcpp.h>  #include <netcdfcpp.h>
27  #endif  #endif
28  #ifdef PASO_MPI  
29  #include <mpi.h>  #include <iomanip> // for some fancy formatting in debug
30  #endif  
31  #ifdef _OPENMP  using namespace escript::DataTypes;
32  #include <omp.h>  
33  #endif  #define NO_ARG
34  #include "FunctionSpace.h"  
35  #include "DataTypes.h"  // #define LAZYDEBUG(X) if (privdebug){X;}
36  #include "Data.h"  #define LAZYDEBUG(X)
37  #include "UnaryFuncs.h"     // for escript::fsign  namespace
38  #include "Utils.h"  {
39    bool privdebug=false;
40    
41    #define ENABLEDEBUG privdebug=true;
42    #define DISABLEDEBUG privdebug=false;
43    }
44    
45    // #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {cerr << "\n!!!!!!! SIZE LIMIT EXCEEDED " << m_children << ";" << m_height << endl << toString() << endl;resolveToIdentity();}
46    
47    // #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {cerr << "SIZE LIMIT EXCEEDED " << m_height << endl;resolveToIdentity();}
48    
49    
50    #define SIZELIMIT if (m_height>escript::escriptParams.getTOO_MANY_LEVELS())  {if (escript::escriptParams.getLAZY_VERBOSE()){cerr << "SIZE LIMIT EXCEEDED height=" << m_height << endl;}resolveToIdentity();}
51    
52  /*  /*
53  How does DataLazy work?  How does DataLazy work?
# Line 39  A special operation, IDENTITY, stores an Line 60  A special operation, IDENTITY, stores an
60  This means that all "internal" nodes in the structure are instances of DataLazy.  This means that all "internal" nodes in the structure are instances of DataLazy.
61    
62  Each operation has a string representation as well as an opgroup - eg G_IDENTITY, G_BINARY, ...  Each operation has a string representation as well as an opgroup - eg G_IDENTITY, G_BINARY, ...
63  Note that IDENITY is not considered a unary operation.  Note that IDENTITY is not considered a unary operation.
64    
65  I am avoiding calling the structure formed a tree because it is not guaranteed to be one (eg c=a+a).  I am avoiding calling the structure formed a tree because it is not guaranteed to be one (eg c=a+a).
66  It must however form a DAG (directed acyclic graph).  It must however form a DAG (directed acyclic graph).
# Line 47  I will refer to individual DataLazy obje Line 68  I will refer to individual DataLazy obje
68    
69  Each node also stores:  Each node also stores:
70  - m_readytype \in {'E','T','C','?'} ~ indicates what sort of DataReady would be produced if the expression was  - m_readytype \in {'E','T','C','?'} ~ indicates what sort of DataReady would be produced if the expression was
71      evaluated.          evaluated.
72  - m_buffsrequired ~ the larged number of samples which would need to be kept simultaneously in order to  - m_buffsrequired ~ the largest number of samples which would need to be kept simultaneously in order to
73      evaluate the expression.          evaluate the expression.
74  - m_samplesize ~ the number of doubles stored in a sample.  - m_samplesize ~ the number of doubles stored in a sample.
75    
76  When a new node is created, the above values are computed based on the values in the child nodes.  When a new node is created, the above values are computed based on the values in the child nodes.
# Line 70  The convention that I use, is that the r Line 91  The convention that I use, is that the r
91  For expressions which evaluate to Constant or Tagged, there is a different evaluation method.  For expressions which evaluate to Constant or Tagged, there is a different evaluation method.
92  The collapse method invokes the (non-lazy) operations on the Data class to evaluate the expression.  The collapse method invokes the (non-lazy) operations on the Data class to evaluate the expression.
93    
94  To add a new operator you need to do the following (plus anything I might have forgotten):  To add a new operator you need to do the following (plus anything I might have forgotten - adding a new group for example):
95  1) Add to the ES_optype.  1) Add to the ES_optype.
96  2) determine what opgroup your operation belongs to (X)  2) determine what opgroup your operation belongs to (X)
97  3) add a string for the op to the end of ES_opstrings  3) add a string for the op to the end of ES_opstrings
# Line 90  namespace escript Line 111  namespace escript
111  namespace  namespace
112  {  {
113    
114    
115    // enabling this will print out whenever the maximum stacksize used by resolve increases
116    // it assumes _OPENMP is also in use
117    //#define LAZY_STACK_PROF
118    
119    
120    
121    #ifndef _OPENMP
122      #ifdef LAZY_STACK_PROF
123      #undef LAZY_STACK_PROF
124      #endif
125    #endif
126    
127    
128    #ifdef LAZY_STACK_PROF
129    std::vector<void*> stackstart(getNumberOfThreads());
130    std::vector<void*> stackend(getNumberOfThreads());
131    size_t maxstackuse=0;
132    #endif
133    
134  enum ES_opgroup  enum ES_opgroup
135  {  {
136     G_UNKNOWN,     G_UNKNOWN,
137     G_IDENTITY,     G_IDENTITY,
138     G_BINARY,        // pointwise operations with two arguments     G_BINARY,            // pointwise operations with two arguments
139     G_UNARY,     // pointwise operations with one argument     G_UNARY,             // pointwise operations with one argument
140     G_NP1OUT,        // non-pointwise op with one output     G_UNARY_P,           // pointwise operations with one argument, requiring a parameter
141     G_TENSORPROD     // general tensor product     G_NP1OUT,            // non-pointwise op with one output
142       G_NP1OUT_P,          // non-pointwise op with one output requiring a parameter
143       G_TENSORPROD,        // general tensor product
144       G_NP1OUT_2P,         // non-pointwise op with one output requiring two params
145       G_REDUCTION,         // non-pointwise unary op with a scalar output
146       G_CONDEVAL
147  };  };
148    
149    
150    
151    
152  string ES_opstrings[]={"UNKNOWN","IDENTITY","+","-","*","/","^",  string ES_opstrings[]={"UNKNOWN","IDENTITY","+","-","*","/","^",
153              "sin","cos","tan",                          "sin","cos","tan",
154              "asin","acos","atan","sinh","cosh","tanh","erf",                          "asin","acos","atan","sinh","cosh","tanh","erf",
155              "asinh","acosh","atanh",                          "asinh","acosh","atanh",
156              "log10","log","sign","abs","neg","pos","exp","sqrt",                          "log10","log","sign","abs","neg","pos","exp","sqrt",
157              "1/","where>0","where<0","where>=0","where<=0",                          "1/","where>0","where<0","where>=0","where<=0", "where<>0","where=0",
158              "symmetric","nonsymmetric",                          "symmetric","antisymmetric",
159              "prod"};                          "prod",
160  int ES_opcount=36;                          "transpose", "trace",
161                            "swapaxes",
162                            "minval", "maxval",
163                            "condEval",
164                            "hermitian","antihermitian"
165    };
166    int ES_opcount=46;
167  ES_opgroup opgroups[]={G_UNKNOWN,G_IDENTITY,G_BINARY,G_BINARY,G_BINARY,G_BINARY, G_BINARY,  ES_opgroup opgroups[]={G_UNKNOWN,G_IDENTITY,G_BINARY,G_BINARY,G_BINARY,G_BINARY, G_BINARY,
168              G_UNARY,G_UNARY,G_UNARY, //10                          G_UNARY,G_UNARY,G_UNARY, //10
169              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,    // 17                          G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,        // 17
170              G_UNARY,G_UNARY,G_UNARY,                    // 20                          G_UNARY,G_UNARY,G_UNARY,                                        // 20
171              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,        // 28                          G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,        // 28
172              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,            // 33                          G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY, G_UNARY_P, G_UNARY_P,          // 35
173              G_NP1OUT,G_NP1OUT,                          G_NP1OUT,G_NP1OUT,
174              G_TENSORPROD};                          G_TENSORPROD,
175                            G_NP1OUT_P, G_NP1OUT_P,
176                            G_NP1OUT_2P,
177                            G_REDUCTION, G_REDUCTION,
178                            G_CONDEVAL,
179                            G_UNARY,G_UNARY
180    };
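The three tables above are positional: ES_opstrings[i], opgroups[i] and the ES_optype with value i all refer to the same operation, and ES_opcount (46 in this revision) must equal their common length. A minimal compile-time guard along these lines (not present in the source, and assuming a C++11 build) would catch a table that falls out of step:

    static_assert(sizeof(ES_opstrings)/sizeof(ES_opstrings[0]) ==
                  sizeof(opgroups)/sizeof(opgroups[0]),
                  "ES_opstrings and opgroups must have one entry per ES_optype");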
181  inline  inline
182  ES_opgroup  ES_opgroup
183  getOpgroup(ES_optype op)  getOpgroup(ES_optype op)
# Line 131  getOpgroup(ES_optype op) Line 189  getOpgroup(ES_optype op)
189  FunctionSpace  FunctionSpace
190  resultFS(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)  resultFS(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
191  {  {
192      // perhaps this should call interpolate and throw or something?          // perhaps this should call interpolate and throw or something?
193      // maybe we need an interpolate node -          // maybe we need an interpolate node -
194      // that way, if interpolate is required in any other op we can just throw a          // that way, if interpolate is required in any other op we can just throw a
195      // programming error exception.          // programming error exception.
196    
197    FunctionSpace l=left->getFunctionSpace();    FunctionSpace l=left->getFunctionSpace();
198    FunctionSpace r=right->getFunctionSpace();    FunctionSpace r=right->getFunctionSpace();
199    if (l!=r)    if (l!=r)
200    {    {
201      if (r.probeInterpolation(l))      signed char res=r.getDomain()->preferredInterpolationOnDomain(r.getTypeCode(), l.getTypeCode());
202        if (res==1)
203      {      {
204      return l;          return l;
205      }      }
206      if (l.probeInterpolation(r))      if (res==-1)
207      {      {
208      return r;          return r;
209      }      }
210      throw DataException("Cannot interpolate between the FunctionSpaces given for operation "+opToString(op)+".");      throw DataException("Cannot interpolate between the FunctionSpaces given for operation "+opToString(op)+".");
211    }    }
# Line 158  resultFS(DataAbstract_ptr left, DataAbst Line 217  resultFS(DataAbstract_ptr left, DataAbst
217  DataTypes::ShapeType  DataTypes::ShapeType
218  resultShape(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)  resultShape(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
219  {  {
220      if (left->getShape()!=right->getShape())          if (left->getShape()!=right->getShape())
221      {          {
222        if ((getOpgroup(op)!=G_BINARY) && (getOpgroup(op)!=G_NP1OUT))            if ((getOpgroup(op)!=G_BINARY) && (getOpgroup(op)!=G_NP1OUT))
223        {            {
224          throw DataException("Shapes not the same - shapes must match for (point)binary operations.");                  throw DataException("Shapes not the same - shapes must match for (point)binary operations.");
225        }            }
226        if (left->getRank()==0)   // we need to allow scalar * anything  
227        {            if (left->getRank()==0)       // we need to allow scalar * anything
228          return right->getShape();            {
229        }                  return right->getShape();
230        if (right->getRank()==0)            }
231        {            if (right->getRank()==0)
232          return left->getShape();            {
233        }                  return left->getShape();
234        throw DataException("Shapes not the same - arguments must have matching shapes (or be scalars) for (point)binary operations on lazy data.");            }
235      }            throw DataException("Shapes not the same - arguments must have matching shapes (or be scalars) for (point)binary operations on lazy data.");
236      return left->getShape();          }
237            return left->getShape();
238    }
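Worked examples of the rule implemented above (shapes illustrative only): a rank-0 operand is broadcast against any shape, but there is no partial broadcasting.

    // scalar ()    op  shape (3,3)  ->  result shape (3,3)
    // shape (3,3)  op  scalar ()    ->  result shape (3,3)
    // shape (3,)   op  shape (3,3)  ->  DataException ("Shapes not the same ...")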
239    
240    // return the shape for "op left"
241    
242    DataTypes::ShapeType
243    resultShape(DataAbstract_ptr left, ES_optype op, int axis_offset)
244    {
245            switch(op)
246            {
247            case TRANS:
248               {                    // for the scoping of variables
249                    const DataTypes::ShapeType& s=left->getShape();
250                    DataTypes::ShapeType sh;
251                    int rank=left->getRank();
252                    if (axis_offset<0 || axis_offset>rank)
253                    {
254                stringstream e;
255                e << "Error - Data::transpose must have 0 <= axis_offset <= rank=" << rank;
256                throw DataException(e.str());
257            }
258            for (int i=0; i<rank; i++)
259                    {
260                       int index = (axis_offset+i)%rank;
261               sh.push_back(s[index]); // Append to new shape
262            }
263                    return sh;
264               }
265            break;
266            case TRACE:
267               {
268                    int rank=left->getRank();
269                    if (rank<2)
270                    {
271                       throw DataException("Trace can only be computed for objects with rank 2 or greater.");
272                    }
273                    if ((axis_offset>rank-2) || (axis_offset<0))
274                    {
275                       throw DataException("Trace: axis offset must lie between 0 and rank-2 inclusive.");
276                    }
277                    if (rank==2)
278                    {
279                       return DataTypes::scalarShape;
280                    }
281                    else if (rank==3)
282                    {
283                       DataTypes::ShapeType sh;
284                       if (axis_offset==0)
285                       {
286                            sh.push_back(left->getShape()[2]);
287                       }
288                       else         // offset==1
289                       {
290                            sh.push_back(left->getShape()[0]);
291                       }
292                       return sh;
293                    }
294                    else if (rank==4)
295                    {
296                       DataTypes::ShapeType sh;
297                       const DataTypes::ShapeType& s=left->getShape();
298                       if (axis_offset==0)
299                       {
300                            sh.push_back(s[2]);
301                            sh.push_back(s[3]);
302                       }
303                       else if (axis_offset==1)
304                       {
305                            sh.push_back(s[0]);
306                            sh.push_back(s[3]);
307                       }
308                       else         // offset==2
309                       {
310                            sh.push_back(s[0]);
311                            sh.push_back(s[1]);
312                       }
313                       return sh;
314                    }
315                    else            // unknown rank
316                    {
317                       throw DataException("Error - Data::trace can only be calculated for rank 2, 3 or 4 object.");
318                    }
319               }
320            break;
321            default:
322            throw DataException("Programmer error - resultShape(left,op) can't compute shapes for operator "+opToString(op)+".");
323            }
324  }  }
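To make the index arithmetic above concrete, two worked examples (shapes chosen for illustration):

    // TRANS, shape (2,3,4), axis_offset=1:
    //   sh[i] = s[(1+i)%3]                    ->  result shape (3,4,2)
    // TRACE, shape (2,3,3,5), axis_offset=1:
    //   axes axis_offset and axis_offset+1 drop out  ->  result shape (2,5)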
325    
326    DataTypes::ShapeType
327    SwapShape(DataAbstract_ptr left, const int axis0, const int axis1)
328    {
329         // This code taken from the Data.cpp swapaxes() method
330         // Some of the checks are probably redundant here
331         int axis0_tmp,axis1_tmp;
332         const DataTypes::ShapeType& s=left->getShape();
333         DataTypes::ShapeType out_shape;
334         // Here's the equivalent of python s_out=s[axis_offset:]+s[:axis_offset]
335         // which goes thru all shape vector elements starting with axis_offset (at index=rank wrap around to 0)
336         int rank=left->getRank();
337         if (rank<2) {
338            throw DataException("Error - Data::swapaxes argument must have at least rank 2.");
339         }
340         if (axis0<0 || axis0>rank-1) {
341            stringstream e;
342            e << "Error - Data::swapaxes: axis0 must be between 0 and rank-1=" << (rank-1);
343            throw DataException(e.str());
344         }
345         if (axis1<0 || axis1>rank-1) {
346            stringstream e;
347            e << "Error - Data::swapaxes: axis1 must be between 0 and rank-1=" << (rank-1);
348            throw DataException(e.str());
349         }
350         if (axis0 == axis1) {
351             throw DataException("Error - Data::swapaxes: axis indices must be different.");
352         }
353         if (axis0 > axis1) {
354             axis0_tmp=axis1;
355             axis1_tmp=axis0;
356         } else {
357             axis0_tmp=axis0;
358             axis1_tmp=axis1;
359         }
360         for (int i=0; i<rank; i++) {
361           if (i == axis0_tmp) {
362              out_shape.push_back(s[axis1_tmp]);
363           } else if (i == axis1_tmp) {
364              out_shape.push_back(s[axis0_tmp]);
365           } else {
366              out_shape.push_back(s[i]);
367           }
368         }
369        return out_shape;
370    }
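For example (illustrative values), swapping axes 0 and 2 of a shape-(2,3,4) object:

    // s = (2,3,4), axis0=0, axis1=2:
    //   out_shape[0]=s[2]=4, out_shape[1]=s[1]=3, out_shape[2]=s[0]=2   ->  (4,3,2)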
371    
372    
373  // determine the output shape for the general tensor product operation  // determine the output shape for the general tensor product operation
374  // the additional parameters return information required later for the product  // the additional parameters return information required later for the product
375  // the majority of this code is copy pasted from C_General_Tensor_Product  // the majority of this code is copy pasted from C_General_Tensor_Product
376  DataTypes::ShapeType  DataTypes::ShapeType
377  GTPShape(DataAbstract_ptr left, DataAbstract_ptr right, int axis_offset, int transpose, int& SL, int& SM, int& SR)  GTPShape(DataAbstract_ptr left, DataAbstract_ptr right, int axis_offset, int transpose, int& SL, int& SM, int& SR)
378  {  {
379                
380    // Get rank and shape of inputs    // Get rank and shape of inputs
381    int rank0 = left->getRank();    int rank0 = left->getRank();
382    int rank1 = right->getRank();    int rank1 = right->getRank();
# Line 192  GTPShape(DataAbstract_ptr left, DataAbst Line 385  GTPShape(DataAbstract_ptr left, DataAbst
385    
386    // Prepare for the loops of the product and verify compatibility of shapes    // Prepare for the loops of the product and verify compatibility of shapes
387    int start0=0, start1=0;    int start0=0, start1=0;
388    if (transpose == 0)       {}    if (transpose == 0)           {}
389    else if (transpose == 1)  { start0 = axis_offset; }    else if (transpose == 1)      { start0 = axis_offset; }
390    else if (transpose == 2)  { start1 = rank1-axis_offset; }    else if (transpose == 2)      { start1 = rank1-axis_offset; }
391    else              { throw DataException("DataLazy GeneralTensorProduct Constructor: Error - transpose should be 0, 1 or 2"); }    else                          { throw DataException("DataLazy GeneralTensorProduct Constructor: Error - transpose should be 0, 1 or 2"); }
392    
393      if (rank0<axis_offset)
394      {
395            throw DataException("DataLazy GeneralTensorProduct Constructor: Error - rank of left < axisoffset");
396      }
397    
398    // Adjust the shapes for transpose    // Adjust the shapes for transpose
399    DataTypes::ShapeType tmpShape0(rank0);    // pre-sizing the vectors rather    DataTypes::ShapeType tmpShape0(rank0);        // pre-sizing the vectors rather
400    DataTypes::ShapeType tmpShape1(rank1);    // than using push_back    DataTypes::ShapeType tmpShape1(rank1);        // than using push_back
401    for (int i=0; i<rank0; i++)   { tmpShape0[i]=shape0[(i+start0)%rank0]; }    for (int i=0; i<rank0; i++)   { tmpShape0[i]=shape0[(i+start0)%rank0]; }
402    for (int i=0; i<rank1; i++)   { tmpShape1[i]=shape1[(i+start1)%rank1]; }    for (int i=0; i<rank1; i++)   { tmpShape1[i]=shape1[(i+start1)%rank1]; }
403    
404    // Prepare for the loops of the product    // Prepare for the loops of the product
405    SL=1, SM=1, SR=1;    SL=1, SM=1, SR=1;
406    for (int i=0; i<rank0-axis_offset; i++)   {    for (int i=0; i<rank0-axis_offset; i++)       {
407      SL *= tmpShape0[i];      SL *= tmpShape0[i];
408    }    }
409    for (int i=rank0-axis_offset; i<rank0; i++)   {    for (int i=rank0-axis_offset; i<rank0; i++)   {
410      if (tmpShape0[i] != tmpShape1[i-(rank0-axis_offset)]) {      if (tmpShape0[i] != tmpShape1[i-(rank0-axis_offset)]) {
411        throw DataException("C_GeneralTensorProduct: Error - incompatible shapes");        throw DataException("C_GeneralTensorProduct: Error - incompatible shapes");
412      }      }
413      SM *= tmpShape0[i];      SM *= tmpShape0[i];
414    }    }
415    for (int i=axis_offset; i<rank1; i++)     {    for (int i=axis_offset; i<rank1; i++)         {
416      SR *= tmpShape1[i];      SR *= tmpShape1[i];
417    }    }
418    
419    // Define the shape of the output (rank of shape is the sum of the loop ranges below)    // Define the shape of the output (rank of shape is the sum of the loop ranges below)
420    DataTypes::ShapeType shape2(rank0+rank1-2*axis_offset);      DataTypes::ShapeType shape2(rank0+rank1-2*axis_offset);      
421    {         // block to limit the scope of out_index    {                     // block to limit the scope of out_index
422       int out_index=0;       int out_index=0;
423       for (int i=0; i<rank0-axis_offset; i++, ++out_index) { shape2[out_index]=tmpShape0[i]; } // First part of arg_0_Z       for (int i=0; i<rank0-axis_offset; i++, ++out_index) { shape2[out_index]=tmpShape0[i]; } // First part of arg_0_Z
424       for (int i=axis_offset; i<rank1; i++, ++out_index)   { shape2[out_index]=tmpShape1[i]; } // Last part of arg_1_Z       for (int i=axis_offset; i<rank1; i++, ++out_index)   { shape2[out_index]=tmpShape1[i]; } // Last part of arg_1_Z
425    }    }
   return shape2;  
 }  
426    
427      if (shape2.size()>ESCRIPT_MAX_DATA_RANK)
428      {
429         ostringstream os;
430         os << "C_GeneralTensorProduct: Error - Attempt to create a rank " << shape2.size() << " object. The maximum rank is " << ESCRIPT_MAX_DATA_RANK << ".";
431         throw DataException(os.str());
432      }
433    
434  // determine the number of points in the result of "left op right"    return shape2;
 // note that determining the resultLength for G_TENSORPROD is more complex and will not be processed here  
 // size_t  
 // resultLength(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)  
 // {  
 //    switch (getOpgroup(op))  
 //    {  
 //    case G_BINARY: return left->getLength();  
 //    case G_UNARY: return left->getLength();  
 //    case G_NP1OUT: return left->getLength();  
 //    default:  
 //  throw DataException("Programmer Error - attempt to getLength() for operator "+opToString(op)+".");  
 //    }  
 // }  
   
 // determine the number of samples requires to evaluate an expression combining left and right  
 // NP1OUT needs an extra buffer because we can't write the answers over the top of the input.  
 // The same goes for G_TENSORPROD  
 int  
 calcBuffs(const DataLazy_ptr& left, const DataLazy_ptr& right, ES_optype op)  
 {  
    switch(getOpgroup(op))  
    {  
    case G_IDENTITY: return 1;  
    case G_BINARY: return max(left->getBuffsRequired(),right->getBuffsRequired()+1);  
    case G_UNARY: return max(left->getBuffsRequired(),1);  
    case G_NP1OUT: return 1+max(left->getBuffsRequired(),1);  
    case G_TENSORPROD: return 1+max(left->getBuffsRequired(),right->getBuffsRequired()+1);  
    default:  
     throw DataException("Programmer Error - attempt to calcBuffs() for operator "+opToString(op)+".");  
    }  
435  }  }
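A worked example of the bookkeeping above (illustrative values): left shape (2,3,4), right shape (4,5), axis_offset=1, transpose=0.

    // SL = 2*3 = 6       (leading, non-contracted axes of left)
    // SM = 4             (contracted axes; must match between left and right)
    // SR = 5             (trailing axes of right)
    // shape2 = (2,3,5)   (rank = 3 + 2 - 2*1 = 3)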
436    
437    }       // end anonymous namespace
 }   // end anonymous namespace  
438    
439    
440    
# Line 279  opToString(ES_optype op) Line 449  opToString(ES_optype op)
449    return ES_opstrings[op];    return ES_opstrings[op];
450  }  }
451    
452    void DataLazy::LazyNodeSetup()
453    {
454    #ifdef _OPENMP
455        int numthreads=omp_get_max_threads();
456        m_samples.resize(numthreads*m_samplesize);
457        m_sampleids=new int[numthreads];
458        for (int i=0;i<numthreads;++i)
459        {
460            m_sampleids[i]=-1;  
461        }
462    #else
463        m_samples.resize(m_samplesize);
464        m_sampleids=new int[1];
465        m_sampleids[0]=-1;
466    #endif  // _OPENMP
467    }
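The effect of the setup above is one sample-sized slot per OpenMP thread inside m_samples, with m_sampleids recording which sample currently occupies each slot (-1 meaning none). A sketch of the layout, assuming the usual tid = omp_get_thread_num():

    // m_samples : [ slot for tid 0 | slot for tid 1 | ... | slot for tid N-1 ]
    //             each slot is m_samplesize doubles, written at offset tid*m_samplesize
    // m_sampleids[tid] == sampleNo  ->  that thread's slot already holds sampleNo
    // m_sampleids[tid] == -1        ->  the slot is empty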
468    
469    
470    // Creates an identity node
471  DataLazy::DataLazy(DataAbstract_ptr p)  DataLazy::DataLazy(DataAbstract_ptr p)
472      : parent(p->getFunctionSpace(),p->getShape()),          : parent(p->getFunctionSpace(),p->getShape())
473      m_op(IDENTITY),          ,m_sampleids(0),
474      m_axis_offset(0),          m_samples(1)
     m_transpose(0),  
     m_SL(0), m_SM(0), m_SR(0)  
475  {  {
476     if (p->isLazy())     if (p->isLazy())
477     {     {
478      // I don't want identity of Lazy.          // I don't want identity of Lazy.
479      // Question: Why would that be so bad?          // Question: Why would that be so bad?
480      // Answer: We assume that the child of ID is something we can call getVector on          // Answer: We assume that the child of ID is something we can call getVector on
481      throw DataException("Programmer error - attempt to create identity from a DataLazy.");          throw DataException("Programmer error - attempt to create identity from a DataLazy.");
482     }     }
483     else     else
484     {     {
485      m_id=dynamic_pointer_cast<DataReady>(p);          DataReady_ptr dr=dynamic_pointer_cast<DataReady>(p);
486      if(p->isConstant()) {m_readytype='C';}          makeIdentity(dr);
487      else if(p->isExpanded()) {m_readytype='E';}  LAZYDEBUG(cout << "Wrapping " << dr.get() << " id=" << m_id.get() << endl;)
     else if (p->isTagged()) {m_readytype='T';}  
     else {throw DataException("Unknown DataReady instance in DataLazy constructor.");}  
488     }     }
489     m_buffsRequired=1;  LAZYDEBUG(cout << "(1)Lazy created with " << m_samplesize << endl;)
    m_samplesize=getNumDPPSample()*getNoValues();  
    m_maxsamplesize=m_samplesize;  
 cout << "(1)Lazy created with " << m_samplesize << endl;  
490  }  }
491    
   
   
   
492  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op)  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op)
493      : parent(left->getFunctionSpace(),left->getShape()),          : parent(left->getFunctionSpace(),(getOpgroup(op)!=G_REDUCTION)?left->getShape():DataTypes::scalarShape),
494      m_op(op),          m_op(op),
495      m_axis_offset(0),          m_axis_offset(0),
496      m_transpose(0),          m_transpose(0),
497      m_SL(0), m_SM(0), m_SR(0)          m_SL(0), m_SM(0), m_SR(0)
498  {  {
499     if ((getOpgroup(op)!=G_UNARY) && (getOpgroup(op)!=G_NP1OUT))     if ((getOpgroup(op)!=G_UNARY) && (getOpgroup(op)!=G_NP1OUT) && (getOpgroup(op)!=G_REDUCTION))
500     {     {
501      throw DataException("Programmer error - constructor DataLazy(left, op) will only process UNARY operations.");          throw DataException("Programmer error - constructor DataLazy(left, op) will only process UNARY operations.");
502     }     }
503    
504     DataLazy_ptr lleft;     DataLazy_ptr lleft;
505     if (!left->isLazy())     if (!left->isLazy())
506     {     {
507      lleft=DataLazy_ptr(new DataLazy(left));          lleft=DataLazy_ptr(new DataLazy(left));
508     }     }
509     else     else
510     {     {
511      lleft=dynamic_pointer_cast<DataLazy>(left);          lleft=dynamic_pointer_cast<DataLazy>(left);
512     }     }
513     m_readytype=lleft->m_readytype;     m_readytype=lleft->m_readytype;
514     m_left=lleft;     m_left=lleft;
    m_buffsRequired=calcBuffs(m_left, m_right,m_op); // yeah m_right will be null at this point  
515     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
516     m_maxsamplesize=max(m_samplesize,m_left->getMaxSampleSize());     m_children=m_left->m_children+1;
517       m_height=m_left->m_height+1;
518       LazyNodeSetup();
519       SIZELIMIT
520  }  }
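The m_children/m_height bookkeeping is what SIZELIMIT acts on: each wrapping operation adds one level, and once m_height passes escriptParams.getTOO_MANY_LEVELS() the node resolves itself. A rough illustration, assuming an identity node starts at height 0 (the identity constructor relies on makeIdentity for that):

    // DataLazy id(readyPtr);     // IDENTITY          height 0
    // DataLazy a(idPtr, SIN);    // sin(id)           height 1, children 1
    // DataLazy b(aPtr, EXP);     // exp(sin(id))      height 2, children 2
    // ... once m_height > TOO_MANY_LEVELS, SIZELIMIT calls resolveToIdentity()
    //     and the whole chain collapses to a single IDENTITY node.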
521    
522    
523  // In this constructor we need to consider interpolation  // In this constructor we need to consider interpolation
524  DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)  DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
525      : parent(resultFS(left,right,op), resultShape(left,right,op)),          : parent(resultFS(left,right,op), resultShape(left,right,op)),
526      m_op(op),          m_op(op),
527      m_SL(0), m_SM(0), m_SR(0)          m_SL(0), m_SM(0), m_SR(0)
528  {  {
529    LAZYDEBUG(cout << "Forming operator with " << left.get() << " " << right.get() << endl;)
530     if ((getOpgroup(op)!=G_BINARY))     if ((getOpgroup(op)!=G_BINARY))
531     {     {
532      throw DataException("Programmer error - constructor DataLazy(left, right, op) will only process BINARY operations.");          throw DataException("Programmer error - constructor DataLazy(left, right, op) will only process BINARY operations.");
533     }     }
534    
535     if (getFunctionSpace()!=left->getFunctionSpace())    // left needs to be interpolated     if (getFunctionSpace()!=left->getFunctionSpace())    // left needs to be interpolated
536     {     {
537      FunctionSpace fs=getFunctionSpace();          FunctionSpace fs=getFunctionSpace();
538      Data ltemp(left);          Data ltemp(left);
539      Data tmp(ltemp,fs);          Data tmp(ltemp,fs);
540      left=tmp.borrowDataPtr();          left=tmp.borrowDataPtr();
541     }     }
542     if (getFunctionSpace()!=right->getFunctionSpace())   // right needs to be interpolated     if (getFunctionSpace()!=right->getFunctionSpace())   // right needs to be interpolated
543     {     {
544      Data tmp(Data(right),getFunctionSpace());          Data tmp(Data(right),getFunctionSpace());
545      right=tmp.borrowDataPtr();          right=tmp.borrowDataPtr();
546    LAZYDEBUG(cout << "Right interpolation required " << right.get() << endl;)
547     }     }
548     left->operandCheck(*right);     left->operandCheck(*right);
549    
550     if (left->isLazy())          // the children need to be DataLazy. Wrap them in IDENTITY if required     if (left->isLazy())                  // the children need to be DataLazy. Wrap them in IDENTITY if required
551     {     {
552      m_left=dynamic_pointer_cast<DataLazy>(left);          m_left=dynamic_pointer_cast<DataLazy>(left);
553    LAZYDEBUG(cout << "Left is " << m_left->toString() << endl;)
554     }     }
555     else     else
556     {     {
557      m_left=DataLazy_ptr(new DataLazy(left));          m_left=DataLazy_ptr(new DataLazy(left));
558    LAZYDEBUG(cout << "Left " << left.get() << " wrapped " << m_left->m_id.get() << endl;)
559     }     }
560     if (right->isLazy())     if (right->isLazy())
561     {     {
562      m_right=dynamic_pointer_cast<DataLazy>(right);          m_right=dynamic_pointer_cast<DataLazy>(right);
563    LAZYDEBUG(cout << "Right is " << m_right->toString() << endl;)
564     }     }
565     else     else
566     {     {
567      m_right=DataLazy_ptr(new DataLazy(right));          m_right=DataLazy_ptr(new DataLazy(right));
568    LAZYDEBUG(cout << "Right " << right.get() << " wrapped " << m_right->m_id.get() << endl;)
569     }     }
570     char lt=m_left->m_readytype;     char lt=m_left->m_readytype;
571     char rt=m_right->m_readytype;     char rt=m_right->m_readytype;
572     if (lt=='E' || rt=='E')     if (lt=='E' || rt=='E')
573     {     {
574      m_readytype='E';          m_readytype='E';
575     }     }
576     else if (lt=='T' || rt=='T')     else if (lt=='T' || rt=='T')
577     {     {
578      m_readytype='T';          m_readytype='T';
579     }     }
580     else     else
581     {     {
582      m_readytype='C';          m_readytype='C';
583     }     }
584     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
585     m_maxsamplesize=max(max(m_samplesize,m_right->getMaxSampleSize()),m_left->getMaxSampleSize());       m_children=m_left->m_children+m_right->m_children+2;
586     m_buffsRequired=calcBuffs(m_left, m_right,m_op);     m_height=max(m_left->m_height,m_right->m_height)+1;
587  cout << "(3)Lazy created with " << m_samplesize << endl;     LazyNodeSetup();
588       SIZELIMIT
589    LAZYDEBUG(cout << "(3)Lazy created with " << m_samplesize << endl;)
590  }  }
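The readytype promotion used in both binary constructors, written out (E = expanded, T = tagged, C = constant):

    // E op anything  ->  E   (any expanded child forces an expanded result)
    // T op T, T op C ->  T
    // C op C         ->  C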
591    
592  DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op, int axis_offset, int transpose)  DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op, int axis_offset, int transpose)
593      : parent(resultFS(left,right,op), GTPShape(left,right, axis_offset, transpose, m_SL,m_SM, m_SR)),          : parent(resultFS(left,right,op), GTPShape(left,right, axis_offset, transpose, m_SL,m_SM, m_SR)),
594      m_op(op),          m_op(op),
595      m_axis_offset(axis_offset),          m_axis_offset(axis_offset),
596      m_transpose(transpose)          m_transpose(transpose)
597  {  {
598     if ((getOpgroup(op)!=G_TENSORPROD))     if ((getOpgroup(op)!=G_TENSORPROD))
599     {     {
600      throw DataException("Programmer error - constructor DataLazy(left, right, op, ax, tr) will only process BINARY operations which require parameters.");          throw DataException("Programmer error - constructor DataLazy(left, right, op, ax, tr) will only process BINARY operations which require parameters.");
601     }     }
602     if ((transpose>2) || (transpose<0))     if ((transpose>2) || (transpose<0))
603     {     {
604      throw DataException("DataLazy GeneralTensorProduct constructor: Error - transpose should be 0, 1 or 2");          throw DataException("DataLazy GeneralTensorProduct constructor: Error - transpose should be 0, 1 or 2");
605     }     }
606     if (getFunctionSpace()!=left->getFunctionSpace())    // left needs to be interpolated     if (getFunctionSpace()!=left->getFunctionSpace())    // left needs to be interpolated
607     {     {
608      FunctionSpace fs=getFunctionSpace();          FunctionSpace fs=getFunctionSpace();
609      Data ltemp(left);          Data ltemp(left);
610      Data tmp(ltemp,fs);          Data tmp(ltemp,fs);
611      left=tmp.borrowDataPtr();          left=tmp.borrowDataPtr();
612     }     }
613     if (getFunctionSpace()!=right->getFunctionSpace())   // right needs to be interpolated     if (getFunctionSpace()!=right->getFunctionSpace())   // right needs to be interpolated
614     {     {
615      Data tmp(Data(right),getFunctionSpace());          Data tmp(Data(right),getFunctionSpace());
616      right=tmp.borrowDataPtr();          right=tmp.borrowDataPtr();
617     }     }
618     left->operandCheck(*right);  //    left->operandCheck(*right);
619    
620     if (left->isLazy())          // the children need to be DataLazy. Wrap them in IDENTITY if required     if (left->isLazy())                  // the children need to be DataLazy. Wrap them in IDENTITY if required
621     {     {
622      m_left=dynamic_pointer_cast<DataLazy>(left);          m_left=dynamic_pointer_cast<DataLazy>(left);
623     }     }
624     else     else
625     {     {
626      m_left=DataLazy_ptr(new DataLazy(left));          m_left=DataLazy_ptr(new DataLazy(left));
627     }     }
628     if (right->isLazy())     if (right->isLazy())
629     {     {
630      m_right=dynamic_pointer_cast<DataLazy>(right);          m_right=dynamic_pointer_cast<DataLazy>(right);
631     }     }
632     else     else
633     {     {
634      m_right=DataLazy_ptr(new DataLazy(right));          m_right=DataLazy_ptr(new DataLazy(right));
635     }     }
636     char lt=m_left->m_readytype;     char lt=m_left->m_readytype;
637     char rt=m_right->m_readytype;     char rt=m_right->m_readytype;
638     if (lt=='E' || rt=='E')     if (lt=='E' || rt=='E')
639     {     {
640      m_readytype='E';          m_readytype='E';
641     }     }
642     else if (lt=='T' || rt=='T')     else if (lt=='T' || rt=='T')
643     {     {
644      m_readytype='T';          m_readytype='T';
645     }     }
646     else     else
647     {     {
648      m_readytype='C';          m_readytype='C';
649     }     }
650     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
651     m_maxsamplesize=max(max(m_samplesize,m_right->getMaxSampleSize()),m_left->getMaxSampleSize());       m_children=m_left->m_children+m_right->m_children+2;
652     m_buffsRequired=calcBuffs(m_left, m_right,m_op);     m_height=max(m_left->m_height,m_right->m_height)+1;
653  cout << "(4)Lazy created with " << m_samplesize << endl;     LazyNodeSetup();
654       SIZELIMIT
655    LAZYDEBUG(cout << "(4)Lazy created with " << m_samplesize << endl;)
656  }  }
657    
658    
659  DataLazy::~DataLazy()  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, int axis_offset)
660            : parent(left->getFunctionSpace(), resultShape(left,op, axis_offset)),
661            m_op(op),
662            m_axis_offset(axis_offset),
663            m_transpose(0),
664            m_tol(0)
665    {
666       if ((getOpgroup(op)!=G_NP1OUT_P))
667       {
668            throw DataException("Programmer error - constructor DataLazy(left, op, ax) will only process UNARY operations which require parameters.");
669       }
670       DataLazy_ptr lleft;
671       if (!left->isLazy())
672       {
673            lleft=DataLazy_ptr(new DataLazy(left));
674       }
675       else
676       {
677            lleft=dynamic_pointer_cast<DataLazy>(left);
678       }
679       m_readytype=lleft->m_readytype;
680       m_left=lleft;
681       m_samplesize=getNumDPPSample()*getNoValues();
682       m_children=m_left->m_children+1;
683       m_height=m_left->m_height+1;
684       LazyNodeSetup();
685       SIZELIMIT
686    LAZYDEBUG(cout << "(5)Lazy created with " << m_samplesize << endl;)
687    }
688    
689    DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, double tol)
690            : parent(left->getFunctionSpace(), left->getShape()),
691            m_op(op),
692            m_axis_offset(0),
693            m_transpose(0),
694            m_tol(tol)
695    {
696       if ((getOpgroup(op)!=G_UNARY_P))
697       {
698            throw DataException("Programmer error - constructor DataLazy(left, op, tol) will only process UNARY operations which require parameters.");
699       }
700       DataLazy_ptr lleft;
701       if (!left->isLazy())
702       {
703            lleft=DataLazy_ptr(new DataLazy(left));
704       }
705       else
706       {
707            lleft=dynamic_pointer_cast<DataLazy>(left);
708       }
709       m_readytype=lleft->m_readytype;
710       m_left=lleft;
711       m_samplesize=getNumDPPSample()*getNoValues();
712       m_children=m_left->m_children+1;
713       m_height=m_left->m_height+1;
714       LazyNodeSetup();
715       SIZELIMIT
716    LAZYDEBUG(cout << "(6)Lazy created with " << m_samplesize << endl;)
717    }
718    
719    
720    DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, const int axis0, const int axis1)
721            : parent(left->getFunctionSpace(), SwapShape(left,axis0,axis1)),
722            m_op(op),
723            m_axis_offset(axis0),
724            m_transpose(axis1),
725            m_tol(0)
726  {  {
727       if ((getOpgroup(op)!=G_NP1OUT_2P))
728       {
729            throw DataException("Programmer error - constructor DataLazy(left, op, axis0, axis1) will only process UNARY operations which require two integer parameters.");
730       }
731       DataLazy_ptr lleft;
732       if (!left->isLazy())
733       {
734            lleft=DataLazy_ptr(new DataLazy(left));
735       }
736       else
737       {
738            lleft=dynamic_pointer_cast<DataLazy>(left);
739       }
740       m_readytype=lleft->m_readytype;
741       m_left=lleft;
742       m_samplesize=getNumDPPSample()*getNoValues();
743       m_children=m_left->m_children+1;
744       m_height=m_left->m_height+1;
745       LazyNodeSetup();
746       SIZELIMIT
747    LAZYDEBUG(cout << "(7)Lazy created with " << m_samplesize << endl;)
748  }  }
749    
750    
751  int  namespace
752  DataLazy::getBuffsRequired() const  {
753    
754        inline int max3(int a, int b, int c)
755        {
756            int t=(a>b?a:b);
757            return (t>c?t:c);
758    
759        }
760    }
761    
762    DataLazy::DataLazy(DataAbstract_ptr mask, DataAbstract_ptr left, DataAbstract_ptr right/*, double tol*/)
763            : parent(left->getFunctionSpace(), left->getShape()),
764            m_op(CONDEVAL),
765            m_axis_offset(0),
766            m_transpose(0),
767            m_tol(0)
768  {  {
769      return m_buffsRequired;  
770       DataLazy_ptr lmask;
771       DataLazy_ptr lleft;
772       DataLazy_ptr lright;
773       if (!mask->isLazy())
774       {
775            lmask=DataLazy_ptr(new DataLazy(mask));
776       }
777       else
778       {
779            lmask=dynamic_pointer_cast<DataLazy>(mask);
780       }
781       if (!left->isLazy())
782       {
783            lleft=DataLazy_ptr(new DataLazy(left));
784       }
785       else
786       {
787            lleft=dynamic_pointer_cast<DataLazy>(left);
788       }
789       if (!right->isLazy())
790       {
791            lright=DataLazy_ptr(new DataLazy(right));
792       }
793       else
794       {
795            lright=dynamic_pointer_cast<DataLazy>(right);
796       }
797       m_readytype=lmask->m_readytype;
798       if ((lleft->m_readytype!=lright->m_readytype) || (lmask->m_readytype!=lleft->m_readytype))
799       {
800            throw DataException("Programmer Error - condEval arguments must have the same readytype");
801       }
802       m_left=lleft;
803       m_right=lright;
804       m_mask=lmask;
805       m_samplesize=getNumDPPSample()*getNoValues();
806       m_children=m_left->m_children+m_right->m_children+m_mask->m_children+1;
807       m_height=max3(m_left->m_height,m_right->m_height,m_mask->m_height)+1;
808       LazyNodeSetup();
809       SIZELIMIT
810    LAZYDEBUG(cout << "(8)Lazy created with " << m_samplesize << endl;)
811  }  }
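For the condEval node the bookkeeping combines all three children; with illustrative heights this gives, for example:

    // mask height 2, left height 5, right height 3:
    //   m_height   = max3(2,5,3) + 1 = 6
    //   m_children = children(mask) + children(left) + children(right) + 1
    // and all three arguments must share the same m_readytype, otherwise the
    // constructor throws.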
812    
813    
814  size_t  
815  DataLazy::getMaxSampleSize() const  DataLazy::~DataLazy()
816  {  {
817      return m_maxsamplesize;     delete[] m_sampleids;
818  }  }
819    
820    
821  /*  /*
822    \brief Evaluates the expression using methods on Data.    \brief Evaluates the expression using methods on Data.
823    This does the work for the collapse method.    This does the work for the collapse method.
824    For reasons of efficiency do not call this method on DataExpanded nodes.    For reasons of efficiency do not call this method on DataExpanded nodes.
825  */  */
826  DataReady_ptr  DataReady_ptr
827  DataLazy::collapseToReady()  DataLazy::collapseToReady() const
828  {  {
829    if (m_readytype=='E')    if (m_readytype=='E')
830    { // this is more an efficiency concern than anything else    {     // this is more an efficiency concern than anything else
831      throw DataException("Programmer Error - do not use collapse on Expanded data.");      throw DataException("Programmer Error - do not use collapse on Expanded data.");
832    }    }
833    if (m_op==IDENTITY)    if (m_op==IDENTITY)
# Line 511  DataLazy::collapseToReady() Line 845  DataLazy::collapseToReady()
845    switch(m_op)    switch(m_op)
846    {    {
847      case ADD:      case ADD:
848      result=left+right;          result=left+right;
849      break;          break;
850      case SUB:            case SUB:          
851      result=left-right;          result=left-right;
852      break;          break;
853      case MUL:            case MUL:          
854      result=left*right;          result=left*right;
855      break;          break;
856      case DIV:            case DIV:          
857      result=left/right;          result=left/right;
858      break;          break;
859      case SIN:      case SIN:
860      result=left.sin();            result=left.sin();      
861      break;          break;
862      case COS:      case COS:
863      result=left.cos();          result=left.cos();
864      break;          break;
865      case TAN:      case TAN:
866      result=left.tan();          result=left.tan();
867      break;          break;
868      case ASIN:      case ASIN:
869      result=left.asin();          result=left.asin();
870      break;          break;
871      case ACOS:      case ACOS:
872      result=left.acos();          result=left.acos();
873      break;          break;
874      case ATAN:      case ATAN:
875      result=left.atan();          result=left.atan();
876      break;          break;
877      case SINH:      case SINH:
878      result=left.sinh();          result=left.sinh();
879      break;          break;
880      case COSH:      case COSH:
881      result=left.cosh();          result=left.cosh();
882      break;          break;
883      case TANH:      case TANH:
884      result=left.tanh();          result=left.tanh();
885      break;          break;
886      case ERF:      case ERF:
887      result=left.erf();          result=left.erf();
888      break;          break;
889     case ASINH:     case ASINH:
890      result=left.asinh();          result=left.asinh();
891      break;          break;
892     case ACOSH:     case ACOSH:
893      result=left.acosh();          result=left.acosh();
894      break;          break;
895     case ATANH:     case ATANH:
896      result=left.atanh();          result=left.atanh();
897      break;          break;
898      case LOG10:      case LOG10:
899      result=left.log10();          result=left.log10();
900      break;          break;
901      case LOG:      case LOG:
902      result=left.log();          result=left.log();
903      break;          break;
904      case SIGN:      case SIGN:
905      result=left.sign();          result=left.sign();
906      break;          break;
907      case ABS:      case ABS:
908      result=left.abs();          result=left.abs();
909      break;          break;
910      case NEG:      case NEG:
911      result=left.neg();          result=left.neg();
912      break;          break;
913      case POS:      case POS:
914      // it doesn't mean anything for delayed.          // it doesn't mean anything for delayed.
915      // it will just trigger a deep copy of the lazy object          // it will just trigger a deep copy of the lazy object
916      throw DataException("Programmer error - POS not supported for lazy data.");          throw DataException("Programmer error - POS not supported for lazy data.");
917      break;          break;
918      case EXP:      case EXP:
919      result=left.exp();          result=left.exp();
920      break;          break;
921      case SQRT:      case SQRT:
922      result=left.sqrt();          result=left.sqrt();
923      break;          break;
924      case RECIP:      case RECIP:
925      result=left.oneOver();          result=left.oneOver();
926      break;          break;
927      case GZ:      case GZ:
928      result=left.wherePositive();          result=left.wherePositive();
929      break;          break;
930      case LZ:      case LZ:
931      result=left.whereNegative();          result=left.whereNegative();
932      break;          break;
933      case GEZ:      case GEZ:
934      result=left.whereNonNegative();          result=left.whereNonNegative();
935      break;          break;
936      case LEZ:      case LEZ:
937      result=left.whereNonPositive();          result=left.whereNonPositive();
938      break;          break;
939        case NEZ:
940            result=left.whereNonZero(m_tol);
941            break;
942        case EZ:
943            result=left.whereZero(m_tol);
944            break;
945      case SYM:      case SYM:
946      result=left.symmetric();          result=left.symmetric();
947      break;          break;
948      case NSYM:      case NSYM:
949      result=left.nonsymmetric();          result=left.antisymmetric();
950      break;          break;
951      case PROD:      case PROD:
952      result=C_GeneralTensorProduct(left,right,m_axis_offset, m_transpose);          result=C_GeneralTensorProduct(left,right,m_axis_offset, m_transpose);
953            break;
954        case TRANS:
955            result=left.transpose(m_axis_offset);
956            break;
957        case TRACE:
958            result=left.trace(m_axis_offset);
959            break;
960        case SWAP:
961            result=left.swapaxes(m_axis_offset, m_transpose);
962            break;
963        case MINVAL:
964            result=left.minval();
965            break;
966        case MAXVAL:
967            result=left.maxval();
968            break;
969        case HER:
970        result=left.hermitian();
971      break;      break;
972      default:      default:
973      throw DataException("Programmer error - collapseToReady does not know how to resolve operator "+opToString(m_op)+".");          throw DataException("Programmer error - collapseToReady does not know how to resolve operator "+opToString(m_op)+".");
974    }    }
975    return result.borrowReadyPtr();    return result.borrowReadyPtr();
976  }  }
# Line 624  DataLazy::collapseToReady() Line 982  DataLazy::collapseToReady()
982     the purpose of using DataLazy in the first place).     the purpose of using DataLazy in the first place).
983  */  */
984  void  void
985  DataLazy::collapse()  DataLazy::collapse() const
986  {  {
987    if (m_op==IDENTITY)    if (m_op==IDENTITY)
988    {    {
989      return;          return;
990    }    }
991    if (m_readytype=='E')    if (m_readytype=='E')
992    { // this is more an efficiency concern than anything else    {     // this is more an efficiency concern than anything else
993      throw DataException("Programmer Error - do not use collapse on Expanded data.");      throw DataException("Programmer Error - do not use collapse on Expanded data.");
994    }    }
995    m_id=collapseToReady();    m_id=collapseToReady();
996    m_op=IDENTITY;    m_op=IDENTITY;
997  }  }
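The net effect of collapse() on a constant or tagged tree, traced informally (state names as in the code above):

    // before:  m_op == SIN (say), m_readytype == 'C', m_id not yet set
    // after :  m_id == collapseToReady()   (a DataReady holding the evaluated result)
    //          m_op == IDENTITY
    // so any later resolveNodeSample() call simply hands back m_id's vector.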
998    
999  /*  // The result will be stored in m_samples
1000    \brief Compute the value of the expression (unary operation) for the given sample.  // The return value is a pointer to the DataVector, offset is the offset within the return value
1001    \return Vector which stores the value of the subexpression for the given sample.  const DataTypes::RealVectorType*
1002    \param v A vector to store intermediate results.  DataLazy::resolveNodeSample(int tid, int sampleNo, size_t& roffset) const
1003    \param offset Index in v to begin storing results.  {
1004    \param sampleNo Sample number to evaluate.  LAZYDEBUG(cout << "Resolve sample " << toString() << endl;)
1005    \param roffset (output parameter) the offset in the return vector where the result begins.          // collapse so we have a 'E' node or an IDENTITY for some other type
1006      if (m_readytype!='E' && m_op!=IDENTITY)
1007    The return value will be an existing vector so do not deallocate it.    {
1008    If the result is stored in v it should be stored at the offset given.          collapse();
1009    Everything from offset to the end of v should be considered available for this method to use.    }
1010  */    if (m_op==IDENTITY)  
1011  DataTypes::ValueType*    {
1012  DataLazy::resolveUnary(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const      const RealVectorType& vec=m_id->getVectorRO();
1013        roffset=m_id->getPointOffset(sampleNo, 0);
1014    #ifdef LAZY_STACK_PROF
1015    int x;
1016    if (&x<stackend[omp_get_thread_num()])
1017    {
1018           stackend[omp_get_thread_num()]=&x;
1019    }
1020    #endif
1021        return &(vec);
1022      }
1023      if (m_readytype!='E')
1024      {
1025        throw DataException("Programmer Error - Collapse did not produce an expanded node.");
1026      }
1027      if (m_sampleids[tid]==sampleNo)
1028      {
1029            roffset=tid*m_samplesize;
1030            return &(m_samples);            // sample is already resolved
1031      }
1032      m_sampleids[tid]=sampleNo;
1033    
1034      switch (getOpgroup(m_op))
1035      {
1036      case G_UNARY:
1037      case G_UNARY_P: return resolveNodeUnary(tid, sampleNo, roffset);
1038      case G_BINARY: return resolveNodeBinary(tid, sampleNo, roffset);
1039      case G_NP1OUT: return resolveNodeNP1OUT(tid, sampleNo, roffset);
1040      case G_NP1OUT_P: return resolveNodeNP1OUT_P(tid, sampleNo, roffset);
1041      case G_TENSORPROD: return resolveNodeTProd(tid, sampleNo, roffset);
1042      case G_NP1OUT_2P: return resolveNodeNP1OUT_2P(tid, sampleNo, roffset);
1043      case G_REDUCTION: return resolveNodeReduction(tid, sampleNo, roffset);
1044      case G_CONDEVAL: return resolveNodeCondEval(tid, sampleNo, roffset);
1045      default:
1046        throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");
1047      }
1048    }
1049    
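A minimal sketch of how a caller might drive resolveNodeSample() over all samples (illustrative only; the real resolve path lives elsewhere in DataLazy, and the getNumSamples() accessor is assumed from the parent class):

    #pragma omp parallel
    {
        int tid = omp_get_thread_num();
        size_t roffset = 0;
        #pragma omp for schedule(static)
        for (int sampleNo = 0; sampleNo < getNumSamples(); ++sampleNo)
        {
            const DataTypes::RealVectorType* res = resolveNodeSample(tid, sampleNo, roffset);
            // values for this sample start at (*res)[roffset] and span m_samplesize doubles
        }
    }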
1050    const DataTypes::RealVectorType*
1051    DataLazy::resolveNodeUnary(int tid, int sampleNo, size_t& roffset) const
1052  {  {
1053      // we assume that any collapsing has been done before we get here          // we assume that any collapsing has been done before we get here
1054      // since we only have one argument we don't need to think about only          // since we only have one argument we don't need to think about only
1055      // processing single points.          // processing single points.
1056            // we will also know we won't get identity nodes
1057    if (m_readytype!='E')    if (m_readytype!='E')
1058    {    {
1059      throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");      throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
1060    }    }
1061    const ValueType* vleft=m_left->resolveSample(v,offset,sampleNo,roffset);    if (m_op==IDENTITY)
1062    const double* left=&((*vleft)[roffset]);    {
1063    double* result=&(v[offset]);      throw DataException("Programmer error - resolveNodeUnary should not be called on identity nodes.");
1064    roffset=offset;    }
1065      const DataTypes::RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, roffset);
1066      const double* left=&((*leftres)[roffset]);
1067      roffset=m_samplesize*tid;
1068      double* result=&(m_samples[roffset]);
1069      escript::ESFunction operation=SINF;
1070    switch (m_op)    switch (m_op)
1071    {    {
1072      case SIN:        case SIN:
1073      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sin);      operation=SINF;
1074      break;      break;
1075      case COS:      case COS:
1076      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cos);          operation=COSF;
1077      break;      break;
1078      case TAN:      case TAN:
1079      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tan);          operation=TANF;
1080      break;      break;
1081      case ASIN:      case ASIN:
1082      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::asin);          operation=ASINF;
1083      break;      break;
1084      case ACOS:      case ACOS:
1085      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::acos);          operation=ACOSF;
1086      break;      break;
1087      case ATAN:      case ATAN:
1088      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::atan);          operation=ATANF;
1089      break;      break;
1090      case SINH:      case SINH:
1091      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sinh);          operation=SINHF;
1092      break;      break;
1093      case COSH:      case COSH:
1094      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cosh);          operation=COSHF;
1095      break;      break;
1096      case TANH:      case TANH:
1097      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tanh);          operation=TANHF;
1098      break;      break;
1099      case ERF:      case ERF:
1100  #if defined (_WIN32) && !defined(__INTEL_COMPILER)          operation=ERFF;
     throw DataException("Error - Data:: erf function is not supported on _WIN32 platforms.");  
 #else  
     tensor_unary_operation(m_samplesize, left, result, ::erf);  
1101      break;      break;
 #endif  
1102     case ASINH:     case ASINH:
1103  #if defined (_WIN32) && !defined(__INTEL_COMPILER)          operation=ASINHF;
     tensor_unary_operation(m_samplesize, left, result, escript::asinh_substitute);  
 #else  
     tensor_unary_operation(m_samplesize, left, result, ::asinh);  
 #endif    
1104      break;      break;
1105     case ACOSH:     case ACOSH:
1106  #if defined (_WIN32) && !defined(__INTEL_COMPILER)          operation=ACOSHF;
     tensor_unary_operation(m_samplesize, left, result, escript::acosh_substitute);  
 #else  
     tensor_unary_operation(m_samplesize, left, result, ::acosh);  
 #endif    
1107      break;      break;
1108     case ATANH:     case ATANH:
1109  #if defined (_WIN32) && !defined(__INTEL_COMPILER)          operation=ATANHF;
     tensor_unary_operation(m_samplesize, left, result, escript::atanh_substitute);  
 #else  
     tensor_unary_operation(m_samplesize, left, result, ::atanh);  
 #endif    
1110      break;      break;
1111      case LOG10:      case LOG10:
1112      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log10);          operation=LOG10F;
1113      break;      break;
1114      case LOG:      case LOG:
1115      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log);          operation=LOGF;
1116      break;      break;
1117      case SIGN:      case SIGN:
1118      tensor_unary_operation(m_samplesize, left, result, escript::fsign);          operation=SIGNF;
1119      break;      break;
1120      case ABS:      case ABS:
1121      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::fabs);          operation=ABSF;
1122      break;      break;
1123      case NEG:      case NEG:
1124      tensor_unary_operation(m_samplesize, left, result, negate<double>());          operation=NEGF;
1125      break;      break;
1126      case POS:      case POS:
1127      // it doesn't mean anything for delayed.          // it doesn't mean anything for delayed.
1128      // it will just trigger a deep copy of the lazy object          // it will just trigger a deep copy of the lazy object
1129      throw DataException("Programmer error - POS not supported for lazy data.");          throw DataException("Programmer error - POS not supported for lazy data.");
1130      break;          break;
1131      case EXP:      case EXP:
1132      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::exp);          operation=EXPF;
1133      break;      break;
1134      case SQRT:      case SQRT:
1135      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sqrt);          operation=SQRTF;
1136      break;      break;
1137      case RECIP:      case RECIP:
1138      tensor_unary_operation(m_samplesize, left, result, bind1st(divides<double>(),1.));          operation=INVF;
1139      break;      break;
1140      case GZ:      case GZ:
1141      tensor_unary_operation(m_samplesize, left, result, bind2nd(greater<double>(),0.0));          operation=GTZEROF;
1142      break;      break;
1143      case LZ:      case LZ:
1144      tensor_unary_operation(m_samplesize, left, result, bind2nd(less<double>(),0.0));          operation=LTZEROF;
1145      break;      break;
1146      case GEZ:      case GEZ:
1147      tensor_unary_operation(m_samplesize, left, result, bind2nd(greater_equal<double>(),0.0));          operation=GEZEROF;
1148      break;      break;
1149      case LEZ:      case LEZ:
1150      tensor_unary_operation(m_samplesize, left, result, bind2nd(less_equal<double>(),0.0));          operation=LEZEROF;
1151        break;
1152    // The next two cases are actually G_UNARY_P but I don't see a compelling reason to treat them differently
1153        case NEZ:
1154            operation=NEQZEROF;
1155        break;
1156        case EZ:
1157            operation=EQZEROF;
1158      break;      break;
   
1159      default:      default:
1160      throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");          throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");
1161    }    }
1162    return &v;    tensor_unary_array_operation(m_samplesize,
1163                                 left,
1164                                 result,
1165                                 operation,
1166                                 m_tol);  
1167      return &(m_samples);
1168  }  }
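The pattern above is: map the node's operator to a function code, then apply that function elementwise over the whole sample via tensor_unary_array_operation. As a rough, self-contained sketch of that idea (apply_unary and the lambda are hypothetical stand-ins, not escript routines):

// unary_sketch.cpp -- illustrative only, not part of escript.
#include <cmath>
#include <cstddef>
#include <iostream>
#include <vector>

// Apply f to each of 'count' doubles starting at 'in', writing to 'out';
// this is the elementwise step that the SIN/COS/... cases delegate to.
template <typename F>
void apply_unary(std::size_t count, const double* in, double* out, F f)
{
    for (std::size_t i = 0; i < count; ++i)
        out[i] = f(in[i]);
}

int main()
{
    std::vector<double> sample{0.0, 0.5, 1.0};     // one sample's worth of values
    std::vector<double> result(sample.size());
    apply_unary(sample.size(), sample.data(), result.data(),
                [](double x) { return std::sin(x); });   // e.g. the SIN case
    for (double v : result) std::cout << v << ' ';
    std::cout << '\n';
}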
1169    
1170    
1171  /*  const DataTypes::RealVectorType*
1172    \brief Compute the value of the expression (unary operation) for the given sample.  DataLazy::resolveNodeReduction(int tid, int sampleNo, size_t& roffset) const
   \return Vector which stores the value of the subexpression for the given sample.  
   \param v A vector to store intermediate results.  
   \param offset Index in v to begin storing results.  
   \param sampleNo Sample number to evaluate.  
   \param roffset (output parameter) the offset in the return vector where the result begins.  
   
   The return value will be an existing vector so do not deallocate it.  
   If the result is stored in v it should be stored at the offset given.  
   Everything from offset to the end of v should be considered available for this method to use.  
 */  
 DataTypes::ValueType*  
 DataLazy::resolveNP1OUT(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const  
1173  {  {
1174      // we assume that any collapsing has been done before we get here          // we assume that any collapsing has been done before we get here
1175      // since we only have one argument we don't need to think about only          // since we only have one argument we don't need to think about only
1176      // processing single points.          // processing single points.
1177            // we will also know we won't get identity nodes
1178    if (m_readytype!='E')    if (m_readytype!='E')
1179    {    {
1180      throw DataException("Programmer error - resolveNP1OUT should only be called on expanded Data.");      throw DataException("Programmer error - resolveNodeReduction should only be called on expanded Data.");
1181    }    }
1182      // since we can't write the result over the input, we need a result offset further along    if (m_op==IDENTITY)
1183    size_t subroffset=roffset+m_samplesize;    {
1184    const ValueType* vleft=m_left->resolveSample(v,offset,sampleNo,subroffset);      throw DataException("Programmer error - resolveNodeReduction should not be called on identity nodes.");
1185    roffset=offset;    }
1186      size_t loffset=0;
1187      const DataTypes::RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, loffset);
1188    
1189      roffset=m_samplesize*tid;
1190      unsigned int ndpps=getNumDPPSample();
1191      unsigned int psize=DataTypes::noValues(m_left->getShape());
1192      double* result=&(m_samples[roffset]);
1193      switch (m_op)
1194      {
1195        case MINVAL:
1196            {
1197              for (unsigned int z=0;z<ndpps;++z)
1198              {
1199                FMin op;
1200                *result=escript::reductionOpVector(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max());
1201                loffset+=psize;
1202                result++;
1203              }
1204            }
1205            break;
1206        case MAXVAL:
1207            {
1208              for (unsigned int z=0;z<ndpps;++z)
1209              {
1210              FMax op;
1211              *result=escript::reductionOpVector(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max()*-1);
1212              loffset+=psize;
1213              result++;
1214              }
1215            }
1216            break;
1217        default:
1218            throw DataException("Programmer error - resolveNodeReduction can not resolve operator "+opToString(m_op)+".");
1219      }
1220      return &(m_samples);
1221    }
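As a rough illustration of what the MINVAL branch above produces -- one reduced value per data point of the sample, seeded from numeric_limits<double>::max() -- here is a standalone sketch that uses plain std::min in place of the FMin functor and escript::reductionOpVector (minval_per_point is an invented name):

// reduction_sketch.cpp -- illustrative only.
#include <algorithm>
#include <cstddef>
#include <iostream>
#include <limits>
#include <vector>

// Reduce each data point (psize doubles) of one sample to its minimum.
std::vector<double> minval_per_point(const std::vector<double>& sample,
                                     std::size_t psize)
{
    std::vector<double> out;
    for (std::size_t start = 0; start + psize <= sample.size(); start += psize)
    {
        double m = std::numeric_limits<double>::max();
        for (std::size_t i = 0; i < psize; ++i)
            m = std::min(m, sample[start + i]);
        out.push_back(m);
    }
    return out;
}

int main()
{
    std::vector<double> sample{3, 1, 2,   9, 7, 8};   // two data points, psize == 3
    for (double v : minval_per_point(sample, 3)) std::cout << v << ' ';  // 1 7
    std::cout << '\n';
}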
1222    
1223    const DataTypes::RealVectorType*
1224    DataLazy::resolveNodeNP1OUT(int tid, int sampleNo, size_t& roffset) const
1225    {
1226            // we assume that any collapsing has been done before we get here
1227            // since we only have one argument we don't need to think about only
1228            // processing single points.
1229      if (m_readytype!='E')
1230      {
1231        throw DataException("Programmer error - resolveNodeNP1OUT should only be called on expanded Data.");
1232      }
1233      if (m_op==IDENTITY)
1234      {
1235        throw DataException("Programmer error - resolveNodeNP1OUT should not be called on identity nodes.");
1236      }
1237      size_t subroffset;
1238      const RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1239      roffset=m_samplesize*tid;
1240      size_t loop=0;
1241      size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1242      size_t step=getNoValues();
1243      size_t offset=roffset;
1244    switch (m_op)    switch (m_op)
1245    {    {
1246      case SYM:      case SYM:
1247      DataMaths::symmetric(*vleft,m_left->getShape(),subroffset, v, getShape(), offset);          for (loop=0;loop<numsteps;++loop)
1248      break;          {
1249                escript::symmetric(*leftres,m_left->getShape(),subroffset, m_samples, getShape(), offset);
1250                subroffset+=step;
1251                offset+=step;
1252            }
1253            break;
1254      case NSYM:      case NSYM:
1255      DataMaths::nonsymmetric(*vleft,m_left->getShape(),subroffset, v, getShape(), offset);          for (loop=0;loop<numsteps;++loop)
1256      break;          {
1257                escript::antisymmetric(*leftres,m_left->getShape(),subroffset, m_samples, getShape(), offset);
1258                subroffset+=step;
1259                offset+=step;
1260            }
1261            break;
1262        default:
1263            throw DataException("Programmer error - resolveNP1OUT can not resolve operator "+opToString(m_op)+".");
1264      }
1265      return &m_samples;
1266    }
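For readers unfamiliar with the SYM case looped over above, the per-point operation is the symmetric part 0.5*(A + A^T) of a rank-2 point (NSYM is the antisymmetric analogue). A minimal sketch for a single square point, without the shape/offset plumbing that escript::symmetric actually takes:

// symmetric_sketch.cpp -- illustrative only.
#include <cstddef>
#include <iostream>
#include <vector>

// Symmetric part of an n x n point stored row-major.
void symmetric_part(const std::vector<double>& a, std::vector<double>& out, std::size_t n)
{
    for (std::size_t i = 0; i < n; ++i)
        for (std::size_t j = 0; j < n; ++j)
            out[i*n + j] = 0.5 * (a[i*n + j] + a[j*n + i]);
}

int main()
{
    std::vector<double> a{1, 2, 3, 4};   // 2x2 point
    std::vector<double> s(4);
    symmetric_part(a, s, 2);
    for (double v : s) std::cout << v << ' ';   // 1 2.5 2.5 4
    std::cout << '\n';
}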
1267    
1268    const DataTypes::RealVectorType*
1269    DataLazy::resolveNodeNP1OUT_P(int tid, int sampleNo, size_t& roffset) const
1270    {
1271            // we assume that any collapsing has been done before we get here
1272            // since we only have one argument we don't need to think about only
1273            // processing single points.
1274      if (m_readytype!='E')
1275      {
1276        throw DataException("Programmer error - resolveNodeNP1OUT_P should only be called on expanded Data.");
1277      }
1278      if (m_op==IDENTITY)
1279      {
1280        throw DataException("Programmer error - resolveNodeNP1OUT_P should not be called on identity nodes.");
1281      }
1282      size_t subroffset;
1283      size_t offset;
1284      const RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1285      roffset=m_samplesize*tid;
1286      offset=roffset;
1287      size_t loop=0;
1288      size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1289      size_t outstep=getNoValues();
1290      size_t instep=m_left->getNoValues();
1291      switch (m_op)
1292      {
1293        case TRACE:
1294            for (loop=0;loop<numsteps;++loop)
1295            {
1296                escript::trace(*leftres,m_left->getShape(),subroffset, m_samples ,getShape(),offset,m_axis_offset);
1297                subroffset+=instep;
1298                offset+=outstep;
1299            }
1300            break;
1301        case TRANS:
1302            for (loop=0;loop<numsteps;++loop)
1303            {
1304                escript::transpose(*leftres,m_left->getShape(),subroffset, m_samples, getShape(),offset,m_axis_offset);
1305                subroffset+=instep;
1306                offset+=outstep;
1307            }
1308            break;
1309        default:
1310            throw DataException("Programmer error - resolveNP1OUTP can not resolve operator "+opToString(m_op)+".");
1311      }
1312      return &m_samples;
1313    }
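A small sketch of the per-point TRACE operation driven by the loop above, restricted to a square rank-2 point; the real escript::trace generalises this over higher ranks via the axis_offset argument, which is not modelled here:

// trace_sketch.cpp -- illustrative only.
#include <cstddef>
#include <iostream>
#include <vector>

// Trace of an n x n data point stored row-major.
double trace2(const std::vector<double>& a, std::size_t n)
{
    double t = 0.0;
    for (std::size_t i = 0; i < n; ++i)
        t += a[i*n + i];
    return t;
}

int main()
{
    std::vector<double> a{1, 2, 3, 4};      // 2x2 point
    std::cout << trace2(a, 2) << '\n';      // 5
}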
1314    
1315    
1316    const DataTypes::RealVectorType*
1317    DataLazy::resolveNodeNP1OUT_2P(int tid, int sampleNo, size_t& roffset) const
1318    {
1319      if (m_readytype!='E')
1320      {
1321        throw DataException("Programmer error - resolveNodeNP1OUT_2P should only be called on expanded Data.");
1322      }
1323      if (m_op==IDENTITY)
1324      {
1325        throw DataException("Programmer error - resolveNodeNP1OUT_2P should not be called on identity nodes.");
1326      }
1327      size_t subroffset;
1328      size_t offset;
1329      const RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1330      roffset=m_samplesize*tid;
1331      offset=roffset;
1332      size_t loop=0;
1333      size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1334      size_t outstep=getNoValues();
1335      size_t instep=m_left->getNoValues();
1336      switch (m_op)
1337      {
1338        case SWAP:
1339            for (loop=0;loop<numsteps;++loop)
1340            {
1341                escript::swapaxes(*leftres,m_left->getShape(),subroffset, m_samples, getShape(),offset, m_axis_offset, m_transpose);
1342                subroffset+=instep;
1343                offset+=outstep;
1344            }
1345            break;
1346      default:      default:
1347      throw DataException("Programmer error - resolveNP1OUT can not resolve operator "+opToString(m_op)+".");          throw DataException("Programmer error - resolveNodeNP1OUT2P can not resolve operator "+opToString(m_op)+".");
1348    }    }
1349    return &v;    return &m_samples;
1350  }  }
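All of the NP1OUT-style helpers above share one loop shape: walk the data points of the sample, advancing the input offset by instep and the output offset by outstep, and apply the per-point transform each time. The sketch below reproduces just that stepping, using an axis swap of 2x2 points as a stand-in transform (sizes and names are invented for illustration):

// per_point_loop_sketch.cpp -- illustrative only.
#include <cstddef>
#include <iostream>
#include <vector>

// Transform 'numsteps' data points, reading 'instep' doubles and writing
// 'outstep' doubles per point; the per-point operation here is a 2x2 transpose.
void per_point_transform(const std::vector<double>& in, std::vector<double>& out,
                         std::size_t numsteps, std::size_t instep, std::size_t outstep)
{
    std::size_t inoff = 0, outoff = 0;
    for (std::size_t p = 0; p < numsteps; ++p, inoff += instep, outoff += outstep)
        for (std::size_t i = 0; i < 2; ++i)
            for (std::size_t j = 0; j < 2; ++j)
                out[outoff + j*2 + i] = in[inoff + i*2 + j];
}

int main()
{
    std::vector<double> in{1, 2, 3, 4,  5, 6, 7, 8};   // two 2x2 points
    std::vector<double> out(in.size());
    per_point_transform(in, out, 2, 4, 4);
    for (double v : out) std::cout << v << ' ';        // 1 3 2 4 5 7 6 8
    std::cout << '\n';
}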
1351    
1352    const DataTypes::RealVectorType*
1353    DataLazy::resolveNodeCondEval(int tid, int sampleNo, size_t& roffset) const
1354    {
1355      if (m_readytype!='E')
1356      {
1357        throw DataException("Programmer error - resolveNodeCondEval should only be called on expanded Data.");
1358      }
1359      if (m_op!=CONDEVAL)
1360      {
1361        throw DataException("Programmer error - resolveNodeCondEval should only be called on CONDEVAL nodes.");
1362      }
1363      size_t subroffset;
1364    
1365      const RealVectorType* maskres=m_mask->resolveNodeSample(tid, sampleNo, subroffset);
1366      const RealVectorType* srcres=0;
1367      if ((*maskres)[subroffset]>0)
1368      {
1369            srcres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1370      }
1371      else
1372      {
1373            srcres=m_right->resolveNodeSample(tid, sampleNo, subroffset);
1374      }
1375    
1376      // Now we need to copy the result
1377    
1378      roffset=m_samplesize*tid;
1379      for (int i=0;i<m_samplesize;++i)
1380      {
1381            m_samples[roffset+i]=(*srcres)[subroffset+i];  
1382      }
1383    
1384  #define PROC_OP(TYPE,X)                               \    return &m_samples;
1385      for (int i=0;i<steps;++i,resultp+=resultStep) \  }
     { \  
        tensor_binary_operation< TYPE >(chunksize, &((*left)[lroffset]), &((*right)[rroffset]), resultp, X); \  
        lroffset+=leftStep; \  
        rroffset+=rightStep; \  
     }  
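Regarding the CONDEVAL resolver that closes above: the whole sample is taken from one child or the other, chosen by the mask value for that sample, and then copied into the node's own buffer. A compact sketch of that select-and-copy step (condeval_sample is an invented name, and the real code tests the mask at an offset within its resolved sample):

// condeval_sketch.cpp -- illustrative only.
#include <iostream>
#include <vector>

// Pick a whole sample from either branch based on the mask value, then copy it.
std::vector<double> condeval_sample(double maskval,
                                    const std::vector<double>& when_true,
                                    const std::vector<double>& when_false)
{
    const std::vector<double>& src = (maskval > 0) ? when_true : when_false;
    return std::vector<double>(src.begin(), src.end());   // explicit copy, like the element loop above
}

int main()
{
    std::vector<double> a{1, 2, 3}, b{9, 8, 7};
    for (double v : condeval_sample(1.0, a, b)) std::cout << v << ' ';   // 1 2 3
    std::cout << '\n';
}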
1386    
 /*  
   \brief Compute the value of the expression (binary operation) for the given sample.  
   \return Vector which stores the value of the subexpression for the given sample.  
   \param v A vector to store intermediate results.  
   \param offset Index in v to begin storing results.  
   \param sampleNo Sample number to evaluate.  
   \param roffset (output parameter) the offset in the return vector where the result begins.  
   
   The return value will be an existing vector so do not deallocate it.  
   If the result is stored in v it should be stored at the offset given.  
   Everything from offset to the end of v should be considered available for this method to use.  
 */  
1387  // This method assumes that any subexpressions which evaluate to Constant or Tagged Data  // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1388  // have already been collapsed to IDENTITY. So we must have at least one expanded child.  // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1389  // If both children are expanded, then we can process them in a single operation (we treat  // If both children are expanded, then we can process them in a single operation (we treat
# Line 842  DataLazy::resolveNP1OUT(ValueType& v, si Line 1393  DataLazy::resolveNP1OUT(ValueType& v, si
1393  // There is an additional complication when scalar operations are considered.  // There is an additional complication when scalar operations are considered.
1394  // For example, 2+Vector.  // For example, 2+Vector.
1395  // In this case each double within the point is treated individually  // In this case each double within the point is treated individually
1396  DataTypes::ValueType*  const DataTypes::RealVectorType*
1397  DataLazy::resolveBinary(ValueType& v,  size_t offset, int sampleNo, size_t& roffset) const  DataLazy::resolveNodeBinary(int tid, int sampleNo, size_t& roffset) const
1398  {  {
1399  cout << "Resolve binary: " << toString() << endl;  LAZYDEBUG(cout << "Resolve binary: " << toString() << endl;)
1400    
1401    size_t lroffset=0, rroffset=0;    // offsets in the left and right result vectors    size_t lroffset=0, rroffset=0;        // offsets in the left and right result vectors
1402      // first work out which of the children are expanded          // first work out which of the children are expanded
1403    bool leftExp=(m_left->m_readytype=='E');    bool leftExp=(m_left->m_readytype=='E');
1404    bool rightExp=(m_right->m_readytype=='E');    bool rightExp=(m_right->m_readytype=='E');
1405    bool bigloops=((leftExp && rightExp) || (!leftExp && !rightExp)); // is processing in single step?    if (!leftExp && !rightExp)
1406    int steps=(bigloops?1:getNumDPPSample());    {
1407    size_t chunksize=(bigloops? m_samplesize : getNoValues());    // if bigloops, pretend the whole sample is a datapoint          throw DataException("Programmer Error - please use collapse if neither argument has type 'E'.");
1408    if (m_left->getRank()!=m_right->getRank())    // need to deal with scalar * ? ops    }
1409    {    bool leftScalar=(m_left->getRank()==0);
1410      EsysAssert((m_left->getRank()==0) || (m_right->getRank()==0), "Error - Ranks must match unless one is 0.");    bool rightScalar=(m_right->getRank()==0);
1411      steps=getNumDPPSample()*max(m_left->getNoValues(),m_right->getNoValues());    if ((m_left->getRank()!=m_right->getRank()) && (!leftScalar && !rightScalar))
1412      chunksize=1;    // for scalar    {
1413    }              throw DataException("resolveBinary - ranks of arguments must match unless one of them is scalar.");
1414    int leftStep=((leftExp && !rightExp)? m_right->getNoValues() : 0);    }
1415    int rightStep=((rightExp && !leftExp)? m_left->getNoValues() : 0);    size_t leftsize=m_left->getNoValues();
1416    int resultStep=max(leftStep,rightStep);   // only one (at most) should be !=0    size_t rightsize=m_right->getNoValues();
1417      // Get the values of sub-expressions    size_t chunksize=1;                   // how many doubles will be processed in one go
1418    const ValueType* left=m_left->resolveSample(v,offset,sampleNo,lroffset);    int leftstep=0;               // how far should the left offset advance after each step
1419    const ValueType* right=m_right->resolveSample(v,offset+m_samplesize,sampleNo,rroffset); // Note    int rightstep=0;
1420      // the right child starts further along.    int numsteps=0;               // total number of steps for the inner loop
1421    double* resultp=&(v[offset]);     // results are stored at the vector offset we received    int oleftstep=0;      // the o variables refer to the outer loop
1422      int orightstep=0;     // The outer loop is only required in cases where there is an extended scalar
1423      int onumsteps=1;
1424      
1425      bool LES=(leftExp && leftScalar);     // Left is an expanded scalar
1426      bool RES=(rightExp && rightScalar);
1427      bool LS=(!leftExp && leftScalar);     // left is a single scalar
1428      bool RS=(!rightExp && rightScalar);
1429      bool LN=(!leftExp && !leftScalar);    // left is a single non-scalar
1430      bool RN=(!rightExp && !rightScalar);
1431      bool LEN=(leftExp && !leftScalar);    // left is an expanded non-scalar
1432      bool REN=(rightExp && !rightScalar);
1433    
1434      if ((LES && RES) || (LEN && REN))     // both are Expanded scalars or both are expanded non-scalars
1435      {
1436            chunksize=m_left->getNumDPPSample()*leftsize;
1437            leftstep=0;
1438            rightstep=0;
1439            numsteps=1;
1440      }
1441      else if (LES || RES)
1442      {
1443            chunksize=1;
1444            if (LES)                // left is an expanded scalar
1445            {
1446                    if (RS)
1447                    {
1448                       leftstep=1;
1449                       rightstep=0;
1450                       numsteps=m_left->getNumDPPSample();
1451                    }
1452                    else            // RN or REN
1453                    {
1454                       leftstep=0;
1455                       oleftstep=1;
1456                       rightstep=1;
1457                       orightstep=(RN ? -(int)rightsize : 0);
1458                       numsteps=rightsize;
1459                       onumsteps=m_left->getNumDPPSample();
1460                    }
1461            }
1462            else            // right is an expanded scalar
1463            {
1464                    if (LS)
1465                    {
1466                       rightstep=1;
1467                       leftstep=0;
1468                       numsteps=m_right->getNumDPPSample();
1469                    }
1470                    else
1471                    {
1472                       rightstep=0;
1473                       orightstep=1;
1474                       leftstep=1;
1475                       oleftstep=(LN ? -(int)leftsize : 0);
1476                       numsteps=leftsize;
1477                       onumsteps=m_right->getNumDPPSample();
1478                    }
1479            }
1480      }
1481      else  // this leaves (LEN, RS), (LEN, RN) and their transposes
1482      {
1483            if (LEN)        // and Right will be a single value
1484            {
1485                    chunksize=rightsize;
1486                    leftstep=rightsize;
1487                    rightstep=0;
1488                    numsteps=m_left->getNumDPPSample();
1489                    if (RS)
1490                    {
1491                       numsteps*=leftsize;
1492                    }
1493            }
1494            else    // REN
1495            {
1496                    chunksize=leftsize;
1497                    rightstep=leftsize;
1498                    leftstep=0;
1499                    numsteps=m_right->getNumDPPSample();
1500                    if (LS)
1501                    {
1502                       numsteps*=rightsize;
1503                    }
1504            }
1505      }
1506    
1507      int resultStep=max(leftstep,rightstep);       // only one (at most) should be !=0
1508            // Get the values of sub-expressions
1509      const RealVectorType* left=m_left->resolveNodeSample(tid,sampleNo,lroffset);      
1510      const RealVectorType* right=m_right->resolveNodeSample(tid,sampleNo,rroffset);
1511    LAZYDEBUG(cout << "Post sub calls in " << toString() << endl;)
1512    LAZYDEBUG(cout << "shapes=" << DataTypes::shapeToString(m_left->getShape()) << "," << DataTypes::shapeToString(m_right->getShape()) << endl;)
1513    LAZYDEBUG(cout << "chunksize=" << chunksize << endl << "leftstep=" << leftstep << " rightstep=" << rightstep;)
1514    LAZYDEBUG(cout << " numsteps=" << numsteps << endl << "oleftstep=" << oleftstep << " orightstep=" << orightstep;)
1515    LAZYDEBUG(cout << "onumsteps=" << onumsteps << endl;)
1516    LAZYDEBUG(cout << " DPPS=" << m_left->getNumDPPSample() << "," <<m_right->getNumDPPSample() << endl;)
1517    LAZYDEBUG(cout << "" << LS << RS << LN << RN << LES << RES <<LEN << REN <<   endl;)
1518    
1519    LAZYDEBUG(cout << "Left res["<< lroffset<< "]=" << (*left)[lroffset] << endl;)
1520    LAZYDEBUG(cout << "Right res["<< rroffset<< "]=" << (*right)[rroffset] << endl;)
1521    
1522    
1523      roffset=m_samplesize*tid;
1524      double* resultp=&(m_samples[roffset]);                // results are stored at the vector offset we received
1525    switch(m_op)    switch(m_op)
1526    {    {
1527      case ADD:      case ADD:
1528          PROC_OP(NO_ARG,plus<double>());          //PROC_OP(NO_ARG,plus<double>());
1529      break;        escript::binaryOpVectorLazyHelper<real_t, real_t, real_t>(resultp,
1530                 &(*left)[0],
1531                 &(*right)[0],
1532                 chunksize,
1533                 onumsteps,
1534                 numsteps,
1535                 resultStep,
1536                 leftstep,
1537                 rightstep,
1538                 oleftstep,
1539                 orightstep,
1540                 lroffset,
1541                 rroffset,
1542                 escript::ESFunction::PLUSF);  
1543            break;
1544      case SUB:      case SUB:
1545      PROC_OP(NO_ARG,minus<double>());        escript::binaryOpVectorLazyHelper<real_t, real_t, real_t>(resultp,
1546      break;               &(*left)[0],
1547                 &(*right)[0],
1548                 chunksize,
1549                 onumsteps,
1550                 numsteps,
1551                 resultStep,
1552                 leftstep,
1553                 rightstep,
1554                 oleftstep,
1555                 orightstep,
1556                 lroffset,
1557                 rroffset,
1558                 escript::ESFunction::MINUSF);        
1559            //PROC_OP(NO_ARG,minus<double>());
1560            break;
1561      case MUL:      case MUL:
1562      PROC_OP(NO_ARG,multiplies<double>());          //PROC_OP(NO_ARG,multiplies<double>());
1563      break;        escript::binaryOpVectorLazyHelper<real_t, real_t, real_t>(resultp,
1564                 &(*left)[0],
1565                 &(*right)[0],
1566                 chunksize,
1567                 onumsteps,
1568                 numsteps,
1569                 resultStep,
1570                 leftstep,
1571                 rightstep,
1572                 oleftstep,
1573                 orightstep,
1574                 lroffset,
1575                 rroffset,
1576                 escript::ESFunction::MULTIPLIESF);      
1577            break;
1578      case DIV:      case DIV:
1579      PROC_OP(NO_ARG,divides<double>());          //PROC_OP(NO_ARG,divides<double>());
1580      break;        escript::binaryOpVectorLazyHelper<real_t, real_t, real_t>(resultp,
1581                 &(*left)[0],
1582                 &(*right)[0],
1583                 chunksize,
1584                 onumsteps,
1585                 numsteps,
1586                 resultStep,
1587                 leftstep,
1588                 rightstep,
1589                 oleftstep,
1590                 orightstep,
1591                 lroffset,
1592                 rroffset,
1593                 escript::ESFunction::DIVIDESF);          
1594            break;
1595      case POW:      case POW:
1596         PROC_OP(double (double,double),::pow);         //PROC_OP(double (double,double),::pow);
1597      break;        escript::binaryOpVectorLazyHelper<real_t, real_t, real_t>(resultp,
1598                 &(*left)[0],
1599                 &(*right)[0],
1600                 chunksize,
1601                 onumsteps,
1602                 numsteps,
1603                 resultStep,
1604                 leftstep,
1605                 rightstep,
1606                 oleftstep,
1607                 orightstep,
1608                 lroffset,
1609                 rroffset,
1610                 escript::ESFunction::POWF);          
1611            break;
1612      default:      default:
1613      throw DataException("Programmer error - resolveBinary can not resolve operator "+opToString(m_op)+".");          throw DataException("Programmer error - resolveBinary can not resolve operator "+opToString(m_op)+".");
1614    }    }
1615    roffset=offset;    LAZYDEBUG(cout << "Result res[" << roffset<< "]" << m_samples[roffset] << endl;)
1616    return &v;    return &m_samples;
1617  }  }
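The chunksize / numsteps / leftstep / rightstep bookkeeping set up before the switch above is what allows a scalar operand to be broadcast against an expanded sample without writing a separate loop for every combination of child types. The sketch below reduces that stepping scheme to its core (the outer oleftstep/orightstep loop and binaryOpVectorLazyHelper are omitted; binary_step and its parameters are hypothetical):

// binary_step_sketch.cpp -- illustrative only.
#include <cstddef>
#include <functional>
#include <iostream>
#include <vector>

// Process 'numsteps' chunks of 'chunksize' doubles, advancing each operand by
// its own step; a step of zero keeps reusing the same values, which is how a
// scalar gets broadcast against a full sample.
template <typename Op>
void binary_step(double* result, const double* left, const double* right,
                 std::size_t chunksize, std::size_t numsteps,
                 std::size_t resultstep, std::size_t leftstep, std::size_t rightstep,
                 Op op)
{
    std::size_t loff = 0, roff = 0;
    for (std::size_t s = 0; s < numsteps;
         ++s, result += resultstep, loff += leftstep, roff += rightstep)
        for (std::size_t i = 0; i < chunksize; ++i)
            result[i] = op(left[loff + i], right[roff + i]);
}

int main()
{
    // Expanded non-scalar (6 values) plus a single constant scalar:
    // chunksize=1, leftstep=1, rightstep=0, numsteps=6 -- the scalar is broadcast.
    std::vector<double> left{1, 2, 3, 4, 5, 6}, right{10}, result(6);
    binary_step(result.data(), left.data(), right.data(),
                1, 6, 1, 1, 0, std::plus<double>());
    for (double v : result) std::cout << v << ' ';   // 11 12 13 14 15 16
    std::cout << '\n';
}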
1618    
1619    
 /*  
   \brief Compute the value of the expression (tensor product) for the given sample.  
   \return Vector which stores the value of the subexpression for the given sample.  
   \param v A vector to store intermediate results.  
   \param offset Index in v to begin storing results.  
   \param sampleNo Sample number to evaluate.  
   \param roffset (output parameter) the offset in the return vector where the result begins.  
   
   The return value will be an existing vector so do not deallocate it.  
   If the result is stored in v it should be stored at the offset given.  
   Everything from offset to the end of v should be considered available for this method to use.  
 */  
1620  // This method assumes that any subexpressions which evaluate to Constant or Tagged Data  // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1621  // have already been collapsed to IDENTITY. So we must have at least one expanded child.  // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1622  // unlike the other resolve helpers, we must treat these datapoints separately.  // unlike the other resolve helpers, we must treat these datapoints separately.
1623  DataTypes::ValueType*  const DataTypes::RealVectorType*
1624  DataLazy::resolveTProd(ValueType& v,  size_t offset, int sampleNo, size_t& roffset) const  DataLazy::resolveNodeTProd(int tid, int sampleNo, size_t& roffset) const
1625  {  {
1626  cout << "Resolve TensorProduct: " << toString() << endl;  LAZYDEBUG(cout << "Resolve TensorProduct: " << toString() << endl;)
1627    
1628    size_t lroffset=0, rroffset=0;    // offsets in the left and right result vectors    size_t lroffset=0, rroffset=0;        // offsets in the left and right result vectors
1629      // first work out which of the children are expanded          // first work out which of the children are expanded
1630    bool leftExp=(m_left->m_readytype=='E');    bool leftExp=(m_left->m_readytype=='E');
1631    bool rightExp=(m_right->m_readytype=='E');    bool rightExp=(m_right->m_readytype=='E');
1632    int steps=getNumDPPSample();    int steps=getNumDPPSample();
1633    int leftStep=((leftExp && !rightExp)? m_right->getNoValues() : 0);    int leftStep=(leftExp? m_left->getNoValues() : 0);            // do not have scalars as input to this method
1634    int rightStep=((rightExp && !leftExp)? m_left->getNoValues() : 0);    int rightStep=(rightExp?m_right->getNoValues() : 0);
1635    int resultStep=max(leftStep,rightStep);   // only one (at most) should be !=0  
1636      // Get the values of sub-expressions (leave a gap of one sample for the result).    int resultStep=getNoValues();
1637    const ValueType* left=m_left->resolveSample(v,offset+m_samplesize,sampleNo,lroffset);    roffset=m_samplesize*tid;
1638    const ValueType* right=m_right->resolveSample(v,offset+2*m_samplesize,sampleNo,rroffset);    size_t offset=roffset;
1639    double* resultp=&(v[offset]);     // results are stored at the vector offset we received  
1640      const RealVectorType* left=m_left->resolveNodeSample(tid, sampleNo, lroffset);
1641    
1642      const RealVectorType* right=m_right->resolveNodeSample(tid, sampleNo, rroffset);
1643    
1644    LAZYDEBUG(cerr << "[Left shape]=" << DataTypes::shapeToString(m_left->getShape()) << "\n[Right shape]=" << DataTypes::shapeToString(m_right->getShape()) << " result=" <<DataTypes::shapeToString(getShape()) <<  endl;
1645    cout << getNoValues() << endl;)
1646    
1647    
1648    LAZYDEBUG(cerr << "Post sub calls: " << toString() << endl;)
1649    LAZYDEBUG(cout << "LeftExp=" << leftExp << " rightExp=" << rightExp << endl;)
1650    LAZYDEBUG(cout << "LeftR=" << m_left->getRank() << " rightExp=" << m_right->getRank() << endl;)
1651    LAZYDEBUG(cout << "LeftSize=" << m_left->getNoValues() << " RightSize=" << m_right->getNoValues() << endl;)
1652    LAZYDEBUG(cout << "m_samplesize=" << m_samplesize << endl;)
1653    LAZYDEBUG(cout << "outputshape=" << DataTypes::shapeToString(getShape()) << endl;)
1654    LAZYDEBUG(cout << "DPPS=" << m_right->getNumDPPSample() <<"."<<endl;)
1655    
1656      double* resultp=&(m_samples[offset]);         // results are stored at the vector offset we received
1657    switch(m_op)    switch(m_op)
1658    {    {
1659      case PROD:      case PROD:
1660      for (int i=0;i<steps;++i,resultp+=resultStep)          for (int i=0;i<steps;++i,resultp+=resultStep)
1661      {          {
1662            const double *ptr_0 = &((*left)[lroffset]);            const double *ptr_0 = &((*left)[lroffset]);
1663            const double *ptr_1 = &((*right)[rroffset]);            const double *ptr_1 = &((*right)[rroffset]);
1664            matrix_matrix_product(m_SL, m_SM, m_SR, ptr_0, ptr_1, resultp, m_transpose);  
1665        lroffset+=leftStep;  LAZYDEBUG(cout << DataTypes::pointToString(*left, m_left->getShape(),lroffset,"LEFT") << endl;)
1666        rroffset+=rightStep;  LAZYDEBUG(cout << DataTypes::pointToString(*right,m_right->getShape(),rroffset, "RIGHT") << endl;)
1667      }  
1668      break;            matrix_matrix_product(m_SL, m_SM, m_SR, ptr_0, ptr_1, resultp, m_transpose);
1669    
1670              lroffset+=leftStep;
1671              rroffset+=rightStep;
1672            }
1673            break;
1674      default:      default:
1675      throw DataException("Programmer error - resolveTProduct can not resolve operator "+opToString(m_op)+".");          throw DataException("Programmer error - resolveTProduct can not resolve operator "+opToString(m_op)+".");
1676    }    }
1677    roffset=offset;    roffset=offset;
1678    return &v;    return &m_samples;
1679  }  }
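For context on the PROD branch above: matrix_matrix_product contracts an (SL x SM) block of the left point with an (SM x SR) block of the right point. The standalone sketch below performs just that contraction for one data point, assuming row-major storage and ignoring the transpose flag and offsets (mm_product is an illustrative name, not the library routine):

// tprod_sketch.cpp -- illustrative only.
#include <cstddef>
#include <iostream>
#include <vector>

// Plain (SL x SM) * (SM x SR) product for a single data point.
void mm_product(std::size_t SL, std::size_t SM, std::size_t SR,
                const double* left, const double* right, double* out)
{
    for (std::size_t i = 0; i < SL; ++i)
        for (std::size_t k = 0; k < SR; ++k)
        {
            double acc = 0.0;
            for (std::size_t j = 0; j < SM; ++j)
                acc += left[i*SM + j] * right[j*SR + k];
            out[i*SR + k] = acc;
        }
}

int main()
{
    std::vector<double> a{1, 2, 3, 4};       // 2x2
    std::vector<double> b{5, 6, 7, 8};       // 2x2
    std::vector<double> c(4);
    mm_product(2, 2, 2, a.data(), b.data(), c.data());
    for (double v : c) std::cout << v << ' ';   // 19 22 43 50
    std::cout << '\n';
}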
1680    
1681    
1682    const DataTypes::RealVectorType*
1683    DataLazy::resolveSample(int sampleNo, size_t& roffset) const
1684    {
1685    #ifdef _OPENMP
1686            int tid=omp_get_thread_num();
1687    #else
1688            int tid=0;
1689    #endif
1690    
1691  /*  #ifdef LAZY_STACK_PROF
1692    \brief Compute the value of the expression for the given sample.          stackstart[tid]=&tid;
1693    \return Vector which stores the value of the subexpression for the given sample.          stackend[tid]=&tid;
1694    \param v A vector to store intermediate results.          const DataTypes::RealVectorType* r=resolveNodeSample(tid, sampleNo, roffset);
1695    \param offset Index in v to begin storing results.          size_t d=(size_t)stackstart[tid]-(size_t)stackend[tid];
1696    \param sampleNo Sample number to evaluate.          #pragma omp critical
1697    \param roffset (output parameter) the offset in the return vector where the result begins.          if (d>maxstackuse)
1698            {
1699    cout << "Max resolve Stack use " << d << endl;
1700                    maxstackuse=d;
1701            }
1702            return r;
1703    #else
1704            return resolveNodeSample(tid, sampleNo, roffset);
1705    #endif
1706    }
1707    
1708    The return value will be an existing vector so do not deallocate it.  
1709  */  // This needs to do the work of the identity constructor
1710  // the vector and the offset are a place where the method could write its data if it wishes  void
1711  // it is not obligated to do so. For example, if it has its own storage already, it can use that.  DataLazy::resolveToIdentity()
 // Hence the return value to indicate where the data is actually stored.  
 // Regardless, the storage should be assumed to be used, even if it isn't.  
   
 // the roffset is the offset within the returned vector where the data begins  
 const DataTypes::ValueType*  
 DataLazy::resolveSample(ValueType& v, size_t offset, int sampleNo, size_t& roffset)  
1712  {  {
1713  cout << "Resolve sample " << toString() << endl;     if (m_op==IDENTITY)
1714      // collapse so we have a 'E' node or an IDENTITY for some other type          return;
1715    if (m_readytype!='E' && m_op!=IDENTITY)     DataReady_ptr p=resolveNodeWorker();
1716    {     makeIdentity(p);
1717      collapse();  }
1718    }  
1719    if (m_op==IDENTITY)    void DataLazy::makeIdentity(const DataReady_ptr& p)
1720    {
1721       m_op=IDENTITY;
1722       m_axis_offset=0;
1723       m_transpose=0;
1724       m_SL=m_SM=m_SR=0;
1725       m_children=m_height=0;
1726       m_id=p;
1727       if(p->isConstant()) {m_readytype='C';}
1728       else if(p->isExpanded()) {m_readytype='E';}
1729       else if (p->isTagged()) {m_readytype='T';}
1730       else {throw DataException("Unknown DataReady instance in convertToIdentity constructor.");}
1731       m_samplesize=p->getNumDPPSample()*p->getNoValues();
1732       m_left.reset();
1733       m_right.reset();
1734    }
1735    
1736    
1737    DataReady_ptr
1738    DataLazy::resolve()
1739    {
1740        resolveToIdentity();
1741        return m_id;
1742    }
1743    
1744    
1745    /* This is really a static method but I think that caused problems in windows */
1746    void
1747    DataLazy::resolveGroupWorker(std::vector<DataLazy*>& dats)
1748    {
1749      if (dats.empty())
1750    {    {
1751      const ValueType& vec=m_id->getVector();          return;
     if (m_readytype=='C')  
     {  
     roffset=0;  
     return &(vec);  
     }  
     roffset=m_id->getPointOffset(sampleNo, 0);  
     return &(vec);  
1752    }    }
1753    if (m_readytype!='E')    vector<DataLazy*> work;
1754    {    FunctionSpace fs=dats[0]->getFunctionSpace();
1755      throw DataException("Programmer Error - Collapse did not produce an expanded node.");    bool match=true;
1756      for (int i=dats.size()-1;i>=0;--i)
1757      {
1758            if (dats[i]->m_readytype!='E')
1759            {
1760                    dats[i]->collapse();
1761            }
1762            if (dats[i]->m_op!=IDENTITY)
1763            {
1764                    work.push_back(dats[i]);
1765                    if (fs!=dats[i]->getFunctionSpace())
1766                    {
1767                            match=false;
1768                    }
1769            }
1770      }
1771      if (work.empty())
1772      {
1773            return;         // no work to do
1774      }
1775      if (match)    // all functionspaces match.  Yes I realise this is overly strict
1776      {             // it is possible that dats[0] is one of the objects which we discarded and
1777                    // all the other functionspaces match.
1778            vector<DataExpanded*> dep;
1779            vector<RealVectorType*> vecs;
1780            for (int i=0;i<work.size();++i)
1781            {
1782                    dep.push_back(new DataExpanded(fs,work[i]->getShape(), RealVectorType(work[i]->getNoValues())));
1783                    vecs.push_back(&(dep[i]->getVectorRW()));
1784            }
1785            int totalsamples=work[0]->getNumSamples();
1786            const RealVectorType* res=0; // Storage for answer
1787            int sample;
1788            #pragma omp parallel private(sample, res)
1789            {
1790                size_t roffset=0;
1791                #pragma omp for schedule(static)
1792                for (sample=0;sample<totalsamples;++sample)
1793                {
1794                    roffset=0;
1795                    int j;
1796                    for (j=work.size()-1;j>=0;--j)
1797                    {
1798    #ifdef _OPENMP
1799                        res=work[j]->resolveNodeSample(omp_get_thread_num(),sample,roffset);
1800    #else
1801                        res=work[j]->resolveNodeSample(0,sample,roffset);
1802    #endif
1803                        RealVectorType::size_type outoffset=dep[j]->getPointOffset(sample,0);
1804                        memcpy(&((*vecs[j])[outoffset]),&((*res)[roffset]),work[j]->m_samplesize*sizeof(RealVectorType::ElementType));
1805                    }
1806                }
1807            }
1808            // Now we need to load the new results as identity ops into the lazy nodes
1809            for (int i=work.size()-1;i>=0;--i)
1810            {
1811                work[i]->makeIdentity(REFCOUNTNS::dynamic_pointer_cast<DataReady>(dep[i]->getPtr()));
1812            }
1813    }    }
1814    switch (getOpgroup(m_op))    else  // functionspaces do not match
1815    {    {
1816    case G_UNARY: return resolveUnary(v, offset,sampleNo,roffset);          for (int i=0;i<work.size();++i)
1817    case G_BINARY: return resolveBinary(v, offset,sampleNo,roffset);          {
1818    case G_NP1OUT: return resolveNP1OUT(v, offset, sampleNo,roffset);                  work[i]->resolveToIdentity();
1819    case G_TENSORPROD: return resolveTProd(v,offset, sampleNo,roffset);          }
   default:  
     throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");  
1820    }    }
1821  }  }
1822    
1823    
1824  // To simplify the memory management, all threads operate on one large vector, rather than one each.  
1825  // Each sample is evaluated independently and copied into the result DataExpanded.  // This version of resolve uses storage in each node to hold results
1826  DataReady_ptr  DataReady_ptr
1827  DataLazy::resolve()  DataLazy::resolveNodeWorker()
1828  {  {
1829      if (m_readytype!='E')         // if the whole sub-expression is Constant or Tagged, then evaluate it normally
 cout << "Sample size=" << m_samplesize << endl;  
 cout << "Buffers=" << m_buffsRequired << endl;  
   
   if (m_readytype!='E')     // if the whole sub-expression is Constant or Tagged, then evaluate it normally  
1830    {    {
1831      collapse();      collapse();
1832    }    }
1833    if (m_op==IDENTITY)       // So a lazy expression of Constant or Tagged data will be returned here.    if (m_op==IDENTITY)           // So a lazy expression of Constant or Tagged data will be returned here.
1834    {    {
1835      return m_id;      return m_id;
1836    }    }
1837      // from this point on we must have m_op!=IDENTITY and m_readytype=='E'          // from this point on we must have m_op!=IDENTITY and m_readytype=='E'
1838    size_t threadbuffersize=m_maxsamplesize*(max(1,m_buffsRequired)); // Each thread needs to have enough    DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(),  RealVectorType(getNoValues()));
1839      // storage to evaluate its expression    RealVectorType& resvec=result->getVectorRW();
   int numthreads=1;  
 #ifdef _OPENMP  
   numthreads=getNumberOfThreads();  
 #endif  
   ValueType v(numthreads*threadbuffersize);  
 cout << "Buffer created with size=" << v.size() << endl;  
   DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(),  ValueType(getNoValues()));  
   ValueType& resvec=result->getVector();  
1840    DataReady_ptr resptr=DataReady_ptr(result);    DataReady_ptr resptr=DataReady_ptr(result);
1841    
1842    int sample;    int sample;
   size_t outoffset;     // offset in the output data  
1843    int totalsamples=getNumSamples();    int totalsamples=getNumSamples();
1844    const ValueType* res=0;   // Vector storing the answer    const RealVectorType* res=0;       // Storage for answer
1845    size_t resoffset=0;       // where in the vector to find the answer  LAZYDEBUG(cout << "Total number of samples=" <<totalsamples << endl;)
1846    #pragma omp parallel for private(sample,resoffset,outoffset,res) schedule(static)    #pragma omp parallel private(sample,res)
1847    for (sample=0;sample<totalsamples;++sample)    {
1848    {          size_t roffset=0;
1849  cout << "################################# " << sample << endl;  #ifdef LAZY_STACK_PROF
1850            stackstart[omp_get_thread_num()]=&roffset;
1851            stackend[omp_get_thread_num()]=&roffset;
1852    #endif
1853            #pragma omp for schedule(static)
1854            for (sample=0;sample<totalsamples;++sample)
1855            {
1856                    roffset=0;
1857  #ifdef _OPENMP  #ifdef _OPENMP
1858      res=resolveSample(v,threadbuffersize*omp_get_thread_num(),sample,resoffset);                  res=resolveNodeSample(omp_get_thread_num(),sample,roffset);
1859  #else  #else
1860      res=resolveSample(v,0,sample,resoffset);   // res would normally be v, but not if its a single IDENTITY op.                  res=resolveNodeSample(0,sample,roffset);
1861  #endif  #endif
1862  cerr << "-------------------------------- " << endl;  LAZYDEBUG(cout << "Sample #" << sample << endl;)
1863      outoffset=result->getPointOffset(sample,0);  LAZYDEBUG(cout << "Final res[" << roffset<< "]=" << (*res)[roffset] << (*res)[roffset]<< endl; )
1864  cerr << "offset=" << outoffset << endl;                  RealVectorType::size_type outoffset=result->getPointOffset(sample,0);
1865      for (unsigned int i=0;i<m_samplesize;++i,++outoffset,++resoffset)   // copy values into the output vector                  memcpy(&(resvec[outoffset]),&((*res)[roffset]),m_samplesize*sizeof(RealVectorType::ElementType));
1866      {          }
1867      resvec[outoffset]=(*res)[resoffset];    }
1868      }  #ifdef LAZY_STACK_PROF
1869  cerr << "*********************************" << endl;    for (int i=0;i<getNumberOfThreads();++i)
1870      {
1871            size_t r=((size_t)stackstart[i] - (size_t)stackend[i]);
1872    //      cout << i << " " << stackstart[i] << " .. " << stackend[i] << " = " <<  r << endl;
1873            if (r>maxstackuse)
1874            {
1875                    maxstackuse=r;
1876            }
1877    }    }
1878      cout << "Max resolve Stack use=" << maxstackuse << endl;
1879    #endif
1880    return resptr;    return resptr;
1881  }  }
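The parallel loop in resolveNodeWorker above depends on each thread writing into its own slice of the node's sample storage (indexed by m_samplesize*tid) before the finished sample is memcpy'd into the output vector. A stripped-down sketch of that per-thread partitioning with made-up sizes; it also runs serially if OpenMP is not enabled:

// thread_scratch_sketch.cpp -- illustrative only; build with OpenMP for the parallel effect.
#include <cstddef>
#include <iostream>
#include <vector>
#ifdef _OPENMP
#include <omp.h>
#endif

int main()
{
    const std::size_t samplesize = 4;
    int nthreads = 1;
#ifdef _OPENMP
    nthreads = omp_get_max_threads();
#endif
    // One big scratch vector, partitioned into one slice per thread.
    std::vector<double> scratch(nthreads * samplesize);
    #pragma omp parallel for
    for (int sample = 0; sample < 8; ++sample)
    {
        int tid = 0;
#ifdef _OPENMP
        tid = omp_get_thread_num();
#endif
        double* mine = &scratch[tid * samplesize];   // this thread's private slice
        for (std::size_t i = 0; i < samplesize; ++i)
            mine[i] = sample * 10.0 + i;   // stand-in for resolving the sample; the slice is
                                           // reused for each new sample once its result is copied out
    }
    std::cout << "scratch doubles allocated: " << scratch.size() << '\n';
}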
1882    
# Line 1057  std::string Line 1884  std::string
1884  DataLazy::toString() const  DataLazy::toString() const
1885  {  {
1886    ostringstream oss;    ostringstream oss;
1887    oss << "Lazy Data:";    oss << "Lazy Data: [depth=" << m_height<< "] ";
1888    intoString(oss);    switch (escriptParams.getLAZY_STR_FMT())
1889      {
1890      case 1:       // tree format
1891            oss << endl;
1892            intoTreeString(oss,"");
1893            break;
1894      case 2:       // just the depth
1895            break;
1896      default:
1897            intoString(oss);
1898            break;
1899      }
1900    return oss.str();    return oss.str();
1901  }  }
1902    
# Line 1066  DataLazy::toString() const Line 1904  DataLazy::toString() const
1904  void  void
1905  DataLazy::intoString(ostringstream& oss) const  DataLazy::intoString(ostringstream& oss) const
1906  {  {
1907    //    oss << "[" << m_children <<";"<<m_height <<"]";
1908    switch (getOpgroup(m_op))    switch (getOpgroup(m_op))
1909    {    {
1910    case G_IDENTITY:    case G_IDENTITY:
1911      if (m_id->isExpanded())          if (m_id->isExpanded())
1912      {          {
1913         oss << "E";             oss << "E";
1914      }          }
1915      else if (m_id->isTagged())          else if (m_id->isTagged())
1916      {          {
1917        oss << "T";            oss << "T";
1918      }          }
1919      else if (m_id->isConstant())          else if (m_id->isConstant())
1920      {          {
1921        oss << "C";            oss << "C";
1922      }          }
1923      else          else
1924      {          {
1925        oss << "?";            oss << "?";
1926      }          }
1927      oss << '@' << m_id.get();          oss << '@' << m_id.get();
1928      break;          break;
1929    case G_BINARY:    case G_BINARY:
1930      oss << '(';          oss << '(';
1931      m_left->intoString(oss);          m_left->intoString(oss);
1932      oss << ' ' << opToString(m_op) << ' ';          oss << ' ' << opToString(m_op) << ' ';
1933      m_right->intoString(oss);          m_right->intoString(oss);
1934      oss << ')';          oss << ')';
1935      break;          break;
1936    case G_UNARY:    case G_UNARY:
1937      case G_UNARY_P:
1938    case G_NP1OUT:    case G_NP1OUT:
1939      oss << opToString(m_op) << '(';    case G_NP1OUT_P:
1940      m_left->intoString(oss);    case G_REDUCTION:
1941      oss << ')';          oss << opToString(m_op) << '(';
1942      break;          m_left->intoString(oss);
1943            oss << ')';
1944            break;
1945    case G_TENSORPROD:    case G_TENSORPROD:
1946      oss << opToString(m_op) << '(';          oss << opToString(m_op) << '(';
1947      m_left->intoString(oss);          m_left->intoString(oss);
1948      oss << ", ";          oss << ", ";
1949      m_right->intoString(oss);          m_right->intoString(oss);
1950      oss << ')';          oss << ')';
1951      break;          break;
1952      case G_NP1OUT_2P:
1953            oss << opToString(m_op) << '(';
1954            m_left->intoString(oss);
1955            oss << ", " << m_axis_offset << ", " << m_transpose;
1956            oss << ')';
1957            break;
1958      case G_CONDEVAL:
1959            oss << opToString(m_op)<< '(' ;
1960            m_mask->intoString(oss);
1961            oss << " ? ";
1962            m_left->intoString(oss);
1963            oss << " : ";
1964            m_right->intoString(oss);
1965            oss << ')';
1966            break;
1967      default:
1968            oss << "UNKNOWN";
1969      }
1970    }
1971    
1972    
1973    void
1974    DataLazy::intoTreeString(ostringstream& oss, string indent) const
1975    {
1976      oss << '[' << m_rank << ':' << setw(3) << m_samplesize << "] " << indent;
1977      switch (getOpgroup(m_op))
1978      {
1979      case G_IDENTITY:
1980            if (m_id->isExpanded())
1981            {
1982               oss << "E";
1983            }
1984            else if (m_id->isTagged())
1985            {
1986              oss << "T";
1987            }
1988            else if (m_id->isConstant())
1989            {
1990              oss << "C";
1991            }
1992            else
1993            {
1994              oss << "?";
1995            }
1996            oss << '@' << m_id.get() << endl;
1997            break;
1998      case G_BINARY:
1999            oss << opToString(m_op) << endl;
2000            indent+='.';
2001            m_left->intoTreeString(oss, indent);
2002            m_right->intoTreeString(oss, indent);
2003            break;
2004      case G_UNARY:
2005      case G_UNARY_P:
2006      case G_NP1OUT:
2007      case G_NP1OUT_P:
2008      case G_REDUCTION:
2009            oss << opToString(m_op) << endl;
2010            indent+='.';
2011            m_left->intoTreeString(oss, indent);
2012            break;
2013      case G_TENSORPROD:
2014            oss << opToString(m_op) << endl;
2015            indent+='.';
2016            m_left->intoTreeString(oss, indent);
2017            m_right->intoTreeString(oss, indent);
2018            break;
2019      case G_NP1OUT_2P:
2020            oss << opToString(m_op) << ", " << m_axis_offset << ", " << m_transpose<< endl;
2021            indent+='.';
2022            m_left->intoTreeString(oss, indent);
2023            break;
2024    default:    default:
2025      oss << "UNKNOWN";          oss << "UNKNOWN";
2026    }    }
2027  }  }
2028    
2029    
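To make the two renderings above concrete -- the one-line infix form produced by intoString and the indented tree form produced by intoTreeString (one '.' per level of depth) -- here is a toy, self-contained node type that mimics both; Node and its fields are invented for illustration and are not escript types:

// tostring_sketch.cpp -- illustrative only.
#include <iostream>
#include <memory>
#include <sstream>
#include <string>

struct Node
{
    std::string op;                      // empty for leaf (identity) nodes
    std::string leaf;                    // e.g. "E@a", used when op is empty
    std::shared_ptr<Node> left, right;

    // One-line infix rendering, in the spirit of intoString for a binary op.
    void intoString(std::ostringstream& oss) const
    {
        if (op.empty()) { oss << leaf; return; }
        oss << '(';
        left->intoString(oss);
        oss << ' ' << op << ' ';
        right->intoString(oss);
        oss << ')';
    }
    // Indented tree rendering, like intoTreeString: children get indent + '.'.
    void intoTreeString(std::ostringstream& oss, std::string indent) const
    {
        oss << indent << (op.empty() ? leaf : op) << '\n';
        if (!op.empty())
        {
            indent += '.';
            left->intoTreeString(oss, indent);
            right->intoTreeString(oss, indent);
        }
    }
};

int main()
{
    auto a = std::make_shared<Node>(Node{"", "E@a", nullptr, nullptr});
    auto b = std::make_shared<Node>(Node{"", "C@b", nullptr, nullptr});
    auto sum = std::make_shared<Node>(Node{"+", "", a, b});
    std::ostringstream flat, tree;
    sum->intoString(flat);
    sum->intoTreeString(tree, "");
    std::cout << flat.str() << '\n' << tree.str();   // "(E@a + C@b)" then the tree form
}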
2030  DataAbstract*  DataAbstract*
2031  DataLazy::deepCopy()  DataLazy::deepCopy() const
2032  {  {
2033    switch (getOpgroup(m_op))    switch (getOpgroup(m_op))
2034    {    {
2035    case G_IDENTITY:  return new DataLazy(m_id->deepCopy()->getPtr());    case G_IDENTITY:  return new DataLazy(m_id->deepCopy()->getPtr());
2036    case G_UNARY: return new DataLazy(m_left->deepCopy()->getPtr(),m_op);    case G_UNARY:
2037    case G_BINARY:    return new DataLazy(m_left->deepCopy()->getPtr(),m_right->deepCopy()->getPtr(),m_op);    case G_REDUCTION:      return new DataLazy(m_left->deepCopy()->getPtr(),m_op);
2038      case G_UNARY_P:       return new DataLazy(m_left->deepCopy()->getPtr(), m_op, m_tol);
2039      case G_BINARY:        return new DataLazy(m_left->deepCopy()->getPtr(),m_right->deepCopy()->getPtr(),m_op);
2040    case G_NP1OUT: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(),m_op);    case G_NP1OUT: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(),m_op);
2041    case G_TENSORPROD: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);    case G_TENSORPROD: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
2042      case G_NP1OUT_P:   return new DataLazy(m_left->deepCopy()->getPtr(),m_op,  m_axis_offset);
2043      case G_NP1OUT_2P:  return new DataLazy(m_left->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
2044    default:    default:
2045      throw DataException("Programmer error - do not know how to deepcopy operator "+opToString(m_op)+".");          throw DataException("Programmer error - do not know how to deepcopy operator "+opToString(m_op)+".");
2046    }    }
2047  }  }
2048    
2049    
2050    
2051  // There is no single, natural interpretation of getLength on DataLazy.  // There is no single, natural interpretation of getLength on DataLazy.
2052  // Instances of DataReady can look at the size of their vectors.  // Instances of DataReady can look at the size of their vectors.
2053  // For lazy though, it could be the size the data would be if it were resolved;  // For lazy though, it could be the size the data would be if it were resolved;
2054  // or it could be some function of the lengths of the DataReady instances which  // or it could be some function of the lengths of the DataReady instances which
2055  // form part of the expression.  // form part of the expression.
2056  // Rather than have people making assumptions, I have disabled the method.  // Rather than have people making assumptions, I have disabled the method.
2057  DataTypes::ValueType::size_type  DataTypes::RealVectorType::size_type
2058  DataLazy::getLength() const  DataLazy::getLength() const
2059  {  {
2060    throw DataException("getLength() does not make sense for lazy data.");    throw DataException("getLength() does not make sense for lazy data.");
# Line 1149  DataLazy::getSlice(const DataTypes::Regi Line 2069  DataLazy::getSlice(const DataTypes::Regi
2069    
2070    
2071  // To do this we need to rely on our child nodes  // To do this we need to rely on our child nodes
2072  DataTypes::ValueType::size_type  DataTypes::RealVectorType::size_type
2073  DataLazy::getPointOffset(int sampleNo,  DataLazy::getPointOffset(int sampleNo,
2074                   int dataPointNo)                   int dataPointNo)
2075  {  {
2076    if (m_op==IDENTITY)    if (m_op==IDENTITY)
2077    {    {
2078      return m_id->getPointOffset(sampleNo,dataPointNo);          return m_id->getPointOffset(sampleNo,dataPointNo);
2079    }    }
2080    if (m_readytype!='E')    if (m_readytype!='E')
2081    {    {
2082      collapse();          collapse();
2083      return m_id->getPointOffset(sampleNo,dataPointNo);          return m_id->getPointOffset(sampleNo,dataPointNo);
2084    }    }
2085    // at this point we do not have an identity node and the expression will be Expanded    // at this point we do not have an identity node and the expression will be Expanded
2086    // so we only need to know which child to ask    // so we only need to know which child to ask
2087    if (m_left->m_readytype=='E')    if (m_left->m_readytype=='E')
2088    {    {
2089      return m_left->getPointOffset(sampleNo,dataPointNo);          return m_left->getPointOffset(sampleNo,dataPointNo);
2090    }    }
2091    else    else
2092    {    {
2093      return m_right->getPointOffset(sampleNo,dataPointNo);          return m_right->getPointOffset(sampleNo,dataPointNo);
2094    }    }
2095  }  }
2096    
2097  // To do this we need to rely on our child nodes  // To do this we need to rely on our child nodes
2098  DataTypes::ValueType::size_type  DataTypes::RealVectorType::size_type
2099  DataLazy::getPointOffset(int sampleNo,  DataLazy::getPointOffset(int sampleNo,
2100                   int dataPointNo) const                   int dataPointNo) const
2101  {  {
2102    if (m_op==IDENTITY)    if (m_op==IDENTITY)
2103    {    {
2104      return m_id->getPointOffset(sampleNo,dataPointNo);          return m_id->getPointOffset(sampleNo,dataPointNo);
2105    }    }
2106    if (m_readytype=='E')    if (m_readytype=='E')
2107    {    {
# Line 1189  DataLazy::getPointOffset(int sampleNo, Line 2109  DataLazy::getPointOffset(int sampleNo,
2109      // so we only need to know which child to ask      // so we only need to know which child to ask
2110      if (m_left->m_readytype=='E')      if (m_left->m_readytype=='E')
2111      {      {
2112      return m_left->getPointOffset(sampleNo,dataPointNo);          return m_left->getPointOffset(sampleNo,dataPointNo);
2113      }      }
2114      else      else
2115      {      {
2116      return m_right->getPointOffset(sampleNo,dataPointNo);          return m_right->getPointOffset(sampleNo,dataPointNo);
2117      }      }
2118    }    }
2119    if (m_readytype=='C')    if (m_readytype=='C')
2120    {    {
2121      return m_left->getPointOffset(sampleNo,dataPointNo); // which child doesn't matter          return m_left->getPointOffset(sampleNo,dataPointNo); // which child doesn't matter
2122    }    }
2123    throw DataException("Programmer error - getPointOffset on lazy data may require collapsing (but this object is marked const).");    throw DataException("Programmer error - getPointOffset on lazy data may require collapsing (but this object is marked const).");
2124  }  }
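
Both getPointOffset() overloads follow the same delegation rule: an identity node asks its stored DataReady, an expanded expression asks whichever child is itself expanded, a constant expression may ask either child, and only the non-const overload is permitted to collapse a tagged expression first. The compact sketch below captures that decision chain; the Node type and its members are invented for illustration and are not escript's classes.

#include <cstddef>
#include <stdexcept>

// Toy sketch of the delegation order used by DataLazy::getPointOffset().
struct Node
{
    char readyType;              // 'E' expanded, 'C' constant, 'T' tagged
    bool identity;               // true for leaf nodes that hold real data
    std::size_t ownOffset;       // what a leaf reports (stands in for m_id)
    const Node* left;
    const Node* right;

    std::size_t pointOffset(int sampleNo, int dataPointNo) const
    {
        if (identity)                          // leaf: the stored data knows
            return ownOffset;
        if (readyType == 'E')                  // expanded: ask the expanded child
            return (left->readyType == 'E' ? left : right)
                       ->pointOffset(sampleNo, dataPointNo);
        if (readyType == 'C')                  // constant: either child will do
            return left->pointOffset(sampleNo, dataPointNo);
        // tagged data would need collapsing, which a const node cannot do
        throw std::logic_error("collapse required");
    }
};
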
2125    
2126  // It would seem that DataTagged will need to be treated differently since even after setting all tags  
2127  // to zero, all the tags from all the DataTags would be in the result.  // I have decided to let Data:: handle this issue.
 // However since they all have the same value (0) whether they are there or not should not matter.  
 // So I have decided that for all types this method will create a constant 0.  
 // It can be promoted up as required.  
 // A possible efficiency concern might be expanded->constant->expanded which involves extra memory management  
 // but we can deal with that if it arises.  
2128  void  void
2129  DataLazy::setToZero()  DataLazy::setToZero()
2130  {  {
2131    DataTypes::ValueType v(getNoValues(),0);  //   DataTypes::RealVectorType v(getNoValues(),0);
2132    m_id=DataReady_ptr(new DataConstant(getFunctionSpace(),getShape(),v));  //   m_id=DataReady_ptr(new DataConstant(getFunctionSpace(),getShape(),v));
2133    m_op=IDENTITY;  //   m_op=IDENTITY;
2134    m_right.reset();    //   m_right.reset();  
2135    m_left.reset();  //   m_left.reset();
2136    m_readytype='C';  //   m_readytype='C';
2137    m_buffsRequired=1;  //   m_buffsRequired=1;
2138    
2139      (void)privdebug;  // to stop the compiler complaining about unused privdebug
2140      throw DataException("Programmer error - setToZero not supported for DataLazy (DataLazy objects should be read only).");
2141    }
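
Because setToZero() now refuses to mutate a lazy node, zeroing happens one level up, as the "let Data:: handle this issue" comment indicates. The sketch below is a hedged illustration of that idea only (the Holder, AbstractData and ReadyZeros names are invented; this is not escript's actual Data implementation): the owning wrapper drops the read-only lazy tree and points at a freshly built zero-valued ready object instead.

#include <cstddef>
#include <memory>
#include <vector>

// Invented stand-ins for DataAbstract / DataConstant / DataLazy.
struct AbstractData            { virtual ~AbstractData() {} };
struct ReadyZeros : AbstractData
{
    std::vector<double> values;
    explicit ReadyZeros(std::size_t n) : values(n, 0.0) {}   // all zeros
};
struct LazyTree : AbstractData { /* read-only expression tree */ };

// Toy analogue of the wrapper-level fix: instead of asking the lazy tree to
// zero itself, replace it with a zero-filled ready object.
struct Holder
{
    std::shared_ptr<AbstractData> impl;

    void setToZero(std::size_t noValues)
    {
        impl = std::make_shared<ReadyZeros>(noValues);   // the lazy tree is simply released
    }
};
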
2142    
2143    bool
2144    DataLazy::actsExpanded() const
2145    {
2146            return (m_readytype=='E');
2147  }  }
2148    
2149  }   // end namespace  } // end namespace
2150    
