/[escript]/branches/clazy/escriptcore/src/DataLazy.cpp

Diff of /branches/clazy/escriptcore/src/DataLazy.cpp


trunk/escript/src/DataLazy.cpp revision 2092 by jfenwick, Tue Nov 25 04:18:17 2008 UTC
trunk/escriptcore/src/DataLazy.cpp revision 6168 by jfenwick, Wed Apr 13 03:08:12 2016 UTC
# Line 1 (old) / Line 1 (new)
1    
2  /*******************************************************  /*****************************************************************************
3  *  *
4  * Copyright (c) 2003-2008 by University of Queensland  * Copyright (c) 2003-2016 by The University of Queensland
5  * Earth Systems Science Computational Center (ESSCC)  * http://www.uq.edu.au
 * http://www.uq.edu.au/esscc  
6  *  *
7  * Primary Business: Queensland, Australia  * Primary Business: Queensland, Australia
8  * Licensed under the Open Software License version 3.0  * Licensed under the Apache License, version 2.0
9  * http://www.opensource.org/licenses/osl-3.0.php  * http://www.apache.org/licenses/LICENSE-2.0
10  *  *
11  *******************************************************/  * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12    * Development 2012-2013 by School of Earth Sciences
13    * Development from 2014 by Centre for Geoscience Computing (GeoComp)
14    *
15    *****************************************************************************/
16    
17  #include "DataLazy.h"  #include "DataLazy.h"
 #ifdef USE_NETCDF  
 #include <netcdfcpp.h>  
 #endif  
 #ifdef PASO_MPI  
 #include <mpi.h>  
 #endif  
 #ifdef _OPENMP  
 #include <omp.h>  
 #endif  
 #include "FunctionSpace.h"  
 #include "DataTypes.h"  
18  #include "Data.h"  #include "Data.h"
19  #include "UnaryFuncs.h"     // for escript::fsign  #include "DataTypes.h"
20    #include "EscriptParams.h"
21    #include "FunctionSpace.h"
22  #include "Utils.h"  #include "Utils.h"
23    #include "DataVectorOps.h"
24    
25    #include <iomanip> // for some fancy formatting in debug
26    
27    using namespace escript::DataTypes;
28    
29    #define NO_ARG
30    
31  // #define LAZYDEBUG(X) X;  // #define LAZYDEBUG(X) if (privdebug){X;}
32  #define LAZYDEBUG(X)  #define LAZYDEBUG(X)
33    namespace
34    {
35    bool privdebug=false;
36    
37    #define ENABLEDEBUG privdebug=true;
38    #define DISABLEDEBUG privdebug=false;
39    }
40    
41    // #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {cerr << "\n!!!!!!! SIZE LIMIT EXCEEDED " << m_children << ";" << m_height << endl << toString() << endl;resolveToIdentity();}
42    
43    // #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {cerr << "SIZE LIMIT EXCEEDED " << m_height << endl;resolveToIdentity();}
44    
45    
46    #define SIZELIMIT if (m_height>escript::escriptParams.getTOO_MANY_LEVELS())  {if (escript::escriptParams.getLAZY_VERBOSE()){cerr << "SIZE LIMIT EXCEEDED height=" << m_height << endl;}resolveToIdentity();}
47    
48  /*  /*
49  How does DataLazy work?  How does DataLazy work?
# Line 42 (old) / Line 56 (new) : A special operation, IDENTITY, stores an
56  This means that all "internal" nodes in the structure are instances of DataLazy.  This means that all "internal" nodes in the structure are instances of DataLazy.
57    
58  Each operation has a string representation as well as an opgroup - eg G_IDENTITY, G_BINARY, ...  Each operation has a string representation as well as an opgroup - eg G_IDENTITY, G_BINARY, ...
59  Note that IDENITY is not considered a unary operation.  Note that IDENTITY is not considered a unary operation.
60    
61  I am avoiding calling the structure formed a tree because it is not guaranteed to be one (eg c=a+a).  I am avoiding calling the structure formed a tree because it is not guaranteed to be one (eg c=a+a).
62  It must however form a DAG (directed acyclic graph).  It must however form a DAG (directed acyclic graph).
# Line 50 (old) / Line 64 (new) : I will refer to individual DataLazy obje
64    
65  Each node also stores:  Each node also stores:
66  - m_readytype \in {'E','T','C','?'} ~ indicates what sort of DataReady would be produced if the expression was  - m_readytype \in {'E','T','C','?'} ~ indicates what sort of DataReady would be produced if the expression was
67      evaluated.          evaluated.
68  - m_buffsrequired ~ the larged number of samples which would need to be kept simultaneously in order to  - m_buffsrequired ~ the large number of samples which would need to be kept simultaneously in order to
69      evaluate the expression.          evaluate the expression.
70  - m_samplesize ~ the number of doubles stored in a sample.  - m_samplesize ~ the number of doubles stored in a sample.
71    
72  When a new node is created, the above values are computed based on the values in the child nodes.  When a new node is created, the above values are computed based on the values in the child nodes.
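The layout described above can be pictured with a small standalone sketch (simplified, made-up types, not the actual escript classes): each node records its operation, pointers to its children (which may be shared, so the structure is a DAG rather than a tree), and the kind of result evaluating it would produce, with 'E' taking precedence over 'T' and 'T' over 'C', mirroring how m_readytype is set in the constructors further down.

// Illustrative sketch only - a simplified stand-in for the real DataLazy below.
struct ToyLazyNode
{
    char op;                  // e.g. '+', or 'I' for IDENTITY wrapping ready data
    ToyLazyNode* left;        // children; c=a+a makes both point at the same node,
    ToyLazyNode* right;       // so the structure is a DAG, not necessarily a tree
    char readytype;           // 'C'onstant, 'T'agged or 'E'xpanded result

    ToyLazyNode(char o, ToyLazyNode* l, ToyLazyNode* r)
        : op(o), left(l), right(r)
    {
        char lt = l ? l->readytype : 'C';
        char rt = r ? r->readytype : 'C';
        // 'E' dominates 'T', which dominates 'C' - the same rule the
        // DataLazy constructors below use when combining children.
        readytype = (lt=='E' || rt=='E') ? 'E'
                  : (lt=='T' || rt=='T') ? 'T' : 'C';
    }
};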
# Line 73 (old) / Line 87 (new) : The convention that I use, is that the r
87  For expressions which evaluate to Constant or Tagged, there is a different evaluation method.  For expressions which evaluate to Constant or Tagged, there is a different evaluation method.
88  The collapse method invokes the (non-lazy) operations on the Data class to evaluate the expression.  The collapse method invokes the (non-lazy) operations on the Data class to evaluate the expression.
89    
90  To add a new operator you need to do the following (plus anything I might have forgotten):  To add a new operator you need to do the following (plus anything I might have forgotten - adding a new group for example):
91  1) Add to the ES_optype.  1) Add to the ES_optype.
92  2) determine what opgroup your operation belongs to (X)  2) determine what opgroup your operation belongs to (X)
93  3) add a string for the op to the end of ES_opstrings  3) add a string for the op to the end of ES_opstrings
# Line 93 (old) / Line 107 (new) : namespace escript
107  namespace  namespace
108  {  {
109    
 enum ES_opgroup  
 {  
    G_UNKNOWN,  
    G_IDENTITY,  
    G_BINARY,        // pointwise operations with two arguments  
    G_UNARY,     // pointwise operations with one argument  
    G_NP1OUT,        // non-pointwise op with one output  
    G_NP1OUT_P,      // non-pointwise op with one output requiring a parameter  
    G_TENSORPROD     // general tensor product  
 };  
110    
111    // enabling this will print out when ever the maximum stacksize used by resolve increases
112    // it assumes _OPENMP is also in use
113    //#define LAZY_STACK_PROF
114    
115    
116    
117  string ES_opstrings[]={"UNKNOWN","IDENTITY","+","-","*","/","^",  #ifndef _OPENMP
118              "sin","cos","tan",    #ifdef LAZY_STACK_PROF
119              "asin","acos","atan","sinh","cosh","tanh","erf",    #undef LAZY_STACK_PROF
120              "asinh","acosh","atanh",    #endif
121              "log10","log","sign","abs","neg","pos","exp","sqrt",  #endif
122              "1/","where>0","where<0","where>=0","where<=0",  
123              "symmetric","nonsymmetric",  
124              "prod",  #ifdef LAZY_STACK_PROF
125              "transpose",  std::vector<void*> stackstart(getNumberOfThreads());
126              "trace"};  std::vector<void*> stackend(getNumberOfThreads());
127  int ES_opcount=38;  size_t maxstackuse=0;
128  ES_opgroup opgroups[]={G_UNKNOWN,G_IDENTITY,G_BINARY,G_BINARY,G_BINARY,G_BINARY, G_BINARY,  #endif
129              G_UNARY,G_UNARY,G_UNARY, //10  
             G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,    // 17  
             G_UNARY,G_UNARY,G_UNARY,                    // 20  
             G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,        // 28  
             G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,            // 33  
             G_NP1OUT,G_NP1OUT,  
             G_TENSORPROD,  
             G_NP1OUT_P, G_NP1OUT_P};  
 inline  
 ES_opgroup  
 getOpgroup(ES_optype op)  
 {  
   return opgroups[op];  
 }  
130    
131  // return the FunctionSpace of the result of "left op right"  // return the FunctionSpace of the result of "left op right"
132  FunctionSpace  FunctionSpace
133  resultFS(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)  resultFS(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
134  {  {
135      // perhaps this should call interpolate and throw or something?          // perhaps this should call interpolate and throw or something?
136      // maybe we need an interpolate node -          // maybe we need an interpolate node -
137      // that way, if interpolate is required in any other op we can just throw a          // that way, if interpolate is required in any other op we can just throw a
138      // programming error exception.          // programming error exception.
139    
140    FunctionSpace l=left->getFunctionSpace();    FunctionSpace l=left->getFunctionSpace();
141    FunctionSpace r=right->getFunctionSpace();    FunctionSpace r=right->getFunctionSpace();
142    if (l!=r)    if (l!=r)
143    {    {
144      if (r.probeInterpolation(l))      signed char res=r.getDomain()->preferredInterpolationOnDomain(r.getTypeCode(), l.getTypeCode());
145        if (res==1)
146      {      {
147      return l;          return l;
148      }      }
149      if (l.probeInterpolation(r))      if (res==-1)
150      {      {
151      return r;          return r;
152      }      }
153      throw DataException("Cannot interpolate between the FunctionSpaces given for operation "+opToString(op)+".");      throw DataException("Cannot interpolate between the FunctionSpaces given for operation "+opToString(op)+".");
154    }    }
# Line 165 (old) / Line 160 (new) : resultFS(DataAbstract_ptr left, DataAbst
160  DataTypes::ShapeType  DataTypes::ShapeType
161  resultShape(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)  resultShape(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
162  {  {
163      if (left->getShape()!=right->getShape())          if (left->getShape()!=right->getShape())
164      {          {
165        if ((getOpgroup(op)!=G_BINARY) && (getOpgroup(op)!=G_NP1OUT))            if ((getOpgroup(op)!=G_BINARY) && (getOpgroup(op)!=G_NP1OUT))
166        {            {
167          throw DataException("Shapes not the name - shapes must match for (point)binary operations.");                  throw DataException("Shapes not the name - shapes must match for (point)binary operations.");
168        }            }
169        if (left->getRank()==0)   // we need to allow scalar * anything  
170        {            if (left->getRank()==0)       // we need to allow scalar * anything
171          return right->getShape();            {
172        }                  return right->getShape();
173        if (right->getRank()==0)            }
174        {            if (right->getRank()==0)
175          return left->getShape();            {
176        }                  return left->getShape();
177        throw DataException("Shapes not the same - arguments must have matching shapes (or be scalars) for (point)binary operations on lazy data.");            }
178      }            throw DataException("Shapes not the same - arguments must have matching shapes (or be scalars) for (point)binary operations on lazy data.");
179      return left->getShape();          }
180            return left->getShape();
181  }  }
182    
183  // return the shape for "op left"  // return the shape for "op left"
184    
185  DataTypes::ShapeType  DataTypes::ShapeType
186  resultShape(DataAbstract_ptr left, ES_optype op)  resultShape(DataAbstract_ptr left, ES_optype op, int axis_offset)
187  {  {
188      switch(op)          switch(op)
189      {          {
190          case TRANS:          case TRANS:
191          return left->getShape();             {                    // for the scoping of variables
192      break;                  const DataTypes::ShapeType& s=left->getShape();
193      case TRACE:                  DataTypes::ShapeType sh;
194          return DataTypes::scalarShape;                  int rank=left->getRank();
195      break;                  if (axis_offset<0 || axis_offset>rank)
196          default:                  {
197      throw DataException("Programmer error - resultShape(left,op) can't compute shapes for operator "+opToString(op)+".");              stringstream e;
198      }              e << "Error - Data::transpose must have 0 <= axis_offset <= rank=" << rank;
199                throw DataException(e.str());
200            }
201            for (int i=0; i<rank; i++)
202                    {
203                       int index = (axis_offset+i)%rank;
204               sh.push_back(s[index]); // Append to new shape
205            }
206                    return sh;
207               }
208            break;
209            case TRACE:
210               {
211                    int rank=left->getRank();
212                    if (rank<2)
213                    {
214                       throw DataException("Trace can only be computed for objects with rank 2 or greater.");
215                    }
216                    if ((axis_offset>rank-2) || (axis_offset<0))
217                    {
218                       throw DataException("Trace: axis offset must lie between 0 and rank-2 inclusive.");
219                    }
220                    if (rank==2)
221                    {
222                       return DataTypes::scalarShape;
223                    }
224                    else if (rank==3)
225                    {
226                       DataTypes::ShapeType sh;
227                       if (axis_offset==0)
228                       {
229                            sh.push_back(left->getShape()[2]);
230                       }
231                       else         // offset==1
232                       {
233                            sh.push_back(left->getShape()[0]);
234                       }
235                       return sh;
236                    }
237                    else if (rank==4)
238                    {
239                       DataTypes::ShapeType sh;
240                       const DataTypes::ShapeType& s=left->getShape();
241                       if (axis_offset==0)
242                       {
243                            sh.push_back(s[2]);
244                            sh.push_back(s[3]);
245                       }
246                       else if (axis_offset==1)
247                       {
248                            sh.push_back(s[0]);
249                            sh.push_back(s[3]);
250                       }
251                       else         // offset==2
252                       {
253                            sh.push_back(s[0]);
254                            sh.push_back(s[1]);
255                       }
256                       return sh;
257                    }
258                    else            // unknown rank
259                    {
260                       throw DataException("Error - Data::trace can only be calculated for rank 2, 3 or 4 object.");
261                    }
262               }
263            break;
264            default:
265            throw DataException("Programmer error - resultShape(left,op) can't compute shapes for operator "+opToString(op)+".");
266            }
267  }  }
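As a quick worked example of the shape rules above (hypothetical shapes, not taken from the file): TRANS applied to a rank-3 argument of shape (2,3,4) with axis_offset=1 rotates the shape to (3,4,2), while TRACE applied to a rank-3 argument of shape (5,5,7) with axis_offset=0 contracts the first two dimensions and leaves shape (7).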
268    
269    DataTypes::ShapeType
270    SwapShape(DataAbstract_ptr left, const int axis0, const int axis1)
271    {
272         // This code taken from the Data.cpp swapaxes() method
273         // Some of the checks are probably redundant here
274         int axis0_tmp,axis1_tmp;
275         const DataTypes::ShapeType& s=left->getShape();
276         DataTypes::ShapeType out_shape;
277         // Here's the equivalent of python s_out=s[axis_offset:]+s[:axis_offset]
278         // which goes thru all shape vector elements starting with axis_offset (at index=rank wrap around to 0)
279         int rank=left->getRank();
280         if (rank<2) {
281            throw DataException("Error - Data::swapaxes argument must have at least rank 2.");
282         }
283         if (axis0<0 || axis0>rank-1) {
284            stringstream e;
285            e << "Error - Data::swapaxes: axis0 must be between 0 and rank-1=" << (rank-1);
286            throw DataException(e.str());
287         }
288         if (axis1<0 || axis1>rank-1) {
289            stringstream e;
290            e << "Error - Data::swapaxes: axis1 must be between 0 and rank-1=" << (rank-1);
291            throw DataException(e.str());
292         }
293         if (axis0 == axis1) {
294             throw DataException("Error - Data::swapaxes: axis indices must be different.");
295         }
296         if (axis0 > axis1) {
297             axis0_tmp=axis1;
298             axis1_tmp=axis0;
299         } else {
300             axis0_tmp=axis0;
301             axis1_tmp=axis1;
302         }
303         for (int i=0; i<rank; i++) {
304           if (i == axis0_tmp) {
305              out_shape.push_back(s[axis1_tmp]);
306           } else if (i == axis1_tmp) {
307              out_shape.push_back(s[axis0_tmp]);
308           } else {
309              out_shape.push_back(s[i]);
310           }
311         }
312        return out_shape;
313    }
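A quick check of the swap logic above (again a hypothetical shape): a rank-3 argument of shape (2,3,4) with axis0=0 and axis1=2 produces out_shape (4,3,2).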
314    
315    
316  // determine the output shape for the general tensor product operation  // determine the output shape for the general tensor product operation
317  // the additional parameters return information required later for the product  // the additional parameters return information required later for the product
318  // the majority of this code is copy pasted from C_General_Tensor_Product  // the majority of this code is copy pasted from C_General_Tensor_Product
319  DataTypes::ShapeType  DataTypes::ShapeType
320  GTPShape(DataAbstract_ptr left, DataAbstract_ptr right, int axis_offset, int transpose, int& SL, int& SM, int& SR)  GTPShape(DataAbstract_ptr left, DataAbstract_ptr right, int axis_offset, int transpose, int& SL, int& SM, int& SR)
321  {  {
322                
323    // Get rank and shape of inputs    // Get rank and shape of inputs
324    int rank0 = left->getRank();    int rank0 = left->getRank();
325    int rank1 = right->getRank();    int rank1 = right->getRank();
# Line 217 (old) / Line 328 (new) : GTPShape(DataAbstract_ptr left, DataAbst
328    
329    // Prepare for the loops of the product and verify compatibility of shapes    // Prepare for the loops of the product and verify compatibility of shapes
330    int start0=0, start1=0;    int start0=0, start1=0;
331    if (transpose == 0)       {}    if (transpose == 0)           {}
332    else if (transpose == 1)  { start0 = axis_offset; }    else if (transpose == 1)      { start0 = axis_offset; }
333    else if (transpose == 2)  { start1 = rank1-axis_offset; }    else if (transpose == 2)      { start1 = rank1-axis_offset; }
334    else              { throw DataException("DataLazy GeneralTensorProduct Constructor: Error - transpose should be 0, 1 or 2"); }    else                          { throw DataException("DataLazy GeneralTensorProduct Constructor: Error - transpose should be 0, 1 or 2"); }
335    
336    if (rank0<axis_offset)    if (rank0<axis_offset)
337    {    {
338      throw DataException("DataLazy GeneralTensorProduct Constructor: Error - rank of left < axisoffset");          throw DataException("DataLazy GeneralTensorProduct Constructor: Error - rank of left < axisoffset");
339    }    }
340    
341    // Adjust the shapes for transpose    // Adjust the shapes for transpose
342    DataTypes::ShapeType tmpShape0(rank0);    // pre-sizing the vectors rather    DataTypes::ShapeType tmpShape0(rank0);        // pre-sizing the vectors rather
343    DataTypes::ShapeType tmpShape1(rank1);    // than using push_back    DataTypes::ShapeType tmpShape1(rank1);        // than using push_back
344    for (int i=0; i<rank0; i++)   { tmpShape0[i]=shape0[(i+start0)%rank0]; }    for (int i=0; i<rank0; i++)   { tmpShape0[i]=shape0[(i+start0)%rank0]; }
345    for (int i=0; i<rank1; i++)   { tmpShape1[i]=shape1[(i+start1)%rank1]; }    for (int i=0; i<rank1; i++)   { tmpShape1[i]=shape1[(i+start1)%rank1]; }
346    
347    // Prepare for the loops of the product    // Prepare for the loops of the product
348    SL=1, SM=1, SR=1;    SL=1, SM=1, SR=1;
349    for (int i=0; i<rank0-axis_offset; i++)   {    for (int i=0; i<rank0-axis_offset; i++)       {
350      SL *= tmpShape0[i];      SL *= tmpShape0[i];
351    }    }
352    for (int i=rank0-axis_offset; i<rank0; i++)   {    for (int i=rank0-axis_offset; i<rank0; i++)   {
353      if (tmpShape0[i] != tmpShape1[i-(rank0-axis_offset)]) {      if (tmpShape0[i] != tmpShape1[i-(rank0-axis_offset)]) {
354        throw DataException("C_GeneralTensorProduct: Error - incompatible shapes");        throw DataException("C_GeneralTensorProduct: Error - incompatible shapes");
355      }      }
356      SM *= tmpShape0[i];      SM *= tmpShape0[i];
357    }    }
358    for (int i=axis_offset; i<rank1; i++)     {    for (int i=axis_offset; i<rank1; i++)         {
359      SR *= tmpShape1[i];      SR *= tmpShape1[i];
360    }    }
361    
362    // Define the shape of the output (rank of shape is the sum of the loop ranges below)    // Define the shape of the output (rank of shape is the sum of the loop ranges below)
363    DataTypes::ShapeType shape2(rank0+rank1-2*axis_offset);      DataTypes::ShapeType shape2(rank0+rank1-2*axis_offset);      
364    {         // block to limit the scope of out_index    {                     // block to limit the scope of out_index
365       int out_index=0;       int out_index=0;
366       for (int i=0; i<rank0-axis_offset; i++, ++out_index) { shape2[out_index]=tmpShape0[i]; } // First part of arg_0_Z       for (int i=0; i<rank0-axis_offset; i++, ++out_index) { shape2[out_index]=tmpShape0[i]; } // First part of arg_0_Z
367       for (int i=axis_offset; i<rank1; i++, ++out_index)   { shape2[out_index]=tmpShape1[i]; } // Last part of arg_1_Z       for (int i=axis_offset; i<rank1; i++, ++out_index)   { shape2[out_index]=tmpShape1[i]; } // Last part of arg_1_Z
# Line 266 (old) / Line 377 (new) : GTPShape(DataAbstract_ptr left, DataAbst
377    return shape2;    return shape2;
378  }  }
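For instance (a hypothetical case, not from the file): a left argument of shape (3,4) and a right argument of shape (4,5) with axis_offset=1 and transpose=0 give SL=3, SM=4, SR=5 and an output shape of (3,5), i.e. an ordinary matrix-matrix product.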
379    
380    }       // end anonymous namespace
381    
382  // determine the number of points in the result of "left op right"  void DataLazy::LazyNodeSetup()
 // note that determining the resultLength for G_TENSORPROD is more complex and will not be processed here  
 // size_t  
 // resultLength(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)  
 // {  
 //    switch (getOpgroup(op))  
 //    {  
 //    case G_BINARY: return left->getLength();  
 //    case G_UNARY: return left->getLength();  
 //    case G_NP1OUT: return left->getLength();  
 //    default:  
 //  throw DataException("Programmer Error - attempt to getLength() for operator "+opToString(op)+".");  
 //    }  
 // }  
   
 // determine the number of samples requires to evaluate an expression combining left and right  
 // NP1OUT needs an extra buffer because we can't write the answers over the top of the input.  
 // The same goes for G_TENSORPROD  
 int  
 calcBuffs(const DataLazy_ptr& left, const DataLazy_ptr& right, ES_optype op)  
383  {  {
384     switch(getOpgroup(op))  #ifdef _OPENMP
385     {      int numthreads=omp_get_max_threads();
386     case G_IDENTITY: return 1;      m_samples.resize(numthreads*m_samplesize);
387     case G_BINARY: return max(left->getBuffsRequired(),right->getBuffsRequired()+1);      m_sampleids=new int[numthreads];
388     case G_UNARY: return max(left->getBuffsRequired(),1);      for (int i=0;i<numthreads;++i)
389     case G_NP1OUT: return 1+max(left->getBuffsRequired(),1);      {
390     case G_NP1OUT_P: return 1+max(left->getBuffsRequired(),1);          m_sampleids[i]=-1;  
391     case G_TENSORPROD: return 1+max(left->getBuffsRequired(),right->getBuffsRequired()+1);      }
392     default:  #else
393      throw DataException("Programmer Error - attempt to calcBuffs() for operator "+opToString(op)+".");      m_samples.resize(m_samplesize);
394     }      m_sampleids=new int[1];
395  }      m_sampleids[0]=-1;
396    #endif  // _OPENMP
   
 }   // end anonymous namespace  
   
   
   
 // Return a string representing the operation  
 const std::string&  
 opToString(ES_optype op)  
 {  
   if (op<0 || op>=ES_opcount)  
   {  
     op=UNKNOWNOP;  
   }  
   return ES_opstrings[op];  
397  }  }
398    
399    
400    // Creates an identity node
401  DataLazy::DataLazy(DataAbstract_ptr p)  DataLazy::DataLazy(DataAbstract_ptr p)
402      : parent(p->getFunctionSpace(),p->getShape()),          : parent(p->getFunctionSpace(),p->getShape())
403      m_op(IDENTITY),          ,m_sampleids(0),
404      m_axis_offset(0),          m_samples(1)
     m_transpose(0),  
     m_SL(0), m_SM(0), m_SR(0)  
405  {  {
406     if (p->isLazy())     if (p->isLazy())
407     {     {
408      // I don't want identity of Lazy.          // I don't want identity of Lazy.
409      // Question: Why would that be so bad?          // Question: Why would that be so bad?
410      // Answer: We assume that the child of ID is something we can call getVector on          // Answer: We assume that the child of ID is something we can call getVector on
411      throw DataException("Programmer error - attempt to create identity from a DataLazy.");          throw DataException("Programmer error - attempt to create identity from a DataLazy.");
412     }     }
413     else     else
414     {     {
415      m_id=dynamic_pointer_cast<DataReady>(p);          DataReady_ptr dr=dynamic_pointer_cast<DataReady>(p);
416      if(p->isConstant()) {m_readytype='C';}          makeIdentity(dr);
417      else if(p->isExpanded()) {m_readytype='E';}  LAZYDEBUG(cout << "Wrapping " << dr.get() << " id=" << m_id.get() << endl;)
     else if (p->isTagged()) {m_readytype='T';}  
     else {throw DataException("Unknown DataReady instance in DataLazy constructor.");}  
418     }     }
    m_buffsRequired=1;  
    m_samplesize=getNumDPPSample()*getNoValues();  
    m_maxsamplesize=m_samplesize;  
419  LAZYDEBUG(cout << "(1)Lazy created with " << m_samplesize << endl;)  LAZYDEBUG(cout << "(1)Lazy created with " << m_samplesize << endl;)
420  }  }
421    
   
   
   
422  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op)  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op)
423      : parent(left->getFunctionSpace(),left->getShape()),          : parent(left->getFunctionSpace(),(getOpgroup(op)!=G_REDUCTION)?left->getShape():DataTypes::scalarShape),
424      m_op(op),          m_op(op),
425      m_axis_offset(0),          m_axis_offset(0),
426      m_transpose(0),          m_transpose(0),
427      m_SL(0), m_SM(0), m_SR(0)          m_SL(0), m_SM(0), m_SR(0)
428  {  {
429     if ((getOpgroup(op)!=G_UNARY) && (getOpgroup(op)!=G_NP1OUT))     if ((getOpgroup(op)!=G_UNARY) && (getOpgroup(op)!=G_NP1OUT) && (getOpgroup(op)!=G_REDUCTION))
430     {     {
431      throw DataException("Programmer error - constructor DataLazy(left, op) will only process UNARY operations.");          throw DataException("Programmer error - constructor DataLazy(left, op) will only process UNARY operations.");
432     }     }
433    
434     DataLazy_ptr lleft;     DataLazy_ptr lleft;
435     if (!left->isLazy())     if (!left->isLazy())
436     {     {
437      lleft=DataLazy_ptr(new DataLazy(left));          lleft=DataLazy_ptr(new DataLazy(left));
438     }     }
439     else     else
440     {     {
441      lleft=dynamic_pointer_cast<DataLazy>(left);          lleft=dynamic_pointer_cast<DataLazy>(left);
442     }     }
443     m_readytype=lleft->m_readytype;     m_readytype=lleft->m_readytype;
444     m_left=lleft;     m_left=lleft;
    m_buffsRequired=calcBuffs(m_left, m_right,m_op); // yeah m_right will be null at this point  
445     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
446     m_maxsamplesize=max(m_samplesize,m_left->getMaxSampleSize());     m_children=m_left->m_children+1;
447       m_height=m_left->m_height+1;
448       LazyNodeSetup();
449       SIZELIMIT
450  }  }
451    
452    
453  // In this constructor we need to consider interpolation  // In this constructor we need to consider interpolation
454  DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)  DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
455      : parent(resultFS(left,right,op), resultShape(left,right,op)),          : parent(resultFS(left,right,op), resultShape(left,right,op)),
456      m_op(op),          m_op(op),
457      m_SL(0), m_SM(0), m_SR(0)          m_SL(0), m_SM(0), m_SR(0)
458  {  {
459    LAZYDEBUG(cout << "Forming operator with " << left.get() << " " << right.get() << endl;)
460     if ((getOpgroup(op)!=G_BINARY))     if ((getOpgroup(op)!=G_BINARY))
461     {     {
462      throw DataException("Programmer error - constructor DataLazy(left, right, op) will only process BINARY operations.");          throw DataException("Programmer error - constructor DataLazy(left, right, op) will only process BINARY operations.");
463     }     }
464    
465     if (getFunctionSpace()!=left->getFunctionSpace())    // left needs to be interpolated     if (getFunctionSpace()!=left->getFunctionSpace())    // left needs to be interpolated
466     {     {
467      FunctionSpace fs=getFunctionSpace();          FunctionSpace fs=getFunctionSpace();
468      Data ltemp(left);          Data ltemp(left);
469      Data tmp(ltemp,fs);          Data tmp(ltemp,fs);
470      left=tmp.borrowDataPtr();          left=tmp.borrowDataPtr();
471     }     }
472     if (getFunctionSpace()!=right->getFunctionSpace())   // right needs to be interpolated     if (getFunctionSpace()!=right->getFunctionSpace())   // right needs to be interpolated
473     {     {
474      Data tmp(Data(right),getFunctionSpace());          Data tmp(Data(right),getFunctionSpace());
475      right=tmp.borrowDataPtr();          right=tmp.borrowDataPtr();
476    LAZYDEBUG(cout << "Right interpolation required " << right.get() << endl;)
477     }     }
478     left->operandCheck(*right);     left->operandCheck(*right);
479    
480     if (left->isLazy())          // the children need to be DataLazy. Wrap them in IDENTITY if required     if (left->isLazy())                  // the children need to be DataLazy. Wrap them in IDENTITY if required
481     {     {
482      m_left=dynamic_pointer_cast<DataLazy>(left);          m_left=dynamic_pointer_cast<DataLazy>(left);
483    LAZYDEBUG(cout << "Left is " << m_left->toString() << endl;)
484     }     }
485     else     else
486     {     {
487      m_left=DataLazy_ptr(new DataLazy(left));          m_left=DataLazy_ptr(new DataLazy(left));
488    LAZYDEBUG(cout << "Left " << left.get() << " wrapped " << m_left->m_id.get() << endl;)
489     }     }
490     if (right->isLazy())     if (right->isLazy())
491     {     {
492      m_right=dynamic_pointer_cast<DataLazy>(right);          m_right=dynamic_pointer_cast<DataLazy>(right);
493    LAZYDEBUG(cout << "Right is " << m_right->toString() << endl;)
494     }     }
495     else     else
496     {     {
497      m_right=DataLazy_ptr(new DataLazy(right));          m_right=DataLazy_ptr(new DataLazy(right));
498    LAZYDEBUG(cout << "Right " << right.get() << " wrapped " << m_right->m_id.get() << endl;)
499     }     }
500     char lt=m_left->m_readytype;     char lt=m_left->m_readytype;
501     char rt=m_right->m_readytype;     char rt=m_right->m_readytype;
502     if (lt=='E' || rt=='E')     if (lt=='E' || rt=='E')
503     {     {
504      m_readytype='E';          m_readytype='E';
505     }     }
506     else if (lt=='T' || rt=='T')     else if (lt=='T' || rt=='T')
507     {     {
508      m_readytype='T';          m_readytype='T';
509     }     }
510     else     else
511     {     {
512      m_readytype='C';          m_readytype='C';
513     }     }
514     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
515     m_maxsamplesize=max(max(m_samplesize,m_right->getMaxSampleSize()),m_left->getMaxSampleSize());       m_children=m_left->m_children+m_right->m_children+2;
516     m_buffsRequired=calcBuffs(m_left, m_right,m_op);     m_height=max(m_left->m_height,m_right->m_height)+1;
517       LazyNodeSetup();
518       SIZELIMIT
519  LAZYDEBUG(cout << "(3)Lazy created with " << m_samplesize << endl;)  LAZYDEBUG(cout << "(3)Lazy created with " << m_samplesize << endl;)
520  }  }
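In other words (an illustrative reading of the bookkeeping above, not additional behaviour): a node built from a left child of height 3 and a right child of height 5 gets m_height=6 and counts both subtrees plus the two children themselves in m_children; once m_height exceeds escriptParams.getTOO_MANY_LEVELS(), the SIZELIMIT macro collapses the node via resolveToIdentity().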
521    
522  DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op, int axis_offset, int transpose)  DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op, int axis_offset, int transpose)
523      : parent(resultFS(left,right,op), GTPShape(left,right, axis_offset, transpose, m_SL,m_SM, m_SR)),          : parent(resultFS(left,right,op), GTPShape(left,right, axis_offset, transpose, m_SL,m_SM, m_SR)),
524      m_op(op),          m_op(op),
525      m_axis_offset(axis_offset),          m_axis_offset(axis_offset),
526      m_transpose(transpose)          m_transpose(transpose)
527  {  {
528     if ((getOpgroup(op)!=G_TENSORPROD))     if ((getOpgroup(op)!=G_TENSORPROD))
529     {     {
530      throw DataException("Programmer error - constructor DataLazy(left, right, op, ax, tr) will only process BINARY operations which require parameters.");          throw DataException("Programmer error - constructor DataLazy(left, right, op, ax, tr) will only process BINARY operations which require parameters.");
531     }     }
532     if ((transpose>2) || (transpose<0))     if ((transpose>2) || (transpose<0))
533     {     {
534      throw DataException("DataLazy GeneralTensorProduct constructor: Error - transpose should be 0, 1 or 2");          throw DataException("DataLazy GeneralTensorProduct constructor: Error - transpose should be 0, 1 or 2");
535     }     }
536     if (getFunctionSpace()!=left->getFunctionSpace())    // left needs to be interpolated     if (getFunctionSpace()!=left->getFunctionSpace())    // left needs to be interpolated
537     {     {
538      FunctionSpace fs=getFunctionSpace();          FunctionSpace fs=getFunctionSpace();
539      Data ltemp(left);          Data ltemp(left);
540      Data tmp(ltemp,fs);          Data tmp(ltemp,fs);
541      left=tmp.borrowDataPtr();          left=tmp.borrowDataPtr();
542     }     }
543     if (getFunctionSpace()!=right->getFunctionSpace())   // right needs to be interpolated     if (getFunctionSpace()!=right->getFunctionSpace())   // right needs to be interpolated
544     {     {
545      Data tmp(Data(right),getFunctionSpace());          Data tmp(Data(right),getFunctionSpace());
546      right=tmp.borrowDataPtr();          right=tmp.borrowDataPtr();
547     }     }
548     left->operandCheck(*right);  //    left->operandCheck(*right);
549    
550     if (left->isLazy())          // the children need to be DataLazy. Wrap them in IDENTITY if required     if (left->isLazy())                  // the children need to be DataLazy. Wrap them in IDENTITY if required
551     {     {
552      m_left=dynamic_pointer_cast<DataLazy>(left);          m_left=dynamic_pointer_cast<DataLazy>(left);
553     }     }
554     else     else
555     {     {
556      m_left=DataLazy_ptr(new DataLazy(left));          m_left=DataLazy_ptr(new DataLazy(left));
557     }     }
558     if (right->isLazy())     if (right->isLazy())
559     {     {
560      m_right=dynamic_pointer_cast<DataLazy>(right);          m_right=dynamic_pointer_cast<DataLazy>(right);
561     }     }
562     else     else
563     {     {
564      m_right=DataLazy_ptr(new DataLazy(right));          m_right=DataLazy_ptr(new DataLazy(right));
565     }     }
566     char lt=m_left->m_readytype;     char lt=m_left->m_readytype;
567     char rt=m_right->m_readytype;     char rt=m_right->m_readytype;
568     if (lt=='E' || rt=='E')     if (lt=='E' || rt=='E')
569     {     {
570      m_readytype='E';          m_readytype='E';
571     }     }
572     else if (lt=='T' || rt=='T')     else if (lt=='T' || rt=='T')
573     {     {
574      m_readytype='T';          m_readytype='T';
575     }     }
576     else     else
577     {     {
578      m_readytype='C';          m_readytype='C';
579     }     }
580     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
581     m_maxsamplesize=max(max(m_samplesize,m_right->getMaxSampleSize()),m_left->getMaxSampleSize());       m_children=m_left->m_children+m_right->m_children+2;
582     m_buffsRequired=calcBuffs(m_left, m_right,m_op);     m_height=max(m_left->m_height,m_right->m_height)+1;
583       LazyNodeSetup();
584       SIZELIMIT
585  LAZYDEBUG(cout << "(4)Lazy created with " << m_samplesize << endl;)  LAZYDEBUG(cout << "(4)Lazy created with " << m_samplesize << endl;)
586  }  }
587    
588    
589  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, int axis_offset)  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, int axis_offset)
590      : parent(left->getFunctionSpace(), resultShape(left,op)),          : parent(left->getFunctionSpace(), resultShape(left,op, axis_offset)),
591      m_op(op),          m_op(op),
592      m_axis_offset(axis_offset),          m_axis_offset(axis_offset),
593      m_transpose(0)          m_transpose(0),
594            m_tol(0)
595  {  {
596     if ((getOpgroup(op)!=G_NP1OUT_P))     if ((getOpgroup(op)!=G_NP1OUT_P))
597     {     {
598      throw DataException("Programmer error - constructor DataLazy(left, op, ax) will only process UNARY operations which require parameters.");          throw DataException("Programmer error - constructor DataLazy(left, op, ax) will only process UNARY operations which require parameters.");
599     }     }
600     DataLazy_ptr lleft;     DataLazy_ptr lleft;
601     if (!left->isLazy())     if (!left->isLazy())
602     {     {
603      lleft=DataLazy_ptr(new DataLazy(left));          lleft=DataLazy_ptr(new DataLazy(left));
604     }     }
605     else     else
606     {     {
607      lleft=dynamic_pointer_cast<DataLazy>(left);          lleft=dynamic_pointer_cast<DataLazy>(left);
608     }     }
609     m_readytype=lleft->m_readytype;     m_readytype=lleft->m_readytype;
610     m_left=lleft;     m_left=lleft;
    m_buffsRequired=calcBuffs(m_left, m_right,m_op); // yeah m_right will be null at this point  
611     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
612     m_maxsamplesize=max(m_samplesize,m_left->getMaxSampleSize());     m_children=m_left->m_children+1;
613       m_height=m_left->m_height+1;
614       LazyNodeSetup();
615       SIZELIMIT
616  LAZYDEBUG(cout << "(5)Lazy created with " << m_samplesize << endl;)  LAZYDEBUG(cout << "(5)Lazy created with " << m_samplesize << endl;)
617  }  }
618    
619    DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, double tol)
620            : parent(left->getFunctionSpace(), left->getShape()),
621            m_op(op),
622            m_axis_offset(0),
623            m_transpose(0),
624            m_tol(tol)
625    {
626       if ((getOpgroup(op)!=G_UNARY_P))
627       {
628            throw DataException("Programmer error - constructor DataLazy(left, op, tol) will only process UNARY operations which require parameters.");
629       }
630       DataLazy_ptr lleft;
631       if (!left->isLazy())
632       {
633            lleft=DataLazy_ptr(new DataLazy(left));
634       }
635       else
636       {
637            lleft=dynamic_pointer_cast<DataLazy>(left);
638       }
639       m_readytype=lleft->m_readytype;
640       m_left=lleft;
641       m_samplesize=getNumDPPSample()*getNoValues();
642       m_children=m_left->m_children+1;
643       m_height=m_left->m_height+1;
644       LazyNodeSetup();
645       SIZELIMIT
646    LAZYDEBUG(cout << "(6)Lazy created with " << m_samplesize << endl;)
647    }
648    
649    
650  DataLazy::~DataLazy()  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, const int axis0, const int axis1)
651            : parent(left->getFunctionSpace(), SwapShape(left,axis0,axis1)),
652            m_op(op),
653            m_axis_offset(axis0),
654            m_transpose(axis1),
655            m_tol(0)
656  {  {
657       if ((getOpgroup(op)!=G_NP1OUT_2P))
658       {
659            throw DataException("Programmer error - constructor DataLazy(left, op, tol) will only process UNARY operations which require two integer parameters.");
660       }
661       DataLazy_ptr lleft;
662       if (!left->isLazy())
663       {
664            lleft=DataLazy_ptr(new DataLazy(left));
665       }
666       else
667       {
668            lleft=dynamic_pointer_cast<DataLazy>(left);
669       }
670       m_readytype=lleft->m_readytype;
671       m_left=lleft;
672       m_samplesize=getNumDPPSample()*getNoValues();
673       m_children=m_left->m_children+1;
674       m_height=m_left->m_height+1;
675       LazyNodeSetup();
676       SIZELIMIT
677    LAZYDEBUG(cout << "(7)Lazy created with " << m_samplesize << endl;)
678  }  }
679    
680    
681  int  namespace
 DataLazy::getBuffsRequired() const  
682  {  {
683      return m_buffsRequired;  
684        inline int max3(int a, int b, int c)
685        {
686            int t=(a>b?a:b);
687            return (t>c?t:c);
688    
689        }
690  }  }
691    
692    DataLazy::DataLazy(DataAbstract_ptr mask, DataAbstract_ptr left, DataAbstract_ptr right/*, double tol*/)
693            : parent(left->getFunctionSpace(), left->getShape()),
694            m_op(CONDEVAL),
695            m_axis_offset(0),
696            m_transpose(0),
697            m_tol(0)
698    {
699    
700  size_t     DataLazy_ptr lmask;
701  DataLazy::getMaxSampleSize() const     DataLazy_ptr lleft;
702       DataLazy_ptr lright;
703       if (!mask->isLazy())
704       {
705            lmask=DataLazy_ptr(new DataLazy(mask));
706       }
707       else
708       {
709            lmask=dynamic_pointer_cast<DataLazy>(mask);
710       }
711       if (!left->isLazy())
712       {
713            lleft=DataLazy_ptr(new DataLazy(left));
714       }
715       else
716       {
717            lleft=dynamic_pointer_cast<DataLazy>(left);
718       }
719       if (!right->isLazy())
720       {
721            lright=DataLazy_ptr(new DataLazy(right));
722       }
723       else
724       {
725            lright=dynamic_pointer_cast<DataLazy>(right);
726       }
727       m_readytype=lmask->m_readytype;
728       if ((lleft->m_readytype!=lright->m_readytype) || (lmask->m_readytype!=lleft->m_readytype))
729       {
730            throw DataException("Programmer Error - condEval arguments must have the same readytype");
731       }
732       m_left=lleft;
733       m_right=lright;
734       m_mask=lmask;
735       m_samplesize=getNumDPPSample()*getNoValues();
736       m_children=m_left->m_children+m_right->m_children+m_mask->m_children+1;
737       m_height=max3(m_left->m_height,m_right->m_height,m_mask->m_height)+1;
738       LazyNodeSetup();
739       SIZELIMIT
740    LAZYDEBUG(cout << "(8)Lazy created with " << m_samplesize << endl;)
741    }
742    
743    
744    
745    DataLazy::~DataLazy()
746  {  {
747      return m_maxsamplesize;     delete[] m_sampleids;
748  }  }
749    
750    
751  /*  /*
752    \brief Evaluates the expression using methods on Data.    \brief Evaluates the expression using methods on Data.
753    This does the work for the collapse method.    This does the work for the collapse method.
754    For reasons of efficiency do not call this method on DataExpanded nodes.    For reasons of efficiency do not call this method on DataExpanded nodes.
755  */  */
756  DataReady_ptr  DataReady_ptr
757  DataLazy::collapseToReady()  DataLazy::collapseToReady() const
758  {  {
759    if (m_readytype=='E')    if (m_readytype=='E')
760    { // this is more an efficiency concern than anything else    {     // this is more an efficiency concern than anything else
761      throw DataException("Programmer Error - do not use collapse on Expanded data.");      throw DataException("Programmer Error - do not use collapse on Expanded data.");
762    }    }
763    if (m_op==IDENTITY)    if (m_op==IDENTITY)
# Line 577 (old) / Line 775 (new) : DataLazy::collapseToReady()
775    switch(m_op)    switch(m_op)
776    {    {
777      case ADD:      case ADD:
778      result=left+right;          result=left+right;
779      break;          break;
780      case SUB:            case SUB:          
781      result=left-right;          result=left-right;
782      break;          break;
783      case MUL:            case MUL:          
784      result=left*right;          result=left*right;
785      break;          break;
786      case DIV:            case DIV:          
787      result=left/right;          result=left/right;
788      break;          break;
789      case SIN:      case SIN:
790      result=left.sin();            result=left.sin();      
791      break;          break;
792      case COS:      case COS:
793      result=left.cos();          result=left.cos();
794      break;          break;
795      case TAN:      case TAN:
796      result=left.tan();          result=left.tan();
797      break;          break;
798      case ASIN:      case ASIN:
799      result=left.asin();          result=left.asin();
800      break;          break;
801      case ACOS:      case ACOS:
802      result=left.acos();          result=left.acos();
803      break;          break;
804      case ATAN:      case ATAN:
805      result=left.atan();          result=left.atan();
806      break;          break;
807      case SINH:      case SINH:
808      result=left.sinh();          result=left.sinh();
809      break;          break;
810      case COSH:      case COSH:
811      result=left.cosh();          result=left.cosh();
812      break;          break;
813      case TANH:      case TANH:
814      result=left.tanh();          result=left.tanh();
815      break;          break;
816      case ERF:      case ERF:
817      result=left.erf();          result=left.erf();
818      break;          break;
819     case ASINH:     case ASINH:
820      result=left.asinh();          result=left.asinh();
821      break;          break;
822     case ACOSH:     case ACOSH:
823      result=left.acosh();          result=left.acosh();
824      break;          break;
825     case ATANH:     case ATANH:
826      result=left.atanh();          result=left.atanh();
827      break;          break;
828      case LOG10:      case LOG10:
829      result=left.log10();          result=left.log10();
830      break;          break;
831      case LOG:      case LOG:
832      result=left.log();          result=left.log();
833      break;          break;
834      case SIGN:      case SIGN:
835      result=left.sign();          result=left.sign();
836      break;          break;
837      case ABS:      case ABS:
838      result=left.abs();          result=left.abs();
839      break;          break;
840      case NEG:      case NEG:
841      result=left.neg();          result=left.neg();
842      break;          break;
843      case POS:      case POS:
844      // it doesn't mean anything for delayed.          // it doesn't mean anything for delayed.
845      // it will just trigger a deep copy of the lazy object          // it will just trigger a deep copy of the lazy object
846      throw DataException("Programmer error - POS not supported for lazy data.");          throw DataException("Programmer error - POS not supported for lazy data.");
847      break;          break;
848      case EXP:      case EXP:
849      result=left.exp();          result=left.exp();
850      break;          break;
851      case SQRT:      case SQRT:
852      result=left.sqrt();          result=left.sqrt();
853      break;          break;
854      case RECIP:      case RECIP:
855      result=left.oneOver();          result=left.oneOver();
856      break;          break;
857      case GZ:      case GZ:
858      result=left.wherePositive();          result=left.wherePositive();
859      break;          break;
860      case LZ:      case LZ:
861      result=left.whereNegative();          result=left.whereNegative();
862      break;          break;
863      case GEZ:      case GEZ:
864      result=left.whereNonNegative();          result=left.whereNonNegative();
865      break;          break;
866      case LEZ:      case LEZ:
867      result=left.whereNonPositive();          result=left.whereNonPositive();
868      break;          break;
869        case NEZ:
870            result=left.whereNonZero(m_tol);
871            break;
872        case EZ:
873            result=left.whereZero(m_tol);
874            break;
875      case SYM:      case SYM:
876      result=left.symmetric();          result=left.symmetric();
877      break;          break;
878      case NSYM:      case NSYM:
879      result=left.nonsymmetric();          result=left.antisymmetric();
880      break;          break;
881      case PROD:      case PROD:
882      result=C_GeneralTensorProduct(left,right,m_axis_offset, m_transpose);          result=C_GeneralTensorProduct(left,right,m_axis_offset, m_transpose);
883      break;          break;
884      case TRANS:      case TRANS:
885      result=left.transpose(m_axis_offset);          result=left.transpose(m_axis_offset);
886      break;          break;
887      case TRACE:      case TRACE:
888      result=left.trace(m_axis_offset);          result=left.trace(m_axis_offset);
889            break;
890        case SWAP:
891            result=left.swapaxes(m_axis_offset, m_transpose);
892            break;
893        case MINVAL:
894            result=left.minval();
895            break;
896        case MAXVAL:
897            result=left.minval();
898            break;
899        case HER:
900        result=left.hermitian();
901      break;      break;
902      default:      default:
903      throw DataException("Programmer error - collapseToReady does not know how to resolve operator "+opToString(m_op)+".");          throw DataException("Programmer error - collapseToReady does not know how to resolve operator "+opToString(m_op)+".");
904    }    }
905    return result.borrowReadyPtr();    return result.borrowReadyPtr();
906  }  }
# Line 696 (old) / Line 912 (new) : DataLazy::collapseToReady()
912     the purpose of using DataLazy in the first place).     the purpose of using DataLazy in the first place).
913  */  */
914  void  void
915  DataLazy::collapse()  DataLazy::collapse() const
916  {  {
917    if (m_op==IDENTITY)    if (m_op==IDENTITY)
918    {    {
919      return;          return;
920    }    }
921    if (m_readytype=='E')    if (m_readytype=='E')
922    { // this is more an efficiency concern than anything else    {     // this is more an efficiency concern than anything else
923      throw DataException("Programmer Error - do not use collapse on Expanded data.");      throw DataException("Programmer Error - do not use collapse on Expanded data.");
924    }    }
925    m_id=collapseToReady();    m_id=collapseToReady();
926    m_op=IDENTITY;    m_op=IDENTITY;
927  }  }
928    
929  /*  // The result will be stored in m_samples
930    \brief Compute the value of the expression (unary operation) for the given sample.  // The return value is a pointer to the DataVector, offset is the offset within the return value
931    \return Vector which stores the value of the subexpression for the given sample.  const DataTypes::RealVectorType*
932    \param v A vector to store intermediate results.  DataLazy::resolveNodeSample(int tid, int sampleNo, size_t& roffset) const
   \param offset Index in v to begin storing results.  
   \param sampleNo Sample number to evaluate.  
   \param roffset (output parameter) the offset in the return vector where the result begins.  
   
   The return value will be an existing vector so do not deallocate it.  
   If the result is stored in v it should be stored at the offset given.  
   Everything from offset to the end of v should be considered available for this method to use.  
 */  
 DataTypes::ValueType*  
 DataLazy::resolveUnary(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const  
933  {  {
934      // we assume that any collapsing has been done before we get here  LAZYDEBUG(cout << "Resolve sample " << toString() << endl;)
935      // since we only have one argument we don't need to think about only          // collapse so we have a 'E' node or an IDENTITY for some other type
936      // processing single points.    if (m_readytype!='E' && m_op!=IDENTITY)
937      {
938            collapse();
939      }
940      if (m_op==IDENTITY)  
941      {
942        const RealVectorType& vec=m_id->getVectorRO();
943        roffset=m_id->getPointOffset(sampleNo, 0);
944    #ifdef LAZY_STACK_PROF
945    int x;
946    if (&x<stackend[omp_get_thread_num()])
947    {
948           stackend[omp_get_thread_num()]=&x;
949    }
950    #endif
951        return &(vec);
952      }
953      if (m_readytype!='E')
954      {
955        throw DataException("Programmer Error - Collapse did not produce an expanded node.");
956      }
957      if (m_sampleids[tid]==sampleNo)
958      {
959            roffset=tid*m_samplesize;
960            return &(m_samples);            // sample is already resolved
961      }
962      m_sampleids[tid]=sampleNo;
963    
964      switch (getOpgroup(m_op))
965      {
966      case G_UNARY:
967      case G_UNARY_P: return resolveNodeUnary(tid, sampleNo, roffset);
968      case G_BINARY: return resolveNodeBinary(tid, sampleNo, roffset);
969      case G_NP1OUT: return resolveNodeNP1OUT(tid, sampleNo, roffset);
970      case G_NP1OUT_P: return resolveNodeNP1OUT_P(tid, sampleNo, roffset);
971      case G_TENSORPROD: return resolveNodeTProd(tid, sampleNo, roffset);
972      case G_NP1OUT_2P: return resolveNodeNP1OUT_2P(tid, sampleNo, roffset);
973      case G_REDUCTION: return resolveNodeReduction(tid, sampleNo, roffset);
974      case G_CONDEVAL: return resolveNodeCondEval(tid, sampleNo, roffset);
975      default:
976        throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");
977      }
978    }
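The early return above relies on the per-thread buffers set up in LazyNodeSetup(): every OpenMP thread owns one m_samplesize slot in m_samples plus an entry in m_sampleids recording which sample (if any) that slot currently holds, with -1 meaning empty. A minimal standalone sketch of the same memoisation pattern (generic names, assuming an OpenMP build; not the escript API):

#include <omp.h>
#include <vector>

// Toy per-thread sample cache: one slot of 'samplesize' doubles per thread,
// tagged with the sample number it currently holds (-1 means empty).
struct ToySampleCache
{
    std::vector<double> samples;   // numthreads * samplesize doubles
    std::vector<int> ids;          // which sample each thread's slot holds
    size_t samplesize;

    explicit ToySampleCache(size_t ssize)
      : samples(omp_get_max_threads()*ssize),
        ids(omp_get_max_threads(), -1),
        samplesize(ssize) {}

    // Return this thread's slot, recomputing its contents only on a miss.
    template <typename Compute>
    const double* get(int sampleNo, Compute compute)
    {
        int tid = omp_get_thread_num();
        double* slot = &samples[tid*samplesize];
        if (ids[tid] != sampleNo)          // miss: fill the slot
        {
            compute(sampleNo, slot);
            ids[tid] = sampleNo;
        }
        return slot;                       // hit, or freshly filled
    }
};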
979    
980    const DataTypes::RealVectorType*
981    DataLazy::resolveNodeUnary(int tid, int sampleNo, size_t& roffset) const
982    {
983            // we assume that any collapsing has been done before we get here
984            // since we only have one argument we don't need to think about only
985            // processing single points.
986            // we will also know we won't get identity nodes
987    if (m_readytype!='E')    if (m_readytype!='E')
988    {    {
989      throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");      throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
990    }    }
991    const ValueType* vleft=m_left->resolveSample(v,offset,sampleNo,roffset);    if (m_op==IDENTITY)
   const double* left=&((*vleft)[roffset]);  
   double* result=&(v[offset]);  
   roffset=offset;  
   switch (m_op)  
992    {    {
993      case SIN:        throw DataException("Programmer error - resolveNodeUnary should not be called on identity nodes.");
994      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sin);    }
995      break;    const DataTypes::RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, roffset);
996      case COS:    const double* left=&((*leftres)[roffset]);
997      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cos);    roffset=m_samplesize*tid;
998      break;    double* result=&(m_samples[roffset]);
999      case TAN:    if (m_op==POS)
1000      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tan);    {
1001      break;      // this should be prevented earlier
1002      case ASIN:      // operation is meaningless for lazy
1003      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::asin);          throw DataException("Programmer error - POS not supported for lazy data.");    
1004      break;    }
1005      case ACOS:    tensor_unary_array_operation(m_samplesize,
1006      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::acos);                               left,
1007      break;                               result,
1008      case ATAN:                               m_op,
1009      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::atan);                               m_tol);  
1010      break;    return &(m_samples);
1011      case SINH:  }
1012      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sinh);  
     break;  
     case COSH:  
     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cosh);  
     break;  
     case TANH:  
     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tanh);  
     break;  
     case ERF:  
 #if defined (_WIN32) && !defined(__INTEL_COMPILER)  
     throw DataException("Error - Data:: erf function is not supported on _WIN32 platforms.");  
 #else  
     tensor_unary_operation(m_samplesize, left, result, ::erf);  
     break;  
 #endif  
    case ASINH:  
 #if defined (_WIN32) && !defined(__INTEL_COMPILER)  
     tensor_unary_operation(m_samplesize, left, result, escript::asinh_substitute);  
 #else  
     tensor_unary_operation(m_samplesize, left, result, ::asinh);  
 #endif    
     break;  
    case ACOSH:  
 #if defined (_WIN32) && !defined(__INTEL_COMPILER)  
     tensor_unary_operation(m_samplesize, left, result, escript::acosh_substitute);  
 #else  
     tensor_unary_operation(m_samplesize, left, result, ::acosh);  
 #endif    
     break;  
    case ATANH:  
 #if defined (_WIN32) && !defined(__INTEL_COMPILER)  
     tensor_unary_operation(m_samplesize, left, result, escript::atanh_substitute);  
 #else  
     tensor_unary_operation(m_samplesize, left, result, ::atanh);  
 #endif    
     break;  
     case LOG10:  
     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log10);  
     break;  
     case LOG:  
     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log);  
     break;  
     case SIGN:  
     tensor_unary_operation(m_samplesize, left, result, escript::fsign);  
     break;  
     case ABS:  
     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::fabs);  
     break;  
     case NEG:  
     tensor_unary_operation(m_samplesize, left, result, negate<double>());  
     break;  
     case POS:  
     // it doesn't mean anything for delayed.  
     // it will just trigger a deep copy of the lazy object  
     throw DataException("Programmer error - POS not supported for lazy data.");  
     break;  
     case EXP:  
     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::exp);  
     break;  
     case SQRT:  
     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sqrt);  
     break;  
     case RECIP:  
     tensor_unary_operation(m_samplesize, left, result, bind1st(divides<double>(),1.));  
     break;  
     case GZ:  
     tensor_unary_operation(m_samplesize, left, result, bind2nd(greater<double>(),0.0));  
     break;  
     case LZ:  
     tensor_unary_operation(m_samplesize, left, result, bind2nd(less<double>(),0.0));  
     break;  
     case GEZ:  
     tensor_unary_operation(m_samplesize, left, result, bind2nd(greater_equal<double>(),0.0));  
     break;  
     case LEZ:  
     tensor_unary_operation(m_samplesize, left, result, bind2nd(less_equal<double>(),0.0));  
     break;  
1013    
1014    const DataTypes::RealVectorType*
1015    DataLazy::resolveNodeReduction(int tid, int sampleNo, size_t& roffset) const
1016    {
1017            // we assume that any collapsing has been done before we get here
1018            // since we only have one argument we don't need to think about only
1019            // processing single points.
1020            // we will also know we won't get identity nodes
1021      if (m_readytype!='E')
1022      {
1023      throw DataException("Programmer error - resolveNodeReduction should only be called on expanded Data.");
1024      }
1025      if (m_op==IDENTITY)
1026      {
1027      throw DataException("Programmer error - resolveNodeReduction should not be called on identity nodes.");
1028      }
1029      size_t loffset=0;
1030      const DataTypes::RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, loffset);
1031    
1032      roffset=m_samplesize*tid;
1033      unsigned int ndpps=getNumDPPSample();
1034      unsigned int psize=DataTypes::noValues(m_left->getShape());
1035      double* result=&(m_samples[roffset]);
1036      switch (m_op)
1037      {
1038        case MINVAL:
1039            {
1040              for (unsigned int z=0;z<ndpps;++z)
1041              {
1042                FMin op;
1043                *result=escript::reductionOpVector(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max());
1044                loffset+=psize;
1045                result++;
1046              }
1047            }
1048            break;
1049        case MAXVAL:
1050            {
1051              for (unsigned int z=0;z<ndpps;++z)
1052              {
1053              FMax op;
1054              *result=escript::reductionOpVector(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max()*-1);
1055              loffset+=psize;
1056              result++;
1057              }
1058            }
1059            break;
1060      default:      default:
1061      throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");          throw DataException("Programmer error - resolveNodeReduction can not resolve operator "+opToString(m_op)+".");
1062    }    }
1063    return &v;    return &(m_samples);
1064  }  }
1065    
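The MINVAL/MAXVAL loop above walks the ndpps datapoints of one sample and reduces each block of psize values to a single number via escript::reductionOpVector. Below is a self-contained sketch of the same loop structure; reduce_sample is an illustrative name and the hand-rolled min/max stands in for the FMin/FMax functors.

#include <algorithm>
#include <cstddef>
#include <limits>
#include <vector>

// Sketch only: reduce every datapoint of a sample (ndpps blocks of psize doubles)
// to a single min or max value, mirroring the MINVAL/MAXVAL cases above.
void reduce_sample(const std::vector<double>& sample,
                   std::size_t ndpps, std::size_t psize,
                   std::vector<double>& out, bool wantMin)
{
    out.resize(ndpps);
    std::size_t loffset = 0;
    for (std::size_t z = 0; z < ndpps; ++z)
    {
        double acc = wantMin ?  std::numeric_limits<double>::max()
                             : -std::numeric_limits<double>::max();
        for (std::size_t i = 0; i < psize; ++i)
        {
            acc = wantMin ? std::min(acc, sample[loffset + i])
                          : std::max(acc, sample[loffset + i]);
        }
        out[z] = acc;
        loffset += psize;           // advance to the next datapoint
    }
}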
1066    const DataTypes::RealVectorType*
1067  /*  DataLazy::resolveNodeNP1OUT(int tid, int sampleNo, size_t& roffset) const
   \brief Compute the value of the expression (unary operation) for the given sample.  
   \return Vector which stores the value of the subexpression for the given sample.  
   \param v A vector to store intermediate results.  
   \param offset Index in v to begin storing results.  
   \param sampleNo Sample number to evaluate.  
   \param roffset (output parameter) the offset in the return vector where the result begins.  
   
   The return value will be an existing vector so do not deallocate it.  
   If the result is stored in v it should be stored at the offset given.  
   Everything from offset to the end of v should be considered available for this method to use.  
 */  
 DataTypes::ValueType*  
 DataLazy::resolveNP1OUT(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const  
1068  {  {
1069      // we assume that any collapsing has been done before we get here          // we assume that any collapsing has been done before we get here
1070      // since we only have one argument we don't need to think about only          // since we only have one argument we don't need to think about only
1071      // processing single points.          // processing single points.
1072    if (m_readytype!='E')    if (m_readytype!='E')
1073    {    {
1074      throw DataException("Programmer error - resolveNP1OUT should only be called on expanded Data.");      throw DataException("Programmer error - resolveNodeNP1OUT should only be called on expanded Data.");
1075    }    }
1076      // since we can't write the result over the input, we need a result offset further along    if (m_op==IDENTITY)
1077    size_t subroffset=roffset+m_samplesize;    {
1078    const ValueType* vleft=m_left->resolveSample(v,offset,sampleNo,subroffset);      throw DataException("Programmer error - resolveNodeNP1OUT should not be called on identity nodes.");
1079    roffset=offset;    }
1080      size_t subroffset;
1081      const RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1082      roffset=m_samplesize*tid;
1083      size_t loop=0;
1084      size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1085      size_t step=getNoValues();
1086      size_t offset=roffset;
1087    switch (m_op)    switch (m_op)
1088    {    {
1089      case SYM:      case SYM:
1090      DataMaths::symmetric(*vleft,m_left->getShape(),subroffset, v, getShape(), offset);          for (loop=0;loop<numsteps;++loop)
1091      break;          {
1092                escript::symmetric(*leftres,m_left->getShape(),subroffset, m_samples, getShape(), offset);
1093                subroffset+=step;
1094                offset+=step;
1095            }
1096            break;
1097      case NSYM:      case NSYM:
1098      DataMaths::nonsymmetric(*vleft,m_left->getShape(),subroffset, v, getShape(), offset);          for (loop=0;loop<numsteps;++loop)
1099      break;          {
1100                escript::antisymmetric(*leftres,m_left->getShape(),subroffset, m_samples, getShape(), offset);
1101                subroffset+=step;
1102                offset+=step;
1103            }
1104            break;
1105      default:      default:
1106      throw DataException("Programmer error - resolveNP1OUT can not resolve operator "+opToString(m_op)+".");          throw DataException("Programmer error - resolveNP1OUT can not resolve operator "+opToString(m_op)+".");
1107    }    }
1108    return &v;    return &m_samples;
1109  }  }
1110    
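For each datapoint the SYM case calls escript::symmetric; for a rank-2 datapoint that amounts to forming 0.5*(A + A^T). The standalone stand-in below shows that computation on one n x n block; symmetric_part is an illustrative name and the flat-vector bookkeeping of the real code is omitted.

#include <cstddef>

// Sketch of what SYM does to one n x n datapoint: out = 0.5 * (in + in^T).
void symmetric_part(const double* in, double* out, std::size_t n)
{
    for (std::size_t i = 0; i < n; ++i)
    {
        for (std::size_t j = 0; j < n; ++j)
        {
            out[i*n + j] = 0.5 * (in[i*n + j] + in[j*n + i]);
        }
    }
}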
1111  /*  const DataTypes::RealVectorType*
1112    \brief Compute the value of the expression (unary operation) for the given sample.  DataLazy::resolveNodeNP1OUT_P(int tid, int sampleNo, size_t& roffset) const
   \return Vector which stores the value of the subexpression for the given sample.  
   \param v A vector to store intermediate results.  
   \param offset Index in v to begin storing results.  
   \param sampleNo Sample number to evaluate.  
   \param roffset (output parameter) the offset in the return vector where the result begins.  
   
   The return value will be an existing vector so do not deallocate it.  
   If the result is stored in v it should be stored at the offset given.  
   Everything from offset to the end of v should be considered available for this method to use.  
 */  
 DataTypes::ValueType*  
 DataLazy::resolveNP1OUT_P(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const  
1113  {  {
1114      // we assume that any collapsing has been done before we get here          // we assume that any collapsing has been done before we get here
1115      // since we only have one argument we don't need to think about only          // since we only have one argument we don't need to think about only
1116      // processing single points.          // processing single points.
1117    if (m_readytype!='E')    if (m_readytype!='E')
1118    {    {
1119      throw DataException("Programmer error - resolveNP1OUT_P should only be called on expanded Data.");      throw DataException("Programmer error - resolveNodeNP1OUT_P should only be called on expanded Data.");
1120    }    }
1121      // since we can't write the result over the input, we need a result offset further along    if (m_op==IDENTITY)
1122    size_t subroffset=roffset+m_samplesize;    {
1123    const ValueType* vleft=m_left->resolveSample(v,offset,sampleNo,subroffset);      throw DataException("Programmer error - resolveNodeNP1OUT_P should not be called on identity nodes.");
1124    roffset=offset;    }
1125      size_t subroffset;
1126      size_t offset;
1127      const RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1128      roffset=m_samplesize*tid;
1129      offset=roffset;
1130      size_t loop=0;
1131      size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1132      size_t outstep=getNoValues();
1133      size_t instep=m_left->getNoValues();
1134    switch (m_op)    switch (m_op)
1135    {    {
1136      case TRACE:      case TRACE:
1137           DataMaths::trace(*vleft,m_left->getShape(),subroffset, v,getShape(),offset,m_axis_offset);          for (loop=0;loop<numsteps;++loop)
1138      break;          {
1139                escript::trace(*leftres,m_left->getShape(),subroffset, m_samples ,getShape(),offset,m_axis_offset);
1140                subroffset+=instep;
1141                offset+=outstep;
1142            }
1143            break;
1144      case TRANS:      case TRANS:
1145           DataMaths::transpose(*vleft,m_left->getShape(),subroffset, v,getShape(),offset,m_axis_offset);          for (loop=0;loop<numsteps;++loop)
1146      break;          {
1147                escript::transpose(*leftres,m_left->getShape(),subroffset, m_samples, getShape(),offset,m_axis_offset);
1148                subroffset+=instep;
1149                offset+=outstep;
1150            }
1151            break;
1152        default:
1153            throw DataException("Programmer error - resolveNP1OUTP can not resolve operator "+opToString(m_op)+".");
1154      }
1155      return &m_samples;
1156    }
1157    
1158    
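The TRACE and TRANS loops above advance the input offset by instep (the size of one input datapoint) and the output offset by outstep (the size of one result datapoint); the two differ because these operations change the shape. A sketch of the trace case follows, under the assumption of square n x n input datapoints; trace_datapoints is an illustrative name.

#include <cstddef>

// Sketch: each n x n input datapoint (instep = n*n doubles) yields one output
// value (outstep = 1), so the two offsets advance at different rates.
void trace_datapoints(const double* in, double* out,
                      std::size_t numsteps, std::size_t n)
{
    const std::size_t instep  = n * n;
    const std::size_t outstep = 1;
    for (std::size_t dp = 0; dp < numsteps; ++dp)
    {
        double t = 0.0;
        for (std::size_t i = 0; i < n; ++i)
        {
            t += in[i*n + i];       // sum of the diagonal
        }
        *out = t;
        in  += instep;
        out += outstep;
    }
}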
1159    const DataTypes::RealVectorType*
1160    DataLazy::resolveNodeNP1OUT_2P(int tid, int sampleNo, size_t& roffset) const
1161    {
1162      if (m_readytype!='E')
1163      {
1164        throw DataException("Programmer error - resolveNodeNP1OUT_2P should only be called on expanded Data.");
1165      }
1166      if (m_op==IDENTITY)
1167      {
1168        throw DataException("Programmer error - resolveNodeNP1OUT_2P should not be called on identity nodes.");
1169      }
1170      size_t subroffset;
1171      size_t offset;
1172      const RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1173      roffset=m_samplesize*tid;
1174      offset=roffset;
1175      size_t loop=0;
1176      size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1177      size_t outstep=getNoValues();
1178      size_t instep=m_left->getNoValues();
1179      switch (m_op)
1180      {
1181        case SWAP:
1182            for (loop=0;loop<numsteps;++loop)
1183            {
1184                escript::swapaxes(*leftres,m_left->getShape(),subroffset, m_samples, getShape(),offset, m_axis_offset, m_transpose);
1185                subroffset+=instep;
1186                offset+=outstep;
1187            }
1188            break;
1189      default:      default:
1190      throw DataException("Programmer error - resolveNP1OUTP can not resolve operator "+opToString(m_op)+".");          throw DataException("Programmer error - resolveNodeNP1OUT2P can not resolve operator "+opToString(m_op)+".");
1191    }    }
1192    return &v;    return &m_samples;
1193  }  }
1194    
1195    const DataTypes::RealVectorType*
1196    DataLazy::resolveNodeCondEval(int tid, int sampleNo, size_t& roffset) const
1197    {
1198      if (m_readytype!='E')
1199      {
1200        throw DataException("Programmer error - resolveNodeCondEval should only be called on expanded Data.");
1201      }
1202      if (m_op!=CONDEVAL)
1203      {
1204        throw DataException("Programmer error - resolveNodeCondEval should only be called on CONDEVAL nodes.");
1205      }
1206      size_t subroffset;
1207    
1208  #define PROC_OP(TYPE,X)                               \    const RealVectorType* maskres=m_mask->resolveNodeSample(tid, sampleNo, subroffset);
1209      for (int i=0;i<steps;++i,resultp+=resultStep) \    const RealVectorType* srcres=0;
1210      { \    if ((*maskres)[subroffset]>0)
1211         tensor_binary_operation< TYPE >(chunksize, &((*left)[lroffset]), &((*right)[rroffset]), resultp, X); \    {
1212         lroffset+=leftStep; \          srcres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1213         rroffset+=rightStep; \    }
1214      }    else
1215      {
1216            srcres=m_right->resolveNodeSample(tid, sampleNo, subroffset);
1217      }
1218    
1219      // Now we need to copy the result
1220    
1221      roffset=m_samplesize*tid;
1222      for (int i=0;i<m_samplesize;++i)
1223      {
1224            m_samples[roffset+i]=(*srcres)[subroffset+i];  
1225      }
1226    
1227      return &m_samples;
1228    }
1229    
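resolveNodeCondEval reads a single mask value for the sample and then copies the whole sample from either the left or the right child, depending on the sign of that value. A minimal sketch of the selection step; condeval_sample is an illustrative name.

#include <cstddef>
#include <cstring>

// Sketch only: a positive mask value selects the left source sample, otherwise
// the right one; the chosen sample is copied verbatim into the result.
void condeval_sample(double maskValue,
                     const double* leftSample,
                     const double* rightSample,
                     double* result, std::size_t samplesize)
{
    const double* src = (maskValue > 0) ? leftSample : rightSample;
    std::memcpy(result, src, samplesize * sizeof(double));
}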
 /*  
   \brief Compute the value of the expression (binary operation) for the given sample.  
   \return Vector which stores the value of the subexpression for the given sample.  
   \param v A vector to store intermediate results.  
   \param offset Index in v to begin storing results.  
   \param sampleNo Sample number to evaluate.  
   \param roffset (output parameter) the offset in the return vector where the result begins.  
   
   The return value will be an existing vector so do not deallocate it.  
   If the result is stored in v it should be stored at the offset given.  
   Everything from offset to the end of v should be considered available for this method to use.  
 */  
1230  // This method assumes that any subexpressions which evaluate to Constant or Tagged Data  // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1231  // have already been collapsed to IDENTITY. So we must have at least one expanded child.  // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1232  // If both children are expanded, then we can process them in a single operation (we treat  // If both children are expanded, then we can process them in a single operation (we treat
# Line 952  DataLazy::resolveNP1OUT_P(ValueType& v, Line 1236  DataLazy::resolveNP1OUT_P(ValueType& v,
1236  // There is an additional complication when scalar operations are considered.  // There is an additional complication when scalar operations are considered.
1237  // For example, 2+Vector.  // For example, 2+Vector.
1238  // In this case each double within the point is treated individually  // In this case each double within the point is treated individually
1239  DataTypes::ValueType*  const DataTypes::RealVectorType*
1240  DataLazy::resolveBinary(ValueType& v,  size_t offset, int sampleNo, size_t& roffset) const  DataLazy::resolveNodeBinary(int tid, int sampleNo, size_t& roffset) const
1241  {  {
1242  LAZYDEBUG(cout << "Resolve binary: " << toString() << endl;)  LAZYDEBUG(cout << "Resolve binary: " << toString() << endl;)
1243    
1244    size_t lroffset=0, rroffset=0;    // offsets in the left and right result vectors    size_t lroffset=0, rroffset=0;        // offsets in the left and right result vectors
1245      // first work out which of the children are expanded          // first work out which of the children are expanded
1246    bool leftExp=(m_left->m_readytype=='E');    bool leftExp=(m_left->m_readytype=='E');
1247    bool rightExp=(m_right->m_readytype=='E');    bool rightExp=(m_right->m_readytype=='E');
1248    if (!leftExp && !rightExp)    if (!leftExp && !rightExp)
1249    {    {
1250      throw DataException("Programmer Error - please use collapse if neither argument has type 'E'.");          throw DataException("Programmer Error - please use collapse if neither argument has type 'E'.");
1251    }    }
1252    bool leftScalar=(m_left->getRank()==0);    bool leftScalar=(m_left->getRank()==0);
1253    bool rightScalar=(m_right->getRank()==0);    bool rightScalar=(m_right->getRank()==0);
1254    bool bigloops=((leftExp && rightExp) || (!leftExp && !rightExp)); // is processing in single step?    if ((m_left->getRank()!=m_right->getRank()) && (!leftScalar && !rightScalar))
1255    int steps=(bigloops?1:getNumDPPSample());    {
1256    size_t chunksize=(bigloops? m_samplesize : getNoValues());    // if bigloops, pretend the whole sample is a datapoint          throw DataException("resolveBinary - ranks of arguments must match unless one of them is scalar.");
1257    if (m_left->getRank()!=m_right->getRank())    // need to deal with scalar * ? ops    }
1258    {    size_t leftsize=m_left->getNoValues();
1259      if (!leftScalar && !rightScalar)    size_t rightsize=m_right->getNoValues();
1260      {    size_t chunksize=1;                   // how many doubles will be processed in one go
1261         throw DataException("resolveBinary - ranks of arguments must match unless one of them is scalar.");    int leftstep=0;               // how far should the left offset advance after each step
1262      }    int rightstep=0;
1263      steps=getNumDPPSample()*max(m_left->getNoValues(),m_right->getNoValues());    int numsteps=0;               // total number of steps for the inner loop
1264      chunksize=1;    // for scalar    int oleftstep=0;      // the o variables refer to the outer loop
1265    }        int orightstep=0;     // The outer loop is only required in cases where there is an extended scalar
1266    int leftStep=((leftExp && (!rightExp || rightScalar))? m_right->getNoValues() : 0);    int onumsteps=1;
1267    int rightStep=((rightExp && (!leftExp || leftScalar))? m_left->getNoValues() : 0);    
1268    int resultStep=max(leftStep,rightStep);   // only one (at most) should be !=0    bool LES=(leftExp && leftScalar);     // Left is an expanded scalar
1269      // Get the values of sub-expressions    bool RES=(rightExp && rightScalar);
1270    const ValueType* left=m_left->resolveSample(v,offset,sampleNo,lroffset);    bool LS=(!leftExp && leftScalar);     // left is a single scalar
1271    const ValueType* right=m_right->resolveSample(v,offset+m_samplesize,sampleNo,rroffset); // Note    bool RS=(!rightExp && rightScalar);
1272      // the right child starts further along.    bool LN=(!leftExp && !leftScalar);    // left is a single non-scalar
1273    double* resultp=&(v[offset]);     // results are stored at the vector offset we recieved    bool RN=(!rightExp && !rightScalar);
1274      bool LEN=(leftExp && !leftScalar);    // left is an expanded non-scalar
1275      bool REN=(rightExp && !rightScalar);
1276    
1277      if ((LES && RES) || (LEN && REN))     // both are Expanded scalars or both are expanded non-scalars
1278      {
1279            chunksize=m_left->getNumDPPSample()*leftsize;
1280            leftstep=0;
1281            rightstep=0;
1282            numsteps=1;
1283      }
1284      else if (LES || RES)
1285      {
1286            chunksize=1;
1287            if (LES)                // left is an expanded scalar
1288            {
1289                    if (RS)
1290                    {
1291                       leftstep=1;
1292                       rightstep=0;
1293                       numsteps=m_left->getNumDPPSample();
1294                    }
1295                    else            // RN or REN
1296                    {
1297                       leftstep=0;
1298                       oleftstep=1;
1299                       rightstep=1;
1300                       orightstep=(RN ? -(int)rightsize : 0);
1301                       numsteps=rightsize;
1302                       onumsteps=m_left->getNumDPPSample();
1303                    }
1304            }
1305            else            // right is an expanded scalar
1306            {
1307                    if (LS)
1308                    {
1309                       rightstep=1;
1310                       leftstep=0;
1311                       numsteps=m_right->getNumDPPSample();
1312                    }
1313                    else
1314                    {
1315                       rightstep=0;
1316                       orightstep=1;
1317                       leftstep=1;
1318                       oleftstep=(LN ? -(int)leftsize : 0);
1319                       numsteps=leftsize;
1320                       onumsteps=m_right->getNumDPPSample();
1321                    }
1322            }
1323      }
1324      else  // this leaves (LEN, RS), (LEN, RN) and their transposes
1325      {
1326            if (LEN)        // and Right will be a single value
1327            {
1328                    chunksize=rightsize;
1329                    leftstep=rightsize;
1330                    rightstep=0;
1331                    numsteps=m_left->getNumDPPSample();
1332                    if (RS)
1333                    {
1334                       numsteps*=leftsize;
1335                    }
1336            }
1337            else    // REN
1338            {
1339                    chunksize=leftsize;
1340                    rightstep=leftsize;
1341                    leftstep=0;
1342                    numsteps=m_right->getNumDPPSample();
1343                    if (LS)
1344                    {
1345                       numsteps*=rightsize;
1346                    }
1347            }
1348      }
1349    
1350      int resultStep=max(leftstep,rightstep);       // only one (at most) should be !=0
1351            // Get the values of sub-expressions
1352      const RealVectorType* left=m_left->resolveNodeSample(tid,sampleNo,lroffset);      
1353      const RealVectorType* right=m_right->resolveNodeSample(tid,sampleNo,rroffset);
1354    LAZYDEBUG(cout << "Post sub calls in " << toString() << endl;)
1355    LAZYDEBUG(cout << "shapes=" << DataTypes::shapeToString(m_left->getShape()) << "," << DataTypes::shapeToString(m_right->getShape()) << endl;)
1356    LAZYDEBUG(cout << "chunksize=" << chunksize << endl << "leftstep=" << leftstep << " rightstep=" << rightstep;)
1357    LAZYDEBUG(cout << " numsteps=" << numsteps << endl << "oleftstep=" << oleftstep << " orightstep=" << orightstep;)
1358    LAZYDEBUG(cout << "onumsteps=" << onumsteps << endl;)
1359    LAZYDEBUG(cout << " DPPS=" << m_left->getNumDPPSample() << "," <<m_right->getNumDPPSample() << endl;)
1360    LAZYDEBUG(cout << "" << LS << RS << LN << RN << LES << RES <<LEN << REN <<   endl;)
1361    
1362    LAZYDEBUG(cout << "Left res["<< lroffset<< "]=" << (*left)[lroffset] << endl;)
1363    LAZYDEBUG(cout << "Right res["<< rroffset<< "]=" << (*right)[rroffset] << endl;)
1364    
1365    
1366      roffset=m_samplesize*tid;
1367      double* resultp=&(m_samples[roffset]);                // results are stored at the vector offset we received
1368    switch(m_op)    switch(m_op)
1369    {    {
1370      case ADD:      case ADD:
1371          PROC_OP(NO_ARG,plus<double>());          //PROC_OP(NO_ARG,plus<double>());
1372      break;        escript::binaryOpVectorLazyHelper<real_t, real_t, real_t>(resultp,
1373                 &(*left)[0],
1374                 &(*right)[0],
1375                 chunksize,
1376                 onumsteps,
1377                 numsteps,
1378                 resultStep,
1379                 leftstep,
1380                 rightstep,
1381                 oleftstep,
1382                 orightstep,
1383                 lroffset,
1384                 rroffset,
1385                 escript::ES_optype::ADD);  
1386            break;
1387      case SUB:      case SUB:
1388      PROC_OP(NO_ARG,minus<double>());        escript::binaryOpVectorLazyHelper<real_t, real_t, real_t>(resultp,
1389      break;               &(*left)[0],
1390                 &(*right)[0],
1391                 chunksize,
1392                 onumsteps,
1393                 numsteps,
1394                 resultStep,
1395                 leftstep,
1396                 rightstep,
1397                 oleftstep,
1398                 orightstep,
1399                 lroffset,
1400                 rroffset,
1401                 escript::ES_optype::SUB);        
1402            //PROC_OP(NO_ARG,minus<double>());
1403            break;
1404      case MUL:      case MUL:
1405      PROC_OP(NO_ARG,multiplies<double>());          //PROC_OP(NO_ARG,multiplies<double>());
1406      break;        escript::binaryOpVectorLazyHelper<real_t, real_t, real_t>(resultp,
1407                 &(*left)[0],
1408                 &(*right)[0],
1409                 chunksize,
1410                 onumsteps,
1411                 numsteps,
1412                 resultStep,
1413                 leftstep,
1414                 rightstep,
1415                 oleftstep,
1416                 orightstep,
1417                 lroffset,
1418                 rroffset,
1419                 escript::ES_optype::MUL);        
1420            break;
1421      case DIV:      case DIV:
1422      PROC_OP(NO_ARG,divides<double>());          //PROC_OP(NO_ARG,divides<double>());
1423      break;        escript::binaryOpVectorLazyHelper<real_t, real_t, real_t>(resultp,
1424                 &(*left)[0],
1425                 &(*right)[0],
1426                 chunksize,
1427                 onumsteps,
1428                 numsteps,
1429                 resultStep,
1430                 leftstep,
1431                 rightstep,
1432                 oleftstep,
1433                 orightstep,
1434                 lroffset,
1435                 rroffset,
1436                 escript::ES_optype::DIV);        
1437            break;
1438      case POW:      case POW:
1439         PROC_OP(double (double,double),::pow);         //PROC_OP(double (double,double),::pow);
1440      break;        escript::binaryOpVectorLazyHelper<real_t, real_t, real_t>(resultp,
1441                 &(*left)[0],
1442                 &(*right)[0],
1443                 chunksize,
1444                 onumsteps,
1445                 numsteps,
1446                 resultStep,
1447                 leftstep,
1448                 rightstep,
1449                 oleftstep,
1450                 orightstep,
1451                 lroffset,
1452                 rroffset,
1453                 escript::ES_optype::POW);        
1454            break;
1455      default:      default:
1456      throw DataException("Programmer error - resolveBinary can not resolve operator "+opToString(m_op)+".");          throw DataException("Programmer error - resolveBinary can not resolve operator "+opToString(m_op)+".");
1457    }    }
1458    roffset=offset;    LAZYDEBUG(cout << "Result res[" << roffset<< "]" << m_samples[roffset] << endl;)
1459    return &v;    return &m_samples;
1460  }  }
1461    
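The chunksize/numsteps/onumsteps bookkeeping above exists so that one loop nest can handle every combination of expanded, constant, scalar and non-scalar operands: the inner loop applies the operation to contiguous chunks, the step values advance whichever operand actually has more data, and the outer loop (with its possibly negative corrections) rewinds an operand when an expanded scalar must be paired with every value of a non-scalar datapoint. The sketch below is only a stand-in for what a helper such as escript::binaryOpVectorLazyHelper might do; the signature and names are assumptions, not the real API.

#include <cstddef>
#include <functional>

// Sketch of the stepping scheme; not escript's real helper.
template <typename Op>
void binary_op_lazy(double* result,
                    const double* left, const double* right,
                    std::size_t chunksize,
                    std::size_t onumsteps, std::size_t numsteps,
                    std::ptrdiff_t resultStep,
                    std::ptrdiff_t leftStep, std::ptrdiff_t rightStep,
                    std::ptrdiff_t oLeftStep, std::ptrdiff_t oRightStep,
                    std::ptrdiff_t loffset, std::ptrdiff_t roffset,
                    Op op)
{
    for (std::size_t outer = 0; outer < onumsteps; ++outer)
    {
        for (std::size_t step = 0; step < numsteps; ++step)
        {
            for (std::size_t i = 0; i < chunksize; ++i)
            {
                result[i] = op(left[loffset + i], right[roffset + i]);
            }
            result  += resultStep;  // at most one operand drives the result forward
            loffset += leftStep;
            roffset += rightStep;
        }
        loffset += oLeftStep;       // outer-loop corrections; may be negative
        roffset += oRightStep;
    }
}

// e.g. binary_op_lazy(resultp, l, r, chunksize, onumsteps, numsteps,
//                     resultStep, leftstep, rightstep, oleftstep, orightstep,
//                     lroffset, rroffset, std::plus<double>());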
1462    
 /*  
   \brief Compute the value of the expression (tensor product) for the given sample.  
   \return Vector which stores the value of the subexpression for the given sample.  
   \param v A vector to store intermediate results.  
   \param offset Index in v to begin storing results.  
   \param sampleNo Sample number to evaluate.  
   \param roffset (output parameter) the offset in the return vector where the result begins.  
   
   The return value will be an existing vector so do not deallocate it.  
   If the result is stored in v it should be stored at the offset given.  
   Everything from offset to the end of v should be considered available for this method to use.  
 */  
1463  // This method assumes that any subexpressions which evaluate to Constant or Tagged Data  // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1464  // have already been collapsed to IDENTITY. So we must have at least one expanded child.  // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1465  // unlike the other resolve helpers, we must treat these datapoints separately.  // unlike the other resolve helpers, we must treat these datapoints separately.
1466  DataTypes::ValueType*  const DataTypes::RealVectorType*
1467  DataLazy::resolveTProd(ValueType& v,  size_t offset, int sampleNo, size_t& roffset) const  DataLazy::resolveNodeTProd(int tid, int sampleNo, size_t& roffset) const
1468  {  {
1469  LAZYDEBUG(cout << "Resolve TensorProduct: " << toString() << endl;)  LAZYDEBUG(cout << "Resolve TensorProduct: " << toString() << endl;)
1470    
1471    size_t lroffset=0, rroffset=0;    // offsets in the left and right result vectors    size_t lroffset=0, rroffset=0;        // offsets in the left and right result vectors
1472      // first work out which of the children are expanded          // first work out which of the children are expanded
1473    bool leftExp=(m_left->m_readytype=='E');    bool leftExp=(m_left->m_readytype=='E');
1474    bool rightExp=(m_right->m_readytype=='E');    bool rightExp=(m_right->m_readytype=='E');
1475    int steps=getNumDPPSample();    int steps=getNumDPPSample();
1476    int leftStep=((leftExp && !rightExp)? m_right->getNoValues() : 0);    int leftStep=(leftExp? m_left->getNoValues() : 0);            // do not have scalars as input to this method
1477    int rightStep=((rightExp && !leftExp)? m_left->getNoValues() : 0);    int rightStep=(rightExp?m_right->getNoValues() : 0);
1478    int resultStep=max(leftStep,rightStep);   // only one (at most) should be !=0  
1479      // Get the values of sub-expressions (leave a gap of one sample for the result).    int resultStep=getNoValues();
1480    const ValueType* left=m_left->resolveSample(v,offset+m_samplesize,sampleNo,lroffset);    roffset=m_samplesize*tid;
1481    const ValueType* right=m_right->resolveSample(v,offset+2*m_samplesize,sampleNo,rroffset);    size_t offset=roffset;
1482    double* resultp=&(v[offset]);     // results are stored at the vector offset we recieved  
1483      const RealVectorType* left=m_left->resolveNodeSample(tid, sampleNo, lroffset);
1484    
1485      const RealVectorType* right=m_right->resolveNodeSample(tid, sampleNo, rroffset);
1486    
1487    LAZYDEBUG(cerr << "[Left shape]=" << DataTypes::shapeToString(m_left->getShape()) << "\n[Right shape]=" << DataTypes::shapeToString(m_right->getShape()) << " result=" <<DataTypes::shapeToString(getShape()) <<  endl;
1488    cout << getNoValues() << endl;)
1489    
1490    
1491    LAZYDEBUG(cerr << "Post sub calls: " << toString() << endl;)
1492    LAZYDEBUG(cout << "LeftExp=" << leftExp << " rightExp=" << rightExp << endl;)
1493    LAZYDEBUG(cout << "LeftR=" << m_left->getRank() << " rightExp=" << m_right->getRank() << endl;)
1494    LAZYDEBUG(cout << "LeftSize=" << m_left->getNoValues() << " RightSize=" << m_right->getNoValues() << endl;)
1495    LAZYDEBUG(cout << "m_samplesize=" << m_samplesize << endl;)
1496    LAZYDEBUG(cout << "outputshape=" << DataTypes::shapeToString(getShape()) << endl;)
1497    LAZYDEBUG(cout << "DPPS=" << m_right->getNumDPPSample() <<"."<<endl;)
1498    
1499      double* resultp=&(m_samples[offset]);         // results are stored at the vector offset we received
1500    switch(m_op)    switch(m_op)
1501    {    {
1502      case PROD:      case PROD:
1503      for (int i=0;i<steps;++i,resultp+=resultStep)          for (int i=0;i<steps;++i,resultp+=resultStep)
1504      {          {
1505            const double *ptr_0 = &((*left)[lroffset]);            const double *ptr_0 = &((*left)[lroffset]);
1506            const double *ptr_1 = &((*right)[rroffset]);            const double *ptr_1 = &((*right)[rroffset]);
1507            matrix_matrix_product(m_SL, m_SM, m_SR, ptr_0, ptr_1, resultp, m_transpose);  
1508        lroffset+=leftStep;  LAZYDEBUG(cout << DataTypes::pointToString(*left, m_left->getShape(),lroffset,"LEFT") << endl;)
1509        rroffset+=rightStep;  LAZYDEBUG(cout << DataTypes::pointToString(*right,m_right->getShape(),rroffset, "RIGHT") << endl;)
1510      }  
1511      break;            matrix_matrix_product(m_SL, m_SM, m_SR, ptr_0, ptr_1, resultp, m_transpose);
1512    
1513              lroffset+=leftStep;
1514              rroffset+=rightStep;
1515            }
1516            break;
1517      default:      default:
1518      throw DataException("Programmer error - resolveTProduct can not resolve operator "+opToString(m_op)+".");          throw DataException("Programmer error - resolveTProduct can not resolve operator "+opToString(m_op)+".");
1519    }    }
1520    roffset=offset;    roffset=offset;
1521    return &v;    return &m_samples;
1522  }  }
1523    
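The PROD case calls matrix_matrix_product once per datapoint with the precomputed sizes m_SL, m_SM and m_SR. Assuming these are the left rows, the contracted dimension and the right columns, and assuming row-major storage for simplicity (both assumptions; the real routine and storage order live in escript), the core of such a product looks like:

#include <cstddef>

// Illustrative stand-in for one product step: C(SL x SR) = A(SL x SM) * B(SM x SR).
void matmat(std::size_t SL, std::size_t SM, std::size_t SR,
            const double* A, const double* B, double* C)
{
    for (std::size_t i = 0; i < SL; ++i)
    {
        for (std::size_t k = 0; k < SR; ++k)
        {
            double acc = 0.0;
            for (std::size_t j = 0; j < SM; ++j)
            {
                acc += A[i*SM + j] * B[j*SR + k];
            }
            C[i*SR + k] = acc;
        }
    }
}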
1524    
1525    const DataTypes::RealVectorType*
1526    DataLazy::resolveSample(int sampleNo, size_t& roffset) const
1527    {
1528    #ifdef _OPENMP
1529            int tid=omp_get_thread_num();
1530    #else
1531            int tid=0;
1532    #endif
1533    
1534  /*  #ifdef LAZY_STACK_PROF
1535    \brief Compute the value of the expression for the given sample.          stackstart[tid]=&tid;
1536    \return Vector which stores the value of the subexpression for the given sample.          stackend[tid]=&tid;
1537    \param v A vector to store intermediate results.          const DataTypes::RealVectorType* r=resolveNodeSample(tid, sampleNo, roffset);
1538    \param offset Index in v to begin storing results.          size_t d=(size_t)stackstart[tid]-(size_t)stackend[tid];
1539    \param sampleNo Sample number to evaluate.          #pragma omp critical
1540    \param roffset (output parameter) the offset in the return vector where the result begins.          if (d>maxstackuse)
1541            {
1542    cout << "Max resolve Stack use " << d << endl;
1543                    maxstackuse=d;
1544            }
1545            return r;
1546    #else
1547            return resolveNodeSample(tid, sampleNo, roffset);
1548    #endif
1549    }
1550    
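The LAZY_STACK_PROF code above estimates stack consumption by recording the address of a local variable in the top-level call and letting every recursive frame lower a shared "end" pointer; the difference approximates the deepest stack excursion. The standalone sketch below shows the trick in a single-threaded setting; it relies on the stack growing downwards and on comparing addresses of unrelated locals, so it is a diagnostic hack rather than portable C++.

#include <cstddef>
#include <iostream>

static const char* stack_start = nullptr;
static const char* stack_end   = nullptr;

void deep(int n)
{
    char marker;                    // lives in this frame
    if (&marker < stack_end)
    {
        stack_end = &marker;        // record the deepest frame seen so far
    }
    if (n > 0)
    {
        deep(n - 1);
    }
}

int main()
{
    char top;
    stack_start = stack_end = &top;
    deep(100);
    std::cout << "approx stack use: "
              << static_cast<std::size_t>(stack_start - stack_end) << " bytes\n";
    return 0;
}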
1551    The return value will be an existing vector so do not deallocate it.  
1552  */  // This needs to do the work of the identity constructor
1553  // the vector and the offset are a place where the method could write its data if it wishes  void
1554  // it is not obligated to do so. For example, if it has its own storage already, it can use that.  DataLazy::resolveToIdentity()
 // Hence the return value to indicate where the data is actually stored.  
 // Regardless, the storage should be assumed to be used, even if it isn't.  
   
 // the roffset is the offset within the returned vector where the data begins  
 const DataTypes::ValueType*  
 DataLazy::resolveSample(ValueType& v, size_t offset, int sampleNo, size_t& roffset)  
1555  {  {
1556  LAZYDEBUG(cout << "Resolve sample " << toString() << endl;)     if (m_op==IDENTITY)
1557      // collapse so we have a 'E' node or an IDENTITY for some other type          return;
1558    if (m_readytype!='E' && m_op!=IDENTITY)     DataReady_ptr p=resolveNodeWorker();
1559    {     makeIdentity(p);
1560      collapse();  }
1561    }  
1562    if (m_op==IDENTITY)    void DataLazy::makeIdentity(const DataReady_ptr& p)
1563    {
1564       m_op=IDENTITY;
1565       m_axis_offset=0;
1566       m_transpose=0;
1567       m_SL=m_SM=m_SR=0;
1568       m_children=m_height=0;
1569       m_id=p;
1570       if(p->isConstant()) {m_readytype='C';}
1571       else if(p->isExpanded()) {m_readytype='E';}
1572       else if (p->isTagged()) {m_readytype='T';}
1573       else {throw DataException("Unknown DataReady instance in convertToIdentity constructor.");}
1574       m_samplesize=p->getNumDPPSample()*p->getNoValues();
1575       m_left.reset();
1576       m_right.reset();
1577    }
1578    
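makeIdentity turns a resolved node back into a plain IDENTITY wrapper: it stores the concrete result and resets the child pointers so the rest of the expression tree can be reclaimed. A small sketch of that ownership hand-over using shared pointers; Ready and LazyNode are illustrative names only.

#include <memory>
#include <utility>

struct Ready { /* stands in for resolved (DataReady-like) data */ };

// Sketch only: after resolution the node keeps the result and drops its children.
struct LazyNode
{
    std::shared_ptr<Ready>    id;      // set once the node has been resolved
    std::shared_ptr<LazyNode> left, right;

    void make_identity(std::shared_ptr<Ready> p)
    {
        id = std::move(p);
        left.reset();                  // release the sub-expressions
        right.reset();
    }
};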
1579    
1580    DataReady_ptr
1581    DataLazy::resolve()
1582    {
1583        resolveToIdentity();
1584        return m_id;
1585    }
1586    
1587    
1588    /* This is really a static method but I think that caused problems in windows */
1589    void
1590    DataLazy::resolveGroupWorker(std::vector<DataLazy*>& dats)
1591    {
1592      if (dats.empty())
1593    {    {
1594      const ValueType& vec=m_id->getVector();          return;
     if (m_readytype=='C')  
     {  
     roffset=0;  
     return &(vec);  
     }  
     roffset=m_id->getPointOffset(sampleNo, 0);  
     return &(vec);  
1595    }    }
1596    if (m_readytype!='E')    vector<DataLazy*> work;
1597    {    FunctionSpace fs=dats[0]->getFunctionSpace();
1598      throw DataException("Programmer Error - Collapse did not produce an expanded node.");    bool match=true;
1599      for (int i=dats.size()-1;i>=0;--i)
1600      {
1601            if (dats[i]->m_readytype!='E')
1602            {
1603                    dats[i]->collapse();
1604            }
1605            if (dats[i]->m_op!=IDENTITY)
1606            {
1607                    work.push_back(dats[i]);
1608                    if (fs!=dats[i]->getFunctionSpace())
1609                    {
1610                            match=false;
1611                    }
1612            }
1613      }
1614      if (work.empty())
1615      {
1616            return;         // no work to do
1617      }
1618      if (match)    // all functionspaces match.  Yes I realise this is overly strict
1619      {             // it is possible that dats[0] is one of the objects which we discarded and
1620                    // all the other functionspaces match.
1621            vector<DataExpanded*> dep;
1622            vector<RealVectorType*> vecs;
1623            for (int i=0;i<work.size();++i)
1624            {
1625                    dep.push_back(new DataExpanded(fs,work[i]->getShape(), RealVectorType(work[i]->getNoValues())));
1626                    vecs.push_back(&(dep[i]->getVectorRW()));
1627            }
1628            int totalsamples=work[0]->getNumSamples();
1629            const RealVectorType* res=0; // Storage for answer
1630            int sample;
1631            #pragma omp parallel private(sample, res)
1632            {
1633                size_t roffset=0;
1634                #pragma omp for schedule(static)
1635                for (sample=0;sample<totalsamples;++sample)
1636                {
1637                    roffset=0;
1638                    int j;
1639                    for (j=work.size()-1;j>=0;--j)
1640                    {
1641    #ifdef _OPENMP
1642                        res=work[j]->resolveNodeSample(omp_get_thread_num(),sample,roffset);
1643    #else
1644                        res=work[j]->resolveNodeSample(0,sample,roffset);
1645    #endif
1646                        RealVectorType::size_type outoffset=dep[j]->getPointOffset(sample,0);
1647                        memcpy(&((*vecs[j])[outoffset]),&((*res)[roffset]),work[j]->m_samplesize*sizeof(RealVectorType::ElementType));
1648                    }
1649                }
1650            }
1651            // Now we need to load the new results as identity ops into the lazy nodes
1652            for (int i=work.size()-1;i>=0;--i)
1653            {
1654                work[i]->makeIdentity(REFCOUNTNS::dynamic_pointer_cast<DataReady>(dep[i]->getPtr()));
1655            }
1656    }    }
1657    switch (getOpgroup(m_op))    else  // functionspaces do not match
1658    {    {
1659    case G_UNARY: return resolveUnary(v, offset,sampleNo,roffset);          for (int i=0;i<work.size();++i)
1660    case G_BINARY: return resolveBinary(v, offset,sampleNo,roffset);          {
1661    case G_NP1OUT: return resolveNP1OUT(v, offset, sampleNo,roffset);                  work[i]->resolveToIdentity();
1662    case G_NP1OUT_P: return resolveNP1OUT_P(v, offset, sampleNo,roffset);          }
   case G_TENSORPROD: return resolveTProd(v,offset, sampleNo,roffset);  
   default:  
     throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");  
1663    }    }
1664  }  }
1665    
1666    
1667  // To simplify the memory management, all threads operate on one large vector, rather than one each.  
1668  // Each sample is evaluated independently and copied into the result DataExpanded.  // This version of resolve uses storage in each node to hold results
1669  DataReady_ptr  DataReady_ptr
1670  DataLazy::resolve()  DataLazy::resolveNodeWorker()
1671  {  {
1672      if (m_readytype!='E')         // if the whole sub-expression is Constant or Tagged, then evaluate it normally
 LAZYDEBUG(cout << "Sample size=" << m_samplesize << endl;)  
 LAZYDEBUG(cout << "Buffers=" << m_buffsRequired << endl;)  
   
   if (m_readytype!='E')     // if the whole sub-expression is Constant or Tagged, then evaluate it normally  
1673    {    {
1674      collapse();      collapse();
1675    }    }
1676    if (m_op==IDENTITY)       // So a lazy expression of Constant or Tagged data will be returned here.    if (m_op==IDENTITY)           // So a lazy expression of Constant or Tagged data will be returned here.
1677    {    {
1678      return m_id;      return m_id;
1679    }    }
1680      // from this point on we must have m_op!=IDENTITY and m_readytype=='E'          // from this point on we must have m_op!=IDENTITY and m_readytype=='E'
1681    size_t threadbuffersize=m_maxsamplesize*(max(1,m_buffsRequired)); // Each thread needs to have enough    DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(),  RealVectorType(getNoValues()));
1682      // storage to evaluate its expression    RealVectorType& resvec=result->getVectorRW();
   int numthreads=1;  
 #ifdef _OPENMP  
   numthreads=getNumberOfThreads();  
 #endif  
   ValueType v(numthreads*threadbuffersize);  
 LAZYDEBUG(cout << "Buffer created with size=" << v.size() << endl;)  
   DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(),  ValueType(getNoValues()));  
   ValueType& resvec=result->getVector();  
1683    DataReady_ptr resptr=DataReady_ptr(result);    DataReady_ptr resptr=DataReady_ptr(result);
1684    
1685    int sample;    int sample;
   size_t outoffset;     // offset in the output data  
1686    int totalsamples=getNumSamples();    int totalsamples=getNumSamples();
1687    const ValueType* res=0;   // Vector storing the answer    const RealVectorType* res=0;       // Storage for answer
1688    size_t resoffset=0;       // where in the vector to find the answer  LAZYDEBUG(cout << "Total number of samples=" <<totalsamples << endl;)
1689    #pragma omp parallel for private(sample,resoffset,outoffset,res) schedule(static)    #pragma omp parallel private(sample,res)
1690    for (sample=0;sample<totalsamples;++sample)    {
1691    {          size_t roffset=0;
1692  LAZYDEBUG(cout << "################################# " << sample << endl;)  #ifdef LAZY_STACK_PROF
1693            stackstart[omp_get_thread_num()]=&roffset;
1694            stackend[omp_get_thread_num()]=&roffset;
1695    #endif
1696            #pragma omp for schedule(static)
1697            for (sample=0;sample<totalsamples;++sample)
1698            {
1699                    roffset=0;
1700  #ifdef _OPENMP  #ifdef _OPENMP
1701      res=resolveSample(v,threadbuffersize*omp_get_thread_num(),sample,resoffset);                  res=resolveNodeSample(omp_get_thread_num(),sample,roffset);
1702  #else  #else
1703      res=resolveSample(v,0,sample,resoffset);   // res would normally be v, but not if its a single IDENTITY op.                  res=resolveNodeSample(0,sample,roffset);
1704  #endif  #endif
1705  LAZYDEBUG(cerr << "-------------------------------- " << endl;)  LAZYDEBUG(cout << "Sample #" << sample << endl;)
1706      outoffset=result->getPointOffset(sample,0);  LAZYDEBUG(cout << "Final res[" << roffset<< "]=" << (*res)[roffset] << (*res)[roffset]<< endl; )
1707  LAZYDEBUG(cerr << "offset=" << outoffset << endl;)                  RealVectorType::size_type outoffset=result->getPointOffset(sample,0);
1708      for (unsigned int i=0;i<m_samplesize;++i,++outoffset,++resoffset)   // copy values into the output vector                  memcpy(&(resvec[outoffset]),&((*res)[roffset]),m_samplesize*sizeof(RealVectorType::ElementType));
1709      {          }
1710      resvec[outoffset]=(*res)[resoffset];    }
1711      }  #ifdef LAZY_STACK_PROF
1712  LAZYDEBUG(cerr << "*********************************" << endl;)    for (int i=0;i<getNumberOfThreads();++i)
1713      {
1714            size_t r=((size_t)stackstart[i] - (size_t)stackend[i]);
1715    //      cout << i << " " << stackstart[i] << " .. " << stackend[i] << " = " <<  r << endl;
1716            if (r>maxstackuse)
1717            {
1718                    maxstackuse=r;
1719            }
1720    }    }
1721      cout << "Max resolve Stack use=" << maxstackuse << endl;
1722    #endif
1723    return resptr;    return resptr;
1724  }  }
1725    
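resolveNodeWorker parallelises over samples: every thread keeps its own roffset, resolves each sample into its private slice of the node buffers, and memcpy's the finished slice into the right place in the result vector. The reduced sketch below mirrors that pattern; the buffer fill is a placeholder for the real call to resolveNodeSample, and the caller is assumed to have sized the result vector to totalsamples * samplesize.

#include <cstddef>
#include <cstring>
#include <vector>
#ifdef _OPENMP
#include <omp.h>
#endif

// Sketch of the parallel resolve loop: each thread writes into its own slice of
// 'scratch' and copies each finished sample into the shared result vector.
void resolve_all_samples(std::vector<double>& result,
                         int totalsamples, std::size_t samplesize, int nthreads)
{
    std::vector<double> scratch(static_cast<std::size_t>(nthreads) * samplesize);
    int sample;
    #pragma omp parallel private(sample)
    {
#ifdef _OPENMP
        int tid = omp_get_thread_num();
#else
        int tid = 0;
#endif
        double* slice = &scratch[tid * samplesize];
        #pragma omp for schedule(static)
        for (sample = 0; sample < totalsamples; ++sample)
        {
            for (std::size_t i = 0; i < samplesize; ++i)
            {
                slice[i] = sample;      // placeholder for resolveNodeSample()
            }
            std::memcpy(&result[sample * samplesize], slice,
                        samplesize * sizeof(double));
        }
    }
}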
# Line 1177  std::string Line 1727  std::string
1727  DataLazy::toString() const  DataLazy::toString() const
1728  {  {
1729    ostringstream oss;    ostringstream oss;
1730    oss << "Lazy Data:";    oss << "Lazy Data: [depth=" << m_height<< "] ";
1731    intoString(oss);    switch (escriptParams.getLAZY_STR_FMT())
1732      {
1733      case 1:       // tree format
1734            oss << endl;
1735            intoTreeString(oss,"");
1736            break;
1737      case 2:       // just the depth
1738            break;
1739      default:
1740            intoString(oss);
1741            break;
1742      }
1743    return oss.str();    return oss.str();
1744  }  }
1745    
# Line 1186  DataLazy::toString() const Line 1747  DataLazy::toString() const
1747  void  void
1748  DataLazy::intoString(ostringstream& oss) const  DataLazy::intoString(ostringstream& oss) const
1749  {  {
1750    //    oss << "[" << m_children <<";"<<m_height <<"]";
1751    switch (getOpgroup(m_op))    switch (getOpgroup(m_op))
1752    {    {
1753    case G_IDENTITY:    case G_IDENTITY:
1754      if (m_id->isExpanded())          if (m_id->isExpanded())
1755      {          {
1756         oss << "E";             oss << "E";
1757      }          }
1758      else if (m_id->isTagged())          else if (m_id->isTagged())
1759      {          {
1760        oss << "T";            oss << "T";
1761      }          }
1762      else if (m_id->isConstant())          else if (m_id->isConstant())
1763      {          {
1764        oss << "C";            oss << "C";
1765      }          }
1766      else          else
1767      {          {
1768        oss << "?";            oss << "?";
1769      }          }
1770      oss << '@' << m_id.get();          oss << '@' << m_id.get();
1771      break;          break;
1772    case G_BINARY:    case G_BINARY:
1773      oss << '(';          oss << '(';
1774      m_left->intoString(oss);          m_left->intoString(oss);
1775      oss << ' ' << opToString(m_op) << ' ';          oss << ' ' << opToString(m_op) << ' ';
1776      m_right->intoString(oss);          m_right->intoString(oss);
1777      oss << ')';          oss << ')';
1778      break;          break;
1779    case G_UNARY:    case G_UNARY:
1780      case G_UNARY_P:
1781    case G_NP1OUT:    case G_NP1OUT:
1782    case G_NP1OUT_P:    case G_NP1OUT_P:
1783      oss << opToString(m_op) << '(';    case G_REDUCTION:
1784      m_left->intoString(oss);          oss << opToString(m_op) << '(';
1785      oss << ')';          m_left->intoString(oss);
1786      break;          oss << ')';
1787            break;
1788    case G_TENSORPROD:    case G_TENSORPROD:
1789      oss << opToString(m_op) << '(';          oss << opToString(m_op) << '(';
1790      m_left->intoString(oss);          m_left->intoString(oss);
1791      oss << ", ";          oss << ", ";
1792      m_right->intoString(oss);          m_right->intoString(oss);
1793      oss << ')';          oss << ')';
1794      break;          break;
1795      case G_NP1OUT_2P:
1796            oss << opToString(m_op) << '(';
1797            m_left->intoString(oss);
1798            oss << ", " << m_axis_offset << ", " << m_transpose;
1799            oss << ')';
1800            break;
1801      case G_CONDEVAL:
1802            oss << opToString(m_op)<< '(' ;
1803            m_mask->intoString(oss);
1804            oss << " ? ";
1805            m_left->intoString(oss);
1806            oss << " : ";
1807            m_right->intoString(oss);
1808            oss << ')';
1809            break;
1810    default:    default:
1811      oss << "UNKNOWN";          oss << "UNKNOWN";
1812    }    }
1813  }  }
1814    
1815    
1816    void
1817    DataLazy::intoTreeString(ostringstream& oss, string indent) const
1818    {
1819      oss << '[' << m_rank << ':' << setw(3) << m_samplesize << "] " << indent;
1820      switch (getOpgroup(m_op))
1821      {
1822      case G_IDENTITY:
1823            if (m_id->isExpanded())
1824            {
1825               oss << "E";
1826            }
1827            else if (m_id->isTagged())
1828            {
1829              oss << "T";
1830            }
1831            else if (m_id->isConstant())
1832            {
1833              oss << "C";
1834            }
1835            else
1836            {
1837              oss << "?";
1838            }
1839            oss << '@' << m_id.get() << endl;
1840            break;
1841      case G_BINARY:
1842            oss << opToString(m_op) << endl;
1843            indent+='.';
1844            m_left->intoTreeString(oss, indent);
1845            m_right->intoTreeString(oss, indent);
1846            break;
1847      case G_UNARY:
1848      case G_UNARY_P:
1849      case G_NP1OUT:
1850      case G_NP1OUT_P:
1851      case G_REDUCTION:
1852            oss << opToString(m_op) << endl;
1853            indent+='.';
1854            m_left->intoTreeString(oss, indent);
1855            break;
1856      case G_TENSORPROD:
1857            oss << opToString(m_op) << endl;
1858            indent+='.';
1859            m_left->intoTreeString(oss, indent);
1860            m_right->intoTreeString(oss, indent);
1861            break;
1862      case G_NP1OUT_2P:
1863            oss << opToString(m_op) << ", " << m_axis_offset << ", " << m_transpose<< endl;
1864            indent+='.';
1865            m_left->intoTreeString(oss, indent);
1866            break;
1867      default:
1868            oss << "UNKNOWN";
1869      }
1870    }
1871    
1872    
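With LAZY_STR_FMT set to 1, toString and intoTreeString print one node per line, prefixed with that node's rank and sample size and indented with one dot per tree level. For an expression along the lines of sin(C)+E the output would look roughly like the following; the depth, sizes and addresses are made up for illustration.

Lazy Data: [depth=2] 
[0:  8] ADD
[0:  8] .sin
[0:  1] ..C@0x1f83a20
[0:  8] .E@0x1f83b60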
1873  DataAbstract*  DataAbstract*
1874  DataLazy::deepCopy()  DataLazy::deepCopy() const
1875  {  {
1876    switch (getOpgroup(m_op))    switch (getOpgroup(m_op))
1877    {    {
1878    case G_IDENTITY:  return new DataLazy(m_id->deepCopy()->getPtr());    case G_IDENTITY:  return new DataLazy(m_id->deepCopy()->getPtr());
1879    case G_UNARY: return new DataLazy(m_left->deepCopy()->getPtr(),m_op);    case G_UNARY:
1880    case G_BINARY:    return new DataLazy(m_left->deepCopy()->getPtr(),m_right->deepCopy()->getPtr(),m_op);    case G_REDUCTION:      return new DataLazy(m_left->deepCopy()->getPtr(),m_op);
1881      case G_UNARY_P:       return new DataLazy(m_left->deepCopy()->getPtr(), m_op, m_tol);
1882      case G_BINARY:        return new DataLazy(m_left->deepCopy()->getPtr(),m_right->deepCopy()->getPtr(),m_op);
1883    case G_NP1OUT: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(),m_op);    case G_NP1OUT: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(),m_op);
1884    case G_TENSORPROD: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);    case G_TENSORPROD: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
1885      case G_NP1OUT_P:   return new DataLazy(m_left->deepCopy()->getPtr(),m_op,  m_axis_offset);
1886      case G_NP1OUT_2P:  return new DataLazy(m_left->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
1887    default:    default:
1888      throw DataException("Programmer error - do not know how to deepcopy operator "+opToString(m_op)+".");          throw DataException("Programmer error - do not know how to deepcopy operator "+opToString(m_op)+".");
1889    }    }
1890  }  }
1891    
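deepCopy recurses through the children before rebuilding the node around the copies, so the clone shares nothing with the original tree. A bare-bones sketch of that pattern; Node and deep_copy are illustrative names.

#include <memory>

// Sketch only: clone the children first, then reconstruct the operator node.
struct Node
{
    std::shared_ptr<Node> left, right;
    int op = 0;

    std::shared_ptr<Node> deep_copy() const
    {
        auto n = std::make_shared<Node>();
        n->op = op;
        if (left)  n->left  = left->deep_copy();
        if (right) n->right = right->deep_copy();
        return n;
    }
};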
1892    
1893    
1894  // There is no single, natural interpretation of getLength on DataLazy.  // There is no single, natural interpretation of getLength on DataLazy.
1895  // Instances of DataReady can look at the size of their vectors.  // Instances of DataReady can look at the size of their vectors.
1896  // For lazy though, it could be the size the data would be if it were resolved;  // For lazy though, it could be the size the data would be if it were resolved;
1897  // or it could be some function of the lengths of the DataReady instances which  // or it could be some function of the lengths of the DataReady instances which
1898  // form part of the expression.  // form part of the expression.
1899  // Rather than have people making assumptions, I have disabled the method.  // Rather than have people making assumptions, I have disabled the method.
1900  DataTypes::ValueType::size_type  DataTypes::RealVectorType::size_type
1901  DataLazy::getLength() const  DataLazy::getLength() const
1902  {  {
1903    throw DataException("getLength() does not make sense for lazy data.");    throw DataException("getLength() does not make sense for lazy data.");
# Line 1270  DataLazy::getSlice(const DataTypes::Regi Line 1912  DataLazy::getSlice(const DataTypes::Regi
1912    
1913    
1914  // To do this we need to rely on our child nodes  // To do this we need to rely on our child nodes
1915  DataTypes::ValueType::size_type  DataTypes::RealVectorType::size_type
1916  DataLazy::getPointOffset(int sampleNo,
1917                   int dataPointNo)
1918  {
1919    if (m_op==IDENTITY)
1920    {
1921      return m_id->getPointOffset(sampleNo,dataPointNo);
1922    }
1923    if (m_readytype!='E')
1924    {
1925      collapse();
1926      return m_id->getPointOffset(sampleNo,dataPointNo);
1927    }
1928    // at this point we do not have an identity node and the expression will be Expanded
1929    // so we only need to know which child to ask
1930    if (m_left->m_readytype=='E')
1931    {
1932      return m_left->getPointOffset(sampleNo,dataPointNo);
1933    }
1934    else
1935    {
1936      return m_right->getPointOffset(sampleNo,dataPointNo);
1937    }
1938  }
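
In other words: an IDENTITY node answers directly from its stored DataReady, anything not expanded is collapsed first, and an expanded expression simply forwards the query to whichever child is itself expanded. A minimal usage sketch follows; the helper name and the choice of sample/point are illustrative and not taken from this file.

// Illustrative only: querying the offset of the first data point of the
// first sample.  Because this overload is non-const it may collapse the
// expression tree as a side effect when the data is not expanded.
DataTypes::RealVectorType::size_type
firstOffset(DataLazy& lazy)
{
    return lazy.getPointOffset(0, 0);   // sampleNo=0, dataPointNo=0
}
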
1939    
1940  // To do this we need to rely on our child nodes
1941  DataTypes::ValueType::size_type  DataTypes::RealVectorType::size_type
1942  DataLazy::getPointOffset(int sampleNo,
1943                   int dataPointNo) const
1944  {
1945    if (m_op==IDENTITY)
1946    {
1947      return m_id->getPointOffset(sampleNo,dataPointNo);
1948    }
1949    if (m_readytype=='E')
1950    {
# Line 1310  DataLazy::getPointOffset(int sampleNo, Line 1952  DataLazy::getPointOffset(int sampleNo,
1952      // so we only need to know which child to ask
1953      if (m_left->m_readytype=='E')
1954      {
1955        return m_left->getPointOffset(sampleNo,dataPointNo);
1956      }
1957      else
1958      {
1959        return m_right->getPointOffset(sampleNo,dataPointNo);
1960      }
1961    }
1962    if (m_readytype=='C')
1963    {
1964      return m_left->getPointOffset(sampleNo,dataPointNo); // which child doesn't matter
1965    }
1966    throw DataException("Programmer error - getPointOffset on lazy data may require collapsing (but this object is marked const).");
1967  }
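
Note the asymmetry with the non-const overload above: since collapse() mutates the node, the const overload can only answer for expanded ('E') or constant ('C') trees and must throw otherwise. Below is a hedged sketch of a guard a caller might put in front of a const query; the helper name is hypothetical, and it is deliberately stricter than necessary because constant trees would also be safe.

// Hypothetical helper: only query a const lazy object when no collapse
// could possibly be required.  actsExpanded() is defined further below.
DataTypes::RealVectorType::size_type
offsetIfExpanded(const DataLazy& lazy, int sampleNo, int dataPointNo)
{
    if (!lazy.actsExpanded())
    {
        throw DataException("offset query may require collapsing; use a non-const DataLazy");
    }
    return lazy.getPointOffset(sampleNo, dataPointNo);
}
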
1968    
1969  // It would seem that DataTagged will need to be treated differently since even after setting all tags  
1970  // to zero, all the tags from all the DataTags would be in the result.  // I have decided to let Data:: handle this issue.
  // However, since they all have the same value (0), whether they are there or not should not matter.  
  // So I have decided that for all types this method will create a constant 0.  
  // It can be promoted up as required.  
  // A possible efficiency concern might be expanded->constant->expanded, which involves extra memory management,  
  // but we can deal with that if it arises.  
1971  void
1972  DataLazy::setToZero()
1973  {
1974    DataTypes::ValueType v(getNoValues(),0);  //   DataTypes::RealVectorType v(getNoValues(),0);
1975    m_id=DataReady_ptr(new DataConstant(getFunctionSpace(),getShape(),v));  //   m_id=DataReady_ptr(new DataConstant(getFunctionSpace(),getShape(),v));
1976    m_op=IDENTITY;  //   m_op=IDENTITY;
1977    m_right.reset();    //   m_right.reset();  
1978    m_left.reset();  //   m_left.reset();
1979    m_readytype='C';  //   m_readytype='C';
1980    m_buffsRequired=1;  //   m_buffsRequired=1;
1981    
1982      (void)privdebug;  // to stop the compiler complaining about unused privdebug
1983      throw DataException("Programmer error - setToZero not supported for DataLazy (DataLazy objects should be read only).");
1984    }
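
For reference, the commented-out body above shows what in-place zeroing used to do: build a zero-filled DataConstant and turn the node into an IDENTITY wrapper around it. The following standalone sketch reconstructs just that idea from the commented lines; it is for illustration only and says nothing about how Data:: actually handles zeroing now.

// Sketch of the retired in-place zeroing strategy, reconstructed from the
// commented-out lines above; shown for illustration only.
DataReady_ptr
makeZeroConstant(DataLazy& lazy)
{
    DataTypes::RealVectorType v(lazy.getNoValues(), 0);            // one zero per value
    return DataReady_ptr(new DataConstant(lazy.getFunctionSpace(),
                                          lazy.getShape(), v));    // constant-zero data
}
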
1985    
1986    bool
1987    DataLazy::actsExpanded() const
1988    {
1989            return (m_readytype=='E');
1990  }
1991    
1992  } // end namespace
1993    
