Diff of /branches/clazy/escriptcore/src/DataLazy.cpp


branches/schroedinger/escript/src/DataLazy.cpp revision 1888 by jfenwick, Wed Oct 15 04:00:21 2008 UTC
trunk/escriptcore/src/DataLazy.cpp revision 6168 by jfenwick, Wed Apr 13 03:08:12 2016 UTC
/*****************************************************************************
*
* Copyright (c) 2003-2016 by The University of Queensland
* http://www.uq.edu.au
*
* Primary Business: Queensland, Australia
* Licensed under the Apache License, version 2.0
* http://www.apache.org/licenses/LICENSE-2.0
*
* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
* Development 2012-2013 by School of Earth Sciences
* Development from 2014 by Centre for Geoscience Computing (GeoComp)
*
*****************************************************************************/

#include "DataLazy.h"

#include "Data.h"
#include "DataTypes.h"
#include "EscriptParams.h"
#include "FunctionSpace.h"
#include "Utils.h"
#include "DataVectorOps.h"

#include <iomanip> // for some fancy formatting in debug

using namespace escript::DataTypes;

#define NO_ARG

// #define LAZYDEBUG(X) if (privdebug){X;}
#define LAZYDEBUG(X)
namespace
{
bool privdebug=false;

#define ENABLEDEBUG privdebug=true;
#define DISABLEDEBUG privdebug=false;
}

// #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {cerr << "\n!!!!!!! SIZE LIMIT EXCEEDED " << m_children << ";" << m_height << endl << toString() << endl;resolveToIdentity();}

// #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {cerr << "SIZE LIMIT EXCEEDED " << m_height << endl;resolveToIdentity();}

#define SIZELIMIT if (m_height>escript::escriptParams.getTOO_MANY_LEVELS()) {if (escript::escriptParams.getLAZY_VERBOSE()){cerr << "SIZE LIMIT EXCEEDED height=" << m_height << endl;}resolveToIdentity();}

/*
How does DataLazy work?
~~~~~~~~~~~~~~~~~~~~~~~

Each instance represents a single operation on one or two other DataLazy instances. These arguments are normally
denoted left and right.

A special operation, IDENTITY, stores an instance of DataReady in the m_id member.
This means that all "internal" nodes in the structure are instances of DataLazy.

Each operation has a string representation as well as an opgroup - eg G_IDENTITY, G_BINARY, ...
Note that IDENTITY is not considered a unary operation.

I am avoiding calling the structure formed a tree because it is not guaranteed to be one (eg c=a+a).
It must however form a DAG (directed acyclic graph).
I will refer to individual DataLazy objects within the structure as nodes.

Each node also stores:
- m_readytype \in {'E','T','C','?'} ~ indicates what sort of DataReady would be produced if the expression was
        evaluated.
- m_buffsrequired ~ the largest number of samples which would need to be kept simultaneously in order to
        evaluate the expression.
- m_samplesize ~ the number of doubles stored in a sample.

When a new node is created, the above values are computed based on the values in the child nodes.
Eg: if left requires 4 samples and right requires 6 then left+right requires 7 samples.

The resolve method, which produces a DataReady from a DataLazy, does the following:
1) Create a DataReady to hold the new result.
2) Allocate a vector (v) big enough to hold m_buffsrequired samples.
3) For each sample, call resolveSample with v, to get its values and copy them into the result object.

(In the case of OMP, multiple samples are resolved in parallel so the vector needs to be larger.)

resolveSample returns a Vector* and an offset within that vector where the result is stored.
Normally, this would be v, but for identity nodes their internal vector is returned instead.

The convention I use is that the resolve methods should store their results starting at the offset they are passed.

For expressions which evaluate to Constant or Tagged, there is a different evaluation method.
The collapse method invokes the (non-lazy) operations on the Data class to evaluate the expression.

To add a new operator you need to do the following (plus anything I might have forgotten - adding a new group for example):
1) Add to the ES_optype.
2) Determine what opgroup your operation belongs to (X).
3) Add a string for the op to the end of ES_opstrings.
4) Increase ES_opcount.
5) Add an entry (X) to opgroups.
6) Add an entry to the switch in collapseToReady.
7) Add an entry to resolveX.
*/
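/*
Illustrative sketch (added, not part of the original source): how a small expression
builds a DAG of DataLazy nodes and is then resolved. It assumes 'a' and 'b' are
non-lazy DataAbstract_ptr instances and that ADD and SIN are members of ES_optype,
and it uses the constructors defined later in this file.

    DataLazy_ptr sb(new DataLazy(b, SIN));       // unary node: sin(b); b gets wrapped in an IDENTITY child
    DataLazy_ptr sum(new DataLazy(a, sb, ADD));  // binary node: a + sin(b)
    DataReady_ptr r = sum->resolve();            // evaluates the DAG sample by sample

Reusing the same operand on both sides (eg a+a) gives a node whose two children are
the same object, which is why the structure is a DAG rather than a tree.
*/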

using namespace std;
using namespace boost;

namespace escript
{

namespace
{

// enabling this will print out when ever the maximum stacksize used by resolve increases
// it assumes _OPENMP is also in use
//#define LAZY_STACK_PROF

#ifndef _OPENMP
  #ifdef LAZY_STACK_PROF
  #undef LAZY_STACK_PROF
  #endif
#endif

#ifdef LAZY_STACK_PROF
std::vector<void*> stackstart(getNumberOfThreads());
std::vector<void*> stackend(getNumberOfThreads());
size_t maxstackuse=0;
#endif

// return the FunctionSpace of the result of "left op right"
FunctionSpace
resultFS(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
{
    // perhaps this should call interpolate and throw or something?
    // maybe we need an interpolate node -
    // that way, if interpolate is required in any other op we can just throw a
    // programming error exception.

    FunctionSpace l=left->getFunctionSpace();
    FunctionSpace r=right->getFunctionSpace();
    if (l!=r)
    {
        signed char res=r.getDomain()->preferredInterpolationOnDomain(r.getTypeCode(), l.getTypeCode());
        if (res==1)
        {
            return l;
        }
        if (res==-1)
        {
            return r;
        }
        throw DataException("Cannot interpolate between the FunctionSpaces given for operation "+opToString(op)+".");
    }
    return l;
}

// return the shape of the result of "left op right"
// the shapes resulting from tensor product are more complex to compute so are worked out elsewhere
DataTypes::ShapeType
resultShape(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
{
    if (left->getShape()!=right->getShape())
    {
        if ((getOpgroup(op)!=G_BINARY) && (getOpgroup(op)!=G_NP1OUT))
        {
            throw DataException("Shapes not the same - shapes must match for (point)binary operations.");
        }

        if (left->getRank()==0)         // we need to allow scalar * anything
        {
            return right->getShape();
        }
        if (right->getRank()==0)
        {
            return left->getShape();
        }
        throw DataException("Shapes not the same - arguments must have matching shapes (or be scalars) for (point)binary operations on lazy data.");
    }
    return left->getShape();
}

// return the shape for "op left"

DataTypes::ShapeType
resultShape(DataAbstract_ptr left, ES_optype op, int axis_offset)
{
    switch(op)
    {
    case TRANS:
       {                    // for the scoping of variables
        const DataTypes::ShapeType& s=left->getShape();
        DataTypes::ShapeType sh;
        int rank=left->getRank();
        if (axis_offset<0 || axis_offset>rank)
        {
            stringstream e;
            e << "Error - Data::transpose must have 0 <= axis_offset <= rank=" << rank;
            throw DataException(e.str());
        }
        for (int i=0; i<rank; i++)
        {
            int index = (axis_offset+i)%rank;
            sh.push_back(s[index]); // Append to new shape
        }
        return sh;
       }
    break;
    case TRACE:
       {
        int rank=left->getRank();
        if (rank<2)
        {
            throw DataException("Trace can only be computed for objects with rank 2 or greater.");
        }
        if ((axis_offset>rank-2) || (axis_offset<0))
        {
            throw DataException("Trace: axis offset must lie between 0 and rank-2 inclusive.");
        }
        if (rank==2)
        {
            return DataTypes::scalarShape;
        }
        else if (rank==3)
        {
            DataTypes::ShapeType sh;
            if (axis_offset==0)
            {
                sh.push_back(left->getShape()[2]);
            }
            else        // offset==1
            {
                sh.push_back(left->getShape()[0]);
            }
            return sh;
        }
        else if (rank==4)
        {
            DataTypes::ShapeType sh;
            const DataTypes::ShapeType& s=left->getShape();
            if (axis_offset==0)
            {
                sh.push_back(s[2]);
                sh.push_back(s[3]);
            }
            else if (axis_offset==1)
            {
                sh.push_back(s[0]);
                sh.push_back(s[3]);
            }
            else        // offset==2
            {
                sh.push_back(s[0]);
                sh.push_back(s[1]);
            }
            return sh;
        }
        else            // unknown rank
        {
            throw DataException("Error - Data::trace can only be calculated for rank 2, 3 or 4 object.");
        }
       }
    break;
    default:
        throw DataException("Programmer error - resultShape(left,op) can't compute shapes for operator "+opToString(op)+".");
    }
}

DataTypes::ShapeType
SwapShape(DataAbstract_ptr left, const int axis0, const int axis1)
{
     // This code taken from the Data.cpp swapaxes() method
     // Some of the checks are probably redundant here
     int axis0_tmp,axis1_tmp;
     const DataTypes::ShapeType& s=left->getShape();
     DataTypes::ShapeType out_shape;
     // Here's the equivalent of python s_out=s[axis_offset:]+s[:axis_offset]
     // which goes thru all shape vector elements starting with axis_offset (at index=rank wrap around to 0)
     int rank=left->getRank();
     if (rank<2) {
        throw DataException("Error - Data::swapaxes argument must have at least rank 2.");
     }
     if (axis0<0 || axis0>rank-1) {
        stringstream e;
        e << "Error - Data::swapaxes: axis0 must be between 0 and rank-1=" << (rank-1);
        throw DataException(e.str());
     }
     if (axis1<0 || axis1>rank-1) {
        stringstream e;
        e << "Error - Data::swapaxes: axis1 must be between 0 and rank-1=" << (rank-1);
        throw DataException(e.str());
     }
     if (axis0 == axis1) {
         throw DataException("Error - Data::swapaxes: axis indices must be different.");
     }
     if (axis0 > axis1) {
         axis0_tmp=axis1;
         axis1_tmp=axis0;
     } else {
         axis0_tmp=axis0;
         axis1_tmp=axis1;
     }
     for (int i=0; i<rank; i++) {
       if (i == axis0_tmp) {
          out_shape.push_back(s[axis1_tmp]);
       } else if (i == axis1_tmp) {
          out_shape.push_back(s[axis0_tmp]);
       } else {
          out_shape.push_back(s[i]);
       }
     }
    return out_shape;
}


// determine the output shape for the general tensor product operation
// the additional parameters return information required later for the product
// the majority of this code is copy pasted from C_General_Tensor_Product
DataTypes::ShapeType
GTPShape(DataAbstract_ptr left, DataAbstract_ptr right, int axis_offset, int transpose, int& SL, int& SM, int& SR)
{
  // Get rank and shape of inputs
  int rank0 = left->getRank();
  int rank1 = right->getRank();
  const DataTypes::ShapeType& shape0 = left->getShape();
  const DataTypes::ShapeType& shape1 = right->getShape();

  // Prepare for the loops of the product and verify compatibility of shapes
  int start0=0, start1=0;
  if (transpose == 0)           {}
  else if (transpose == 1)      { start0 = axis_offset; }
  else if (transpose == 2)      { start1 = rank1-axis_offset; }
  else                          { throw DataException("DataLazy GeneralTensorProduct Constructor: Error - transpose should be 0, 1 or 2"); }

  if (rank0<axis_offset)
  {
        throw DataException("DataLazy GeneralTensorProduct Constructor: Error - rank of left < axisoffset");
  }

  // Adjust the shapes for transpose
  DataTypes::ShapeType tmpShape0(rank0);        // pre-sizing the vectors rather
  DataTypes::ShapeType tmpShape1(rank1);        // than using push_back
  for (int i=0; i<rank0; i++)   { tmpShape0[i]=shape0[(i+start0)%rank0]; }
  for (int i=0; i<rank1; i++)   { tmpShape1[i]=shape1[(i+start1)%rank1]; }

  // Prepare for the loops of the product
  SL=1, SM=1, SR=1;
  for (int i=0; i<rank0-axis_offset; i++)       {
    SL *= tmpShape0[i];
  }
  for (int i=rank0-axis_offset; i<rank0; i++)   {
    if (tmpShape0[i] != tmpShape1[i-(rank0-axis_offset)]) {
      throw DataException("C_GeneralTensorProduct: Error - incompatible shapes");
    }
    SM *= tmpShape0[i];
  }
  for (int i=axis_offset; i<rank1; i++)         {
    SR *= tmpShape1[i];
  }

  // Define the shape of the output (rank of shape is the sum of the loop ranges below)
  DataTypes::ShapeType shape2(rank0+rank1-2*axis_offset);
  {                     // block to limit the scope of out_index
     int out_index=0;
     for (int i=0; i<rank0-axis_offset; i++, ++out_index) { shape2[out_index]=tmpShape0[i]; } // First part of arg_0_Z
     for (int i=axis_offset; i<rank1; i++, ++out_index)   { shape2[out_index]=tmpShape1[i]; } // Last part of arg_1_Z
  }

  if (shape2.size()>ESCRIPT_MAX_DATA_RANK)
  {
     ostringstream os;
     os << "C_GeneralTensorProduct: Error - Attempt to create a rank " << shape2.size() << " object. The maximum rank is " << ESCRIPT_MAX_DATA_RANK << ".";
     throw DataException(os.str());
  }

  return shape2;
}
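/*
Worked example (added for illustration): for a left argument of shape (2,3), a right
argument of shape (3,4), axis_offset=1 and transpose=0, the single contracted
dimension matches (3==3), so GTPShape returns the output shape (2,4) with
SL=2, SM=3 and SR=4 - the familiar matrix-matrix product layout.
*/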

}       // end anonymous namespace

void DataLazy::LazyNodeSetup()
{
#ifdef _OPENMP
    int numthreads=omp_get_max_threads();
    m_samples.resize(numthreads*m_samplesize);
    m_sampleids=new int[numthreads];
    for (int i=0;i<numthreads;++i)
    {
        m_sampleids[i]=-1;
    }
#else
    m_samples.resize(m_samplesize);
    m_sampleids=new int[1];
    m_sampleids[0]=-1;
#endif  // _OPENMP
}
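// Note (added for clarity): m_samples is a per-thread scratch buffer. Thread t writes
// its partial results starting at offset t*m_samplesize, which is why the resolveNode*
// methods below set roffset=m_samplesize*tid before storing anything, and why the
// buffer is sized numthreads*m_samplesize under OpenMP.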

// Creates an identity node
DataLazy::DataLazy(DataAbstract_ptr p)
        : parent(p->getFunctionSpace(),p->getShape())
        ,m_sampleids(0),
        m_samples(1)
{
   if (p->isLazy())
   {
        // I don't want identity of Lazy.
        // Question: Why would that be so bad?
        // Answer: We assume that the child of ID is something we can call getVector on
        throw DataException("Programmer error - attempt to create identity from a DataLazy.");
   }
   else
   {
        DataReady_ptr dr=dynamic_pointer_cast<DataReady>(p);
        makeIdentity(dr);
LAZYDEBUG(cout << "Wrapping " << dr.get() << " id=" << m_id.get() << endl;)
   }
LAZYDEBUG(cout << "(1)Lazy created with " << m_samplesize << endl;)
}

DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op)
        : parent(left->getFunctionSpace(),(getOpgroup(op)!=G_REDUCTION)?left->getShape():DataTypes::scalarShape),
        m_op(op),
        m_axis_offset(0),
        m_transpose(0),
        m_SL(0), m_SM(0), m_SR(0)
{
   if ((getOpgroup(op)!=G_UNARY) && (getOpgroup(op)!=G_NP1OUT) && (getOpgroup(op)!=G_REDUCTION))
   {
        throw DataException("Programmer error - constructor DataLazy(left, op) will only process UNARY operations.");
   }

   DataLazy_ptr lleft;
   if (!left->isLazy())
   {
        lleft=DataLazy_ptr(new DataLazy(left));
   }
   else
   {
        lleft=dynamic_pointer_cast<DataLazy>(left);
   }
   m_readytype=lleft->m_readytype;
   m_left=lleft;
   m_samplesize=getNumDPPSample()*getNoValues();
   m_children=m_left->m_children+1;
   m_height=m_left->m_height+1;
   LazyNodeSetup();
   SIZELIMIT
}


// In this constructor we need to consider interpolation
DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
        : parent(resultFS(left,right,op), resultShape(left,right,op)),
        m_op(op),
        m_SL(0), m_SM(0), m_SR(0)
{
LAZYDEBUG(cout << "Forming operator with " << left.get() << " " << right.get() << endl;)
   if ((getOpgroup(op)!=G_BINARY))
   {
        throw DataException("Programmer error - constructor DataLazy(left, right, op) will only process BINARY operations.");
   }

   if (getFunctionSpace()!=left->getFunctionSpace())    // left needs to be interpolated
   {
        FunctionSpace fs=getFunctionSpace();
        Data ltemp(left);
        Data tmp(ltemp,fs);
        left=tmp.borrowDataPtr();
   }
   if (getFunctionSpace()!=right->getFunctionSpace())   // right needs to be interpolated
   {
        Data tmp(Data(right),getFunctionSpace());
        right=tmp.borrowDataPtr();
LAZYDEBUG(cout << "Right interpolation required " << right.get() << endl;)
   }
   left->operandCheck(*right);

   if (left->isLazy())                  // the children need to be DataLazy. Wrap them in IDENTITY if required
   {
        m_left=dynamic_pointer_cast<DataLazy>(left);
LAZYDEBUG(cout << "Left is " << m_left->toString() << endl;)
   }
   else
   {
        m_left=DataLazy_ptr(new DataLazy(left));
LAZYDEBUG(cout << "Left " << left.get() << " wrapped " << m_left->m_id.get() << endl;)
   }
   if (right->isLazy())
   {
        m_right=dynamic_pointer_cast<DataLazy>(right);
LAZYDEBUG(cout << "Right is " << m_right->toString() << endl;)
   }
   else
   {
        m_right=DataLazy_ptr(new DataLazy(right));
LAZYDEBUG(cout << "Right " << right.get() << " wrapped " << m_right->m_id.get() << endl;)
   }
   char lt=m_left->m_readytype;
   char rt=m_right->m_readytype;
   if (lt=='E' || rt=='E')
   {
        m_readytype='E';
   }
   else if (lt=='T' || rt=='T')
   {
        m_readytype='T';
   }
   else
   {
        m_readytype='C';
   }
   m_samplesize=getNumDPPSample()*getNoValues();
   m_children=m_left->m_children+m_right->m_children+2;
   m_height=max(m_left->m_height,m_right->m_height)+1;
   LazyNodeSetup();
   SIZELIMIT
LAZYDEBUG(cout << "(3)Lazy created with " << m_samplesize << endl;)
}

DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op, int axis_offset, int transpose)
        : parent(resultFS(left,right,op), GTPShape(left,right, axis_offset, transpose, m_SL,m_SM, m_SR)),
        m_op(op),
        m_axis_offset(axis_offset),
        m_transpose(transpose)
{
   if ((getOpgroup(op)!=G_TENSORPROD))
   {
        throw DataException("Programmer error - constructor DataLazy(left, right, op, ax, tr) will only process BINARY operations which require parameters.");
   }
   if ((transpose>2) || (transpose<0))
   {
        throw DataException("DataLazy GeneralTensorProduct constructor: Error - transpose should be 0, 1 or 2");
   }
   if (getFunctionSpace()!=left->getFunctionSpace())    // left needs to be interpolated
   {
        FunctionSpace fs=getFunctionSpace();
        Data ltemp(left);
        Data tmp(ltemp,fs);
        left=tmp.borrowDataPtr();
   }
   if (getFunctionSpace()!=right->getFunctionSpace())   // right needs to be interpolated
   {
        Data tmp(Data(right),getFunctionSpace());
        right=tmp.borrowDataPtr();
   }
//    left->operandCheck(*right);

   if (left->isLazy())                  // the children need to be DataLazy. Wrap them in IDENTITY if required
   {
        m_left=dynamic_pointer_cast<DataLazy>(left);
   }
   else
   {
        m_left=DataLazy_ptr(new DataLazy(left));
   }
   if (right->isLazy())
   {
        m_right=dynamic_pointer_cast<DataLazy>(right);
   }
   else
   {
        m_right=DataLazy_ptr(new DataLazy(right));
   }
   char lt=m_left->m_readytype;
   char rt=m_right->m_readytype;
   if (lt=='E' || rt=='E')
   {
        m_readytype='E';
   }
   else if (lt=='T' || rt=='T')
   {
        m_readytype='T';
   }
   else
   {
        m_readytype='C';
   }
   m_samplesize=getNumDPPSample()*getNoValues();
   m_children=m_left->m_children+m_right->m_children+2;
   m_height=max(m_left->m_height,m_right->m_height)+1;
   LazyNodeSetup();
   SIZELIMIT
LAZYDEBUG(cout << "(4)Lazy created with " << m_samplesize << endl;)
}


DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, int axis_offset)
        : parent(left->getFunctionSpace(), resultShape(left,op, axis_offset)),
        m_op(op),
        m_axis_offset(axis_offset),
        m_transpose(0),
        m_tol(0)
{
   if ((getOpgroup(op)!=G_NP1OUT_P))
   {
        throw DataException("Programmer error - constructor DataLazy(left, op, ax) will only process UNARY operations which require parameters.");
   }
   DataLazy_ptr lleft;
   if (!left->isLazy())
   {
        lleft=DataLazy_ptr(new DataLazy(left));
   }
   else
   {
        lleft=dynamic_pointer_cast<DataLazy>(left);
   }
   m_readytype=lleft->m_readytype;
   m_left=lleft;
   m_samplesize=getNumDPPSample()*getNoValues();
   m_children=m_left->m_children+1;
   m_height=m_left->m_height+1;
   LazyNodeSetup();
   SIZELIMIT
LAZYDEBUG(cout << "(5)Lazy created with " << m_samplesize << endl;)
}

DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, double tol)
        : parent(left->getFunctionSpace(), left->getShape()),
        m_op(op),
        m_axis_offset(0),
        m_transpose(0),
        m_tol(tol)
{
   if ((getOpgroup(op)!=G_UNARY_P))
   {
        throw DataException("Programmer error - constructor DataLazy(left, op, tol) will only process UNARY operations which require parameters.");
   }
   DataLazy_ptr lleft;
   if (!left->isLazy())
   {
        lleft=DataLazy_ptr(new DataLazy(left));
   }
   else
   {
        lleft=dynamic_pointer_cast<DataLazy>(left);
   }
   m_readytype=lleft->m_readytype;
   m_left=lleft;
   m_samplesize=getNumDPPSample()*getNoValues();
   m_children=m_left->m_children+1;
   m_height=m_left->m_height+1;
   LazyNodeSetup();
   SIZELIMIT
LAZYDEBUG(cout << "(6)Lazy created with " << m_samplesize << endl;)
}


DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, const int axis0, const int axis1)
        : parent(left->getFunctionSpace(), SwapShape(left,axis0,axis1)),
        m_op(op),
        m_axis_offset(axis0),
        m_transpose(axis1),
        m_tol(0)
{
   if ((getOpgroup(op)!=G_NP1OUT_2P))
   {
        throw DataException("Programmer error - constructor DataLazy(left, op, axis0, axis1) will only process UNARY operations which require two integer parameters.");
   }
   DataLazy_ptr lleft;
   if (!left->isLazy())
   {
        lleft=DataLazy_ptr(new DataLazy(left));
   }
   else
   {
        lleft=dynamic_pointer_cast<DataLazy>(left);
   }
   m_readytype=lleft->m_readytype;
   m_left=lleft;
   m_samplesize=getNumDPPSample()*getNoValues();
   m_children=m_left->m_children+1;
   m_height=m_left->m_height+1;
   LazyNodeSetup();
   SIZELIMIT
LAZYDEBUG(cout << "(7)Lazy created with " << m_samplesize << endl;)
}

namespace
{

    inline int max3(int a, int b, int c)
    {
        int t=(a>b?a:b);
        return (t>c?t:c);
    }
}

DataLazy::DataLazy(DataAbstract_ptr mask, DataAbstract_ptr left, DataAbstract_ptr right/*, double tol*/)
        : parent(left->getFunctionSpace(), left->getShape()),
        m_op(CONDEVAL),
        m_axis_offset(0),
        m_transpose(0),
        m_tol(0)
{
   DataLazy_ptr lmask;
   DataLazy_ptr lleft;
   DataLazy_ptr lright;
   if (!mask->isLazy())
   {
        lmask=DataLazy_ptr(new DataLazy(mask));
   }
   else
   {
        lmask=dynamic_pointer_cast<DataLazy>(mask);
   }
   if (!left->isLazy())
   {
        lleft=DataLazy_ptr(new DataLazy(left));
   }
   else
   {
        lleft=dynamic_pointer_cast<DataLazy>(left);
   }
   if (!right->isLazy())
   {
        lright=DataLazy_ptr(new DataLazy(right));
   }
   else
   {
        lright=dynamic_pointer_cast<DataLazy>(right);
   }
   m_readytype=lmask->m_readytype;
   if ((lleft->m_readytype!=lright->m_readytype) || (lmask->m_readytype!=lleft->m_readytype))
   {
        throw DataException("Programmer Error - condEval arguments must have the same readytype");
   }
   m_left=lleft;
   m_right=lright;
   m_mask=lmask;
   m_samplesize=getNumDPPSample()*getNoValues();
   m_children=m_left->m_children+m_right->m_children+m_mask->m_children+1;
   m_height=max3(m_left->m_height,m_right->m_height,m_mask->m_height)+1;
   LazyNodeSetup();
   SIZELIMIT
LAZYDEBUG(cout << "(8)Lazy created with " << m_samplesize << endl;)
}



DataLazy::~DataLazy()
{
   delete[] m_sampleids;
}


/*
  \brief Evaluates the expression using methods on Data.
  This does the work for the collapse method.
  For reasons of efficiency do not call this method on DataExpanded nodes.
*/
DataReady_ptr
DataLazy::collapseToReady() const
{
  if (m_readytype=='E')
  {     // this is more an efficiency concern than anything else
    throw DataException("Programmer Error - do not use collapse on Expanded data.");
  }
  if (m_op==IDENTITY)
  {
    return m_id;
  }
  DataReady_ptr pleft=m_left->collapseToReady();
  Data left(pleft);
  Data right;
  if ((getOpgroup(m_op)==G_BINARY) || (getOpgroup(m_op)==G_TENSORPROD))
  {
    right=Data(m_right->collapseToReady());
  }
  Data result;
  switch(m_op)
  {
    case ADD:
        result=left+right;
        break;
    case SUB:
        result=left-right;
        break;
    case MUL:
        result=left*right;
        break;
    case DIV:
        result=left/right;
        break;
    case SIN:
        result=left.sin();
        break;
    case COS:
        result=left.cos();
        break;
    case TAN:
        result=left.tan();
        break;
    case ASIN:
        result=left.asin();
        break;
    case ACOS:
        result=left.acos();
        break;
    case ATAN:
        result=left.atan();
        break;
    case SINH:
        result=left.sinh();
        break;
    case COSH:
        result=left.cosh();
        break;
    case TANH:
        result=left.tanh();
        break;
    case ERF:
        result=left.erf();
        break;
    case ASINH:
        result=left.asinh();
        break;
    case ACOSH:
        result=left.acosh();
        break;
    case ATANH:
        result=left.atanh();
        break;
    case LOG10:
        result=left.log10();
        break;
    case LOG:
        result=left.log();
        break;
    case SIGN:
        result=left.sign();
        break;
    case ABS:
        result=left.abs();
        break;
    case NEG:
        result=left.neg();
        break;
    case POS:
        // it doesn't mean anything for delayed.
        // it will just trigger a deep copy of the lazy object
        throw DataException("Programmer error - POS not supported for lazy data.");
        break;
    case EXP:
        result=left.exp();
        break;
    case SQRT:
        result=left.sqrt();
        break;
    case RECIP:
        result=left.oneOver();
        break;
    case GZ:
        result=left.wherePositive();
        break;
    case LZ:
        result=left.whereNegative();
        break;
    case GEZ:
        result=left.whereNonNegative();
        break;
    case LEZ:
        result=left.whereNonPositive();
        break;
    case NEZ:
        result=left.whereNonZero(m_tol);
        break;
    case EZ:
        result=left.whereZero(m_tol);
        break;
    case SYM:
        result=left.symmetric();
        break;
    case NSYM:
        result=left.antisymmetric();
        break;
    case PROD:
        result=C_GeneralTensorProduct(left,right,m_axis_offset, m_transpose);
        break;
    case TRANS:
        result=left.transpose(m_axis_offset);
        break;
    case TRACE:
        result=left.trace(m_axis_offset);
        break;
    case SWAP:
        result=left.swapaxes(m_axis_offset, m_transpose);
        break;
    case MINVAL:
        result=left.minval();
        break;
    case MAXVAL:
        result=left.maxval();
        break;
    case HER:
        result=left.hermitian();
        break;
    default:
        throw DataException("Programmer error - collapseToReady does not know how to resolve operator "+opToString(m_op)+".");
  }
  return result.borrowReadyPtr();
}

/*
   \brief Converts the DataLazy into an IDENTITY storing the value of the expression.
   This method uses the original methods on the Data class to evaluate the expressions.
   For this reason, it should not be used on DataExpanded instances. (To do so would defeat
   the purpose of using DataLazy in the first place).
*/
void
DataLazy::collapse() const
{
  if (m_op==IDENTITY)
  {
        return;
  }
  if (m_readytype=='E')
  {     // this is more an efficiency concern than anything else
    throw DataException("Programmer Error - do not use collapse on Expanded data.");
  }
  m_id=collapseToReady();
  m_op=IDENTITY;
}

// The result will be stored in m_samples
// The return value is a pointer to the DataVector, offset is the offset within the return value
const DataTypes::RealVectorType*
DataLazy::resolveNodeSample(int tid, int sampleNo, size_t& roffset) const
{
LAZYDEBUG(cout << "Resolve sample " << toString() << endl;)
        // collapse so we have a 'E' node or an IDENTITY for some other type
  if (m_readytype!='E' && m_op!=IDENTITY)
  {
        collapse();
  }
  if (m_op==IDENTITY)
  {
    const RealVectorType& vec=m_id->getVectorRO();
    roffset=m_id->getPointOffset(sampleNo, 0);
#ifdef LAZY_STACK_PROF
int x;
if (&x<stackend[omp_get_thread_num()])
{
       stackend[omp_get_thread_num()]=&x;
}
#endif
    return &(vec);
  }
  if (m_readytype!='E')
  {
    throw DataException("Programmer Error - Collapse did not produce an expanded node.");
  }
  if (m_sampleids[tid]==sampleNo)
  {
        roffset=tid*m_samplesize;
        return &(m_samples);            // sample is already resolved
  }
  m_sampleids[tid]=sampleNo;

  switch (getOpgroup(m_op))
  {
  case G_UNARY:
  case G_UNARY_P: return resolveNodeUnary(tid, sampleNo, roffset);
  case G_BINARY: return resolveNodeBinary(tid, sampleNo, roffset);
  case G_NP1OUT: return resolveNodeNP1OUT(tid, sampleNo, roffset);
  case G_NP1OUT_P: return resolveNodeNP1OUT_P(tid, sampleNo, roffset);
  case G_TENSORPROD: return resolveNodeTProd(tid, sampleNo, roffset);
  case G_NP1OUT_2P: return resolveNodeNP1OUT_2P(tid, sampleNo, roffset);
  case G_REDUCTION: return resolveNodeReduction(tid, sampleNo, roffset);
  case G_CONDEVAL: return resolveNodeCondEval(tid, sampleNo, roffset);
  default:
    throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");
  }
}

const DataTypes::RealVectorType*
DataLazy::resolveNodeUnary(int tid, int sampleNo, size_t& roffset) const
{
        // we assume that any collapsing has been done before we get here
        // since we only have one argument we don't need to think about only
        // processing single points.
        // we will also know we won't get identity nodes
  if (m_readytype!='E')
  {
    throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
  }
  if (m_op==IDENTITY)
  {
    throw DataException("Programmer error - resolveNodeUnary should not be called on identity nodes.");
  }
  const DataTypes::RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, roffset);
  const double* left=&((*leftres)[roffset]);
  roffset=m_samplesize*tid;
  double* result=&(m_samples[roffset]);
  if (m_op==POS)
  {
    // this should be prevented earlier
    // operation is meaningless for lazy
        throw DataException("Programmer error - POS not supported for lazy data.");
  }
  tensor_unary_array_operation(m_samplesize,
                             left,
                             result,
                             m_op,
                             m_tol);
  return &(m_samples);
}


const DataTypes::RealVectorType*
DataLazy::resolveNodeReduction(int tid, int sampleNo, size_t& roffset) const
{
        // we assume that any collapsing has been done before we get here
        // since we only have one argument we don't need to think about only
        // processing single points.
        // we will also know we won't get identity nodes
  if (m_readytype!='E')
  {
    throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
  }
  if (m_op==IDENTITY)
  {
    throw DataException("Programmer error - resolveNodeUnary should not be called on identity nodes.");
  }
  size_t loffset=0;
  const DataTypes::RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, loffset);

  roffset=m_samplesize*tid;
  unsigned int ndpps=getNumDPPSample();
  unsigned int psize=DataTypes::noValues(m_left->getShape());
  double* result=&(m_samples[roffset]);
  switch (m_op)
  {
    case MINVAL:
        {
          for (unsigned int z=0;z<ndpps;++z)
          {
            FMin op;
            *result=escript::reductionOpVector(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max());
            loffset+=psize;
            result++;
          }
        }
        break;
    case MAXVAL:
        {
          for (unsigned int z=0;z<ndpps;++z)
          {
            FMax op;
            *result=escript::reductionOpVector(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max()*-1);
            loffset+=psize;
            result++;
          }
        }
        break;
    default:
        throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");
  }
  return &(m_samples);
}

const DataTypes::RealVectorType*
DataLazy::resolveNodeNP1OUT(int tid, int sampleNo, size_t& roffset) const
{
        // we assume that any collapsing has been done before we get here
        // since we only have one argument we don't need to think about only
        // processing single points.
  if (m_readytype!='E')
  {
    throw DataException("Programmer error - resolveNodeNP1OUT should only be called on expanded Data.");
  }
  if (m_op==IDENTITY)
  {
    throw DataException("Programmer error - resolveNodeNP1OUT should not be called on identity nodes.");
  }
  size_t subroffset;
  const RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
  roffset=m_samplesize*tid;
  size_t loop=0;
  size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
  size_t step=getNoValues();
  size_t offset=roffset;
  switch (m_op)
  {
    case SYM:
        for (loop=0;loop<numsteps;++loop)
        {
            escript::symmetric(*leftres,m_left->getShape(),subroffset, m_samples, getShape(), offset);
            subroffset+=step;
            offset+=step;
        }
        break;
    case NSYM:
        for (loop=0;loop<numsteps;++loop)
        {
            escript::antisymmetric(*leftres,m_left->getShape(),subroffset, m_samples, getShape(), offset);
            subroffset+=step;
            offset+=step;
        }
        break;
    default:
        throw DataException("Programmer error - resolveNP1OUT can not resolve operator "+opToString(m_op)+".");
  }
  return &m_samples;
}

const DataTypes::RealVectorType*
DataLazy::resolveNodeNP1OUT_P(int tid, int sampleNo, size_t& roffset) const
{
        // we assume that any collapsing has been done before we get here
        // since we only have one argument we don't need to think about only
        // processing single points.
  if (m_readytype!='E')
  {
    throw DataException("Programmer error - resolveNodeNP1OUT_P should only be called on expanded Data.");
  }
  if (m_op==IDENTITY)
  {
    throw DataException("Programmer error - resolveNodeNP1OUT_P should not be called on identity nodes.");
  }
  size_t subroffset;
  size_t offset;
  const RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
  roffset=m_samplesize*tid;
  offset=roffset;
  size_t loop=0;
  size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
  size_t outstep=getNoValues();
  size_t instep=m_left->getNoValues();
  switch (m_op)
  {
    case TRACE:
        for (loop=0;loop<numsteps;++loop)
        {
            escript::trace(*leftres,m_left->getShape(),subroffset, m_samples ,getShape(),offset,m_axis_offset);
            subroffset+=instep;
            offset+=outstep;
        }
        break;
    case TRANS:
        for (loop=0;loop<numsteps;++loop)
        {
            escript::transpose(*leftres,m_left->getShape(),subroffset, m_samples, getShape(),offset,m_axis_offset);
            subroffset+=instep;
            offset+=outstep;
        }
        break;
    default:
        throw DataException("Programmer error - resolveNP1OUTP can not resolve operator "+opToString(m_op)+".");
  }
  return &m_samples;
}


const DataTypes::RealVectorType*
DataLazy::resolveNodeNP1OUT_2P(int tid, int sampleNo, size_t& roffset) const
{
  if (m_readytype!='E')
  {
    throw DataException("Programmer error - resolveNodeNP1OUT_2P should only be called on expanded Data.");
  }
  if (m_op==IDENTITY)
  {
    throw DataException("Programmer error - resolveNodeNP1OUT_2P should not be called on identity nodes.");
  }
  size_t subroffset;
  size_t offset;
  const RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
  roffset=m_samplesize*tid;
  offset=roffset;
  size_t loop=0;
  size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
  size_t outstep=getNoValues();
  size_t instep=m_left->getNoValues();
  switch (m_op)
  {
    case SWAP:
        for (loop=0;loop<numsteps;++loop)
        {
            escript::swapaxes(*leftres,m_left->getShape(),subroffset, m_samples, getShape(),offset, m_axis_offset, m_transpose);
            subroffset+=instep;
            offset+=outstep;
        }
        break;
    default:
        throw DataException("Programmer error - resolveNodeNP1OUT2P can not resolve operator "+opToString(m_op)+".");
  }
  return &m_samples;
}

const DataTypes::RealVectorType*
DataLazy::resolveNodeCondEval(int tid, int sampleNo, size_t& roffset) const
{
  if (m_readytype!='E')
  {
    throw DataException("Programmer error - resolveNodeCondEval should only be called on expanded Data.");
  }
  if (m_op!=CONDEVAL)
  {
    throw DataException("Programmer error - resolveNodeCondEval should only be called on CONDEVAL nodes.");
  }
  size_t subroffset;

  const RealVectorType* maskres=m_mask->resolveNodeSample(tid, sampleNo, subroffset);
  const RealVectorType* srcres=0;
  if ((*maskres)[subroffset]>0)
  {
        srcres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
  }
  else
  {
        srcres=m_right->resolveNodeSample(tid, sampleNo, subroffset);
  }

  // Now we need to copy the result

  roffset=m_samplesize*tid;
  for (int i=0;i<m_samplesize;++i)
  {
        m_samples[roffset+i]=(*srcres)[subroffset+i];
  }

  return &m_samples;
}
1230    // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1231    // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1232    // If both children are expanded, then we can process them in a single operation (we treat
1233    // the whole sample as one big datapoint).
1234    // If one of the children is not expanded, then we need to treat each point in the sample
1235    // individually.
1236    // There is an additional complication when scalar operations are considered.
1237    // For example, 2+Vector.
1238    // In this case, each double within the point is treated individually.
1239    const DataTypes::RealVectorType*
1240    DataLazy::resolveNodeBinary(int tid, int sampleNo, size_t& roffset) const
1241    {
1242    LAZYDEBUG(cout << "Resolve binary: " << toString() << endl;)
1243    
1244      size_t lroffset=0, rroffset=0;        // offsets in the left and right result vectors
1245            // first work out which of the children are expanded
1246      bool leftExp=(m_left->m_readytype=='E');
1247      bool rightExp=(m_right->m_readytype=='E');
1248      if (!leftExp && !rightExp)
1249      {
1250            throw DataException("Programmer Error - please use collapse if neither argument has type 'E'.");
1251      }
1252      bool leftScalar=(m_left->getRank()==0);
1253      bool rightScalar=(m_right->getRank()==0);
1254      if ((m_left->getRank()!=m_right->getRank()) && (!leftScalar && !rightScalar))
1255      {
1256            throw DataException("resolveBinary - ranks of arguments must match unless one of them is scalar.");
1257      }
1258      size_t leftsize=m_left->getNoValues();
1259      size_t rightsize=m_right->getNoValues();
1260      size_t chunksize=1;                   // how many doubles will be processed in one go
1261      int leftstep=0;               // how far should the left offset advance after each step
1262      int rightstep=0;
1263      int numsteps=0;               // total number of steps for the inner loop
1264      int oleftstep=0;      // the o variables refer to the outer loop
1265      int orightstep=0;     // The outer loop is only required in cases where there is an extended scalar
1266      int onumsteps=1;
1267      
1268      bool LES=(leftExp && leftScalar);     // Left is an expanded scalar
1269      bool RES=(rightExp && rightScalar);
1270      bool LS=(!leftExp && leftScalar);     // left is a single scalar
1271      bool RS=(!rightExp && rightScalar);
1272      bool LN=(!leftExp && !leftScalar);    // left is a single non-scalar
1273      bool RN=(!rightExp && !rightScalar);
1274      bool LEN=(leftExp && !leftScalar);    // left is an expanded non-scalar
1275      bool REN=(rightExp && !rightScalar);
1276    
1277      if ((LES && RES) || (LEN && REN))     // both are Expanded scalars or both are expanded non-scalars
1278      {
1279            chunksize=m_left->getNumDPPSample()*leftsize;
1280            leftstep=0;
1281            rightstep=0;
1282            numsteps=1;
1283      }
1284      else if (LES || RES)
1285      {
1286            chunksize=1;
1287            if (LES)                // left is an expanded scalar
1288            {
1289                    if (RS)
1290                    {
1291                       leftstep=1;
1292                       rightstep=0;
1293                       numsteps=m_left->getNumDPPSample();
1294                    }
1295                    else            // RN or REN
1296                    {
1297                       leftstep=0;
1298                       oleftstep=1;
1299                       rightstep=1;
1300                       orightstep=(RN ? -(int)rightsize : 0);
1301                       numsteps=rightsize;
1302                       onumsteps=m_left->getNumDPPSample();
1303                    }
1304            }
1305            else            // right is an expanded scalar
1306            {
1307                    if (LS)
1308                    {
1309                       rightstep=1;
1310                       leftstep=0;
1311                       numsteps=m_right->getNumDPPSample();
1312                    }
1313                    else
1314                    {
1315                       rightstep=0;
1316                       orightstep=1;
1317                       leftstep=1;
1318                       oleftstep=(LN ? -(int)leftsize : 0);
1319                       numsteps=leftsize;
1320                       onumsteps=m_right->getNumDPPSample();
1321                    }
1322            }
1323      }
1324      else  // this leaves (LEN, RS), (LEN, RN) and their transposes
1325      {
1326            if (LEN)        // and Right will be a single value
1327            {
1328                    chunksize=rightsize;
1329                    leftstep=rightsize;
1330                    rightstep=0;
1331                    numsteps=m_left->getNumDPPSample();
1332                    if (RS)
1333                    {
1334                       numsteps*=leftsize;
1335                    }
1336            }
1337            else    // REN
1338            {
1339                    chunksize=leftsize;
1340                    rightstep=leftsize;
1341                    leftstep=0;
1342                    numsteps=m_right->getNumDPPSample();
1343                    if (LS)
1344                    {
1345                       numsteps*=rightsize;
1346                    }
1347            }
1348      }
1349    
1350      int resultStep=max(leftstep,rightstep);       // only one (at most) should be !=0
1351            // Get the values of sub-expressions
1352      const RealVectorType* left=m_left->resolveNodeSample(tid,sampleNo,lroffset);      
1353      const RealVectorType* right=m_right->resolveNodeSample(tid,sampleNo,rroffset);
1354    LAZYDEBUG(cout << "Post sub calls in " << toString() << endl;)
1355    LAZYDEBUG(cout << "shapes=" << DataTypes::shapeToString(m_left->getShape()) << "," << DataTypes::shapeToString(m_right->getShape()) << endl;)
1356    LAZYDEBUG(cout << "chunksize=" << chunksize << endl << "leftstep=" << leftstep << " rightstep=" << rightstep;)
1357    LAZYDEBUG(cout << " numsteps=" << numsteps << endl << "oleftstep=" << oleftstep << " orightstep=" << orightstep;)
1358    LAZYDEBUG(cout << "onumsteps=" << onumsteps << endl;)
1359    LAZYDEBUG(cout << " DPPS=" << m_left->getNumDPPSample() << "," <<m_right->getNumDPPSample() << endl;)
1360    LAZYDEBUG(cout << LS << RS << LN << RN << LES << RES << LEN << REN << endl;)
1361    
1362    LAZYDEBUG(cout << "Left res["<< lroffset<< "]=" << (*left)[lroffset] << endl;)
1363    LAZYDEBUG(cout << "Right res["<< rroffset<< "]=" << (*right)[rroffset] << endl;)
1364    
1365    
1366      roffset=m_samplesize*tid;
1367      double* resultp=&(m_samples[roffset]);                // results are stored at the vector offset we received
1368      switch(m_op)
1369      {
1370        case ADD:
1371            //PROC_OP(NO_ARG,plus<double>());
1372          escript::binaryOpVectorLazyHelper<real_t, real_t, real_t>(resultp,
1373                 &(*left)[0],
1374                 &(*right)[0],
1375                 chunksize,
1376                 onumsteps,
1377                 numsteps,
1378                 resultStep,
1379                 leftstep,
1380                 rightstep,
1381                 oleftstep,
1382                 orightstep,
1383                 lroffset,
1384                 rroffset,
1385                 escript::ES_optype::ADD);  
1386            break;
1387        case SUB:
1388          escript::binaryOpVectorLazyHelper<real_t, real_t, real_t>(resultp,
1389                 &(*left)[0],
1390                 &(*right)[0],
1391                 chunksize,
1392                 onumsteps,
1393                 numsteps,
1394                 resultStep,
1395                 leftstep,
1396                 rightstep,
1397                 oleftstep,
1398                 orightstep,
1399                 lroffset,
1400                 rroffset,
1401                 escript::ES_optype::SUB);        
1402            //PROC_OP(NO_ARG,minus<double>());
1403            break;
1404        case MUL:
1405            //PROC_OP(NO_ARG,multiplies<double>());
1406          escript::binaryOpVectorLazyHelper<real_t, real_t, real_t>(resultp,
1407                 &(*left)[0],
1408                 &(*right)[0],
1409                 chunksize,
1410                 onumsteps,
1411                 numsteps,
1412                 resultStep,
1413                 leftstep,
1414                 rightstep,
1415                 oleftstep,
1416                 orightstep,
1417                 lroffset,
1418                 rroffset,
1419                 escript::ES_optype::MUL);        
1420            break;
1421        case DIV:
1422            //PROC_OP(NO_ARG,divides<double>());
1423          escript::binaryOpVectorLazyHelper<real_t, real_t, real_t>(resultp,
1424                 &(*left)[0],
1425                 &(*right)[0],
1426                 chunksize,
1427                 onumsteps,
1428                 numsteps,
1429                 resultStep,
1430                 leftstep,
1431                 rightstep,
1432                 oleftstep,
1433                 orightstep,
1434                 lroffset,
1435                 rroffset,
1436                 escript::ES_optype::DIV);        
1437            break;
1438        case POW:
1439           //PROC_OP(double (double,double),::pow);
1440          escript::binaryOpVectorLazyHelper<real_t, real_t, real_t>(resultp,
1441                 &(*left)[0],
1442                 &(*right)[0],
1443                 chunksize,
1444                 onumsteps,
1445                 numsteps,
1446                 resultStep,
1447                 leftstep,
1448                 rightstep,
1449                 oleftstep,
1450                 orightstep,
1451                 lroffset,
1452                 rroffset,
1453                 escript::ES_optype::POW);        
1454            break;
1455        default:
1456            throw DataException("Programmer error - resolveBinary can not resolve operator "+opToString(m_op)+".");
1457    }    }
1458    return result;  LAZYDEBUG(cout << "Result res[" << roffset<< "]" << m_samples[roffset] << endl;)
1459      return &m_samples;
1460  }  }
1461    
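[Editorial note] For readers tracing how the chunksize / numsteps / onumsteps and step values computed in resolveNodeBinary above drive the actual arithmetic, the following is a hedged sketch only: it assumes the parameters are consumed as two nested loops over chunks of doubles (roughly what the retired PROC_OP calls, now commented out, suggest), and it is not escript's binaryOpVectorLazyHelper. The function name addStepSketch is invented for illustration; ADD stands in for the elementwise operation.

#include <cstddef>

// Illustrative sketch only -- not the escript implementation.
static void addStepSketch(double* resultp, const double* left, const double* right,
                          std::size_t chunksize, int onumsteps, int numsteps,
                          int resultStep, int leftstep, int rightstep,
                          int oleftstep, int orightstep,
                          std::size_t lroffset, std::size_t rroffset)
{
    for (int j = 0; j < onumsteps; ++j)                 // outer loop: only >1 when one operand is an expanded scalar
    {
        for (int i = 0; i < numsteps; ++i, resultp += resultStep)
        {
            for (std::size_t k = 0; k < chunksize; ++k) // one chunk of doubles per step
            {
                resultp[k] = left[lroffset + k] + right[rroffset + k];
            }
            lroffset += leftstep;                       // advance the inner offsets
            rroffset += rightstep;
        }
        lroffset += oleftstep;                          // outer corrections (may rewind a non-expanded operand)
        rroffset += orightstep;
    }
}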
1462    
1463    // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1464    // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1465    // unlike the other resolve helpers, we must treat these datapoints separately.
1466    const DataTypes::RealVectorType*
1467    DataLazy::resolveNodeTProd(int tid, int sampleNo, size_t& roffset) const
1468    {
1469    LAZYDEBUG(cout << "Resolve TensorProduct: " << toString() << endl;)
1470    
1471      size_t lroffset=0, rroffset=0;        // offsets in the left and right result vectors
1472            // first work out which of the children are expanded
1473      bool leftExp=(m_left->m_readytype=='E');
1474      bool rightExp=(m_right->m_readytype=='E');
1475      int steps=getNumDPPSample();
1476      int leftStep=(leftExp? m_left->getNoValues() : 0);            // do not have scalars as input to this method
1477      int rightStep=(rightExp?m_right->getNoValues() : 0);
1478    
1479      int resultStep=getNoValues();
1480      roffset=m_samplesize*tid;
1481      size_t offset=roffset;
1482    
1483      const RealVectorType* left=m_left->resolveNodeSample(tid, sampleNo, lroffset);
1484    
1485      const RealVectorType* right=m_right->resolveNodeSample(tid, sampleNo, rroffset);
1486    
1487    LAZYDEBUG(cerr << "[Left shape]=" << DataTypes::shapeToString(m_left->getShape()) << "\n[Right shape]=" << DataTypes::shapeToString(m_right->getShape()) << " result=" <<DataTypes::shapeToString(getShape()) <<  endl;
1488    cout << getNoValues() << endl;)
1489    
1490    
1491    LAZYDEBUG(cerr << "Post sub calls: " << toString() << endl;)
1492    LAZYDEBUG(cout << "LeftExp=" << leftExp << " rightExp=" << rightExp << endl;)
1493    LAZYDEBUG(cout << "LeftR=" << m_left->getRank() << " RightR=" << m_right->getRank() << endl;)
1494    LAZYDEBUG(cout << "LeftSize=" << m_left->getNoValues() << " RightSize=" << m_right->getNoValues() << endl;)
1495    LAZYDEBUG(cout << "m_samplesize=" << m_samplesize << endl;)
1496    LAZYDEBUG(cout << "outputshape=" << DataTypes::shapeToString(getShape()) << endl;)
1497    LAZYDEBUG(cout << "DPPS=" << m_right->getNumDPPSample() <<"."<<endl;)
1498    
1499      double* resultp=&(m_samples[offset]);         // results are stored at the vector offset we received
1500      switch(m_op)
1501      {
1502        case PROD:
1503            for (int i=0;i<steps;++i,resultp+=resultStep)
1504            {
1505              const double *ptr_0 = &((*left)[lroffset]);
1506              const double *ptr_1 = &((*right)[rroffset]);
1507    
1508    LAZYDEBUG(cout << DataTypes::pointToString(*left, m_left->getShape(),lroffset,"LEFT") << endl;)
1509    LAZYDEBUG(cout << DataTypes::pointToString(*right,m_right->getShape(),rroffset, "RIGHT") << endl;)
1510    
1511              matrix_matrix_product(m_SL, m_SM, m_SR, ptr_0, ptr_1, resultp, m_transpose);
1512    
1513              lroffset+=leftStep;
1514              rroffset+=rightStep;
1515            }
1516            break;
1517        default:
1518            throw DataException("Programmer error - resolveTProduct can not resolve operator "+opToString(m_op)+".");
1519      }
1520      roffset=offset;
1521      return &m_samples;
1522    }
1523    
1524    
1525    const DataTypes::RealVectorType*
1526    DataLazy::resolveSample(int sampleNo, size_t& roffset) const
1527    {
1528    #ifdef _OPENMP
1529            int tid=omp_get_thread_num();
1530    #else
1531            int tid=0;
1532    #endif
1533    
1534    #ifdef LAZY_STACK_PROF
1535            stackstart[tid]=&tid;
1536            stackend[tid]=&tid;
1537            const DataTypes::RealVectorType* r=resolveNodeSample(tid, sampleNo, roffset);
1538            size_t d=(size_t)stackstart[tid]-(size_t)stackend[tid];
1539            #pragma omp critical
1540            if (d>maxstackuse)
1541            {
1542    cout << "Max resolve Stack use " << d << endl;
1543                    maxstackuse=d;
1544            }
1545            return r;
1546    #else
1547            return resolveNodeSample(tid, sampleNo, roffset);
1548    #endif
1549    }
1550    
1551    
1552    // This needs to do the work of the identity constructor
1553    void
1554    DataLazy::resolveToIdentity()
1555    {
1556       if (m_op==IDENTITY)
1557            return;
1558       DataReady_ptr p=resolveNodeWorker();
1559       makeIdentity(p);
1560    }
1561    
1562    void DataLazy::makeIdentity(const DataReady_ptr& p)
1563    {
1564       m_op=IDENTITY;
1565       m_axis_offset=0;
1566       m_transpose=0;
1567       m_SL=m_SM=m_SR=0;
1568       m_children=m_height=0;
1569       m_id=p;
1570       if(p->isConstant()) {m_readytype='C';}
1571       else if(p->isExpanded()) {m_readytype='E';}
1572       else if (p->isTagged()) {m_readytype='T';}
1573       else {throw DataException("Unknown DataReady instance in DataLazy::makeIdentity.");}
1574       m_samplesize=p->getNumDPPSample()*p->getNoValues();
1575       m_left.reset();
1576       m_right.reset();
1577    }
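[Editorial note] resolveToIdentity() and makeIdentity() are the machinery behind resolve() just below. As a hedged, caller-side sketch of that flow: the helper name ensureResolved and the DataAbstract_ptr parameter are assumptions for illustration; resolve(), DataReady_ptr and REFCOUNTNS::dynamic_pointer_cast appear in this file.

// Hypothetical helper, assumed to sit alongside DataLazy.cpp's existing includes.
DataReady_ptr ensureResolved(DataAbstract_ptr d)
{
    if (DataLazy* lz = dynamic_cast<DataLazy*>(d.get()))
    {
        // Evaluates the expression tree; afterwards the node is an IDENTITY
        // wrapper around the result, so repeated resolves just return m_id.
        return lz->resolve();
    }
    return REFCOUNTNS::dynamic_pointer_cast<DataReady>(d);   // already ready
}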
1578    
1579    
1580  DataReady_ptr  DataReady_ptr
1581  DataLazy::resolve()  DataLazy::resolve()
1582  {  {
1583    // This is broken!     We need to have a buffer per thread!      resolveToIdentity();
1584    // so the allocation of v needs to move inside the loop somehow      return m_id;
1585    }
1586    
 cout << "Sample size=" << m_samplesize << endl;  
 cout << "Buffers=" << m_buffsRequired << endl;  
1587    
1588    size_t threadbuffersize=m_samplesize*(max(1,m_buffsRequired)+1);  /* This is really a static method but I think that caused problems in windows */
1589    int numthreads=1;  void
1590    DataLazy::resolveGroupWorker(std::vector<DataLazy*>& dats)
1591    {
1592      if (dats.empty())
1593      {
1594            return;
1595      }
1596      vector<DataLazy*> work;
1597      FunctionSpace fs=dats[0]->getFunctionSpace();
1598      bool match=true;
1599      for (int i=dats.size()-1;i>=0;--i)
1600      {
1601            if (dats[i]->m_readytype!='E')
1602            {
1603                    dats[i]->collapse();
1604            }
1605            if (dats[i]->m_op!=IDENTITY)
1606            {
1607                    work.push_back(dats[i]);
1608                    if (fs!=dats[i]->getFunctionSpace())
1609                    {
1610                            match=false;
1611                    }
1612            }
1613      }
1614      if (work.empty())
1615      {
1616            return;         // no work to do
1617      }
1618      if (match)    // all functionspaces match.  Yes I realise this is overly strict
1619      {             // it is possible that dats[0] is one of the objects which we discarded and
1620                    // all the other functionspaces match.
1621            vector<DataExpanded*> dep;
1622            vector<RealVectorType*> vecs;
1623            for (int i=0;i<work.size();++i)
1624            {
1625                    dep.push_back(new DataExpanded(fs,work[i]->getShape(), RealVectorType(work[i]->getNoValues())));
1626                    vecs.push_back(&(dep[i]->getVectorRW()));
1627            }
1628            int totalsamples=work[0]->getNumSamples();
1629            const RealVectorType* res=0; // Storage for answer
1630            int sample;
1631            #pragma omp parallel private(sample, res)
1632            {
1633                size_t roffset=0;
1634                #pragma omp for schedule(static)
1635                for (sample=0;sample<totalsamples;++sample)
1636                {
1637                    roffset=0;
1638                    int j;
1639                    for (j=work.size()-1;j>=0;--j)
1640                    {
1641  #ifdef _OPENMP  #ifdef _OPENMP
1642    numthreads=omp_get_max_threads();                      res=work[j]->resolveNodeSample(omp_get_thread_num(),sample,roffset);
1643    int threadnum=0;  #else
1644  #endif                      res=work[j]->resolveNodeSample(0,sample,roffset);
1645    ValueType v(numthreads*threadbuffersize);  #endif
1646  cout << "Buffer created with size=" << v.size() << endl;                      RealVectorType::size_type outoffset=dep[j]->getPointOffset(sample,0);
1647    ValueType dummy(getNoValues());                      memcpy(&((*vecs[j])[outoffset]),&((*res)[roffset]),work[j]->m_samplesize*sizeof(RealVectorType::ElementType));
1648    DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(),dummy);                  }
1649    ValueType& resvec=result->getVector();              }
1650            }
1651            // Now we need to load the new results as identity ops into the lazy nodes
1652            for (int i=work.size()-1;i>=0;--i)
1653            {
1654                work[i]->makeIdentity(REFCOUNTNS::dynamic_pointer_cast<DataReady>(dep[i]->getPtr()));
1655            }
1656      }
1657      else  // functionspaces do not match
1658      {
1659            for (int i=0;i<work.size();++i)
1660            {
1661                    work[i]->resolveToIdentity();
1662            }
1663      }
1664    }
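[Editorial note] A hedged usage sketch for resolveGroupWorker follows. The comment above notes it is conceptually static but declared as an ordinary member (a Windows build workaround), so it is shown here invoked through one of the nodes being resolved; lazyA and lazyB are hypothetical DataLazy_ptr instances.

std::vector<DataLazy*> nodes;
nodes.push_back(lazyA.get());
nodes.push_back(lazyB.get());
nodes[0]->resolveGroupWorker(nodes);   // nodes sharing a FunctionSpace are resolved
                                       // together in one parallel sweep; the rest
                                       // fall back to individual resolveToIdentity()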
1665    
1666    
1667    
1668    // This version of resolve uses storage in each node to hold results
1669    DataReady_ptr
1670    DataLazy::resolveNodeWorker()
1671    {
1672      if (m_readytype!='E')         // if the whole sub-expression is Constant or Tagged, then evaluate it normally
1673      {
1674        collapse();
1675      }
1676      if (m_op==IDENTITY)           // So a lazy expression of Constant or Tagged data will be returned here.
1677      {
1678        return m_id;
1679      }
1680            // from this point on we must have m_op!=IDENTITY and m_readytype=='E'
1681      DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(),  RealVectorType(getNoValues()));
1682      RealVectorType& resvec=result->getVectorRW();
1683    DataReady_ptr resptr=DataReady_ptr(result);    DataReady_ptr resptr=DataReady_ptr(result);
1684    
1685    int sample;    int sample;
   int resoffset;  
1686    int totalsamples=getNumSamples();    int totalsamples=getNumSamples();
1687    #pragma omp parallel for private(sample,resoffset,threadnum) schedule(static)    const RealVectorType* res=0;       // Storage for answer
1688    for (sample=0;sample<totalsamples;++sample)  LAZYDEBUG(cout << "Total number of samples=" <<totalsamples << endl;)
1689      #pragma omp parallel private(sample,res)
1690    {    {
1691            size_t roffset=0;
1692    #ifdef LAZY_STACK_PROF
1693            stackstart[omp_get_thread_num()]=&roffset;
1694            stackend[omp_get_thread_num()]=&roffset;
1695    #endif
1696            #pragma omp for schedule(static)
1697            for (sample=0;sample<totalsamples;++sample)
1698            {
1699                    roffset=0;
1700  #ifdef _OPENMP  #ifdef _OPENMP
1701      const double* res=resolveSample(v,sample,threadbuffersize*omp_get_thread_num());                  res=resolveNodeSample(omp_get_thread_num(),sample,roffset);
1702  #else  #else
1703      const double* res=resolveSample(v,sample,0);   // this would normally be v, but not if its a single IDENTITY op.                  res=resolveNodeSample(0,sample,roffset);
1704  #endif  #endif
1705      resoffset=result->getPointOffset(sample,0);  LAZYDEBUG(cout << "Sample #" << sample << endl;)
1706      for (int i=0;i<m_samplesize;++i,++resoffset)    // copy values into the output vector  LAZYDEBUG(cout << "Final res[" << roffset<< "]=" << (*res)[roffset] << (*res)[roffset]<< endl; )
1707      {                  RealVectorType::size_type outoffset=result->getPointOffset(sample,0);
1708      resvec[resoffset]=res[i];                  memcpy(&(resvec[outoffset]),&((*res)[roffset]),m_samplesize*sizeof(RealVectorType::ElementType));
1709      }          }
1710    }    }
1711    #ifdef LAZY_STACK_PROF
1712      for (int i=0;i<getNumberOfThreads();++i)
1713      {
1714            size_t r=((size_t)stackstart[i] - (size_t)stackend[i]);
1715    //      cout << i << " " << stackstart[i] << " .. " << stackend[i] << " = " <<  r << endl;
1716            if (r>maxstackuse)
1717            {
1718                    maxstackuse=r;
1719            }
1720      }
1721      cout << "Max resolve Stack use=" << maxstackuse << endl;
1722    #endif
1723    return resptr;    return resptr;
1724  }  }
1725    
# Line 430  std::string Line 1727  std::string
1727  DataLazy::toString() const  DataLazy::toString() const
1728  {  {
1729    ostringstream oss;    ostringstream oss;
1730    oss << "Lazy Data:";    oss << "Lazy Data: [depth=" << m_height<< "] ";
1731    intoString(oss);    switch (escriptParams.getLAZY_STR_FMT())
1732      {
1733      case 1:       // tree format
1734            oss << endl;
1735            intoTreeString(oss,"");
1736            break;
1737      case 2:       // just the depth
1738            break;
1739      default:
1740            intoString(oss);
1741            break;
1742      }
1743    return oss.str();    return oss.str();
1744  }  }
1745    
1746    
1747  void  void
1748  DataLazy::intoString(ostringstream& oss) const  DataLazy::intoString(ostringstream& oss) const
1749  {  {
1750    //    oss << "[" << m_children <<";"<<m_height <<"]";
1751    switch (getOpgroup(m_op))    switch (getOpgroup(m_op))
1752    {    {
1753    case G_IDENTITY:    case G_IDENTITY:
1754      oss << '@' << m_id.get();          if (m_id->isExpanded())
1755      break;          {
1756               oss << "E";
1757            }
1758            else if (m_id->isTagged())
1759            {
1760              oss << "T";
1761            }
1762            else if (m_id->isConstant())
1763            {
1764              oss << "C";
1765            }
1766            else
1767            {
1768              oss << "?";
1769            }
1770            oss << '@' << m_id.get();
1771            break;
1772    case G_BINARY:    case G_BINARY:
1773      oss << '(';          oss << '(';
1774      m_left->intoString(oss);          m_left->intoString(oss);
1775      oss << ' ' << opToString(m_op) << ' ';          oss << ' ' << opToString(m_op) << ' ';
1776      m_right->intoString(oss);          m_right->intoString(oss);
1777      oss << ')';          oss << ')';
1778      break;          break;
1779    case G_UNARY:    case G_UNARY:
1780      oss << opToString(m_op) << '(';    case G_UNARY_P:
1781      m_left->intoString(oss);    case G_NP1OUT:
1782      oss << ')';    case G_NP1OUT_P:
1783      break;    case G_REDUCTION:
1784            oss << opToString(m_op) << '(';
1785            m_left->intoString(oss);
1786            oss << ')';
1787            break;
1788      case G_TENSORPROD:
1789            oss << opToString(m_op) << '(';
1790            m_left->intoString(oss);
1791            oss << ", ";
1792            m_right->intoString(oss);
1793            oss << ')';
1794            break;
1795      case G_NP1OUT_2P:
1796            oss << opToString(m_op) << '(';
1797            m_left->intoString(oss);
1798            oss << ", " << m_axis_offset << ", " << m_transpose;
1799            oss << ')';
1800            break;
1801      case G_CONDEVAL:
1802            oss << opToString(m_op)<< '(' ;
1803            m_mask->intoString(oss);
1804            oss << " ? ";
1805            m_left->intoString(oss);
1806            oss << " : ";
1807            m_right->intoString(oss);
1808            oss << ')';
1809            break;
1810    default:    default:
1811      oss << "UNKNOWN";          oss << "UNKNOWN";
1812    }    }
1813  }  }
1814    
1815  // Note that in this case, deepCopy does not make copies of the leaves.  
1816  // Hopefully copy on write (or whatever we end up using) will take care of this.  void
1817    DataLazy::intoTreeString(ostringstream& oss, string indent) const
1818    {
1819      oss << '[' << m_rank << ':' << setw(3) << m_samplesize << "] " << indent;
1820      switch (getOpgroup(m_op))
1821      {
1822      case G_IDENTITY:
1823            if (m_id->isExpanded())
1824            {
1825               oss << "E";
1826            }
1827            else if (m_id->isTagged())
1828            {
1829              oss << "T";
1830            }
1831            else if (m_id->isConstant())
1832            {
1833              oss << "C";
1834            }
1835            else
1836            {
1837              oss << "?";
1838            }
1839            oss << '@' << m_id.get() << endl;
1840            break;
1841      case G_BINARY:
1842            oss << opToString(m_op) << endl;
1843            indent+='.';
1844            m_left->intoTreeString(oss, indent);
1845            m_right->intoTreeString(oss, indent);
1846            break;
1847      case G_UNARY:
1848      case G_UNARY_P:
1849      case G_NP1OUT:
1850      case G_NP1OUT_P:
1851      case G_REDUCTION:
1852            oss << opToString(m_op) << endl;
1853            indent+='.';
1854            m_left->intoTreeString(oss, indent);
1855            break;
1856      case G_TENSORPROD:
1857            oss << opToString(m_op) << endl;
1858            indent+='.';
1859            m_left->intoTreeString(oss, indent);
1860            m_right->intoTreeString(oss, indent);
1861            break;
1862      case G_NP1OUT_2P:
1863            oss << opToString(m_op) << ", " << m_axis_offset << ", " << m_transpose<< endl;
1864            indent+='.';
1865            m_left->intoTreeString(oss, indent);
1866            break;
1867      default:
1868            oss << "UNKNOWN";
1869      }
1870    }
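[Editorial note] To make the three LAZY_STR_FMT modes concrete, here is a hypothetical example for an expression adding an expanded leaf to a constant scalar leaf. The pointer values, depth, sample sizes and the '+' symbol are assumptions for illustration; the exact strings come from opToString and the data at hand.

default (intoString):    Lazy Data: [depth=1] (E@0x1f2a300 + C@0x1f2a4c0)
LAZY_STR_FMT == 2:       Lazy Data: [depth=1]
LAZY_STR_FMT == 1 (tree):
    Lazy Data: [depth=1]
    [1: 24] +
    [1: 24] .E@0x1f2a300
    [0:  1] .C@0x1f2a4c0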
1871    
1872    
1873  DataAbstract*  DataAbstract*
1874  DataLazy::deepCopy()  DataLazy::deepCopy() const
1875  {  {
1876    if (m_op==IDENTITY)    switch (getOpgroup(m_op))
1877    {    {
1878      return new DataLazy(m_left);    // we don't need to copy the child here    case G_IDENTITY:  return new DataLazy(m_id->deepCopy()->getPtr());
1879      case G_UNARY:
1880      case G_REDUCTION:      return new DataLazy(m_left->deepCopy()->getPtr(),m_op);
1881      case G_UNARY_P:       return new DataLazy(m_left->deepCopy()->getPtr(), m_op, m_tol);
1882      case G_BINARY:        return new DataLazy(m_left->deepCopy()->getPtr(),m_right->deepCopy()->getPtr(),m_op);
1883      case G_NP1OUT: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(),m_op);
1884      case G_TENSORPROD: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
1885      case G_NP1OUT_P:   return new DataLazy(m_left->deepCopy()->getPtr(),m_op,  m_axis_offset);
1886      case G_NP1OUT_2P:  return new DataLazy(m_left->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
1887      default:
1888            throw DataException("Programmer error - do not know how to deepcopy operator "+opToString(m_op)+".");
1889    }    }
   return new DataLazy(m_left->deepCopy()->getPtr(),m_right->deepCopy()->getPtr(),m_op);  
1890  }  }
1891    
1892    
1893  DataTypes::ValueType::size_type  
1894    // There is no single, natural interpretation of getLength on DataLazy.
1895    // Instances of DataReady can look at the size of their vectors.
1896    // For lazy though, it could be the size the data would be if it were resolved;
1897    // or it could be some function of the lengths of the DataReady instances which
1898    // form part of the expression.
1899    // Rather than have people making assumptions, I have disabled the method.
1900    DataTypes::RealVectorType::size_type
1901  DataLazy::getLength() const  DataLazy::getLength() const
1902  {  {
1903    return m_length;    throw DataException("getLength() does not make sense for lazy data.");
1904  }  }
1905    
1906    
# Line 486  DataLazy::getSlice(const DataTypes::Regi Line 1910  DataLazy::getSlice(const DataTypes::Regi
1910    throw DataException("getSlice - not implemented for Lazy objects.");    throw DataException("getSlice - not implemented for Lazy objects.");
1911  }  }
1912    
1913  DataTypes::ValueType::size_type  
1914    // To do this we need to rely on our child nodes
1915    DataTypes::RealVectorType::size_type
1916    DataLazy::getPointOffset(int sampleNo,
1917                     int dataPointNo)
1918    {
1919      if (m_op==IDENTITY)
1920      {
1921            return m_id->getPointOffset(sampleNo,dataPointNo);
1922      }
1923      if (m_readytype!='E')
1924      {
1925            collapse();
1926            return m_id->getPointOffset(sampleNo,dataPointNo);
1927      }
1928      // at this point we do not have an identity node and the expression will be Expanded
1929      // so we only need to know which child to ask
1930      if (m_left->m_readytype=='E')
1931      {
1932            return m_left->getPointOffset(sampleNo,dataPointNo);
1933      }
1934      else
1935      {
1936            return m_right->getPointOffset(sampleNo,dataPointNo);
1937      }
1938    }
1939    
1940    // To do this we need to rely on our child nodes
1941    DataTypes::RealVectorType::size_type
1942  DataLazy::getPointOffset(int sampleNo,  DataLazy::getPointOffset(int sampleNo,
1943                   int dataPointNo) const                   int dataPointNo) const
1944  {  {
1945    throw DataException("getPointOffset - not implemented for Lazy objects - yet.");    if (m_op==IDENTITY)
1946      {
1947            return m_id->getPointOffset(sampleNo,dataPointNo);
1948      }
1949      if (m_readytype=='E')
1950      {
1951        // at this point we do not have an identity node and the expression will be Expanded
1952        // so we only need to know which child to ask
1953        if (m_left->m_readytype=='E')
1954        {
1955            return m_left->getPointOffset(sampleNo,dataPointNo);
1956        }
1957        else
1958        {
1959            return m_right->getPointOffset(sampleNo,dataPointNo);
1960        }
1961      }
1962      if (m_readytype=='C')
1963      {
1964            return m_left->getPointOffset(sampleNo,dataPointNo); // which child doesn't matter
1965      }
1966      throw DataException("Programmer error - getPointOffset on lazy data may require collapsing (but this object is marked const).");
1967    }
1968    
1969    
1970    // I have decided to let Data:: handle this issue.
1971    void
1972    DataLazy::setToZero()
1973    {
1974    //   DataTypes::RealVectorType v(getNoValues(),0);
1975    //   m_id=DataReady_ptr(new DataConstant(getFunctionSpace(),getShape(),v));
1976    //   m_op=IDENTITY;
1977    //   m_right.reset();  
1978    //   m_left.reset();
1979    //   m_readytype='C';
1980    //   m_buffsRequired=1;
1981    
1982      (void)privdebug;  // to stop the compiler complaining about unused privdebug
1983      throw DataException("Programmer error - setToZero not supported for DataLazy (DataLazy objects should be read only).");
1984  }  }
1985    
1986  }   // end namespace  bool
1987    DataLazy::actsExpanded() const
1988    {
1989            return (m_readytype=='E');
1990    }
1991    
1992    } // end namespace
1993    