/[escript]/branches/clazy/escriptcore/src/DataLazy.cpp
ViewVC logotype

Diff of /branches/clazy/escriptcore/src/DataLazy.cpp

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

branches/schroedinger/escript/src/DataLazy.cpp revision 1898 by jfenwick, Mon Oct 20 01:20:18 2008 UTC branches/clazy/escriptcore/src/DataLazy.cpp revision 6511 by jfenwick, Fri Mar 3 01:41:39 2017 UTC
# Line 1  Line 1 
1    
2  /*******************************************************  /*****************************************************************************
3  *  *
4  * Copyright (c) 2003-2008 by University of Queensland  * Copyright (c) 2003-2016 by The University of Queensland
5  * Earth Systems Science Computational Center (ESSCC)  * http://www.uq.edu.au
 * http://www.uq.edu.au/esscc  
6  *  *
7  * Primary Business: Queensland, Australia  * Primary Business: Queensland, Australia
8  * Licensed under the Open Software License version 3.0  * Licensed under the Apache License, version 2.0
9  * http://www.opensource.org/licenses/osl-3.0.php  * http://www.apache.org/licenses/LICENSE-2.0
10  *  *
11  *******************************************************/  * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12    * Development 2012-2013 by School of Earth Sciences
13    * Development from 2014 by Centre for Geoscience Computing (GeoComp)
14    *
15    *****************************************************************************/
16    
17  #include "DataLazy.h"  #include "DataLazy.h"
 #ifdef USE_NETCDF  
 #include <netcdfcpp.h>  
 #endif  
 #ifdef PASO_MPI  
 #include <mpi.h>  
 #endif  
 #ifdef _OPENMP  
 #include <omp.h>  
 #endif  
 #include "FunctionSpace.h"  
 #include "DataTypes.h"  
18  #include "Data.h"  #include "Data.h"
19  #include "UnaryFuncs.h"     // for escript::fsign  #include "DataTypes.h"
20    #include "EscriptParams.h"
21    #include "FunctionSpace.h"
22    #include "Utils.h"
23    #include "DataVectorOps.h"
24    
25    #include <iomanip> // for some fancy formatting in debug
26    
27    using namespace escript::DataTypes;
28    
29    #define NO_ARG
30    
31    // #define LAZYDEBUG(X) if (privdebug){X;}
32    #define LAZYDEBUG(X)
33    namespace
34    {
35    bool privdebug=false;
36    
37    #define ENABLEDEBUG privdebug=true;
38    #define DISABLEDEBUG privdebug=false;
39    }
40    
41    //#define SIZELIMIT if ((m_height>escript::escriptParams.getInt("TOO_MANY_LEVELS")) || (m_children>escript::escriptParams.getInt("TOO_MANY_NODES"))) {cerr << "\n!!!!!!! SIZE LIMIT EXCEEDED " << m_children << ";" << m_height << endl << toString() << endl;resolveToIdentity();}
42    
43    //#define SIZELIMIT if ((m_height>escript::escriptParams.getInt("TOO_MANY_LEVELS")) || (m_children>escript::escriptParams.getInt("TOO_MANY_NODES"))) {cerr << "SIZE LIMIT EXCEEDED " << m_height << endl;resolveToIdentity();}
44    
45    
46    #define SIZELIMIT \
47        if (m_height > escript::escriptParams.getTooManyLevels()) {\
48            if (escript::escriptParams.getLazyVerbose()) {\
49                cerr << "SIZE LIMIT EXCEEDED height=" << m_height << endl;\
50            }\
51            resolveToIdentity();\
52        }
53    
54    /*
55    How does DataLazy work?
56    ~~~~~~~~~~~~~~~~~~~~~~~
57    
58    Each instance represents a single operation on one or two other DataLazy instances. These arguments are normally
59    denoted left and right.
60    
61    A special operation, IDENTITY, stores an instance of DataReady in the m_id member.
62    This means that all "internal" nodes in the structure are instances of DataLazy.
63    
64    Each operation has a string representation as well as an opgroup - eg G_IDENTITY, G_BINARY, ...
65    Note that IDENTITY is not considered a unary operation.
66    
67    I am avoiding calling the structure formed a tree because it is not guaranteed to be one (eg c=a+a).
68    It must however form a DAG (directed acyclic graph).
69    I will refer to individual DataLazy objects with the structure as nodes.
70    
71    Each node also stores:
72    - m_readytype \in {'E','T','C','?'} ~ indicates what sort of DataReady would be produced if the expression was
73            evaluated.
74    - m_buffsrequired ~ the large number of samples which would need to be kept simultaneously in order to
75            evaluate the expression.
76    - m_samplesize ~ the number of doubles stored in a sample.
77    
78    When a new node is created, the above values are computed based on the values in the child nodes.
79    Eg: if left requires 4 samples and right requires 6 then left+right requires 7 samples.
80    
81    The resolve method, which produces a DataReady from a DataLazy, does the following:
82    1) Create a DataReady to hold the new result.
83    2) Allocate a vector (v) big enough to hold m_buffsrequired samples.
84    3) For each sample, call resolveSample with v, to get its values and copy them into the result object.
85    
86    (In the case of OMP, multiple samples are resolved in parallel so the vector needs to be larger.)
87    
88    resolveSample returns a Vector* and an offset within that vector where the result is stored.
89    Normally, this would be v, but for identity nodes their internal vector is returned instead.
90    
91    The convention that I use, is that the resolve methods should store their results starting at the offset they are passed.
92    
93    For expressions which evaluate to Constant or Tagged, there is a different evaluation method.
94    The collapse method invokes the (non-lazy) operations on the Data class to evaluate the expression.
95    
96    To add a new operator you need to do the following (plus anything I might have forgotten - adding a new group for example):
97    1) Add to the ES_optype.
98    2) determine what opgroup your operation belongs to (X)
99    3) add a string for the op to the end of ES_opstrings
100    4) increase ES_opcount
101    5) add an entry (X) to opgroups
102    6) add an entry to the switch in collapseToReady
103    7) add an entry to resolveX
104    */
105    
106    
107  using namespace std;  using namespace std;
108  using namespace boost;  using namespace boost;
# Line 33  using namespace boost; Line 110  using namespace boost;
110  namespace escript  namespace escript
111  {  {
112    
 const std::string&  
 opToString(ES_optype op);  
   
113  namespace  namespace
114  {  {
115    
116    
117    // enabling this will print out when ever the maximum stacksize used by resolve increases
118    // it assumes _OPENMP is also in use
119    //#define LAZY_STACK_PROF
120    
 enum ES_opgroup  
 {  
    G_UNKNOWN,  
    G_IDENTITY,  
    G_BINARY,  
    G_UNARY  
 };  
121    
122    
123    #ifndef _OPENMP
124      #ifdef LAZY_STACK_PROF
125      #undef LAZY_STACK_PROF
126      #endif
127    #endif
128    
129    
130  string ES_opstrings[]={"UNKNOWN","IDENTITY","+","-","*","/","sin","cos","tan",  #ifdef LAZY_STACK_PROF
131              "asin","acos","atan","sinh","cosh","tanh","erf",  std::vector<void*> stackstart(getNumberOfThreads());
132              "asinh","acosh","atanh",  std::vector<void*> stackend(getNumberOfThreads());
133              "log10","log","sign","abs","neg","pos","exp","sqrt",  size_t maxstackuse=0;
134              "1/","where>0","where<0","where>=0","where<=0"};  #endif
135  int ES_opcount=32;  
 ES_opgroup opgroups[]={G_UNKNOWN,G_IDENTITY,G_BINARY,G_BINARY,G_BINARY,G_BINARY,G_UNARY,G_UNARY,G_UNARY, //9  
             G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,    // 16  
             G_UNARY,G_UNARY,G_UNARY,                    // 19  
             G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,        // 27  
             G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY};  
 inline  
 ES_opgroup  
 getOpgroup(ES_optype op)  
 {  
   return opgroups[op];  
 }  
136    
137  // return the FunctionSpace of the result of "left op right"  // return the FunctionSpace of the result of "left op right"
138  FunctionSpace  FunctionSpace
139  resultFS(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)  resultFS(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
140  {  {
141      // perhaps this should call interpolate and throw or something?          // perhaps this should call interpolate and throw or something?
142      // maybe we need an interpolate node -          // maybe we need an interpolate node -
143      // that way, if interpolate is required in any other op we can just throw a          // that way, if interpolate is required in any other op we can just throw a
144      // programming error exception.          // programming error exception.
145    
146      FunctionSpace l=left->getFunctionSpace();
147      if (left->getFunctionSpace()!=right->getFunctionSpace())    FunctionSpace r=right->getFunctionSpace();
148      {    if (l!=r)
149          throw DataException("FunctionSpaces not equal - interpolation not supported on lazy data.");    {
150      }      signed char res=r.getDomain()->preferredInterpolationOnDomain(r.getTypeCode(), l.getTypeCode());
151      return left->getFunctionSpace();      if (res==1)
152        {
153            return l;
154        }
155        if (res==-1)
156        {
157            return r;
158        }
159        throw DataException("Cannot interpolate between the FunctionSpaces given for operation "+opToString(op)+".");
160      }
161      return l;
162  }  }
163    
164  // return the shape of the result of "left op right"  // return the shape of the result of "left op right"
165    // the shapes resulting from tensor product are more complex to compute so are worked out elsewhere
166  DataTypes::ShapeType  DataTypes::ShapeType
167  resultShape(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)  resultShape(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
168  {  {
169      if (left->getShape()!=right->getShape())          if (left->getShape()!=right->getShape())
170      {          {
171          throw DataException("Shapes not the same - shapes must match for lazy data.");            if ((getOpgroup(op)!=G_BINARY) && (getOpgroup(op)!=G_NP1OUT))
172      }            {
173      return left->getShape();                  throw DataException("Shapes not the name - shapes must match for (point)binary operations.");
174              }
175    
176              if (left->getRank()==0)       // we need to allow scalar * anything
177              {
178                    return right->getShape();
179              }
180              if (right->getRank()==0)
181              {
182                    return left->getShape();
183              }
184              throw DataException("Shapes not the same - arguments must have matching shapes (or be scalars) for (point)binary operations on lazy data.");
185            }
186            return left->getShape();
187  }  }
188    
189  size_t  // return the shape for "op left"
190  resultLength(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)  
191    DataTypes::ShapeType
192    resultShape(DataAbstract_ptr left, ES_optype op, int axis_offset)
193  {  {
194     switch (getOpgroup(op))          switch(op)
195     {          {
196     case G_BINARY: return left->getLength();          case TRANS:
197     case G_UNARY: return left->getLength();             {                    // for the scoping of variables
198     default:                  const DataTypes::ShapeType& s=left->getShape();
199      throw DataException("Programmer Error - attempt to getLength() for operator "+opToString(op)+".");                  DataTypes::ShapeType sh;
200     }                  int rank=left->getRank();
201                    if (axis_offset<0 || axis_offset>rank)
202                    {
203                stringstream e;
204                e << "Error - Data::transpose must have 0 <= axis_offset <= rank=" << rank;
205                throw DataException(e.str());
206            }
207            for (int i=0; i<rank; i++)
208                    {
209                       int index = (axis_offset+i)%rank;
210               sh.push_back(s[index]); // Append to new shape
211            }
212                    return sh;
213               }
214            break;
215            case TRACE:
216               {
217                    int rank=left->getRank();
218                    if (rank<2)
219                    {
220                       throw DataException("Trace can only be computed for objects with rank 2 or greater.");
221                    }
222                    if ((axis_offset>rank-2) || (axis_offset<0))
223                    {
224                       throw DataException("Trace: axis offset must lie between 0 and rank-2 inclusive.");
225                    }
226                    if (rank==2)
227                    {
228                       return DataTypes::scalarShape;
229                    }
230                    else if (rank==3)
231                    {
232                       DataTypes::ShapeType sh;
233                       if (axis_offset==0)
234                       {
235                            sh.push_back(left->getShape()[2]);
236                       }
237                       else         // offset==1
238                       {
239                            sh.push_back(left->getShape()[0]);
240                       }
241                       return sh;
242                    }
243                    else if (rank==4)
244                    {
245                       DataTypes::ShapeType sh;
246                       const DataTypes::ShapeType& s=left->getShape();
247                       if (axis_offset==0)
248                       {
249                            sh.push_back(s[2]);
250                            sh.push_back(s[3]);
251                       }
252                       else if (axis_offset==1)
253                       {
254                            sh.push_back(s[0]);
255                            sh.push_back(s[3]);
256                       }
257                       else         // offset==2
258                       {
259                            sh.push_back(s[0]);
260                            sh.push_back(s[1]);
261                       }
262                       return sh;
263                    }
264                    else            // unknown rank
265                    {
266                       throw DataException("Error - Data::trace can only be calculated for rank 2, 3 or 4 object.");
267                    }
268               }
269            break;
270            default:
271            throw DataException("Programmer error - resultShape(left,op) can't compute shapes for operator "+opToString(op)+".");
272            }
273  }  }
274    
275  int  DataTypes::ShapeType
276  calcBuffs(const DataLazy_ptr& left, const DataLazy_ptr& right, ES_optype op)  SwapShape(DataAbstract_ptr left, const int axis0, const int axis1)
277  {  {
278     switch(getOpgroup(op))       // This code taken from the Data.cpp swapaxes() method
279     {       // Some of the checks are probably redundant here
280     case G_IDENTITY: return 1;       int axis0_tmp,axis1_tmp;
281     case G_BINARY: return max(left->getBuffsRequired(),right->getBuffsRequired()+1);       const DataTypes::ShapeType& s=left->getShape();
282     case G_UNARY: return max(left->getBuffsRequired(),1);       DataTypes::ShapeType out_shape;
283     default:       // Here's the equivalent of python s_out=s[axis_offset:]+s[:axis_offset]
284      throw DataException("Programmer Error - attempt to calcBuffs() for operator "+opToString(op)+".");       // which goes thru all shape vector elements starting with axis_offset (at index=rank wrap around to 0)
285     }       int rank=left->getRank();
286         if (rank<2) {
287            throw DataException("Error - Data::swapaxes argument must have at least rank 2.");
288         }
289         if (axis0<0 || axis0>rank-1) {
290            stringstream e;
291            e << "Error - Data::swapaxes: axis0 must be between 0 and rank-1=" << (rank-1);
292            throw DataException(e.str());
293         }
294         if (axis1<0 || axis1>rank-1) {
295            stringstream e;
296            e << "Error - Data::swapaxes: axis1 must be between 0 and rank-1=" << (rank-1);
297            throw DataException(e.str());
298         }
299         if (axis0 == axis1) {
300             throw DataException("Error - Data::swapaxes: axis indices must be different.");
301         }
302         if (axis0 > axis1) {
303             axis0_tmp=axis1;
304             axis1_tmp=axis0;
305         } else {
306             axis0_tmp=axis0;
307             axis1_tmp=axis1;
308         }
309         for (int i=0; i<rank; i++) {
310           if (i == axis0_tmp) {
311              out_shape.push_back(s[axis1_tmp]);
312           } else if (i == axis1_tmp) {
313              out_shape.push_back(s[axis0_tmp]);
314           } else {
315              out_shape.push_back(s[i]);
316           }
317         }
318        return out_shape;
319  }  }
320    
 }   // end anonymous namespace  
   
321    
322  const std::string&  // determine the output shape for the general tensor product operation
323  opToString(ES_optype op)  // the additional parameters return information required later for the product
324    // the majority of this code is copy pasted from C_General_Tensor_Product
325    DataTypes::ShapeType
326    GTPShape(DataAbstract_ptr left, DataAbstract_ptr right, int axis_offset, int transpose, int& SL, int& SM, int& SR)
327  {  {
328    if (op<0 || op>=ES_opcount)          
329      // Get rank and shape of inputs
330      int rank0 = left->getRank();
331      int rank1 = right->getRank();
332      const DataTypes::ShapeType& shape0 = left->getShape();
333      const DataTypes::ShapeType& shape1 = right->getShape();
334    
335      // Prepare for the loops of the product and verify compatibility of shapes
336      int start0=0, start1=0;
337      if (transpose == 0)           {}
338      else if (transpose == 1)      { start0 = axis_offset; }
339      else if (transpose == 2)      { start1 = rank1-axis_offset; }
340      else                          { throw DataException("DataLazy GeneralTensorProduct Constructor: Error - transpose should be 0, 1 or 2"); }
341    
342      if (rank0<axis_offset)
343    {    {
344      op=UNKNOWNOP;          throw DataException("DataLazy GeneralTensorProduct Constructor: Error - rank of left < axisoffset");
345    }    }
346    return ES_opstrings[op];  
347      // Adjust the shapes for transpose
348      DataTypes::ShapeType tmpShape0(rank0);        // pre-sizing the vectors rather
349      DataTypes::ShapeType tmpShape1(rank1);        // than using push_back
350      for (int i=0; i<rank0; i++)   { tmpShape0[i]=shape0[(i+start0)%rank0]; }
351      for (int i=0; i<rank1; i++)   { tmpShape1[i]=shape1[(i+start1)%rank1]; }
352    
353      // Prepare for the loops of the product
354      SL=1, SM=1, SR=1;
355      for (int i=0; i<rank0-axis_offset; i++)       {
356        SL *= tmpShape0[i];
357      }
358      for (int i=rank0-axis_offset; i<rank0; i++)   {
359        if (tmpShape0[i] != tmpShape1[i-(rank0-axis_offset)]) {
360          throw DataException("C_GeneralTensorProduct: Error - incompatible shapes");
361        }
362        SM *= tmpShape0[i];
363      }
364      for (int i=axis_offset; i<rank1; i++)         {
365        SR *= tmpShape1[i];
366      }
367    
368      // Define the shape of the output (rank of shape is the sum of the loop ranges below)
369      DataTypes::ShapeType shape2(rank0+rank1-2*axis_offset);      
370      {                     // block to limit the scope of out_index
371         int out_index=0;
372         for (int i=0; i<rank0-axis_offset; i++, ++out_index) { shape2[out_index]=tmpShape0[i]; } // First part of arg_0_Z
373         for (int i=axis_offset; i<rank1; i++, ++out_index)   { shape2[out_index]=tmpShape1[i]; } // Last part of arg_1_Z
374      }
375    
376      if (shape2.size()>ESCRIPT_MAX_DATA_RANK)
377      {
378         ostringstream os;
379         os << "C_GeneralTensorProduct: Error - Attempt to create a rank " << shape2.size() << " object. The maximum rank is " << ESCRIPT_MAX_DATA_RANK << ".";
380         throw DataException(os.str());
381      }
382    
383      return shape2;
384  }  }
385    
386    }       // end anonymous namespace
387    
388    void DataLazy::LazyNodeSetup()
389    {
390    #ifdef _OPENMP
391        int numthreads=omp_get_max_threads();
392        m_samples.resize(numthreads*m_samplesize);
393        m_sampleids=new int[numthreads];
394        for (int i=0;i<numthreads;++i)
395        {
396            m_sampleids[i]=-1;  
397        }
398    #else
399        m_samples.resize(m_samplesize);
400        m_sampleids=new int[1];
401        m_sampleids[0]=-1;
402    #endif  // _OPENMP
403    }
404    
405    
406    // Creates an identity node
407  DataLazy::DataLazy(DataAbstract_ptr p)  DataLazy::DataLazy(DataAbstract_ptr p)
408      : parent(p->getFunctionSpace(),p->getShape()),          : parent(p->getFunctionSpace(),p->getShape())
409      m_op(IDENTITY)          ,m_sampleids(0),
410            m_samples(1)
411  {  {
412     if (p->isLazy())     if (p->isLazy())
413     {     {
414      // TODO: fix this.   We could make the new node a copy of p?          // I don't want identity of Lazy.
415      // I don't want identity of Lazy.          // Question: Why would that be so bad?
416      // Question: Why would that be so bad?          // Answer: We assume that the child of ID is something we can call getVector on
417      // Answer: We assume that the child of ID is something we can call getVector on          throw DataException("Programmer error - attempt to create identity from a DataLazy.");
     throw DataException("Programmer error - attempt to create identity from a DataLazy.");  
418     }     }
419     else     else
420     {     {
421      m_id=dynamic_pointer_cast<DataReady>(p);          DataReady_ptr dr=dynamic_pointer_cast<DataReady>(p);
422      if(p->isConstant()) {m_readytype='C';}          makeIdentity(dr);
423      else if(p->isExpanded()) {m_readytype='E';}  LAZYDEBUG(cout << "Wrapping " << dr.get() << " id=" << m_id.get() << endl;)
     else if (p->isTagged()) {m_readytype='T';}  
     else {throw DataException("Unknown DataReady instance in DataLazy constructor.");}  
424     }     }
425     m_length=p->getLength();  LAZYDEBUG(cout << "(1)Lazy created with " << m_samplesize << endl;)
    m_buffsRequired=1;  
    m_samplesize=getNumDPPSample()*getNoValues();  
 cout << "(1)Lazy created with " << m_samplesize << endl;  
426  }  }
427    
428  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op)  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op)
429      : parent(left->getFunctionSpace(),left->getShape()),          : parent(left->getFunctionSpace(),(getOpgroup(op)!=G_REDUCTION)?left->getShape():DataTypes::scalarShape),
430      m_op(op)          m_op(op),
431            m_axis_offset(0),
432            m_transpose(0),
433            m_SL(0), m_SM(0), m_SR(0)
434  {  {
435     if (getOpgroup(op)!=G_UNARY)     if ((getOpgroup(op)!=G_UNARY) && (getOpgroup(op)!=G_NP1OUT) && (getOpgroup(op)!=G_REDUCTION))
436     {     {
437      throw DataException("Programmer error - constructor DataLazy(left, op) will only process UNARY operations.");          throw DataException("Programmer error - constructor DataLazy(left, op) will only process UNARY operations.");
438     }     }
439    
440     DataLazy_ptr lleft;     DataLazy_ptr lleft;
441     if (!left->isLazy())     if (!left->isLazy())
442     {     {
443      lleft=DataLazy_ptr(new DataLazy(left));          lleft=DataLazy_ptr(new DataLazy(left));
444     }     }
445     else     else
446     {     {
447      lleft=dynamic_pointer_cast<DataLazy>(left);          lleft=dynamic_pointer_cast<DataLazy>(left);
448     }     }
449     m_readytype=lleft->m_readytype;     m_readytype=lleft->m_readytype;
    m_length=left->getLength();  
450     m_left=lleft;     m_left=lleft;
    m_buffsRequired=1;  
451     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
452       m_children=m_left->m_children+1;
453       m_height=m_left->m_height+1;
454       LazyNodeSetup();
455       SIZELIMIT
456  }  }
457    
458    
459  // DataLazy::DataLazy(DataLazy_ptr left, DataLazy_ptr right, ES_optype op)  // In this constructor we need to consider interpolation
 //  : parent(resultFS(left,right,op), resultShape(left,right,op)),  
 //  m_left(left),  
 //  m_right(right),  
 //  m_op(op)  
 // {  
 //    if (getOpgroup(op)!=G_BINARY)  
 //    {  
 //  throw DataException("Programmer error - constructor DataLazy(left, right, op) will only process BINARY operations.");  
 //    }  
 //    m_length=resultLength(m_left,m_right,m_op);  
 //    m_samplesize=getNumDPPSample()*getNoValues();  
 //    m_buffsRequired=calcBuffs(m_left, m_right, m_op);  
 // cout << "(2)Lazy created with " << m_samplesize << endl;  
 // }  
   
460  DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)  DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
461      : parent(resultFS(left,right,op), resultShape(left,right,op)),          : parent(resultFS(left,right,op), resultShape(left,right,op)),
462      m_op(op)          m_op(op),
463            m_SL(0), m_SM(0), m_SR(0)
464  {  {
465     if (getOpgroup(op)!=G_BINARY)  LAZYDEBUG(cout << "Forming operator with " << left.get() << " " << right.get() << endl;)
466       if ((getOpgroup(op)!=G_BINARY))
467     {     {
468      throw DataException("Programmer error - constructor DataLazy(left, right, op) will only process BINARY operations.");          throw DataException("Programmer error - constructor DataLazy(left, right, op) will only process BINARY operations.");
469     }     }
470     if (left->isLazy())  
471       if (getFunctionSpace()!=left->getFunctionSpace())    // left needs to be interpolated
472       {
473            FunctionSpace fs=getFunctionSpace();
474            Data ltemp(left);
475            Data tmp(ltemp,fs);
476            left=tmp.borrowDataPtr();
477       }
478       if (getFunctionSpace()!=right->getFunctionSpace())   // right needs to be interpolated
479     {     {
480      m_left=dynamic_pointer_cast<DataLazy>(left);          Data tmp(Data(right),getFunctionSpace());
481            right=tmp.borrowDataPtr();
482    LAZYDEBUG(cout << "Right interpolation required " << right.get() << endl;)
483       }
484       left->operandCheck(*right);
485    
486       if (left->isLazy())                  // the children need to be DataLazy. Wrap them in IDENTITY if required
487       {
488            m_left=dynamic_pointer_cast<DataLazy>(left);
489    LAZYDEBUG(cout << "Left is " << m_left->toString() << endl;)
490     }     }
491     else     else
492     {     {
493      m_left=DataLazy_ptr(new DataLazy(left));          m_left=DataLazy_ptr(new DataLazy(left));
494    LAZYDEBUG(cout << "Left " << left.get() << " wrapped " << m_left->m_id.get() << endl;)
495     }     }
496     if (right->isLazy())     if (right->isLazy())
497     {     {
498      m_right=dynamic_pointer_cast<DataLazy>(right);          m_right=dynamic_pointer_cast<DataLazy>(right);
499    LAZYDEBUG(cout << "Right is " << m_right->toString() << endl;)
500     }     }
501     else     else
502     {     {
503      m_right=DataLazy_ptr(new DataLazy(right));          m_right=DataLazy_ptr(new DataLazy(right));
504    LAZYDEBUG(cout << "Right " << right.get() << " wrapped " << m_right->m_id.get() << endl;)
505     }     }
506     char lt=m_left->m_readytype;     char lt=m_left->m_readytype;
507     char rt=m_right->m_readytype;     char rt=m_right->m_readytype;
508     if (lt=='E' || rt=='E')     if (lt=='E' || rt=='E')
509     {     {
510      m_readytype='E';          m_readytype='E';
511     }     }
512     else if (lt=='T' || rt=='T')     else if (lt=='T' || rt=='T')
513     {     {
514      m_readytype='T';          m_readytype='T';
515     }     }
516     else     else
517     {     {
518      m_readytype='C';          m_readytype='C';
519     }     }
    m_length=resultLength(m_left,m_right,m_op);  
520     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
521     m_buffsRequired=calcBuffs(m_left, m_right,m_op);     m_children=m_left->m_children+m_right->m_children+2;
522  cout << "(3)Lazy created with " << m_samplesize << endl;     m_height=max(m_left->m_height,m_right->m_height)+1;
523       LazyNodeSetup();
524       SIZELIMIT
525    LAZYDEBUG(cout << "(3)Lazy created with " << m_samplesize << endl;)
526  }  }
527    
528    DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op, int axis_offset, int transpose)
529            : parent(resultFS(left,right,op), GTPShape(left,right, axis_offset, transpose, m_SL,m_SM, m_SR)),
530            m_op(op),
531            m_axis_offset(axis_offset),
532            m_transpose(transpose)
533    {
534       if ((getOpgroup(op)!=G_TENSORPROD))
535       {
536            throw DataException("Programmer error - constructor DataLazy(left, right, op, ax, tr) will only process BINARY operations which require parameters.");
537       }
538       if ((transpose>2) || (transpose<0))
539       {
540            throw DataException("DataLazy GeneralTensorProduct constructor: Error - transpose should be 0, 1 or 2");
541       }
542       if (getFunctionSpace()!=left->getFunctionSpace())    // left needs to be interpolated
543       {
544            FunctionSpace fs=getFunctionSpace();
545            Data ltemp(left);
546            Data tmp(ltemp,fs);
547            left=tmp.borrowDataPtr();
548       }
549       if (getFunctionSpace()!=right->getFunctionSpace())   // right needs to be interpolated
550       {
551            Data tmp(Data(right),getFunctionSpace());
552            right=tmp.borrowDataPtr();
553       }
554    //    left->operandCheck(*right);
555    
556  DataLazy::~DataLazy()     if (left->isLazy())                  // the children need to be DataLazy. Wrap them in IDENTITY if required
557       {
558            m_left=dynamic_pointer_cast<DataLazy>(left);
559       }
560       else
561       {
562            m_left=DataLazy_ptr(new DataLazy(left));
563       }
564       if (right->isLazy())
565       {
566            m_right=dynamic_pointer_cast<DataLazy>(right);
567       }
568       else
569       {
570            m_right=DataLazy_ptr(new DataLazy(right));
571       }
572       char lt=m_left->m_readytype;
573       char rt=m_right->m_readytype;
574       if (lt=='E' || rt=='E')
575       {
576            m_readytype='E';
577       }
578       else if (lt=='T' || rt=='T')
579       {
580            m_readytype='T';
581       }
582       else
583       {
584            m_readytype='C';
585       }
586       m_samplesize=getNumDPPSample()*getNoValues();
587       m_children=m_left->m_children+m_right->m_children+2;
588       m_height=max(m_left->m_height,m_right->m_height)+1;
589       LazyNodeSetup();
590       SIZELIMIT
591    LAZYDEBUG(cout << "(4)Lazy created with " << m_samplesize << endl;)
592    }
593    
594    
595    DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, int axis_offset)
596            : parent(left->getFunctionSpace(), resultShape(left,op, axis_offset)),
597            m_op(op),
598            m_axis_offset(axis_offset),
599            m_transpose(0),
600            m_tol(0)
601    {
602       if ((getOpgroup(op)!=G_NP1OUT_P))
603       {
604            throw DataException("Programmer error - constructor DataLazy(left, op, ax) will only process UNARY operations which require parameters.");
605       }
606       DataLazy_ptr lleft;
607       if (!left->isLazy())
608       {
609            lleft=DataLazy_ptr(new DataLazy(left));
610       }
611       else
612       {
613            lleft=dynamic_pointer_cast<DataLazy>(left);
614       }
615       m_readytype=lleft->m_readytype;
616       m_left=lleft;
617       m_samplesize=getNumDPPSample()*getNoValues();
618       m_children=m_left->m_children+1;
619       m_height=m_left->m_height+1;
620       LazyNodeSetup();
621       SIZELIMIT
622    LAZYDEBUG(cout << "(5)Lazy created with " << m_samplesize << endl;)
623    }
624    
625    DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, double tol)
626            : parent(left->getFunctionSpace(), left->getShape()),
627            m_op(op),
628            m_axis_offset(0),
629            m_transpose(0),
630            m_tol(tol)
631    {
632       if ((getOpgroup(op)!=G_UNARY_P))
633       {
634            throw DataException("Programmer error - constructor DataLazy(left, op, tol) will only process UNARY operations which require parameters.");
635       }
636       DataLazy_ptr lleft;
637       if (!left->isLazy())
638       {
639            lleft=DataLazy_ptr(new DataLazy(left));
640       }
641       else
642       {
643            lleft=dynamic_pointer_cast<DataLazy>(left);
644       }
645       m_readytype=lleft->m_readytype;
646       m_left=lleft;
647       m_samplesize=getNumDPPSample()*getNoValues();
648       m_children=m_left->m_children+1;
649       m_height=m_left->m_height+1;
650       LazyNodeSetup();
651       SIZELIMIT
652    LAZYDEBUG(cout << "(6)Lazy created with " << m_samplesize << endl;)
653    }
654    
655    
656    DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, const int axis0, const int axis1)
657            : parent(left->getFunctionSpace(), SwapShape(left,axis0,axis1)),
658            m_op(op),
659            m_axis_offset(axis0),
660            m_transpose(axis1),
661            m_tol(0)
662    {
663       if ((getOpgroup(op)!=G_NP1OUT_2P))
664       {
665            throw DataException("Programmer error - constructor DataLazy(left, op, tol) will only process UNARY operations which require two integer parameters.");
666       }
667       DataLazy_ptr lleft;
668       if (!left->isLazy())
669       {
670            lleft=DataLazy_ptr(new DataLazy(left));
671       }
672       else
673       {
674            lleft=dynamic_pointer_cast<DataLazy>(left);
675       }
676       m_readytype=lleft->m_readytype;
677       m_left=lleft;
678       m_samplesize=getNumDPPSample()*getNoValues();
679       m_children=m_left->m_children+1;
680       m_height=m_left->m_height+1;
681       LazyNodeSetup();
682       SIZELIMIT
683    LAZYDEBUG(cout << "(7)Lazy created with " << m_samplesize << endl;)
684    }
685    
686    
687    namespace
688    {
689    
690        inline int max3(int a, int b, int c)
691        {
692            int t=(a>b?a:b);
693            return (t>c?t:c);
694    
695        }
696    }
697    
698    DataLazy::DataLazy(DataAbstract_ptr mask, DataAbstract_ptr left, DataAbstract_ptr right/*, double tol*/)
699            : parent(left->getFunctionSpace(), left->getShape()),
700            m_op(CONDEVAL),
701            m_axis_offset(0),
702            m_transpose(0),
703            m_tol(0)
704  {  {
705    
706       DataLazy_ptr lmask;
707       DataLazy_ptr lleft;
708       DataLazy_ptr lright;
709       if (!mask->isLazy())
710       {
711            lmask=DataLazy_ptr(new DataLazy(mask));
712       }
713       else
714       {
715            lmask=dynamic_pointer_cast<DataLazy>(mask);
716       }
717       if (!left->isLazy())
718       {
719            lleft=DataLazy_ptr(new DataLazy(left));
720       }
721       else
722       {
723            lleft=dynamic_pointer_cast<DataLazy>(left);
724       }
725       if (!right->isLazy())
726       {
727            lright=DataLazy_ptr(new DataLazy(right));
728       }
729       else
730       {
731            lright=dynamic_pointer_cast<DataLazy>(right);
732       }
733       m_readytype=lmask->m_readytype;
734       if ((lleft->m_readytype!=lright->m_readytype) || (lmask->m_readytype!=lleft->m_readytype))
735       {
736            throw DataException("Programmer Error - condEval arguments must have the same readytype");
737       }
738       m_left=lleft;
739       m_right=lright;
740       m_mask=lmask;
741       m_samplesize=getNumDPPSample()*getNoValues();
742       m_children=m_left->m_children+m_right->m_children+m_mask->m_children+1;
743       m_height=max3(m_left->m_height,m_right->m_height,m_mask->m_height)+1;
744       LazyNodeSetup();
745       SIZELIMIT
746    LAZYDEBUG(cout << "(8)Lazy created with " << m_samplesize << endl;)
747  }  }
748    
749    
750  int  
751  DataLazy::getBuffsRequired() const  DataLazy::~DataLazy()
752  {  {
753      return m_buffsRequired;     delete[] m_sampleids;
754  }  }
755    
756    
757    /*
758      \brief Evaluates the expression using methods on Data.
759      This does the work for the collapse method.
760      For reasons of efficiency do not call this method on DataExpanded nodes.
761    */
762  DataReady_ptr  DataReady_ptr
763  DataLazy::collapseToReady()  DataLazy::collapseToReady() const
764  {  {
765    if (m_readytype=='E')    if (m_readytype=='E')
766    { // this is more an efficiency concern than anything else    {     // this is more an efficiency concern than anything else
767      throw DataException("Programmer Error - do not use collapse on Expanded data.");      throw DataException("Programmer Error - do not use collapse on Expanded data.");
768    }    }
769    if (m_op==IDENTITY)    if (m_op==IDENTITY)
# Line 275  DataLazy::collapseToReady() Line 773  DataLazy::collapseToReady()
773    DataReady_ptr pleft=m_left->collapseToReady();    DataReady_ptr pleft=m_left->collapseToReady();
774    Data left(pleft);    Data left(pleft);
775    Data right;    Data right;
776    if (getOpgroup(m_op)==G_BINARY)    if ((getOpgroup(m_op)==G_BINARY) || (getOpgroup(m_op)==G_TENSORPROD))
777    {    {
778      right=Data(m_right->collapseToReady());      right=Data(m_right->collapseToReady());
779    }    }
# Line 283  DataLazy::collapseToReady() Line 781  DataLazy::collapseToReady()
781    switch(m_op)    switch(m_op)
782    {    {
783      case ADD:      case ADD:
784      result=left+right;          result=left+right;
785      break;          break;
786      case SUB:            case SUB:          
787      result=left-right;          result=left-right;
788      break;          break;
789      case MUL:            case MUL:          
790      result=left*right;          result=left*right;
791      break;          break;
792      case DIV:            case DIV:          
793      result=left/right;          result=left/right;
794      break;          break;
795      case SIN:      case SIN:
796      result=left.sin();            result=left.sin();      
797      break;          break;
798      case COS:      case COS:
799      result=left.cos();          result=left.cos();
800      break;          break;
801      case TAN:      case TAN:
802      result=left.tan();          result=left.tan();
803      break;          break;
804      case ASIN:      case ASIN:
805      result=left.asin();          result=left.asin();
806      break;          break;
807      case ACOS:      case ACOS:
808      result=left.acos();          result=left.acos();
809      break;          break;
810      case ATAN:      case ATAN:
811      result=left.atan();          result=left.atan();
812      break;          break;
813      case SINH:      case SINH:
814      result=left.sinh();          result=left.sinh();
815      break;          break;
816      case COSH:      case COSH:
817      result=left.cosh();          result=left.cosh();
818      break;          break;
819      case TANH:      case TANH:
820      result=left.tanh();          result=left.tanh();
821      break;          break;
822      case ERF:      case ERF:
823      result=left.erf();          result=left.erf();
824      break;          break;
825     case ASINH:     case ASINH:
826      result=left.asinh();          result=left.asinh();
827      break;          break;
828     case ACOSH:     case ACOSH:
829      result=left.acosh();          result=left.acosh();
830      break;          break;
831     case ATANH:     case ATANH:
832      result=left.atanh();          result=left.atanh();
833      break;          break;
834      case LOG10:      case LOG10:
835      result=left.log10();          result=left.log10();
836      break;          break;
837      case LOG:      case LOG:
838      result=left.log();          result=left.log();
839      break;          break;
840      case SIGN:      case SIGN:
841      result=left.sign();          result=left.sign();
842      break;          break;
843      case ABS:      case ABS:
844      result=left.abs();          result=left.abs();
845      break;          break;
846      case NEG:      case NEG:
847      result=left.neg();          result=left.neg();
848      break;          break;
849      case POS:      case POS:
850      // it doesn't mean anything for delayed.          // it doesn't mean anything for delayed.
851      // it will just trigger a deep copy of the lazy object          // it will just trigger a deep copy of the lazy object
852      throw DataException("Programmer error - POS not supported for lazy data.");          throw DataException("Programmer error - POS not supported for lazy data.");
853      break;          break;
854      case EXP:      case EXP:
855      result=left.exp();          result=left.exp();
856      break;          break;
857      case SQRT:      case SQRT:
858      result=left.sqrt();          result=left.sqrt();
859      break;          break;
860      case RECIP:      case RECIP:
861      result=left.oneOver();          result=left.oneOver();
862      break;          break;
863      case GZ:      case GZ:
864      result=left.wherePositive();          result=left.wherePositive();
865      break;          break;
866      case LZ:      case LZ:
867      result=left.whereNegative();          result=left.whereNegative();
868      break;          break;
869      case GEZ:      case GEZ:
870      result=left.whereNonNegative();          result=left.whereNonNegative();
871      break;          break;
872      case LEZ:      case LEZ:
873      result=left.whereNonPositive();          result=left.whereNonPositive();
874            break;
875        case NEZ:
876            result=left.whereNonZero(m_tol);
877            break;
878        case EZ:
879            result=left.whereZero(m_tol);
880            break;
881        case SYM:
882            result=left.symmetric();
883            break;
884        case NSYM:
885            result=left.antisymmetric();
886            break;
887        case PROD:
888            result=C_GeneralTensorProduct(left,right,m_axis_offset, m_transpose);
889            break;
890        case TRANS:
891            result=left.transpose(m_axis_offset);
892            break;
893        case TRACE:
894            result=left.trace(m_axis_offset);
895            break;
896        case SWAP:
897            result=left.swapaxes(m_axis_offset, m_transpose);
898            break;
899        case MINVAL:
900            result=left.minval();
901            break;
902        case MAXVAL:
903            result=left.minval();
904            break;
905        case HER:
906        result=left.hermitian();
907      break;      break;
908      default:      default:
909      throw DataException("Programmer error - do not know how to resolve operator "+opToString(m_op)+".");          throw DataException("Programmer error - collapseToReady does not know how to resolve operator "+opToString(m_op)+".");
910    }    }
911    return result.borrowReadyPtr();    return result.borrowReadyPtr();
912  }  }
913    
914    /*
915       \brief Converts the DataLazy into an IDENTITY storing the value of the expression.
916       This method uses the original methods on the Data class to evaluate the expressions.
917       For this reason, it should not be used on DataExpanded instances. (To do so would defeat
918       the purpose of using DataLazy in the first place).
919    */
920  void  void
921  DataLazy::collapse()  DataLazy::collapse() const
922  {  {
923    if (m_op==IDENTITY)    if (m_op==IDENTITY)
924    {    {
925      return;          return;
926    }    }
927    if (m_readytype=='E')    if (m_readytype=='E')
928    { // this is more an efficiency concern than anything else    {     // this is more an efficiency concern than anything else
929      throw DataException("Programmer Error - do not use collapse on Expanded data.");      throw DataException("Programmer Error - do not use collapse on Expanded data.");
930    }    }
931    m_id=collapseToReady();    m_id=collapseToReady();
932    m_op=IDENTITY;    m_op=IDENTITY;
933  }  }
934    
935  DataTypes::ValueType*  // The result will be stored in m_samples
936  DataLazy::resolveUnary(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const  // The return value is a pointer to the DataVector, offset is the offset within the return value
937    const DataTypes::RealVectorType*
938    DataLazy::resolveNodeSample(int tid, int sampleNo, size_t& roffset) const
939    {
940    LAZYDEBUG(cout << "Resolve sample " << toString() << endl;)
941            // collapse so we have a 'E' node or an IDENTITY for some other type
942      if (m_readytype!='E' && m_op!=IDENTITY)
943      {
944            collapse();
945      }
946      if (m_op==IDENTITY)  
947      {
948        const RealVectorType& vec=m_id->getVectorRO();
949        roffset=m_id->getPointOffset(sampleNo, 0);
950    #ifdef LAZY_STACK_PROF
951    int x;
952    if (&x<stackend[omp_get_thread_num()])
953  {  {
954      // we assume that any collapsing has been done before we get here         stackend[omp_get_thread_num()]=&x;
955      // since we only have one argument we don't need to think about only  }
956      // processing single points.  #endif
957        return &(vec);
958      }
959      if (m_readytype!='E')
960      {
961        throw DataException("Programmer Error - Collapse did not produce an expanded node.");
962      }
963      if (m_sampleids[tid]==sampleNo)
964      {
965            roffset=tid*m_samplesize;
966            return &(m_samples);            // sample is already resolved
967      }
968      m_sampleids[tid]=sampleNo;
969    
970      switch (getOpgroup(m_op))
971      {
972      case G_UNARY:
973      case G_UNARY_P: return resolveNodeUnary(tid, sampleNo, roffset);
974      case G_BINARY: return resolveNodeBinary(tid, sampleNo, roffset);
975      case G_NP1OUT: return resolveNodeNP1OUT(tid, sampleNo, roffset);
976      case G_NP1OUT_P: return resolveNodeNP1OUT_P(tid, sampleNo, roffset);
977      case G_TENSORPROD: return resolveNodeTProd(tid, sampleNo, roffset);
978      case G_NP1OUT_2P: return resolveNodeNP1OUT_2P(tid, sampleNo, roffset);
979      case G_REDUCTION: return resolveNodeReduction(tid, sampleNo, roffset);
980      case G_CONDEVAL: return resolveNodeCondEval(tid, sampleNo, roffset);
981      default:
982        throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");
983      }
984    }
985    
986    const DataTypes::RealVectorType*
987    DataLazy::resolveNodeUnary(int tid, int sampleNo, size_t& roffset) const
988    {
989            // we assume that any collapsing has been done before we get here
990            // since we only have one argument we don't need to think about only
991            // processing single points.
992            // we will also know we won't get identity nodes
993    if (m_readytype!='E')    if (m_readytype!='E')
994    {    {
995      throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");      throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
996    }    }
997    const ValueType* vleft=m_left->resolveSample(v,offset,sampleNo,roffset);    if (m_op==IDENTITY)
998    const double* left=&((*vleft)[roffset]);    {
999    double* result=&(v[offset]);      throw DataException("Programmer error - resolveNodeUnary should not be called on identity nodes.");
1000    roffset=offset;    }
1001      const DataTypes::RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, roffset);
1002      const double* left=&((*leftres)[roffset]);
1003      roffset=m_samplesize*tid;
1004      double* result=&(m_samples[roffset]);
1005      if (m_op==POS)
1006      {
1007        // this should be prevented earlier
1008        // operation is meaningless for lazy
1009            throw DataException("Programmer error - POS not supported for lazy data.");    
1010      }
1011      tensor_unary_array_operation(m_samplesize,
1012                                 left,
1013                                 result,
1014                                 m_op,
1015                                 m_tol);  
1016      return &(m_samples);
1017    }
1018    
1019    
1020    const DataTypes::RealVectorType*
1021    DataLazy::resolveNodeReduction(int tid, int sampleNo, size_t& roffset) const
1022    {
1023            // we assume that any collapsing has been done before we get here
1024            // since we only have one argument we don't need to think about only
1025            // processing single points.
1026            // we will also know we won't get identity nodes
1027      if (m_readytype!='E')
1028      {
1029        throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
1030      }
1031      if (m_op==IDENTITY)
1032      {
1033        throw DataException("Programmer error - resolveNodeUnary should not be called on identity nodes.");
1034      }
1035      size_t loffset=0;
1036      const DataTypes::RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, loffset);
1037    
1038      roffset=m_samplesize*tid;
1039      unsigned int ndpps=getNumDPPSample();
1040      unsigned int psize=DataTypes::noValues(m_left->getShape());
1041      double* result=&(m_samples[roffset]);
1042    switch (m_op)    switch (m_op)
1043    {    {
1044      case SIN:        case MINVAL:
1045      tensor_unary_operation(m_samplesize, left, result, ::sin);          {
1046      break;            for (unsigned int z=0;z<ndpps;++z)
1047      case COS:            {
1048      tensor_unary_operation(m_samplesize, left, result, ::cos);              FMin op;
1049      break;              *result=escript::reductionOpVector(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max());
1050      case TAN:              loffset+=psize;
1051      tensor_unary_operation(m_samplesize, left, result, ::tan);              result++;
1052      break;            }
1053      case ASIN:          }
1054      tensor_unary_operation(m_samplesize, left, result, ::asin);          break;
1055      break;      case MAXVAL:
1056      case ACOS:          {
1057      tensor_unary_operation(m_samplesize, left, result, ::acos);            for (unsigned int z=0;z<ndpps;++z)
1058      break;            {
1059      case ATAN:            FMax op;
1060      tensor_unary_operation(m_samplesize, left, result, ::atan);            *result=escript::reductionOpVector(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max()*-1);
1061      break;            loffset+=psize;
1062      case SINH:            result++;
1063      tensor_unary_operation(m_samplesize, left, result, ::sinh);            }
1064      break;          }
1065      case COSH:          break;
1066      tensor_unary_operation(m_samplesize, left, result, ::cosh);      default:
1067      break;          throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");
1068      case TANH:    }
1069      tensor_unary_operation(m_samplesize, left, result, ::tanh);    return &(m_samples);
1070      break;  }
     case ERF:  
 #ifdef _WIN32  
     throw DataException("Error - Data:: erf function is not supported on _WIN32 platforms.");  
 #else  
     tensor_unary_operation(m_samplesize, left, result, ::erf);  
     break;  
 #endif  
    case ASINH:  
 #ifdef _WIN32  
     tensor_unary_operation(m_samplesize, left, result, escript::asinh_substitute);  
 #else  
     tensor_unary_operation(m_samplesize, left, result, ::asinh);  
 #endif    
     break;  
    case ACOSH:  
 #ifdef _WIN32  
     tensor_unary_operation(m_samplesize, left, result, escript::acosh_substitute);  
 #else  
     tensor_unary_operation(m_samplesize, left, result, ::acosh);  
 #endif    
     break;  
    case ATANH:  
 #ifdef _WIN32  
     tensor_unary_operation(m_samplesize, left, result, escript::atanh_substitute);  
 #else  
     tensor_unary_operation(m_samplesize, left, result, ::atanh);  
 #endif    
     break;  
     case LOG10:  
     tensor_unary_operation(m_samplesize, left, result, ::log10);  
     break;  
     case LOG:  
     tensor_unary_operation(m_samplesize, left, result, ::log);  
     break;  
     case SIGN:  
     tensor_unary_operation(m_samplesize, left, result, escript::fsign);  
     break;  
     case ABS:  
     tensor_unary_operation(m_samplesize, left, result, ::fabs);  
     break;  
     case NEG:  
     tensor_unary_operation(m_samplesize, left, result, negate<double>());  
     break;  
     case POS:  
     // it doesn't mean anything for delayed.  
     // it will just trigger a deep copy of the lazy object  
     throw DataException("Programmer error - POS not supported for lazy data.");  
     break;  
     case EXP:  
     tensor_unary_operation(m_samplesize, left, result, ::exp);  
     break;  
     case SQRT:  
     tensor_unary_operation(m_samplesize, left, result, ::sqrt);  
     break;  
     case RECIP:  
     tensor_unary_operation(m_samplesize, left, result, bind1st(divides<double>(),1.));  
     break;  
     case GZ:  
     tensor_unary_operation(m_samplesize, left, result, bind2nd(greater<double>(),0.0));  
     break;  
     case LZ:  
     tensor_unary_operation(m_samplesize, left, result, bind2nd(less<double>(),0.0));  
     break;  
     case GEZ:  
     tensor_unary_operation(m_samplesize, left, result, bind2nd(greater_equal<double>(),0.0));  
     break;  
     case LEZ:  
     tensor_unary_operation(m_samplesize, left, result, bind2nd(less_equal<double>(),0.0));  
     break;  
1071    
1072    const DataTypes::RealVectorType*
1073    DataLazy::resolveNodeNP1OUT(int tid, int sampleNo, size_t& roffset) const
1074    {
1075            // we assume that any collapsing has been done before we get here
1076            // since we only have one argument we don't need to think about only
1077            // processing single points.
1078      if (m_readytype!='E')
1079      {
1080        throw DataException("Programmer error - resolveNodeNP1OUT should only be called on expanded Data.");
1081      }
1082      if (m_op==IDENTITY)
1083      {
1084        throw DataException("Programmer error - resolveNodeNP1OUT should not be called on identity nodes.");
1085      }
1086      size_t subroffset;
1087      const RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1088      roffset=m_samplesize*tid;
1089      size_t loop=0;
1090      size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1091      size_t step=getNoValues();
1092      size_t offset=roffset;
1093      switch (m_op)
1094      {
1095        case SYM:
1096            for (loop=0;loop<numsteps;++loop)
1097            {
1098                escript::symmetric(*leftres,m_left->getShape(),subroffset, m_samples, getShape(), offset);
1099                subroffset+=step;
1100                offset+=step;
1101            }
1102            break;
1103        case NSYM:
1104            for (loop=0;loop<numsteps;++loop)
1105            {
1106                escript::antisymmetric(*leftres,m_left->getShape(),subroffset, m_samples, getShape(), offset);
1107                subroffset+=step;
1108                offset+=step;
1109            }
1110            break;
1111      default:      default:
1112      throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");          throw DataException("Programmer error - resolveNP1OUT can not resolve operator "+opToString(m_op)+".");
1113    }    }
1114    return &v;    return &m_samples;
1115  }  }
1116    
1117    const DataTypes::RealVectorType*
1118    DataLazy::resolveNodeNP1OUT_P(int tid, int sampleNo, size_t& roffset) const
1119    {
1120            // we assume that any collapsing has been done before we get here
1121            // since we only have one argument we don't need to think about only
1122            // processing single points.
1123      if (m_readytype!='E')
1124      {
1125        throw DataException("Programmer error - resolveNodeNP1OUT_P should only be called on expanded Data.");
1126      }
1127      if (m_op==IDENTITY)
1128      {
1129        throw DataException("Programmer error - resolveNodeNP1OUT_P should not be called on identity nodes.");
1130      }
1131      size_t subroffset;
1132      size_t offset;
1133      const RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1134      roffset=m_samplesize*tid;
1135      offset=roffset;
1136      size_t loop=0;
1137      size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1138      size_t outstep=getNoValues();
1139      size_t instep=m_left->getNoValues();
1140      switch (m_op)
1141      {
1142        case TRACE:
1143            for (loop=0;loop<numsteps;++loop)
1144            {
1145                escript::trace(*leftres,m_left->getShape(),subroffset, m_samples ,getShape(),offset,m_axis_offset);
1146                subroffset+=instep;
1147                offset+=outstep;
1148            }
1149            break;
1150        case TRANS:
1151            for (loop=0;loop<numsteps;++loop)
1152            {
1153                escript::transpose(*leftres,m_left->getShape(),subroffset, m_samples, getShape(),offset,m_axis_offset);
1154                subroffset+=instep;
1155                offset+=outstep;
1156            }
1157            break;
1158        default:
1159            throw DataException("Programmer error - resolveNP1OUTP can not resolve operator "+opToString(m_op)+".");
1160      }
1161      return &m_samples;
1162    }
1163    
1164    
1165  // const double*  const DataTypes::RealVectorType*
1166  // DataLazy::resolveUnary(ValueType& v,int sampleNo,  size_t offset) const  DataLazy::resolveNodeNP1OUT_2P(int tid, int sampleNo, size_t& roffset) const
1167  // {  {
1168  //  // we assume that any collapsing has been done before we get here    if (m_readytype!='E')
1169  //  // since we only have one argument we don't need to think about only    {
1170  //  // processing single points.      throw DataException("Programmer error - resolveNodeNP1OUT_2P should only be called on expanded Data.");
1171  //   if (m_readytype!='E')    }
1172  //   {    if (m_op==IDENTITY)
1173  //     throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");    {
1174  //   }      throw DataException("Programmer error - resolveNodeNP1OUT_2P should not be called on identity nodes.");
1175  //   const double* left=m_left->resolveSample(v,sampleNo,offset);    }
1176  //   double* result=&(v[offset]);    size_t subroffset;
1177  //   switch (m_op)    size_t offset;
1178  //   {    const RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1179  //     case SIN:        roffset=m_samplesize*tid;
1180  //  tensor_unary_operation(m_samplesize, left, result, ::sin);    offset=roffset;
1181  //  break;    size_t loop=0;
1182  //     case COS:    size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1183  //  tensor_unary_operation(m_samplesize, left, result, ::cos);    size_t outstep=getNoValues();
1184  //  break;    size_t instep=m_left->getNoValues();
1185  //     case TAN:    switch (m_op)
1186  //  tensor_unary_operation(m_samplesize, left, result, ::tan);    {
1187  //  break;      case SWAP:
1188  //     case ASIN:          for (loop=0;loop<numsteps;++loop)
1189  //  tensor_unary_operation(m_samplesize, left, result, ::asin);          {
1190  //  break;              escript::swapaxes(*leftres,m_left->getShape(),subroffset, m_samples, getShape(),offset, m_axis_offset, m_transpose);
1191  //     case ACOS:              subroffset+=instep;
1192  //  tensor_unary_operation(m_samplesize, left, result, ::acos);              offset+=outstep;
1193  //  break;          }
1194  //     case ATAN:          break;
1195  //  tensor_unary_operation(m_samplesize, left, result, ::atan);      default:
1196  //  break;          throw DataException("Programmer error - resolveNodeNP1OUT2P can not resolve operator "+opToString(m_op)+".");
1197  //     case SINH:    }
1198  //  tensor_unary_operation(m_samplesize, left, result, ::sinh);    return &m_samples;
1199  //  break;  }
 //     case COSH:  
 //  tensor_unary_operation(m_samplesize, left, result, ::cosh);  
 //  break;  
 //     case TANH:  
 //  tensor_unary_operation(m_samplesize, left, result, ::tanh);  
 //  break;  
 //     case ERF:  
 // #ifdef _WIN32  
 //  throw DataException("Error - Data:: erf function is not supported on _WIN32 platforms.");  
 // #else  
 //  tensor_unary_operation(m_samplesize, left, result, ::erf);  
 //  break;  
 // #endif  
 //    case ASINH:  
 // #ifdef _WIN32  
 //  tensor_unary_operation(m_samplesize, left, result, escript::asinh_substitute);  
 // #else  
 //  tensor_unary_operation(m_samplesize, left, result, ::asinh);  
 // #endif    
 //  break;  
 //    case ACOSH:  
 // #ifdef _WIN32  
 //  tensor_unary_operation(m_samplesize, left, result, escript::acosh_substitute);  
 // #else  
 //  tensor_unary_operation(m_samplesize, left, result, ::acosh);  
 // #endif    
 //  break;  
 //    case ATANH:  
 // #ifdef _WIN32  
 //  tensor_unary_operation(m_samplesize, left, result, escript::atanh_substitute);  
 // #else  
 //  tensor_unary_operation(m_samplesize, left, result, ::atanh);  
 // #endif    
 //  break;  
 //     case LOG10:  
 //  tensor_unary_operation(m_samplesize, left, result, ::log10);  
 //  break;  
 //     case LOG:  
 //  tensor_unary_operation(m_samplesize, left, result, ::log);  
 //  break;  
 //     case SIGN:  
 //  tensor_unary_operation(m_samplesize, left, result, escript::fsign);  
 //  break;  
 //     case ABS:  
 //  tensor_unary_operation(m_samplesize, left, result, ::fabs);  
 //  break;  
 //     case NEG:  
 //  tensor_unary_operation(m_samplesize, left, result, negate<double>());  
 //  break;  
 //     case POS:  
 //  // it doesn't mean anything for delayed.  
 //  // it will just trigger a deep copy of the lazy object  
 //  throw DataException("Programmer error - POS not supported for lazy data.");  
 //  break;  
 //     case EXP:  
 //  tensor_unary_operation(m_samplesize, left, result, ::exp);  
 //  break;  
 //     case SQRT:  
 //  tensor_unary_operation(m_samplesize, left, result, ::sqrt);  
 //  break;  
 //     case RECIP:  
 //  tensor_unary_operation(m_samplesize, left, result, bind1st(divides<double>(),1.));  
 //  break;  
 //     case GZ:  
 //  tensor_unary_operation(m_samplesize, left, result, bind2nd(greater<double>(),0.0));  
 //  break;  
 //     case LZ:  
 //  tensor_unary_operation(m_samplesize, left, result, bind2nd(less<double>(),0.0));  
 //  break;  
 //     case GEZ:  
 //  tensor_unary_operation(m_samplesize, left, result, bind2nd(greater_equal<double>(),0.0));  
 //  break;  
 //     case LEZ:  
 //  tensor_unary_operation(m_samplesize, left, result, bind2nd(less_equal<double>(),0.0));  
 //  break;  
 //  
 //     default:  
 //  throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");  
 //   }  
 //   return result;  
 // }  
   
 #define PROC_OP(X) \  
     for (int i=0;i<steps;++i,resultp+=getNoValues()) \  
     { \  
 cout << "Step#" << i << " chunk=" << chunksize << endl; \  
 cout << left[0] << left[1] << left[2] << endl; \  
 cout << right[0] << right[1] << right[2] << endl; \  
        tensor_binary_operation(chunksize, left, right, resultp, X); \  
        left+=leftStep; \  
        right+=rightStep; \  
 cout << "Result=" << result << " " << result[0] << result[1] << result[2] << endl; \  
     }  
   
 DataTypes::ValueType*  
 DataLazy::resolveBinary(ValueType& v,  size_t offset ,int sampleNo, size_t& roffset) const  
 {  
     // again we assume that all collapsing has already been done  
     // so we have at least one expanded child.  
     // however, we could still have one of the children being not expanded.  
1200    
1201  cout << "Resolve binary: " << toString() << endl;  const DataTypes::RealVectorType*
1202    DataLazy::resolveNodeCondEval(int tid, int sampleNo, size_t& roffset) const
1203    {
1204      if (m_readytype!='E')
1205      {
1206        throw DataException("Programmer error - resolveNodeCondEval should only be called on expanded Data.");
1207      }
1208      if (m_op!=CONDEVAL)
1209      {
1210        throw DataException("Programmer error - resolveNodeCondEval should only be called on CONDEVAL nodes.");
1211      }
1212      size_t subroffset;
1213    
1214      const RealVectorType* maskres=m_mask->resolveNodeSample(tid, sampleNo, subroffset);
1215      const RealVectorType* srcres=0;
1216      if ((*maskres)[subroffset]>0)
1217      {
1218            srcres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1219      }
1220      else
1221      {
1222            srcres=m_right->resolveNodeSample(tid, sampleNo, subroffset);
1223      }
1224    
1225      // Now we need to copy the result
1226    
1227      roffset=m_samplesize*tid;
1228      for (int i=0;i<m_samplesize;++i)
1229      {
1230            m_samples[roffset+i]=(*srcres)[subroffset+i];  
1231      }
1232    
1233      return &m_samples;
1234    }
1235    
1236    size_t lroffset=0, rroffset=0;  // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1237    // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1238    // If both children are expanded, then we can process them in a single operation (we treat
1239    // the whole sample as one big datapoint.
1240    // If one of the children is not expanded, then we need to treat each point in the sample
1241    // individually.
1242    // There is an additional complication when scalar operations are considered.
1243    // For example, 2+Vector.
1244    // In this case each double within the point is treated individually
1245    const DataTypes::RealVectorType*
1246    DataLazy::resolveNodeBinary(int tid, int sampleNo, size_t& roffset) const
1247    {
1248    LAZYDEBUG(cout << "Resolve binary: " << toString() << endl;)
1249    
1250      size_t lroffset=0, rroffset=0;        // offsets in the left and right result vectors
1251            // first work out which of the children are expanded
1252    bool leftExp=(m_left->m_readytype=='E');    bool leftExp=(m_left->m_readytype=='E');
1253    bool rightExp=(m_right->m_readytype=='E');    bool rightExp=(m_right->m_readytype=='E');
1254    bool bigloops=((leftExp && rightExp) || (!leftExp && !rightExp)); // is processing in single step    if (!leftExp && !rightExp)
1255    int steps=(bigloops?1:getNumDPPSample());    {
1256    size_t chunksize=(bigloops? m_samplesize : getNoValues());          throw DataException("Programmer Error - please use collapse if neither argument has type 'E'.");
1257    int leftStep=((leftExp && !rightExp)? getNoValues() : 0);    }
1258    int rightStep=((rightExp && !leftExp)? getNoValues() : 0);    bool leftScalar=(m_left->getRank()==0);
1259      bool rightScalar=(m_right->getRank()==0);
1260    const ValueType* left=m_left->resolveSample(v,offset,sampleNo,lroffset);    if ((m_left->getRank()!=m_right->getRank()) && (!leftScalar && !rightScalar))
1261    const ValueType* right=m_right->resolveSample(v,offset,sampleNo,rroffset);        {
1262      // now we need to know which args are expanded          throw DataException("resolveBinary - ranks of arguments must match unless one of them is scalar.");
1263  cout << "left=" << left << " right=" << right << endl;    }
1264  cout << "(Length) l=" << left->size() << " r=" << right->size() << " res=" << v.size() << endl;    size_t leftsize=m_left->getNoValues();
1265    double* resultp=&(v[offset]);    size_t rightsize=m_right->getNoValues();
1266      size_t chunksize=1;                   // how many doubles will be processed in one go
1267      int leftstep=0;               // how far should the left offset advance after each step
1268      int rightstep=0;
1269      int numsteps=0;               // total number of steps for the inner loop
1270      int oleftstep=0;      // the o variables refer to the outer loop
1271      int orightstep=0;     // The outer loop is only required in cases where there is an extended scalar
1272      int onumsteps=1;
1273      
1274      bool LES=(leftExp && leftScalar);     // Left is an expanded scalar
1275      bool RES=(rightExp && rightScalar);
1276      bool LS=(!leftExp && leftScalar);     // left is a single scalar
1277      bool RS=(!rightExp && rightScalar);
1278      bool LN=(!leftExp && !leftScalar);    // left is a single non-scalar
1279      bool RN=(!rightExp && !rightScalar);
1280      bool LEN=(leftExp && !leftScalar);    // left is an expanded non-scalar
1281      bool REN=(rightExp && !rightScalar);
1282    
1283      if ((LES && RES) || (LEN && REN))     // both are Expanded scalars or both are expanded non-scalars
1284      {
1285            chunksize=m_left->getNumDPPSample()*leftsize;
1286            leftstep=0;
1287            rightstep=0;
1288            numsteps=1;
1289      }
1290      else if (LES || RES)
1291      {
1292            chunksize=1;
1293            if (LES)                // left is an expanded scalar
1294            {
1295                    if (RS)
1296                    {
1297                       leftstep=1;
1298                       rightstep=0;
1299                       numsteps=m_left->getNumDPPSample();
1300                    }
1301                    else            // RN or REN
1302                    {
1303                       leftstep=0;
1304                       oleftstep=1;
1305                       rightstep=1;
1306                       orightstep=(RN ? -(int)rightsize : 0);
1307                       numsteps=rightsize;
1308                       onumsteps=m_left->getNumDPPSample();
1309                    }
1310            }
1311            else            // right is an expanded scalar
1312            {
1313                    if (LS)
1314                    {
1315                       rightstep=1;
1316                       leftstep=0;
1317                       numsteps=m_right->getNumDPPSample();
1318                    }
1319                    else
1320                    {
1321                       rightstep=0;
1322                       orightstep=1;
1323                       leftstep=1;
1324                       oleftstep=(LN ? -(int)leftsize : 0);
1325                       numsteps=leftsize;
1326                       onumsteps=m_right->getNumDPPSample();
1327                    }
1328            }
1329      }
1330      else  // this leaves (LEN, RS), (LEN, RN) and their transposes
1331      {
1332            if (LEN)        // and Right will be a single value
1333            {
1334                    chunksize=rightsize;
1335                    leftstep=rightsize;
1336                    rightstep=0;
1337                    numsteps=m_left->getNumDPPSample();
1338                    if (RS)
1339                    {
1340                       numsteps*=leftsize;
1341                    }
1342            }
1343            else    // REN
1344            {
1345                    chunksize=leftsize;
1346                    rightstep=leftsize;
1347                    leftstep=0;
1348                    numsteps=m_right->getNumDPPSample();
1349                    if (LS)
1350                    {
1351                       numsteps*=rightsize;
1352                    }
1353            }
1354      }
1355    
1356      int resultStep=max(leftstep,rightstep);       // only one (at most) should be !=0
1357            // Get the values of sub-expressions
1358      const RealVectorType* left=m_left->resolveNodeSample(tid,sampleNo,lroffset);      
1359      const RealVectorType* right=m_right->resolveNodeSample(tid,sampleNo,rroffset);
1360    LAZYDEBUG(cout << "Post sub calls in " << toString() << endl;)
1361    LAZYDEBUG(cout << "shapes=" << DataTypes::shapeToString(m_left->getShape()) << "," << DataTypes::shapeToString(m_right->getShape()) << endl;)
1362    LAZYDEBUG(cout << "chunksize=" << chunksize << endl << "leftstep=" << leftstep << " rightstep=" << rightstep;)
1363    LAZYDEBUG(cout << " numsteps=" << numsteps << endl << "oleftstep=" << oleftstep << " orightstep=" << orightstep;)
1364    LAZYDEBUG(cout << "onumsteps=" << onumsteps << endl;)
1365    LAZYDEBUG(cout << " DPPS=" << m_left->getNumDPPSample() << "," <<m_right->getNumDPPSample() << endl;)
1366    LAZYDEBUG(cout << "" << LS << RS << LN << RN << LES << RES <<LEN << REN <<   endl;)
1367    
1368    LAZYDEBUG(cout << "Left res["<< lroffset<< "]=" << (*left)[lroffset] << endl;)
1369    LAZYDEBUG(cout << "Right res["<< rroffset<< "]=" << (*right)[rroffset] << endl;)
1370    
1371    
1372      roffset=m_samplesize*tid;
1373      double* resultp=&(m_samples[roffset]);                // results are stored at the vector offset we received
1374    switch(m_op)    switch(m_op)
1375    {    {
1376      case ADD:      case ADD:
1377      for (int i=0;i<steps;++i,resultp+=getNoValues())          //PROC_OP(NO_ARG,plus<double>());
1378      {        escript::binaryOpVectorLazyHelper<real_t, real_t, real_t>(resultp,
1379  cerr << "Step#" << i << " chunk=" << chunksize << endl;               &(*left)[0],
1380  cerr << left << "[" << lroffset << "] " << right << "[" << rroffset << "]" << endl;               &(*right)[0],
1381         tensor_binary_operation(chunksize, &((*left)[lroffset]), &((*right)[rroffset]), resultp, plus<double>());               chunksize,
1382         lroffset+=leftStep;               onumsteps,
1383         rroffset+=rightStep;               numsteps,
1384  cerr << "left=" << lroffset << " right=" << rroffset << endl;               resultStep,
1385      }               leftstep,
1386      break;               rightstep,
1387  // need to fill in the rest               oleftstep,
1388                 orightstep,
1389                 lroffset,
1390                 rroffset,
1391                 escript::ES_optype::ADD);  
1392            break;
1393        case SUB:
1394          escript::binaryOpVectorLazyHelper<real_t, real_t, real_t>(resultp,
1395                 &(*left)[0],
1396                 &(*right)[0],
1397                 chunksize,
1398                 onumsteps,
1399                 numsteps,
1400                 resultStep,
1401                 leftstep,
1402                 rightstep,
1403                 oleftstep,
1404                 orightstep,
1405                 lroffset,
1406                 rroffset,
1407                 escript::ES_optype::SUB);        
1408            //PROC_OP(NO_ARG,minus<double>());
1409            break;
1410        case MUL:
1411            //PROC_OP(NO_ARG,multiplies<double>());
1412          escript::binaryOpVectorLazyHelper<real_t, real_t, real_t>(resultp,
1413                 &(*left)[0],
1414                 &(*right)[0],
1415                 chunksize,
1416                 onumsteps,
1417                 numsteps,
1418                 resultStep,
1419                 leftstep,
1420                 rightstep,
1421                 oleftstep,
1422                 orightstep,
1423                 lroffset,
1424                 rroffset,
1425                 escript::ES_optype::MUL);        
1426            break;
1427        case DIV:
1428            //PROC_OP(NO_ARG,divides<double>());
1429          escript::binaryOpVectorLazyHelper<real_t, real_t, real_t>(resultp,
1430                 &(*left)[0],
1431                 &(*right)[0],
1432                 chunksize,
1433                 onumsteps,
1434                 numsteps,
1435                 resultStep,
1436                 leftstep,
1437                 rightstep,
1438                 oleftstep,
1439                 orightstep,
1440                 lroffset,
1441                 rroffset,
1442                 escript::ES_optype::DIV);        
1443            break;
1444        case POW:
1445           //PROC_OP(double (double,double),::pow);
1446          escript::binaryOpVectorLazyHelper<real_t, real_t, real_t>(resultp,
1447                 &(*left)[0],
1448                 &(*right)[0],
1449                 chunksize,
1450                 onumsteps,
1451                 numsteps,
1452                 resultStep,
1453                 leftstep,
1454                 rightstep,
1455                 oleftstep,
1456                 orightstep,
1457                 lroffset,
1458                 rroffset,
1459                 escript::ES_optype::POW);        
1460            break;
1461        default:
1462            throw DataException("Programmer error - resolveBinary can not resolve operator "+opToString(m_op)+".");
1463      }
1464    LAZYDEBUG(cout << "Result res[" << roffset<< "]" << m_samples[roffset] << endl;)
1465      return &m_samples;
1466    }
1467    
1468    
1469    // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1470    // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1471    // unlike the other resolve helpers, we must treat these datapoints separately.
1472    const DataTypes::RealVectorType*
1473    DataLazy::resolveNodeTProd(int tid, int sampleNo, size_t& roffset) const
1474    {
1475    LAZYDEBUG(cout << "Resolve TensorProduct: " << toString() << endl;)
1476    
1477      size_t lroffset=0, rroffset=0;        // offsets in the left and right result vectors
1478            // first work out which of the children are expanded
1479      bool leftExp=(m_left->m_readytype=='E');
1480      bool rightExp=(m_right->m_readytype=='E');
1481      int steps=getNumDPPSample();
1482      int leftStep=(leftExp? m_left->getNoValues() : 0);            // do not have scalars as input to this method
1483      int rightStep=(rightExp?m_right->getNoValues() : 0);
1484    
1485      int resultStep=getNoValues();
1486      roffset=m_samplesize*tid;
1487      size_t offset=roffset;
1488    
1489      const RealVectorType* left=m_left->resolveNodeSample(tid, sampleNo, lroffset);
1490    
1491      const RealVectorType* right=m_right->resolveNodeSample(tid, sampleNo, rroffset);
1492    
1493    LAZYDEBUG(cerr << "[Left shape]=" << DataTypes::shapeToString(m_left->getShape()) << "\n[Right shape]=" << DataTypes::shapeToString(m_right->getShape()) << " result=" <<DataTypes::shapeToString(getShape()) <<  endl;
1494    cout << getNoValues() << endl;)
1495    
1496    
1497    LAZYDEBUG(cerr << "Post sub calls: " << toString() << endl;)
1498    LAZYDEBUG(cout << "LeftExp=" << leftExp << " rightExp=" << rightExp << endl;)
1499    LAZYDEBUG(cout << "LeftR=" << m_left->getRank() << " rightExp=" << m_right->getRank() << endl;)
1500    LAZYDEBUG(cout << "LeftSize=" << m_left->getNoValues() << " RightSize=" << m_right->getNoValues() << endl;)
1501    LAZYDEBUG(cout << "m_samplesize=" << m_samplesize << endl;)
1502    LAZYDEBUG(cout << "outputshape=" << DataTypes::shapeToString(getShape()) << endl;)
1503    LAZYDEBUG(cout << "DPPS=" << m_right->getNumDPPSample() <<"."<<endl;)
1504    
1505      double* resultp=&(m_samples[offset]);         // results are stored at the vector offset we received
1506      switch(m_op)
1507      {
1508        case PROD:
1509            for (int i=0;i<steps;++i,resultp+=resultStep)
1510            {
1511              const double *ptr_0 = &((*left)[lroffset]);
1512              const double *ptr_1 = &((*right)[rroffset]);
1513    
1514    LAZYDEBUG(cout << DataTypes::pointToString(*left, m_left->getShape(),lroffset,"LEFT") << endl;)
1515    LAZYDEBUG(cout << DataTypes::pointToString(*right,m_right->getShape(),rroffset, "RIGHT") << endl;)
1516    
1517              matrix_matrix_product(m_SL, m_SM, m_SR, ptr_0, ptr_1, resultp, m_transpose);
1518    
1519              lroffset+=leftStep;
1520              rroffset+=rightStep;
1521            }
1522            break;
1523      default:      default:
1524      throw DataException("Programmer error - resolveBinary can not resolve operator "+opToString(m_op)+".");          throw DataException("Programmer error - resolveTProduct can not resolve operator "+opToString(m_op)+".");
1525    }    }
1526    roffset=offset;    roffset=offset;
1527    return &v;    return &m_samples;
1528  }  }
1529    
1530    
1531    const DataTypes::RealVectorType*
1532    DataLazy::resolveSample(int sampleNo, size_t& roffset) const
1533    {
1534    #ifdef _OPENMP
1535            int tid=omp_get_thread_num();
1536    #else
1537            int tid=0;
1538    #endif
1539    
1540    #ifdef LAZY_STACK_PROF
1541            stackstart[tid]=&tid;
1542            stackend[tid]=&tid;
1543            const DataTypes::RealVectorType* r=resolveNodeSample(tid, sampleNo, roffset);
1544            size_t d=(size_t)stackstart[tid]-(size_t)stackend[tid];
1545            #pragma omp critical
1546            if (d>maxstackuse)
1547            {
1548    cout << "Max resolve Stack use " << d << endl;
1549                    maxstackuse=d;
1550            }
1551            return r;
1552    #else
1553            return resolveNodeSample(tid, sampleNo, roffset);
1554    #endif
1555    }
1556    
1557    
1558    // This needs to do the work of the identity constructor
1559    void
1560    DataLazy::resolveToIdentity()
1561    {
1562       if (m_op==IDENTITY)
1563            return;
1564       DataReady_ptr p=resolveNodeWorker();
1565       makeIdentity(p);
1566    }
1567    
1568    void DataLazy::makeIdentity(const DataReady_ptr& p)
1569    {
1570       m_op=IDENTITY;
1571       m_axis_offset=0;
1572       m_transpose=0;
1573       m_SL=m_SM=m_SR=0;
1574       m_children=m_height=0;
1575       m_id=p;
1576       if(p->isConstant()) {m_readytype='C';}
1577       else if(p->isExpanded()) {m_readytype='E';}
1578       else if (p->isTagged()) {m_readytype='T';}
1579       else {throw DataException("Unknown DataReady instance in convertToIdentity constructor.");}
1580       m_samplesize=p->getNumDPPSample()*p->getNoValues();
1581       m_left.reset();
1582       m_right.reset();
1583    }
1584    
1585    
1586  // #define PROC_OP(X) \  DataReady_ptr
1587  //  for (int i=0;i<steps;++i,resultp+=getNoValues()) \  DataLazy::resolve()
 //  { \  
 // cout << "Step#" << i << " chunk=" << chunksize << endl; \  
 // cout << left[0] << left[1] << left[2] << endl; \  
 // cout << right[0] << right[1] << right[2] << endl; \  
 //     tensor_binary_operation(chunksize, left, right, resultp, X); \  
 //     left+=leftStep; \  
 //     right+=rightStep; \  
 // cout << "Result=" << result << " " << result[0] << result[1] << result[2] << endl; \  
 //  }  
 //  
 // const double*  
 // DataLazy::resolveBinary(ValueType& v,int sampleNo,  size_t offset) const  
 // {  
 //  // again we assume that all collapsing has already been done  
 //  // so we have at least one expanded child.  
 //  // however, we could still have one of the children being not expanded.  
 //  
 // cout << "Resolve binary: " << toString() << endl;  
 //  
 //   const double* left=m_left->resolveSample(v,sampleNo,offset);  
 // // cout << "Done Left " << /*left[0] << left[1] << left[2] << */endl;  
 //   const double* right=m_right->resolveSample(v,sampleNo,offset);  
 // // cout << "Done Right"  << /*right[0] << right[1] << right[2] <<*/ endl;  
 //      // now we need to know which args are expanded  
 //   bool leftExp=(m_left->m_readytype=='E');  
 //   bool rightExp=(m_right->m_readytype=='E');  
 //   bool bigloops=((leftExp && rightExp) || (!leftExp && !rightExp));  // is processing in single step  
 //   int steps=(bigloops?1:getNumSamples());  
 //   size_t chunksize=(bigloops? m_samplesize : getNoValues());  
 //   int leftStep=((leftExp && !rightExp)? getNoValues() : 0);  
 //   int rightStep=((rightExp && !leftExp)? getNoValues() : 0);  
 // cout << "left=" << left << " right=" << right << endl;  
 //   double* result=&(v[offset]);  
 //   double* resultp=result;  
 //   switch(m_op)  
 //   {  
 //     case ADD:  
 //  for (int i=0;i<steps;++i,resultp+=getNoValues())  
 //  {  
 // cout << "Step#" << i << " chunk=" << chunksize << endl; \  
 // // cout << left[0] << left[1] << left[2] << endl;  
 // // cout << right[0] << right[1] << right[2] << endl;  
 //     tensor_binary_operation(chunksize, left, right, resultp, plus<double>());  
 // cout << "left=" << left << " right=" << right << " resp=" << resultp << endl;  
 //     left+=leftStep;  
 //     right+=rightStep;  
 // cout << "left=" << left << " right=" << right << endl;  
 // // cout << "Result=" << result << " " << result[0] << result[1] << result[2] << endl;  
 //  }  
 //  break;  
 // // need to fill in the rest  
 //     default:  
 //  throw DataException("Programmer error - resolveBinay can not resolve operator "+opToString(m_op)+".");  
 //   }  
 // // cout << "About to return "  << result[0] << result[1] << result[2] << endl;;  
 //   return result;  
 // }  
   
 // // the vector and the offset are a place where the method could write its data if it wishes  
 // // it is not obligated to do so. For example, if it has its own storage already, it can use that.  
 // // Hence the return value to indicate where the data is actually stored.  
 // // Regardless, the storage should be assumed to be used, even if it isn't.  
 // const double*  
 // DataLazy::resolveSample(ValueType& v,int sampleNo,  size_t offset )  
 // {  
 // cout << "Resolve sample " << toString() << endl;  
 //  // collapse so we have a 'E' node or an IDENTITY for some other type  
 //   if (m_readytype!='E' && m_op!=IDENTITY)  
 //   {  
 //  collapse();  
 //   }  
 //   if (m_op==IDENTITY)      
 //   {  
 //     const ValueType& vec=m_id->getVector();  
 //     if (m_readytype=='C')  
 //     {  
 //  return &(vec[0]);  
 //     }  
 //     return &(vec[m_id->getPointOffset(sampleNo, 0)]);  
 //   }  
 //   if (m_readytype!='E')  
 //   {  
 //     throw DataException("Programmer Error - Collapse did not produce an expanded node.");  
 //   }  
 //   switch (getOpgroup(m_op))  
 //   {  
 //   case G_UNARY: return resolveUnary(v,sampleNo,offset);  
 //   case G_BINARY: return resolveBinary(v,sampleNo,offset);  
 //   default:  
 //     throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");  
 //   }  
 // }  
   
   
   
 // the vector and the offset are a place where the method could write its data if it wishes  
 // it is not obligated to do so. For example, if it has its own storage already, it can use that.  
 // Hence the return value to indicate where the data is actually stored.  
 // Regardless, the storage should be assumed to be used, even if it isn't.  
   
 // the roffset is the offset within the returned vector where the data begins  
 const DataTypes::ValueType*  
 DataLazy::resolveSample(ValueType& v, size_t offset, int sampleNo, size_t& roffset)  
1588  {  {
1589  cout << "Resolve sample " << toString() << endl;      resolveToIdentity();
1590      // collapse so we have a 'E' node or an IDENTITY for some other type      return m_id;
1591    if (m_readytype!='E' && m_op!=IDENTITY)  }
1592    
1593    
1594    /* This is really a static method but I think that caused problems in windows */
1595    void
1596    DataLazy::resolveGroupWorker(std::vector<DataLazy*>& dats)
1597    {
1598      if (dats.empty())
1599    {    {
1600      collapse();          return;
1601    }    }
1602    if (m_op==IDENTITY)      vector<DataLazy*> work;
1603      FunctionSpace fs=dats[0]->getFunctionSpace();
1604      bool match=true;
1605      for (int i=dats.size()-1;i>=0;--i)
1606    {    {
1607      const ValueType& vec=m_id->getVector();          if (dats[i]->m_readytype!='E')
1608      if (m_readytype=='C')          {
1609      {                  dats[i]->collapse();
1610      roffset=0;          }
1611      return &(vec);          if (dats[i]->m_op!=IDENTITY)
1612      }          {
1613      roffset=m_id->getPointOffset(sampleNo, 0);                  work.push_back(dats[i]);
1614      return &(vec);                  if (fs!=dats[i]->getFunctionSpace())
1615                    {
1616                            match=false;
1617                    }
1618            }
1619    }    }
1620    if (m_readytype!='E')    if (work.empty())
1621    {    {
1622      throw DataException("Programmer Error - Collapse did not produce an expanded node.");          return;         // no work to do
1623    }    }
1624    switch (getOpgroup(m_op))    if (match)    // all functionspaces match.  Yes I realise this is overly strict
1625      {             // it is possible that dats[0] is one of the objects which we discarded and
1626                    // all the other functionspaces match.
1627            vector<DataExpanded*> dep;
1628            vector<RealVectorType*> vecs;
1629            for (int i=0;i<work.size();++i)
1630            {
1631                    dep.push_back(new DataExpanded(fs,work[i]->getShape(), RealVectorType(work[i]->getNoValues())));
1632                    vecs.push_back(&(dep[i]->getVectorRW()));
1633            }
1634            int totalsamples=work[0]->getNumSamples();
1635            const RealVectorType* res=0; // Storage for answer
1636            int sample;
1637            #pragma omp parallel private(sample, res)
1638            {
1639                size_t roffset=0;
1640                #pragma omp for schedule(static)
1641                for (sample=0;sample<totalsamples;++sample)
1642                {
1643                    roffset=0;
1644                    int j;
1645                    for (j=work.size()-1;j>=0;--j)
1646                    {
1647    #ifdef _OPENMP
1648                        res=work[j]->resolveNodeSample(omp_get_thread_num(),sample,roffset);
1649    #else
1650                        res=work[j]->resolveNodeSample(0,sample,roffset);
1651    #endif
1652                        RealVectorType::size_type outoffset=dep[j]->getPointOffset(sample,0);
1653                        memcpy(&((*vecs[j])[outoffset]),&((*res)[roffset]),work[j]->m_samplesize*sizeof(RealVectorType::ElementType));
1654                    }
1655                }
1656            }
1657            // Now we need to load the new results as identity ops into the lazy nodes
1658            for (int i=work.size()-1;i>=0;--i)
1659            {
1660                work[i]->makeIdentity(REFCOUNTNS::dynamic_pointer_cast<DataReady>(dep[i]->getPtr()));
1661            }
1662      }
1663      else  // functionspaces do not match
1664    {    {
1665    case G_UNARY: return resolveUnary(v, offset,sampleNo,roffset);          for (int i=0;i<work.size();++i)
1666    case G_BINARY: return resolveBinary(v, offset,sampleNo,roffset);          {
1667    default:                  work[i]->resolveToIdentity();
1668      throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");          }
1669    }    }
1670  }  }
1671    
1672    
1673    
1674    // This version of resolve uses storage in each node to hold results
 // This version uses double* trying again with vectors  
 // DataReady_ptr  
 // DataLazy::resolve()  
 // {  
 //  
 // cout << "Sample size=" << m_samplesize << endl;  
 // cout << "Buffers=" << m_buffsRequired << endl;  
 //  
 //   if (m_readytype!='E')  
 //   {  
 //     collapse();  
 //   }  
 //   if (m_op==IDENTITY)  
 //   {  
 //     return m_id;  
 //   }  
 //      // from this point on we must have m_op!=IDENTITY and m_readytype=='E'  
 //   size_t threadbuffersize=m_samplesize*(max(1,m_buffsRequired)+1);  
 //   int numthreads=1;  
 // #ifdef _OPENMP  
 //   numthreads=getNumberOfThreads();  
 //   int threadnum=0;  
 // #endif  
 //   ValueType v(numthreads*threadbuffersize);    
 // cout << "Buffer created with size=" << v.size() << endl;  
 //   DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(),  ValueType(getNoValues()));  
 //   ValueType& resvec=result->getVector();  
 //   DataReady_ptr resptr=DataReady_ptr(result);  
 //   int sample;  
 //   int resoffset;  
 //   int totalsamples=getNumSamples();  
 //   const double* res=0;  
 //   #pragma omp parallel for private(sample,resoffset,threadnum,res) schedule(static)  
 //   for (sample=0;sample<totalsamples;++sample)  
 //   {  
 // cout << "################################# " << sample << endl;  
 // #ifdef _OPENMP  
 //     res=resolveSample(v,sample,threadbuffersize*omp_get_thread_num());  
 // #else  
 //     res=resolveSample(v,sample,0);   // this would normally be v, but not if its a single IDENTITY op.  
 // #endif  
 // cerr << "-------------------------------- " << endl;  
 //     resoffset=result->getPointOffset(sample,0);  
 // cerr << "offset=" << resoffset << endl;  
 //     for (unsigned int i=0;i<m_samplesize;++i,++resoffset)    // copy values into the output vector  
 //     {  
 //  resvec[resoffset]=res[i];  
 //     }  
 // cerr << "*********************************" << endl;  
 //   }  
 //   return resptr;  
 // }  
   
   
1675  DataReady_ptr  DataReady_ptr
1676  DataLazy::resolve()  DataLazy::resolveNodeWorker()
1677  {  {
1678      if (m_readytype!='E')         // if the whole sub-expression is Constant or Tagged, then evaluate it normally
 cout << "Sample size=" << m_samplesize << endl;  
 cout << "Buffers=" << m_buffsRequired << endl;  
   
   if (m_readytype!='E')  
1679    {    {
1680      collapse();      collapse();
1681    }    }
1682    if (m_op==IDENTITY)    if (m_op==IDENTITY)           // So a lazy expression of Constant or Tagged data will be returned here.
1683    {    {
1684      return m_id;      return m_id;
1685    }    }
1686      // from this point on we must have m_op!=IDENTITY and m_readytype=='E'          // from this point on we must have m_op!=IDENTITY and m_readytype=='E'
1687    size_t threadbuffersize=m_samplesize*(max(1,m_buffsRequired)+1);    DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(),  RealVectorType(getNoValues()));
1688    int numthreads=1;    RealVectorType& resvec=result->getVectorRW();
 #ifdef _OPENMP  
   numthreads=getNumberOfThreads();  
   int threadnum=0;  
 #endif  
   ValueType v(numthreads*threadbuffersize);  
 cout << "Buffer created with size=" << v.size() << endl;  
   DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(),  ValueType(getNoValues()));  
   ValueType& resvec=result->getVector();  
1689    DataReady_ptr resptr=DataReady_ptr(result);    DataReady_ptr resptr=DataReady_ptr(result);
1690    
1691    int sample;    int sample;
   size_t outoffset;     // offset in the output data  
1692    int totalsamples=getNumSamples();    int totalsamples=getNumSamples();
1693    const ValueType* res=0;    const RealVectorType* res=0;       // Storage for answer
1694    size_t resoffset=0;  LAZYDEBUG(cout << "Total number of samples=" <<totalsamples << endl;)
1695    #pragma omp parallel for private(sample,resoffset,outoffset,threadnum,res) schedule(static)    #pragma omp parallel private(sample,res)
   for (sample=0;sample<totalsamples;++sample)  
1696    {    {
1697  cout << "################################# " << sample << endl;          size_t roffset=0;
1698    #ifdef LAZY_STACK_PROF
1699            stackstart[omp_get_thread_num()]=&roffset;
1700            stackend[omp_get_thread_num()]=&roffset;
1701    #endif
1702            #pragma omp for schedule(static)
1703            for (sample=0;sample<totalsamples;++sample)
1704            {
1705                    roffset=0;
1706  #ifdef _OPENMP  #ifdef _OPENMP
1707      res=resolveSample(v,threadbuffersize*omp_get_thread_num(),sample,resoffset);                  res=resolveNodeSample(omp_get_thread_num(),sample,roffset);
1708  #else  #else
1709      res=resolveSample(v,0,sample,resoffset);   // this would normally be v, but not if its a single IDENTITY op.                  res=resolveNodeSample(0,sample,roffset);
1710  #endif  #endif
1711  cerr << "-------------------------------- " << endl;  LAZYDEBUG(cout << "Sample #" << sample << endl;)
1712      outoffset=result->getPointOffset(sample,0);  LAZYDEBUG(cout << "Final res[" << roffset<< "]=" << (*res)[roffset] << (*res)[roffset]<< endl; )
1713  cerr << "offset=" << outoffset << endl;                  RealVectorType::size_type outoffset=result->getPointOffset(sample,0);
1714      for (unsigned int i=0;i<m_samplesize;++i,++outoffset,++resoffset)   // copy values into the output vector                  memcpy(&(resvec[outoffset]),&((*res)[roffset]),m_samplesize*sizeof(RealVectorType::ElementType));
1715      {          }
     resvec[outoffset]=(*res)[resoffset];  
     }  
 cerr << "*********************************" << endl;  
1716    }    }
1717    #ifdef LAZY_STACK_PROF
1718      for (int i=0;i<getNumberOfThreads();++i)
1719      {
1720            size_t r=((size_t)stackstart[i] - (size_t)stackend[i]);
1721    //      cout << i << " " << stackstart[i] << " .. " << stackend[i] << " = " <<  r << endl;
1722            if (r>maxstackuse)
1723            {
1724                    maxstackuse=r;
1725            }
1726      }
1727      cout << "Max resolve Stack use=" << maxstackuse << endl;
1728    #endif
1729    return resptr;    return resptr;
1730  }  }
1731    
# Line 943  std::string Line 1733  std::string
1733  DataLazy::toString() const  DataLazy::toString() const
1734  {  {
1735    ostringstream oss;    ostringstream oss;
1736    oss << "Lazy Data:";    oss << "Lazy Data: [depth=" << m_height<< "] ";
1737    intoString(oss);    switch (escriptParams.getLazyStrFmt())
1738      {
1739      case 1:       // tree format
1740            oss << endl;
1741            intoTreeString(oss,"");
1742            break;
1743      case 2:       // just the depth
1744            break;
1745      default:
1746            intoString(oss);
1747            break;
1748      }
1749    return oss.str();    return oss.str();
1750  }  }
1751    
1752    
1753  void  void
1754  DataLazy::intoString(ostringstream& oss) const  DataLazy::intoString(ostringstream& oss) const
1755  {  {
1756    //    oss << "[" << m_children <<";"<<m_height <<"]";
1757    switch (getOpgroup(m_op))    switch (getOpgroup(m_op))
1758    {    {
1759    case G_IDENTITY:    case G_IDENTITY:
1760      if (m_id->isExpanded())          if (m_id->isExpanded())
1761      {          {
1762         oss << "E";             oss << "E";
1763      }          }
1764      else if (m_id->isTagged())          else if (m_id->isTagged())
1765      {          {
1766        oss << "T";            oss << "T";
1767      }          }
1768      else if (m_id->isConstant())          else if (m_id->isConstant())
1769      {          {
1770        oss << "C";            oss << "C";
1771      }          }
1772      else          else
1773      {          {
1774        oss << "?";            oss << "?";
1775      }          }
1776      oss << '@' << m_id.get();          oss << '@' << m_id.get();
1777      break;          break;
1778    case G_BINARY:    case G_BINARY:
1779      oss << '(';          oss << '(';
1780      m_left->intoString(oss);          m_left->intoString(oss);
1781      oss << ' ' << opToString(m_op) << ' ';          oss << ' ' << opToString(m_op) << ' ';
1782      m_right->intoString(oss);          m_right->intoString(oss);
1783      oss << ')';          oss << ')';
1784      break;          break;
1785    case G_UNARY:    case G_UNARY:
1786      oss << opToString(m_op) << '(';    case G_UNARY_P:
1787      m_left->intoString(oss);    case G_NP1OUT:
1788      oss << ')';    case G_NP1OUT_P:
1789      break;    case G_REDUCTION:
1790            oss << opToString(m_op) << '(';
1791            m_left->intoString(oss);
1792            oss << ')';
1793            break;
1794      case G_TENSORPROD:
1795            oss << opToString(m_op) << '(';
1796            m_left->intoString(oss);
1797            oss << ", ";
1798            m_right->intoString(oss);
1799            oss << ')';
1800            break;
1801      case G_NP1OUT_2P:
1802            oss << opToString(m_op) << '(';
1803            m_left->intoString(oss);
1804            oss << ", " << m_axis_offset << ", " << m_transpose;
1805            oss << ')';
1806            break;
1807      case G_CONDEVAL:
1808            oss << opToString(m_op)<< '(' ;
1809            m_mask->intoString(oss);
1810            oss << " ? ";
1811            m_left->intoString(oss);
1812            oss << " : ";
1813            m_right->intoString(oss);
1814            oss << ')';
1815            break;
1816    default:    default:
1817      oss << "UNKNOWN";          oss << "UNKNOWN";
1818    }    }
1819  }  }
1820    
1821  // Note that in this case, deepCopy does not make copies of the leaves.  
1822  // Hopefully copy on write (or whatever we end up using) will take care of this.  void
1823    DataLazy::intoTreeString(ostringstream& oss, string indent) const
1824    {
1825      oss << '[' << m_rank << ':' << setw(3) << m_samplesize << "] " << indent;
1826      switch (getOpgroup(m_op))
1827      {
1828      case G_IDENTITY:
1829            if (m_id->isExpanded())
1830            {
1831               oss << "E";
1832            }
1833            else if (m_id->isTagged())
1834            {
1835              oss << "T";
1836            }
1837            else if (m_id->isConstant())
1838            {
1839              oss << "C";
1840            }
1841            else
1842            {
1843              oss << "?";
1844            }
1845            oss << '@' << m_id.get() << endl;
1846            break;
1847      case G_BINARY:
1848            oss << opToString(m_op) << endl;
1849            indent+='.';
1850            m_left->intoTreeString(oss, indent);
1851            m_right->intoTreeString(oss, indent);
1852            break;
1853      case G_UNARY:
1854      case G_UNARY_P:
1855      case G_NP1OUT:
1856      case G_NP1OUT_P:
1857      case G_REDUCTION:
1858            oss << opToString(m_op) << endl;
1859            indent+='.';
1860            m_left->intoTreeString(oss, indent);
1861            break;
1862      case G_TENSORPROD:
1863            oss << opToString(m_op) << endl;
1864            indent+='.';
1865            m_left->intoTreeString(oss, indent);
1866            m_right->intoTreeString(oss, indent);
1867            break;
1868      case G_NP1OUT_2P:
1869            oss << opToString(m_op) << ", " << m_axis_offset << ", " << m_transpose<< endl;
1870            indent+='.';
1871            m_left->intoTreeString(oss, indent);
1872            break;
1873      default:
1874            oss << "UNKNOWN";
1875      }
1876    }
1877    
1878    
1879  DataAbstract*  DataAbstract*
1880  DataLazy::deepCopy()  DataLazy::deepCopy() const
1881  {  {
1882    if (m_op==IDENTITY)    switch (getOpgroup(m_op))
1883    {    {
1884      return new DataLazy(m_left);    // we don't need to copy the child here    case G_IDENTITY:  return new DataLazy(m_id->deepCopy()->getPtr());
1885      case G_UNARY:
1886      case G_REDUCTION:      return new DataLazy(m_left->deepCopy()->getPtr(),m_op);
1887      case G_UNARY_P:       return new DataLazy(m_left->deepCopy()->getPtr(), m_op, m_tol);
1888      case G_BINARY:        return new DataLazy(m_left->deepCopy()->getPtr(),m_right->deepCopy()->getPtr(),m_op);
1889      case G_NP1OUT: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(),m_op);
1890      case G_TENSORPROD: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
1891      case G_NP1OUT_P:   return new DataLazy(m_left->deepCopy()->getPtr(),m_op,  m_axis_offset);
1892      case G_NP1OUT_2P:  return new DataLazy(m_left->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
1893      default:
1894            throw DataException("Programmer error - do not know how to deepcopy operator "+opToString(m_op)+".");
1895    }    }
   return new DataLazy(m_left->deepCopy()->getPtr(),m_right->deepCopy()->getPtr(),m_op);  
1896  }  }
1897    
1898    // For this, we don't care what op you were doing because the answer is now zero
1899    DataAbstract*
1900    DataLazy::zeroedCopy() const
1901    {
1902      return new DataLazy(m_id->zeroedCopy()->getPtr());
1903    }
1904    
1905  DataTypes::ValueType::size_type  // There is no single, natural interpretation of getLength on DataLazy.
1906    // Instances of DataReady can look at the size of their vectors.
1907    // For lazy though, it could be the size the data would be if it were resolved;
1908    // or it could be some function of the lengths of the DataReady instances which
1909    // form part of the expression.
1910    // Rather than have people making assumptions, I have disabled the method.
1911    DataTypes::RealVectorType::size_type
1912  DataLazy::getLength() const  DataLazy::getLength() const
1913  {  {
1914    return m_length;    throw DataException("getLength() does not make sense for lazy data.");
1915  }  }
1916    
1917    
# Line 1015  DataLazy::getSlice(const DataTypes::Regi Line 1921  DataLazy::getSlice(const DataTypes::Regi
1921    throw DataException("getSlice - not implemented for Lazy objects.");    throw DataException("getSlice - not implemented for Lazy objects.");
1922  }  }
1923    
1924  DataTypes::ValueType::size_type  
1925    // To do this we need to rely on our child nodes
1926    DataTypes::RealVectorType::size_type
1927    DataLazy::getPointOffset(int sampleNo,
1928                     int dataPointNo)
1929    {
1930      if (m_op==IDENTITY)
1931      {
1932            return m_id->getPointOffset(sampleNo,dataPointNo);
1933      }
1934      if (m_readytype!='E')
1935      {
1936            collapse();
1937            return m_id->getPointOffset(sampleNo,dataPointNo);
1938      }
1939      // at this point we do not have an identity node and the expression will be Expanded
1940      // so we only need to know which child to ask
1941      if (m_left->m_readytype=='E')
1942      {
1943            return m_left->getPointOffset(sampleNo,dataPointNo);
1944      }
1945      else
1946      {
1947            return m_right->getPointOffset(sampleNo,dataPointNo);
1948      }
1949    }
1950    
1951    // To do this we need to rely on our child nodes
1952    DataTypes::RealVectorType::size_type
1953  DataLazy::getPointOffset(int sampleNo,  DataLazy::getPointOffset(int sampleNo,
1954                   int dataPointNo) const                   int dataPointNo) const
1955  {  {
1956    throw DataException("getPointOffset - not implemented for Lazy objects - yet.");    if (m_op==IDENTITY)
1957      {
1958            return m_id->getPointOffset(sampleNo,dataPointNo);
1959      }
1960      if (m_readytype=='E')
1961      {
1962        // at this point we do not have an identity node and the expression will be Expanded
1963        // so we only need to know which child to ask
1964        if (m_left->m_readytype=='E')
1965        {
1966            return m_left->getPointOffset(sampleNo,dataPointNo);
1967        }
1968        else
1969        {
1970            return m_right->getPointOffset(sampleNo,dataPointNo);
1971        }
1972      }
1973      if (m_readytype=='C')
1974      {
1975            return m_left->getPointOffset(sampleNo,dataPointNo); // which child doesn't matter
1976      }
1977      throw DataException("Programmer error - getPointOffset on lazy data may require collapsing (but this object is marked const).");
1978    }
1979    
1980    
1981    // I have decided to let Data:: handle this issue.
1982    void
1983    DataLazy::setToZero()
1984    {
1985    //   DataTypes::RealVectorType v(getNoValues(),0);
1986    //   m_id=DataReady_ptr(new DataConstant(getFunctionSpace(),getShape(),v));
1987    //   m_op=IDENTITY;
1988    //   m_right.reset();  
1989    //   m_left.reset();
1990    //   m_readytype='C';
1991    //   m_buffsRequired=1;
1992    
1993      (void)privdebug;  // to stop the compiler complaining about unused privdebug
1994      throw DataException("Programmer error - setToZero not supported for DataLazy (DataLazy objects should be read only).");
1995    }
1996    
1997    bool
1998    DataLazy::actsExpanded() const
1999    {
2000            return (m_readytype=='E');
2001  }  }
2002    
2003  }   // end namespace  } // end namespace
2004    

Legend:
Removed from v.1898  
changed lines
  Added in v.6511

  ViewVC Help
Powered by ViewVC 1.1.26