/[escript]/branches/clazy/escriptcore/src/DataLazy.cpp

Diff of /branches/clazy/escriptcore/src/DataLazy.cpp


branches/schroedinger/escript/src/DataLazy.cpp revision 1888 by jfenwick, Wed Oct 15 04:00:21 2008 UTC
branches/clazy/escriptcore/src/DataLazy.cpp revision 6512 by jfenwick, Fri Mar 3 02:01:57 2017 UTC
# Line 1  Line 1 
1    
2  /*****************************************************************************
3  *
4  * Copyright (c) 2003-2016 by The University of Queensland
5  * http://www.uq.edu.au
6  *
7  * Primary Business: Queensland, Australia
8  * Licensed under the Apache License, version 2.0
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12  * Development 2012-2013 by School of Earth Sciences
13  * Development from 2014 by Centre for Geoscience Computing (GeoComp)
14  *
15  *****************************************************************************/
16    
17  #include "DataLazy.h"
18  #include "Data.h"
19  #include "DataTypes.h"
20    #include "EscriptParams.h"
21    #include "FunctionSpace.h"
22    #include "Utils.h"
23    #include "DataVectorOps.h"
24    
25    #include <iomanip> // for some fancy formatting in debug
26    
27    using namespace escript::DataTypes;
28    
29    #define NO_ARG
30    
31    // #define LAZYDEBUG(X) if (privdebug){X;}
32    #define LAZYDEBUG(X)
33    namespace
34    {
35    bool privdebug=false;
36    
37    #define ENABLEDEBUG privdebug=true;
38    #define DISABLEDEBUG privdebug=false;
39    }
40    
41    //#define SIZELIMIT if ((m_height>escript::escriptParams.getInt("TOO_MANY_LEVELS")) || (m_children>escript::escriptParams.getInt("TOO_MANY_NODES"))) {cerr << "\n!!!!!!! SIZE LIMIT EXCEEDED " << m_children << ";" << m_height << endl << toString() << endl;resolveToIdentity();}
42    
43    //#define SIZELIMIT if ((m_height>escript::escriptParams.getInt("TOO_MANY_LEVELS")) || (m_children>escript::escriptParams.getInt("TOO_MANY_NODES"))) {cerr << "SIZE LIMIT EXCEEDED " << m_height << endl;resolveToIdentity();}
44    
45    
46    #define SIZELIMIT \
47        if (m_height > escript::escriptParams.getTooManyLevels()) {\
48            if (escript::escriptParams.getLazyVerbose()) {\
49                cerr << "SIZE LIMIT EXCEEDED height=" << m_height << endl;\
50            }\
51            resolveToIdentity();\
52        }
53    
54    /*
55    How does DataLazy work?
56    ~~~~~~~~~~~~~~~~~~~~~~~
57    
58    Each instance represents a single operation on one or two other DataLazy instances. These arguments are normally
59    denoted left and right.
60    
61    A special operation, IDENTITY, stores an instance of DataReady in the m_id member.
62    This means that all "internal" nodes in the structure are instances of DataLazy.
63    
64    Each operation has a string representation as well as an opgroup - eg G_IDENTITY, G_BINARY, ...
65    Note that IDENTITY is not considered a unary operation.
66    
67    I am avoiding calling the structure formed a tree because it is not guaranteed to be one (eg c=a+a).
68    It must however form a DAG (directed acyclic graph).
69    I will refer to individual DataLazy objects within the structure as nodes.
70    
71    Each node also stores:
72    - m_readytype \in {'E','T','C','?'} ~ indicates what sort of DataReady would be produced if the expression was
73            evaluated.
74    - m_buffsrequired ~ the largest number of samples which would need to be kept simultaneously in order to
75            evaluate the expression.
76    - m_samplesize ~ the number of doubles stored in a sample.
77    
78    When a new node is created, the above values are computed based on the values in the child nodes.
79    Eg: if left requires 4 samples and right requires 6 then left+right requires 7 samples.
80    
81    The resolve method, which produces a DataReady from a DataLazy, does the following:
82    1) Create a DataReady to hold the new result.
83    2) Allocate a vector (v) big enough to hold m_buffsrequired samples.
84    3) For each sample, call resolveSample with v, to get its values and copy them into the result object.
85    
86    (In the case of OMP, multiple samples are resolved in parallel so the vector needs to be larger.)
87    
88    resolveSample returns a Vector* and an offset within that vector where the result is stored.
89    Normally, this would be v, but for identity nodes their internal vector is returned instead.
90    
91    The convention that I use is that the resolve methods should store their results starting at the offset they are passed.
92    
93    For expressions which evaluate to Constant or Tagged, there is a different evaluation method.
94    The collapse method invokes the (non-lazy) operations on the Data class to evaluate the expression.
95    
96    To add a new operator you need to do the following (plus anything I might have forgotten - adding a new group for example):
97    1) Add to the ES_optype.
98    2) determine what opgroup your operation belongs to (X)
99    3) add a string for the op to the end of ES_opstrings
100    4) increase ES_opcount
101    5) add an entry (X) to opgroups
102    6) add an entry to the switch in collapseToReady
103    7) add an entry to resolveX
104    */
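/*
A minimal usage sketch (added for illustration, not part of the original file).
It shows how a lazy expression DAG might be built and then resolved; the exact
method names (delaySelf, resolve) are assumptions based on the Data interface
used elsewhere in this file (borrowDataPtr, collapseToReady) and may differ:

    escript::Data a = ...;            // some ready Data object (e.g. DataExpanded)
    escript::Data b = ...;
    a.delaySelf();                    // a now wraps an IDENTITY DataLazy node
    escript::Data c = a + b * a;      // each operator adds a DataLazy node; "a" is shared, so this is a DAG, not a tree
    c.resolve();                      // evaluates the DAG sample by sample into a single DataReady
*/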
105    
106    
107  using namespace std;
108  using namespace boost;
# Line 33  Line 110
110  namespace escript
111  {
112    
113  namespace
114  {
115  
116  
117  // enabling this will print out whenever the maximum stacksize used by resolve increases
118  // it assumes _OPENMP is also in use
119  //#define LAZY_STACK_PROF
120  
121    
122    
123    #ifndef _OPENMP
124      #ifdef LAZY_STACK_PROF
125      #undef LAZY_STACK_PROF
126      #endif
127    #endif
128    
129    
130  #ifdef LAZY_STACK_PROF
131  std::vector<void*> stackstart(getNumberOfThreads());
132  std::vector<void*> stackend(getNumberOfThreads());
133  size_t maxstackuse=0;
134  #endif
135  
136    
137  // return the FunctionSpace of the result of "left op right"
138  FunctionSpace
139  resultFS(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
140  {
141      // perhaps this should call interpolate and throw or something?
142      // maybe we need an interpolate node -
143      // that way, if interpolate is required in any other op we can just throw a
144      // programming error exception.
145  
146      FunctionSpace l=left->getFunctionSpace();
147      FunctionSpace r=right->getFunctionSpace();
148      if (l!=r)
149      {
150          signed char res=r.getDomain()->preferredInterpolationOnDomain(r.getTypeCode(), l.getTypeCode());
151          if (res==1)
152          {
153              return l;
154          }
155          if (res==-1)
156          {
157              return r;
158          }
159          throw DataException("Cannot interpolate between the FunctionSpaces given for operation "+opToString(op)+".");
160      }
161      return l;
162  }
163    
164  // return the shape of the result of "left op right"
165  // the shapes resulting from tensor product are more complex to compute so are worked out elsewhere
166  DataTypes::ShapeType
167  resultShape(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
168  {
169      if (left->getShape()!=right->getShape())
170      {
171          if ((getOpgroup(op)!=G_BINARY) && (getOpgroup(op)!=G_NP1OUT))
172          {
173              throw DataException("Shapes not the same - shapes must match for (point)binary operations.");
174          }
175  
176          if (left->getRank()==0)       // we need to allow scalar * anything
177          {
178              return right->getShape();
179          }
180          if (right->getRank()==0)
181          {
182              return left->getShape();
183          }
184          throw DataException("Shapes not the same - arguments must have matching shapes (or be scalars) for (point)binary operations on lazy data.");
185      }
186      return left->getShape();
187  }
188    
189  // return the shape for "op left"
190  
191  DataTypes::ShapeType
192  resultShape(DataAbstract_ptr left, ES_optype op, int axis_offset)
193  {
194      switch(op)
195      {
196      case TRANS:
197         {                    // for the scoping of variables
198              const DataTypes::ShapeType& s=left->getShape();
199              DataTypes::ShapeType sh;
200              int rank=left->getRank();
201                    if (axis_offset<0 || axis_offset>rank)
202                    {
203                stringstream e;
204                e << "Error - Data::transpose must have 0 <= axis_offset <= rank=" << rank;
205                throw DataException(e.str());
206            }
207            for (int i=0; i<rank; i++)
208                    {
209                       int index = (axis_offset+i)%rank;
210               sh.push_back(s[index]); // Append to new shape
211            }
212                    return sh;
213               }
214            break;
215            case TRACE:
216               {
217                    int rank=left->getRank();
218                    if (rank<2)
219                    {
220                       throw DataException("Trace can only be computed for objects with rank 2 or greater.");
221                    }
222                    if ((axis_offset>rank-2) || (axis_offset<0))
223                    {
224                       throw DataException("Trace: axis offset must lie between 0 and rank-2 inclusive.");
225                    }
226                    if (rank==2)
227                    {
228                       return DataTypes::scalarShape;
229                    }
230                    else if (rank==3)
231                    {
232                       DataTypes::ShapeType sh;
233                       if (axis_offset==0)
234                       {
235                            sh.push_back(left->getShape()[2]);
236                       }
237                       else         // offset==1
238                       {
239                            sh.push_back(left->getShape()[0]);
240                       }
241                       return sh;
242                    }
243                    else if (rank==4)
244                    {
245                       DataTypes::ShapeType sh;
246                       const DataTypes::ShapeType& s=left->getShape();
247                       if (axis_offset==0)
248                       {
249                            sh.push_back(s[2]);
250                            sh.push_back(s[3]);
251                       }
252                       else if (axis_offset==1)
253                       {
254                            sh.push_back(s[0]);
255                            sh.push_back(s[3]);
256                       }
257                       else         // offset==2
258                       {
259                            sh.push_back(s[0]);
260                            sh.push_back(s[1]);
261                       }
262                       return sh;
263                    }
264                    else            // unknown rank
265                    {
266                       throw DataException("Error - Data::trace can only be calculated for rank 2, 3 or 4 object.");
267                    }
268               }
269            break;
270            default:
271            throw DataException("Programmer error - resultShape(left,op) can't compute shapes for operator "+opToString(op)+".");
272            }
273  }
274    
275  DataTypes::ShapeType
276  SwapShape(DataAbstract_ptr left, const int axis0, const int axis1)
277  {
278       // This code taken from the Data.cpp swapaxes() method
279       // Some of the checks are probably redundant here
280       int axis0_tmp,axis1_tmp;
281       const DataTypes::ShapeType& s=left->getShape();
282       DataTypes::ShapeType out_shape;
283       // Here's the equivalent of python s_out=s[axis_offset:]+s[:axis_offset]
284       // which goes through all shape vector elements starting with axis_offset (at index=rank wrap around to 0)
285       int rank=left->getRank();
286         if (rank<2) {
287            throw DataException("Error - Data::swapaxes argument must have at least rank 2.");
288         }
289         if (axis0<0 || axis0>rank-1) {
290            stringstream e;
291            e << "Error - Data::swapaxes: axis0 must be between 0 and rank-1=" << (rank-1);
292            throw DataException(e.str());
293         }
294         if (axis1<0 || axis1>rank-1) {
295            stringstream e;
296            e << "Error - Data::swapaxes: axis1 must be between 0 and rank-1=" << (rank-1);
297            throw DataException(e.str());
298         }
299         if (axis0 == axis1) {
300             throw DataException("Error - Data::swapaxes: axis indices must be different.");
301         }
302         if (axis0 > axis1) {
303             axis0_tmp=axis1;
304             axis1_tmp=axis0;
305         } else {
306             axis0_tmp=axis0;
307             axis1_tmp=axis1;
308         }
309         for (int i=0; i<rank; i++) {
310           if (i == axis0_tmp) {
311              out_shape.push_back(s[axis1_tmp]);
312           } else if (i == axis1_tmp) {
313              out_shape.push_back(s[axis0_tmp]);
314           } else {
315              out_shape.push_back(s[i]);
316           }
317         }
318        return out_shape;
319  }
320    
321    
322    // determine the output shape for the general tensor product operation
323    // the additional parameters return information required later for the product
324    // the majority of this code is copy pasted from C_General_Tensor_Product
325    DataTypes::ShapeType
326    GTPShape(DataAbstract_ptr left, DataAbstract_ptr right, int axis_offset, int transpose, int& SL, int& SM, int& SR)
327    {
328            
329      // Get rank and shape of inputs
330      int rank0 = left->getRank();
331      int rank1 = right->getRank();
332      const DataTypes::ShapeType& shape0 = left->getShape();
333      const DataTypes::ShapeType& shape1 = right->getShape();
334    
335      // Prepare for the loops of the product and verify compatibility of shapes
336      int start0=0, start1=0;
337      if (transpose == 0)           {}
338      else if (transpose == 1)      { start0 = axis_offset; }
339      else if (transpose == 2)      { start1 = rank1-axis_offset; }
340      else                          { throw DataException("DataLazy GeneralTensorProduct Constructor: Error - transpose should be 0, 1 or 2"); }
341    
342      if (rank0<axis_offset)
343      {
344            throw DataException("DataLazy GeneralTensorProduct Constructor: Error - rank of left < axisoffset");
345      }
346    
347    // Adjust the shapes for transpose
348      DataTypes::ShapeType tmpShape0(rank0);        // pre-sizing the vectors rather
349      DataTypes::ShapeType tmpShape1(rank1);        // than using push_back
350      for (int i=0; i<rank0; i++)   { tmpShape0[i]=shape0[(i+start0)%rank0]; }
351      for (int i=0; i<rank1; i++)   { tmpShape1[i]=shape1[(i+start1)%rank1]; }
352    
353      // Prepare for the loops of the product
354      SL=1, SM=1, SR=1;
355      for (int i=0; i<rank0-axis_offset; i++)       {
356        SL *= tmpShape0[i];
357      }
358      for (int i=rank0-axis_offset; i<rank0; i++)   {
359        if (tmpShape0[i] != tmpShape1[i-(rank0-axis_offset)]) {
360          throw DataException("C_GeneralTensorProduct: Error - incompatible shapes");
361        }
362        SM *= tmpShape0[i];
363      }
364      for (int i=axis_offset; i<rank1; i++)         {
365        SR *= tmpShape1[i];
366      }
367    
368      // Define the shape of the output (rank of shape is the sum of the loop ranges below)
369      DataTypes::ShapeType shape2(rank0+rank1-2*axis_offset);      
370      {                     // block to limit the scope of out_index
371         int out_index=0;
372         for (int i=0; i<rank0-axis_offset; i++, ++out_index) { shape2[out_index]=tmpShape0[i]; } // First part of arg_0_Z
373         for (int i=axis_offset; i<rank1; i++, ++out_index)   { shape2[out_index]=tmpShape1[i]; } // Last part of arg_1_Z
374      }
375    
376    if (shape2.size()>ESCRIPT_MAX_DATA_RANK)
377    {
378       ostringstream os;
379         os << "C_GeneralTensorProduct: Error - Attempt to create a rank " << shape2.size() << " object. The maximum rank is " << ESCRIPT_MAX_DATA_RANK << ".";
380         throw DataException(os.str());
381    }
382  
383    return shape2;
384  }
385    
386    }       // end anonymous namespace
387    
388    void DataLazy::LazyNodeSetup()
389    {
390    #ifdef _OPENMP
391        int numthreads=omp_get_max_threads();
392        m_samples_r.resize(numthreads*m_samplesize);
393        m_sampleids=new int[numthreads];
394        for (int i=0;i<numthreads;++i)
395        {
396            m_sampleids[i]=-1;  
397        }
398    #else
399        if (m_iscompl)
400        {
401            m_samples_c.resize(m_samplesize);
402        }
403        else
404        {
405            m_samples_r.resize(m_samplesize);
406        }
407        m_sampleids=new int[1];
408        m_sampleids[0]=-1;
409    #endif  // _OPENMP
410    }
411    
412    
413    // Creates an identity node
414  DataLazy::DataLazy(DataAbstract_ptr p)
415          : parent(p->getFunctionSpace(),p->getShape())
416          ,m_sampleids(0),
417          m_samples_r(1)
418  {
419     if (p->isLazy())
420     {
421          // I don't want identity of Lazy.
422          // Question: Why would that be so bad?
423          // Answer: We assume that the child of ID is something we can call getVector on
424          throw DataException("Programmer error - attempt to create identity from a DataLazy.");
425     }
426     else
427     {
428          DataReady_ptr dr=dynamic_pointer_cast<DataReady>(p);
429          makeIdentity(dr);
430  LAZYDEBUG(cout << "Wrapping " << dr.get() << " id=" << m_id.get() << endl;)
431     }
432  LAZYDEBUG(cout << "(1)Lazy created with " << m_samplesize << endl;)
433  }
434    
435  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op)
436          : parent(left->getFunctionSpace(),(getOpgroup(op)!=G_REDUCTION)?left->getShape():DataTypes::scalarShape),
437          m_op(op),
438          m_axis_offset(0),
439          m_transpose(0),
440          m_SL(0), m_SM(0), m_SR(0)
441  {
442     if ((getOpgroup(op)!=G_UNARY) && (getOpgroup(op)!=G_NP1OUT) && (getOpgroup(op)!=G_REDUCTION))
443     {
444          throw DataException("Programmer error - constructor DataLazy(left, op) will only process UNARY operations.");
445     }
446  
447     DataLazy_ptr lleft;
448     if (!left->isLazy())
449     {
450          lleft=DataLazy_ptr(new DataLazy(left));
451     }
452     else
453     {
454          lleft=dynamic_pointer_cast<DataLazy>(left);
455     }
456     m_readytype=lleft->m_readytype;
457     m_left=lleft;
458     m_samplesize=getNumDPPSample()*getNoValues();
459     m_children=m_left->m_children+1;
460     m_height=m_left->m_height+1;
461     LazyNodeSetup();
462     SIZELIMIT
463  }
464    
465    
466  // In this constructor we need to consider interpolation
467  DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
468          : parent(resultFS(left,right,op), resultShape(left,right,op)),
469          m_op(op),
470          m_SL(0), m_SM(0), m_SR(0)
471  {
472  LAZYDEBUG(cout << "Forming operator with " << left.get() << " " << right.get() << endl;)
473       if ((getOpgroup(op)!=G_BINARY))
474       {
475            throw DataException("Programmer error - constructor DataLazy(left, right, op) will only process BINARY operations.");
476       }
477    
478       if (getFunctionSpace()!=left->getFunctionSpace())    // left needs to be interpolated
479       {
480            FunctionSpace fs=getFunctionSpace();
481            Data ltemp(left);
482            Data tmp(ltemp,fs);
483            left=tmp.borrowDataPtr();
484       }
485       if (getFunctionSpace()!=right->getFunctionSpace())   // right needs to be interpolated
486       {
487            Data tmp(Data(right),getFunctionSpace());
488            right=tmp.borrowDataPtr();
489    LAZYDEBUG(cout << "Right interpolation required " << right.get() << endl;)
490       }
491       left->operandCheck(*right);
492    
493       if (left->isLazy())                  // the children need to be DataLazy. Wrap them in IDENTITY if required
494       {
495            m_left=dynamic_pointer_cast<DataLazy>(left);
496    LAZYDEBUG(cout << "Left is " << m_left->toString() << endl;)
497       }
498       else
499       {
500            m_left=DataLazy_ptr(new DataLazy(left));
501    LAZYDEBUG(cout << "Left " << left.get() << " wrapped " << m_left->m_id.get() << endl;)
502       }
503       if (right->isLazy())
504     {
505          m_right=dynamic_pointer_cast<DataLazy>(right);
506    LAZYDEBUG(cout << "Right is " << m_right->toString() << endl;)
507       }
508       else
509       {
510            m_right=DataLazy_ptr(new DataLazy(right));
511    LAZYDEBUG(cout << "Right " << right.get() << " wrapped " << m_right->m_id.get() << endl;)
512       }
513       char lt=m_left->m_readytype;
514       char rt=m_right->m_readytype;
515       if (lt=='E' || rt=='E')
516       {
517            m_readytype='E';
518       }
519       else if (lt=='T' || rt=='T')
520       {
521            m_readytype='T';
522       }
523       else
524       {
525            m_readytype='C';
526     }
527     m_samplesize=getNumDPPSample()*getNoValues();
528     m_children=m_left->m_children+m_right->m_children+2;
529     m_height=max(m_left->m_height,m_right->m_height)+1;
530       LazyNodeSetup();
531       SIZELIMIT
532    LAZYDEBUG(cout << "(3)Lazy created with " << m_samplesize << endl;)
533  }
534    
535  DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op, int axis_offset, int transpose)
536          : parent(resultFS(left,right,op), GTPShape(left,right, axis_offset, transpose, m_SL,m_SM, m_SR)),
537          m_op(op),
538          m_axis_offset(axis_offset),
539          m_transpose(transpose)
540  {
541     if ((getOpgroup(op)!=G_TENSORPROD))
542       {
543            throw DataException("Programmer error - constructor DataLazy(left, right, op, ax, tr) will only process BINARY operations which require parameters.");
544       }
545       if ((transpose>2) || (transpose<0))
546       {
547            throw DataException("DataLazy GeneralTensorProduct constructor: Error - transpose should be 0, 1 or 2");
548       }
549       if (getFunctionSpace()!=left->getFunctionSpace())    // left needs to be interpolated
550     {
551          FunctionSpace fs=getFunctionSpace();
552          Data ltemp(left);
553          Data tmp(ltemp,fs);
554          left=tmp.borrowDataPtr();
555     }
556     if (getFunctionSpace()!=right->getFunctionSpace())   // right needs to be interpolated
557     {
558          Data tmp(Data(right),getFunctionSpace());
559            right=tmp.borrowDataPtr();
560       }
561    //    left->operandCheck(*right);
562    
563       if (left->isLazy())                  // the children need to be DataLazy. Wrap them in IDENTITY if required
564       {
565            m_left=dynamic_pointer_cast<DataLazy>(left);
566     }
567     else
568     {
569          m_left=DataLazy_ptr(new DataLazy(left));
570     }
571     if (right->isLazy())
572     {
573          m_right=dynamic_pointer_cast<DataLazy>(right);
574       }
575       else
576       {
577            m_right=DataLazy_ptr(new DataLazy(right));
578       }
579       char lt=m_left->m_readytype;
580       char rt=m_right->m_readytype;
581       if (lt=='E' || rt=='E')
582       {
583            m_readytype='E';
584       }
585       else if (lt=='T' || rt=='T')
586       {
587            m_readytype='T';
588     }
589     else
590     {
591          m_readytype='C';
592     }
593       m_samplesize=getNumDPPSample()*getNoValues();
594       m_children=m_left->m_children+m_right->m_children+2;
595       m_height=max(m_left->m_height,m_right->m_height)+1;
596       LazyNodeSetup();
597       SIZELIMIT
598    LAZYDEBUG(cout << "(4)Lazy created with " << m_samplesize << endl;)
599    }
600    
601    
602  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, int axis_offset)
603            : parent(left->getFunctionSpace(), resultShape(left,op, axis_offset)),
604            m_op(op),
605            m_axis_offset(axis_offset),
606            m_transpose(0),
607            m_tol(0)
608    {
609       if ((getOpgroup(op)!=G_NP1OUT_P))
610       {
611            throw DataException("Programmer error - constructor DataLazy(left, op, ax) will only process UNARY operations which require parameters.");
612       }
613       DataLazy_ptr lleft;
614       if (!left->isLazy())
615       {
616            lleft=DataLazy_ptr(new DataLazy(left));
617       }
618       else
619       {
620            lleft=dynamic_pointer_cast<DataLazy>(left);
621       }
622       m_readytype=lleft->m_readytype;
623       m_left=lleft;
624     m_samplesize=getNumDPPSample()*getNoValues();
625     m_children=m_left->m_children+1;
626     m_height=m_left->m_height+1;
627       LazyNodeSetup();
628       SIZELIMIT
629    LAZYDEBUG(cout << "(5)Lazy created with " << m_samplesize << endl;)
630  }
631    
632    DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, double tol)
633            : parent(left->getFunctionSpace(), left->getShape()),
634            m_op(op),
635            m_axis_offset(0),
636            m_transpose(0),
637            m_tol(tol)
638    {
639       if ((getOpgroup(op)!=G_UNARY_P))
640       {
641            throw DataException("Programmer error - constructor DataLazy(left, op, tol) will only process UNARY operations which require parameters.");
642       }
643       DataLazy_ptr lleft;
644       if (!left->isLazy())
645       {
646            lleft=DataLazy_ptr(new DataLazy(left));
647       }
648       else
649       {
650            lleft=dynamic_pointer_cast<DataLazy>(left);
651       }
652       m_readytype=lleft->m_readytype;
653       m_left=lleft;
654       m_samplesize=getNumDPPSample()*getNoValues();
655       m_children=m_left->m_children+1;
656       m_height=m_left->m_height+1;
657       LazyNodeSetup();
658       SIZELIMIT
659    LAZYDEBUG(cout << "(6)Lazy created with " << m_samplesize << endl;)
660    }
661    
662  
663    DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, const int axis0, const int axis1)
664            : parent(left->getFunctionSpace(), SwapShape(left,axis0,axis1)),
665            m_op(op),
666            m_axis_offset(axis0),
667            m_transpose(axis1),
668            m_tol(0)
669  {
670       if ((getOpgroup(op)!=G_NP1OUT_2P))
671       {
672            throw DataException("Programmer error - constructor DataLazy(left, op, axis0, axis1) will only process UNARY operations which require two integer parameters.");
673       }
674       DataLazy_ptr lleft;
675       if (!left->isLazy())
676       {
677            lleft=DataLazy_ptr(new DataLazy(left));
678       }
679       else
680       {
681            lleft=dynamic_pointer_cast<DataLazy>(left);
682       }
683       m_readytype=lleft->m_readytype;
684       m_left=lleft;
685       m_samplesize=getNumDPPSample()*getNoValues();
686       m_children=m_left->m_children+1;
687       m_height=m_left->m_height+1;
688       LazyNodeSetup();
689       SIZELIMIT
690    LAZYDEBUG(cout << "(7)Lazy created with " << m_samplesize << endl;)
691  }  }
692    
693    
694  namespace
695  {
696  
697        inline int max3(int a, int b, int c)
698        {
699            int t=(a>b?a:b);
700            return (t>c?t:c);
701    
702        }
703  }
704    
705    DataLazy::DataLazy(DataAbstract_ptr mask, DataAbstract_ptr left, DataAbstract_ptr right/*, double tol*/)
706            : parent(left->getFunctionSpace(), left->getShape()),
707            m_op(CONDEVAL),
708            m_axis_offset(0),
709            m_transpose(0),
710            m_tol(0)
711    {
712    
713     DataLazy_ptr lmask;
714     DataLazy_ptr lleft;
715     DataLazy_ptr lright;
716     if (!mask->isLazy())
717     {
718          lmask=DataLazy_ptr(new DataLazy(mask));
719       }
720       else
721       {
722            lmask=dynamic_pointer_cast<DataLazy>(mask);
723       }
724       if (!left->isLazy())
725       {
726            lleft=DataLazy_ptr(new DataLazy(left));
727       }
728       else
729       {
730            lleft=dynamic_pointer_cast<DataLazy>(left);
731       }
732       if (!right->isLazy())
733       {
734            lright=DataLazy_ptr(new DataLazy(right));
735       }
736       else
737       {
738            lright=dynamic_pointer_cast<DataLazy>(right);
739       }
740       m_readytype=lmask->m_readytype;
741       if ((lleft->m_readytype!=lright->m_readytype) || (lmask->m_readytype!=lleft->m_readytype))
742       {
743            throw DataException("Programmer Error - condEval arguments must have the same readytype");
744       }
745       m_left=lleft;
746       m_right=lright;
747       m_mask=lmask;
748       m_samplesize=getNumDPPSample()*getNoValues();
749       m_children=m_left->m_children+m_right->m_children+m_mask->m_children+1;
750       m_height=max3(m_left->m_height,m_right->m_height,m_mask->m_height)+1;
751       LazyNodeSetup();
752       SIZELIMIT
753    LAZYDEBUG(cout << "(8)Lazy created with " << m_samplesize << endl;)
754    }
755    
756    
757    
758    DataLazy::~DataLazy()
759    {
760       delete[] m_sampleids;
761    }
762    
763    
764    /*
765      \brief Evaluates the expression using methods on Data.
766      This does the work for the collapse method.
767      For reasons of efficiency do not call this method on DataExpanded nodes.
768    */
769    DataReady_ptr
770    DataLazy::collapseToReady() const
771  {
772    if (m_readytype=='E')
773    {     // this is more an efficiency concern than anything else
774      throw DataException("Programmer Error - do not use collapse on Expanded data.");
775    }
776    if (m_op==IDENTITY)
777    {
778      return m_id;
779    }
780    DataReady_ptr pleft=m_left->collapseToReady();
781    Data left(pleft);
782    Data right;
783    if ((getOpgroup(m_op)==G_BINARY) || (getOpgroup(m_op)==G_TENSORPROD))
784    {
785      right=Data(m_right->collapseToReady());
786    }
787    Data result;
788    switch(m_op)
789    {
790      case ADD:
791          result=left+right;
792          break;
793      case SUB:
794          result=left-right;
795          break;
796      case MUL:
797          result=left*right;
798          break;
799      case DIV:
800          result=left/right;
801          break;
802      case SIN:
803          result=left.sin();
804          break;
805      case COS:
806          result=left.cos();
807          break;
808      case TAN:
809          result=left.tan();
810          break;
811      case ASIN:
812          result=left.asin();
813          break;
814      case ACOS:
815          result=left.acos();
816          break;
817      case ATAN:
818          result=left.atan();
819          break;
820      case SINH:
821          result=left.sinh();
822          break;
823      case COSH:
824          result=left.cosh();
825          break;
826      case TANH:
827          result=left.tanh();
828          break;
829      case ERF:
830          result=left.erf();
831          break;
832      case ASINH:
833          result=left.asinh();
834          break;
835      case ACOSH:
836          result=left.acosh();
837          break;
838      case ATANH:
839          result=left.atanh();
840          break;
841      case LOG10:
842          result=left.log10();
843          break;
844      case LOG:
845          result=left.log();
846          break;
847      case SIGN:
848          result=left.sign();
849          break;
850      case ABS:
851          result=left.abs();
852          break;
853      case NEG:
854          result=left.neg();
855          break;
856      case POS:
857          // it doesn't mean anything for delayed.
858          // it will just trigger a deep copy of the lazy object
859          throw DataException("Programmer error - POS not supported for lazy data.");
860          break;
861      case EXP:
862          result=left.exp();
863          break;
864      case SQRT:
865          result=left.sqrt();
866          break;
867      case RECIP:
868          result=left.oneOver();
869          break;
870      case GZ:
871          result=left.wherePositive();
872          break;
873      case LZ:
874          result=left.whereNegative();
875          break;
876      case GEZ:
877          result=left.whereNonNegative();
878          break;
879      case LEZ:
880          result=left.whereNonPositive();
881          break;
882      case NEZ:
883          result=left.whereNonZero(m_tol);
884          break;
885      case EZ:
886          result=left.whereZero(m_tol);
887          break;
888      case SYM:
889          result=left.symmetric();
890          break;
891      case NSYM:
892          result=left.antisymmetric();
893          break;
894      case PROD:
895          result=C_GeneralTensorProduct(left,right,m_axis_offset, m_transpose);
896          break;
897      case TRANS:
898          result=left.transpose(m_axis_offset);
899          break;
900      case TRACE:
901          result=left.trace(m_axis_offset);
902          break;
903      case SWAP:
904          result=left.swapaxes(m_axis_offset, m_transpose);
905          break;
906      case MINVAL:
907          result=left.minval();
908          break;
909      case MAXVAL:
910          result=left.maxval();
911          break;
912      case HER:
913          result=left.hermitian();
914          break;
915        default:
916            throw DataException("Programmer error - collapseToReady does not know how to resolve operator "+opToString(m_op)+".");
917      }
918      return result.borrowReadyPtr();
919    }
920    
921    /*
922       \brief Converts the DataLazy into an IDENTITY storing the value of the expression.
923       This method uses the original methods on the Data class to evaluate the expressions.
924       For this reason, it should not be used on DataExpanded instances. (To do so would defeat
925       the purpose of using DataLazy in the first place).
926    */
927    void
928    DataLazy::collapse() const
929    {
930      if (m_op==IDENTITY)
931      {
932            return;
933      }
934      if (m_readytype=='E')
935      {     // this is more an efficiency concern than anything else
936        throw DataException("Programmer Error - do not use collapse on Expanded data.");
937      }
938      m_id=collapseToReady();
939      m_op=IDENTITY;
940    }
941    
942    // The result will be stored in m_samples
943    // The return value is a pointer to the DataVector, offset is the offset within the return value
944    const DataTypes::RealVectorType*
945    DataLazy::resolveNodeSample(int tid, int sampleNo, size_t& roffset) const
946    {
947    LAZYDEBUG(cout << "Resolve sample " << toString() << endl;)
948            // collapse so we have an 'E' node or an IDENTITY for some other type
949      if (m_readytype!='E' && m_op!=IDENTITY)
950      {
951            collapse();
952      }
953      if (m_op==IDENTITY)  
954      {
955        const RealVectorType& vec=m_id->getVectorRO();
956        roffset=m_id->getPointOffset(sampleNo, 0);
957    #ifdef LAZY_STACK_PROF
958    int x;
959    if (&x<stackend[omp_get_thread_num()])
960    {
961           stackend[omp_get_thread_num()]=&x;
962    }
963    #endif
964        return &(vec);
965      }
966      if (m_readytype!='E')
967      {
968        throw DataException("Programmer Error - Collapse did not produce an expanded node.");
969      }
970      if (m_sampleids[tid]==sampleNo)
971      {
972            roffset=tid*m_samplesize;
973            return &(m_samples_r);            // sample is already resolved
974      }
975      m_sampleids[tid]=sampleNo;
976    
977      switch (getOpgroup(m_op))
978      {
979      case G_UNARY:
980      case G_UNARY_P: return resolveNodeUnary(tid, sampleNo, roffset);
981      case G_BINARY: return resolveNodeBinary(tid, sampleNo, roffset);
982      case G_NP1OUT: return resolveNodeNP1OUT(tid, sampleNo, roffset);
983      case G_NP1OUT_P: return resolveNodeNP1OUT_P(tid, sampleNo, roffset);
984      case G_TENSORPROD: return resolveNodeTProd(tid, sampleNo, roffset);
985      case G_NP1OUT_2P: return resolveNodeNP1OUT_2P(tid, sampleNo, roffset);
986      case G_REDUCTION: return resolveNodeReduction(tid, sampleNo, roffset);
987      case G_CONDEVAL: return resolveNodeCondEval(tid, sampleNo, roffset);
988      default:
989        throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");
990      }
991    }
992    
993    const DataTypes::RealVectorType*
994    DataLazy::resolveNodeUnary(int tid, int sampleNo, size_t& roffset) const
995    {
996            // we assume that any collapsing has been done before we get here
997            // since we only have one argument we don't need to think about only
998            // processing single points.
999            // we will also know we won't get identity nodes
1000      if (m_readytype!='E')
1001      {
1002        throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
1003      }
1004      if (m_op==IDENTITY)
1005      {
1006        throw DataException("Programmer error - resolveNodeUnary should not be called on identity nodes.");
1007      }
1008      const DataTypes::RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, roffset);
1009      const double* left=&((*leftres)[roffset]);
1010      roffset=m_samplesize*tid;
1011      double* result=&(m_samples_r[roffset]);
1012      if (m_op==POS)
1013      {
1014        // this should be prevented earlier
1015        // operation is meaningless for lazy
1016            throw DataException("Programmer error - POS not supported for lazy data.");    
1017      }
1018      tensor_unary_array_operation(m_samplesize,
1019                                 left,
1020                                 result,
1021                                 m_op,
1022                                 m_tol);  
1023      return &(m_samples_r);
1024    }
1025    
1026    
1027    const DataTypes::RealVectorType*
1028    DataLazy::resolveNodeReduction(int tid, int sampleNo, size_t& roffset) const
1029    {
1030            // we assume that any collapsing has been done before we get here
1031            // since we only have one argument we don't need to think about only
1032            // processing single points.
1033            // we will also know we won't get identity nodes
1034      if (m_readytype!='E')
1035      {
1036        throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
1037      }
1038      if (m_op==IDENTITY)
1039      {
1040        throw DataException("Programmer error - resolveNodeUnary should not be called on identity nodes.");
1041      }
1042      size_t loffset=0;
1043      const DataTypes::RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, loffset);
1044    
1045      roffset=m_samplesize*tid;
1046      unsigned int ndpps=getNumDPPSample();
1047      unsigned int psize=DataTypes::noValues(m_left->getShape());
1048      double* result=&(m_samples_r[roffset]);
1049      switch (m_op)
1050      {
1051        case MINVAL:
1052            {
1053              for (unsigned int z=0;z<ndpps;++z)
1054              {
1055                FMin op;
1056                *result=escript::reductionOpVector(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max());
1057                loffset+=psize;
1058                result++;
1059              }
1060            }
1061            break;
1062        case MAXVAL:
1063            {
1064              for (unsigned int z=0;z<ndpps;++z)
1065              {
1066              FMax op;
1067              *result=escript::reductionOpVector(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max()*-1);
1068              loffset+=psize;
1069              result++;
1070              }
1071            }
1072            break;
1073      default:
1074          throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");
1075    }
1076      return &(m_samples_r);
1077    }
1078    
1079    const DataTypes::RealVectorType*
1080    DataLazy::resolveNodeNP1OUT(int tid, int sampleNo, size_t& roffset) const
1081    {
1082            // we assume that any collapsing has been done before we get here
1083            // since we only have one argument we don't need to think about only
1084            // processing single points.
1085      if (m_readytype!='E')
1086      {
1087        throw DataException("Programmer error - resolveNodeNP1OUT should only be called on expanded Data.");
1088      }
1089      if (m_op==IDENTITY)
1090      {
1091        throw DataException("Programmer error - resolveNodeNP1OUT should not be called on identity nodes.");
1092      }
1093      size_t subroffset;
1094      const RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1095      roffset=m_samplesize*tid;
1096      size_t loop=0;
1097      size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1098      size_t step=getNoValues();
1099      size_t offset=roffset;
1100      switch (m_op)
1101      {
1102        case SYM:
1103            for (loop=0;loop<numsteps;++loop)
1104            {
1105                escript::symmetric(*leftres,m_left->getShape(),subroffset, m_samples_r, getShape(), offset);
1106                subroffset+=step;
1107                offset+=step;
1108            }
1109            break;
1110        case NSYM:
1111            for (loop=0;loop<numsteps;++loop)
1112            {
1113                escript::antisymmetric(*leftres,m_left->getShape(),subroffset, m_samples_r, getShape(), offset);
1114                subroffset+=step;
1115                offset+=step;
1116            }
1117            break;
1118        default:
1119            throw DataException("Programmer error - resolveNP1OUT can not resolve operator "+opToString(m_op)+".");
1120      }
1121      return &m_samples_r;
1122    }
1123    
1124    const DataTypes::RealVectorType*
1125    DataLazy::resolveNodeNP1OUT_P(int tid, int sampleNo, size_t& roffset) const
1126    {
1127            // we assume that any collapsing has been done before we get here
1128            // since we only have one argument we don't need to think about only
1129            // processing single points.
1130      if (m_readytype!='E')
1131      {
1132        throw DataException("Programmer error - resolveNodeNP1OUT_P should only be called on expanded Data.");
1133      }
1134      if (m_op==IDENTITY)
1135      {
1136        throw DataException("Programmer error - resolveNodeNP1OUT_P should not be called on identity nodes.");
1137      }
1138      size_t subroffset;
1139      size_t offset;
1140      const RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1141      roffset=m_samplesize*tid;
1142      offset=roffset;
1143      size_t loop=0;
1144      size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1145      size_t outstep=getNoValues();
1146      size_t instep=m_left->getNoValues();
1147      switch (m_op)
1148      {
1149        case TRACE:
1150            for (loop=0;loop<numsteps;++loop)
1151            {
1152                escript::trace(*leftres,m_left->getShape(),subroffset, m_samples_r ,getShape(),offset,m_axis_offset);
1153                subroffset+=instep;
1154                offset+=outstep;
1155            }
1156            break;
1157        case TRANS:
1158            for (loop=0;loop<numsteps;++loop)
1159            {
1160                escript::transpose(*leftres,m_left->getShape(),subroffset, m_samples_r, getShape(),offset,m_axis_offset);
1161                subroffset+=instep;
1162                offset+=outstep;
1163            }
1164            break;
1165        default:
1166            throw DataException("Programmer error - resolveNP1OUTP can not resolve operator "+opToString(m_op)+".");
1167      }
1168      return &m_samples_r;
1169    }
1170    
1171    
1172    const DataTypes::RealVectorType*
1173    DataLazy::resolveNodeNP1OUT_2P(int tid, int sampleNo, size_t& roffset) const
1174    {
1175      if (m_readytype!='E')
1176      {
1177        throw DataException("Programmer error - resolveNodeNP1OUT_2P should only be called on expanded Data.");
1178      }
1179      if (m_op==IDENTITY)
1180      {
1181        throw DataException("Programmer error - resolveNodeNP1OUT_2P should not be called on identity nodes.");
1182      }
1183      size_t subroffset;
1184      size_t offset;
1185      const RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1186      roffset=m_samplesize*tid;
1187      offset=roffset;
1188      size_t loop=0;
1189      size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1190      size_t outstep=getNoValues();
1191      size_t instep=m_left->getNoValues();
1192      switch (m_op)
1193      {
1194        case SWAP:
1195            for (loop=0;loop<numsteps;++loop)
1196            {
1197                escript::swapaxes(*leftres,m_left->getShape(),subroffset, m_samples_r, getShape(),offset, m_axis_offset, m_transpose);
1198                subroffset+=instep;
1199                offset+=outstep;
1200            }
1201            break;
1202        default:
1203            throw DataException("Programmer error - resolveNodeNP1OUT2P can not resolve operator "+opToString(m_op)+".");
1204      }
1205      return &m_samples_r;
1206    }
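// Editor's illustrative sketch (not part of the original source): TRACE, TRANS and SWAP above
// all share one traversal - walk the input sample in strides of m_left->getNoValues() (instep)
// while writing output datapoints in strides of getNoValues() (outstep).  A minimal stand-alone
// version of that per-datapoint walk, with a hypothetical kernel, might look like this:
static void applyPerDatapointSketch(const double* in, double* out,
                                    size_t numsteps, size_t instep, size_t outstep,
                                    void (*kernel)(const double* src, double* dest))
{
    size_t inoff = 0, outoff = 0;
    for (size_t dp = 0; dp < numsteps; ++dp)   // one iteration per datapoint in the sample
    {
        kernel(in + inoff, out + outoff);      // e.g. the trace or transpose of one datapoint
        inoff  += instep;                      // input and output datapoints can differ in size,
        outoff += outstep;                     // hence the two separate strides
    }
}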
1207    
1208    const DataTypes::RealVectorType*
1209    DataLazy::resolveNodeCondEval(int tid, int sampleNo, size_t& roffset) const
1210    {
1211      if (m_readytype!='E')
1212      {
1213        throw DataException("Programmer error - resolveNodeCondEval should only be called on expanded Data.");
1214      }
1215      if (m_op!=CONDEVAL)
1216      {
1217        throw DataException("Programmer error - resolveNodeCondEval should only be called on CONDEVAL nodes.");
1218      }
1219      size_t subroffset;
1220    
1221      const RealVectorType* maskres=m_mask->resolveNodeSample(tid, sampleNo, subroffset);
1222      const RealVectorType* srcres=0;
1223      if ((*maskres)[subroffset]>0)
1224      {
1225            srcres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1226      }
1227      else
1228      {
1229            srcres=m_right->resolveNodeSample(tid, sampleNo, subroffset);
1230      }
1231    
1232      // Now we need to copy the result
1233    
1234      roffset=m_samplesize*tid;
1235      for (int i=0;i<m_samplesize;++i)
1236      {
1237            m_samples_r[roffset+i]=(*srcres)[subroffset+i];  
1238    }
1239    
1240    return &m_samples_r;
1241  }
1242    
1243    // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1244    // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1245    // If both children are expanded, then we can process them in a single operation (we treat
1246    // the whole sample as one big datapoint).
1247    // If one of the children is not expanded, then we need to treat each point in the sample
1248    // individually.
1249    // There is an additional complication when scalar operations are considered.
1250    // For example, 2+Vector.
1251    // In this case each double within the point is treated individually
1252    const DataTypes::RealVectorType*
1253    DataLazy::resolveNodeBinary(int tid, int sampleNo, size_t& roffset) const
1254    {
1255    LAZYDEBUG(cout << "Resolve binary: " << toString() << endl;)
1256    
1257      size_t lroffset=0, rroffset=0;        // offsets in the left and right result vectors
1258            // first work out which of the children are expanded
1259      bool leftExp=(m_left->m_readytype=='E');
1260      bool rightExp=(m_right->m_readytype=='E');
1261      if (!leftExp && !rightExp)
1262      {
1263            throw DataException("Programmer Error - please use collapse if neither argument has type 'E'.");
1264      }
1265      bool leftScalar=(m_left->getRank()==0);
1266      bool rightScalar=(m_right->getRank()==0);
1267      if ((m_left->getRank()!=m_right->getRank()) && (!leftScalar && !rightScalar))
1268      {
1269            throw DataException("resolveBinary - ranks of arguments must match unless one of them is scalar.");
1270      }
1271      size_t leftsize=m_left->getNoValues();
1272      size_t rightsize=m_right->getNoValues();
1273      size_t chunksize=1;                   // how many doubles will be processed in one go
1274      int leftstep=0;               // how far should the left offset advance after each step
1275      int rightstep=0;
1276      int numsteps=0;               // total number of steps for the inner loop
1277      int oleftstep=0;      // the o variables refer to the outer loop
1278      int orightstep=0;     // The outer loop is only required in cases where there is an extended scalar
1279      int onumsteps=1;
1280      
1281      bool LES=(leftExp && leftScalar);     // Left is an expanded scalar
1282      bool RES=(rightExp && rightScalar);
1283      bool LS=(!leftExp && leftScalar);     // left is a single scalar
1284      bool RS=(!rightExp && rightScalar);
1285      bool LN=(!leftExp && !leftScalar);    // left is a single non-scalar
1286      bool RN=(!rightExp && !rightScalar);
1287      bool LEN=(leftExp && !leftScalar);    // left is an expanded non-scalar
1288      bool REN=(rightExp && !rightScalar);
1289    
1290      if ((LES && RES) || (LEN && REN))     // both are Expanded scalars or both are expanded non-scalars
1291      {
1292            chunksize=m_left->getNumDPPSample()*leftsize;
1293            leftstep=0;
1294            rightstep=0;
1295            numsteps=1;
1296      }
1297      else if (LES || RES)
1298      {
1299            chunksize=1;
1300            if (LES)                // left is an expanded scalar
1301            {
1302                    if (RS)
1303                    {
1304                       leftstep=1;
1305                       rightstep=0;
1306                       numsteps=m_left->getNumDPPSample();
1307                    }
1308                    else            // RN or REN
1309                    {
1310                       leftstep=0;
1311                       oleftstep=1;
1312                       rightstep=1;
1313                       orightstep=(RN ? -(int)rightsize : 0);
1314                       numsteps=rightsize;
1315                       onumsteps=m_left->getNumDPPSample();
1316                    }
1317            }
1318            else            // right is an expanded scalar
1319            {
1320                    if (LS)
1321                    {
1322                       rightstep=1;
1323                       leftstep=0;
1324                       numsteps=m_right->getNumDPPSample();
1325                    }
1326                    else
1327                    {
1328                       rightstep=0;
1329                       orightstep=1;
1330                       leftstep=1;
1331                       oleftstep=(LN ? -(int)leftsize : 0);
1332                       numsteps=leftsize;
1333                       onumsteps=m_right->getNumDPPSample();
1334                    }
1335            }
1336      }
1337      else  // this leaves (LEN, RS), (LEN, RN) and their transposes
1338      {
1339            if (LEN)        // and Right will be a single value
1340            {
1341                    chunksize=rightsize;
1342                    leftstep=rightsize;
1343                    rightstep=0;
1344                    numsteps=m_left->getNumDPPSample();
1345                    if (RS)
1346                    {
1347                       numsteps*=leftsize;
1348                    }
1349            }
1350            else    // REN
1351            {
1352                    chunksize=leftsize;
1353                    rightstep=leftsize;
1354                    leftstep=0;
1355                    numsteps=m_right->getNumDPPSample();
1356                    if (LS)
1357                    {
1358                       numsteps*=rightsize;
1359                    }
1360            }
1361      }
1362    
1363      int resultStep=max(leftstep,rightstep);       // only one (at most) should be !=0
1364            // Get the values of sub-expressions
1365      const RealVectorType* left=m_left->resolveNodeSample(tid,sampleNo,lroffset);      
1366      const RealVectorType* right=m_right->resolveNodeSample(tid,sampleNo,rroffset);
1367    LAZYDEBUG(cout << "Post sub calls in " << toString() << endl;)
1368    LAZYDEBUG(cout << "shapes=" << DataTypes::shapeToString(m_left->getShape()) << "," << DataTypes::shapeToString(m_right->getShape()) << endl;)
1369    LAZYDEBUG(cout << "chunksize=" << chunksize << endl << "leftstep=" << leftstep << " rightstep=" << rightstep;)
1370    LAZYDEBUG(cout << " numsteps=" << numsteps << endl << "oleftstep=" << oleftstep << " orightstep=" << orightstep;)
1371    LAZYDEBUG(cout << "onumsteps=" << onumsteps << endl;)
1372    LAZYDEBUG(cout << " DPPS=" << m_left->getNumDPPSample() << "," <<m_right->getNumDPPSample() << endl;)
1373    LAZYDEBUG(cout << "" << LS << RS << LN << RN << LES << RES <<LEN << REN <<   endl;)
1374    
1375    LAZYDEBUG(cout << "Left res["<< lroffset<< "]=" << (*left)[lroffset] << endl;)
1376    LAZYDEBUG(cout << "Right res["<< rroffset<< "]=" << (*right)[rroffset] << endl;)
1377    
1378    
1379      roffset=m_samplesize*tid;
1380      double* resultp=&(m_samples_r[roffset]);                // results are stored at the vector offset we received
1381      switch(m_op)
1382      {
1383        case ADD:
1384            //PROC_OP(NO_ARG,plus<double>());
1385          escript::binaryOpVectorLazyHelper<real_t, real_t, real_t>(resultp,
1386                 &(*left)[0],
1387                 &(*right)[0],
1388                 chunksize,
1389                 onumsteps,
1390                 numsteps,
1391                 resultStep,
1392                 leftstep,
1393                 rightstep,
1394                 oleftstep,
1395                 orightstep,
1396                 lroffset,
1397                 rroffset,
1398                 escript::ES_optype::ADD);  
1399            break;
1400        case SUB:
1401          escript::binaryOpVectorLazyHelper<real_t, real_t, real_t>(resultp,
1402                 &(*left)[0],
1403                 &(*right)[0],
1404                 chunksize,
1405                 onumsteps,
1406                 numsteps,
1407                 resultStep,
1408                 leftstep,
1409                 rightstep,
1410                 oleftstep,
1411                 orightstep,
1412                 lroffset,
1413                 rroffset,
1414                 escript::ES_optype::SUB);        
1415            //PROC_OP(NO_ARG,minus<double>());
1416            break;
1417        case MUL:
1418            //PROC_OP(NO_ARG,multiplies<double>());
1419          escript::binaryOpVectorLazyHelper<real_t, real_t, real_t>(resultp,
1420                 &(*left)[0],
1421                 &(*right)[0],
1422                 chunksize,
1423                 onumsteps,
1424                 numsteps,
1425                 resultStep,
1426                 leftstep,
1427                 rightstep,
1428                 oleftstep,
1429                 orightstep,
1430                 lroffset,
1431                 rroffset,
1432                 escript::ES_optype::MUL);        
1433            break;
1434        case DIV:
1435            //PROC_OP(NO_ARG,divides<double>());
1436          escript::binaryOpVectorLazyHelper<real_t, real_t, real_t>(resultp,
1437                 &(*left)[0],
1438                 &(*right)[0],
1439                 chunksize,
1440                 onumsteps,
1441                 numsteps,
1442                 resultStep,
1443                 leftstep,
1444                 rightstep,
1445                 oleftstep,
1446                 orightstep,
1447                 lroffset,
1448                 rroffset,
1449                 escript::ES_optype::DIV);        
1450            break;
1451        case POW:
1452           //PROC_OP(double (double,double),::pow);
1453          escript::binaryOpVectorLazyHelper<real_t, real_t, real_t>(resultp,
1454                 &(*left)[0],
1455                 &(*right)[0],
1456                 chunksize,
1457                 onumsteps,
1458                 numsteps,
1459                 resultStep,
1460                 leftstep,
1461                 rightstep,
1462                 oleftstep,
1463                 orightstep,
1464                 lroffset,
1465                 rroffset,
1466                 escript::ES_optype::POW);        
1467            break;
1468        default:
1469            throw DataException("Programmer error - resolveBinary can not resolve operator "+opToString(m_op)+".");
1470      }
1471    LAZYDEBUG(cout << "Result res[" << roffset<< "]" << m_samples_r[roffset] << endl;)
1472      return &m_samples_r;
1473    }
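// Editor's illustrative sketch (not part of the original source): the chunksize/step values
// computed above describe a two-level traversal.  This stand-alone approximation shows the
// intended walking order using plain addition; it makes no claim about the real
// escript::binaryOpVectorLazyHelper implementation (ptrdiff_t comes from <cstddef>).
static void lazyBinaryTraversalSketch(double* result,
                                      const double* left, const double* right,
                                      size_t chunksize, int onumsteps, int numsteps,
                                      int resultStep, int leftstep, int rightstep,
                                      int oleftstep, int orightstep,
                                      size_t lroffset, size_t rroffset)
{
    ptrdiff_t lo = lroffset, ro = rroffset, res = 0;
    for (int o = 0; o < onumsteps; ++o)             // outer loop: only >1 when a lone scalar
    {                                               // must be stretched across a datapoint
        for (int s = 0; s < numsteps; ++s)
        {
            for (size_t c = 0; c < chunksize; ++c)  // one contiguous chunk of doubles
                result[res + c] = left[lo + c] + right[ro + c];
            res += resultStep;
            lo  += leftstep;
            ro  += rightstep;
        }
        lo += oleftstep;                            // may be negative: rewinds the non-scalar side
        ro += orightstep;
    }
}
// For example, an expanded scalar combined with a constant non-scalar uses chunksize=1 and the
// outer loop, while two expanded non-scalars collapse to a single chunk covering the whole sample.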
1474    
1475    
1476    // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1477    // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1478    // unlike the other resolve helpers, we must treat these datapoints separately.
1479    const DataTypes::RealVectorType*
1480    DataLazy::resolveNodeTProd(int tid, int sampleNo, size_t& roffset) const
1481    {
1482    LAZYDEBUG(cout << "Resolve TensorProduct: " << toString() << endl;)
1483    
1484      size_t lroffset=0, rroffset=0;        // offsets in the left and right result vectors
1485            // first work out which of the children are expanded
1486      bool leftExp=(m_left->m_readytype=='E');
1487      bool rightExp=(m_right->m_readytype=='E');
1488      int steps=getNumDPPSample();
1489      int leftStep=(leftExp? m_left->getNoValues() : 0);            // do not have scalars as input to this method
1490      int rightStep=(rightExp?m_right->getNoValues() : 0);
1491    
1492      int resultStep=getNoValues();
1493      roffset=m_samplesize*tid;
1494      size_t offset=roffset;
1495    
1496      const RealVectorType* left=m_left->resolveNodeSample(tid, sampleNo, lroffset);
1497    
1498      const RealVectorType* right=m_right->resolveNodeSample(tid, sampleNo, rroffset);
1499    
1500    LAZYDEBUG(cerr << "[Left shape]=" << DataTypes::shapeToString(m_left->getShape()) << "\n[Right shape]=" << DataTypes::shapeToString(m_right->getShape()) << " result=" <<DataTypes::shapeToString(getShape()) <<  endl;
1501    cout << getNoValues() << endl;)
1502    
1503    
1504    LAZYDEBUG(cerr << "Post sub calls: " << toString() << endl;)
1505    LAZYDEBUG(cout << "LeftExp=" << leftExp << " rightExp=" << rightExp << endl;)
1506    LAZYDEBUG(cout << "LeftR=" << m_left->getRank() << " rightExp=" << m_right->getRank() << endl;)
1507    LAZYDEBUG(cout << "LeftSize=" << m_left->getNoValues() << " RightSize=" << m_right->getNoValues() << endl;)
1508    LAZYDEBUG(cout << "m_samplesize=" << m_samplesize << endl;)
1509    LAZYDEBUG(cout << "outputshape=" << DataTypes::shapeToString(getShape()) << endl;)
1510    LAZYDEBUG(cout << "DPPS=" << m_right->getNumDPPSample() <<"."<<endl;)
1511    
1512      double* resultp=&(m_samples_r[offset]);         // results are stored at the vector offset we received
1513      switch(m_op)
1514      {
1515        case PROD:
1516            for (int i=0;i<steps;++i,resultp+=resultStep)
1517            {
1518              const double *ptr_0 = &((*left)[lroffset]);
1519              const double *ptr_1 = &((*right)[rroffset]);
1520    
1521    LAZYDEBUG(cout << DataTypes::pointToString(*left, m_left->getShape(),lroffset,"LEFT") << endl;)
1522    LAZYDEBUG(cout << DataTypes::pointToString(*right,m_right->getShape(),rroffset, "RIGHT") << endl;)
1523    
1524              matrix_matrix_product(m_SL, m_SM, m_SR, ptr_0, ptr_1, resultp, m_transpose);
1525    
1526              lroffset+=leftStep;
1527              rroffset+=rightStep;
1528            }
1529            break;
1530        default:
1531            throw DataException("Programmer error - resolveTProduct can not resolve operator "+opToString(m_op)+".");
1532      }
1533      roffset=offset;
1534      return &m_samples_r;
1535    }
1536    
1537    
1538    const DataTypes::RealVectorType*
1539    DataLazy::resolveSample(int sampleNo, size_t& roffset) const
1540    {
1541    #ifdef _OPENMP
1542            int tid=omp_get_thread_num();
1543    #else
1544            int tid=0;
1545    #endif
1546    
1547    #ifdef LAZY_STACK_PROF
1548            stackstart[tid]=&tid;
1549            stackend[tid]=&tid;
1550            const DataTypes::RealVectorType* r=resolveNodeSample(tid, sampleNo, roffset);
1551            size_t d=(size_t)stackstart[tid]-(size_t)stackend[tid];
1552            #pragma omp critical
1553            if (d>maxstackuse)
1554            {
1555    cout << "Max resolve Stack use " << d << endl;
1556                    maxstackuse=d;
1557            }
1558            return r;
1559    #else
1560            return resolveNodeSample(tid, sampleNo, roffset);
1561    #endif
1562    }
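// Editor's illustrative sketch (not part of the original source): under LAZY_STACK_PROF the
// code above stores the address of a local in stackstart/stackend before resolving, and the
// recursive resolvers are assumed to record the address of one of their own locals as they go
// deeper; the difference between the two addresses then approximates peak stack use.
// A minimal stand-alone demonstration of the same idea (all names hypothetical):
static const char* g_deepestMarker = 0;          // written by the deepest call reached

static void recurseAndMarkSketch(int depth)
{
    char marker;                                 // lives on the current stack frame
    g_deepestMarker = &marker;
    if (depth > 0)
        recurseAndMarkSketch(depth - 1);
}

static size_t estimateStackUseSketch(int depth)
{
    char start;                                  // address near the top-level frame
    g_deepestMarker = &start;
    recurseAndMarkSketch(depth);
    // stacks usually grow downwards, so the starting address is normally the larger one
    return (size_t)&start - (size_t)g_deepestMarker;
}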
1563    
1564    
1565    // This needs to do the work of the identity constructor
1566    void
1567    DataLazy::resolveToIdentity()
1568    {
1569       if (m_op==IDENTITY)
1570            return;
1571       DataReady_ptr p=resolveNodeWorker();
1572       makeIdentity(p);
1573    }
1574    
1575    void DataLazy::makeIdentity(const DataReady_ptr& p)
1576    {
1577       m_op=IDENTITY;
1578       m_axis_offset=0;
1579       m_transpose=0;
1580       m_SL=m_SM=m_SR=0;
1581       m_children=m_height=0;
1582       m_id=p;
1583       if(p->isConstant()) {m_readytype='C';}
1584       else if(p->isExpanded()) {m_readytype='E';}
1585       else if (p->isTagged()) {m_readytype='T';}
1586       else {throw DataException("Unknown DataReady instance in convertToIdentity constructor.");}
1587       m_samplesize=p->getNumDPPSample()*p->getNoValues();
1588       m_left.reset();
1589       m_right.reset();
1590       m_iscompl=p->isComplex();
1591    }
1592    
1593    
1594  DataReady_ptr
1595  DataLazy::resolve()
1596  {
1597      resolveToIdentity();
1598      return m_id;
1599  }
1600  
1601  
1602  /* This is really a static method but I think that caused problems in windows */
1603  void
1604    DataLazy::resolveGroupWorker(std::vector<DataLazy*>& dats)
1605    {
1606      if (dats.empty())
1607      {
1608            return;
1609      }
1610      vector<DataLazy*> work;
1611      FunctionSpace fs=dats[0]->getFunctionSpace();
1612      bool match=true;
1613      for (int i=dats.size()-1;i>=0;--i)
1614      {
1615            if (dats[i]->m_readytype!='E')
1616            {
1617                    dats[i]->collapse();
1618            }
1619            if (dats[i]->m_op!=IDENTITY)
1620            {
1621                    work.push_back(dats[i]);
1622                    if (fs!=dats[i]->getFunctionSpace())
1623                    {
1624                            match=false;
1625                    }
1626            }
1627      }
1628      if (work.empty())
1629      {
1630            return;         // no work to do
1631      }
1632      if (match)    // all functionspaces match.  Yes I realise this is overly strict
1633      {             // it is possible that dats[0] is one of the objects which we discarded and
1634                    // all the other functionspaces match.
1635            vector<DataExpanded*> dep;
1636            vector<RealVectorType*> vecs;
1637            for (int i=0;i<work.size();++i)
1638            {
1639                    dep.push_back(new DataExpanded(fs,work[i]->getShape(), RealVectorType(work[i]->getNoValues())));
1640                    vecs.push_back(&(dep[i]->getVectorRW()));
1641            }
1642            int totalsamples=work[0]->getNumSamples();
1643            const RealVectorType* res=0; // Storage for answer
1644            int sample;
1645            #pragma omp parallel private(sample, res)
1646            {
1647                size_t roffset=0;
1648                #pragma omp for schedule(static)
1649                for (sample=0;sample<totalsamples;++sample)
1650                {
1651                    roffset=0;
1652                    int j;
1653                    for (j=work.size()-1;j>=0;--j)
1654                    {
1655  #ifdef _OPENMP
1656                      res=work[j]->resolveNodeSample(omp_get_thread_num(),sample,roffset);
1657  #else
1658                      res=work[j]->resolveNodeSample(0,sample,roffset);
1659  #endif
1660                      RealVectorType::size_type outoffset=dep[j]->getPointOffset(sample,0);
1661                      memcpy(&((*vecs[j])[outoffset]),&((*res)[roffset]),work[j]->m_samplesize*sizeof(RealVectorType::ElementType));
1662                  }
1663              }
1664            }
1665            // Now we need to load the new results as identity ops into the lazy nodes
1666            for (int i=work.size()-1;i>=0;--i)
1667            {
1668                work[i]->makeIdentity(REFCOUNTNS::dynamic_pointer_cast<DataReady>(dep[i]->getPtr()));
1669            }
1670      }
1671      else  // functionspaces do not match
1672      {
1673            for (int i=0;i<work.size();++i)
1674            {
1675                    work[i]->resolveToIdentity();
1676            }
1677      }
1678    }
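// Editor's illustrative sketch (not part of the original source): the batching idea used above -
// when several independent expressions share the same samples, make one parallel sweep over the
// samples and evaluate every expression inside it, rather than one parallel region per
// expression.  A generic stand-alone version (assumes <vector> and <functional>; names hypothetical):
static void resolveBatchSketch(const std::vector<std::function<void(int)> >& jobs,
                               int totalsamples)
{
    #pragma omp parallel
    {
        #pragma omp for schedule(static)
        for (int sample = 0; sample < totalsamples; ++sample)
        {
            // keeping the loop over jobs innermost means each sample's work stays on one
            // thread, matching the per-thread result buffers used by the lazy resolver
            for (size_t j = 0; j < jobs.size(); ++j)
                jobs[j](sample);
        }
    }
}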
1679    
1680    
1681    
1682    // This version of resolve uses storage in each node to hold results
1683    DataReady_ptr
1684    DataLazy::resolveNodeWorker()
1685    {
1686      if (m_readytype!='E')         // if the whole sub-expression is Constant or Tagged, then evaluate it normally
1687      {
1688        collapse();
1689      }
1690      if (m_op==IDENTITY)           // So a lazy expression of Constant or Tagged data will be returned here.
1691      {
1692        return m_id;
1693      }
1694            // from this point on we must have m_op!=IDENTITY and m_readytype=='E'
1695      DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(),  RealVectorType(getNoValues()));
1696      RealVectorType& resvec=result->getVectorRW();
1697    DataReady_ptr resptr=DataReady_ptr(result);
1698    
1699    int sample;
1700    int totalsamples=getNumSamples();
1701    const RealVectorType* res=0;       // Storage for answer
1702  LAZYDEBUG(cout << "Total number of samples=" <<totalsamples << endl;)
1703    #pragma omp parallel private(sample,res)
1704    {
1705            size_t roffset=0;
1706    #ifdef LAZY_STACK_PROF
1707            stackstart[omp_get_thread_num()]=&roffset;
1708            stackend[omp_get_thread_num()]=&roffset;
1709    #endif
1710            #pragma omp for schedule(static)
1711            for (sample=0;sample<totalsamples;++sample)
1712            {
1713                    roffset=0;
1714  #ifdef _OPENMP
1715                  res=resolveNodeSample(omp_get_thread_num(),sample,roffset);
1716  #else
1717                  res=resolveNodeSample(0,sample,roffset);
1718  #endif
1719  LAZYDEBUG(cout << "Sample #" << sample << endl;)
1720  LAZYDEBUG(cout << "Final res[" << roffset<< "]=" << (*res)[roffset] << (*res)[roffset]<< endl; )
1721                  RealVectorType::size_type outoffset=result->getPointOffset(sample,0);
1722                  memcpy(&(resvec[outoffset]),&((*res)[roffset]),m_samplesize*sizeof(RealVectorType::ElementType));
1723          }
1724      }
1725    #ifdef LAZY_STACK_PROF
1726      for (int i=0;i<getNumberOfThreads();++i)
1727      {
1728            size_t r=((size_t)stackstart[i] - (size_t)stackend[i]);
1729    //      cout << i << " " << stackstart[i] << " .. " << stackend[i] << " = " <<  r << endl;
1730            if (r>maxstackuse)
1731            {
1732                    maxstackuse=r;
1733            }
1734    }
1735    cout << "Max resolve Stack use=" << maxstackuse << endl;
1736    #endif
1737    return resptr;
1738  }
1739    
# Line 430  std::string Line 1741  std::string
1741  DataLazy::toString() const
1742  {
1743    ostringstream oss;
1744    oss << "Lazy Data: [depth=" << m_height<< "] ";
1745    switch (escriptParams.getLazyStrFmt())
1746      {
1747      case 1:       // tree format
1748            oss << endl;
1749            intoTreeString(oss,"");
1750            break;
1751      case 2:       // just the depth
1752            break;
1753      default:
1754            intoString(oss);
1755            break;
1756      }
1757    return oss.str();
1758  }
1759    
1760    
1761  void
1762  DataLazy::intoString(ostringstream& oss) const
1763  {
1764    //    oss << "[" << m_children <<";"<<m_height <<"]";
1765    switch (getOpgroup(m_op))
1766    {
1767    case G_IDENTITY:
1768          if (m_id->isExpanded())
1769          {
1770               oss << "E";
1771            }
1772            else if (m_id->isTagged())
1773            {
1774              oss << "T";
1775            }
1776            else if (m_id->isConstant())
1777            {
1778              oss << "C";
1779            }
1780            else
1781            {
1782              oss << "?";
1783            }
1784            oss << '@' << m_id.get();
1785            break;
1786    case G_BINARY:
1787          oss << '(';
1788          m_left->intoString(oss);
1789          oss << ' ' << opToString(m_op) << ' ';
1790          m_right->intoString(oss);
1791          oss << ')';
1792          break;
1793    case G_UNARY:
1794    case G_UNARY_P:
1795    case G_NP1OUT:
1796    case G_NP1OUT_P:
1797    case G_REDUCTION:
1798            oss << opToString(m_op) << '(';
1799            m_left->intoString(oss);
1800            oss << ')';
1801            break;
1802      case G_TENSORPROD:
1803            oss << opToString(m_op) << '(';
1804            m_left->intoString(oss);
1805            oss << ", ";
1806            m_right->intoString(oss);
1807            oss << ')';
1808            break;
1809      case G_NP1OUT_2P:
1810            oss << opToString(m_op) << '(';
1811            m_left->intoString(oss);
1812            oss << ", " << m_axis_offset << ", " << m_transpose;
1813            oss << ')';
1814            break;
1815      case G_CONDEVAL:
1816            oss << opToString(m_op)<< '(' ;
1817            m_mask->intoString(oss);
1818            oss << " ? ";
1819            m_left->intoString(oss);
1820            oss << " : ";
1821            m_right->intoString(oss);
1822            oss << ')';
1823            break;
1824      default:
1825            oss << "UNKNOWN";
1826      }
1827    }
1828    
1829    
1830    void
1831    DataLazy::intoTreeString(ostringstream& oss, string indent) const
1832    {
1833      oss << '[' << m_rank << ':' << setw(3) << m_samplesize << "] " << indent;
1834      switch (getOpgroup(m_op))
1835      {
1836      case G_IDENTITY:
1837            if (m_id->isExpanded())
1838            {
1839               oss << "E";
1840            }
1841            else if (m_id->isTagged())
1842            {
1843              oss << "T";
1844            }
1845            else if (m_id->isConstant())
1846            {
1847              oss << "C";
1848            }
1849            else
1850            {
1851              oss << "?";
1852            }
1853            oss << '@' << m_id.get() << endl;
1854            break;
1855      case G_BINARY:
1856            oss << opToString(m_op) << endl;
1857            indent+='.';
1858            m_left->intoTreeString(oss, indent);
1859            m_right->intoTreeString(oss, indent);
1860            break;
1861      case G_UNARY:
1862      case G_UNARY_P:
1863      case G_NP1OUT:
1864      case G_NP1OUT_P:
1865      case G_REDUCTION:
1866            oss << opToString(m_op) << endl;
1867            indent+='.';
1868            m_left->intoTreeString(oss, indent);
1869            break;
1870      case G_TENSORPROD:
1871            oss << opToString(m_op) << endl;
1872            indent+='.';
1873            m_left->intoTreeString(oss, indent);
1874            m_right->intoTreeString(oss, indent);
1875            break;
1876      case G_NP1OUT_2P:
1877            oss << opToString(m_op) << ", " << m_axis_offset << ", " << m_transpose<< endl;
1878            indent+='.';
1879            m_left->intoTreeString(oss, indent);
1880            break;
1881    default:
1882          oss << "UNKNOWN";
1883    }
1884  }
1885    
1886  
1887  DataAbstract*
1888  DataLazy::deepCopy() const
1889  {
1890    switch (getOpgroup(m_op))
1891    {
1892    case G_IDENTITY:  return new DataLazy(m_id->deepCopy()->getPtr());
1893      case G_UNARY:
1894      case G_REDUCTION:      return new DataLazy(m_left->deepCopy()->getPtr(),m_op);
1895      case G_UNARY_P:       return new DataLazy(m_left->deepCopy()->getPtr(), m_op, m_tol);
1896      case G_BINARY:        return new DataLazy(m_left->deepCopy()->getPtr(),m_right->deepCopy()->getPtr(),m_op);
1897      case G_NP1OUT: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(),m_op);
1898      case G_TENSORPROD: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
1899      case G_NP1OUT_P:   return new DataLazy(m_left->deepCopy()->getPtr(),m_op,  m_axis_offset);
1900      case G_NP1OUT_2P:  return new DataLazy(m_left->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
1901      default:
1902            throw DataException("Programmer error - do not know how to deepcopy operator "+opToString(m_op)+".");
1903    }    }
1904  }
1905    
1906    // For this, we don't care what op you were doing because the answer is now zero
1907    DataAbstract*
1908    DataLazy::zeroedCopy() const
1909    {
1910      return new DataLazy(m_id->zeroedCopy()->getPtr());
1911    }
1912    
1913  // There is no single, natural interpretation of getLength on DataLazy.
1914    // Instances of DataReady can look at the size of their vectors.
1915    // For lazy though, it could be the size the data would be if it were resolved;
1916    // or it could be some function of the lengths of the DataReady instances which
1917    // form part of the expression.
1918    // Rather than have people making assumptions, I have disabled the method.
1919    DataTypes::RealVectorType::size_type
1920  DataLazy::getLength() const
1921  {
1922    throw DataException("getLength() does not make sense for lazy data.");
1923  }
1924    
1925    
# Line 486  DataLazy::getSlice(const DataTypes::Regi Line 1929  DataLazy::getSlice(const DataTypes::Regi
1929    throw DataException("getSlice - not implemented for Lazy objects.");    throw DataException("getSlice - not implemented for Lazy objects.");
1930  }  }
1931    
1932  DataTypes::ValueType::size_type  
1933    // To do this we need to rely on our child nodes
1934    DataTypes::RealVectorType::size_type
1935    DataLazy::getPointOffset(int sampleNo,
1936                     int dataPointNo)
1937    {
1938      if (m_op==IDENTITY)
1939      {
1940            return m_id->getPointOffset(sampleNo,dataPointNo);
1941      }
1942      if (m_readytype!='E')
1943      {
1944            collapse();
1945            return m_id->getPointOffset(sampleNo,dataPointNo);
1946      }
1947      // at this point we do not have an identity node and the expression will be Expanded
1948      // so we only need to know which child to ask
1949      if (m_left->m_readytype=='E')
1950      {
1951            return m_left->getPointOffset(sampleNo,dataPointNo);
1952      }
1953      else
1954      {
1955            return m_right->getPointOffset(sampleNo,dataPointNo);
1956      }
1957    }
1958    
1959    // To do this we need to rely on our child nodes
1960    DataTypes::RealVectorType::size_type
1961  DataLazy::getPointOffset(int sampleNo,
1962                   int dataPointNo) const
1963  {
1964    if (m_op==IDENTITY)
1965      {
1966            return m_id->getPointOffset(sampleNo,dataPointNo);
1967      }
1968      if (m_readytype=='E')
1969      {
1970        // at this point we do not have an identity node and the expression will be Expanded
1971        // so we only need to know which child to ask
1972        if (m_left->m_readytype=='E')
1973        {
1974            return m_left->getPointOffset(sampleNo,dataPointNo);
1975        }
1976        else
1977        {
1978            return m_right->getPointOffset(sampleNo,dataPointNo);
1979        }
1980      }
1981      if (m_readytype=='C')
1982      {
1983            return m_left->getPointOffset(sampleNo,dataPointNo); // which child doesn't matter
1984      }
1985      throw DataException("Programmer error - getPointOffset on lazy data may require collapsing (but this object is marked const).");
1986    }
1987    
1988    
1989    // I have decided to let Data:: handle this issue.
1990    void
1991    DataLazy::setToZero()
1992    {
1993    //   DataTypes::RealVectorType v(getNoValues(),0);
1994    //   m_id=DataReady_ptr(new DataConstant(getFunctionSpace(),getShape(),v));
1995    //   m_op=IDENTITY;
1996    //   m_right.reset();  
1997    //   m_left.reset();
1998    //   m_readytype='C';
1999    //   m_buffsRequired=1;
2000    
2001      (void)privdebug;  // to stop the compiler complaining about unused privdebug
2002      throw DataException("Programmer error - setToZero not supported for DataLazy (DataLazy objects should be read only).");
2003  }  }
2004    
2005  bool
2006    DataLazy::actsExpanded() const
2007    {
2008            return (m_readytype=='E');
2009    }
2010    
2011    } // end namespace
2012    
