/[escript]/branches/clazy/escriptcore/src/DataLazy.cpp
ViewVC logotype

Diff of /branches/clazy/escriptcore/src/DataLazy.cpp

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

branches/schroedinger/escript/src/DataLazy.cpp revision 1888 by jfenwick, Wed Oct 15 04:00:21 2008 UTC trunk/escriptcore/src/DataLazy.cpp revision 6144 by caltinay, Wed Apr 6 05:25:13 2016 UTC
# Line 1  Line 1 
1    
2  /*******************************************************  /*****************************************************************************
3  *  *
4  * Copyright (c) 2003-2008 by University of Queensland  * Copyright (c) 2003-2016 by The University of Queensland
5  * Earth Systems Science Computational Center (ESSCC)  * http://www.uq.edu.au
 * http://www.uq.edu.au/esscc  
6  *  *
7  * Primary Business: Queensland, Australia  * Primary Business: Queensland, Australia
8  * Licensed under the Open Software License version 3.0  * Licensed under the Apache License, version 2.0
9  * http://www.opensource.org/licenses/osl-3.0.php  * http://www.apache.org/licenses/LICENSE-2.0
10  *  *
11  *******************************************************/  * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12    * Development 2012-2013 by School of Earth Sciences
13    * Development from 2014 by Centre for Geoscience Computing (GeoComp)
14    *
15    *****************************************************************************/
16    
17  #include "DataLazy.h"  #include "DataLazy.h"
 #ifdef USE_NETCDF  
 #include <netcdfcpp.h>  
 #endif  
 #ifdef PASO_MPI  
 #include <mpi.h>  
 #endif  
 #ifdef _OPENMP  
 #include <omp.h>  
 #endif  
 #include "FunctionSpace.h"  
 #include "DataTypes.h"  
18  #include "Data.h"  #include "Data.h"
19  #include "UnaryFuncs.h"     // for escript::fsign  #include "DataTypes.h"
20    #include "EscriptParams.h"
21    #include "FunctionSpace.h"
22    #include "Utils.h"
23    #include "DataVectorOps.h"
24    
25    #include <iomanip> // for some fancy formatting in debug
26    
27    using namespace escript::DataTypes;
28    
29    #define NO_ARG
30    
31    // #define LAZYDEBUG(X) if (privdebug){X;}
32    #define LAZYDEBUG(X)
33    namespace
34    {
35    bool privdebug=false;
36    
37    #define ENABLEDEBUG privdebug=true;
38    #define DISABLEDEBUG privdebug=false;
39    }
40    
41    // #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {cerr << "\n!!!!!!! SIZE LIMIT EXCEEDED " << m_children << ";" << m_height << endl << toString() << endl;resolveToIdentity();}
42    
43    // #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {cerr << "SIZE LIMIT EXCEEDED " << m_height << endl;resolveToIdentity();}
44    
45    
46    #define SIZELIMIT if (m_height>escript::escriptParams.getTOO_MANY_LEVELS())  {if (escript::escriptParams.getLAZY_VERBOSE()){cerr << "SIZE LIMIT EXCEEDED height=" << m_height << endl;}resolveToIdentity();}
47    
48    /*
49    How does DataLazy work?
50    ~~~~~~~~~~~~~~~~~~~~~~~
51    
52    Each instance represents a single operation on one or two other DataLazy instances. These arguments are normally
53    denoted left and right.
54    
55    A special operation, IDENTITY, stores an instance of DataReady in the m_id member.
56    This means that all "internal" nodes in the structure are instances of DataLazy.
57    
58    Each operation has a string representation as well as an opgroup - eg G_IDENTITY, G_BINARY, ...
59    Note that IDENTITY is not considered a unary operation.
60    
61    I am avoiding calling the structure formed a tree because it is not guaranteed to be one (eg c=a+a).
62    It must however form a DAG (directed acyclic graph).
63    I will refer to individual DataLazy objects with the structure as nodes.
64    
65    Each node also stores:
66    - m_readytype \in {'E','T','C','?'} ~ indicates what sort of DataReady would be produced if the expression was
67            evaluated.
68    - m_buffsrequired ~ the large number of samples which would need to be kept simultaneously in order to
69            evaluate the expression.
70    - m_samplesize ~ the number of doubles stored in a sample.
71    
72    When a new node is created, the above values are computed based on the values in the child nodes.
73    Eg: if left requires 4 samples and right requires 6 then left+right requires 7 samples.
74    
75    The resolve method, which produces a DataReady from a DataLazy, does the following:
76    1) Create a DataReady to hold the new result.
77    2) Allocate a vector (v) big enough to hold m_buffsrequired samples.
78    3) For each sample, call resolveSample with v, to get its values and copy them into the result object.
79    
80    (In the case of OMP, multiple samples are resolved in parallel so the vector needs to be larger.)
81    
82    resolveSample returns a Vector* and an offset within that vector where the result is stored.
83    Normally, this would be v, but for identity nodes their internal vector is returned instead.
84    
85    The convention that I use, is that the resolve methods should store their results starting at the offset they are passed.
86    
87    For expressions which evaluate to Constant or Tagged, there is a different evaluation method.
88    The collapse method invokes the (non-lazy) operations on the Data class to evaluate the expression.
89    
90    To add a new operator you need to do the following (plus anything I might have forgotten - adding a new group for example):
91    1) Add to the ES_optype.
92    2) determine what opgroup your operation belongs to (X)
93    3) add a string for the op to the end of ES_opstrings
94    4) increase ES_opcount
95    5) add an entry (X) to opgroups
96    6) add an entry to the switch in collapseToReady
97    7) add an entry to resolveX
98    */
99    
100    
101  using namespace std;  using namespace std;
102  using namespace boost;  using namespace boost;
# Line 33  using namespace boost; Line 104  using namespace boost;
104  namespace escript  namespace escript
105  {  {
106    
 const std::string&  
 opToString(ES_optype op);  
   
107  namespace  namespace
108  {  {
109    
110    
111    // enabling this will print out when ever the maximum stacksize used by resolve increases
112    // it assumes _OPENMP is also in use
113    //#define LAZY_STACK_PROF
114    
115    
116    
117    #ifndef _OPENMP
118      #ifdef LAZY_STACK_PROF
119      #undef LAZY_STACK_PROF
120      #endif
121    #endif
122    
123    
124    #ifdef LAZY_STACK_PROF
125    std::vector<void*> stackstart(getNumberOfThreads());
126    std::vector<void*> stackend(getNumberOfThreads());
127    size_t maxstackuse=0;
128    #endif
129    
130  enum ES_opgroup  enum ES_opgroup
131  {  {
132     G_UNKNOWN,     G_UNKNOWN,
133     G_IDENTITY,     G_IDENTITY,
134     G_BINARY,     G_BINARY,            // pointwise operations with two arguments
135     G_UNARY     G_UNARY,             // pointwise operations with one argument
136       G_UNARY_P,           // pointwise operations with one argument, requiring a parameter
137       G_NP1OUT,            // non-pointwise op with one output
138       G_NP1OUT_P,          // non-pointwise op with one output requiring a parameter
139       G_TENSORPROD,        // general tensor product
140       G_NP1OUT_2P,         // non-pointwise op with one output requiring two params
141       G_REDUCTION,         // non-pointwise unary op with a scalar output
142       G_CONDEVAL
143  };  };
144    
145    
146    
147    
148  string ES_opstrings[]={"UNKNOWN","IDENTITY","+","-","*","/","sin","cos","tan",  string ES_opstrings[]={"UNKNOWN","IDENTITY","+","-","*","/","^",
149              "asin","acos","atan","sinh","cosh","tanh","erf",                          "sin","cos","tan",
150              "asinh","acosh","atanh",                          "asin","acos","atan","sinh","cosh","tanh","erf",
151              "log10","log","sign","abs","neg","pos","exp","sqrt",                          "asinh","acosh","atanh",
152              "1/","where>0","where<0","where>=0","where<=0"};                          "log10","log","sign","abs","neg","pos","exp","sqrt",
153  int ES_opcount=32;                          "1/","where>0","where<0","where>=0","where<=0", "where<>0","where=0",
154  ES_opgroup opgroups[]={G_UNKNOWN,G_IDENTITY,G_BINARY,G_BINARY,G_BINARY,G_BINARY,G_UNARY,G_UNARY,G_UNARY, //9                          "symmetric","antisymmetric",
155              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,    // 16                          "prod",
156              G_UNARY,G_UNARY,G_UNARY,                    // 19                          "transpose", "trace",
157              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,        // 27                          "swapaxes",
158              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY};                          "minval", "maxval",
159                            "condEval",
160                            "hermitian","antihermitian"
161    };
162    int ES_opcount=46;
163    ES_opgroup opgroups[]={G_UNKNOWN,G_IDENTITY,G_BINARY,G_BINARY,G_BINARY,G_BINARY, G_BINARY,
164                            G_UNARY,G_UNARY,G_UNARY, //10
165                            G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,        // 17
166                            G_UNARY,G_UNARY,G_UNARY,                                        // 20
167                            G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,        // 28
168                            G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY, G_UNARY_P, G_UNARY_P,          // 35
169                            G_NP1OUT,G_NP1OUT,
170                            G_TENSORPROD,
171                            G_NP1OUT_P, G_NP1OUT_P,
172                            G_NP1OUT_2P,
173                            G_REDUCTION, G_REDUCTION,
174                            G_CONDEVAL,
175                            G_UNARY,G_UNARY
176    };
177  inline  inline
178  ES_opgroup  ES_opgroup
179  getOpgroup(ES_optype op)  getOpgroup(ES_optype op)
# Line 74  getOpgroup(ES_optype op) Line 185  getOpgroup(ES_optype op)
185  FunctionSpace  FunctionSpace
186  resultFS(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)  resultFS(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
187  {  {
188      // perhaps this should call interpolate and throw or something?          // perhaps this should call interpolate and throw or something?
189      // maybe we need an interpolate node -          // maybe we need an interpolate node -
190      // that way, if interpolate is required in any other op we can just throw a          // that way, if interpolate is required in any other op we can just throw a
191      // programming error exception.          // programming error exception.
192    
193      FunctionSpace l=left->getFunctionSpace();
194      if (left->getFunctionSpace()!=right->getFunctionSpace())    FunctionSpace r=right->getFunctionSpace();
195      {    if (l!=r)
196          throw DataException("FunctionSpaces not equal - interpolation not supported on lazy data.");    {
197      }      signed char res=r.getDomain()->preferredInterpolationOnDomain(r.getTypeCode(), l.getTypeCode());
198      return left->getFunctionSpace();      if (res==1)
199        {
200            return l;
201        }
202        if (res==-1)
203        {
204            return r;
205        }
206        throw DataException("Cannot interpolate between the FunctionSpaces given for operation "+opToString(op)+".");
207      }
208      return l;
209  }  }
210    
211  // return the shape of the result of "left op right"  // return the shape of the result of "left op right"
212    // the shapes resulting from tensor product are more complex to compute so are worked out elsewhere
213  DataTypes::ShapeType  DataTypes::ShapeType
214  resultShape(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)  resultShape(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
215  {  {
216      if (left->getShape()!=right->getShape())          if (left->getShape()!=right->getShape())
217      {          {
218          throw DataException("Shapes not the same - shapes must match for lazy data.");            if ((getOpgroup(op)!=G_BINARY) && (getOpgroup(op)!=G_NP1OUT))
219      }            {
220      return left->getShape();                  throw DataException("Shapes not the name - shapes must match for (point)binary operations.");
221              }
222    
223              if (left->getRank()==0)       // we need to allow scalar * anything
224              {
225                    return right->getShape();
226              }
227              if (right->getRank()==0)
228              {
229                    return left->getShape();
230              }
231              throw DataException("Shapes not the same - arguments must have matching shapes (or be scalars) for (point)binary operations on lazy data.");
232            }
233            return left->getShape();
234  }  }
235    
236  size_t  // return the shape for "op left"
237  resultLength(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)  
238    DataTypes::ShapeType
239    resultShape(DataAbstract_ptr left, ES_optype op, int axis_offset)
240  {  {
241     switch (getOpgroup(op))          switch(op)
242     {          {
243     case G_BINARY: return left->getLength();          case TRANS:
244     case G_UNARY: return left->getLength();             {                    // for the scoping of variables
245     default:                  const DataTypes::ShapeType& s=left->getShape();
246      throw DataException("Programmer Error - attempt to getLength() for operator "+opToString(op)+".");                  DataTypes::ShapeType sh;
247     }                  int rank=left->getRank();
248                    if (axis_offset<0 || axis_offset>rank)
249                    {
250                stringstream e;
251                e << "Error - Data::transpose must have 0 <= axis_offset <= rank=" << rank;
252                throw DataException(e.str());
253            }
254            for (int i=0; i<rank; i++)
255                    {
256                       int index = (axis_offset+i)%rank;
257               sh.push_back(s[index]); // Append to new shape
258            }
259                    return sh;
260               }
261            break;
262            case TRACE:
263               {
264                    int rank=left->getRank();
265                    if (rank<2)
266                    {
267                       throw DataException("Trace can only be computed for objects with rank 2 or greater.");
268                    }
269                    if ((axis_offset>rank-2) || (axis_offset<0))
270                    {
271                       throw DataException("Trace: axis offset must lie between 0 and rank-2 inclusive.");
272                    }
273                    if (rank==2)
274                    {
275                       return DataTypes::scalarShape;
276                    }
277                    else if (rank==3)
278                    {
279                       DataTypes::ShapeType sh;
280                       if (axis_offset==0)
281                       {
282                            sh.push_back(left->getShape()[2]);
283                       }
284                       else         // offset==1
285                       {
286                            sh.push_back(left->getShape()[0]);
287                       }
288                       return sh;
289                    }
290                    else if (rank==4)
291                    {
292                       DataTypes::ShapeType sh;
293                       const DataTypes::ShapeType& s=left->getShape();
294                       if (axis_offset==0)
295                       {
296                            sh.push_back(s[2]);
297                            sh.push_back(s[3]);
298                       }
299                       else if (axis_offset==1)
300                       {
301                            sh.push_back(s[0]);
302                            sh.push_back(s[3]);
303                       }
304                       else         // offset==2
305                       {
306                            sh.push_back(s[0]);
307                            sh.push_back(s[1]);
308                       }
309                       return sh;
310                    }
311                    else            // unknown rank
312                    {
313                       throw DataException("Error - Data::trace can only be calculated for rank 2, 3 or 4 object.");
314                    }
315               }
316            break;
317            default:
318            throw DataException("Programmer error - resultShape(left,op) can't compute shapes for operator "+opToString(op)+".");
319            }
320  }  }
321    
322  int  DataTypes::ShapeType
323  calcBuffs(const DataLazy_ptr& left, const DataLazy_ptr& right, ES_optype op)  SwapShape(DataAbstract_ptr left, const int axis0, const int axis1)
324  {  {
325     switch(getOpgroup(op))       // This code taken from the Data.cpp swapaxes() method
326     {       // Some of the checks are probably redundant here
327     case G_IDENTITY: return 1;       int axis0_tmp,axis1_tmp;
328     case G_BINARY: return max(left->getBuffsRequired(),right->getBuffsRequired()+1);       const DataTypes::ShapeType& s=left->getShape();
329     case G_UNARY: return max(left->getBuffsRequired(),1);       DataTypes::ShapeType out_shape;
330     default:       // Here's the equivalent of python s_out=s[axis_offset:]+s[:axis_offset]
331      throw DataException("Programmer Error - attempt to calcBuffs() for operator "+opToString(op)+".");       // which goes thru all shape vector elements starting with axis_offset (at index=rank wrap around to 0)
332     }       int rank=left->getRank();
333         if (rank<2) {
334            throw DataException("Error - Data::swapaxes argument must have at least rank 2.");
335         }
336         if (axis0<0 || axis0>rank-1) {
337            stringstream e;
338            e << "Error - Data::swapaxes: axis0 must be between 0 and rank-1=" << (rank-1);
339            throw DataException(e.str());
340         }
341         if (axis1<0 || axis1>rank-1) {
342            stringstream e;
343            e << "Error - Data::swapaxes: axis1 must be between 0 and rank-1=" << (rank-1);
344            throw DataException(e.str());
345         }
346         if (axis0 == axis1) {
347             throw DataException("Error - Data::swapaxes: axis indices must be different.");
348         }
349         if (axis0 > axis1) {
350             axis0_tmp=axis1;
351             axis1_tmp=axis0;
352         } else {
353             axis0_tmp=axis0;
354             axis1_tmp=axis1;
355         }
356         for (int i=0; i<rank; i++) {
357           if (i == axis0_tmp) {
358              out_shape.push_back(s[axis1_tmp]);
359           } else if (i == axis1_tmp) {
360              out_shape.push_back(s[axis0_tmp]);
361           } else {
362              out_shape.push_back(s[i]);
363           }
364         }
365        return out_shape;
366  }  }
367    
368    
369    // determine the output shape for the general tensor product operation
370    // the additional parameters return information required later for the product
371    // the majority of this code is copy pasted from C_General_Tensor_Product
372    DataTypes::ShapeType
373    GTPShape(DataAbstract_ptr left, DataAbstract_ptr right, int axis_offset, int transpose, int& SL, int& SM, int& SR)
374    {
375            
376      // Get rank and shape of inputs
377      int rank0 = left->getRank();
378      int rank1 = right->getRank();
379      const DataTypes::ShapeType& shape0 = left->getShape();
380      const DataTypes::ShapeType& shape1 = right->getShape();
381    
382      // Prepare for the loops of the product and verify compatibility of shapes
383      int start0=0, start1=0;
384      if (transpose == 0)           {}
385      else if (transpose == 1)      { start0 = axis_offset; }
386      else if (transpose == 2)      { start1 = rank1-axis_offset; }
387      else                          { throw DataException("DataLazy GeneralTensorProduct Constructor: Error - transpose should be 0, 1 or 2"); }
388    
389      if (rank0<axis_offset)
390      {
391            throw DataException("DataLazy GeneralTensorProduct Constructor: Error - rank of left < axisoffset");
392      }
393    
394      // Adjust the shapes for transpose
395      DataTypes::ShapeType tmpShape0(rank0);        // pre-sizing the vectors rather
396      DataTypes::ShapeType tmpShape1(rank1);        // than using push_back
397      for (int i=0; i<rank0; i++)   { tmpShape0[i]=shape0[(i+start0)%rank0]; }
398      for (int i=0; i<rank1; i++)   { tmpShape1[i]=shape1[(i+start1)%rank1]; }
399    
400      // Prepare for the loops of the product
401      SL=1, SM=1, SR=1;
402      for (int i=0; i<rank0-axis_offset; i++)       {
403        SL *= tmpShape0[i];
404      }
405      for (int i=rank0-axis_offset; i<rank0; i++)   {
406        if (tmpShape0[i] != tmpShape1[i-(rank0-axis_offset)]) {
407          throw DataException("C_GeneralTensorProduct: Error - incompatible shapes");
408        }
409        SM *= tmpShape0[i];
410      }
411      for (int i=axis_offset; i<rank1; i++)         {
412        SR *= tmpShape1[i];
413      }
414    
415      // Define the shape of the output (rank of shape is the sum of the loop ranges below)
416      DataTypes::ShapeType shape2(rank0+rank1-2*axis_offset);      
417      {                     // block to limit the scope of out_index
418         int out_index=0;
419         for (int i=0; i<rank0-axis_offset; i++, ++out_index) { shape2[out_index]=tmpShape0[i]; } // First part of arg_0_Z
420         for (int i=axis_offset; i<rank1; i++, ++out_index)   { shape2[out_index]=tmpShape1[i]; } // Last part of arg_1_Z
421      }
422    
423      if (shape2.size()>ESCRIPT_MAX_DATA_RANK)
424      {
425         ostringstream os;
426         os << "C_GeneralTensorProduct: Error - Attempt to create a rank " << shape2.size() << " object. The maximum rank is " << ESCRIPT_MAX_DATA_RANK << ".";
427         throw DataException(os.str());
428      }
429    
430      return shape2;
431    }
432    
433    }       // end anonymous namespace
434    
 }   // end anonymous namespace  
435    
436    
437    // Return a string representing the operation
438  const std::string&  const std::string&
439  opToString(ES_optype op)  opToString(ES_optype op)
440  {  {
# Line 138  opToString(ES_optype op) Line 445  opToString(ES_optype op)
445    return ES_opstrings[op];    return ES_opstrings[op];
446  }  }
447    
448    void DataLazy::LazyNodeSetup()
449    {
450    #ifdef _OPENMP
451        int numthreads=omp_get_max_threads();
452        m_samples.resize(numthreads*m_samplesize);
453        m_sampleids=new int[numthreads];
454        for (int i=0;i<numthreads;++i)
455        {
456            m_sampleids[i]=-1;  
457        }
458    #else
459        m_samples.resize(m_samplesize);
460        m_sampleids=new int[1];
461        m_sampleids[0]=-1;
462    #endif  // _OPENMP
463    }
464    
465    
466    // Creates an identity node
467  DataLazy::DataLazy(DataAbstract_ptr p)  DataLazy::DataLazy(DataAbstract_ptr p)
468      : parent(p->getFunctionSpace(),p->getShape()),          : parent(p->getFunctionSpace(),p->getShape())
469      m_op(IDENTITY)          ,m_sampleids(0),
470            m_samples(1)
471  {  {
472     if (p->isLazy())     if (p->isLazy())
473     {     {
474      // TODO: fix this.   We could make the new node a copy of p?          // I don't want identity of Lazy.
475      // I don't want identity of Lazy.          // Question: Why would that be so bad?
476      // Question: Why would that be so bad?          // Answer: We assume that the child of ID is something we can call getVector on
477      // Answer: We assume that the child of ID is something we can call getVector on          throw DataException("Programmer error - attempt to create identity from a DataLazy.");
     throw DataException("Programmer error - attempt to create identity from a DataLazy.");  
478     }     }
479     else     else
480     {     {
481      m_id=dynamic_pointer_cast<DataReady>(p);          DataReady_ptr dr=dynamic_pointer_cast<DataReady>(p);
482            makeIdentity(dr);
483    LAZYDEBUG(cout << "Wrapping " << dr.get() << " id=" << m_id.get() << endl;)
484     }     }
485     m_length=p->getLength();  LAZYDEBUG(cout << "(1)Lazy created with " << m_samplesize << endl;)
    m_buffsRequired=1;  
    m_samplesize=getNumDPPSample()*getNoValues();  
 cout << "(1)Lazy created with " << m_samplesize << endl;  
486  }  }
487    
488  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op)  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op)
489      : parent(left->getFunctionSpace(),left->getShape()),          : parent(left->getFunctionSpace(),(getOpgroup(op)!=G_REDUCTION)?left->getShape():DataTypes::scalarShape),
490      m_op(op)          m_op(op),
491            m_axis_offset(0),
492            m_transpose(0),
493            m_SL(0), m_SM(0), m_SR(0)
494  {  {
495     if (getOpgroup(op)!=G_UNARY)     if ((getOpgroup(op)!=G_UNARY) && (getOpgroup(op)!=G_NP1OUT) && (getOpgroup(op)!=G_REDUCTION))
496     {     {
497      throw DataException("Programmer error - constructor DataLazy(left, op) will only process UNARY operations.");          throw DataException("Programmer error - constructor DataLazy(left, op) will only process UNARY operations.");
498     }     }
499    
500     DataLazy_ptr lleft;     DataLazy_ptr lleft;
501     if (!left->isLazy())     if (!left->isLazy())
502     {     {
503      lleft=DataLazy_ptr(new DataLazy(left));          lleft=DataLazy_ptr(new DataLazy(left));
504     }     }
505     else     else
506     {     {
507      lleft=dynamic_pointer_cast<DataLazy>(left);          lleft=dynamic_pointer_cast<DataLazy>(left);
508     }     }
509     m_length=left->getLength();     m_readytype=lleft->m_readytype;
510     m_left=lleft;     m_left=lleft;
    m_buffsRequired=1;  
511     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
512       m_children=m_left->m_children+1;
513       m_height=m_left->m_height+1;
514       LazyNodeSetup();
515       SIZELIMIT
516  }  }
517    
518    
519  DataLazy::DataLazy(DataLazy_ptr left, DataLazy_ptr right, ES_optype op)  // In this constructor we need to consider interpolation
520      : parent(resultFS(left,right,op), resultShape(left,right,op)),  DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
521      m_left(left),          : parent(resultFS(left,right,op), resultShape(left,right,op)),
522      m_right(right),          m_op(op),
523      m_op(op)          m_SL(0), m_SM(0), m_SR(0)
524  {  {
525     if (getOpgroup(op)!=G_BINARY)  LAZYDEBUG(cout << "Forming operator with " << left.get() << " " << right.get() << endl;)
526       if ((getOpgroup(op)!=G_BINARY))
527       {
528            throw DataException("Programmer error - constructor DataLazy(left, right, op) will only process BINARY operations.");
529       }
530    
531       if (getFunctionSpace()!=left->getFunctionSpace())    // left needs to be interpolated
532       {
533            FunctionSpace fs=getFunctionSpace();
534            Data ltemp(left);
535            Data tmp(ltemp,fs);
536            left=tmp.borrowDataPtr();
537       }
538       if (getFunctionSpace()!=right->getFunctionSpace())   // right needs to be interpolated
539       {
540            Data tmp(Data(right),getFunctionSpace());
541            right=tmp.borrowDataPtr();
542    LAZYDEBUG(cout << "Right interpolation required " << right.get() << endl;)
543       }
544       left->operandCheck(*right);
545    
546       if (left->isLazy())                  // the children need to be DataLazy. Wrap them in IDENTITY if required
547       {
548            m_left=dynamic_pointer_cast<DataLazy>(left);
549    LAZYDEBUG(cout << "Left is " << m_left->toString() << endl;)
550       }
551       else
552       {
553            m_left=DataLazy_ptr(new DataLazy(left));
554    LAZYDEBUG(cout << "Left " << left.get() << " wrapped " << m_left->m_id.get() << endl;)
555       }
556       if (right->isLazy())
557       {
558            m_right=dynamic_pointer_cast<DataLazy>(right);
559    LAZYDEBUG(cout << "Right is " << m_right->toString() << endl;)
560       }
561       else
562       {
563            m_right=DataLazy_ptr(new DataLazy(right));
564    LAZYDEBUG(cout << "Right " << right.get() << " wrapped " << m_right->m_id.get() << endl;)
565       }
566       char lt=m_left->m_readytype;
567       char rt=m_right->m_readytype;
568       if (lt=='E' || rt=='E')
569       {
570            m_readytype='E';
571       }
572       else if (lt=='T' || rt=='T')
573       {
574            m_readytype='T';
575       }
576       else
577     {     {
578      throw DataException("Programmer error - constructor DataLazy(left, right, op) will only process BINARY operations.");          m_readytype='C';
579     }     }
    m_length=resultLength(m_left,m_right,m_op);  
580     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
581     m_buffsRequired=calcBuffs(m_left, m_right, m_op);     m_children=m_left->m_children+m_right->m_children+2;
582  cout << "(2)Lazy created with " << m_samplesize << endl;     m_height=max(m_left->m_height,m_right->m_height)+1;
583       LazyNodeSetup();
584       SIZELIMIT
585    LAZYDEBUG(cout << "(3)Lazy created with " << m_samplesize << endl;)
586  }  }
587    
588  DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)  DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op, int axis_offset, int transpose)
589      : parent(resultFS(left,right,op), resultShape(left,right,op)),          : parent(resultFS(left,right,op), GTPShape(left,right, axis_offset, transpose, m_SL,m_SM, m_SR)),
590      m_op(op)          m_op(op),
591            m_axis_offset(axis_offset),
592            m_transpose(transpose)
593  {  {
594     if (getOpgroup(op)!=G_BINARY)     if ((getOpgroup(op)!=G_TENSORPROD))
595       {
596            throw DataException("Programmer error - constructor DataLazy(left, right, op, ax, tr) will only process BINARY operations which require parameters.");
597       }
598       if ((transpose>2) || (transpose<0))
599       {
600            throw DataException("DataLazy GeneralTensorProduct constructor: Error - transpose should be 0, 1 or 2");
601       }
602       if (getFunctionSpace()!=left->getFunctionSpace())    // left needs to be interpolated
603       {
604            FunctionSpace fs=getFunctionSpace();
605            Data ltemp(left);
606            Data tmp(ltemp,fs);
607            left=tmp.borrowDataPtr();
608       }
609       if (getFunctionSpace()!=right->getFunctionSpace())   // right needs to be interpolated
610     {     {
611      throw DataException("Programmer error - constructor DataLazy(left, op) will only process BINARY operations.");          Data tmp(Data(right),getFunctionSpace());
612            right=tmp.borrowDataPtr();
613     }     }
614     if (left->isLazy())  //    left->operandCheck(*right);
615    
616       if (left->isLazy())                  // the children need to be DataLazy. Wrap them in IDENTITY if required
617     {     {
618      m_left=dynamic_pointer_cast<DataLazy>(left);          m_left=dynamic_pointer_cast<DataLazy>(left);
619     }     }
620     else     else
621     {     {
622      m_left=DataLazy_ptr(new DataLazy(left));          m_left=DataLazy_ptr(new DataLazy(left));
623     }     }
624     if (right->isLazy())     if (right->isLazy())
625     {     {
626      m_right=dynamic_pointer_cast<DataLazy>(right);          m_right=dynamic_pointer_cast<DataLazy>(right);
627       }
628       else
629       {
630            m_right=DataLazy_ptr(new DataLazy(right));
631       }
632       char lt=m_left->m_readytype;
633       char rt=m_right->m_readytype;
634       if (lt=='E' || rt=='E')
635       {
636            m_readytype='E';
637       }
638       else if (lt=='T' || rt=='T')
639       {
640            m_readytype='T';
641       }
642       else
643       {
644            m_readytype='C';
645       }
646       m_samplesize=getNumDPPSample()*getNoValues();
647       m_children=m_left->m_children+m_right->m_children+2;
648       m_height=max(m_left->m_height,m_right->m_height)+1;
649       LazyNodeSetup();
650       SIZELIMIT
651    LAZYDEBUG(cout << "(4)Lazy created with " << m_samplesize << endl;)
652    }
653    
654    
655    DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, int axis_offset)
656            : parent(left->getFunctionSpace(), resultShape(left,op, axis_offset)),
657            m_op(op),
658            m_axis_offset(axis_offset),
659            m_transpose(0),
660            m_tol(0)
661    {
662       if ((getOpgroup(op)!=G_NP1OUT_P))
663       {
664            throw DataException("Programmer error - constructor DataLazy(left, op, ax) will only process UNARY operations which require parameters.");
665       }
666       DataLazy_ptr lleft;
667       if (!left->isLazy())
668       {
669            lleft=DataLazy_ptr(new DataLazy(left));
670       }
671       else
672       {
673            lleft=dynamic_pointer_cast<DataLazy>(left);
674       }
675       m_readytype=lleft->m_readytype;
676       m_left=lleft;
677       m_samplesize=getNumDPPSample()*getNoValues();
678       m_children=m_left->m_children+1;
679       m_height=m_left->m_height+1;
680       LazyNodeSetup();
681       SIZELIMIT
682    LAZYDEBUG(cout << "(5)Lazy created with " << m_samplesize << endl;)
683    }
684    
685    DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, double tol)
686            : parent(left->getFunctionSpace(), left->getShape()),
687            m_op(op),
688            m_axis_offset(0),
689            m_transpose(0),
690            m_tol(tol)
691    {
692       if ((getOpgroup(op)!=G_UNARY_P))
693       {
694            throw DataException("Programmer error - constructor DataLazy(left, op, tol) will only process UNARY operations which require parameters.");
695       }
696       DataLazy_ptr lleft;
697       if (!left->isLazy())
698       {
699            lleft=DataLazy_ptr(new DataLazy(left));
700     }     }
701     else     else
702     {     {
703      m_right=DataLazy_ptr(new DataLazy(right));          lleft=dynamic_pointer_cast<DataLazy>(left);
704     }     }
705       m_readytype=lleft->m_readytype;
706       m_left=lleft;
707       m_samplesize=getNumDPPSample()*getNoValues();
708       m_children=m_left->m_children+1;
709       m_height=m_left->m_height+1;
710       LazyNodeSetup();
711       SIZELIMIT
712    LAZYDEBUG(cout << "(6)Lazy created with " << m_samplesize << endl;)
713    }
714    
715    
716     m_length=resultLength(m_left,m_right,m_op);  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, const int axis0, const int axis1)
717            : parent(left->getFunctionSpace(), SwapShape(left,axis0,axis1)),
718            m_op(op),
719            m_axis_offset(axis0),
720            m_transpose(axis1),
721            m_tol(0)
722    {
723       if ((getOpgroup(op)!=G_NP1OUT_2P))
724       {
725            throw DataException("Programmer error - constructor DataLazy(left, op, tol) will only process UNARY operations which require two integer parameters.");
726       }
727       DataLazy_ptr lleft;
728       if (!left->isLazy())
729       {
730            lleft=DataLazy_ptr(new DataLazy(left));
731       }
732       else
733       {
734            lleft=dynamic_pointer_cast<DataLazy>(left);
735       }
736       m_readytype=lleft->m_readytype;
737       m_left=lleft;
738     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
739     m_buffsRequired=calcBuffs(m_left, m_right,m_op);     m_children=m_left->m_children+1;
740  cout << "(3)Lazy created with " << m_samplesize << endl;     m_height=m_left->m_height+1;
741       LazyNodeSetup();
742       SIZELIMIT
743    LAZYDEBUG(cout << "(7)Lazy created with " << m_samplesize << endl;)
744  }  }
745    
746    
namespace
{
    // Largest of three ints (used when computing the height of a 3-child node).
    inline int max3(int a, int b, int c)
    {
        int m=a;
        if (b>m) m=b;
        if (c>m) m=c;
        return m;
    }
}
757    
758    DataLazy::DataLazy(DataAbstract_ptr mask, DataAbstract_ptr left, DataAbstract_ptr right/*, double tol*/)
759            : parent(left->getFunctionSpace(), left->getShape()),
760            m_op(CONDEVAL),
761            m_axis_offset(0),
762            m_transpose(0),
763            m_tol(0)
764  {  {
765    
766       DataLazy_ptr lmask;
767       DataLazy_ptr lleft;
768       DataLazy_ptr lright;
769       if (!mask->isLazy())
770       {
771            lmask=DataLazy_ptr(new DataLazy(mask));
772       }
773       else
774       {
775            lmask=dynamic_pointer_cast<DataLazy>(mask);
776       }
777       if (!left->isLazy())
778       {
779            lleft=DataLazy_ptr(new DataLazy(left));
780       }
781       else
782       {
783            lleft=dynamic_pointer_cast<DataLazy>(left);
784       }
785       if (!right->isLazy())
786       {
787            lright=DataLazy_ptr(new DataLazy(right));
788       }
789       else
790       {
791            lright=dynamic_pointer_cast<DataLazy>(right);
792       }
793       m_readytype=lmask->m_readytype;
794       if ((lleft->m_readytype!=lright->m_readytype) || (lmask->m_readytype!=lleft->m_readytype))
795       {
796            throw DataException("Programmer Error - condEval arguments must have the same readytype");
797       }
798       m_left=lleft;
799       m_right=lright;
800       m_mask=lmask;
801       m_samplesize=getNumDPPSample()*getNoValues();
802       m_children=m_left->m_children+m_right->m_children+m_mask->m_children+1;
803       m_height=max3(m_left->m_height,m_right->m_height,m_mask->m_height)+1;
804       LazyNodeSetup();
805       SIZELIMIT
806    LAZYDEBUG(cout << "(8)Lazy created with " << m_samplesize << endl;)
807  }  }
808    
809    
810  int  
811  DataLazy::getBuffsRequired() const  DataLazy::~DataLazy()
812  {  {
813      return m_buffsRequired;     delete[] m_sampleids;
814  }  }
815    
816    
817  // the vector and the offset are a place where the method could write its data if it wishes  /*
818  // it is not obligated to do so. For example, if it has its own storage already, it can use that.    \brief Evaluates the expression using methods on Data.
819  // Hence the return value to indicate where the data is actually stored.    This does the work for the collapse method.
820  // Regardless, the storage should be assumed to be used, even if it isn't.    For reasons of efficiency do not call this method on DataExpanded nodes.
821  const double*  */
822  DataLazy::resolveSample(ValueType& v,int sampleNo,  size_t offset ) const  DataReady_ptr
823    DataLazy::collapseToReady() const
824  {  {
825    if (m_op==IDENTITY)      if (m_readytype=='E')
826      {     // this is more an efficiency concern than anything else
827        throw DataException("Programmer Error - do not use collapse on Expanded data.");
828      }
829      if (m_op==IDENTITY)
830    {    {
831      const ValueType& vec=m_id->getVector();      return m_id;
     return &(vec[m_id->getPointOffset(sampleNo, 0)]);  
832    }    }
833    size_t rightoffset=offset+m_samplesize;    DataReady_ptr pleft=m_left->collapseToReady();
834    const double* left=m_left->resolveSample(v,sampleNo,offset);    Data left(pleft);
835    const double* right=0;    Data right;
836    if (getOpgroup(m_op)==G_BINARY)    if ((getOpgroup(m_op)==G_BINARY) || (getOpgroup(m_op)==G_TENSORPROD))
837    {    {
838      right=m_right->resolveSample(v,sampleNo,rightoffset);      right=Data(m_right->collapseToReady());
839    }    }
840    double* result=&(v[offset]);    Data result;
841      switch(m_op)
842    {    {
843      switch(m_op)      case ADD:
844      {          result=left+right;
845      case ADD:       // since these are pointwise ops, pretend each sample is one point          break;
846      tensor_binary_operation(m_samplesize, left, right, result, plus<double>());      case SUB:          
847      break;          result=left-right;
848      case SUB:                break;
849      tensor_binary_operation(m_samplesize, left, right, result, minus<double>());      case MUL:          
850      break;          result=left*right;
851      case MUL:                break;
852      tensor_binary_operation(m_samplesize, left, right, result, multiplies<double>());      case DIV:          
853      break;          result=left/right;
854      case DIV:                break;
855      tensor_binary_operation(m_samplesize, left, right, result, divides<double>());      case SIN:
856            result=left.sin();      
857            break;
858        case COS:
859            result=left.cos();
860            break;
861        case TAN:
862            result=left.tan();
863            break;
864        case ASIN:
865            result=left.asin();
866            break;
867        case ACOS:
868            result=left.acos();
869            break;
870        case ATAN:
871            result=left.atan();
872            break;
873        case SINH:
874            result=left.sinh();
875            break;
876        case COSH:
877            result=left.cosh();
878            break;
879        case TANH:
880            result=left.tanh();
881            break;
882        case ERF:
883            result=left.erf();
884            break;
885       case ASINH:
886            result=left.asinh();
887            break;
888       case ACOSH:
889            result=left.acosh();
890            break;
891       case ATANH:
892            result=left.atanh();
893            break;
894        case LOG10:
895            result=left.log10();
896            break;
897        case LOG:
898            result=left.log();
899            break;
900        case SIGN:
901            result=left.sign();
902            break;
903        case ABS:
904            result=left.abs();
905            break;
906        case NEG:
907            result=left.neg();
908            break;
909        case POS:
910            // it doesn't mean anything for delayed.
911            // it will just trigger a deep copy of the lazy object
912            throw DataException("Programmer error - POS not supported for lazy data.");
913            break;
914        case EXP:
915            result=left.exp();
916            break;
917        case SQRT:
918            result=left.sqrt();
919            break;
920        case RECIP:
921            result=left.oneOver();
922            break;
923        case GZ:
924            result=left.wherePositive();
925            break;
926        case LZ:
927            result=left.whereNegative();
928            break;
929        case GEZ:
930            result=left.whereNonNegative();
931            break;
932        case LEZ:
933            result=left.whereNonPositive();
934            break;
935        case NEZ:
936            result=left.whereNonZero(m_tol);
937            break;
938        case EZ:
939            result=left.whereZero(m_tol);
940            break;
941        case SYM:
942            result=left.symmetric();
943            break;
944        case NSYM:
945            result=left.antisymmetric();
946            break;
947        case PROD:
948            result=C_GeneralTensorProduct(left,right,m_axis_offset, m_transpose);
949            break;
950        case TRANS:
951            result=left.transpose(m_axis_offset);
952            break;
953        case TRACE:
954            result=left.trace(m_axis_offset);
955            break;
956        case SWAP:
957            result=left.swapaxes(m_axis_offset, m_transpose);
958            break;
959        case MINVAL:
960            result=left.minval();
961            break;
962        case MAXVAL:
963            result=left.minval();
964            break;
965        case HER:
966        result=left.hermitian();
967      break;      break;
968  // unary ops      default:
969            throw DataException("Programmer error - collapseToReady does not know how to resolve operator "+opToString(m_op)+".");
970      }
971      return result.borrowReadyPtr();
972    }
973    
974    /*
975       \brief Converts the DataLazy into an IDENTITY storing the value of the expression.
976       This method uses the original methods on the Data class to evaluate the expressions.
977       For this reason, it should not be used on DataExpanded instances. (To do so would defeat
978       the purpose of using DataLazy in the first place).
979    */
980    void
981    DataLazy::collapse() const
982    {
983      if (m_op==IDENTITY)
984      {
985            return;
986      }
987      if (m_readytype=='E')
988      {     // this is more an efficiency concern than anything else
989        throw DataException("Programmer Error - do not use collapse on Expanded data.");
990      }
991      m_id=collapseToReady();
992      m_op=IDENTITY;
993    }
994    
995    // The result will be stored in m_samples
996    // The return value is a pointer to the DataVector, offset is the offset within the return value
997    const DataTypes::RealVectorType*
998    DataLazy::resolveNodeSample(int tid, int sampleNo, size_t& roffset) const
999    {
1000    LAZYDEBUG(cout << "Resolve sample " << toString() << endl;)
1001            // collapse so we have a 'E' node or an IDENTITY for some other type
1002      if (m_readytype!='E' && m_op!=IDENTITY)
1003      {
1004            collapse();
1005      }
1006      if (m_op==IDENTITY)  
1007      {
1008        const RealVectorType& vec=m_id->getVectorRO();
1009        roffset=m_id->getPointOffset(sampleNo, 0);
1010    #ifdef LAZY_STACK_PROF
1011    int x;
1012    if (&x<stackend[omp_get_thread_num()])
1013    {
1014           stackend[omp_get_thread_num()]=&x;
1015    }
1016    #endif
1017        return &(vec);
1018      }
1019      if (m_readytype!='E')
1020      {
1021        throw DataException("Programmer Error - Collapse did not produce an expanded node.");
1022      }
1023      if (m_sampleids[tid]==sampleNo)
1024      {
1025            roffset=tid*m_samplesize;
1026            return &(m_samples);            // sample is already resolved
1027      }
1028      m_sampleids[tid]=sampleNo;
1029    
1030      switch (getOpgroup(m_op))
1031      {
1032      case G_UNARY:
1033      case G_UNARY_P: return resolveNodeUnary(tid, sampleNo, roffset);
1034      case G_BINARY: return resolveNodeBinary(tid, sampleNo, roffset);
1035      case G_NP1OUT: return resolveNodeNP1OUT(tid, sampleNo, roffset);
1036      case G_NP1OUT_P: return resolveNodeNP1OUT_P(tid, sampleNo, roffset);
1037      case G_TENSORPROD: return resolveNodeTProd(tid, sampleNo, roffset);
1038      case G_NP1OUT_2P: return resolveNodeNP1OUT_2P(tid, sampleNo, roffset);
1039      case G_REDUCTION: return resolveNodeReduction(tid, sampleNo, roffset);
1040      case G_CONDEVAL: return resolveNodeCondEval(tid, sampleNo, roffset);
1041      default:
1042        throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");
1043      }
1044    }
1045    
1046    const DataTypes::RealVectorType*
1047    DataLazy::resolveNodeUnary(int tid, int sampleNo, size_t& roffset) const
1048    {
1049            // we assume that any collapsing has been done before we get here
1050            // since we only have one argument we don't need to think about only
1051            // processing single points.
1052            // we will also know we won't get identity nodes
1053      if (m_readytype!='E')
1054      {
1055        throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
1056      }
1057      if (m_op==IDENTITY)
1058      {
1059        throw DataException("Programmer error - resolveNodeUnary should not be called on identity nodes.");
1060      }
1061      const DataTypes::RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, roffset);
1062      const double* left=&((*leftres)[roffset]);
1063      roffset=m_samplesize*tid;
1064      double* result=&(m_samples[roffset]);
1065      escript::ESFunction operation=SINF;
1066      switch (m_op)
1067      {
1068      case SIN:      case SIN:
1069      tensor_unary_operation(m_samplesize, left, result, ::sin);      operation=SINF;
1070      break;      break;
1071      case COS:      case COS:
1072      tensor_unary_operation(m_samplesize, left, result, ::cos);          operation=COSF;
1073      break;      break;
1074      case TAN:      case TAN:
1075      tensor_unary_operation(m_samplesize, left, result, ::tan);          operation=TANF;
1076      break;      break;
1077      case ASIN:      case ASIN:
1078      tensor_unary_operation(m_samplesize, left, result, ::asin);          operation=ASINF;
1079      break;      break;
1080      case ACOS:      case ACOS:
1081      tensor_unary_operation(m_samplesize, left, result, ::acos);          operation=ACOSF;
1082      break;      break;
1083      case ATAN:      case ATAN:
1084      tensor_unary_operation(m_samplesize, left, result, ::atan);          operation=ATANF;
1085      break;      break;
1086      case SINH:      case SINH:
1087      tensor_unary_operation(m_samplesize, left, result, ::sinh);          operation=SINHF;
1088      break;      break;
1089      case COSH:      case COSH:
1090      tensor_unary_operation(m_samplesize, left, result, ::cosh);          operation=COSHF;
1091      break;      break;
1092      case TANH:      case TANH:
1093      tensor_unary_operation(m_samplesize, left, result, ::tanh);          operation=TANHF;
1094      break;      break;
1095      case ERF:      case ERF:
1096  #ifdef _WIN32          operation=ERFF;
     throw DataException("Error - Data:: erf function is not supported on _WIN32 platforms.");  
 #else  
     tensor_unary_operation(m_samplesize, left, result, ::erf);  
1097      break;      break;
 #endif  
1098     case ASINH:     case ASINH:
1099  #ifdef _WIN32          operation=ASINHF;
     tensor_unary_operation(m_samplesize, left, result, escript::asinh_substitute);  
 #else  
     tensor_unary_operation(m_samplesize, left, result, ::asinh);  
 #endif    
1100      break;      break;
1101     case ACOSH:     case ACOSH:
1102  #ifdef _WIN32          operation=ACOSHF;
     tensor_unary_operation(m_samplesize, left, result, escript::acosh_substitute);  
 #else  
     tensor_unary_operation(m_samplesize, left, result, ::acosh);  
 #endif    
1103      break;      break;
1104     case ATANH:     case ATANH:
1105  #ifdef _WIN32          operation=ATANHF;
     tensor_unary_operation(m_samplesize, left, result, escript::atanh_substitute);  
 #else  
     tensor_unary_operation(m_samplesize, left, result, ::atanh);  
 #endif    
1106      break;      break;
1107      case LOG10:      case LOG10:
1108      tensor_unary_operation(m_samplesize, left, result, ::log10);          operation=LOG10F;
1109      break;      break;
1110      case LOG:      case LOG:
1111      tensor_unary_operation(m_samplesize, left, result, ::log);          operation=LOGF;
1112      break;      break;
1113      case SIGN:      case SIGN:
1114      tensor_unary_operation(m_samplesize, left, result, escript::fsign);          operation=SIGNF;
1115      break;      break;
1116      case ABS:      case ABS:
1117      tensor_unary_operation(m_samplesize, left, result, ::fabs);          operation=ABSF;
1118      break;      break;
1119      case NEG:      case NEG:
1120      tensor_unary_operation(m_samplesize, left, result, negate<double>());          operation=NEGF;
1121      break;      break;
1122      case POS:      case POS:
1123      // it doesn't mean anything for delayed.          // it doesn't mean anything for delayed.
1124      // it will just trigger a deep copy of the lazy object          // it will just trigger a deep copy of the lazy object
1125      throw DataException("Programmer error - POS not supported for lazy data.");          throw DataException("Programmer error - POS not supported for lazy data.");
1126      break;          break;
1127      case EXP:      case EXP:
1128      tensor_unary_operation(m_samplesize, left, result, ::exp);          operation=EXPF;
1129      break;      break;
1130      case SQRT:      case SQRT:
1131      tensor_unary_operation(m_samplesize, left, result, ::sqrt);          operation=SQRTF;
1132      break;      break;
1133      case RECIP:      case RECIP:
1134      tensor_unary_operation(m_samplesize, left, result, bind1st(divides<double>(),1.));          operation=INVF;
1135      break;      break;
1136      case GZ:      case GZ:
1137      tensor_unary_operation(m_samplesize, left, result, bind2nd(greater<double>(),0.0));          operation=GTZEROF;
1138      break;      break;
1139      case LZ:      case LZ:
1140      tensor_unary_operation(m_samplesize, left, result, bind2nd(less<double>(),0.0));          operation=LTZEROF;
1141      break;      break;
1142      case GEZ:      case GEZ:
1143      tensor_unary_operation(m_samplesize, left, result, bind2nd(greater_equal<double>(),0.0));          operation=GEZEROF;
1144      break;      break;
1145      case LEZ:      case LEZ:
1146      tensor_unary_operation(m_samplesize, left, result, bind2nd(less_equal<double>(),0.0));          operation=LEZEROF;
1147        break;
1148    // There are actually G_UNARY_P but I don't see a compelling reason to treat them differently
1149        case NEZ:
1150            operation=NEQZEROF;
1151      break;      break;
1152        case EZ:
1153            operation=EQZEROF;
1154        break;
1155        default:
1156            throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");
1157      }
1158      tensor_unary_array_operation(m_samplesize,
1159                                 left,
1160                                 result,
1161                                 operation,
1162                                 m_tol);  
1163      return &(m_samples);
1164    }
1165    
1166    
1167    const DataTypes::RealVectorType*
1168    DataLazy::resolveNodeReduction(int tid, int sampleNo, size_t& roffset) const
1169    {
1170            // we assume that any collapsing has been done before we get here
1171            // since we only have one argument we don't need to think about only
1172            // processing single points.
1173            // we will also know we won't get identity nodes
1174      if (m_readytype!='E')
1175      {
1176        throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
1177      }
1178      if (m_op==IDENTITY)
1179      {
1180        throw DataException("Programmer error - resolveNodeUnary should not be called on identity nodes.");
1181      }
1182      size_t loffset=0;
1183      const DataTypes::RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, loffset);
1184    
1185      roffset=m_samplesize*tid;
1186      unsigned int ndpps=getNumDPPSample();
1187      unsigned int psize=DataTypes::noValues(m_left->getShape());
1188      double* result=&(m_samples[roffset]);
1189      switch (m_op)
1190      {
1191        case MINVAL:
1192            {
1193              for (unsigned int z=0;z<ndpps;++z)
1194              {
1195                FMin op;
1196                *result=escript::reductionOpVector(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max());
1197                loffset+=psize;
1198                result++;
1199              }
1200            }
1201            break;
1202        case MAXVAL:
1203            {
1204              for (unsigned int z=0;z<ndpps;++z)
1205              {
1206              FMax op;
1207              *result=escript::reductionOpVector(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max()*-1);
1208              loffset+=psize;
1209              result++;
1210              }
1211            }
1212            break;
1213      default:      default:
1214      throw DataException("Programmer error - do not know how to resolve operator "+opToString(m_op)+".");          throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");
1215      }    }
1216      return &(m_samples);
1217    }
1218    
1219    const DataTypes::RealVectorType*
1220    DataLazy::resolveNodeNP1OUT(int tid, int sampleNo, size_t& roffset) const
1221    {
1222            // we assume that any collapsing has been done before we get here
1223            // since we only have one argument we don't need to think about only
1224            // processing single points.
1225      if (m_readytype!='E')
1226      {
1227        throw DataException("Programmer error - resolveNodeNP1OUT should only be called on expanded Data.");
1228      }
1229      if (m_op==IDENTITY)
1230      {
1231        throw DataException("Programmer error - resolveNodeNP1OUT should not be called on identity nodes.");
1232      }
1233      size_t subroffset;
1234      const RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1235      roffset=m_samplesize*tid;
1236      size_t loop=0;
1237      size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1238      size_t step=getNoValues();
1239      size_t offset=roffset;
1240      switch (m_op)
1241      {
1242        case SYM:
1243            for (loop=0;loop<numsteps;++loop)
1244            {
1245                escript::symmetric(*leftres,m_left->getShape(),subroffset, m_samples, getShape(), offset);
1246                subroffset+=step;
1247                offset+=step;
1248            }
1249            break;
1250        case NSYM:
1251            for (loop=0;loop<numsteps;++loop)
1252            {
1253                escript::antisymmetric(*leftres,m_left->getShape(),subroffset, m_samples, getShape(), offset);
1254                subroffset+=step;
1255                offset+=step;
1256            }
1257            break;
1258        default:
1259            throw DataException("Programmer error - resolveNP1OUT can not resolve operator "+opToString(m_op)+".");
1260      }
1261      return &m_samples;
1262    }
1263    
1264    const DataTypes::RealVectorType*
1265    DataLazy::resolveNodeNP1OUT_P(int tid, int sampleNo, size_t& roffset) const
1266    {
1267            // we assume that any collapsing has been done before we get here
1268            // since we only have one argument we don't need to think about only
1269            // processing single points.
1270      if (m_readytype!='E')
1271      {
1272        throw DataException("Programmer error - resolveNodeNP1OUT_P should only be called on expanded Data.");
1273      }
1274      if (m_op==IDENTITY)
1275      {
1276        throw DataException("Programmer error - resolveNodeNP1OUT_P should not be called on identity nodes.");
1277      }
1278      size_t subroffset;
1279      size_t offset;
1280      const RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1281      roffset=m_samplesize*tid;
1282      offset=roffset;
1283      size_t loop=0;
1284      size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1285      size_t outstep=getNoValues();
1286      size_t instep=m_left->getNoValues();
1287      switch (m_op)
1288      {
1289        case TRACE:
1290            for (loop=0;loop<numsteps;++loop)
1291            {
1292                escript::trace(*leftres,m_left->getShape(),subroffset, m_samples ,getShape(),offset,m_axis_offset);
1293                subroffset+=instep;
1294                offset+=outstep;
1295            }
1296            break;
1297        case TRANS:
1298            for (loop=0;loop<numsteps;++loop)
1299            {
1300                escript::transpose(*leftres,m_left->getShape(),subroffset, m_samples, getShape(),offset,m_axis_offset);
1301                subroffset+=instep;
1302                offset+=outstep;
1303            }
1304            break;
1305        default:
1306            throw DataException("Programmer error - resolveNP1OUTP can not resolve operator "+opToString(m_op)+".");
1307      }
1308      return &m_samples;
1309    }
1310    
1311    
1312    const DataTypes::RealVectorType*
1313    DataLazy::resolveNodeNP1OUT_2P(int tid, int sampleNo, size_t& roffset) const
1314    {
1315      if (m_readytype!='E')
1316      {
1317        throw DataException("Programmer error - resolveNodeNP1OUT_2P should only be called on expanded Data.");
1318      }
1319      if (m_op==IDENTITY)
1320      {
1321        throw DataException("Programmer error - resolveNodeNP1OUT_2P should not be called on identity nodes.");
1322      }
1323      size_t subroffset;
1324      size_t offset;
1325      const RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1326      roffset=m_samplesize*tid;
1327      offset=roffset;
1328      size_t loop=0;
1329      size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1330      size_t outstep=getNoValues();
1331      size_t instep=m_left->getNoValues();
1332      switch (m_op)
1333      {
1334        case SWAP:
1335            for (loop=0;loop<numsteps;++loop)
1336            {
1337                escript::swapaxes(*leftres,m_left->getShape(),subroffset, m_samples, getShape(),offset, m_axis_offset, m_transpose);
1338                subroffset+=instep;
1339                offset+=outstep;
1340            }
1341            break;
1342        default:
1343            throw DataException("Programmer error - resolveNodeNP1OUT2P can not resolve operator "+opToString(m_op)+".");
1344      }
1345      return &m_samples;
1346    }
1347    
1348    const DataTypes::RealVectorType*
1349    DataLazy::resolveNodeCondEval(int tid, int sampleNo, size_t& roffset) const
1350    {
1351      if (m_readytype!='E')
1352      {
1353        throw DataException("Programmer error - resolveNodeCondEval should only be called on expanded Data.");
1354      }
1355      if (m_op!=CONDEVAL)
1356      {
1357        throw DataException("Programmer error - resolveNodeCondEval should only be called on CONDEVAL nodes.");
1358      }
1359      size_t subroffset;
1360    
1361      const RealVectorType* maskres=m_mask->resolveNodeSample(tid, sampleNo, subroffset);
1362      const RealVectorType* srcres=0;
1363      if ((*maskres)[subroffset]>0)
1364      {
1365            srcres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1366      }
1367      else
1368      {
1369            srcres=m_right->resolveNodeSample(tid, sampleNo, subroffset);
1370      }
1371    
1372      // Now we need to copy the result
1373    
1374      roffset=m_samplesize*tid;
1375      for (int i=0;i<m_samplesize;++i)
1376      {
1377            m_samples[roffset+i]=(*srcres)[subroffset+i];  
1378      }
1379    
1380      return &m_samples;
1381    }
1382    
// This method assumes that any subexpressions which evaluate to Constant or Tagged Data
// have already been collapsed to IDENTITY. So we must have at least one expanded child.
// If both children are expanded, then we can process them in a single operation (we treat
// the whole sample as one big datapoint).
// If one of the children is not expanded, then we need to treat each point in the sample
// individually.
// There is an additional complication when scalar operations are considered.
// For example, 2+Vector.
// In this case each double within the point is treated individually.
//
// The inner/outer loop parameters (chunksize, *step, numsteps, o*step, onumsteps)
// describe how the left and right read cursors advance relative to the output;
// they are handed to binaryOpVectorLazyHelper, which performs the actual arithmetic.
const DataTypes::RealVectorType*
DataLazy::resolveNodeBinary(int tid, int sampleNo, size_t& roffset) const
{
LAZYDEBUG(cout << "Resolve binary: " << toString() << endl;)

  size_t lroffset=0, rroffset=0;        // offsets in the left and right result vectors
        // first work out which of the children are expanded
  bool leftExp=(m_left->m_readytype=='E');
  bool rightExp=(m_right->m_readytype=='E');
  if (!leftExp && !rightExp)
  {
        throw DataException("Programmer Error - please use collapse if neither argument has type 'E'.");
  }
  bool leftScalar=(m_left->getRank()==0);
  bool rightScalar=(m_right->getRank()==0);
  if ((m_left->getRank()!=m_right->getRank()) && (!leftScalar && !rightScalar))
  {
        throw DataException("resolveBinary - ranks of arguments must match unless one of them is scalar.");
  }
  size_t leftsize=m_left->getNoValues();
  size_t rightsize=m_right->getNoValues();
  size_t chunksize=1;                   // how many doubles will be processed in one go
  int leftstep=0;               // how far should the left offset advance after each step
  int rightstep=0;
  int numsteps=0;               // total number of steps for the inner loop
  int oleftstep=0;      // the o variables refer to the outer loop
  int orightstep=0;     // The outer loop is only required in cases where there is an extended scalar
  int onumsteps=1;

  // Classify each child by (expanded?, scalar?) — exactly one flag per side holds.
  bool LES=(leftExp && leftScalar);     // Left is an expanded scalar
  bool RES=(rightExp && rightScalar);
  bool LS=(!leftExp && leftScalar);     // left is a single scalar
  bool RS=(!rightExp && rightScalar);
  bool LN=(!leftExp && !leftScalar);    // left is a single non-scalar
  bool RN=(!rightExp && !rightScalar);
  bool LEN=(leftExp && !leftScalar);    // left is an expanded non-scalar
  bool REN=(rightExp && !rightScalar);

  if ((LES && RES) || (LEN && REN))     // both are Expanded scalars or both are expanded non-scalars
  {
        // matching layouts: treat the whole sample as one chunk
        chunksize=m_left->getNumDPPSample()*leftsize;
        leftstep=0;
        rightstep=0;
        numsteps=1;
  }
  else if (LES || RES)
  {
        chunksize=1;
        if (LES)                // left is an expanded scalar
        {
                if (RS)
                {
                   leftstep=1;
                   rightstep=0;
                   numsteps=m_left->getNumDPPSample();
                }
                else            // RN or REN
                {
                   leftstep=0;
                   oleftstep=1;
                   rightstep=1;
                   // negative outer step rewinds a non-expanded right back to its start
                   orightstep=(RN ? -(int)rightsize : 0);
                   numsteps=rightsize;
                   onumsteps=m_left->getNumDPPSample();
                }
        }
        else            // right is an expanded scalar
        {
                if (LS)
                {
                   rightstep=1;
                   leftstep=0;
                   numsteps=m_right->getNumDPPSample();
                }
                else
                {
                   rightstep=0;
                   orightstep=1;
                   leftstep=1;
                   // negative outer step rewinds a non-expanded left back to its start
                   oleftstep=(LN ? -(int)leftsize : 0);
                   numsteps=leftsize;
                   onumsteps=m_right->getNumDPPSample();
                }
        }
  }
  else  // this leaves (LEN, RS), (LEN, RN) and their transposes
  {
        if (LEN)        // and Right will be a single value
        {
                chunksize=rightsize;
                leftstep=rightsize;
                rightstep=0;
                numsteps=m_left->getNumDPPSample();
                if (RS)
                {
                   numsteps*=leftsize;
                }
        }
        else    // REN
        {
                chunksize=leftsize;
                rightstep=leftsize;
                leftstep=0;
                numsteps=m_right->getNumDPPSample();
                if (LS)
                {
                   numsteps*=rightsize;
                }
        }
  }

  int resultStep=max(leftstep,rightstep);       // only one (at most) should be !=0
        // Get the values of sub-expressions
  const RealVectorType* left=m_left->resolveNodeSample(tid,sampleNo,lroffset);
  const RealVectorType* right=m_right->resolveNodeSample(tid,sampleNo,rroffset);
LAZYDEBUG(cout << "Post sub calls in " << toString() << endl;)
LAZYDEBUG(cout << "shapes=" << DataTypes::shapeToString(m_left->getShape()) << "," << DataTypes::shapeToString(m_right->getShape()) << endl;)
LAZYDEBUG(cout << "chunksize=" << chunksize << endl << "leftstep=" << leftstep << " rightstep=" << rightstep;)
LAZYDEBUG(cout << " numsteps=" << numsteps << endl << "oleftstep=" << oleftstep << " orightstep=" << orightstep;)
LAZYDEBUG(cout << "onumsteps=" << onumsteps << endl;)
LAZYDEBUG(cout << " DPPS=" << m_left->getNumDPPSample() << "," <<m_right->getNumDPPSample() << endl;)
LAZYDEBUG(cout << "" << LS << RS << LN << RN << LES << RES <<LEN << REN <<   endl;)

LAZYDEBUG(cout << "Left res["<< lroffset<< "]=" << (*left)[lroffset] << endl;)
LAZYDEBUG(cout << "Right res["<< rroffset<< "]=" << (*right)[rroffset] << endl;)


  roffset=m_samplesize*tid;
  double* resultp=&(m_samples[roffset]);                // results are stored at the vector offset we received
  switch(m_op)
  {
    case ADD:
        //PROC_OP(NO_ARG,plus<double>());
      escript::binaryOpVectorLazyHelper<real_t, real_t, real_t>(resultp,
             &(*left)[0],
             &(*right)[0],
             chunksize,
             onumsteps,
             numsteps,
             resultStep,
             leftstep,
             rightstep,
             oleftstep,
             orightstep,
             lroffset,
             rroffset,
             escript::ESFunction::PLUSF);
        break;
    case SUB:
      escript::binaryOpVectorLazyHelper<real_t, real_t, real_t>(resultp,
             &(*left)[0],
             &(*right)[0],
             chunksize,
             onumsteps,
             numsteps,
             resultStep,
             leftstep,
             rightstep,
             oleftstep,
             orightstep,
             lroffset,
             rroffset,
             escript::ESFunction::MINUSF);
        //PROC_OP(NO_ARG,minus<double>());
        break;
    case MUL:
        //PROC_OP(NO_ARG,multiplies<double>());
      escript::binaryOpVectorLazyHelper<real_t, real_t, real_t>(resultp,
             &(*left)[0],
             &(*right)[0],
             chunksize,
             onumsteps,
             numsteps,
             resultStep,
             leftstep,
             rightstep,
             oleftstep,
             orightstep,
             lroffset,
             rroffset,
             escript::ESFunction::MULTIPLIESF);
        break;
    case DIV:
        //PROC_OP(NO_ARG,divides<double>());
      escript::binaryOpVectorLazyHelper<real_t, real_t, real_t>(resultp,
             &(*left)[0],
             &(*right)[0],
             chunksize,
             onumsteps,
             numsteps,
             resultStep,
             leftstep,
             rightstep,
             oleftstep,
             orightstep,
             lroffset,
             rroffset,
             escript::ESFunction::DIVIDESF);
        break;
    case POW:
       //PROC_OP(double (double,double),::pow);
      escript::binaryOpVectorLazyHelper<real_t, real_t, real_t>(resultp,
             &(*left)[0],
             &(*right)[0],
             chunksize,
             onumsteps,
             numsteps,
             resultStep,
             leftstep,
             rightstep,
             oleftstep,
             orightstep,
             lroffset,
             rroffset,
             escript::ESFunction::POWF);
        break;
    default:
        throw DataException("Programmer error - resolveBinary can not resolve operator "+opToString(m_op)+".");
  }
LAZYDEBUG(cout << "Result res[" << roffset<< "]" << m_samples[roffset] << endl;)
  return &m_samples;
}
1614    
1615    
1616    // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1617    // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1618    // unlike the other resolve helpers, we must treat these datapoints separately.
1619    const DataTypes::RealVectorType*
1620    DataLazy::resolveNodeTProd(int tid, int sampleNo, size_t& roffset) const
1621    {
1622    LAZYDEBUG(cout << "Resolve TensorProduct: " << toString() << endl;)
1623    
1624      size_t lroffset=0, rroffset=0;        // offsets in the left and right result vectors
1625            // first work out which of the children are expanded
1626      bool leftExp=(m_left->m_readytype=='E');
1627      bool rightExp=(m_right->m_readytype=='E');
1628      int steps=getNumDPPSample();
1629      int leftStep=(leftExp? m_left->getNoValues() : 0);            // do not have scalars as input to this method
1630      int rightStep=(rightExp?m_right->getNoValues() : 0);
1631    
1632      int resultStep=getNoValues();
1633      roffset=m_samplesize*tid;
1634      size_t offset=roffset;
1635    
1636      const RealVectorType* left=m_left->resolveNodeSample(tid, sampleNo, lroffset);
1637    
1638      const RealVectorType* right=m_right->resolveNodeSample(tid, sampleNo, rroffset);
1639    
1640    LAZYDEBUG(cerr << "[Left shape]=" << DataTypes::shapeToString(m_left->getShape()) << "\n[Right shape]=" << DataTypes::shapeToString(m_right->getShape()) << " result=" <<DataTypes::shapeToString(getShape()) <<  endl;
1641    cout << getNoValues() << endl;)
1642    
1643    
1644    LAZYDEBUG(cerr << "Post sub calls: " << toString() << endl;)
1645    LAZYDEBUG(cout << "LeftExp=" << leftExp << " rightExp=" << rightExp << endl;)
1646    LAZYDEBUG(cout << "LeftR=" << m_left->getRank() << " rightExp=" << m_right->getRank() << endl;)
1647    LAZYDEBUG(cout << "LeftSize=" << m_left->getNoValues() << " RightSize=" << m_right->getNoValues() << endl;)
1648    LAZYDEBUG(cout << "m_samplesize=" << m_samplesize << endl;)
1649    LAZYDEBUG(cout << "outputshape=" << DataTypes::shapeToString(getShape()) << endl;)
1650    LAZYDEBUG(cout << "DPPS=" << m_right->getNumDPPSample() <<"."<<endl;)
1651    
1652      double* resultp=&(m_samples[offset]);         // results are stored at the vector offset we received
1653      switch(m_op)
1654      {
1655        case PROD:
1656            for (int i=0;i<steps;++i,resultp+=resultStep)
1657            {
1658              const double *ptr_0 = &((*left)[lroffset]);
1659              const double *ptr_1 = &((*right)[rroffset]);
1660    
1661    LAZYDEBUG(cout << DataTypes::pointToString(*left, m_left->getShape(),lroffset,"LEFT") << endl;)
1662    LAZYDEBUG(cout << DataTypes::pointToString(*right,m_right->getShape(),rroffset, "RIGHT") << endl;)
1663    
1664              matrix_matrix_product(m_SL, m_SM, m_SR, ptr_0, ptr_1, resultp, m_transpose);
1665    
1666              lroffset+=leftStep;
1667              rroffset+=rightStep;
1668            }
1669            break;
1670        default:
1671            throw DataException("Programmer error - resolveTProduct can not resolve operator "+opToString(m_op)+".");
1672      }
1673      roffset=offset;
1674      return &m_samples;
1675    }
1676    
1677    
1678    const DataTypes::RealVectorType*
1679    DataLazy::resolveSample(int sampleNo, size_t& roffset) const
1680    {
1681    #ifdef _OPENMP
1682            int tid=omp_get_thread_num();
1683    #else
1684            int tid=0;
1685    #endif
1686    
1687    #ifdef LAZY_STACK_PROF
1688            stackstart[tid]=&tid;
1689            stackend[tid]=&tid;
1690            const DataTypes::RealVectorType* r=resolveNodeSample(tid, sampleNo, roffset);
1691            size_t d=(size_t)stackstart[tid]-(size_t)stackend[tid];
1692            #pragma omp critical
1693            if (d>maxstackuse)
1694            {
1695    cout << "Max resolve Stack use " << d << endl;
1696                    maxstackuse=d;
1697            }
1698            return r;
1699    #else
1700            return resolveNodeSample(tid, sampleNo, roffset);
1701    #endif
1702  }  }
1703    
1704    
1705    // This needs to do the work of the identity constructor
1706    void
1707    DataLazy::resolveToIdentity()
1708    {
1709       if (m_op==IDENTITY)
1710            return;
1711       DataReady_ptr p=resolveNodeWorker();
1712       makeIdentity(p);
1713    }
1714    
1715    void DataLazy::makeIdentity(const DataReady_ptr& p)
1716    {
1717       m_op=IDENTITY;
1718       m_axis_offset=0;
1719       m_transpose=0;
1720       m_SL=m_SM=m_SR=0;
1721       m_children=m_height=0;
1722       m_id=p;
1723       if(p->isConstant()) {m_readytype='C';}
1724       else if(p->isExpanded()) {m_readytype='E';}
1725       else if (p->isTagged()) {m_readytype='T';}
1726       else {throw DataException("Unknown DataReady instance in convertToIdentity constructor.");}
1727       m_samplesize=p->getNumDPPSample()*p->getNoValues();
1728       m_left.reset();
1729       m_right.reset();
1730    }
1731    
1732    
// Resolve this lazy expression to a DataReady instance.
// Collapses the whole tree into an IDENTITY node (via resolveToIdentity)
// and returns the wrapped DataReady.
DataReady_ptr
DataLazy::resolve()
{
    resolveToIdentity();
    return m_id;
}
/* This is really a static method but I think that caused problems in windows */
// Resolve a group of lazy expressions together.
// Non-expanded expressions are collapsed first; expressions which are already
// IDENTITY need no work. If all remaining expressions share one FunctionSpace,
// they are resolved sample-by-sample in a single parallel sweep (so shared
// subexpressions benefit from the shared per-thread buffers); otherwise each
// is resolved independently.
void
DataLazy::resolveGroupWorker(std::vector<DataLazy*>& dats)
{
  if (dats.empty())
  {
        return;
  }
  vector<DataLazy*> work;
  FunctionSpace fs=dats[0]->getFunctionSpace();
  bool match=true;
  for (int i=dats.size()-1;i>=0;--i)
  {
        if (dats[i]->m_readytype!='E')
        {
                dats[i]->collapse();
        }
        if (dats[i]->m_op!=IDENTITY)
        {
                work.push_back(dats[i]);
                if (fs!=dats[i]->getFunctionSpace())
                {
                        match=false;
                }
        }
  }
  if (work.empty())
  {
        return;         // no work to do
  }
  if (match)    // all functionspaces match.  Yes I realise this is overly strict
  {             // it is possible that dats[0] is one of the objects which we discarded and
                // all the other functionspaces match.
        // Allocate one output DataExpanded per expression up front.
        vector<DataExpanded*> dep;
        vector<RealVectorType*> vecs;
        for (int i=0;i<work.size();++i)
        {
                dep.push_back(new DataExpanded(fs,work[i]->getShape(), RealVectorType(work[i]->getNoValues())));
                vecs.push_back(&(dep[i]->getVectorRW()));
        }
        int totalsamples=work[0]->getNumSamples();
        const RealVectorType* res=0; // Storage for answer
        int sample;
        #pragma omp parallel private(sample, res)
        {
            size_t roffset=0;
            #pragma omp for schedule(static)
            for (sample=0;sample<totalsamples;++sample)
            {
                roffset=0;
                int j;
                for (j=work.size()-1;j>=0;--j)
                {
#ifdef _OPENMP
                    res=work[j]->resolveNodeSample(omp_get_thread_num(),sample,roffset);
#else
                    res=work[j]->resolveNodeSample(0,sample,roffset);
#endif
                    // copy this expression's sample into its output object
                    RealVectorType::size_type outoffset=dep[j]->getPointOffset(sample,0);
                    memcpy(&((*vecs[j])[outoffset]),&((*res)[roffset]),work[j]->m_samplesize*sizeof(RealVectorType::ElementType));
                }
            }
        }
        // Now we need to load the new results as identity ops into the lazy nodes
        for (int i=work.size()-1;i>=0;--i)
        {
            work[i]->makeIdentity(REFCOUNTNS::dynamic_pointer_cast<DataReady>(dep[i]->getPtr()));
        }
  }
  else  // functionspaces do not match
  {
        for (int i=0;i<work.size();++i)
        {
                work[i]->resolveToIdentity();
        }
  }
}
1818    
1819    
1820    
// This version of resolve uses storage in each node to hold results.
// Evaluates the whole expression into a fresh DataExpanded, sample by sample,
// in parallel. Constant/Tagged sub-expressions are collapsed first; a pure
// IDENTITY expression just returns its wrapped data.
DataReady_ptr
DataLazy::resolveNodeWorker()
{
  if (m_readytype!='E')         // if the whole sub-expression is Constant or Tagged, then evaluate it normally
  {
    collapse();
  }
  if (m_op==IDENTITY)           // So a lazy expression of Constant or Tagged data will be returned here.
  {
    return m_id;
  }
        // from this point on we must have m_op!=IDENTITY and m_readytype=='E'
  DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(),  RealVectorType(getNoValues()));
  RealVectorType& resvec=result->getVectorRW();
  DataReady_ptr resptr=DataReady_ptr(result);

  int sample;
  int totalsamples=getNumSamples();
  const RealVectorType* res=0;       // Storage for answer
LAZYDEBUG(cout << "Total number of samples=" <<totalsamples << endl;)
  #pragma omp parallel private(sample,res)
  {
        size_t roffset=0;
#ifdef LAZY_STACK_PROF
        // record this thread's stack extent so peak usage can be reported
        stackstart[omp_get_thread_num()]=&roffset;
        stackend[omp_get_thread_num()]=&roffset;
#endif
        #pragma omp for schedule(static)
        for (sample=0;sample<totalsamples;++sample)
        {
                roffset=0;
#ifdef _OPENMP
                res=resolveNodeSample(omp_get_thread_num(),sample,roffset);
#else
                res=resolveNodeSample(0,sample,roffset);
#endif
LAZYDEBUG(cout << "Sample #" << sample << endl;)
LAZYDEBUG(cout << "Final res[" << roffset<< "]=" << (*res)[roffset] << (*res)[roffset]<< endl; )
                // copy the resolved sample into the output object
                RealVectorType::size_type outoffset=result->getPointOffset(sample,0);
                memcpy(&(resvec[outoffset]),&((*res)[roffset]),m_samplesize*sizeof(RealVectorType::ElementType));
        }
  }
#ifdef LAZY_STACK_PROF
  for (int i=0;i<getNumberOfThreads();++i)
  {
        size_t r=((size_t)stackstart[i] - (size_t)stackend[i]);
//      cout << i << " " << stackstart[i] << " .. " << stackend[i] << " = " <<  r << endl;
        if (r>maxstackuse)
        {
                maxstackuse=r;
        }
  }
  cout << "Max resolve Stack use=" << maxstackuse << endl;
#endif
  return resptr;
}
1878    
# Line 430  std::string Line 1880  std::string
1880  DataLazy::toString() const  DataLazy::toString() const
1881  {  {
1882    ostringstream oss;    ostringstream oss;
1883    oss << "Lazy Data:";    oss << "Lazy Data: [depth=" << m_height<< "] ";
1884    intoString(oss);    switch (escriptParams.getLAZY_STR_FMT())
1885      {
1886      case 1:       // tree format
1887            oss << endl;
1888            intoTreeString(oss,"");
1889            break;
1890      case 2:       // just the depth
1891            break;
1892      default:
1893            intoString(oss);
1894            break;
1895      }
1896    return oss.str();    return oss.str();
1897  }  }
1898    
1899    
// Append a compact, single-line rendering of this expression to oss.
// Leaves print their ready-type letter (E/T/C) and the address of the wrapped
// data; interior nodes recurse into their children with operator notation.
void
DataLazy::intoString(ostringstream& oss) const
{
//    oss << "[" << m_children <<";"<<m_height <<"]";
  switch (getOpgroup(m_op))
  {
  case G_IDENTITY:
        if (m_id->isExpanded())
        {
           oss << "E";
        }
        else if (m_id->isTagged())
        {
          oss << "T";
        }
        else if (m_id->isConstant())
        {
          oss << "C";
        }
        else
        {
          oss << "?";
        }
        oss << '@' << m_id.get();
        break;
  case G_BINARY:
        // infix: (left op right)
        oss << '(';
        m_left->intoString(oss);
        oss << ' ' << opToString(m_op) << ' ';
        m_right->intoString(oss);
        oss << ')';
        break;
  case G_UNARY:
  case G_UNARY_P:
  case G_NP1OUT:
  case G_NP1OUT_P:
  case G_REDUCTION:
        // prefix: op(left)
        oss << opToString(m_op) << '(';
        m_left->intoString(oss);
        oss << ')';
        break;
  case G_TENSORPROD:
        oss << opToString(m_op) << '(';
        m_left->intoString(oss);
        oss << ", ";
        m_right->intoString(oss);
        oss << ')';
        break;
  case G_NP1OUT_2P:
        // includes the axis_offset and transpose parameters
        oss << opToString(m_op) << '(';
        m_left->intoString(oss);
        oss << ", " << m_axis_offset << ", " << m_transpose;
        oss << ')';
        break;
  case G_CONDEVAL:
        // ternary: (mask ? left : right)
        oss << opToString(m_op)<< '(' ;
        m_mask->intoString(oss);
        oss << " ? ";
        m_left->intoString(oss);
        oss << " : ";
        m_right->intoString(oss);
        oss << ')';
        break;
  default:
        oss << "UNKNOWN";
  }
}
1967    
1968    
// Append a multi-line tree rendering of this expression to oss.
// Each line shows [rank:samplesize], an indent of '.' per tree level,
// and the node's operator (or, for leaves, the ready-type letter and address).
void
DataLazy::intoTreeString(ostringstream& oss, string indent) const
{
  oss << '[' << m_rank << ':' << setw(3) << m_samplesize << "] " << indent;
  switch (getOpgroup(m_op))
  {
  case G_IDENTITY:
        if (m_id->isExpanded())
        {
           oss << "E";
        }
        else if (m_id->isTagged())
        {
          oss << "T";
        }
        else if (m_id->isConstant())
        {
          oss << "C";
        }
        else
        {
          oss << "?";
        }
        oss << '@' << m_id.get() << endl;
        break;
  case G_BINARY:
        oss << opToString(m_op) << endl;
        indent+='.';            // children are one level deeper
        m_left->intoTreeString(oss, indent);
        m_right->intoTreeString(oss, indent);
        break;
  case G_UNARY:
  case G_UNARY_P:
  case G_NP1OUT:
  case G_NP1OUT_P:
  case G_REDUCTION:
        oss << opToString(m_op) << endl;
        indent+='.';
        m_left->intoTreeString(oss, indent);
        break;
  case G_TENSORPROD:
        oss << opToString(m_op) << endl;
        indent+='.';
        m_left->intoTreeString(oss, indent);
        m_right->intoTreeString(oss, indent);
        break;
  case G_NP1OUT_2P:
        // includes the axis_offset and transpose parameters
        oss << opToString(m_op) << ", " << m_axis_offset << ", " << m_transpose<< endl;
        indent+='.';
        m_left->intoTreeString(oss, indent);
        break;
  default:
        oss << "UNKNOWN";
  }
}
2024    
// Make a deep copy of this expression tree: each node is rebuilt with
// deep-copied children via the constructor matching its operator group.
// @throws DataException for operator groups with no known reconstruction.
DataAbstract*
DataLazy::deepCopy() const
{
  switch (getOpgroup(m_op))
  {
  case G_IDENTITY:  return new DataLazy(m_id->deepCopy()->getPtr());
  case G_UNARY:
  case G_REDUCTION:      return new DataLazy(m_left->deepCopy()->getPtr(),m_op);
  case G_UNARY_P:       return new DataLazy(m_left->deepCopy()->getPtr(), m_op, m_tol);
  case G_BINARY:        return new DataLazy(m_left->deepCopy()->getPtr(),m_right->deepCopy()->getPtr(),m_op);
  case G_NP1OUT: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(),m_op);
  case G_TENSORPROD: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
  case G_NP1OUT_P:   return new DataLazy(m_left->deepCopy()->getPtr(),m_op,  m_axis_offset);
  case G_NP1OUT_2P:  return new DataLazy(m_left->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
  default:
        throw DataException("Programmer error - do not know how to deepcopy operator "+opToString(m_op)+".");
  }
}
2044    
2045    
// There is no single, natural interpretation of getLength on DataLazy.
// Instances of DataReady can look at the size of their vectors.
// For lazy though, it could be the size the data would be if it were resolved;
// or it could be some function of the lengths of the DataReady instances which
// form part of the expression.
// Rather than have people making assumptions, I have disabled the method.
// @throws DataException always.
DataTypes::RealVectorType::size_type
DataLazy::getLength() const
{
  throw DataException("getLength() does not make sense for lazy data.");
}
2058    
2059    
# Line 486  DataLazy::getSlice(const DataTypes::Regi Line 2063  DataLazy::getSlice(const DataTypes::Regi
2063    throw DataException("getSlice - not implemented for Lazy objects.");    throw DataException("getSlice - not implemented for Lazy objects.");
2064  }  }
2065    
// To do this we need to rely on our child nodes.
// Non-const variant: may collapse a Constant/Tagged expression in place
// so the offset can be taken from the resulting identity data.
DataTypes::RealVectorType::size_type
DataLazy::getPointOffset(int sampleNo,
                 int dataPointNo)
{
  if (m_op==IDENTITY)
  {
        return m_id->getPointOffset(sampleNo,dataPointNo);
  }
  if (m_readytype!='E')
  {
        collapse();
        return m_id->getPointOffset(sampleNo,dataPointNo);
  }
  // at this point we do not have an identity node and the expression will be Expanded
  // so we only need to know which child to ask
  if (m_left->m_readytype=='E')
  {
        return m_left->getPointOffset(sampleNo,dataPointNo);
  }
  else
  {
        return m_right->getPointOffset(sampleNo,dataPointNo);
  }
}
2092    
// To do this we need to rely on our child nodes.
// Const variant: cannot collapse, so Tagged expressions (which would need
// collapsing to answer) raise an exception instead.
DataTypes::RealVectorType::size_type
DataLazy::getPointOffset(int sampleNo,
                 int dataPointNo) const
{
  if (m_op==IDENTITY)
  {
        return m_id->getPointOffset(sampleNo,dataPointNo);
  }
  if (m_readytype=='E')
  {
    // at this point we do not have an identity node and the expression will be Expanded
    // so we only need to know which child to ask
    if (m_left->m_readytype=='E')
    {
        return m_left->getPointOffset(sampleNo,dataPointNo);
    }
    else
    {
        return m_right->getPointOffset(sampleNo,dataPointNo);
    }
  }
  if (m_readytype=='C')
  {
        return m_left->getPointOffset(sampleNo,dataPointNo); // which child doesn't matter
  }
  throw DataException("Programmer error - getPointOffset on lazy data may require collapsing (but this object is marked const).");
}
2121    
2122    
// I have decided to let Data:: handle this issue.
// DataLazy objects are treated as read-only, so in-place zeroing is rejected;
// the commented-out block shows how it would be done if that ever changes.
void
DataLazy::setToZero()
{
//   DataTypes::RealVectorType v(getNoValues(),0);
//   m_id=DataReady_ptr(new DataConstant(getFunctionSpace(),getShape(),v));
//   m_op=IDENTITY;
//   m_right.reset();
//   m_left.reset();
//   m_readytype='C';
//   m_buffsRequired=1;

  (void)privdebug;  // to stop the compiler complaining about unused privdebug
  throw DataException("Programmer error - setToZero not supported for DataLazy (DataLazy objects should be read only).");
}
2138    
// True when the resolved result of this expression will be expanded ('E') data.
bool
DataLazy::actsExpanded() const
{
        return (m_readytype=='E');
}
2144    
2145    } // end namespace
2146    

Legend:
Removed from v.1888  
changed lines
  Added in v.6144

  ViewVC Help
Powered by ViewVC 1.1.26