/[escript]/branches/clazy/escriptcore/src/DataLazy.cpp
ViewVC logotype

Diff of /branches/clazy/escriptcore/src/DataLazy.cpp

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

branches/schroedinger/escript/src/DataLazy.cpp revision 1886 by jfenwick, Wed Oct 15 01:34:18 2008 UTC trunk/escriptcore/src/DataLazy.cpp revision 5997 by caltinay, Mon Feb 29 07:24:47 2016 UTC
# Line 1  Line 1 
1    
2  /*******************************************************  /*****************************************************************************
3  *  *
4  * Copyright (c) 2003-2008 by University of Queensland  * Copyright (c) 2003-2016 by The University of Queensland
5  * Earth Systems Science Computational Center (ESSCC)  * http://www.uq.edu.au
 * http://www.uq.edu.au/esscc  
6  *  *
7  * Primary Business: Queensland, Australia  * Primary Business: Queensland, Australia
8  * Licensed under the Open Software License version 3.0  * Licensed under the Open Software License version 3.0
9  * http://www.opensource.org/licenses/osl-3.0.php  * http://www.opensource.org/licenses/osl-3.0.php
10  *  *
11  *******************************************************/  * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12    * Development 2012-2013 by School of Earth Sciences
13    * Development from 2014 by Centre for Geoscience Computing (GeoComp)
14    *
15    *****************************************************************************/
16    
17    #define ESNEEDPYTHON
18    #include "esysUtils/first.h"
19    
20  #include "DataLazy.h"  #include "DataLazy.h"
21    #include "Data.h"
22    #include "DataTypes.h"
23    #include "EscriptParams.h"
24    #include "FunctionSpace.h"
25    #include "UnaryFuncs.h"    // for escript::fsign
26    #include "Utils.h"
27    
28  #ifdef USE_NETCDF  #ifdef USE_NETCDF
29  #include <netcdfcpp.h>  #include <netcdfcpp.h>
30  #endif  #endif
31  #ifdef PASO_MPI  
32  #include <mpi.h>  #include <iomanip> // for some fancy formatting in debug
33  #endif  
34  #ifdef _OPENMP  using namespace escript::DataTypes;
35  #include <omp.h>  
36  #endif  #define NO_ARG
37  #include "FunctionSpace.h"  
38  #include "DataTypes.h"  // #define LAZYDEBUG(X) if (privdebug){X;}
39  #include "Data.h"  #define LAZYDEBUG(X)
40  #include "UnaryFuncs.h"     // for escript::fsign  namespace
41    {
42    bool privdebug=false;
43    
44    #define ENABLEDEBUG privdebug=true;
45    #define DISABLEDEBUG privdebug=false;
46    }
47    
48    // #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {cerr << "\n!!!!!!! SIZE LIMIT EXCEEDED " << m_children << ";" << m_height << endl << toString() << endl;resolveToIdentity();}
49    
50    // #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {cerr << "SIZE LIMIT EXCEEDED " << m_height << endl;resolveToIdentity();}
51    
52    
53    #define SIZELIMIT if (m_height>escript::escriptParams.getTOO_MANY_LEVELS())  {if (escript::escriptParams.getLAZY_VERBOSE()){cerr << "SIZE LIMIT EXCEEDED height=" << m_height << endl;}resolveToIdentity();}
54    
55    /*
56    How does DataLazy work?
57    ~~~~~~~~~~~~~~~~~~~~~~~
58    
59    Each instance represents a single operation on one or two other DataLazy instances. These arguments are normally
60    denoted left and right.
61    
62    A special operation, IDENTITY, stores an instance of DataReady in the m_id member.
63    This means that all "internal" nodes in the structure are instances of DataLazy.
64    
65    Each operation has a string representation as well as an opgroup - eg G_IDENTITY, G_BINARY, ...
66    Note that IDENTITY is not considered a unary operation.
67    
68    I am avoiding calling the structure formed a tree because it is not guaranteed to be one (eg c=a+a).
69    It must however form a DAG (directed acyclic graph).
70    I will refer to individual DataLazy objects with the structure as nodes.
71    
72    Each node also stores:
73    - m_readytype \in {'E','T','C','?'} ~ indicates what sort of DataReady would be produced if the expression was
74            evaluated.
75    - m_buffsrequired ~ the large number of samples which would need to be kept simultaneously in order to
76            evaluate the expression.
77    - m_samplesize ~ the number of doubles stored in a sample.
78    
79    When a new node is created, the above values are computed based on the values in the child nodes.
80    Eg: if left requires 4 samples and right requires 6 then left+right requires 7 samples.
81    
82    The resolve method, which produces a DataReady from a DataLazy, does the following:
83    1) Create a DataReady to hold the new result.
84    2) Allocate a vector (v) big enough to hold m_buffsrequired samples.
85    3) For each sample, call resolveSample with v, to get its values and copy them into the result object.
86    
87    (In the case of OMP, multiple samples are resolved in parallel so the vector needs to be larger.)
88    
89    resolveSample returns a Vector* and an offset within that vector where the result is stored.
90    Normally, this would be v, but for identity nodes their internal vector is returned instead.
91    
92    The convention that I use, is that the resolve methods should store their results starting at the offset they are passed.
93    
94    For expressions which evaluate to Constant or Tagged, there is a different evaluation method.
95    The collapse method invokes the (non-lazy) operations on the Data class to evaluate the expression.
96    
97    To add a new operator you need to do the following (plus anything I might have forgotten - adding a new group for example):
98    1) Add to the ES_optype.
99    2) determine what opgroup your operation belongs to (X)
100    3) add a string for the op to the end of ES_opstrings
101    4) increase ES_opcount
102    5) add an entry (X) to opgroups
103    6) add an entry to the switch in collapseToReady
104    7) add an entry to resolveX
105    */
106    
107    
108  using namespace std;  using namespace std;
109  using namespace boost;  using namespace boost;
# Line 33  using namespace boost; Line 111  using namespace boost;
111  namespace escript  namespace escript
112  {  {
113    
 const std::string&  
 opToString(ES_optype op);  
   
114  namespace  namespace
115  {  {
116    
117    
118    // enabling this will print out when ever the maximum stacksize used by resolve increases
119    // it assumes _OPENMP is also in use
120    //#define LAZY_STACK_PROF
121    
122    
123    
124    #ifndef _OPENMP
125      #ifdef LAZY_STACK_PROF
126      #undef LAZY_STACK_PROF
127      #endif
128    #endif
129    
130    
131    #ifdef LAZY_STACK_PROF
132    std::vector<void*> stackstart(getNumberOfThreads());
133    std::vector<void*> stackend(getNumberOfThreads());
134    size_t maxstackuse=0;
135    #endif
136    
137  enum ES_opgroup  enum ES_opgroup
138  {  {
139     G_UNKNOWN,     G_UNKNOWN,
140     G_IDENTITY,     G_IDENTITY,
141     G_BINARY,     G_BINARY,            // pointwise operations with two arguments
142     G_UNARY     G_UNARY,             // pointwise operations with one argument
143       G_UNARY_P,           // pointwise operations with one argument, requiring a parameter
144       G_NP1OUT,            // non-pointwise op with one output
145       G_NP1OUT_P,          // non-pointwise op with one output requiring a parameter
146       G_TENSORPROD,        // general tensor product
147       G_NP1OUT_2P,         // non-pointwise op with one output requiring two params
148       G_REDUCTION,         // non-pointwise unary op with a scalar output
149       G_CONDEVAL
150  };  };
151    
152    
153    
154    
155  string ES_opstrings[]={"UNKNOWN","IDENTITY","+","-","*","/","sin","cos","tan",  string ES_opstrings[]={"UNKNOWN","IDENTITY","+","-","*","/","^",
156              "asin","acos","atan","sinh","cosh","tanh","erf",                          "sin","cos","tan",
157              "asinh","acosh","atanh",                          "asin","acos","atan","sinh","cosh","tanh","erf",
158              "log10","log","sign","abs","neg","pos"};                          "asinh","acosh","atanh",
159  int ES_opcount=25;                          "log10","log","sign","abs","neg","pos","exp","sqrt",
160  ES_opgroup opgroups[]={G_UNKNOWN,G_IDENTITY,G_BINARY,G_BINARY,G_BINARY,G_BINARY,G_UNARY,G_UNARY,G_UNARY, //9                          "1/","where>0","where<0","where>=0","where<=0", "where<>0","where=0",
161              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,    // 16                          "symmetric","nonsymmetric",
162              G_UNARY,G_UNARY,G_UNARY,                    // 19                          "prod",
163              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY};       // 25                          "transpose", "trace",
164                            "swapaxes",
165                            "minval", "maxval",
166                            "condEval"};
167    int ES_opcount=44;
168    ES_opgroup opgroups[]={G_UNKNOWN,G_IDENTITY,G_BINARY,G_BINARY,G_BINARY,G_BINARY, G_BINARY,
169                            G_UNARY,G_UNARY,G_UNARY, //10
170                            G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,        // 17
171                            G_UNARY,G_UNARY,G_UNARY,                                        // 20
172                            G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,        // 28
173                            G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY, G_UNARY_P, G_UNARY_P,          // 35
174                            G_NP1OUT,G_NP1OUT,
175                            G_TENSORPROD,
176                            G_NP1OUT_P, G_NP1OUT_P,
177                            G_NP1OUT_2P,
178                            G_REDUCTION, G_REDUCTION,
179                            G_CONDEVAL};
180  inline  inline
181  ES_opgroup  ES_opgroup
182  getOpgroup(ES_optype op)  getOpgroup(ES_optype op)
# Line 73  getOpgroup(ES_optype op) Line 188  getOpgroup(ES_optype op)
188  FunctionSpace  FunctionSpace
189  resultFS(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)  resultFS(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
190  {  {
191      // perhaps this should call interpolate and throw or something?          // perhaps this should call interpolate and throw or something?
192      // maybe we need an interpolate node -          // maybe we need an interpolate node -
193      // that way, if interpolate is required in any other op we can just throw a          // that way, if interpolate is required in any other op we can just throw a
194      // programming error exception.          // programming error exception.
195    
196      FunctionSpace l=left->getFunctionSpace();
197      if (left->getFunctionSpace()!=right->getFunctionSpace())    FunctionSpace r=right->getFunctionSpace();
198      {    if (l!=r)
199          throw DataException("FunctionSpaces not equal - interpolation not supported on lazy data.");    {
200      }      signed char res=r.getDomain()->preferredInterpolationOnDomain(r.getTypeCode(), l.getTypeCode());
201      return left->getFunctionSpace();      if (res==1)
202        {
203            return l;
204        }
205        if (res==-1)
206        {
207            return r;
208        }
209        throw DataException("Cannot interpolate between the FunctionSpaces given for operation "+opToString(op)+".");
210      }
211      return l;
212  }  }
213    
214  // return the shape of the result of "left op right"  // return the shape of the result of "left op right"
215    // the shapes resulting from tensor product are more complex to compute so are worked out elsewhere
216  DataTypes::ShapeType  DataTypes::ShapeType
217  resultShape(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)  resultShape(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
218  {  {
219      if (left->getShape()!=right->getShape())          if (left->getShape()!=right->getShape())
220      {          {
221          throw DataException("Shapes not the same - shapes must match for lazy data.");            if ((getOpgroup(op)!=G_BINARY) && (getOpgroup(op)!=G_NP1OUT))
222      }            {
223      return left->getShape();                  throw DataException("Shapes not the name - shapes must match for (point)binary operations.");
224              }
225    
226              if (left->getRank()==0)       // we need to allow scalar * anything
227              {
228                    return right->getShape();
229              }
230              if (right->getRank()==0)
231              {
232                    return left->getShape();
233              }
234              throw DataException("Shapes not the same - arguments must have matching shapes (or be scalars) for (point)binary operations on lazy data.");
235            }
236            return left->getShape();
237  }  }
238    
239  size_t  // return the shape for "op left"
240  resultLength(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)  
241    DataTypes::ShapeType
242    resultShape(DataAbstract_ptr left, ES_optype op, int axis_offset)
243  {  {
244     switch (getOpgroup(op))          switch(op)
245     {          {
246     case G_BINARY: return left->getLength();          case TRANS:
247     case G_UNARY: return left->getLength();             {                    // for the scoping of variables
248     default:                  const DataTypes::ShapeType& s=left->getShape();
249      throw DataException("Programmer Error - attempt to getLength() for operator "+opToString(op)+".");                  DataTypes::ShapeType sh;
250     }                  int rank=left->getRank();
251                    if (axis_offset<0 || axis_offset>rank)
252                    {
253                stringstream e;
254                e << "Error - Data::transpose must have 0 <= axis_offset <= rank=" << rank;
255                throw DataException(e.str());
256            }
257            for (int i=0; i<rank; i++)
258                    {
259                       int index = (axis_offset+i)%rank;
260               sh.push_back(s[index]); // Append to new shape
261            }
262                    return sh;
263               }
264            break;
265            case TRACE:
266               {
267                    int rank=left->getRank();
268                    if (rank<2)
269                    {
270                       throw DataException("Trace can only be computed for objects with rank 2 or greater.");
271                    }
272                    if ((axis_offset>rank-2) || (axis_offset<0))
273                    {
274                       throw DataException("Trace: axis offset must lie between 0 and rank-2 inclusive.");
275                    }
276                    if (rank==2)
277                    {
278                       return DataTypes::scalarShape;
279                    }
280                    else if (rank==3)
281                    {
282                       DataTypes::ShapeType sh;
283                       if (axis_offset==0)
284                       {
285                            sh.push_back(left->getShape()[2]);
286                       }
287                       else         // offset==1
288                       {
289                            sh.push_back(left->getShape()[0]);
290                       }
291                       return sh;
292                    }
293                    else if (rank==4)
294                    {
295                       DataTypes::ShapeType sh;
296                       const DataTypes::ShapeType& s=left->getShape();
297                       if (axis_offset==0)
298                       {
299                            sh.push_back(s[2]);
300                            sh.push_back(s[3]);
301                       }
302                       else if (axis_offset==1)
303                       {
304                            sh.push_back(s[0]);
305                            sh.push_back(s[3]);
306                       }
307                       else         // offset==2
308                       {
309                            sh.push_back(s[0]);
310                            sh.push_back(s[1]);
311                       }
312                       return sh;
313                    }
314                    else            // unknown rank
315                    {
316                       throw DataException("Error - Data::trace can only be calculated for rank 2, 3 or 4 object.");
317                    }
318               }
319            break;
320            default:
321            throw DataException("Programmer error - resultShape(left,op) can't compute shapes for operator "+opToString(op)+".");
322            }
323  }  }
324    
325  int  DataTypes::ShapeType
326  calcBuffs(const DataLazy_ptr& left, const DataLazy_ptr& right, ES_optype op)  SwapShape(DataAbstract_ptr left, const int axis0, const int axis1)
327  {  {
328     switch(getOpgroup(op))       // This code taken from the Data.cpp swapaxes() method
329     {       // Some of the checks are probably redundant here
330     case G_IDENTITY: return 1;       int axis0_tmp,axis1_tmp;
331     case G_BINARY: return max(left->getBuffsRequired(),right->getBuffsRequired()+1);       const DataTypes::ShapeType& s=left->getShape();
332     case G_UNARY: return max(left->getBuffsRequired(),1);       DataTypes::ShapeType out_shape;
333     default:       // Here's the equivalent of python s_out=s[axis_offset:]+s[:axis_offset]
334      throw DataException("Programmer Error - attempt to calcBuffs() for operator "+opToString(op)+".");       // which goes thru all shape vector elements starting with axis_offset (at index=rank wrap around to 0)
335     }       int rank=left->getRank();
336         if (rank<2) {
337            throw DataException("Error - Data::swapaxes argument must have at least rank 2.");
338         }
339         if (axis0<0 || axis0>rank-1) {
340            stringstream e;
341            e << "Error - Data::swapaxes: axis0 must be between 0 and rank-1=" << (rank-1);
342            throw DataException(e.str());
343         }
344         if (axis1<0 || axis1>rank-1) {
345            stringstream e;
346            e << "Error - Data::swapaxes: axis1 must be between 0 and rank-1=" << (rank-1);
347            throw DataException(e.str());
348         }
349         if (axis0 == axis1) {
350             throw DataException("Error - Data::swapaxes: axis indices must be different.");
351         }
352         if (axis0 > axis1) {
353             axis0_tmp=axis1;
354             axis1_tmp=axis0;
355         } else {
356             axis0_tmp=axis0;
357             axis1_tmp=axis1;
358         }
359         for (int i=0; i<rank; i++) {
360           if (i == axis0_tmp) {
361              out_shape.push_back(s[axis1_tmp]);
362           } else if (i == axis1_tmp) {
363              out_shape.push_back(s[axis0_tmp]);
364           } else {
365              out_shape.push_back(s[i]);
366           }
367         }
368        return out_shape;
369  }  }
370    
371    
372    // determine the output shape for the general tensor product operation
373    // the additional parameters return information required later for the product
374    // the majority of this code is copy pasted from C_General_Tensor_Product
375    DataTypes::ShapeType
376    GTPShape(DataAbstract_ptr left, DataAbstract_ptr right, int axis_offset, int transpose, int& SL, int& SM, int& SR)
377    {
378            
379      // Get rank and shape of inputs
380      int rank0 = left->getRank();
381      int rank1 = right->getRank();
382      const DataTypes::ShapeType& shape0 = left->getShape();
383      const DataTypes::ShapeType& shape1 = right->getShape();
384    
385      // Prepare for the loops of the product and verify compatibility of shapes
386      int start0=0, start1=0;
387      if (transpose == 0)           {}
388      else if (transpose == 1)      { start0 = axis_offset; }
389      else if (transpose == 2)      { start1 = rank1-axis_offset; }
390      else                          { throw DataException("DataLazy GeneralTensorProduct Constructor: Error - transpose should be 0, 1 or 2"); }
391    
392  }   // end anonymous namespace    if (rank0<axis_offset)
393      {
394            throw DataException("DataLazy GeneralTensorProduct Constructor: Error - rank of left < axisoffset");
395      }
396    
397      // Adjust the shapes for transpose
398      DataTypes::ShapeType tmpShape0(rank0);        // pre-sizing the vectors rather
399      DataTypes::ShapeType tmpShape1(rank1);        // than using push_back
400      for (int i=0; i<rank0; i++)   { tmpShape0[i]=shape0[(i+start0)%rank0]; }
401      for (int i=0; i<rank1; i++)   { tmpShape1[i]=shape1[(i+start1)%rank1]; }
402    
403      // Prepare for the loops of the product
404      SL=1, SM=1, SR=1;
405      for (int i=0; i<rank0-axis_offset; i++)       {
406        SL *= tmpShape0[i];
407      }
408      for (int i=rank0-axis_offset; i<rank0; i++)   {
409        if (tmpShape0[i] != tmpShape1[i-(rank0-axis_offset)]) {
410          throw DataException("C_GeneralTensorProduct: Error - incompatible shapes");
411        }
412        SM *= tmpShape0[i];
413      }
414      for (int i=axis_offset; i<rank1; i++)         {
415        SR *= tmpShape1[i];
416      }
417    
418      // Define the shape of the output (rank of shape is the sum of the loop ranges below)
419      DataTypes::ShapeType shape2(rank0+rank1-2*axis_offset);      
420      {                     // block to limit the scope of out_index
421         int out_index=0;
422         for (int i=0; i<rank0-axis_offset; i++, ++out_index) { shape2[out_index]=tmpShape0[i]; } // First part of arg_0_Z
423         for (int i=axis_offset; i<rank1; i++, ++out_index)   { shape2[out_index]=tmpShape1[i]; } // Last part of arg_1_Z
424      }
425    
426      if (shape2.size()>ESCRIPT_MAX_DATA_RANK)
427      {
428         ostringstream os;
429         os << "C_GeneralTensorProduct: Error - Attempt to create a rank " << shape2.size() << " object. The maximum rank is " << ESCRIPT_MAX_DATA_RANK << ".";
430         throw DataException(os.str());
431      }
432    
433      return shape2;
434    }
435    
436    }       // end anonymous namespace
437    
438    
439    
440    // Return a string representing the operation
441  const std::string&  const std::string&
442  opToString(ES_optype op)  opToString(ES_optype op)
443  {  {
# Line 137  opToString(ES_optype op) Line 448  opToString(ES_optype op)
448    return ES_opstrings[op];    return ES_opstrings[op];
449  }  }
450    
451    void DataLazy::LazyNodeSetup()
452    {
453    #ifdef _OPENMP
454        int numthreads=omp_get_max_threads();
455        m_samples.resize(numthreads*m_samplesize);
456        m_sampleids=new int[numthreads];
457        for (int i=0;i<numthreads;++i)
458        {
459            m_sampleids[i]=-1;  
460        }
461    #else
462        m_samples.resize(m_samplesize);
463        m_sampleids=new int[1];
464        m_sampleids[0]=-1;
465    #endif  // _OPENMP
466    }
467    
468    
469    // Creates an identity node
470  DataLazy::DataLazy(DataAbstract_ptr p)  DataLazy::DataLazy(DataAbstract_ptr p)
471      : parent(p->getFunctionSpace(),p->getShape()),          : parent(p->getFunctionSpace(),p->getShape())
472      m_op(IDENTITY)          ,m_sampleids(0),
473            m_samples(1)
474  {  {
475     if (p->isLazy())     if (p->isLazy())
476     {     {
477      // TODO: fix this.   We could make the new node a copy of p?          // I don't want identity of Lazy.
478      // I don't want identity of Lazy.          // Question: Why would that be so bad?
479      // Question: Why would that be so bad?          // Answer: We assume that the child of ID is something we can call getVector on
480      // Answer: We assume that the child of ID is something we can call getVector on          throw DataException("Programmer error - attempt to create identity from a DataLazy.");
     throw DataException("Programmer error - attempt to create identity from a DataLazy.");  
481     }     }
482     else     else
483     {     {
484      m_id=dynamic_pointer_cast<DataReady>(p);          p->makeLazyShared();
485            DataReady_ptr dr=dynamic_pointer_cast<DataReady>(p);
486            makeIdentity(dr);
487    LAZYDEBUG(cout << "Wrapping " << dr.get() << " id=" << m_id.get() << endl;)
488     }     }
489     m_length=p->getLength();  LAZYDEBUG(cout << "(1)Lazy created with " << m_samplesize << endl;)
    m_buffsRequired=1;  
    m_samplesize=getNumDPPSample()*getNoValues();  
 cout << "(1)Lazy created with " << m_samplesize << endl;  
490  }  }
491    
492  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op)  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op)
493      : parent(left->getFunctionSpace(),left->getShape()),          : parent(left->getFunctionSpace(),(getOpgroup(op)!=G_REDUCTION)?left->getShape():DataTypes::scalarShape),
494      m_op(op)          m_op(op),
495            m_axis_offset(0),
496            m_transpose(0),
497            m_SL(0), m_SM(0), m_SR(0)
498  {  {
499     if (getOpgroup(op)!=G_UNARY)     if ((getOpgroup(op)!=G_UNARY) && (getOpgroup(op)!=G_NP1OUT) && (getOpgroup(op)!=G_REDUCTION))
500     {     {
501      throw DataException("Programmer error - constructor DataLazy(left, op) will only process UNARY operations.");          throw DataException("Programmer error - constructor DataLazy(left, op) will only process UNARY operations.");
502     }     }
503    
504     DataLazy_ptr lleft;     DataLazy_ptr lleft;
505     if (!left->isLazy())     if (!left->isLazy())
506     {     {
507      lleft=DataLazy_ptr(new DataLazy(left));          lleft=DataLazy_ptr(new DataLazy(left));
508     }     }
509     else     else
510     {     {
511      lleft=dynamic_pointer_cast<DataLazy>(left);          lleft=dynamic_pointer_cast<DataLazy>(left);
512     }     }
513     m_length=left->getLength();     m_readytype=lleft->m_readytype;
514     m_left=lleft;     m_left=lleft;
    m_buffsRequired=1;  
515     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
516       m_children=m_left->m_children+1;
517       m_height=m_left->m_height+1;
518       LazyNodeSetup();
519       SIZELIMIT
520  }  }
521    
522    
523  DataLazy::DataLazy(DataLazy_ptr left, DataLazy_ptr right, ES_optype op)  // In this constructor we need to consider interpolation
524      : parent(resultFS(left,right,op), resultShape(left,right,op)),  DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
525      m_left(left),          : parent(resultFS(left,right,op), resultShape(left,right,op)),
526      m_right(right),          m_op(op),
527      m_op(op)          m_SL(0), m_SM(0), m_SR(0)
528  {  {
529     if (getOpgroup(op)!=G_BINARY)  LAZYDEBUG(cout << "Forming operator with " << left.get() << " " << right.get() << endl;)
530       if ((getOpgroup(op)!=G_BINARY))
531       {
532            throw DataException("Programmer error - constructor DataLazy(left, right, op) will only process BINARY operations.");
533       }
534    
535       if (getFunctionSpace()!=left->getFunctionSpace())    // left needs to be interpolated
536       {
537            FunctionSpace fs=getFunctionSpace();
538            Data ltemp(left);
539            Data tmp(ltemp,fs);
540            left=tmp.borrowDataPtr();
541       }
542       if (getFunctionSpace()!=right->getFunctionSpace())   // right needs to be interpolated
543       {
544            Data tmp(Data(right),getFunctionSpace());
545            right=tmp.borrowDataPtr();
546    LAZYDEBUG(cout << "Right interpolation required " << right.get() << endl;)
547       }
548       left->operandCheck(*right);
549    
550       if (left->isLazy())                  // the children need to be DataLazy. Wrap them in IDENTITY if required
551       {
552            m_left=dynamic_pointer_cast<DataLazy>(left);
553    LAZYDEBUG(cout << "Left is " << m_left->toString() << endl;)
554       }
555       else
556       {
557            m_left=DataLazy_ptr(new DataLazy(left));
558    LAZYDEBUG(cout << "Left " << left.get() << " wrapped " << m_left->m_id.get() << endl;)
559       }
560       if (right->isLazy())
561     {     {
562      throw DataException("Programmer error - constructor DataLazy(left, right, op) will only process BINARY operations.");          m_right=dynamic_pointer_cast<DataLazy>(right);
563    LAZYDEBUG(cout << "Right is " << m_right->toString() << endl;)
564       }
565       else
566       {
567            m_right=DataLazy_ptr(new DataLazy(right));
568    LAZYDEBUG(cout << "Right " << right.get() << " wrapped " << m_right->m_id.get() << endl;)
569       }
570       char lt=m_left->m_readytype;
571       char rt=m_right->m_readytype;
572       if (lt=='E' || rt=='E')
573       {
574            m_readytype='E';
575       }
576       else if (lt=='T' || rt=='T')
577       {
578            m_readytype='T';
579       }
580       else
581       {
582            m_readytype='C';
583     }     }
    m_length=resultLength(m_left,m_right,m_op);  
584     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
585     m_buffsRequired=calcBuffs(m_left, m_right, m_op);     m_children=m_left->m_children+m_right->m_children+2;
586  cout << "(2)Lazy created with " << m_samplesize << endl;     m_height=max(m_left->m_height,m_right->m_height)+1;
587       LazyNodeSetup();
588       SIZELIMIT
589    LAZYDEBUG(cout << "(3)Lazy created with " << m_samplesize << endl;)
590  }  }
591    
592  DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)  DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op, int axis_offset, int transpose)
593      : parent(resultFS(left,right,op), resultShape(left,right,op)),          : parent(resultFS(left,right,op), GTPShape(left,right, axis_offset, transpose, m_SL,m_SM, m_SR)),
594      m_op(op)          m_op(op),
595            m_axis_offset(axis_offset),
596            m_transpose(transpose)
597  {  {
598     if (getOpgroup(op)!=G_BINARY)     if ((getOpgroup(op)!=G_TENSORPROD))
599     {     {
600      throw DataException("Programmer error - constructor DataLazy(left, op) will only process BINARY operations.");          throw DataException("Programmer error - constructor DataLazy(left, right, op, ax, tr) will only process BINARY operations which require parameters.");
601     }     }
602     if (left->isLazy())     if ((transpose>2) || (transpose<0))
603     {     {
604      m_left=dynamic_pointer_cast<DataLazy>(left);          throw DataException("DataLazy GeneralTensorProduct constructor: Error - transpose should be 0, 1 or 2");
605       }
606       if (getFunctionSpace()!=left->getFunctionSpace())    // left needs to be interpolated
607       {
608            FunctionSpace fs=getFunctionSpace();
609            Data ltemp(left);
610            Data tmp(ltemp,fs);
611            left=tmp.borrowDataPtr();
612       }
613       if (getFunctionSpace()!=right->getFunctionSpace())   // right needs to be interpolated
614       {
615            Data tmp(Data(right),getFunctionSpace());
616            right=tmp.borrowDataPtr();
617       }
618    //    left->operandCheck(*right);
619    
620       if (left->isLazy())                  // the children need to be DataLazy. Wrap them in IDENTITY if required
621       {
622            m_left=dynamic_pointer_cast<DataLazy>(left);
623     }     }
624     else     else
625     {     {
626      m_left=DataLazy_ptr(new DataLazy(left));          m_left=DataLazy_ptr(new DataLazy(left));
627     }     }
628     if (right->isLazy())     if (right->isLazy())
629     {     {
630      m_right=dynamic_pointer_cast<DataLazy>(right);          m_right=dynamic_pointer_cast<DataLazy>(right);
631       }
632       else
633       {
634            m_right=DataLazy_ptr(new DataLazy(right));
635       }
636       char lt=m_left->m_readytype;
637       char rt=m_right->m_readytype;
638       if (lt=='E' || rt=='E')
639       {
640            m_readytype='E';
641       }
642       else if (lt=='T' || rt=='T')
643       {
644            m_readytype='T';
645       }
646       else
647       {
648            m_readytype='C';
649       }
650       m_samplesize=getNumDPPSample()*getNoValues();
651       m_children=m_left->m_children+m_right->m_children+2;
652       m_height=max(m_left->m_height,m_right->m_height)+1;
653       LazyNodeSetup();
654       SIZELIMIT
655    LAZYDEBUG(cout << "(4)Lazy created with " << m_samplesize << endl;)
656    }
657    
658    
659    DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, int axis_offset)
660            : parent(left->getFunctionSpace(), resultShape(left,op, axis_offset)),
661            m_op(op),
662            m_axis_offset(axis_offset),
663            m_transpose(0),
664            m_tol(0)
665    {
666       if ((getOpgroup(op)!=G_NP1OUT_P))
667       {
668            throw DataException("Programmer error - constructor DataLazy(left, op, ax) will only process UNARY operations which require parameters.");
669       }
670       DataLazy_ptr lleft;
671       if (!left->isLazy())
672       {
673            lleft=DataLazy_ptr(new DataLazy(left));
674       }
675       else
676       {
677            lleft=dynamic_pointer_cast<DataLazy>(left);
678       }
679       m_readytype=lleft->m_readytype;
680       m_left=lleft;
681       m_samplesize=getNumDPPSample()*getNoValues();
682       m_children=m_left->m_children+1;
683       m_height=m_left->m_height+1;
684       LazyNodeSetup();
685       SIZELIMIT
686    LAZYDEBUG(cout << "(5)Lazy created with " << m_samplesize << endl;)
687    }
688    
689    DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, double tol)
690            : parent(left->getFunctionSpace(), left->getShape()),
691            m_op(op),
692            m_axis_offset(0),
693            m_transpose(0),
694            m_tol(tol)
695    {
696       if ((getOpgroup(op)!=G_UNARY_P))
697       {
698            throw DataException("Programmer error - constructor DataLazy(left, op, tol) will only process UNARY operations which require parameters.");
699       }
700       DataLazy_ptr lleft;
701       if (!left->isLazy())
702       {
703            lleft=DataLazy_ptr(new DataLazy(left));
704     }     }
705     else     else
706     {     {
707      m_right=DataLazy_ptr(new DataLazy(right));          lleft=dynamic_pointer_cast<DataLazy>(left);
708     }     }
709       m_readytype=lleft->m_readytype;
710       m_left=lleft;
711       m_samplesize=getNumDPPSample()*getNoValues();
712       m_children=m_left->m_children+1;
713       m_height=m_left->m_height+1;
714       LazyNodeSetup();
715       SIZELIMIT
716    LAZYDEBUG(cout << "(6)Lazy created with " << m_samplesize << endl;)
717    }
718    
719    
720     m_length=resultLength(m_left,m_right,m_op);  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, const int axis0, const int axis1)
721            : parent(left->getFunctionSpace(), SwapShape(left,axis0,axis1)),
722            m_op(op),
723            m_axis_offset(axis0),
724            m_transpose(axis1),
725            m_tol(0)
726    {
727       if ((getOpgroup(op)!=G_NP1OUT_2P))
728       {
729            throw DataException("Programmer error - constructor DataLazy(left, op, tol) will only process UNARY operations which require two integer parameters.");
730       }
731       DataLazy_ptr lleft;
732       if (!left->isLazy())
733       {
734            lleft=DataLazy_ptr(new DataLazy(left));
735       }
736       else
737       {
738            lleft=dynamic_pointer_cast<DataLazy>(left);
739       }
740       m_readytype=lleft->m_readytype;
741       m_left=lleft;
742     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
743     m_buffsRequired=calcBuffs(m_left, m_right,m_op);     m_children=m_left->m_children+1;
744  cout << "(3)Lazy created with " << m_samplesize << endl;     m_height=m_left->m_height+1;
745       LazyNodeSetup();
746       SIZELIMIT
747    LAZYDEBUG(cout << "(7)Lazy created with " << m_samplesize << endl;)
748  }  }
749    
750    
751  DataLazy::~DataLazy()  namespace
752    {
753    
754        inline int max3(int a, int b, int c)
755        {
756            int t=(a>b?a:b);
757            return (t>c?t:c);
758    
759        }
760    }
761    
762    DataLazy::DataLazy(DataAbstract_ptr mask, DataAbstract_ptr left, DataAbstract_ptr right/*, double tol*/)
763            : parent(left->getFunctionSpace(), left->getShape()),
764            m_op(CONDEVAL),
765            m_axis_offset(0),
766            m_transpose(0),
767            m_tol(0)
768  {  {
769    
770       DataLazy_ptr lmask;
771       DataLazy_ptr lleft;
772       DataLazy_ptr lright;
773       if (!mask->isLazy())
774       {
775            lmask=DataLazy_ptr(new DataLazy(mask));
776       }
777       else
778       {
779            lmask=dynamic_pointer_cast<DataLazy>(mask);
780       }
781       if (!left->isLazy())
782       {
783            lleft=DataLazy_ptr(new DataLazy(left));
784       }
785       else
786       {
787            lleft=dynamic_pointer_cast<DataLazy>(left);
788       }
789       if (!right->isLazy())
790       {
791            lright=DataLazy_ptr(new DataLazy(right));
792       }
793       else
794       {
795            lright=dynamic_pointer_cast<DataLazy>(right);
796       }
797       m_readytype=lmask->m_readytype;
798       if ((lleft->m_readytype!=lright->m_readytype) || (lmask->m_readytype!=lleft->m_readytype))
799       {
800            throw DataException("Programmer Error - condEval arguments must have the same readytype");
801       }
802       m_left=lleft;
803       m_right=lright;
804       m_mask=lmask;
805       m_samplesize=getNumDPPSample()*getNoValues();
806       m_children=m_left->m_children+m_right->m_children+m_mask->m_children+1;
807       m_height=max3(m_left->m_height,m_right->m_height,m_mask->m_height)+1;
808       LazyNodeSetup();
809       SIZELIMIT
810    LAZYDEBUG(cout << "(8)Lazy created with " << m_samplesize << endl;)
811  }  }
812    
813    
814  int  
815  DataLazy::getBuffsRequired() const  DataLazy::~DataLazy()
816  {  {
817      return m_buffsRequired;     delete[] m_sampleids;
818  }  }
819    
820    
821  // the vector and the offset are a place where the method could write its data if it wishes  /*
822  // it is not obligated to do so. For example, if it has its own storage already, it can use that.    \brief Evaluates the expression using methods on Data.
823  // Hence the return value to indicate where the data is actually stored.    This does the work for the collapse method.
824  // Regardless, the storage should be assumed to be used, even if it isn't.    For reasons of efficiency do not call this method on DataExpanded nodes.
825  const double*  */
826  DataLazy::resolveSample(ValueType& v,int sampleNo,  size_t offset ) const  DataReady_ptr
827    DataLazy::collapseToReady() const
828  {  {
829    if (m_op==IDENTITY)      if (m_readytype=='E')
830      {     // this is more an efficiency concern than anything else
831        throw DataException("Programmer Error - do not use collapse on Expanded data.");
832      }
833      if (m_op==IDENTITY)
834    {    {
835      const ValueType& vec=m_id->getVector();      return m_id;
     return &(vec[m_id->getPointOffset(sampleNo, 0)]);  
836    }    }
837    size_t rightoffset=offset+m_samplesize;    DataReady_ptr pleft=m_left->collapseToReady();
838    const double* left=m_left->resolveSample(v,sampleNo,offset);    Data left(pleft);
839    const double* right=0;    Data right;
840    if (getOpgroup(m_op)==G_BINARY)    if ((getOpgroup(m_op)==G_BINARY) || (getOpgroup(m_op)==G_TENSORPROD))
841    {    {
842      right=m_right->resolveSample(v,sampleNo,rightoffset);      right=Data(m_right->collapseToReady());
843    }    }
844    double* result=&(v[offset]);    Data result;
845      switch(m_op)
846    {    {
847      switch(m_op)      case ADD:
848      {          result=left+right;
849      case ADD:       // since these are pointwise ops, pretend each sample is one point          break;
850      tensor_binary_operation(m_samplesize, left, right, result, plus<double>());      case SUB:          
851      break;          result=left-right;
852      case SUB:                break;
853      tensor_binary_operation(m_samplesize, left, right, result, minus<double>());      case MUL:          
854      break;          result=left*right;
855      case MUL:                break;
856      tensor_binary_operation(m_samplesize, left, right, result, multiplies<double>());      case DIV:          
857      break;          result=left/right;
858      case DIV:                break;
     tensor_binary_operation(m_samplesize, left, right, result, divides<double>());  
     break;  
 // unary ops  
859      case SIN:      case SIN:
860      tensor_unary_operation(m_samplesize, left, result, ::sin);          result=left.sin();      
861      break;          break;
862      case COS:      case COS:
863      tensor_unary_operation(m_samplesize, left, result, ::cos);          result=left.cos();
864      break;          break;
865      case TAN:      case TAN:
866      tensor_unary_operation(m_samplesize, left, result, ::tan);          result=left.tan();
867      break;          break;
868      case ASIN:      case ASIN:
869      tensor_unary_operation(m_samplesize, left, result, ::asin);          result=left.asin();
870      break;          break;
871      case ACOS:      case ACOS:
872      tensor_unary_operation(m_samplesize, left, result, ::acos);          result=left.acos();
873      break;          break;
874      case ATAN:      case ATAN:
875      tensor_unary_operation(m_samplesize, left, result, ::atan);          result=left.atan();
876      break;          break;
877      case SINH:      case SINH:
878      tensor_unary_operation(m_samplesize, left, result, ::sinh);          result=left.sinh();
879      break;          break;
880      case COSH:      case COSH:
881      tensor_unary_operation(m_samplesize, left, result, ::cosh);          result=left.cosh();
882      break;          break;
883      case TANH:      case TANH:
884      tensor_unary_operation(m_samplesize, left, result, ::tanh);          result=left.tanh();
885      break;          break;
886      case ERF:      case ERF:
887  #ifdef _WIN32          result=left.erf();
888      throw DataException("Error - Data:: erf function is not supported on _WIN32 platforms.");          break;
889       case ASINH:
890            result=left.asinh();
891            break;
892       case ACOSH:
893            result=left.acosh();
894            break;
895       case ATANH:
896            result=left.atanh();
897            break;
898        case LOG10:
899            result=left.log10();
900            break;
901        case LOG:
902            result=left.log();
903            break;
904        case SIGN:
905            result=left.sign();
906            break;
907        case ABS:
908            result=left.abs();
909            break;
910        case NEG:
911            result=left.neg();
912            break;
913        case POS:
914            // it doesn't mean anything for delayed.
915            // it will just trigger a deep copy of the lazy object
916            throw DataException("Programmer error - POS not supported for lazy data.");
917            break;
918        case EXP:
919            result=left.exp();
920            break;
921        case SQRT:
922            result=left.sqrt();
923            break;
924        case RECIP:
925            result=left.oneOver();
926            break;
927        case GZ:
928            result=left.wherePositive();
929            break;
930        case LZ:
931            result=left.whereNegative();
932            break;
933        case GEZ:
934            result=left.whereNonNegative();
935            break;
936        case LEZ:
937            result=left.whereNonPositive();
938            break;
939        case NEZ:
940            result=left.whereNonZero(m_tol);
941            break;
942        case EZ:
943            result=left.whereZero(m_tol);
944            break;
945        case SYM:
946            result=left.symmetric();
947            break;
948        case NSYM:
949            result=left.nonsymmetric();
950            break;
951        case PROD:
952            result=C_GeneralTensorProduct(left,right,m_axis_offset, m_transpose);
953            break;
954        case TRANS:
955            result=left.transpose(m_axis_offset);
956            break;
957        case TRACE:
958            result=left.trace(m_axis_offset);
959            break;
960        case SWAP:
961            result=left.swapaxes(m_axis_offset, m_transpose);
962            break;
963        case MINVAL:
964            result=left.minval();
965            break;
966        case MAXVAL:
967            result=left.minval();
968            break;
969        default:
970            throw DataException("Programmer error - collapseToReady does not know how to resolve operator "+opToString(m_op)+".");
971      }
972      return result.borrowReadyPtr();
973    }
974    
975    /*
976       \brief Converts the DataLazy into an IDENTITY storing the value of the expression.
977       This method uses the original methods on the Data class to evaluate the expressions.
978       For this reason, it should not be used on DataExpanded instances. (To do so would defeat
979       the purpose of using DataLazy in the first place).
980    */
981    void
982    DataLazy::collapse() const
983    {
984      if (m_op==IDENTITY)
985      {
986            return;
987      }
988      if (m_readytype=='E')
989      {     // this is more an efficiency concern than anything else
990        throw DataException("Programmer Error - do not use collapse on Expanded data.");
991      }
992      m_id=collapseToReady();
993      m_op=IDENTITY;
994    }
995    
996    
997    
998    
999    
1000    
1001    #define PROC_OP(TYPE,X)                               \
1002            for (int j=0;j<onumsteps;++j)\
1003            {\
1004              for (int i=0;i<numsteps;++i,resultp+=resultStep) \
1005              { \
1006    LAZYDEBUG(cout << "[left,right]=[" << lroffset << "," << rroffset << "]" << endl;)\
1007    LAZYDEBUG(cout << "{left,right}={" << (*left)[lroffset] << "," << (*right)[rroffset] << "}\n";)\
1008                 tensor_binary_operation< TYPE >(chunksize, &((*left)[lroffset]), &((*right)[rroffset]), resultp, X); \
1009    LAZYDEBUG(cout << " result=      " << resultp[0] << endl;) \
1010                 lroffset+=leftstep; \
1011                 rroffset+=rightstep; \
1012              }\
1013              lroffset+=oleftstep;\
1014              rroffset+=orightstep;\
1015            }
1016    
1017    
1018    // The result will be stored in m_samples
1019    // The return value is a pointer to the DataVector, offset is the offset within the return value
1020    const DataTypes::RealVectorType*
1021    DataLazy::resolveNodeSample(int tid, int sampleNo, size_t& roffset) const
1022    {
1023    LAZYDEBUG(cout << "Resolve sample " << toString() << endl;)
1024            // collapse so we have a 'E' node or an IDENTITY for some other type
1025      if (m_readytype!='E' && m_op!=IDENTITY)
1026      {
1027            collapse();
1028      }
1029      if (m_op==IDENTITY)  
1030      {
1031        const ValueType& vec=m_id->getVectorRO();
1032        roffset=m_id->getPointOffset(sampleNo, 0);
1033    #ifdef LAZY_STACK_PROF
1034    int x;
1035    if (&x<stackend[omp_get_thread_num()])
1036    {
1037           stackend[omp_get_thread_num()]=&x;
1038    }
1039    #endif
1040        return &(vec);
1041      }
1042      if (m_readytype!='E')
1043      {
1044        throw DataException("Programmer Error - Collapse did not produce an expanded node.");
1045      }
1046      if (m_sampleids[tid]==sampleNo)
1047      {
1048            roffset=tid*m_samplesize;
1049            return &(m_samples);            // sample is already resolved
1050      }
1051      m_sampleids[tid]=sampleNo;
1052    
1053      switch (getOpgroup(m_op))
1054      {
1055      case G_UNARY:
1056      case G_UNARY_P: return resolveNodeUnary(tid, sampleNo, roffset);
1057      case G_BINARY: return resolveNodeBinary(tid, sampleNo, roffset);
1058      case G_NP1OUT: return resolveNodeNP1OUT(tid, sampleNo, roffset);
1059      case G_NP1OUT_P: return resolveNodeNP1OUT_P(tid, sampleNo, roffset);
1060      case G_TENSORPROD: return resolveNodeTProd(tid, sampleNo, roffset);
1061      case G_NP1OUT_2P: return resolveNodeNP1OUT_2P(tid, sampleNo, roffset);
1062      case G_REDUCTION: return resolveNodeReduction(tid, sampleNo, roffset);
1063      case G_CONDEVAL: return resolveNodeCondEval(tid, sampleNo, roffset);
1064      default:
1065        throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");
1066      }
1067    }
1068    
1069    const DataTypes::RealVectorType*
1070    DataLazy::resolveNodeUnary(int tid, int sampleNo, size_t& roffset) const
1071    {
1072            // we assume that any collapsing has been done before we get here
1073            // since we only have one argument we don't need to think about only
1074            // processing single points.
1075            // we will also know we won't get identity nodes
1076      if (m_readytype!='E')
1077      {
1078        throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
1079      }
1080      if (m_op==IDENTITY)
1081      {
1082        throw DataException("Programmer error - resolveNodeUnary should not be called on identity nodes.");
1083      }
1084      const DataTypes::RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, roffset);
1085      const double* left=&((*leftres)[roffset]);
1086      roffset=m_samplesize*tid;
1087      double* result=&(m_samples[roffset]);
1088      switch (m_op)
1089      {
1090        case SIN:  
1091            tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sin);
1092            break;
1093        case COS:
1094            tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cos);
1095            break;
1096        case TAN:
1097            tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tan);
1098            break;
1099        case ASIN:
1100            tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::asin);
1101            break;
1102        case ACOS:
1103            tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::acos);
1104            break;
1105        case ATAN:
1106            tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::atan);
1107            break;
1108        case SINH:
1109            tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sinh);
1110            break;
1111        case COSH:
1112            tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cosh);
1113            break;
1114        case TANH:
1115            tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tanh);
1116            break;
1117        case ERF:
1118    #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1119            throw DataException("Error - Data:: erf function is not supported on _WIN32 platforms.");
1120  #else  #else
1121      tensor_unary_operation(m_samplesize, left, result, ::erf);          tensor_unary_operation(m_samplesize, left, result, ::erf);
1122      break;          break;
1123  #endif  #endif
1124     case ASINH:     case ASINH:
1125  #ifdef _WIN32  #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1126      tensor_unary_operation(m_samplesize, left, result, escript::asinh_substitute);          tensor_unary_operation(m_samplesize, left, result, escript::asinh_substitute);
1127  #else  #else
1128      tensor_unary_operation(m_samplesize, left, result, ::asinh);          tensor_unary_operation(m_samplesize, left, result, ::asinh);
1129  #endif    #endif  
1130      break;          break;
1131     case ACOSH:     case ACOSH:
1132  #ifdef _WIN32  #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1133      tensor_unary_operation(m_samplesize, left, result, escript::acosh_substitute);          tensor_unary_operation(m_samplesize, left, result, escript::acosh_substitute);
1134  #else  #else
1135      tensor_unary_operation(m_samplesize, left, result, ::acosh);          tensor_unary_operation(m_samplesize, left, result, ::acosh);
1136  #endif    #endif  
1137      break;          break;
1138     case ATANH:     case ATANH:
1139  #ifdef _WIN32  #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1140      tensor_unary_operation(m_samplesize, left, result, escript::atanh_substitute);          tensor_unary_operation(m_samplesize, left, result, escript::atanh_substitute);
1141  #else  #else
1142      tensor_unary_operation(m_samplesize, left, result, ::atanh);          tensor_unary_operation(m_samplesize, left, result, ::atanh);
1143  #endif    #endif  
1144      break;          break;
1145      case LOG10:      case LOG10:
1146      tensor_unary_operation(m_samplesize, left, result, ::log10);          tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log10);
1147      break;          break;
1148      case LOG:      case LOG:
1149      tensor_unary_operation(m_samplesize, left, result, ::log);          tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log);
1150      break;          break;
1151      case SIGN:      case SIGN:
1152      tensor_unary_operation(m_samplesize, left, result, escript::fsign);          tensor_unary_operation(m_samplesize, left, result, escript::fsign);
1153      break;          break;
1154      case ABS:      case ABS:
1155      tensor_unary_operation(m_samplesize, left, result, ::fabs);          tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::fabs);
1156      break;          break;
1157      case NEG:      case NEG:
1158      tensor_unary_operation(m_samplesize, left, result, negate<double>());          tensor_unary_operation(m_samplesize, left, result, negate<double>());
1159      break;          break;
1160      case POS:      case POS:
1161      // it doesn't mean anything for delayed.          // it doesn't mean anything for delayed.
1162      // it will just trigger a deep copy of the lazy object          // it will just trigger a deep copy of the lazy object
1163      throw DataException("Programmer error - POS not supported for lazy data.");          throw DataException("Programmer error - POS not supported for lazy data.");
1164      break;          break;
1165        case EXP:
1166            tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::exp);
1167            break;
1168        case SQRT:
1169            tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sqrt);
1170            break;
1171        case RECIP:
1172            tensor_unary_operation(m_samplesize, left, result, bind1st(divides<double>(),1.));
1173            break;
1174        case GZ:
1175            tensor_unary_operation(m_samplesize, left, result, bind2nd(greater<double>(),0.0));
1176            break;
1177        case LZ:
1178            tensor_unary_operation(m_samplesize, left, result, bind2nd(less<double>(),0.0));
1179            break;
1180        case GEZ:
1181            tensor_unary_operation(m_samplesize, left, result, bind2nd(greater_equal<double>(),0.0));
1182            break;
1183        case LEZ:
1184            tensor_unary_operation(m_samplesize, left, result, bind2nd(less_equal<double>(),0.0));
1185            break;
1186    // There are actually G_UNARY_P but I don't see a compelling reason to treat them differently
1187        case NEZ:
1188            tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsGT(),m_tol));
1189            break;
1190        case EZ:
1191            tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsLTE(),m_tol));
1192            break;
1193    
1194      default:      default:
1195      throw DataException("Programmer error - do not know how to resolve operator "+opToString(m_op)+".");          throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");
1196      }    }
1197      return &(m_samples);
1198    }
1199    
1200    
1201    const DataTypes::RealVectorType*
1202    DataLazy::resolveNodeReduction(int tid, int sampleNo, size_t& roffset) const
1203    {
1204            // we assume that any collapsing has been done before we get here
1205            // since we only have one argument we don't need to think about only
1206            // processing single points.
1207            // we will also know we won't get identity nodes
1208      if (m_readytype!='E')
1209      {
1210        throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
1211      }
1212      if (m_op==IDENTITY)
1213      {
1214        throw DataException("Programmer error - resolveNodeUnary should not be called on identity nodes.");
1215      }
1216      size_t loffset=0;
1217      const DataTypes::RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, loffset);
1218    
1219      roffset=m_samplesize*tid;
1220      unsigned int ndpps=getNumDPPSample();
1221      unsigned int psize=DataTypes::noValues(m_left->getShape());
1222      double* result=&(m_samples[roffset]);
1223      switch (m_op)
1224      {
1225        case MINVAL:
1226            {
1227              for (unsigned int z=0;z<ndpps;++z)
1228              {
1229                FMin op;
1230                *result=DataMaths::reductionOp(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max());
1231                loffset+=psize;
1232                result++;
1233              }
1234            }
1235            break;
1236        case MAXVAL:
1237            {
1238              for (unsigned int z=0;z<ndpps;++z)
1239              {
1240              FMax op;
1241              *result=DataMaths::reductionOp(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max()*-1);
1242              loffset+=psize;
1243              result++;
1244              }
1245            }
1246            break;
1247        default:
1248            throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");
1249      }
1250      return &(m_samples);
1251    }
1252    
1253    const DataTypes::RealVectorType*
1254    DataLazy::resolveNodeNP1OUT(int tid, int sampleNo, size_t& roffset) const
1255    {
1256            // we assume that any collapsing has been done before we get here
1257            // since we only have one argument we don't need to think about only
1258            // processing single points.
1259      if (m_readytype!='E')
1260      {
1261        throw DataException("Programmer error - resolveNodeNP1OUT should only be called on expanded Data.");
1262      }
1263      if (m_op==IDENTITY)
1264      {
1265        throw DataException("Programmer error - resolveNodeNP1OUT should not be called on identity nodes.");
1266      }
1267      size_t subroffset;
1268      const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1269      roffset=m_samplesize*tid;
1270      size_t loop=0;
1271      size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1272      size_t step=getNoValues();
1273      size_t offset=roffset;
1274      switch (m_op)
1275      {
1276        case SYM:
1277            for (loop=0;loop<numsteps;++loop)
1278            {
1279                DataMaths::symmetric(*leftres,m_left->getShape(),subroffset, m_samples, getShape(), offset);
1280                subroffset+=step;
1281                offset+=step;
1282            }
1283            break;
1284        case NSYM:
1285            for (loop=0;loop<numsteps;++loop)
1286            {
1287                DataMaths::nonsymmetric(*leftres,m_left->getShape(),subroffset, m_samples, getShape(), offset);
1288                subroffset+=step;
1289                offset+=step;
1290            }
1291            break;
1292        default:
1293            throw DataException("Programmer error - resolveNP1OUT can not resolve operator "+opToString(m_op)+".");
1294      }
1295      return &m_samples;
1296    }
1297    
1298    const DataTypes::RealVectorType*
1299    DataLazy::resolveNodeNP1OUT_P(int tid, int sampleNo, size_t& roffset) const
1300    {
1301            // we assume that any collapsing has been done before we get here
1302            // since we only have one argument we don't need to think about only
1303            // processing single points.
1304      if (m_readytype!='E')
1305      {
1306        throw DataException("Programmer error - resolveNodeNP1OUT_P should only be called on expanded Data.");
1307      }
1308      if (m_op==IDENTITY)
1309      {
1310        throw DataException("Programmer error - resolveNodeNP1OUT_P should not be called on identity nodes.");
1311      }
1312      size_t subroffset;
1313      size_t offset;
1314      const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1315      roffset=m_samplesize*tid;
1316      offset=roffset;
1317      size_t loop=0;
1318      size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1319      size_t outstep=getNoValues();
1320      size_t instep=m_left->getNoValues();
1321      switch (m_op)
1322      {
1323        case TRACE:
1324            for (loop=0;loop<numsteps;++loop)
1325            {
1326                DataMaths::trace(*leftres,m_left->getShape(),subroffset, m_samples ,getShape(),offset,m_axis_offset);
1327                subroffset+=instep;
1328                offset+=outstep;
1329            }
1330            break;
1331        case TRANS:
1332            for (loop=0;loop<numsteps;++loop)
1333            {
1334                DataMaths::transpose(*leftres,m_left->getShape(),subroffset, m_samples, getShape(),offset,m_axis_offset);
1335                subroffset+=instep;
1336                offset+=outstep;
1337            }
1338            break;
1339        default:
1340            throw DataException("Programmer error - resolveNP1OUTP can not resolve operator "+opToString(m_op)+".");
1341      }
1342      return &m_samples;
1343    }
1344    
1345    
1346    const DataTypes::RealVectorType*
1347    DataLazy::resolveNodeNP1OUT_2P(int tid, int sampleNo, size_t& roffset) const
1348    {
1349      if (m_readytype!='E')
1350      {
1351        throw DataException("Programmer error - resolveNodeNP1OUT_2P should only be called on expanded Data.");
1352      }
1353      if (m_op==IDENTITY)
1354      {
1355        throw DataException("Programmer error - resolveNodeNP1OUT_2P should not be called on identity nodes.");
1356      }
1357      size_t subroffset;
1358      size_t offset;
1359      const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1360      roffset=m_samplesize*tid;
1361      offset=roffset;
1362      size_t loop=0;
1363      size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1364      size_t outstep=getNoValues();
1365      size_t instep=m_left->getNoValues();
1366      switch (m_op)
1367      {
1368        case SWAP:
1369            for (loop=0;loop<numsteps;++loop)
1370            {
1371                DataMaths::swapaxes(*leftres,m_left->getShape(),subroffset, m_samples, getShape(),offset, m_axis_offset, m_transpose);
1372                subroffset+=instep;
1373                offset+=outstep;
1374            }
1375            break;
1376        default:
1377            throw DataException("Programmer error - resolveNodeNP1OUT2P can not resolve operator "+opToString(m_op)+".");
1378      }
1379      return &m_samples;
1380    }
1381    
1382    const DataTypes::RealVectorType*
1383    DataLazy::resolveNodeCondEval(int tid, int sampleNo, size_t& roffset) const
1384    {
1385      if (m_readytype!='E')
1386      {
1387        throw DataException("Programmer error - resolveNodeCondEval should only be called on expanded Data.");
1388      }
1389      if (m_op!=CONDEVAL)
1390      {
1391        throw DataException("Programmer error - resolveNodeCondEval should only be called on CONDEVAL nodes.");
1392      }
1393      size_t subroffset;
1394    
1395      const ValueType* maskres=m_mask->resolveNodeSample(tid, sampleNo, subroffset);
1396      const ValueType* srcres=0;
1397      if ((*maskres)[subroffset]>0)
1398      {
1399            srcres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1400      }
1401      else
1402      {
1403            srcres=m_right->resolveNodeSample(tid, sampleNo, subroffset);
1404      }
1405    
1406      // Now we need to copy the result
1407    
1408      roffset=m_samplesize*tid;
1409      for (int i=0;i<m_samplesize;++i)
1410      {
1411            m_samples[roffset+i]=(*srcres)[subroffset+i];  
1412      }
1413    
1414      return &m_samples;
1415    }
1416    
1417    // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1418    // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1419    // If both children are expanded, then we can process them in a single operation (we treat
1420    // the whole sample as one big datapoint.
1421    // If one of the children is not expanded, then we need to treat each point in the sample
1422    // individually.
1423    // There is an additional complication when scalar operations are considered.
1424    // For example, 2+Vector.
1425    // In this case each double within the point is treated individually
1426    const DataTypes::RealVectorType*
1427    DataLazy::resolveNodeBinary(int tid, int sampleNo, size_t& roffset) const
1428    {
1429    LAZYDEBUG(cout << "Resolve binary: " << toString() << endl;)
1430    
1431      size_t lroffset=0, rroffset=0;        // offsets in the left and right result vectors
1432            // first work out which of the children are expanded
1433      bool leftExp=(m_left->m_readytype=='E');
1434      bool rightExp=(m_right->m_readytype=='E');
1435      if (!leftExp && !rightExp)
1436      {
1437            throw DataException("Programmer Error - please use collapse if neither argument has type 'E'.");
1438      }
1439      bool leftScalar=(m_left->getRank()==0);
1440      bool rightScalar=(m_right->getRank()==0);
1441      if ((m_left->getRank()!=m_right->getRank()) && (!leftScalar && !rightScalar))
1442      {
1443            throw DataException("resolveBinary - ranks of arguments must match unless one of them is scalar.");
1444      }
1445      size_t leftsize=m_left->getNoValues();
1446      size_t rightsize=m_right->getNoValues();
1447      size_t chunksize=1;                   // how many doubles will be processed in one go
1448      int leftstep=0;               // how far should the left offset advance after each step
1449      int rightstep=0;
1450      int numsteps=0;               // total number of steps for the inner loop
1451      int oleftstep=0;      // the o variables refer to the outer loop
1452      int orightstep=0;     // The outer loop is only required in cases where there is an extended scalar
1453      int onumsteps=1;
1454      
1455      bool LES=(leftExp && leftScalar);     // Left is an expanded scalar
1456      bool RES=(rightExp && rightScalar);
1457      bool LS=(!leftExp && leftScalar);     // left is a single scalar
1458      bool RS=(!rightExp && rightScalar);
1459      bool LN=(!leftExp && !leftScalar);    // left is a single non-scalar
1460      bool RN=(!rightExp && !rightScalar);
1461      bool LEN=(leftExp && !leftScalar);    // left is an expanded non-scalar
1462      bool REN=(rightExp && !rightScalar);
1463    
1464      if ((LES && RES) || (LEN && REN))     // both are Expanded scalars or both are expanded non-scalars
1465      {
1466            chunksize=m_left->getNumDPPSample()*leftsize;
1467            leftstep=0;
1468            rightstep=0;
1469            numsteps=1;
1470      }
1471      else if (LES || RES)
1472      {
1473            chunksize=1;
1474            if (LES)                // left is an expanded scalar
1475            {
1476                    if (RS)
1477                    {
1478                       leftstep=1;
1479                       rightstep=0;
1480                       numsteps=m_left->getNumDPPSample();
1481                    }
1482                    else            // RN or REN
1483                    {
1484                       leftstep=0;
1485                       oleftstep=1;
1486                       rightstep=1;
1487                       orightstep=(RN ? -(int)rightsize : 0);
1488                       numsteps=rightsize;
1489                       onumsteps=m_left->getNumDPPSample();
1490                    }
1491            }
1492            else            // right is an expanded scalar
1493            {
1494                    if (LS)
1495                    {
1496                       rightstep=1;
1497                       leftstep=0;
1498                       numsteps=m_right->getNumDPPSample();
1499                    }
1500                    else
1501                    {
1502                       rightstep=0;
1503                       orightstep=1;
1504                       leftstep=1;
1505                       oleftstep=(LN ? -(int)leftsize : 0);
1506                       numsteps=leftsize;
1507                       onumsteps=m_right->getNumDPPSample();
1508                    }
1509            }
1510      }
1511      else  // this leaves (LEN, RS), (LEN, RN) and their transposes
1512      {
1513            if (LEN)        // and Right will be a single value
1514            {
1515                    chunksize=rightsize;
1516                    leftstep=rightsize;
1517                    rightstep=0;
1518                    numsteps=m_left->getNumDPPSample();
1519                    if (RS)
1520                    {
1521                       numsteps*=leftsize;
1522                    }
1523            }
1524            else    // REN
1525            {
1526                    chunksize=leftsize;
1527                    rightstep=leftsize;
1528                    leftstep=0;
1529                    numsteps=m_right->getNumDPPSample();
1530                    if (LS)
1531                    {
1532                       numsteps*=rightsize;
1533                    }
1534            }
1535      }
1536    
1537      int resultStep=max(leftstep,rightstep);       // only one (at most) should be !=0
1538            // Get the values of sub-expressions
1539      const ValueType* left=m_left->resolveNodeSample(tid,sampleNo,lroffset);      
1540      const ValueType* right=m_right->resolveNodeSample(tid,sampleNo,rroffset);
1541    LAZYDEBUG(cout << "Post sub calls in " << toString() << endl;)
1542    LAZYDEBUG(cout << "shapes=" << DataTypes::shapeToString(m_left->getShape()) << "," << DataTypes::shapeToString(m_right->getShape()) << endl;)
1543    LAZYDEBUG(cout << "chunksize=" << chunksize << endl << "leftstep=" << leftstep << " rightstep=" << rightstep;)
1544    LAZYDEBUG(cout << " numsteps=" << numsteps << endl << "oleftstep=" << oleftstep << " orightstep=" << orightstep;)
1545    LAZYDEBUG(cout << "onumsteps=" << onumsteps << endl;)
1546    LAZYDEBUG(cout << " DPPS=" << m_left->getNumDPPSample() << "," <<m_right->getNumDPPSample() << endl;)
1547    LAZYDEBUG(cout << "" << LS << RS << LN << RN << LES << RES <<LEN << REN <<   endl;)
1548    
1549    LAZYDEBUG(cout << "Left res["<< lroffset<< "]=" << (*left)[lroffset] << endl;)
1550    LAZYDEBUG(cout << "Right res["<< rroffset<< "]=" << (*right)[rroffset] << endl;)
1551    
1552    
1553      roffset=m_samplesize*tid;
1554      double* resultp=&(m_samples[roffset]);                // results are stored at the vector offset we received
1555      switch(m_op)
1556      {
1557        case ADD:
1558            PROC_OP(NO_ARG,plus<double>());
1559            break;
1560        case SUB:
1561            PROC_OP(NO_ARG,minus<double>());
1562            break;
1563        case MUL:
1564            PROC_OP(NO_ARG,multiplies<double>());
1565            break;
1566        case DIV:
1567            PROC_OP(NO_ARG,divides<double>());
1568            break;
1569        case POW:
1570           PROC_OP(double (double,double),::pow);
1571            break;
1572        default:
1573            throw DataException("Programmer error - resolveBinary can not resolve operator "+opToString(m_op)+".");
1574    }    }
1575    return result;  LAZYDEBUG(cout << "Result res[" << roffset<< "]" << m_samples[roffset] << endl;)
1576      return &m_samples;
1577  }  }
1578    
1579    
1580    // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1581    // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1582    // unlike the other resolve helpers, we must treat these datapoints separately.
1583    const DataTypes::RealVectorType*
1584    DataLazy::resolveNodeTProd(int tid, int sampleNo, size_t& roffset) const
1585    {
1586    LAZYDEBUG(cout << "Resolve TensorProduct: " << toString() << endl;)
1587    
1588      size_t lroffset=0, rroffset=0;        // offsets in the left and right result vectors
1589            // first work out which of the children are expanded
1590      bool leftExp=(m_left->m_readytype=='E');
1591      bool rightExp=(m_right->m_readytype=='E');
1592      int steps=getNumDPPSample();
1593      int leftStep=(leftExp? m_left->getNoValues() : 0);            // do not have scalars as input to this method
1594      int rightStep=(rightExp?m_right->getNoValues() : 0);
1595    
1596      int resultStep=getNoValues();
1597      roffset=m_samplesize*tid;
1598      size_t offset=roffset;
1599    
1600      const ValueType* left=m_left->resolveNodeSample(tid, sampleNo, lroffset);
1601    
1602      const ValueType* right=m_right->resolveNodeSample(tid, sampleNo, rroffset);
1603    
1604    LAZYDEBUG(cerr << "[Left shape]=" << DataTypes::shapeToString(m_left->getShape()) << "\n[Right shape]=" << DataTypes::shapeToString(m_right->getShape()) << " result=" <<DataTypes::shapeToString(getShape()) <<  endl;
1605    cout << getNoValues() << endl;)
1606    
1607    
1608    LAZYDEBUG(cerr << "Post sub calls: " << toString() << endl;)
1609    LAZYDEBUG(cout << "LeftExp=" << leftExp << " rightExp=" << rightExp << endl;)
1610    LAZYDEBUG(cout << "LeftR=" << m_left->getRank() << " rightExp=" << m_right->getRank() << endl;)
1611    LAZYDEBUG(cout << "LeftSize=" << m_left->getNoValues() << " RightSize=" << m_right->getNoValues() << endl;)
1612    LAZYDEBUG(cout << "m_samplesize=" << m_samplesize << endl;)
1613    LAZYDEBUG(cout << "outputshape=" << DataTypes::shapeToString(getShape()) << endl;)
1614    LAZYDEBUG(cout << "DPPS=" << m_right->getNumDPPSample() <<"."<<endl;)
1615    
1616      double* resultp=&(m_samples[offset]);         // results are stored at the vector offset we received
1617      switch(m_op)
1618      {
1619        case PROD:
1620            for (int i=0;i<steps;++i,resultp+=resultStep)
1621            {
1622              const double *ptr_0 = &((*left)[lroffset]);
1623              const double *ptr_1 = &((*right)[rroffset]);
1624    
1625    LAZYDEBUG(cout << DataTypes::pointToString(*left, m_left->getShape(),lroffset,"LEFT") << endl;)
1626    LAZYDEBUG(cout << DataTypes::pointToString(*right,m_right->getShape(),rroffset, "RIGHT") << endl;)
1627    
1628              matrix_matrix_product(m_SL, m_SM, m_SR, ptr_0, ptr_1, resultp, m_transpose);
1629    
1630              lroffset+=leftStep;
1631              rroffset+=rightStep;
1632            }
1633            break;
1634        default:
1635            throw DataException("Programmer error - resolveTProduct can not resolve operator "+opToString(m_op)+".");
1636      }
1637      roffset=offset;
1638      return &m_samples;
1639    }
1640    
1641    
1642    const DataTypes::RealVectorType*
1643    DataLazy::resolveSample(int sampleNo, size_t& roffset) const
1644    {
1645    #ifdef _OPENMP
1646            int tid=omp_get_thread_num();
1647    #else
1648            int tid=0;
1649    #endif
1650    
1651    #ifdef LAZY_STACK_PROF
1652            stackstart[tid]=&tid;
1653            stackend[tid]=&tid;
1654            const DataTypes::RealVectorType* r=resolveNodeSample(tid, sampleNo, roffset);
1655            size_t d=(size_t)stackstart[tid]-(size_t)stackend[tid];
1656            #pragma omp critical
1657            if (d>maxstackuse)
1658            {
1659    cout << "Max resolve Stack use " << d << endl;
1660                    maxstackuse=d;
1661            }
1662            return r;
1663    #else
1664            return resolveNodeSample(tid, sampleNo, roffset);
1665    #endif
1666    }
1667    
1668    
1669    // This needs to do the work of the identity constructor
1670    void
1671    DataLazy::resolveToIdentity()
1672    {
1673       if (m_op==IDENTITY)
1674            return;
1675       DataReady_ptr p=resolveNodeWorker();
1676       makeIdentity(p);
1677    }
1678    
1679    void DataLazy::makeIdentity(const DataReady_ptr& p)
1680    {
1681       m_op=IDENTITY;
1682       m_axis_offset=0;
1683       m_transpose=0;
1684       m_SL=m_SM=m_SR=0;
1685       m_children=m_height=0;
1686       m_id=p;
1687       if(p->isConstant()) {m_readytype='C';}
1688       else if(p->isExpanded()) {m_readytype='E';}
1689       else if (p->isTagged()) {m_readytype='T';}
1690       else {throw DataException("Unknown DataReady instance in convertToIdentity constructor.");}
1691       m_samplesize=p->getNumDPPSample()*p->getNoValues();
1692       m_left.reset();
1693       m_right.reset();
1694    }
1695    
1696    
1697  DataReady_ptr  DataReady_ptr
1698  DataLazy::resolve()  DataLazy::resolve()
1699  {  {
1700    // This is broken!     We need to have a buffer per thread!      resolveToIdentity();
1701    // so the allocation of v needs to move inside the loop somehow      return m_id;
1702    }
1703    
 cout << "Sample size=" << m_samplesize << endl;  
 cout << "Buffers=" << m_buffsRequired << endl;  
1704    
1705    size_t threadbuffersize=m_samplesize*(max(1,m_buffsRequired)+1);  /* This is really a static method but I think that caused problems in windows */
1706    int numthreads=1;  void
1707    DataLazy::resolveGroupWorker(std::vector<DataLazy*>& dats)
1708    {
1709      if (dats.empty())
1710      {
1711            return;
1712      }
1713      vector<DataLazy*> work;
1714      FunctionSpace fs=dats[0]->getFunctionSpace();
1715      bool match=true;
1716      for (int i=dats.size()-1;i>=0;--i)
1717      {
1718            if (dats[i]->m_readytype!='E')
1719            {
1720                    dats[i]->collapse();
1721            }
1722            if (dats[i]->m_op!=IDENTITY)
1723            {
1724                    work.push_back(dats[i]);
1725                    if (fs!=dats[i]->getFunctionSpace())
1726                    {
1727                            match=false;
1728                    }
1729            }
1730      }
1731      if (work.empty())
1732      {
1733            return;         // no work to do
1734      }
1735      if (match)    // all functionspaces match.  Yes I realise this is overly strict
1736      {             // it is possible that dats[0] is one of the objects which we discarded and
1737                    // all the other functionspaces match.
1738            vector<DataExpanded*> dep;
1739            vector<ValueType*> vecs;
1740            for (int i=0;i<work.size();++i)
1741            {
1742                    dep.push_back(new DataExpanded(fs,work[i]->getShape(), ValueType(work[i]->getNoValues())));
1743                    vecs.push_back(&(dep[i]->getVectorRW()));
1744            }
1745            int totalsamples=work[0]->getNumSamples();
1746            const ValueType* res=0; // Storage for answer
1747            int sample;
1748            #pragma omp parallel private(sample, res)
1749            {
1750                size_t roffset=0;
1751                #pragma omp for schedule(static)
1752                for (sample=0;sample<totalsamples;++sample)
1753                {
1754                    roffset=0;
1755                    int j;
1756                    for (j=work.size()-1;j>=0;--j)
1757                    {
1758  #ifdef _OPENMP  #ifdef _OPENMP
1759    numthreads=omp_get_max_threads();                      res=work[j]->resolveNodeSample(omp_get_thread_num(),sample,roffset);
1760    int threadnum=0;  #else
1761  #endif                      res=work[j]->resolveNodeSample(0,sample,roffset);
1762    ValueType v(numthreads*threadbuffersize);  #endif
1763  cout << "Buffer created with size=" << v.size() << endl;                      RealVectorType::size_type outoffset=dep[j]->getPointOffset(sample,0);
1764    ValueType dummy(getNoValues());                      memcpy(&((*vecs[j])[outoffset]),&((*res)[roffset]),work[j]->m_samplesize*sizeof(RealVectorType::ElementType));
1765    DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(),dummy);                  }
1766    ValueType& resvec=result->getVector();              }
1767            }
1768            // Now we need to load the new results as identity ops into the lazy nodes
1769            for (int i=work.size()-1;i>=0;--i)
1770            {
1771                work[i]->makeIdentity(REFCOUNTNS::dynamic_pointer_cast<DataReady>(dep[i]->getPtr()));
1772            }
1773      }
1774      else  // functionspaces do not match
1775      {
1776            for (int i=0;i<work.size();++i)
1777            {
1778                    work[i]->resolveToIdentity();
1779            }
1780      }
1781    }
1782    
1783    
1784    
1785    // This version of resolve uses storage in each node to hold results
1786    DataReady_ptr
1787    DataLazy::resolveNodeWorker()
1788    {
1789      if (m_readytype!='E')         // if the whole sub-expression is Constant or Tagged, then evaluate it normally
1790      {
1791        collapse();
1792      }
1793      if (m_op==IDENTITY)           // So a lazy expression of Constant or Tagged data will be returned here.
1794      {
1795        return m_id;
1796      }
1797            // from this point on we must have m_op!=IDENTITY and m_readytype=='E'
1798      DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(),  ValueType(getNoValues()));
1799      ValueType& resvec=result->getVectorRW();
1800    DataReady_ptr resptr=DataReady_ptr(result);    DataReady_ptr resptr=DataReady_ptr(result);
1801    
1802    int sample;    int sample;
   int resoffset;  
1803    int totalsamples=getNumSamples();    int totalsamples=getNumSamples();
1804    #pragma omp parallel for private(sample,resoffset,threadnum) schedule(static)    const ValueType* res=0;       // Storage for answer
1805    for (sample=0;sample<totalsamples;++sample)  LAZYDEBUG(cout << "Total number of samples=" <<totalsamples << endl;)
1806      #pragma omp parallel private(sample,res)
1807    {    {
1808            size_t roffset=0;
1809    #ifdef LAZY_STACK_PROF
1810            stackstart[omp_get_thread_num()]=&roffset;
1811            stackend[omp_get_thread_num()]=&roffset;
1812    #endif
1813            #pragma omp for schedule(static)
1814            for (sample=0;sample<totalsamples;++sample)
1815            {
1816                    roffset=0;
1817  #ifdef _OPENMP  #ifdef _OPENMP
1818      const double* res=resolveSample(v,sample,threadbuffersize*omp_get_thread_num());                  res=resolveNodeSample(omp_get_thread_num(),sample,roffset);
1819  #else  #else
1820      const double* res=resolveSample(v,sample,0);   // this would normally be v, but not if its a single IDENTITY op.                  res=resolveNodeSample(0,sample,roffset);
1821  #endif  #endif
1822      resoffset=result->getPointOffset(sample,0);  LAZYDEBUG(cout << "Sample #" << sample << endl;)
1823      for (int i=0;i<m_samplesize;++i,++resoffset)    // copy values into the output vector  LAZYDEBUG(cout << "Final res[" << roffset<< "]=" << (*res)[roffset] << (*res)[roffset]<< endl; )
1824      {                  RealVectorType::size_type outoffset=result->getPointOffset(sample,0);
1825      resvec[resoffset]=res[i];                  memcpy(&(resvec[outoffset]),&((*res)[roffset]),m_samplesize*sizeof(RealVectorType::ElementType));
1826      }          }
1827    }    }
1828    #ifdef LAZY_STACK_PROF
1829      for (int i=0;i<getNumberOfThreads();++i)
1830      {
1831            size_t r=((size_t)stackstart[i] - (size_t)stackend[i]);
1832    //      cout << i << " " << stackstart[i] << " .. " << stackend[i] << " = " <<  r << endl;
1833            if (r>maxstackuse)
1834            {
1835                    maxstackuse=r;
1836            }
1837      }
1838      cout << "Max resolve Stack use=" << maxstackuse << endl;
1839    #endif
1840    return resptr;    return resptr;
1841  }  }
1842    
# Line 407  std::string Line 1844  std::string
1844  DataLazy::toString() const  DataLazy::toString() const
1845  {  {
1846    ostringstream oss;    ostringstream oss;
1847    oss << "Lazy Data:";    oss << "Lazy Data: [depth=" << m_height<< "] ";
1848    intoString(oss);    switch (escriptParams.getLAZY_STR_FMT())
1849      {
1850      case 1:       // tree format
1851            oss << endl;
1852            intoTreeString(oss,"");
1853            break;
1854      case 2:       // just the depth
1855            break;
1856      default:
1857            intoString(oss);
1858            break;
1859      }
1860    return oss.str();    return oss.str();
1861  }  }
1862    
1863    
1864  void  void
1865  DataLazy::intoString(ostringstream& oss) const  DataLazy::intoString(ostringstream& oss) const
1866  {  {
1867    //    oss << "[" << m_children <<";"<<m_height <<"]";
1868    switch (getOpgroup(m_op))    switch (getOpgroup(m_op))
1869    {    {
1870    case G_IDENTITY:    case G_IDENTITY:
1871      oss << '@' << m_id.get();          if (m_id->isExpanded())
1872      break;          {
1873               oss << "E";
1874            }
1875            else if (m_id->isTagged())
1876            {
1877              oss << "T";
1878            }
1879            else if (m_id->isConstant())
1880            {
1881              oss << "C";
1882            }
1883            else
1884            {
1885              oss << "?";
1886            }
1887            oss << '@' << m_id.get();
1888            break;
1889    case G_BINARY:    case G_BINARY:
1890      oss << '(';          oss << '(';
1891      m_left->intoString(oss);          m_left->intoString(oss);
1892      oss << ' ' << opToString(m_op) << ' ';          oss << ' ' << opToString(m_op) << ' ';
1893      m_right->intoString(oss);          m_right->intoString(oss);
1894      oss << ')';          oss << ')';
1895      break;          break;
1896    case G_UNARY:    case G_UNARY:
1897      oss << opToString(m_op) << '(';    case G_UNARY_P:
1898      m_left->intoString(oss);    case G_NP1OUT:
1899      oss << ')';    case G_NP1OUT_P:
1900      break;    case G_REDUCTION:
1901            oss << opToString(m_op) << '(';
1902            m_left->intoString(oss);
1903            oss << ')';
1904            break;
1905      case G_TENSORPROD:
1906            oss << opToString(m_op) << '(';
1907            m_left->intoString(oss);
1908            oss << ", ";
1909            m_right->intoString(oss);
1910            oss << ')';
1911            break;
1912      case G_NP1OUT_2P:
1913            oss << opToString(m_op) << '(';
1914            m_left->intoString(oss);
1915            oss << ", " << m_axis_offset << ", " << m_transpose;
1916            oss << ')';
1917            break;
1918      case G_CONDEVAL:
1919            oss << opToString(m_op)<< '(' ;
1920            m_mask->intoString(oss);
1921            oss << " ? ";
1922            m_left->intoString(oss);
1923            oss << " : ";
1924            m_right->intoString(oss);
1925            oss << ')';
1926            break;
1927    default:    default:
1928      oss << "UNKNOWN";          oss << "UNKNOWN";
1929    }    }
1930  }  }
1931    
1932  // Note that in this case, deepCopy does not make copies of the leaves.  
1933  // Hopefully copy on write (or whatever we end up using) will take care of this.  void
1934    DataLazy::intoTreeString(ostringstream& oss, string indent) const
1935    {
1936      oss << '[' << m_rank << ':' << setw(3) << m_samplesize << "] " << indent;
1937      switch (getOpgroup(m_op))
1938      {
1939      case G_IDENTITY:
1940            if (m_id->isExpanded())
1941            {
1942               oss << "E";
1943            }
1944            else if (m_id->isTagged())
1945            {
1946              oss << "T";
1947            }
1948            else if (m_id->isConstant())
1949            {
1950              oss << "C";
1951            }
1952            else
1953            {
1954              oss << "?";
1955            }
1956            oss << '@' << m_id.get() << endl;
1957            break;
1958      case G_BINARY:
1959            oss << opToString(m_op) << endl;
1960            indent+='.';
1961            m_left->intoTreeString(oss, indent);
1962            m_right->intoTreeString(oss, indent);
1963            break;
1964      case G_UNARY:
1965      case G_UNARY_P:
1966      case G_NP1OUT:
1967      case G_NP1OUT_P:
1968      case G_REDUCTION:
1969            oss << opToString(m_op) << endl;
1970            indent+='.';
1971            m_left->intoTreeString(oss, indent);
1972            break;
1973      case G_TENSORPROD:
1974            oss << opToString(m_op) << endl;
1975            indent+='.';
1976            m_left->intoTreeString(oss, indent);
1977            m_right->intoTreeString(oss, indent);
1978            break;
1979      case G_NP1OUT_2P:
1980            oss << opToString(m_op) << ", " << m_axis_offset << ", " << m_transpose<< endl;
1981            indent+='.';
1982            m_left->intoTreeString(oss, indent);
1983            break;
1984      default:
1985            oss << "UNKNOWN";
1986      }
1987    }
1988    
1989    
1990  DataAbstract*  DataAbstract*
1991  DataLazy::deepCopy()  DataLazy::deepCopy() const
1992  {  {
1993    if (m_op==IDENTITY)    switch (getOpgroup(m_op))
1994    {    {
1995      return new DataLazy(m_left);    // we don't need to copy the child here    case G_IDENTITY:  return new DataLazy(m_id->deepCopy()->getPtr());
1996      case G_UNARY:
1997      case G_REDUCTION:      return new DataLazy(m_left->deepCopy()->getPtr(),m_op);
1998      case G_UNARY_P:       return new DataLazy(m_left->deepCopy()->getPtr(), m_op, m_tol);
1999      case G_BINARY:        return new DataLazy(m_left->deepCopy()->getPtr(),m_right->deepCopy()->getPtr(),m_op);
2000      case G_NP1OUT: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(),m_op);
2001      case G_TENSORPROD: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
2002      case G_NP1OUT_P:   return new DataLazy(m_left->deepCopy()->getPtr(),m_op,  m_axis_offset);
2003      case G_NP1OUT_2P:  return new DataLazy(m_left->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
2004      default:
2005            throw DataException("Programmer error - do not know how to deepcopy operator "+opToString(m_op)+".");
2006    }    }
   return new DataLazy(m_left->deepCopy()->getPtr(),m_right->deepCopy()->getPtr(),m_op);  
2007  }  }
2008    
2009    
2010  DataTypes::ValueType::size_type  
2011    // There is no single, natural interpretation of getLength on DataLazy.
2012    // Instances of DataReady can look at the size of their vectors.
2013    // For lazy though, it could be the size the data would be if it were resolved;
2014    // or it could be some function of the lengths of the DataReady instances which
2015    // form part of the expression.
2016    // Rather than have people making assumptions, I have disabled the method.
2017    DataTypes::RealVectorType::size_type
2018  DataLazy::getLength() const  DataLazy::getLength() const
2019  {  {
2020    return m_length;    throw DataException("getLength() does not make sense for lazy data.");
2021  }  }
2022    
2023    
# Line 463  DataLazy::getSlice(const DataTypes::Regi Line 2027  DataLazy::getSlice(const DataTypes::Regi
2027    throw DataException("getSlice - not implemented for Lazy objects.");    throw DataException("getSlice - not implemented for Lazy objects.");
2028  }  }
2029    
2030  DataTypes::ValueType::size_type  
2031    // To do this we need to rely on our child nodes
2032    DataTypes::RealVectorType::size_type
2033    DataLazy::getPointOffset(int sampleNo,
2034                     int dataPointNo)
2035    {
2036      if (m_op==IDENTITY)
2037      {
2038            return m_id->getPointOffset(sampleNo,dataPointNo);
2039      }
2040      if (m_readytype!='E')
2041      {
2042            collapse();
2043            return m_id->getPointOffset(sampleNo,dataPointNo);
2044      }
2045      // at this point we do not have an identity node and the expression will be Expanded
2046      // so we only need to know which child to ask
2047      if (m_left->m_readytype=='E')
2048      {
2049            return m_left->getPointOffset(sampleNo,dataPointNo);
2050      }
2051      else
2052      {
2053            return m_right->getPointOffset(sampleNo,dataPointNo);
2054      }
2055    }
2056    
2057    // To do this we need to rely on our child nodes
2058    DataTypes::RealVectorType::size_type
2059  DataLazy::getPointOffset(int sampleNo,  DataLazy::getPointOffset(int sampleNo,
2060                   int dataPointNo) const                   int dataPointNo) const
2061  {  {
2062    throw DataException("getPointOffset - not implemented for Lazy objects - yet.");    if (m_op==IDENTITY)
2063      {
2064            return m_id->getPointOffset(sampleNo,dataPointNo);
2065      }
2066      if (m_readytype=='E')
2067      {
2068        // at this point we do not have an identity node and the expression will be Expanded
2069        // so we only need to know which child to ask
2070        if (m_left->m_readytype=='E')
2071        {
2072            return m_left->getPointOffset(sampleNo,dataPointNo);
2073        }
2074        else
2075        {
2076            return m_right->getPointOffset(sampleNo,dataPointNo);
2077        }
2078      }
2079      if (m_readytype=='C')
2080      {
2081            return m_left->getPointOffset(sampleNo,dataPointNo); // which child doesn't matter
2082      }
2083      throw DataException("Programmer error - getPointOffset on lazy data may require collapsing (but this object is marked const).");
2084    }
2085    
2086    
2087    // I have decided to let Data:: handle this issue.
2088    void
2089    DataLazy::setToZero()
2090    {
2091    //   DataTypes::RealVectorType v(getNoValues(),0);
2092    //   m_id=DataReady_ptr(new DataConstant(getFunctionSpace(),getShape(),v));
2093    //   m_op=IDENTITY;
2094    //   m_right.reset();  
2095    //   m_left.reset();
2096    //   m_readytype='C';
2097    //   m_buffsRequired=1;
2098    
2099      (void)privdebug;  // to stop the compiler complaining about unused privdebug
2100      throw DataException("Programmer error - setToZero not supported for DataLazy (DataLazy objects should be read only).");
2101  }  }
2102    
2103  }   // end namespace  bool
2104    DataLazy::actsExpanded() const
2105    {
2106            return (m_readytype=='E');
2107    }
2108    
2109    } // end namespace
2110    

Legend:
Removed from v.1886  
changed lines
  Added in v.5997

  ViewVC Help
Powered by ViewVC 1.1.26