/[escript]/branches/clazy/escriptcore/src/DataLazy.cpp
ViewVC logotype

Diff of /branches/clazy/escriptcore/src/DataLazy.cpp

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

branches/schroedinger/escript/src/DataLazy.cpp revision 1888 by jfenwick, Wed Oct 15 04:00:21 2008 UTC trunk/escriptcore/src/DataLazy.cpp revision 4657 by jfenwick, Thu Feb 6 06:12:20 2014 UTC
# Line 1  Line 1 
1    
2  /*******************************************************  /*****************************************************************************
3  *  *
4  * Copyright (c) 2003-2008 by University of Queensland  * Copyright (c) 2003-2014 by University of Queensland
5  * Earth Systems Science Computational Center (ESSCC)  * http://www.uq.edu.au
 * http://www.uq.edu.au/esscc  
6  *  *
7  * Primary Business: Queensland, Australia  * Primary Business: Queensland, Australia
8  * Licensed under the Open Software License version 3.0  * Licensed under the Open Software License version 3.0
9  * http://www.opensource.org/licenses/osl-3.0.php  * http://www.opensource.org/licenses/osl-3.0.php
10  *  *
11  *******************************************************/  * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12    * Development 2012-2013 by School of Earth Sciences
13    * Development from 2014 by Centre for Geoscience Computing (GeoComp)
14    *
15    *****************************************************************************/
16    
17    
18  #include "DataLazy.h"  #include "DataLazy.h"
19  #ifdef USE_NETCDF  #include "esysUtils/Esys_MPI.h"
 #include <netcdfcpp.h>  
 #endif  
 #ifdef PASO_MPI  
 #include <mpi.h>  
 #endif  
20  #ifdef _OPENMP  #ifdef _OPENMP
21  #include <omp.h>  #include <omp.h>
22  #endif  #endif
# Line 26  Line 24 
24  #include "DataTypes.h"  #include "DataTypes.h"
25  #include "Data.h"  #include "Data.h"
26  #include "UnaryFuncs.h"     // for escript::fsign  #include "UnaryFuncs.h"     // for escript::fsign
27    #include "Utils.h"
28    
29    #include "EscriptParams.h"
30    
31    #ifdef USE_NETCDF
32    #include <netcdfcpp.h>
33    #endif
34    
35    #include <iomanip>      // for some fancy formatting in debug
36    
37    // #define LAZYDEBUG(X) if (privdebug){X;}
38    #define LAZYDEBUG(X)
39    namespace
40    {
41    bool privdebug=false;
42    
43    #define ENABLEDEBUG privdebug=true;
44    #define DISABLEDEBUG privdebug=false;
45    }
46    
47    // #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {cerr << "\n!!!!!!! SIZE LIMIT EXCEEDED " << m_children << ";" << m_height << endl << toString() << endl;resolveToIdentity();}
48    
49    // #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {cerr << "SIZE LIMIT EXCEEDED " << m_height << endl;resolveToIdentity();}
50    
51    
52    #define SIZELIMIT if (m_height>escript::escriptParams.getTOO_MANY_LEVELS())  {if (escript::escriptParams.getLAZY_VERBOSE()){cerr << "SIZE LIMIT EXCEEDED height=" << m_height << endl;}resolveToIdentity();}
53    
54    /*
55    How does DataLazy work?
56    ~~~~~~~~~~~~~~~~~~~~~~~
57    
58    Each instance represents a single operation on one or two other DataLazy instances. These arguments are normally
59    denoted left and right.
60    
61    A special operation, IDENTITY, stores an instance of DataReady in the m_id member.
62    This means that all "internal" nodes in the structure are instances of DataLazy.
63    
64    Each operation has a string representation as well as an opgroup - eg G_IDENTITY, G_BINARY, ...
65    Note that IDENTITY is not considered a unary operation.
66    
67    I am avoiding calling the structure formed a tree because it is not guaranteed to be one (eg c=a+a).
68    It must however form a DAG (directed acyclic graph).
69    I will refer to individual DataLazy objects with the structure as nodes.
70    
71    Each node also stores:
72    - m_readytype \in {'E','T','C','?'} ~ indicates what sort of DataReady would be produced if the expression was
73        evaluated.
74    - m_buffsrequired ~ the large number of samples which would need to be kept simultaneously in order to
75        evaluate the expression.
76    - m_samplesize ~ the number of doubles stored in a sample.
77    
78    When a new node is created, the above values are computed based on the values in the child nodes.
79    Eg: if left requires 4 samples and right requires 6 then left+right requires 7 samples.
80    
81    The resolve method, which produces a DataReady from a DataLazy, does the following:
82    1) Create a DataReady to hold the new result.
83    2) Allocate a vector (v) big enough to hold m_buffsrequired samples.
84    3) For each sample, call resolveSample with v, to get its values and copy them into the result object.
85    
86    (In the case of OMP, multiple samples are resolved in parallel so the vector needs to be larger.)
87    
88    resolveSample returns a Vector* and an offset within that vector where the result is stored.
89    Normally, this would be v, but for identity nodes their internal vector is returned instead.
90    
91    The convention that I use, is that the resolve methods should store their results starting at the offset they are passed.
92    
93    For expressions which evaluate to Constant or Tagged, there is a different evaluation method.
94    The collapse method invokes the (non-lazy) operations on the Data class to evaluate the expression.
95    
96    To add a new operator you need to do the following (plus anything I might have forgotten - adding a new group for example):
97    1) Add to the ES_optype.
98    2) determine what opgroup your operation belongs to (X)
99    3) add a string for the op to the end of ES_opstrings
100    4) increase ES_opcount
101    5) add an entry (X) to opgroups
102    6) add an entry to the switch in collapseToReady
103    7) add an entry to resolveX
104    */
105    
106    
107  using namespace std;  using namespace std;
108  using namespace boost;  using namespace boost;
# Line 33  using namespace boost; Line 110  using namespace boost;
110  namespace escript  namespace escript
111  {  {
112    
 const std::string&  
 opToString(ES_optype op);  
   
113  namespace  namespace
114  {  {
115    
116    
117    // enabling this will print out when ever the maximum stacksize used by resolve increases
118    // it assumes _OPENMP is also in use
119    //#define LAZY_STACK_PROF
120    
121    
122    
123    #ifndef _OPENMP
124      #ifdef LAZY_STACK_PROF
125      #undef LAZY_STACK_PROF
126      #endif
127    #endif
128    
129    
130    #ifdef LAZY_STACK_PROF
131    std::vector<void*> stackstart(getNumberOfThreads());
132    std::vector<void*> stackend(getNumberOfThreads());
133    size_t maxstackuse=0;
134    #endif
135    
136  enum ES_opgroup  enum ES_opgroup
137  {  {
138     G_UNKNOWN,     G_UNKNOWN,
139     G_IDENTITY,     G_IDENTITY,
140     G_BINARY,     G_BINARY,        // pointwise operations with two arguments
141     G_UNARY     G_UNARY,     // pointwise operations with one argument
142       G_UNARY_P,       // pointwise operations with one argument, requiring a parameter
143       G_NP1OUT,        // non-pointwise op with one output
144       G_NP1OUT_P,      // non-pointwise op with one output requiring a parameter
145       G_TENSORPROD,    // general tensor product
146       G_NP1OUT_2P,     // non-pointwise op with one output requiring two params
147       G_REDUCTION,     // non-pointwise unary op with a scalar output
148       G_CONDEVAL
149  };  };
150    
151    
152    
153    
154  string ES_opstrings[]={"UNKNOWN","IDENTITY","+","-","*","/","sin","cos","tan",  string ES_opstrings[]={"UNKNOWN","IDENTITY","+","-","*","/","^",
155                "sin","cos","tan",
156              "asin","acos","atan","sinh","cosh","tanh","erf",              "asin","acos","atan","sinh","cosh","tanh","erf",
157              "asinh","acosh","atanh",              "asinh","acosh","atanh",
158              "log10","log","sign","abs","neg","pos","exp","sqrt",              "log10","log","sign","abs","neg","pos","exp","sqrt",
159              "1/","where>0","where<0","where>=0","where<=0"};              "1/","where>0","where<0","where>=0","where<=0", "where<>0","where=0",
160  int ES_opcount=32;              "symmetric","nonsymmetric",
161  ES_opgroup opgroups[]={G_UNKNOWN,G_IDENTITY,G_BINARY,G_BINARY,G_BINARY,G_BINARY,G_UNARY,G_UNARY,G_UNARY, //9              "prod",
162              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,    // 16              "transpose", "trace",
163              G_UNARY,G_UNARY,G_UNARY,                    // 19              "swapaxes",
164              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,        // 27              "minval", "maxval",
165              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY};              "condEval"};
166    int ES_opcount=44;
167    ES_opgroup opgroups[]={G_UNKNOWN,G_IDENTITY,G_BINARY,G_BINARY,G_BINARY,G_BINARY, G_BINARY,
168                G_UNARY,G_UNARY,G_UNARY, //10
169                G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,    // 17
170                G_UNARY,G_UNARY,G_UNARY,                    // 20
171                G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,    // 28
172                G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY, G_UNARY_P, G_UNARY_P,      // 35
173                G_NP1OUT,G_NP1OUT,
174                G_TENSORPROD,
175                G_NP1OUT_P, G_NP1OUT_P,
176                G_NP1OUT_2P,
177                G_REDUCTION, G_REDUCTION,
178                G_CONDEVAL};
179  inline  inline
180  ES_opgroup  ES_opgroup
181  getOpgroup(ES_optype op)  getOpgroup(ES_optype op)
# Line 79  resultFS(DataAbstract_ptr left, DataAbst Line 192  resultFS(DataAbstract_ptr left, DataAbst
192      // that way, if interpolate is required in any other op we can just throw a      // that way, if interpolate is required in any other op we can just throw a
193      // programming error exception.      // programming error exception.
194    
195      FunctionSpace l=left->getFunctionSpace();
196      if (left->getFunctionSpace()!=right->getFunctionSpace())    FunctionSpace r=right->getFunctionSpace();
197      {    if (l!=r)
198          throw DataException("FunctionSpaces not equal - interpolation not supported on lazy data.");    {
199      }      signed char res=r.getDomain()->preferredInterpolationOnDomain(r.getTypeCode(), l.getTypeCode());
200      return left->getFunctionSpace();      if (res==1)
201        {
202        return l;
203        }
204        if (res==-1)
205        {
206        return r;
207        }
208        throw DataException("Cannot interpolate between the FunctionSpaces given for operation "+opToString(op)+".");
209      }
210      return l;
211  }  }
212    
213  // return the shape of the result of "left op right"  // return the shape of the result of "left op right"
214    // the shapes resulting from tensor product are more complex to compute so are worked out elsewhere
215  DataTypes::ShapeType  DataTypes::ShapeType
216  resultShape(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)  resultShape(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
217  {  {
218      if (left->getShape()!=right->getShape())      if (left->getShape()!=right->getShape())
219      {      {
220          throw DataException("Shapes not the same - shapes must match for lazy data.");        if ((getOpgroup(op)!=G_BINARY) && (getOpgroup(op)!=G_NP1OUT))
221          {
222            throw DataException("Shapes not the name - shapes must match for (point)binary operations.");
223          }
224    
225          if (left->getRank()==0)   // we need to allow scalar * anything
226          {
227            return right->getShape();
228          }
229          if (right->getRank()==0)
230          {
231            return left->getShape();
232          }
233          throw DataException("Shapes not the same - arguments must have matching shapes (or be scalars) for (point)binary operations on lazy data.");
234      }      }
235      return left->getShape();      return left->getShape();
236  }  }
237    
238  size_t  // return the shape for "op left"
239  resultLength(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)  
240    DataTypes::ShapeType
241    resultShape(DataAbstract_ptr left, ES_optype op, int axis_offset)
242  {  {
243     switch (getOpgroup(op))      switch(op)
244     {      {
245     case G_BINARY: return left->getLength();          case TRANS:
246     case G_UNARY: return left->getLength();         {            // for the scoping of variables
247     default:          const DataTypes::ShapeType& s=left->getShape();
248      throw DataException("Programmer Error - attempt to getLength() for operator "+opToString(op)+".");          DataTypes::ShapeType sh;
249     }          int rank=left->getRank();
250            if (axis_offset<0 || axis_offset>rank)
251            {
252                stringstream e;
253                e << "Error - Data::transpose must have 0 <= axis_offset <= rank=" << rank;
254                throw DataException(e.str());
255            }
256            for (int i=0; i<rank; i++)
257            {
258               int index = (axis_offset+i)%rank;
259               sh.push_back(s[index]); // Append to new shape
260            }
261            return sh;
262           }
263        break;
264        case TRACE:
265           {
266            int rank=left->getRank();
267            if (rank<2)
268            {
269               throw DataException("Trace can only be computed for objects with rank 2 or greater.");
270            }
271            if ((axis_offset>rank-2) || (axis_offset<0))
272            {
273               throw DataException("Trace: axis offset must lie between 0 and rank-2 inclusive.");
274            }
275            if (rank==2)
276            {
277               return DataTypes::scalarShape;
278            }
279            else if (rank==3)
280            {
281               DataTypes::ShapeType sh;
282                   if (axis_offset==0)
283               {
284                    sh.push_back(left->getShape()[2]);
285                   }
286                   else     // offset==1
287               {
288                sh.push_back(left->getShape()[0]);
289                   }
290               return sh;
291            }
292            else if (rank==4)
293            {
294               DataTypes::ShapeType sh;
295               const DataTypes::ShapeType& s=left->getShape();
296                   if (axis_offset==0)
297               {
298                    sh.push_back(s[2]);
299                    sh.push_back(s[3]);
300                   }
301                   else if (axis_offset==1)
302               {
303                    sh.push_back(s[0]);
304                    sh.push_back(s[3]);
305                   }
306               else     // offset==2
307               {
308                sh.push_back(s[0]);
309                sh.push_back(s[1]);
310               }
311               return sh;
312            }
313            else        // unknown rank
314            {
315               throw DataException("Error - Data::trace can only be calculated for rank 2, 3 or 4 object.");
316            }
317           }
318        break;
319            default:
320        throw DataException("Programmer error - resultShape(left,op) can't compute shapes for operator "+opToString(op)+".");
321        }
322  }  }
323    
324  int  DataTypes::ShapeType
325  calcBuffs(const DataLazy_ptr& left, const DataLazy_ptr& right, ES_optype op)  SwapShape(DataAbstract_ptr left, const int axis0, const int axis1)
326  {  {
327     switch(getOpgroup(op))       // This code taken from the Data.cpp swapaxes() method
328     {       // Some of the checks are probably redundant here
329     case G_IDENTITY: return 1;       int axis0_tmp,axis1_tmp;
330     case G_BINARY: return max(left->getBuffsRequired(),right->getBuffsRequired()+1);       const DataTypes::ShapeType& s=left->getShape();
331     case G_UNARY: return max(left->getBuffsRequired(),1);       DataTypes::ShapeType out_shape;
332     default:       // Here's the equivalent of python s_out=s[axis_offset:]+s[:axis_offset]
333      throw DataException("Programmer Error - attempt to calcBuffs() for operator "+opToString(op)+".");       // which goes thru all shape vector elements starting with axis_offset (at index=rank wrap around to 0)
334     }       int rank=left->getRank();
335         if (rank<2) {
336            throw DataException("Error - Data::swapaxes argument must have at least rank 2.");
337         }
338         if (axis0<0 || axis0>rank-1) {
339            stringstream e;
340            e << "Error - Data::swapaxes: axis0 must be between 0 and rank-1=" << (rank-1);
341            throw DataException(e.str());
342         }
343         if (axis1<0 || axis1>rank-1) {
344            stringstream e;
345            e << "Error - Data::swapaxes: axis1 must be between 0 and rank-1=" << (rank-1);
346            throw DataException(e.str());
347         }
348         if (axis0 == axis1) {
349             throw DataException("Error - Data::swapaxes: axis indices must be different.");
350         }
351         if (axis0 > axis1) {
352             axis0_tmp=axis1;
353             axis1_tmp=axis0;
354         } else {
355             axis0_tmp=axis0;
356             axis1_tmp=axis1;
357         }
358         for (int i=0; i<rank; i++) {
359           if (i == axis0_tmp) {
360              out_shape.push_back(s[axis1_tmp]);
361           } else if (i == axis1_tmp) {
362              out_shape.push_back(s[axis0_tmp]);
363           } else {
364              out_shape.push_back(s[i]);
365           }
366         }
367        return out_shape;
368  }  }
369    
370    
371    // determine the output shape for the general tensor product operation
372    // the additional parameters return information required later for the product
373    // the majority of this code is copy pasted from C_General_Tensor_Product
374    DataTypes::ShapeType
375    GTPShape(DataAbstract_ptr left, DataAbstract_ptr right, int axis_offset, int transpose, int& SL, int& SM, int& SR)
376    {
377        
378      // Get rank and shape of inputs
379      int rank0 = left->getRank();
380      int rank1 = right->getRank();
381      const DataTypes::ShapeType& shape0 = left->getShape();
382      const DataTypes::ShapeType& shape1 = right->getShape();
383    
384      // Prepare for the loops of the product and verify compatibility of shapes
385      int start0=0, start1=0;
386      if (transpose == 0)       {}
387      else if (transpose == 1)  { start0 = axis_offset; }
388      else if (transpose == 2)  { start1 = rank1-axis_offset; }
389      else              { throw DataException("DataLazy GeneralTensorProduct Constructor: Error - transpose should be 0, 1 or 2"); }
390    
391      if (rank0<axis_offset)
392      {
393        throw DataException("DataLazy GeneralTensorProduct Constructor: Error - rank of left < axisoffset");
394      }
395    
396      // Adjust the shapes for transpose
397      DataTypes::ShapeType tmpShape0(rank0);    // pre-sizing the vectors rather
398      DataTypes::ShapeType tmpShape1(rank1);    // than using push_back
399      for (int i=0; i<rank0; i++)   { tmpShape0[i]=shape0[(i+start0)%rank0]; }
400      for (int i=0; i<rank1; i++)   { tmpShape1[i]=shape1[(i+start1)%rank1]; }
401    
402      // Prepare for the loops of the product
403      SL=1, SM=1, SR=1;
404      for (int i=0; i<rank0-axis_offset; i++)   {
405        SL *= tmpShape0[i];
406      }
407      for (int i=rank0-axis_offset; i<rank0; i++)   {
408        if (tmpShape0[i] != tmpShape1[i-(rank0-axis_offset)]) {
409          throw DataException("C_GeneralTensorProduct: Error - incompatible shapes");
410        }
411        SM *= tmpShape0[i];
412      }
413      for (int i=axis_offset; i<rank1; i++)     {
414        SR *= tmpShape1[i];
415      }
416    
417      // Define the shape of the output (rank of shape is the sum of the loop ranges below)
418      DataTypes::ShapeType shape2(rank0+rank1-2*axis_offset);  
419      {         // block to limit the scope of out_index
420         int out_index=0;
421         for (int i=0; i<rank0-axis_offset; i++, ++out_index) { shape2[out_index]=tmpShape0[i]; } // First part of arg_0_Z
422         for (int i=axis_offset; i<rank1; i++, ++out_index)   { shape2[out_index]=tmpShape1[i]; } // Last part of arg_1_Z
423      }
424    
425      if (shape2.size()>ESCRIPT_MAX_DATA_RANK)
426      {
427         ostringstream os;
428         os << "C_GeneralTensorProduct: Error - Attempt to create a rank " << shape2.size() << " object. The maximum rank is " << ESCRIPT_MAX_DATA_RANK << ".";
429         throw DataException(os.str());
430      }
431    
432      return shape2;
433    }
434    
435  }   // end anonymous namespace  }   // end anonymous namespace
436    
437    
438    
439    // Return a string representing the operation
440  const std::string&  const std::string&
441  opToString(ES_optype op)  opToString(ES_optype op)
442  {  {
# Line 138  opToString(ES_optype op) Line 447  opToString(ES_optype op)
447    return ES_opstrings[op];    return ES_opstrings[op];
448  }  }
449    
450    void DataLazy::LazyNodeSetup()
451    {
452    #ifdef _OPENMP
453        int numthreads=omp_get_max_threads();
454        m_samples.resize(numthreads*m_samplesize);
455        m_sampleids=new int[numthreads];
456        for (int i=0;i<numthreads;++i)
457        {
458            m_sampleids[i]=-1;  
459        }
460    #else
461        m_samples.resize(m_samplesize);
462        m_sampleids=new int[1];
463        m_sampleids[0]=-1;
464    #endif  // _OPENMP
465    }
466    
467    
468    // Creates an identity node
469  DataLazy::DataLazy(DataAbstract_ptr p)  DataLazy::DataLazy(DataAbstract_ptr p)
470      : parent(p->getFunctionSpace(),p->getShape()),      : parent(p->getFunctionSpace(),p->getShape())
471      m_op(IDENTITY)      ,m_sampleids(0),
472        m_samples(1)
473  {  {
474     if (p->isLazy())     if (p->isLazy())
475     {     {
     // TODO: fix this.   We could make the new node a copy of p?  
476      // I don't want identity of Lazy.      // I don't want identity of Lazy.
477      // Question: Why would that be so bad?      // Question: Why would that be so bad?
478      // Answer: We assume that the child of ID is something we can call getVector on      // Answer: We assume that the child of ID is something we can call getVector on
# Line 153  DataLazy::DataLazy(DataAbstract_ptr p) Line 480  DataLazy::DataLazy(DataAbstract_ptr p)
480     }     }
481     else     else
482     {     {
483      m_id=dynamic_pointer_cast<DataReady>(p);      p->makeLazyShared();
484        DataReady_ptr dr=dynamic_pointer_cast<DataReady>(p);
485        makeIdentity(dr);
486    LAZYDEBUG(cout << "Wrapping " << dr.get() << " id=" << m_id.get() << endl;)
487     }     }
488     m_length=p->getLength();  LAZYDEBUG(cout << "(1)Lazy created with " << m_samplesize << endl;)
    m_buffsRequired=1;  
    m_samplesize=getNumDPPSample()*getNoValues();  
 cout << "(1)Lazy created with " << m_samplesize << endl;  
489  }  }
490    
491  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op)  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op)
492      : parent(left->getFunctionSpace(),left->getShape()),      : parent(left->getFunctionSpace(),(getOpgroup(op)!=G_REDUCTION)?left->getShape():DataTypes::scalarShape),
493      m_op(op)      m_op(op),
494        m_axis_offset(0),
495        m_transpose(0),
496        m_SL(0), m_SM(0), m_SR(0)
497  {  {
498     if (getOpgroup(op)!=G_UNARY)     if ((getOpgroup(op)!=G_UNARY) && (getOpgroup(op)!=G_NP1OUT) && (getOpgroup(op)!=G_REDUCTION))
499     {     {
500      throw DataException("Programmer error - constructor DataLazy(left, op) will only process UNARY operations.");      throw DataException("Programmer error - constructor DataLazy(left, op) will only process UNARY operations.");
501     }     }
502    
503     DataLazy_ptr lleft;     DataLazy_ptr lleft;
504     if (!left->isLazy())     if (!left->isLazy())
505     {     {
# Line 178  DataLazy::DataLazy(DataAbstract_ptr left Line 509  DataLazy::DataLazy(DataAbstract_ptr left
509     {     {
510      lleft=dynamic_pointer_cast<DataLazy>(left);      lleft=dynamic_pointer_cast<DataLazy>(left);
511     }     }
512     m_length=left->getLength();     m_readytype=lleft->m_readytype;
513     m_left=lleft;     m_left=lleft;
    m_buffsRequired=1;  
514     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
515       m_children=m_left->m_children+1;
516       m_height=m_left->m_height+1;
517       LazyNodeSetup();
518       SIZELIMIT
519  }  }
520    
521    
522  DataLazy::DataLazy(DataLazy_ptr left, DataLazy_ptr right, ES_optype op)  // In this constructor we need to consider interpolation
523    DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
524      : parent(resultFS(left,right,op), resultShape(left,right,op)),      : parent(resultFS(left,right,op), resultShape(left,right,op)),
525      m_left(left),      m_op(op),
526      m_right(right),      m_SL(0), m_SM(0), m_SR(0)
     m_op(op)  
527  {  {
528     if (getOpgroup(op)!=G_BINARY)  LAZYDEBUG(cout << "Forming operator with " << left.get() << " " << right.get() << endl;)
529       if ((getOpgroup(op)!=G_BINARY))
530     {     {
531      throw DataException("Programmer error - constructor DataLazy(left, right, op) will only process BINARY operations.");      throw DataException("Programmer error - constructor DataLazy(left, right, op) will only process BINARY operations.");
532     }     }
533     m_length=resultLength(m_left,m_right,m_op);  
534       if (getFunctionSpace()!=left->getFunctionSpace())    // left needs to be interpolated
535       {
536        FunctionSpace fs=getFunctionSpace();
537        Data ltemp(left);
538        Data tmp(ltemp,fs);
539        left=tmp.borrowDataPtr();
540       }
541       if (getFunctionSpace()!=right->getFunctionSpace())   // right needs to be interpolated
542       {
543        Data tmp(Data(right),getFunctionSpace());
544        right=tmp.borrowDataPtr();
545    LAZYDEBUG(cout << "Right interpolation required " << right.get() << endl;)
546       }
547       left->operandCheck(*right);
548    
549       if (left->isLazy())          // the children need to be DataLazy. Wrap them in IDENTITY if required
550       {
551        m_left=dynamic_pointer_cast<DataLazy>(left);
552    LAZYDEBUG(cout << "Left is " << m_left->toString() << endl;)
553       }
554       else
555       {
556        m_left=DataLazy_ptr(new DataLazy(left));
557    LAZYDEBUG(cout << "Left " << left.get() << " wrapped " << m_left->m_id.get() << endl;)
558       }
559       if (right->isLazy())
560       {
561        m_right=dynamic_pointer_cast<DataLazy>(right);
562    LAZYDEBUG(cout << "Right is " << m_right->toString() << endl;)
563       }
564       else
565       {
566        m_right=DataLazy_ptr(new DataLazy(right));
567    LAZYDEBUG(cout << "Right " << right.get() << " wrapped " << m_right->m_id.get() << endl;)
568       }
569       char lt=m_left->m_readytype;
570       char rt=m_right->m_readytype;
571       if (lt=='E' || rt=='E')
572       {
573        m_readytype='E';
574       }
575       else if (lt=='T' || rt=='T')
576       {
577        m_readytype='T';
578       }
579       else
580       {
581        m_readytype='C';
582       }
583     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
584     m_buffsRequired=calcBuffs(m_left, m_right, m_op);     m_children=m_left->m_children+m_right->m_children+2;
585  cout << "(2)Lazy created with " << m_samplesize << endl;     m_height=max(m_left->m_height,m_right->m_height)+1;
586       LazyNodeSetup();
587       SIZELIMIT
588    LAZYDEBUG(cout << "(3)Lazy created with " << m_samplesize << endl;)
589  }  }
590    
591  DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)  DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op, int axis_offset, int transpose)
592      : parent(resultFS(left,right,op), resultShape(left,right,op)),      : parent(resultFS(left,right,op), GTPShape(left,right, axis_offset, transpose, m_SL,m_SM, m_SR)),
593      m_op(op)      m_op(op),
594        m_axis_offset(axis_offset),
595        m_transpose(transpose)
596  {  {
597     if (getOpgroup(op)!=G_BINARY)     if ((getOpgroup(op)!=G_TENSORPROD))
598       {
599        throw DataException("Programmer error - constructor DataLazy(left, right, op, ax, tr) will only process BINARY operations which require parameters.");
600       }
601       if ((transpose>2) || (transpose<0))
602       {
603        throw DataException("DataLazy GeneralTensorProduct constructor: Error - transpose should be 0, 1 or 2");
604       }
605       if (getFunctionSpace()!=left->getFunctionSpace())    // left needs to be interpolated
606     {     {
607      throw DataException("Programmer error - constructor DataLazy(left, op) will only process BINARY operations.");      FunctionSpace fs=getFunctionSpace();
608        Data ltemp(left);
609        Data tmp(ltemp,fs);
610        left=tmp.borrowDataPtr();
611     }     }
612     if (left->isLazy())     if (getFunctionSpace()!=right->getFunctionSpace())   // right needs to be interpolated
613       {
614        Data tmp(Data(right),getFunctionSpace());
615        right=tmp.borrowDataPtr();
616       }
617    //    left->operandCheck(*right);
618    
619       if (left->isLazy())          // the children need to be DataLazy. Wrap them in IDENTITY if required
620     {     {
621      m_left=dynamic_pointer_cast<DataLazy>(left);      m_left=dynamic_pointer_cast<DataLazy>(left);
622     }     }
# Line 225  DataLazy::DataLazy(DataAbstract_ptr left Line 632  DataLazy::DataLazy(DataAbstract_ptr left
632     {     {
633      m_right=DataLazy_ptr(new DataLazy(right));      m_right=DataLazy_ptr(new DataLazy(right));
634     }     }
635       char lt=m_left->m_readytype;
636       char rt=m_right->m_readytype;
637       if (lt=='E' || rt=='E')
638       {
639        m_readytype='E';
640       }
641       else if (lt=='T' || rt=='T')
642       {
643        m_readytype='T';
644       }
645       else
646       {
647        m_readytype='C';
648       }
649       m_samplesize=getNumDPPSample()*getNoValues();
650       m_children=m_left->m_children+m_right->m_children+2;
651       m_height=max(m_left->m_height,m_right->m_height)+1;
652       LazyNodeSetup();
653       SIZELIMIT
654    LAZYDEBUG(cout << "(4)Lazy created with " << m_samplesize << endl;)
655    }
656    
657    
658    DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, int axis_offset)
659        : parent(left->getFunctionSpace(), resultShape(left,op, axis_offset)),
660        m_op(op),
661        m_axis_offset(axis_offset),
662        m_transpose(0),
663        m_tol(0)
664    {
665       if ((getOpgroup(op)!=G_NP1OUT_P))
666       {
667        throw DataException("Programmer error - constructor DataLazy(left, op, ax) will only process UNARY operations which require parameters.");
668       }
669       DataLazy_ptr lleft;
670       if (!left->isLazy())
671       {
672        lleft=DataLazy_ptr(new DataLazy(left));
673       }
674       else
675       {
676        lleft=dynamic_pointer_cast<DataLazy>(left);
677       }
678       m_readytype=lleft->m_readytype;
679       m_left=lleft;
680       m_samplesize=getNumDPPSample()*getNoValues();
681       m_children=m_left->m_children+1;
682       m_height=m_left->m_height+1;
683       LazyNodeSetup();
684       SIZELIMIT
685    LAZYDEBUG(cout << "(5)Lazy created with " << m_samplesize << endl;)
686    }
687    
688    DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, double tol)
689        : parent(left->getFunctionSpace(), left->getShape()),
690        m_op(op),
691        m_axis_offset(0),
692        m_transpose(0),
693        m_tol(tol)
694    {
695       if ((getOpgroup(op)!=G_UNARY_P))
696       {
697        throw DataException("Programmer error - constructor DataLazy(left, op, tol) will only process UNARY operations which require parameters.");
698       }
699       DataLazy_ptr lleft;
700       if (!left->isLazy())
701       {
702        lleft=DataLazy_ptr(new DataLazy(left));
703       }
704       else
705       {
706        lleft=dynamic_pointer_cast<DataLazy>(left);
707       }
708       m_readytype=lleft->m_readytype;
709       m_left=lleft;
710       m_samplesize=getNumDPPSample()*getNoValues();
711       m_children=m_left->m_children+1;
712       m_height=m_left->m_height+1;
713       LazyNodeSetup();
714       SIZELIMIT
715    LAZYDEBUG(cout << "(6)Lazy created with " << m_samplesize << endl;)
716    }
717    
718     m_length=resultLength(m_left,m_right,m_op);  
719    DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, const int axis0, const int axis1)
720        : parent(left->getFunctionSpace(), SwapShape(left,axis0,axis1)),
721        m_op(op),
722        m_axis_offset(axis0),
723        m_transpose(axis1),
724        m_tol(0)
725    {
726       if ((getOpgroup(op)!=G_NP1OUT_2P))
727       {
728        throw DataException("Programmer error - constructor DataLazy(left, op, tol) will only process UNARY operations which require two integer parameters.");
729       }
730       DataLazy_ptr lleft;
731       if (!left->isLazy())
732       {
733        lleft=DataLazy_ptr(new DataLazy(left));
734       }
735       else
736       {
737        lleft=dynamic_pointer_cast<DataLazy>(left);
738       }
739       m_readytype=lleft->m_readytype;
740       m_left=lleft;
741     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
742     m_buffsRequired=calcBuffs(m_left, m_right,m_op);     m_children=m_left->m_children+1;
743  cout << "(3)Lazy created with " << m_samplesize << endl;     m_height=m_left->m_height+1;
744       LazyNodeSetup();
745       SIZELIMIT
746    LAZYDEBUG(cout << "(7)Lazy created with " << m_samplesize << endl;)
747  }  }
748    
749    
750  DataLazy::~DataLazy()  namespace
751  {  {
752    
753        inline int max3(int a, int b, int c)
754        {
755        int t=(a>b?a:b);
756        return (t>c?t:c);
757    
758        }
759    }
760    
761    DataLazy::DataLazy(DataAbstract_ptr mask, DataAbstract_ptr left, DataAbstract_ptr right/*, double tol*/)
762        : parent(left->getFunctionSpace(), left->getShape()),
763        m_op(CONDEVAL),
764        m_axis_offset(0),
765        m_transpose(0),
766        m_tol(0)
767    {
768    
769       DataLazy_ptr lmask;
770       DataLazy_ptr lleft;
771       DataLazy_ptr lright;
772       if (!mask->isLazy())
773       {
774        lmask=DataLazy_ptr(new DataLazy(mask));
775       }
776       else
777       {
778        lmask=dynamic_pointer_cast<DataLazy>(mask);
779       }
780       if (!left->isLazy())
781       {
782        lleft=DataLazy_ptr(new DataLazy(left));
783       }
784       else
785       {
786        lleft=dynamic_pointer_cast<DataLazy>(left);
787       }
788       if (!right->isLazy())
789       {
790        lright=DataLazy_ptr(new DataLazy(right));
791       }
792       else
793       {
794        lright=dynamic_pointer_cast<DataLazy>(right);
795       }
796       m_readytype=lmask->m_readytype;
797       if ((lleft->m_readytype!=lright->m_readytype) || (lmask->m_readytype!=lleft->m_readytype))
798       {
799        throw DataException("Programmer Error - condEval arguments must have the same readytype");
800       }
801       m_left=lleft;
802       m_right=lright;
803       m_mask=lmask;
804       m_samplesize=getNumDPPSample()*getNoValues();
805       m_children=m_left->m_children+m_right->m_children+m_mask->m_children+1;
806       m_height=max3(m_left->m_height,m_right->m_height,m_mask->m_height)+1;
807       LazyNodeSetup();
808       SIZELIMIT
809    LAZYDEBUG(cout << "(8)Lazy created with " << m_samplesize << endl;)
810  }  }
811    
812    
813  int  
814  DataLazy::getBuffsRequired() const  DataLazy::~DataLazy()
815  {  {
816      return m_buffsRequired;     delete[] m_sampleids;
817  }  }
818    
819    
820  // the vector and the offset are a place where the method could write its data if it wishes  /*
821  // it is not obligated to do so. For example, if it has its own storage already, it can use that.    \brief Evaluates the expression using methods on Data.
822  // Hence the return value to indicate where the data is actually stored.    This does the work for the collapse method.
823  // Regardless, the storage should be assumed to be used, even if it isn't.    For reasons of efficiency do not call this method on DataExpanded nodes.
824  const double*  */
825  DataLazy::resolveSample(ValueType& v,int sampleNo,  size_t offset ) const  DataReady_ptr
826    DataLazy::collapseToReady() const
827  {  {
828    if (m_op==IDENTITY)      if (m_readytype=='E')
829      { // this is more an efficiency concern than anything else
830        throw DataException("Programmer Error - do not use collapse on Expanded data.");
831      }
832      if (m_op==IDENTITY)
833    {    {
834      const ValueType& vec=m_id->getVector();      return m_id;
     return &(vec[m_id->getPointOffset(sampleNo, 0)]);  
835    }    }
836    size_t rightoffset=offset+m_samplesize;    DataReady_ptr pleft=m_left->collapseToReady();
837    const double* left=m_left->resolveSample(v,sampleNo,offset);    Data left(pleft);
838    const double* right=0;    Data right;
839    if (getOpgroup(m_op)==G_BINARY)    if ((getOpgroup(m_op)==G_BINARY) || (getOpgroup(m_op)==G_TENSORPROD))
840    {    {
841      right=m_right->resolveSample(v,sampleNo,rightoffset);      right=Data(m_right->collapseToReady());
842    }    }
843    double* result=&(v[offset]);    Data result;
844      switch(m_op)
845    {    {
846      switch(m_op)      case ADD:
847      {      result=left+right;
     case ADD:       // since these are pointwise ops, pretend each sample is one point  
     tensor_binary_operation(m_samplesize, left, right, result, plus<double>());  
848      break;      break;
849      case SUB:            case SUB:      
850      tensor_binary_operation(m_samplesize, left, right, result, minus<double>());      result=left-right;
851      break;      break;
852      case MUL:            case MUL:      
853      tensor_binary_operation(m_samplesize, left, right, result, multiplies<double>());      result=left*right;
854      break;      break;
855      case DIV:            case DIV:      
856      tensor_binary_operation(m_samplesize, left, right, result, divides<double>());      result=left/right;
857      break;      break;
 // unary ops  
858      case SIN:      case SIN:
859      tensor_unary_operation(m_samplesize, left, result, ::sin);      result=left.sin();  
860        break;
861        case COS:
862        result=left.cos();
863        break;
864        case TAN:
865        result=left.tan();
866        break;
867        case ASIN:
868        result=left.asin();
869        break;
870        case ACOS:
871        result=left.acos();
872        break;
873        case ATAN:
874        result=left.atan();
875        break;
876        case SINH:
877        result=left.sinh();
878        break;
879        case COSH:
880        result=left.cosh();
881        break;
882        case TANH:
883        result=left.tanh();
884        break;
885        case ERF:
886        result=left.erf();
887        break;
888       case ASINH:
889        result=left.asinh();
890        break;
891       case ACOSH:
892        result=left.acosh();
893        break;
894       case ATANH:
895        result=left.atanh();
896        break;
897        case LOG10:
898        result=left.log10();
899        break;
900        case LOG:
901        result=left.log();
902        break;
903        case SIGN:
904        result=left.sign();
905        break;
906        case ABS:
907        result=left.abs();
908        break;
909        case NEG:
910        result=left.neg();
911        break;
912        case POS:
913        // it doesn't mean anything for delayed.
914        // it will just trigger a deep copy of the lazy object
915        throw DataException("Programmer error - POS not supported for lazy data.");
916        break;
917        case EXP:
918        result=left.exp();
919        break;
920        case SQRT:
921        result=left.sqrt();
922        break;
923        case RECIP:
924        result=left.oneOver();
925        break;
926        case GZ:
927        result=left.wherePositive();
928        break;
929        case LZ:
930        result=left.whereNegative();
931        break;
932        case GEZ:
933        result=left.whereNonNegative();
934        break;
935        case LEZ:
936        result=left.whereNonPositive();
937        break;
938        case NEZ:
939        result=left.whereNonZero(m_tol);
940        break;
941        case EZ:
942        result=left.whereZero(m_tol);
943        break;
944        case SYM:
945        result=left.symmetric();
946        break;
947        case NSYM:
948        result=left.nonsymmetric();
949        break;
950        case PROD:
951        result=C_GeneralTensorProduct(left,right,m_axis_offset, m_transpose);
952        break;
953        case TRANS:
954        result=left.transpose(m_axis_offset);
955        break;
956        case TRACE:
957        result=left.trace(m_axis_offset);
958        break;
959        case SWAP:
960        result=left.swapaxes(m_axis_offset, m_transpose);
961        break;
962        case MINVAL:
963        result=left.minval();
964        break;
965        case MAXVAL:
966        result=left.minval();
967        break;
968        default:
969        throw DataException("Programmer error - collapseToReady does not know how to resolve operator "+opToString(m_op)+".");
970      }
971      return result.borrowReadyPtr();
972    }
973    
974    /*
975       \brief Converts the DataLazy into an IDENTITY storing the value of the expression.
976       This method uses the original methods on the Data class to evaluate the expressions.
977       For this reason, it should not be used on DataExpanded instances. (To do so would defeat
978       the purpose of using DataLazy in the first place).
979    */
980    void
981    DataLazy::collapse() const
982    {
983      if (m_op==IDENTITY)
984      {
985        return;
986      }
987      if (m_readytype=='E')
988      { // this is more an efficiency concern than anything else
989        throw DataException("Programmer Error - do not use collapse on Expanded data.");
990      }
991      m_id=collapseToReady();
992      m_op=IDENTITY;
993    }
994    
995    
996    
997    
998    
999    
1000    #define PROC_OP(TYPE,X)                               \
1001        for (int j=0;j<onumsteps;++j)\
1002        {\
1003          for (int i=0;i<numsteps;++i,resultp+=resultStep) \
1004          { \
1005    LAZYDEBUG(cout << "[left,right]=[" << lroffset << "," << rroffset << "]" << endl;)\
1006    LAZYDEBUG(cout << "{left,right}={" << (*left)[lroffset] << "," << (*right)[rroffset] << "}\n";)\
1007             tensor_binary_operation< TYPE >(chunksize, &((*left)[lroffset]), &((*right)[rroffset]), resultp, X); \
1008    LAZYDEBUG(cout << " result=      " << resultp[0] << endl;) \
1009             lroffset+=leftstep; \
1010             rroffset+=rightstep; \
1011          }\
1012          lroffset+=oleftstep;\
1013          rroffset+=orightstep;\
1014        }
1015    
1016    
1017    // The result will be stored in m_samples
1018    // The return value is a pointer to the DataVector, offset is the offset within the return value
1019    const DataTypes::ValueType*
1020    DataLazy::resolveNodeSample(int tid, int sampleNo, size_t& roffset) const
1021    {
1022    LAZYDEBUG(cout << "Resolve sample " << toString() << endl;)
1023        // collapse so we have a 'E' node or an IDENTITY for some other type
1024      if (m_readytype!='E' && m_op!=IDENTITY)
1025      {
1026        collapse();
1027      }
1028      if (m_op==IDENTITY)  
1029      {
1030        const ValueType& vec=m_id->getVectorRO();
1031        roffset=m_id->getPointOffset(sampleNo, 0);
1032    #ifdef LAZY_STACK_PROF
1033    int x;
1034    if (&x<stackend[omp_get_thread_num()])
1035    {
1036           stackend[omp_get_thread_num()]=&x;
1037    }
1038    #endif
1039        return &(vec);
1040      }
1041      if (m_readytype!='E')
1042      {
1043        throw DataException("Programmer Error - Collapse did not produce an expanded node.");
1044      }
1045      if (m_sampleids[tid]==sampleNo)
1046      {
1047        roffset=tid*m_samplesize;
1048        return &(m_samples);        // sample is already resolved
1049      }
1050      m_sampleids[tid]=sampleNo;
1051    
1052      switch (getOpgroup(m_op))
1053      {
1054      case G_UNARY:
1055      case G_UNARY_P: return resolveNodeUnary(tid, sampleNo, roffset);
1056      case G_BINARY: return resolveNodeBinary(tid, sampleNo, roffset);
1057      case G_NP1OUT: return resolveNodeNP1OUT(tid, sampleNo, roffset);
1058      case G_NP1OUT_P: return resolveNodeNP1OUT_P(tid, sampleNo, roffset);
1059      case G_TENSORPROD: return resolveNodeTProd(tid, sampleNo, roffset);
1060      case G_NP1OUT_2P: return resolveNodeNP1OUT_2P(tid, sampleNo, roffset);
1061      case G_REDUCTION: return resolveNodeReduction(tid, sampleNo, roffset);
1062      case G_CONDEVAL: return resolveNodeCondEval(tid, sampleNo, roffset);
1063      default:
1064        throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");
1065      }
1066    }
1067    
1068    const DataTypes::ValueType*
1069    DataLazy::resolveNodeUnary(int tid, int sampleNo, size_t& roffset) const
1070    {
1071        // we assume that any collapsing has been done before we get here
1072        // since we only have one argument we don't need to think about only
1073        // processing single points.
1074        // we will also know we won't get identity nodes
1075      if (m_readytype!='E')
1076      {
1077        throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
1078      }
1079      if (m_op==IDENTITY)
1080      {
1081        throw DataException("Programmer error - resolveNodeUnary should not be called on identity nodes.");
1082      }
1083      const DataTypes::ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, roffset);
1084      const double* left=&((*leftres)[roffset]);
1085      roffset=m_samplesize*tid;
1086      double* result=&(m_samples[roffset]);
1087      switch (m_op)
1088      {
1089        case SIN:  
1090        tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sin);
1091      break;      break;
1092      case COS:      case COS:
1093      tensor_unary_operation(m_samplesize, left, result, ::cos);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cos);
1094      break;      break;
1095      case TAN:      case TAN:
1096      tensor_unary_operation(m_samplesize, left, result, ::tan);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tan);
1097      break;      break;
1098      case ASIN:      case ASIN:
1099      tensor_unary_operation(m_samplesize, left, result, ::asin);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::asin);
1100      break;      break;
1101      case ACOS:      case ACOS:
1102      tensor_unary_operation(m_samplesize, left, result, ::acos);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::acos);
1103      break;      break;
1104      case ATAN:      case ATAN:
1105      tensor_unary_operation(m_samplesize, left, result, ::atan);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::atan);
1106      break;      break;
1107      case SINH:      case SINH:
1108      tensor_unary_operation(m_samplesize, left, result, ::sinh);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sinh);
1109      break;      break;
1110      case COSH:      case COSH:
1111      tensor_unary_operation(m_samplesize, left, result, ::cosh);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cosh);
1112      break;      break;
1113      case TANH:      case TANH:
1114      tensor_unary_operation(m_samplesize, left, result, ::tanh);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tanh);
1115      break;      break;
1116      case ERF:      case ERF:
1117  #ifdef _WIN32  #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1118      throw DataException("Error - Data:: erf function is not supported on _WIN32 platforms.");      throw DataException("Error - Data:: erf function is not supported on _WIN32 platforms.");
1119  #else  #else
1120      tensor_unary_operation(m_samplesize, left, result, ::erf);      tensor_unary_operation(m_samplesize, left, result, ::erf);
1121      break;      break;
1122  #endif  #endif
1123     case ASINH:     case ASINH:
1124  #ifdef _WIN32  #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1125      tensor_unary_operation(m_samplesize, left, result, escript::asinh_substitute);      tensor_unary_operation(m_samplesize, left, result, escript::asinh_substitute);
1126  #else  #else
1127      tensor_unary_operation(m_samplesize, left, result, ::asinh);      tensor_unary_operation(m_samplesize, left, result, ::asinh);
1128  #endif    #endif  
1129      break;      break;
1130     case ACOSH:     case ACOSH:
1131  #ifdef _WIN32  #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1132      tensor_unary_operation(m_samplesize, left, result, escript::acosh_substitute);      tensor_unary_operation(m_samplesize, left, result, escript::acosh_substitute);
1133  #else  #else
1134      tensor_unary_operation(m_samplesize, left, result, ::acosh);      tensor_unary_operation(m_samplesize, left, result, ::acosh);
1135  #endif    #endif  
1136      break;      break;
1137     case ATANH:     case ATANH:
1138  #ifdef _WIN32  #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1139      tensor_unary_operation(m_samplesize, left, result, escript::atanh_substitute);      tensor_unary_operation(m_samplesize, left, result, escript::atanh_substitute);
1140  #else  #else
1141      tensor_unary_operation(m_samplesize, left, result, ::atanh);      tensor_unary_operation(m_samplesize, left, result, ::atanh);
1142  #endif    #endif  
1143      break;      break;
1144      case LOG10:      case LOG10:
1145      tensor_unary_operation(m_samplesize, left, result, ::log10);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log10);
1146      break;      break;
1147      case LOG:      case LOG:
1148      tensor_unary_operation(m_samplesize, left, result, ::log);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log);
1149      break;      break;
1150      case SIGN:      case SIGN:
1151      tensor_unary_operation(m_samplesize, left, result, escript::fsign);      tensor_unary_operation(m_samplesize, left, result, escript::fsign);
1152      break;      break;
1153      case ABS:      case ABS:
1154      tensor_unary_operation(m_samplesize, left, result, ::fabs);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::fabs);
1155      break;      break;
1156      case NEG:      case NEG:
1157      tensor_unary_operation(m_samplesize, left, result, negate<double>());      tensor_unary_operation(m_samplesize, left, result, negate<double>());
# Line 357  DataLazy::resolveSample(ValueType& v,int Line 1162  DataLazy::resolveSample(ValueType& v,int
1162      throw DataException("Programmer error - POS not supported for lazy data.");      throw DataException("Programmer error - POS not supported for lazy data.");
1163      break;      break;
1164      case EXP:      case EXP:
1165      tensor_unary_operation(m_samplesize, left, result, ::exp);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::exp);
1166      break;      break;
1167      case SQRT:      case SQRT:
1168      tensor_unary_operation(m_samplesize, left, result, ::sqrt);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sqrt);
1169      break;      break;
1170      case RECIP:      case RECIP:
1171      tensor_unary_operation(m_samplesize, left, result, bind1st(divides<double>(),1.));      tensor_unary_operation(m_samplesize, left, result, bind1st(divides<double>(),1.));
# Line 377  DataLazy::resolveSample(ValueType& v,int Line 1182  DataLazy::resolveSample(ValueType& v,int
1182      case LEZ:      case LEZ:
1183      tensor_unary_operation(m_samplesize, left, result, bind2nd(less_equal<double>(),0.0));      tensor_unary_operation(m_samplesize, left, result, bind2nd(less_equal<double>(),0.0));
1184      break;      break;
1185    // There are actually G_UNARY_P but I don't see a compelling reason to treat them differently
1186        case NEZ:
1187        tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsGT(),m_tol));
1188        break;
1189        case EZ:
1190        tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsLTE(),m_tol));
1191        break;
1192    
1193      default:      default:
1194      throw DataException("Programmer error - do not know how to resolve operator "+opToString(m_op)+".");      throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");
1195      }    }
1196      return &(m_samples);
1197    }
1198    
1199    
1200    const DataTypes::ValueType*
1201    DataLazy::resolveNodeReduction(int tid, int sampleNo, size_t& roffset) const
1202    {
1203        // we assume that any collapsing has been done before we get here
1204        // since we only have one argument we don't need to think about only
1205        // processing single points.
1206        // we will also know we won't get identity nodes
1207      if (m_readytype!='E')
1208      {
1209        throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
1210      }
1211      if (m_op==IDENTITY)
1212      {
1213        throw DataException("Programmer error - resolveNodeUnary should not be called on identity nodes.");
1214      }
1215      size_t loffset=0;
1216      const DataTypes::ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, loffset);
1217    
1218      roffset=m_samplesize*tid;
1219      unsigned int ndpps=getNumDPPSample();
1220      unsigned int psize=DataTypes::noValues(m_left->getShape());
1221      double* result=&(m_samples[roffset]);
1222      switch (m_op)
1223      {
1224        case MINVAL:
1225        {
1226          for (unsigned int z=0;z<ndpps;++z)
1227          {
1228            FMin op;
1229            *result=DataMaths::reductionOp(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max());
1230            loffset+=psize;
1231            result++;
1232          }
1233        }
1234        break;
1235        case MAXVAL:
1236        {
1237          for (unsigned int z=0;z<ndpps;++z)
1238          {
1239          FMax op;
1240          *result=DataMaths::reductionOp(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max()*-1);
1241          loffset+=psize;
1242          result++;
1243          }
1244        }
1245        break;
1246        default:
1247        throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");
1248      }
1249      return &(m_samples);
1250    }
1251    
1252    const DataTypes::ValueType*
1253    DataLazy::resolveNodeNP1OUT(int tid, int sampleNo, size_t& roffset) const
1254    {
1255        // we assume that any collapsing has been done before we get here
1256        // since we only have one argument we don't need to think about only
1257        // processing single points.
1258      if (m_readytype!='E')
1259      {
1260        throw DataException("Programmer error - resolveNodeNP1OUT should only be called on expanded Data.");
1261    }    }
1262    return result;    if (m_op==IDENTITY)
1263      {
1264        throw DataException("Programmer error - resolveNodeNP1OUT should not be called on identity nodes.");
1265      }
1266      size_t subroffset;
1267      const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1268      roffset=m_samplesize*tid;
1269      size_t loop=0;
1270      size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1271      size_t step=getNoValues();
1272      size_t offset=roffset;
1273      switch (m_op)
1274      {
1275        case SYM:
1276        for (loop=0;loop<numsteps;++loop)
1277        {
1278            DataMaths::symmetric(*leftres,m_left->getShape(),subroffset, m_samples, getShape(), offset);
1279            subroffset+=step;
1280            offset+=step;
1281        }
1282        break;
1283        case NSYM:
1284        for (loop=0;loop<numsteps;++loop)
1285        {
1286            DataMaths::nonsymmetric(*leftres,m_left->getShape(),subroffset, m_samples, getShape(), offset);
1287            subroffset+=step;
1288            offset+=step;
1289        }
1290        break;
1291        default:
1292        throw DataException("Programmer error - resolveNP1OUT can not resolve operator "+opToString(m_op)+".");
1293      }
1294      return &m_samples;
1295    }
1296    
1297    const DataTypes::ValueType*
1298    DataLazy::resolveNodeNP1OUT_P(int tid, int sampleNo, size_t& roffset) const
1299    {
1300        // we assume that any collapsing has been done before we get here
1301        // since we only have one argument we don't need to think about only
1302        // processing single points.
1303      if (m_readytype!='E')
1304      {
1305        throw DataException("Programmer error - resolveNodeNP1OUT_P should only be called on expanded Data.");
1306      }
1307      if (m_op==IDENTITY)
1308      {
1309        throw DataException("Programmer error - resolveNodeNP1OUT_P should not be called on identity nodes.");
1310      }
1311      size_t subroffset;
1312      size_t offset;
1313      const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1314      roffset=m_samplesize*tid;
1315      offset=roffset;
1316      size_t loop=0;
1317      size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1318      size_t outstep=getNoValues();
1319      size_t instep=m_left->getNoValues();
1320      switch (m_op)
1321      {
1322        case TRACE:
1323        for (loop=0;loop<numsteps;++loop)
1324        {
1325                DataMaths::trace(*leftres,m_left->getShape(),subroffset, m_samples ,getShape(),offset,m_axis_offset);
1326            subroffset+=instep;
1327            offset+=outstep;
1328        }
1329        break;
1330        case TRANS:
1331        for (loop=0;loop<numsteps;++loop)
1332        {
1333                DataMaths::transpose(*leftres,m_left->getShape(),subroffset, m_samples, getShape(),offset,m_axis_offset);
1334            subroffset+=instep;
1335            offset+=outstep;
1336        }
1337        break;
1338        default:
1339        throw DataException("Programmer error - resolveNP1OUTP can not resolve operator "+opToString(m_op)+".");
1340      }
1341      return &m_samples;
1342    }
1343    
1344    
1345    const DataTypes::ValueType*
1346    DataLazy::resolveNodeNP1OUT_2P(int tid, int sampleNo, size_t& roffset) const
1347    {
1348      if (m_readytype!='E')
1349      {
1350        throw DataException("Programmer error - resolveNodeNP1OUT_2P should only be called on expanded Data.");
1351      }
1352      if (m_op==IDENTITY)
1353      {
1354        throw DataException("Programmer error - resolveNodeNP1OUT_2P should not be called on identity nodes.");
1355      }
1356      size_t subroffset;
1357      size_t offset;
1358      const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1359      roffset=m_samplesize*tid;
1360      offset=roffset;
1361      size_t loop=0;
1362      size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1363      size_t outstep=getNoValues();
1364      size_t instep=m_left->getNoValues();
1365      switch (m_op)
1366      {
1367        case SWAP:
1368        for (loop=0;loop<numsteps;++loop)
1369        {
1370                DataMaths::swapaxes(*leftres,m_left->getShape(),subroffset, m_samples, getShape(),offset, m_axis_offset, m_transpose);
1371            subroffset+=instep;
1372            offset+=outstep;
1373        }
1374        break;
1375        default:
1376        throw DataException("Programmer error - resolveNodeNP1OUT2P can not resolve operator "+opToString(m_op)+".");
1377      }
1378      return &m_samples;
1379    }
1380    
1381    const DataTypes::ValueType*
1382    DataLazy::resolveNodeCondEval(int tid, int sampleNo, size_t& roffset) const
1383    {
1384      if (m_readytype!='E')
1385      {
1386        throw DataException("Programmer error - resolveNodeCondEval should only be called on expanded Data.");
1387      }
1388      if (m_op!=CONDEVAL)
1389      {
1390        throw DataException("Programmer error - resolveNodeCondEval should only be called on CONDEVAL nodes.");
1391      }
1392      size_t subroffset;
1393    
1394      const ValueType* maskres=m_mask->resolveNodeSample(tid, sampleNo, subroffset);
1395      const ValueType* srcres=0;
1396      if ((*maskres)[subroffset]>0)
1397      {
1398        srcres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1399      }
1400      else
1401      {
1402        srcres=m_right->resolveNodeSample(tid, sampleNo, subroffset);
1403      }
1404    
1405      // Now we need to copy the result
1406    
1407      roffset=m_samplesize*tid;
1408      for (int i=0;i<m_samplesize;++i)
1409      {
1410        m_samples[roffset+i]=(*srcres)[subroffset+i];  
1411      }
1412    
1413      return &m_samples;
1414    }
1415    
// This method assumes that any subexpressions which evaluate to Constant or Tagged Data
// have already been collapsed to IDENTITY. So we must have at least one expanded child.
// If both children are expanded, then we can process them in a single operation (we treat
// the whole sample as one big datapoint).
// If one of the children is not expanded, then we need to treat each point in the sample
// individually.
// There is an additional complication when scalar operations are considered.
// For example, 2+Vector.
// In this case each double within the point is treated individually
//
// tid      - thread id; selects this thread's region of the m_samples scratch storage
// sampleNo - the sample to evaluate
// roffset  - out-parameter: on return, the offset of this sample's result inside the
//            returned vector
const DataTypes::ValueType*
DataLazy::resolveNodeBinary(int tid, int sampleNo, size_t& roffset) const
{
LAZYDEBUG(cout << "Resolve binary: " << toString() << endl;)

  size_t lroffset=0, rroffset=0;    // offsets in the left and right result vectors
    // first work out which of the children are expanded
  bool leftExp=(m_left->m_readytype=='E');
  bool rightExp=(m_right->m_readytype=='E');
  if (!leftExp && !rightExp)
  {
    throw DataException("Programmer Error - please use collapse if neither argument has type 'E'.");
  }
  bool leftScalar=(m_left->getRank()==0);
  bool rightScalar=(m_right->getRank()==0);
  // mixed ranks are only legal when one side is a scalar (e.g. 2+Vector)
  if ((m_left->getRank()!=m_right->getRank()) && (!leftScalar && !rightScalar))
  {
    throw DataException("resolveBinary - ranks of arguments must match unless one of them is scalar.");
  }
  size_t leftsize=m_left->getNoValues();
  size_t rightsize=m_right->getNoValues();
  size_t chunksize=1;           // how many doubles will be processed in one go
  int leftstep=0;       // how far should the left offset advance after each step
  int rightstep=0;
  int numsteps=0;       // total number of steps for the inner loop
  int oleftstep=0;  // the o variables refer to the outer loop
  int orightstep=0; // The outer loop is only required in cases where there is an extended scalar
  int onumsteps=1;

  // classify each child by (expanded?, scalar?) - the eight flags below cover every case
  bool LES=(leftExp && leftScalar); // Left is an expanded scalar
  bool RES=(rightExp && rightScalar);
  bool LS=(!leftExp && leftScalar); // left is a single scalar
  bool RS=(!rightExp && rightScalar);
  bool LN=(!leftExp && !leftScalar);    // left is a single non-scalar
  bool RN=(!rightExp && !rightScalar);
  bool LEN=(leftExp && !leftScalar);    // left is an expanded non-scalar
  bool REN=(rightExp && !rightScalar);

  if ((LES && RES) || (LEN && REN)) // both are Expanded scalars or both are expanded non-scalars
  {
    // shapes agree exactly, so the whole sample can be processed as one big datapoint
    chunksize=m_left->getNumDPPSample()*leftsize;
    leftstep=0;
    rightstep=0;
    numsteps=1;
  }
  else if (LES || RES)
  {
    chunksize=1;
    if (LES)        // left is an expanded scalar
    {
        if (RS)
        {
           leftstep=1;
           rightstep=0;
           numsteps=m_left->getNumDPPSample();
        }
        else        // RN or REN
        {
           leftstep=0;
           oleftstep=1;
           rightstep=1;
           // NOTE(review): negative outer step presumably rewinds a non-expanded
           // right operand back to the start of its single datapoint between
           // passes - confirm against the PROC_OP macro definition
           orightstep=(RN ? -(int)rightsize : 0);
           numsteps=rightsize;
           onumsteps=m_left->getNumDPPSample();
        }
    }
    else        // right is an expanded scalar
    {
        if (LS)
        {
           rightstep=1;
           leftstep=0;
           numsteps=m_right->getNumDPPSample();
        }
        else
        {
           rightstep=0;
           orightstep=1;
           leftstep=1;
           oleftstep=(LN ? -(int)leftsize : 0);  // mirror of the RN case above
           numsteps=leftsize;
           onumsteps=m_right->getNumDPPSample();
        }
    }
  }
  else  // this leaves (LEN, RS), (LEN, RN) and their transposes
  {
    if (LEN)    // and Right will be a single value
    {
        chunksize=rightsize;
        leftstep=rightsize;
        rightstep=0;
        numsteps=m_left->getNumDPPSample();
        if (RS)
        {
           numsteps*=leftsize;
        }
    }
    else    // REN
    {
        chunksize=leftsize;
        rightstep=leftsize;
        leftstep=0;
        numsteps=m_right->getNumDPPSample();
        if (LS)
        {
           numsteps*=rightsize;
        }
    }
  }

  int resultStep=max(leftstep,rightstep);   // only one (at most) should be !=0
    // Get the values of sub-expressions
  const ValueType* left=m_left->resolveNodeSample(tid,sampleNo,lroffset);
  const ValueType* right=m_right->resolveNodeSample(tid,sampleNo,rroffset);
LAZYDEBUG(cout << "Post sub calls in " << toString() << endl;)
LAZYDEBUG(cout << "shapes=" << DataTypes::shapeToString(m_left->getShape()) << "," << DataTypes::shapeToString(m_right->getShape()) << endl;)
LAZYDEBUG(cout << "chunksize=" << chunksize << endl << "leftstep=" << leftstep << " rightstep=" << rightstep;)
LAZYDEBUG(cout << " numsteps=" << numsteps << endl << "oleftstep=" << oleftstep << " orightstep=" << orightstep;)
LAZYDEBUG(cout << "onumsteps=" << onumsteps << endl;)
LAZYDEBUG(cout << " DPPS=" << m_left->getNumDPPSample() << "," <<m_right->getNumDPPSample() << endl;)
LAZYDEBUG(cout << "" << LS << RS << LN << RN << LES << RES <<LEN << REN <<   endl;)

LAZYDEBUG(cout << "Left res["<< lroffset<< "]=" << (*left)[lroffset] << endl;)
LAZYDEBUG(cout << "Right res["<< rroffset<< "]=" << (*right)[rroffset] << endl;)


  roffset=m_samplesize*tid;
  double* resultp=&(m_samples[roffset]);        // results are stored at the vector offset we received
  // PROC_OP (defined elsewhere in this file) walks the operands using the
  // step/chunk parameters computed above and applies the functor element-wise
  switch(m_op)
  {
    case ADD:
        PROC_OP(NO_ARG,plus<double>());
    break;
    case SUB:
    PROC_OP(NO_ARG,minus<double>());
    break;
    case MUL:
    PROC_OP(NO_ARG,multiplies<double>());
    break;
    case DIV:
    PROC_OP(NO_ARG,divides<double>());
    break;
    case POW:
       PROC_OP(double (double,double),::pow);
    break;
    default:
    throw DataException("Programmer error - resolveBinary can not resolve operator "+opToString(m_op)+".");
  }
LAZYDEBUG(cout << "Result res[" << roffset<< "]" << m_samples[roffset] << endl;)
  return &m_samples;
}
1577    
1578    
// This method assumes that any subexpressions which evaluate to Constant or Tagged Data
// have already been collapsed to IDENTITY. So we must have at least one expanded child.
// unlike the other resolve helpers, we must treat these datapoints separately.
//
// tid      - thread id; selects this thread's region of m_samples
// sampleNo - the sample to evaluate
// roffset  - out-parameter: offset of the result within the returned vector
const DataTypes::ValueType*
DataLazy::resolveNodeTProd(int tid, int sampleNo, size_t& roffset) const
{
LAZYDEBUG(cout << "Resolve TensorProduct: " << toString() << endl;)

  size_t lroffset=0, rroffset=0;    // offsets in the left and right result vectors
    // first work out which of the children are expanded
  bool leftExp=(m_left->m_readytype=='E');
  bool rightExp=(m_right->m_readytype=='E');
  int steps=getNumDPPSample();
  // a non-expanded child keeps reusing its single datapoint (step of 0)
  int leftStep=(leftExp? m_left->getNoValues() : 0);        // do not have scalars as input to this method
  int rightStep=(rightExp?m_right->getNoValues() : 0);

  int resultStep=getNoValues();     // each product fills one whole output datapoint
  roffset=m_samplesize*tid;
  size_t offset=roffset;

  const ValueType* left=m_left->resolveNodeSample(tid, sampleNo, lroffset);

  const ValueType* right=m_right->resolveNodeSample(tid, sampleNo, rroffset);

LAZYDEBUG(cerr << "[Left shape]=" << DataTypes::shapeToString(m_left->getShape()) << "\n[Right shape]=" << DataTypes::shapeToString(m_right->getShape()) << " result=" <<DataTypes::shapeToString(getShape()) <<  endl;
cout << getNoValues() << endl;)


LAZYDEBUG(cerr << "Post sub calls: " << toString() << endl;)
LAZYDEBUG(cout << "LeftExp=" << leftExp << " rightExp=" << rightExp << endl;)
LAZYDEBUG(cout << "LeftR=" << m_left->getRank() << " rightExp=" << m_right->getRank() << endl;)
LAZYDEBUG(cout << "LeftSize=" << m_left->getNoValues() << " RightSize=" << m_right->getNoValues() << endl;)
LAZYDEBUG(cout << "m_samplesize=" << m_samplesize << endl;)
LAZYDEBUG(cout << "outputshape=" << DataTypes::shapeToString(getShape()) << endl;)
LAZYDEBUG(cout << "DPPS=" << m_right->getNumDPPSample() <<"."<<endl;)

  double* resultp=&(m_samples[offset]);     // results are stored at the vector offset we received
  switch(m_op)
  {
    case PROD:
    // one matrix product per datapoint in the sample
    for (int i=0;i<steps;++i,resultp+=resultStep)
    {
          const double *ptr_0 = &((*left)[lroffset]);
          const double *ptr_1 = &((*right)[rroffset]);

LAZYDEBUG(cout << DataTypes::pointToString(*left, m_left->getShape(),lroffset,"LEFT") << endl;)
LAZYDEBUG(cout << DataTypes::pointToString(*right,m_right->getShape(),rroffset, "RIGHT") << endl;)

          matrix_matrix_product(m_SL, m_SM, m_SR, ptr_0, ptr_1, resultp, m_transpose);

      lroffset+=leftStep;
      rroffset+=rightStep;
    }
    break;
    default:
    throw DataException("Programmer error - resolveTProduct can not resolve operator "+opToString(m_op)+".");
  }
  roffset=offset;
  return &m_samples;
}
1639    
1640    
// Public per-sample entry point: determines the calling thread's id and
// delegates to resolveNodeSample.  When compiled with LAZY_STACK_PROF it also
// tracks the deepest stack excursion observed while resolving (stackstart /
// stackend bracket the recursion; their address difference approximates use).
const DataTypes::ValueType*
DataLazy::resolveSample(int sampleNo, size_t& roffset) const
{
#ifdef _OPENMP
    int tid=omp_get_thread_num();
#else
    int tid=0;          // serial build: everything runs as thread 0
#endif

#ifdef LAZY_STACK_PROF
    // record the current stack position before recursing
    stackstart[tid]=&tid;
    stackend[tid]=&tid;
    const DataTypes::ValueType* r=resolveNodeSample(tid, sampleNo, roffset);
    size_t d=(size_t)stackstart[tid]-(size_t)stackend[tid];
    #pragma omp critical
    if (d>maxstackuse)      // keep the maximum seen across all threads
    {
cout << "Max resolve Stack use " << d << endl;
        maxstackuse=d;
    }
    return r;
#else
    return resolveNodeSample(tid, sampleNo, roffset);
#endif
}
1666    
1667    
1668    // This needs to do the work of the identity constructor
1669    void
1670    DataLazy::resolveToIdentity()
1671    {
1672       if (m_op==IDENTITY)
1673        return;
1674       DataReady_ptr p=resolveNodeWorker();
1675       makeIdentity(p);
1676  }  }
1677    
// Turn this node into an IDENTITY node wrapping the ready data p.
// All operator-specific state (axis offset, transpose, tensor-product shape
// factors, tree metrics) is cleared and both children are released, so the
// subtree below this node can be reclaimed.
void DataLazy::makeIdentity(const DataReady_ptr& p)
{
   m_op=IDENTITY;
   m_axis_offset=0;
   m_transpose=0;
   m_SL=m_SM=m_SR=0;
   m_children=m_height=0;       // an identity node is a leaf
   m_id=p;
   // derive the ready-type tag from the concrete type of p
   if(p->isConstant()) {m_readytype='C';}
   else if(p->isExpanded()) {m_readytype='E';}
   else if (p->isTagged()) {m_readytype='T';}
   else {throw DataException("Unknown DataReady instance in convertToIdentity constructor.");}
   m_samplesize=p->getNumDPPSample()*p->getNoValues();
   m_left.reset();
   m_right.reset();
}
1694    
1695    
// Fully evaluate the expression tree (collapsing this node to IDENTITY as a
// side effect) and return the resulting concrete data.
DataReady_ptr
DataLazy::resolve()
{
    resolveToIdentity();
    return m_id;        // after resolveToIdentity, m_id holds the ready data
}
1702    
/* This is really a static method but I think that caused problems in windows */
// Resolve a group of lazy expressions together.  Non-expanded expressions are
// collapsed first; expressions which are already IDENTITY are skipped.  When
// every remaining expression shares the same FunctionSpace, they are all
// resolved inside a single parallel region (one pass over the samples);
// otherwise each is resolved independently.
void
DataLazy::resolveGroupWorker(std::vector<DataLazy*>& dats)
{
  if (dats.empty())
  {
    return;
  }
  vector<DataLazy*> work;
  FunctionSpace fs=dats[0]->getFunctionSpace();
  bool match=true;
  for (int i=dats.size()-1;i>=0;--i)
  {
    if (dats[i]->m_readytype!='E')
    {
        dats[i]->collapse();        // constant/tagged subtrees evaluate directly
    }
    if (dats[i]->m_op!=IDENTITY)
    {
        work.push_back(dats[i]);
        if (fs!=dats[i]->getFunctionSpace())
        {
            match=false;
        }
    }
  }
  if (work.empty())
  {
    return;     // no work to do
  }
  if (match)    // all functionspaces match.  Yes I realise this is overly strict
  {     // it is possible that dats[0] is one of the objects which we discarded and
        // all the other functionspaces match.
    // one output object (and output vector) per expression to resolve
    vector<DataExpanded*> dep;
    vector<ValueType*> vecs;
    for (int i=0;i<work.size();++i)
    {
        dep.push_back(new DataExpanded(fs,work[i]->getShape(), ValueType(work[i]->getNoValues())));
        vecs.push_back(&(dep[i]->getVectorRW()));
    }
    int totalsamples=work[0]->getNumSamples();
    const ValueType* res=0; // Storage for answer
    int sample;
    #pragma omp parallel private(sample, res)
    {
        size_t roffset=0;
        #pragma omp for schedule(static)
        for (sample=0;sample<totalsamples;++sample)
        {
        roffset=0;
        int j;
        // evaluate every expression for this sample and copy into its output
        for (j=work.size()-1;j>=0;--j)
        {
#ifdef _OPENMP
                res=work[j]->resolveNodeSample(omp_get_thread_num(),sample,roffset);
#else
                res=work[j]->resolveNodeSample(0,sample,roffset);
#endif
                DataVector::size_type outoffset=dep[j]->getPointOffset(sample,0);
                memcpy(&((*vecs[j])[outoffset]),&((*res)[roffset]),work[j]->m_samplesize*sizeof(DataVector::ElementType));
        }
        }
    }
    // Now we need to load the new results as identity ops into the lazy nodes
    for (int i=work.size()-1;i>=0;--i)
    {
        work[i]->makeIdentity(boost::dynamic_pointer_cast<DataReady>(dep[i]->getPtr()));
    }
  }
  else  // functionspaces do not match
  {
    for (int i=0;i<work.size();++i)
    {
        work[i]->resolveToIdentity();
    }
  }
}
1781    
1782    
1783    
// This version of resolve uses storage in each node to hold results.
// Evaluates the whole expression tree into a freshly allocated DataExpanded,
// processing samples in parallel.  Constant/Tagged expressions are collapsed
// and returned directly.
DataReady_ptr
DataLazy::resolveNodeWorker()
{
  if (m_readytype!='E')     // if the whole sub-expression is Constant or Tagged, then evaluate it normally
  {
    collapse();
  }
  if (m_op==IDENTITY)       // So a lazy expression of Constant or Tagged data will be returned here.
  {
    return m_id;
  }
    // from this point on we must have m_op!=IDENTITY and m_readytype=='E'
  DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(),  ValueType(getNoValues()));
  ValueType& resvec=result->getVectorRW();
  DataReady_ptr resptr=DataReady_ptr(result);   // take ownership before the parallel loop

  int sample;
  int totalsamples=getNumSamples();
  const ValueType* res=0;   // Storage for answer
LAZYDEBUG(cout << "Total number of samples=" <<totalsamples << endl;)
  #pragma omp parallel private(sample,res)
  {
    size_t roffset=0;
#ifdef LAZY_STACK_PROF
    // bracket this thread's recursion so stack use can be measured afterwards
    stackstart[omp_get_thread_num()]=&roffset;
    stackend[omp_get_thread_num()]=&roffset;
#endif
    #pragma omp for schedule(static)
    for (sample=0;sample<totalsamples;++sample)
    {
        roffset=0;
#ifdef _OPENMP
        res=resolveNodeSample(omp_get_thread_num(),sample,roffset);
#else
        res=resolveNodeSample(0,sample,roffset);
#endif
LAZYDEBUG(cout << "Sample #" << sample << endl;)
LAZYDEBUG(cout << "Final res[" << roffset<< "]=" << (*res)[roffset] << (*res)[roffset]<< endl; )
        // copy this sample's values into the output vector
        DataVector::size_type outoffset=result->getPointOffset(sample,0);
        memcpy(&(resvec[outoffset]),&((*res)[roffset]),m_samplesize*sizeof(DataVector::ElementType));
    }
  }
#ifdef LAZY_STACK_PROF
  for (int i=0;i<getNumberOfThreads();++i)
  {
    size_t r=((size_t)stackstart[i] - (size_t)stackend[i]);
//  cout << i << " " << stackstart[i] << " .. " << stackend[i] << " = " <<  r << endl;
    if (r>maxstackuse)
    {
        maxstackuse=r;
    }
  }
  cout << "Max resolve Stack use=" << maxstackuse << endl;
#endif
  return resptr;
}
1841    
// Render this expression as a string.  The format is chosen by the global
// escriptParams LAZY_STR_FMT setting: 1 = indented tree, 2 = depth only,
// anything else = compact inline expression.
std::string
DataLazy::toString() const
{
  ostringstream oss;
  oss << "Lazy Data: [depth=" << m_height<< "] ";
  switch (escriptParams.getLAZY_STR_FMT())
  {
  case 1:   // tree format
    oss << endl;
    intoTreeString(oss,"");
    break;
  case 2:   // just the depth
    break;
  default:
    intoString(oss);
    break;
  }
  return oss.str();
}
1861    
1862    
1863  void  void
1864  DataLazy::intoString(ostringstream& oss) const  DataLazy::intoString(ostringstream& oss) const
1865  {  {
1866    //    oss << "[" << m_children <<";"<<m_height <<"]";
1867    switch (getOpgroup(m_op))    switch (getOpgroup(m_op))
1868    {    {
1869    case G_IDENTITY:    case G_IDENTITY:
1870        if (m_id->isExpanded())
1871        {
1872           oss << "E";
1873        }
1874        else if (m_id->isTagged())
1875        {
1876          oss << "T";
1877        }
1878        else if (m_id->isConstant())
1879        {
1880          oss << "C";
1881        }
1882        else
1883        {
1884          oss << "?";
1885        }
1886      oss << '@' << m_id.get();      oss << '@' << m_id.get();
1887      break;      break;
1888    case G_BINARY:    case G_BINARY:
# Line 451  DataLazy::intoString(ostringstream& oss) Line 1893  DataLazy::intoString(ostringstream& oss)
1893      oss << ')';      oss << ')';
1894      break;      break;
1895    case G_UNARY:    case G_UNARY:
1896      case G_UNARY_P:
1897      case G_NP1OUT:
1898      case G_NP1OUT_P:
1899      case G_REDUCTION:
1900        oss << opToString(m_op) << '(';
1901        m_left->intoString(oss);
1902        oss << ')';
1903        break;
1904      case G_TENSORPROD:
1905        oss << opToString(m_op) << '(';
1906        m_left->intoString(oss);
1907        oss << ", ";
1908        m_right->intoString(oss);
1909        oss << ')';
1910        break;
1911      case G_NP1OUT_2P:
1912      oss << opToString(m_op) << '(';      oss << opToString(m_op) << '(';
1913      m_left->intoString(oss);      m_left->intoString(oss);
1914        oss << ", " << m_axis_offset << ", " << m_transpose;
1915        oss << ')';
1916        break;
1917      case G_CONDEVAL:
1918        oss << opToString(m_op)<< '(' ;
1919        m_mask->intoString(oss);
1920        oss << " ? ";
1921        m_left->intoString(oss);
1922        oss << " : ";
1923        m_right->intoString(oss);
1924      oss << ')';      oss << ')';
1925      break;      break;
1926    default:    default:
# Line 460  DataLazy::intoString(ostringstream& oss) Line 1928  DataLazy::intoString(ostringstream& oss)
1928    }    }
1929  }  }
1930    
// Append an indented tree rendering of this expression to oss.
// Each node is prefixed with "[rank:samplesize]" and its indent; children are
// printed one level deeper (indent grows by one '.' per level).
void
DataLazy::intoTreeString(ostringstream& oss, string indent) const
{
  oss << '[' << m_rank << ':' << setw(3) << m_samplesize << "] " << indent;
  switch (getOpgroup(m_op))
  {
  case G_IDENTITY:
    // leaves are tagged with the storage type of the wrapped data
    if (m_id->isExpanded())
    {
       oss << "E";
    }
    else if (m_id->isTagged())
    {
      oss << "T";
    }
    else if (m_id->isConstant())
    {
      oss << "C";
    }
    else
    {
      oss << "?";
    }
    oss << '@' << m_id.get() << endl;
    break;
  case G_BINARY:
    oss << opToString(m_op) << endl;
    indent+='.';
    m_left->intoTreeString(oss, indent);
    m_right->intoTreeString(oss, indent);
    break;
  case G_UNARY:
  case G_UNARY_P:
  case G_NP1OUT:
  case G_NP1OUT_P:
  case G_REDUCTION:
    oss << opToString(m_op) << endl;
    indent+='.';
    m_left->intoTreeString(oss, indent);
    break;
  case G_TENSORPROD:
    oss << opToString(m_op) << endl;
    indent+='.';
    m_left->intoTreeString(oss, indent);
    m_right->intoTreeString(oss, indent);
    break;
  case G_NP1OUT_2P:
    // these operations carry extra integer parameters worth showing
    oss << opToString(m_op) << ", " << m_axis_offset << ", " << m_transpose<< endl;
    indent+='.';
    m_left->intoTreeString(oss, indent);
    break;
  default:
    oss << "UNKNOWN";
  }
}
1987    
1988    
// Deep-copy this expression: the children (and for IDENTITY, the wrapped
// data) are deep-copied and a new node of the same operation is built around
// them, forwarding whichever extra parameters the operation group uses.
DataAbstract*
DataLazy::deepCopy()
{
  switch (getOpgroup(m_op))
  {
  case G_IDENTITY:  return new DataLazy(m_id->deepCopy()->getPtr());
  case G_UNARY:
  case G_REDUCTION:      return new DataLazy(m_left->deepCopy()->getPtr(),m_op);
  case G_UNARY_P:   return new DataLazy(m_left->deepCopy()->getPtr(), m_op, m_tol);
  case G_BINARY:    return new DataLazy(m_left->deepCopy()->getPtr(),m_right->deepCopy()->getPtr(),m_op);
  case G_NP1OUT: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(),m_op);
  case G_TENSORPROD: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
  case G_NP1OUT_P:   return new DataLazy(m_left->deepCopy()->getPtr(),m_op,  m_axis_offset);
  case G_NP1OUT_2P:  return new DataLazy(m_left->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
  default:
    throw DataException("Programmer error - do not know how to deepcopy operator "+opToString(m_op)+".");
  }
}
2007    
2008    
2009    
// There is no single, natural interpretation of getLength on DataLazy.
// Instances of DataReady can look at the size of their vectors.
// For lazy though, it could be the size the data would be if it were resolved;
// or it could be some function of the lengths of the DataReady instances which
// form part of the expression.
// Rather than have people making assumptions, I have disabled the method.
DataTypes::ValueType::size_type
DataLazy::getLength() const
{
  throw DataException("getLength() does not make sense for lazy data.");
}
2021    
2022    
# Line 486  DataLazy::getSlice(const DataTypes::Regi Line 2026  DataLazy::getSlice(const DataTypes::Regi
2026    throw DataException("getSlice - not implemented for Lazy objects.");    throw DataException("getSlice - not implemented for Lazy objects.");
2027  }  }
2028    
2029    
// To do this we need to rely on our child nodes.
// Non-const overload: a Constant/Tagged expression is collapsed in place
// first, after which the wrapped (or a suitable child) node can answer.
DataTypes::ValueType::size_type
DataLazy::getPointOffset(int sampleNo,
                 int dataPointNo)
{
  if (m_op==IDENTITY)
  {
    return m_id->getPointOffset(sampleNo,dataPointNo);
  }
  if (m_readytype!='E')
  {
    collapse();     // makes this an IDENTITY node wrapping ready data
    return m_id->getPointOffset(sampleNo,dataPointNo);
  }
  // at this point we do not have an identity node and the expression will be Expanded
  // so we only need to know which child to ask
  if (m_left->m_readytype=='E')
  {
    return m_left->getPointOffset(sampleNo,dataPointNo);
  }
  else
  {
    return m_right->getPointOffset(sampleNo,dataPointNo);
  }
}
2055    
// To do this we need to rely on our child nodes.
// Const overload: cannot collapse, so a Tagged expression (which would need
// collapsing) is reported as a programmer error instead.
DataTypes::ValueType::size_type
DataLazy::getPointOffset(int sampleNo,
                 int dataPointNo) const
{
  if (m_op==IDENTITY)
  {
    return m_id->getPointOffset(sampleNo,dataPointNo);
  }
  if (m_readytype=='E')
  {
    // at this point we do not have an identity node and the expression will be Expanded
    // so we only need to know which child to ask
    if (m_left->m_readytype=='E')
    {
    return m_left->getPointOffset(sampleNo,dataPointNo);
    }
    else
    {
    return m_right->getPointOffset(sampleNo,dataPointNo);
    }
  }
  if (m_readytype=='C')
  {
    return m_left->getPointOffset(sampleNo,dataPointNo); // which child doesn't matter
  }
  throw DataException("Programmer error - getPointOffset on lazy data may require collapsing (but this object is marked const).");
}
2084    
2085    
// I have decided to let Data:: handle this issue.
// DataLazy objects are treated as read only, so this always throws.
// The commented-out code below shows what an in-place implementation would
// have looked like (replace this node with a zero DataConstant); it is kept
// for reference only.
void
DataLazy::setToZero()
{
//   DataTypes::ValueType v(getNoValues(),0);
//   m_id=DataReady_ptr(new DataConstant(getFunctionSpace(),getShape(),v));
//   m_op=IDENTITY;
//   m_right.reset();  
//   m_left.reset();
//   m_readytype='C';
//   m_buffsRequired=1;

  (void)privdebug;  // to stop the compiler complaining about unused privdebug
  throw DataException("Programmer error - setToZero not supported for DataLazy (DataLazy objects should be read only).");
}
2101    
// True when resolving this expression would yield expanded data.
bool
DataLazy::actsExpanded() const
{
    return (m_readytype=='E');
}
2107    
2108  }   // end namespace  }   // end namespace

Legend:
Removed from v.1888  
changed lines
  Added in v.4657

  ViewVC Help
Powered by ViewVC 1.1.26