/[escript]/branches/clazy/escriptcore/src/DataLazy.cpp
ViewVC logotype

Diff of /branches/clazy/escriptcore/src/DataLazy.cpp

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

branches/schroedinger/escript/src/DataLazy.cpp revision 1888 by jfenwick, Wed Oct 15 04:00:21 2008 UTC trunk/escriptcore/src/DataLazy.cpp revision 4507 by jfenwick, Wed Jul 24 02:50:22 2013 UTC
# Line 1  Line 1 
1    
2  /*******************************************************  /*****************************************************************************
3  *  *
4  * Copyright (c) 2003-2008 by University of Queensland  * Copyright (c) 2003-2013 by University of Queensland
5  * Earth Systems Science Computational Center (ESSCC)  * http://www.uq.edu.au
 * http://www.uq.edu.au/esscc  
6  *  *
7  * Primary Business: Queensland, Australia  * Primary Business: Queensland, Australia
8  * Licensed under the Open Software License version 3.0  * Licensed under the Open Software License version 3.0
9  * http://www.opensource.org/licenses/osl-3.0.php  * http://www.opensource.org/licenses/osl-3.0.php
10  *  *
11  *******************************************************/  * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12    * Development since 2012 by School of Earth Sciences
13    *
14    *****************************************************************************/
15    
16    
17  #include "DataLazy.h"  #include "DataLazy.h"
18  #ifdef USE_NETCDF  #include "esysUtils/Esys_MPI.h"
 #include <netcdfcpp.h>  
 #endif  
 #ifdef PASO_MPI  
 #include <mpi.h>  
 #endif  
19  #ifdef _OPENMP  #ifdef _OPENMP
20  #include <omp.h>  #include <omp.h>
21  #endif  #endif
# Line 26  Line 23 
23  #include "DataTypes.h"  #include "DataTypes.h"
24  #include "Data.h"  #include "Data.h"
25  #include "UnaryFuncs.h"     // for escript::fsign  #include "UnaryFuncs.h"     // for escript::fsign
26    #include "Utils.h"
27    
28    #include "EscriptParams.h"
29    
30    #ifdef USE_NETCDF
31    #include <netcdfcpp.h>
32    #endif
33    
34    #include <iomanip>      // for some fancy formatting in debug
35    
36    // #define LAZYDEBUG(X) if (privdebug){X;}
37    #define LAZYDEBUG(X)
38    namespace
39    {
40    bool privdebug=false;
41    
42    #define ENABLEDEBUG privdebug=true;
43    #define DISABLEDEBUG privdebug=false;
44    }
45    
46    // #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {cerr << "\n!!!!!!! SIZE LIMIT EXCEEDED " << m_children << ";" << m_height << endl << toString() << endl;resolveToIdentity();}
47    
48    // #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {cerr << "SIZE LIMIT EXCEEDED " << m_height << endl;resolveToIdentity();}
49    
50    
51    #define SIZELIMIT if (m_height>escript::escriptParams.getTOO_MANY_LEVELS())  {if (escript::escriptParams.getLAZY_VERBOSE()){cerr << "SIZE LIMIT EXCEEDED height=" << m_height << endl;}resolveToIdentity();}
52    
53    /*
54    How does DataLazy work?
55    ~~~~~~~~~~~~~~~~~~~~~~~
56    
57    Each instance represents a single operation on one or two other DataLazy instances. These arguments are normally
58    denoted left and right.
59    
60    A special operation, IDENTITY, stores an instance of DataReady in the m_id member.
61    This means that all "internal" nodes in the structure are instances of DataLazy.
62    
63    Each operation has a string representation as well as an opgroup - eg G_IDENTITY, G_BINARY, ...
64    Note that IDENTITY is not considered a unary operation.
65    
66    I am avoiding calling the structure formed a tree because it is not guaranteed to be one (eg c=a+a).
67    It must however form a DAG (directed acyclic graph).
68    I will refer to individual DataLazy objects with the structure as nodes.
69    
70    Each node also stores:
71    - m_readytype \in {'E','T','C','?'} ~ indicates what sort of DataReady would be produced if the expression was
72        evaluated.
73    - m_buffsrequired ~ the large number of samples which would need to be kept simultaneously in order to
74        evaluate the expression.
75    - m_samplesize ~ the number of doubles stored in a sample.
76    
77    When a new node is created, the above values are computed based on the values in the child nodes.
78    Eg: if left requires 4 samples and right requires 6 then left+right requires 7 samples.
79    
80    The resolve method, which produces a DataReady from a DataLazy, does the following:
81    1) Create a DataReady to hold the new result.
82    2) Allocate a vector (v) big enough to hold m_buffsrequired samples.
83    3) For each sample, call resolveSample with v, to get its values and copy them into the result object.
84    
85    (In the case of OMP, multiple samples are resolved in parallel so the vector needs to be larger.)
86    
87    resolveSample returns a Vector* and an offset within that vector where the result is stored.
88    Normally, this would be v, but for identity nodes their internal vector is returned instead.
89    
90    The convention that I use, is that the resolve methods should store their results starting at the offset they are passed.
91    
92    For expressions which evaluate to Constant or Tagged, there is a different evaluation method.
93    The collapse method invokes the (non-lazy) operations on the Data class to evaluate the expression.
94    
95    To add a new operator you need to do the following (plus anything I might have forgotten - adding a new group for example):
96    1) Add to the ES_optype.
97    2) determine what opgroup your operation belongs to (X)
98    3) add a string for the op to the end of ES_opstrings
99    4) increase ES_opcount
100    5) add an entry (X) to opgroups
101    6) add an entry to the switch in collapseToReady
102    7) add an entry to resolveX
103    */
104    
105    
106  using namespace std;  using namespace std;
107  using namespace boost;  using namespace boost;
# Line 33  using namespace boost; Line 109  using namespace boost;
109  namespace escript  namespace escript
110  {  {
111    
 const std::string&  
 opToString(ES_optype op);  
   
112  namespace  namespace
113  {  {
114    
115    
116    // enabling this will print out when ever the maximum stacksize used by resolve increases
117    // it assumes _OPENMP is also in use
118    //#define LAZY_STACK_PROF
119    
120    
121    
122    #ifndef _OPENMP
123      #ifdef LAZY_STACK_PROF
124      #undef LAZY_STACK_PROF
125      #endif
126    #endif
127    
128    
129    #ifdef LAZY_STACK_PROF
130    std::vector<void*> stackstart(getNumberOfThreads());
131    std::vector<void*> stackend(getNumberOfThreads());
132    size_t maxstackuse=0;
133    #endif
134    
135  enum ES_opgroup  enum ES_opgroup
136  {  {
137     G_UNKNOWN,     G_UNKNOWN,
138     G_IDENTITY,     G_IDENTITY,
139     G_BINARY,     G_BINARY,        // pointwise operations with two arguments
140     G_UNARY     G_UNARY,     // pointwise operations with one argument
141       G_UNARY_P,       // pointwise operations with one argument, requiring a parameter
142       G_NP1OUT,        // non-pointwise op with one output
143       G_NP1OUT_P,      // non-pointwise op with one output requiring a parameter
144       G_TENSORPROD,    // general tensor product
145       G_NP1OUT_2P,     // non-pointwise op with one output requiring two params
146       G_REDUCTION,     // non-pointwise unary op with a scalar output
147       G_CONDEVAL
148  };  };
149    
150    
151    
152    
153  string ES_opstrings[]={"UNKNOWN","IDENTITY","+","-","*","/","sin","cos","tan",  string ES_opstrings[]={"UNKNOWN","IDENTITY","+","-","*","/","^",
154                "sin","cos","tan",
155              "asin","acos","atan","sinh","cosh","tanh","erf",              "asin","acos","atan","sinh","cosh","tanh","erf",
156              "asinh","acosh","atanh",              "asinh","acosh","atanh",
157              "log10","log","sign","abs","neg","pos","exp","sqrt",              "log10","log","sign","abs","neg","pos","exp","sqrt",
158              "1/","where>0","where<0","where>=0","where<=0"};              "1/","where>0","where<0","where>=0","where<=0", "where<>0","where=0",
159  int ES_opcount=32;              "symmetric","nonsymmetric",
160  ES_opgroup opgroups[]={G_UNKNOWN,G_IDENTITY,G_BINARY,G_BINARY,G_BINARY,G_BINARY,G_UNARY,G_UNARY,G_UNARY, //9              "prod",
161              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,    // 16              "transpose", "trace",
162              G_UNARY,G_UNARY,G_UNARY,                    // 19              "swapaxes",
163              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,        // 27              "minval", "maxval",
164              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY};              "condEval"};
165    int ES_opcount=44;
166    ES_opgroup opgroups[]={G_UNKNOWN,G_IDENTITY,G_BINARY,G_BINARY,G_BINARY,G_BINARY, G_BINARY,
167                G_UNARY,G_UNARY,G_UNARY, //10
168                G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,    // 17
169                G_UNARY,G_UNARY,G_UNARY,                    // 20
170                G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,    // 28
171                G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY, G_UNARY_P, G_UNARY_P,      // 35
172                G_NP1OUT,G_NP1OUT,
173                G_TENSORPROD,
174                G_NP1OUT_P, G_NP1OUT_P,
175                G_NP1OUT_2P,
176                G_REDUCTION, G_REDUCTION,
177                G_CONDEVAL};
178  inline  inline
179  ES_opgroup  ES_opgroup
180  getOpgroup(ES_optype op)  getOpgroup(ES_optype op)
# Line 79  resultFS(DataAbstract_ptr left, DataAbst Line 191  resultFS(DataAbstract_ptr left, DataAbst
191      // that way, if interpolate is required in any other op we can just throw a      // that way, if interpolate is required in any other op we can just throw a
192      // programming error exception.      // programming error exception.
193    
194      FunctionSpace l=left->getFunctionSpace();
195      if (left->getFunctionSpace()!=right->getFunctionSpace())    FunctionSpace r=right->getFunctionSpace();
196      {    if (l!=r)
197          throw DataException("FunctionSpaces not equal - interpolation not supported on lazy data.");    {
198      }      signed char res=r.getDomain()->preferredInterpolationOnDomain(r.getTypeCode(), l.getTypeCode());
199      return left->getFunctionSpace();      if (res==1)
200        {
201        return l;
202        }
203        if (res==-1)
204        {
205        return r;
206        }
207        throw DataException("Cannot interpolate between the FunctionSpaces given for operation "+opToString(op)+".");
208      }
209      return l;
210  }  }
211    
212  // return the shape of the result of "left op right"  // return the shape of the result of "left op right"
213    // the shapes resulting from tensor product are more complex to compute so are worked out elsewhere
214  DataTypes::ShapeType  DataTypes::ShapeType
215  resultShape(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)  resultShape(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
216  {  {
217      if (left->getShape()!=right->getShape())      if (left->getShape()!=right->getShape())
218      {      {
219          throw DataException("Shapes not the same - shapes must match for lazy data.");        if ((getOpgroup(op)!=G_BINARY) && (getOpgroup(op)!=G_NP1OUT))
220          {
221            throw DataException("Shapes not the name - shapes must match for (point)binary operations.");
222          }
223    
224          if (left->getRank()==0)   // we need to allow scalar * anything
225          {
226            return right->getShape();
227          }
228          if (right->getRank()==0)
229          {
230            return left->getShape();
231          }
232          throw DataException("Shapes not the same - arguments must have matching shapes (or be scalars) for (point)binary operations on lazy data.");
233      }      }
234      return left->getShape();      return left->getShape();
235  }  }
236    
237  size_t  // return the shape for "op left"
238  resultLength(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)  
239    DataTypes::ShapeType
240    resultShape(DataAbstract_ptr left, ES_optype op, int axis_offset)
241  {  {
242     switch (getOpgroup(op))      switch(op)
243     {      {
244     case G_BINARY: return left->getLength();          case TRANS:
245     case G_UNARY: return left->getLength();         {            // for the scoping of variables
246     default:          const DataTypes::ShapeType& s=left->getShape();
247      throw DataException("Programmer Error - attempt to getLength() for operator "+opToString(op)+".");          DataTypes::ShapeType sh;
248     }          int rank=left->getRank();
249            if (axis_offset<0 || axis_offset>rank)
250            {
251                stringstream e;
252                e << "Error - Data::transpose must have 0 <= axis_offset <= rank=" << rank;
253                throw DataException(e.str());
254            }
255            for (int i=0; i<rank; i++)
256            {
257               int index = (axis_offset+i)%rank;
258               sh.push_back(s[index]); // Append to new shape
259            }
260            return sh;
261           }
262        break;
263        case TRACE:
264           {
265            int rank=left->getRank();
266            if (rank<2)
267            {
268               throw DataException("Trace can only be computed for objects with rank 2 or greater.");
269            }
270            if ((axis_offset>rank-2) || (axis_offset<0))
271            {
272               throw DataException("Trace: axis offset must lie between 0 and rank-2 inclusive.");
273            }
274            if (rank==2)
275            {
276               return DataTypes::scalarShape;
277            }
278            else if (rank==3)
279            {
280               DataTypes::ShapeType sh;
281                   if (axis_offset==0)
282               {
283                    sh.push_back(left->getShape()[2]);
284                   }
285                   else     // offset==1
286               {
287                sh.push_back(left->getShape()[0]);
288                   }
289               return sh;
290            }
291            else if (rank==4)
292            {
293               DataTypes::ShapeType sh;
294               const DataTypes::ShapeType& s=left->getShape();
295                   if (axis_offset==0)
296               {
297                    sh.push_back(s[2]);
298                    sh.push_back(s[3]);
299                   }
300                   else if (axis_offset==1)
301               {
302                    sh.push_back(s[0]);
303                    sh.push_back(s[3]);
304                   }
305               else     // offset==2
306               {
307                sh.push_back(s[0]);
308                sh.push_back(s[1]);
309               }
310               return sh;
311            }
312            else        // unknown rank
313            {
314               throw DataException("Error - Data::trace can only be calculated for rank 2, 3 or 4 object.");
315            }
316           }
317        break;
318            default:
319        throw DataException("Programmer error - resultShape(left,op) can't compute shapes for operator "+opToString(op)+".");
320        }
321  }  }
322    
323  int  DataTypes::ShapeType
324  calcBuffs(const DataLazy_ptr& left, const DataLazy_ptr& right, ES_optype op)  SwapShape(DataAbstract_ptr left, const int axis0, const int axis1)
325  {  {
326     switch(getOpgroup(op))       // This code taken from the Data.cpp swapaxes() method
327     {       // Some of the checks are probably redundant here
328     case G_IDENTITY: return 1;       int axis0_tmp,axis1_tmp;
329     case G_BINARY: return max(left->getBuffsRequired(),right->getBuffsRequired()+1);       const DataTypes::ShapeType& s=left->getShape();
330     case G_UNARY: return max(left->getBuffsRequired(),1);       DataTypes::ShapeType out_shape;
331     default:       // Here's the equivalent of python s_out=s[axis_offset:]+s[:axis_offset]
332      throw DataException("Programmer Error - attempt to calcBuffs() for operator "+opToString(op)+".");       // which goes thru all shape vector elements starting with axis_offset (at index=rank wrap around to 0)
333     }       int rank=left->getRank();
334         if (rank<2) {
335            throw DataException("Error - Data::swapaxes argument must have at least rank 2.");
336         }
337         if (axis0<0 || axis0>rank-1) {
338            stringstream e;
339            e << "Error - Data::swapaxes: axis0 must be between 0 and rank-1=" << (rank-1);
340            throw DataException(e.str());
341         }
342         if (axis1<0 || axis1>rank-1) {
343            stringstream e;
344            e << "Error - Data::swapaxes: axis1 must be between 0 and rank-1=" << (rank-1);
345            throw DataException(e.str());
346         }
347         if (axis0 == axis1) {
348             throw DataException("Error - Data::swapaxes: axis indices must be different.");
349         }
350         if (axis0 > axis1) {
351             axis0_tmp=axis1;
352             axis1_tmp=axis0;
353         } else {
354             axis0_tmp=axis0;
355             axis1_tmp=axis1;
356         }
357         for (int i=0; i<rank; i++) {
358           if (i == axis0_tmp) {
359              out_shape.push_back(s[axis1_tmp]);
360           } else if (i == axis1_tmp) {
361              out_shape.push_back(s[axis0_tmp]);
362           } else {
363              out_shape.push_back(s[i]);
364           }
365         }
366        return out_shape;
367  }  }
368    
369    
370    // determine the output shape for the general tensor product operation
371    // the additional parameters return information required later for the product
372    // the majority of this code is copy pasted from C_General_Tensor_Product
373    DataTypes::ShapeType
374    GTPShape(DataAbstract_ptr left, DataAbstract_ptr right, int axis_offset, int transpose, int& SL, int& SM, int& SR)
375    {
376        
377      // Get rank and shape of inputs
378      int rank0 = left->getRank();
379      int rank1 = right->getRank();
380      const DataTypes::ShapeType& shape0 = left->getShape();
381      const DataTypes::ShapeType& shape1 = right->getShape();
382    
383      // Prepare for the loops of the product and verify compatibility of shapes
384      int start0=0, start1=0;
385      if (transpose == 0)       {}
386      else if (transpose == 1)  { start0 = axis_offset; }
387      else if (transpose == 2)  { start1 = rank1-axis_offset; }
388      else              { throw DataException("DataLazy GeneralTensorProduct Constructor: Error - transpose should be 0, 1 or 2"); }
389    
390      if (rank0<axis_offset)
391      {
392        throw DataException("DataLazy GeneralTensorProduct Constructor: Error - rank of left < axisoffset");
393      }
394    
395      // Adjust the shapes for transpose
396      DataTypes::ShapeType tmpShape0(rank0);    // pre-sizing the vectors rather
397      DataTypes::ShapeType tmpShape1(rank1);    // than using push_back
398      for (int i=0; i<rank0; i++)   { tmpShape0[i]=shape0[(i+start0)%rank0]; }
399      for (int i=0; i<rank1; i++)   { tmpShape1[i]=shape1[(i+start1)%rank1]; }
400    
401      // Prepare for the loops of the product
402      SL=1, SM=1, SR=1;
403      for (int i=0; i<rank0-axis_offset; i++)   {
404        SL *= tmpShape0[i];
405      }
406      for (int i=rank0-axis_offset; i<rank0; i++)   {
407        if (tmpShape0[i] != tmpShape1[i-(rank0-axis_offset)]) {
408          throw DataException("C_GeneralTensorProduct: Error - incompatible shapes");
409        }
410        SM *= tmpShape0[i];
411      }
412      for (int i=axis_offset; i<rank1; i++)     {
413        SR *= tmpShape1[i];
414      }
415    
416      // Define the shape of the output (rank of shape is the sum of the loop ranges below)
417      DataTypes::ShapeType shape2(rank0+rank1-2*axis_offset);  
418      {         // block to limit the scope of out_index
419         int out_index=0;
420         for (int i=0; i<rank0-axis_offset; i++, ++out_index) { shape2[out_index]=tmpShape0[i]; } // First part of arg_0_Z
421         for (int i=axis_offset; i<rank1; i++, ++out_index)   { shape2[out_index]=tmpShape1[i]; } // Last part of arg_1_Z
422      }
423    
424      if (shape2.size()>ESCRIPT_MAX_DATA_RANK)
425      {
426         ostringstream os;
427         os << "C_GeneralTensorProduct: Error - Attempt to create a rank " << shape2.size() << " object. The maximum rank is " << ESCRIPT_MAX_DATA_RANK << ".";
428         throw DataException(os.str());
429      }
430    
431      return shape2;
432    }
433    
434  }   // end anonymous namespace  }   // end anonymous namespace
435    
436    
437    
438    // Return a string representing the operation
439  const std::string&  const std::string&
440  opToString(ES_optype op)  opToString(ES_optype op)
441  {  {
# Line 138  opToString(ES_optype op) Line 446  opToString(ES_optype op)
446    return ES_opstrings[op];    return ES_opstrings[op];
447  }  }
448    
449    void DataLazy::LazyNodeSetup()
450    {
451    #ifdef _OPENMP
452        int numthreads=omp_get_max_threads();
453        m_samples.resize(numthreads*m_samplesize);
454        m_sampleids=new int[numthreads];
455        for (int i=0;i<numthreads;++i)
456        {
457            m_sampleids[i]=-1;  
458        }
459    #else
460        m_samples.resize(m_samplesize);
461        m_sampleids=new int[1];
462        m_sampleids[0]=-1;
463    #endif  // _OPENMP
464    }
465    
466    
467    // Creates an identity node
468  DataLazy::DataLazy(DataAbstract_ptr p)  DataLazy::DataLazy(DataAbstract_ptr p)
469      : parent(p->getFunctionSpace(),p->getShape()),      : parent(p->getFunctionSpace(),p->getShape())
470      m_op(IDENTITY)      ,m_sampleids(0),
471        m_samples(1)
472  {  {
473     if (p->isLazy())     if (p->isLazy())
474     {     {
     // TODO: fix this.   We could make the new node a copy of p?  
475      // I don't want identity of Lazy.      // I don't want identity of Lazy.
476      // Question: Why would that be so bad?      // Question: Why would that be so bad?
477      // Answer: We assume that the child of ID is something we can call getVector on      // Answer: We assume that the child of ID is something we can call getVector on
# Line 153  DataLazy::DataLazy(DataAbstract_ptr p) Line 479  DataLazy::DataLazy(DataAbstract_ptr p)
479     }     }
480     else     else
481     {     {
482      m_id=dynamic_pointer_cast<DataReady>(p);      p->makeLazyShared();
483        DataReady_ptr dr=dynamic_pointer_cast<DataReady>(p);
484        makeIdentity(dr);
485    LAZYDEBUG(cout << "Wrapping " << dr.get() << " id=" << m_id.get() << endl;)
486     }     }
487     m_length=p->getLength();  LAZYDEBUG(cout << "(1)Lazy created with " << m_samplesize << endl;)
    m_buffsRequired=1;  
    m_samplesize=getNumDPPSample()*getNoValues();  
 cout << "(1)Lazy created with " << m_samplesize << endl;  
488  }  }
489    
490  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op)  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op)
491      : parent(left->getFunctionSpace(),left->getShape()),      : parent(left->getFunctionSpace(),(getOpgroup(op)!=G_REDUCTION)?left->getShape():DataTypes::scalarShape),
492      m_op(op)      m_op(op),
493        m_axis_offset(0),
494        m_transpose(0),
495        m_SL(0), m_SM(0), m_SR(0)
496  {  {
497     if (getOpgroup(op)!=G_UNARY)     if ((getOpgroup(op)!=G_UNARY) && (getOpgroup(op)!=G_NP1OUT) && (getOpgroup(op)!=G_REDUCTION))
498     {     {
499      throw DataException("Programmer error - constructor DataLazy(left, op) will only process UNARY operations.");      throw DataException("Programmer error - constructor DataLazy(left, op) will only process UNARY operations.");
500     }     }
501    
502     DataLazy_ptr lleft;     DataLazy_ptr lleft;
503     if (!left->isLazy())     if (!left->isLazy())
504     {     {
# Line 178  DataLazy::DataLazy(DataAbstract_ptr left Line 508  DataLazy::DataLazy(DataAbstract_ptr left
508     {     {
509      lleft=dynamic_pointer_cast<DataLazy>(left);      lleft=dynamic_pointer_cast<DataLazy>(left);
510     }     }
511     m_length=left->getLength();     m_readytype=lleft->m_readytype;
512     m_left=lleft;     m_left=lleft;
    m_buffsRequired=1;  
513     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
514       m_children=m_left->m_children+1;
515       m_height=m_left->m_height+1;
516       LazyNodeSetup();
517       SIZELIMIT
518  }  }
519    
520    
521  DataLazy::DataLazy(DataLazy_ptr left, DataLazy_ptr right, ES_optype op)  // In this constructor we need to consider interpolation
522    DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
523      : parent(resultFS(left,right,op), resultShape(left,right,op)),      : parent(resultFS(left,right,op), resultShape(left,right,op)),
524      m_left(left),      m_op(op),
525      m_right(right),      m_SL(0), m_SM(0), m_SR(0)
     m_op(op)  
526  {  {
527     if (getOpgroup(op)!=G_BINARY)  LAZYDEBUG(cout << "Forming operator with " << left.get() << " " << right.get() << endl;)
528       if ((getOpgroup(op)!=G_BINARY))
529     {     {
530      throw DataException("Programmer error - constructor DataLazy(left, right, op) will only process BINARY operations.");      throw DataException("Programmer error - constructor DataLazy(left, right, op) will only process BINARY operations.");
531     }     }
532     m_length=resultLength(m_left,m_right,m_op);  
533       if (getFunctionSpace()!=left->getFunctionSpace())    // left needs to be interpolated
534       {
535        FunctionSpace fs=getFunctionSpace();
536        Data ltemp(left);
537        Data tmp(ltemp,fs);
538        left=tmp.borrowDataPtr();
539       }
540       if (getFunctionSpace()!=right->getFunctionSpace())   // right needs to be interpolated
541       {
542        Data tmp(Data(right),getFunctionSpace());
543        right=tmp.borrowDataPtr();
544    LAZYDEBUG(cout << "Right interpolation required " << right.get() << endl;)
545       }
546       left->operandCheck(*right);
547    
548       if (left->isLazy())          // the children need to be DataLazy. Wrap them in IDENTITY if required
549       {
550        m_left=dynamic_pointer_cast<DataLazy>(left);
551    LAZYDEBUG(cout << "Left is " << m_left->toString() << endl;)
552       }
553       else
554       {
555        m_left=DataLazy_ptr(new DataLazy(left));
556    LAZYDEBUG(cout << "Left " << left.get() << " wrapped " << m_left->m_id.get() << endl;)
557       }
558       if (right->isLazy())
559       {
560        m_right=dynamic_pointer_cast<DataLazy>(right);
561    LAZYDEBUG(cout << "Right is " << m_right->toString() << endl;)
562       }
563       else
564       {
565        m_right=DataLazy_ptr(new DataLazy(right));
566    LAZYDEBUG(cout << "Right " << right.get() << " wrapped " << m_right->m_id.get() << endl;)
567       }
568       char lt=m_left->m_readytype;
569       char rt=m_right->m_readytype;
570       if (lt=='E' || rt=='E')
571       {
572        m_readytype='E';
573       }
574       else if (lt=='T' || rt=='T')
575       {
576        m_readytype='T';
577       }
578       else
579       {
580        m_readytype='C';
581       }
582     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
583     m_buffsRequired=calcBuffs(m_left, m_right, m_op);     m_children=m_left->m_children+m_right->m_children+2;
584  cout << "(2)Lazy created with " << m_samplesize << endl;     m_height=max(m_left->m_height,m_right->m_height)+1;
585       LazyNodeSetup();
586       SIZELIMIT
587    LAZYDEBUG(cout << "(3)Lazy created with " << m_samplesize << endl;)
588  }  }
589    
590  DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)  DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op, int axis_offset, int transpose)
591      : parent(resultFS(left,right,op), resultShape(left,right,op)),      : parent(resultFS(left,right,op), GTPShape(left,right, axis_offset, transpose, m_SL,m_SM, m_SR)),
592      m_op(op)      m_op(op),
593        m_axis_offset(axis_offset),
594        m_transpose(transpose)
595  {  {
596     if (getOpgroup(op)!=G_BINARY)     if ((getOpgroup(op)!=G_TENSORPROD))
597       {
598        throw DataException("Programmer error - constructor DataLazy(left, right, op, ax, tr) will only process BINARY operations which require parameters.");
599       }
600       if ((transpose>2) || (transpose<0))
601       {
602        throw DataException("DataLazy GeneralTensorProduct constructor: Error - transpose should be 0, 1 or 2");
603       }
604       if (getFunctionSpace()!=left->getFunctionSpace())    // left needs to be interpolated
605     {     {
606      throw DataException("Programmer error - constructor DataLazy(left, op) will only process BINARY operations.");      FunctionSpace fs=getFunctionSpace();
607        Data ltemp(left);
608        Data tmp(ltemp,fs);
609        left=tmp.borrowDataPtr();
610     }     }
611     if (left->isLazy())     if (getFunctionSpace()!=right->getFunctionSpace())   // right needs to be interpolated
612       {
613        Data tmp(Data(right),getFunctionSpace());
614        right=tmp.borrowDataPtr();
615       }
616    //    left->operandCheck(*right);
617    
618       if (left->isLazy())          // the children need to be DataLazy. Wrap them in IDENTITY if required
619     {     {
620      m_left=dynamic_pointer_cast<DataLazy>(left);      m_left=dynamic_pointer_cast<DataLazy>(left);
621     }     }
# Line 225  DataLazy::DataLazy(DataAbstract_ptr left Line 631  DataLazy::DataLazy(DataAbstract_ptr left
631     {     {
632      m_right=DataLazy_ptr(new DataLazy(right));      m_right=DataLazy_ptr(new DataLazy(right));
633     }     }
634       char lt=m_left->m_readytype;
635       char rt=m_right->m_readytype;
636       if (lt=='E' || rt=='E')
637       {
638        m_readytype='E';
639       }
640       else if (lt=='T' || rt=='T')
641       {
642        m_readytype='T';
643       }
644       else
645       {
646        m_readytype='C';
647       }
648       m_samplesize=getNumDPPSample()*getNoValues();
649       m_children=m_left->m_children+m_right->m_children+2;
650       m_height=max(m_left->m_height,m_right->m_height)+1;
651       LazyNodeSetup();
652       SIZELIMIT
653    LAZYDEBUG(cout << "(4)Lazy created with " << m_samplesize << endl;)
654    }
655    
656    
657    DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, int axis_offset)
658        : parent(left->getFunctionSpace(), resultShape(left,op, axis_offset)),
659        m_op(op),
660        m_axis_offset(axis_offset),
661        m_transpose(0),
662        m_tol(0)
663    {
664       if ((getOpgroup(op)!=G_NP1OUT_P))
665       {
666        throw DataException("Programmer error - constructor DataLazy(left, op, ax) will only process UNARY operations which require parameters.");
667       }
668       DataLazy_ptr lleft;
669       if (!left->isLazy())
670       {
671        lleft=DataLazy_ptr(new DataLazy(left));
672       }
673       else
674       {
675        lleft=dynamic_pointer_cast<DataLazy>(left);
676       }
677       m_readytype=lleft->m_readytype;
678       m_left=lleft;
679       m_samplesize=getNumDPPSample()*getNoValues();
680       m_children=m_left->m_children+1;
681       m_height=m_left->m_height+1;
682       LazyNodeSetup();
683       SIZELIMIT
684    LAZYDEBUG(cout << "(5)Lazy created with " << m_samplesize << endl;)
685    }
686    
687    DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, double tol)
688        : parent(left->getFunctionSpace(), left->getShape()),
689        m_op(op),
690        m_axis_offset(0),
691        m_transpose(0),
692        m_tol(tol)
693    {
694       if ((getOpgroup(op)!=G_UNARY_P))
695       {
696        throw DataException("Programmer error - constructor DataLazy(left, op, tol) will only process UNARY operations which require parameters.");
697       }
698       DataLazy_ptr lleft;
699       if (!left->isLazy())
700       {
701        lleft=DataLazy_ptr(new DataLazy(left));
702       }
703       else
704       {
705        lleft=dynamic_pointer_cast<DataLazy>(left);
706       }
707       m_readytype=lleft->m_readytype;
708       m_left=lleft;
709       m_samplesize=getNumDPPSample()*getNoValues();
710       m_children=m_left->m_children+1;
711       m_height=m_left->m_height+1;
712       LazyNodeSetup();
713       SIZELIMIT
714    LAZYDEBUG(cout << "(6)Lazy created with " << m_samplesize << endl;)
715    }
716    
717     m_length=resultLength(m_left,m_right,m_op);  
718    DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, const int axis0, const int axis1)
719        : parent(left->getFunctionSpace(), SwapShape(left,axis0,axis1)),
720        m_op(op),
721        m_axis_offset(axis0),
722        m_transpose(axis1),
723        m_tol(0)
724    {
725       if ((getOpgroup(op)!=G_NP1OUT_2P))
726       {
727        throw DataException("Programmer error - constructor DataLazy(left, op, tol) will only process UNARY operations which require two integer parameters.");
728       }
729       DataLazy_ptr lleft;
730       if (!left->isLazy())
731       {
732        lleft=DataLazy_ptr(new DataLazy(left));
733       }
734       else
735       {
736        lleft=dynamic_pointer_cast<DataLazy>(left);
737       }
738       m_readytype=lleft->m_readytype;
739       m_left=lleft;
740     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
741     m_buffsRequired=calcBuffs(m_left, m_right,m_op);     m_children=m_left->m_children+1;
742  cout << "(3)Lazy created with " << m_samplesize << endl;     m_height=m_left->m_height+1;
743       LazyNodeSetup();
744       SIZELIMIT
745    LAZYDEBUG(cout << "(7)Lazy created with " << m_samplesize << endl;)
746  }  }
747    
748    
749  DataLazy::~DataLazy()  namespace
750  {  {
751    
752        inline int max3(int a, int b, int c)
753        {
754        int t=(a>b?a:b);
755        return (t>c?t:c);
756    
757        }
758    }
759    
760    DataLazy::DataLazy(DataAbstract_ptr mask, DataAbstract_ptr left, DataAbstract_ptr right/*, double tol*/)
761        : parent(left->getFunctionSpace(), left->getShape()),
762        m_op(CONDEVAL),
763        m_axis_offset(0),
764        m_transpose(0),
765        m_tol(0)
766    {
767    
768       DataLazy_ptr lmask;
769       DataLazy_ptr lleft;
770       DataLazy_ptr lright;
771       if (!mask->isLazy())
772       {
773        lmask=DataLazy_ptr(new DataLazy(mask));
774       }
775       else
776       {
777        lmask=dynamic_pointer_cast<DataLazy>(mask);
778       }
779       if (!left->isLazy())
780       {
781        lleft=DataLazy_ptr(new DataLazy(left));
782       }
783       else
784       {
785        lleft=dynamic_pointer_cast<DataLazy>(left);
786       }
787       if (!right->isLazy())
788       {
789        lright=DataLazy_ptr(new DataLazy(right));
790       }
791       else
792       {
793        lright=dynamic_pointer_cast<DataLazy>(right);
794       }
795       m_readytype=lmask->m_readytype;
796       if ((lleft->m_readytype!=lright->m_readytype) || (lmask->m_readytype!=lleft->m_readytype))
797       {
798        throw DataException("Programmer Error - condEval arguments must have the same readytype");
799       }
800       m_left=lleft;
801       m_right=lright;
802       m_mask=lmask;
803       m_samplesize=getNumDPPSample()*getNoValues();
804       m_children=m_left->m_children+m_right->m_children+m_mask->m_children+1;
805       m_height=max3(m_left->m_height,m_right->m_height,m_mask->m_height)+1;
806       LazyNodeSetup();
807       SIZELIMIT
808    LAZYDEBUG(cout << "(8)Lazy created with " << m_samplesize << endl;)
809  }  }
810    
811    
812  int  
813  DataLazy::getBuffsRequired() const  DataLazy::~DataLazy()
814  {  {
815      return m_buffsRequired;     delete[] m_sampleids;
816  }  }
817    
818    
819  // the vector and the offset are a place where the method could write its data if it wishes  /*
820  // it is not obligated to do so. For example, if it has its own storage already, it can use that.    \brief Evaluates the expression using methods on Data.
821  // Hence the return value to indicate where the data is actually stored.    This does the work for the collapse method.
822  // Regardless, the storage should be assumed to be used, even if it isn't.    For reasons of efficiency do not call this method on DataExpanded nodes.
823  const double*  */
824  DataLazy::resolveSample(ValueType& v,int sampleNo,  size_t offset ) const  DataReady_ptr
825    DataLazy::collapseToReady()
826  {  {
827    if (m_op==IDENTITY)      if (m_readytype=='E')
828      { // this is more an efficiency concern than anything else
829        throw DataException("Programmer Error - do not use collapse on Expanded data.");
830      }
831      if (m_op==IDENTITY)
832    {    {
833      const ValueType& vec=m_id->getVector();      return m_id;
     return &(vec[m_id->getPointOffset(sampleNo, 0)]);  
834    }    }
835    size_t rightoffset=offset+m_samplesize;    DataReady_ptr pleft=m_left->collapseToReady();
836    const double* left=m_left->resolveSample(v,sampleNo,offset);    Data left(pleft);
837    const double* right=0;    Data right;
838    if (getOpgroup(m_op)==G_BINARY)    if ((getOpgroup(m_op)==G_BINARY) || (getOpgroup(m_op)==G_TENSORPROD))
839    {    {
840      right=m_right->resolveSample(v,sampleNo,rightoffset);      right=Data(m_right->collapseToReady());
841    }    }
842    double* result=&(v[offset]);    Data result;
843      switch(m_op)
844    {    {
845      switch(m_op)      case ADD:
846      {      result=left+right;
     case ADD:       // since these are pointwise ops, pretend each sample is one point  
     tensor_binary_operation(m_samplesize, left, right, result, plus<double>());  
847      break;      break;
848      case SUB:            case SUB:      
849      tensor_binary_operation(m_samplesize, left, right, result, minus<double>());      result=left-right;
850      break;      break;
851      case MUL:            case MUL:      
852      tensor_binary_operation(m_samplesize, left, right, result, multiplies<double>());      result=left*right;
853      break;      break;
854      case DIV:            case DIV:      
855      tensor_binary_operation(m_samplesize, left, right, result, divides<double>());      result=left/right;
856      break;      break;
 // unary ops  
857      case SIN:      case SIN:
858      tensor_unary_operation(m_samplesize, left, result, ::sin);      result=left.sin();  
859        break;
860        case COS:
861        result=left.cos();
862        break;
863        case TAN:
864        result=left.tan();
865        break;
866        case ASIN:
867        result=left.asin();
868        break;
869        case ACOS:
870        result=left.acos();
871        break;
872        case ATAN:
873        result=left.atan();
874        break;
875        case SINH:
876        result=left.sinh();
877        break;
878        case COSH:
879        result=left.cosh();
880        break;
881        case TANH:
882        result=left.tanh();
883        break;
884        case ERF:
885        result=left.erf();
886        break;
887       case ASINH:
888        result=left.asinh();
889        break;
890       case ACOSH:
891        result=left.acosh();
892        break;
893       case ATANH:
894        result=left.atanh();
895        break;
896        case LOG10:
897        result=left.log10();
898        break;
899        case LOG:
900        result=left.log();
901        break;
902        case SIGN:
903        result=left.sign();
904        break;
905        case ABS:
906        result=left.abs();
907        break;
908        case NEG:
909        result=left.neg();
910        break;
911        case POS:
912        // it doesn't mean anything for delayed.
913        // it will just trigger a deep copy of the lazy object
914        throw DataException("Programmer error - POS not supported for lazy data.");
915        break;
916        case EXP:
917        result=left.exp();
918        break;
919        case SQRT:
920        result=left.sqrt();
921        break;
922        case RECIP:
923        result=left.oneOver();
924        break;
925        case GZ:
926        result=left.wherePositive();
927        break;
928        case LZ:
929        result=left.whereNegative();
930        break;
931        case GEZ:
932        result=left.whereNonNegative();
933        break;
934        case LEZ:
935        result=left.whereNonPositive();
936        break;
937        case NEZ:
938        result=left.whereNonZero(m_tol);
939        break;
940        case EZ:
941        result=left.whereZero(m_tol);
942        break;
943        case SYM:
944        result=left.symmetric();
945        break;
946        case NSYM:
947        result=left.nonsymmetric();
948        break;
949        case PROD:
950        result=C_GeneralTensorProduct(left,right,m_axis_offset, m_transpose);
951        break;
952        case TRANS:
953        result=left.transpose(m_axis_offset);
954        break;
955        case TRACE:
956        result=left.trace(m_axis_offset);
957        break;
958        case SWAP:
959        result=left.swapaxes(m_axis_offset, m_transpose);
960        break;
961        case MINVAL:
962        result=left.minval();
963        break;
964        case MAXVAL:
965        result=left.minval();
966        break;
967        default:
968        throw DataException("Programmer error - collapseToReady does not know how to resolve operator "+opToString(m_op)+".");
969      }
970      return result.borrowReadyPtr();
971    }
972    
973    /*
974       \brief Converts the DataLazy into an IDENTITY storing the value of the expression.
975       This method uses the original methods on the Data class to evaluate the expressions.
976       For this reason, it should not be used on DataExpanded instances. (To do so would defeat
977       the purpose of using DataLazy in the first place).
978    */
979    void
980    DataLazy::collapse()
981    {
982      if (m_op==IDENTITY)
983      {
984        return;
985      }
986      if (m_readytype=='E')
987      { // this is more an efficiency concern than anything else
988        throw DataException("Programmer Error - do not use collapse on Expanded data.");
989      }
990      m_id=collapseToReady();
991      m_op=IDENTITY;
992    }
993    
994    
995    
996    
997    
998    
999    #define PROC_OP(TYPE,X)                               \
1000        for (int j=0;j<onumsteps;++j)\
1001        {\
1002          for (int i=0;i<numsteps;++i,resultp+=resultStep) \
1003          { \
1004    LAZYDEBUG(cout << "[left,right]=[" << lroffset << "," << rroffset << "]" << endl;)\
1005    LAZYDEBUG(cout << "{left,right}={" << (*left)[lroffset] << "," << (*right)[rroffset] << "}\n";)\
1006             tensor_binary_operation< TYPE >(chunksize, &((*left)[lroffset]), &((*right)[rroffset]), resultp, X); \
1007    LAZYDEBUG(cout << " result=      " << resultp[0] << endl;) \
1008             lroffset+=leftstep; \
1009             rroffset+=rightstep; \
1010          }\
1011          lroffset+=oleftstep;\
1012          rroffset+=orightstep;\
1013        }
1014    
1015    
1016    // The result will be stored in m_samples
1017    // The return value is a pointer to the DataVector, offset is the offset within the return value
1018    const DataTypes::ValueType*
1019    DataLazy::resolveNodeSample(int tid, int sampleNo, size_t& roffset)
1020    {
1021    LAZYDEBUG(cout << "Resolve sample " << toString() << endl;)
1022        // collapse so we have a 'E' node or an IDENTITY for some other type
1023      if (m_readytype!='E' && m_op!=IDENTITY)
1024      {
1025        collapse();
1026      }
1027      if (m_op==IDENTITY)  
1028      {
1029        const ValueType& vec=m_id->getVectorRO();
1030        roffset=m_id->getPointOffset(sampleNo, 0);
1031    #ifdef LAZY_STACK_PROF
1032    int x;
1033    if (&x<stackend[omp_get_thread_num()])
1034    {
1035           stackend[omp_get_thread_num()]=&x;
1036    }
1037    #endif
1038        return &(vec);
1039      }
1040      if (m_readytype!='E')
1041      {
1042        throw DataException("Programmer Error - Collapse did not produce an expanded node.");
1043      }
1044      if (m_sampleids[tid]==sampleNo)
1045      {
1046        roffset=tid*m_samplesize;
1047        return &(m_samples);        // sample is already resolved
1048      }
1049      m_sampleids[tid]=sampleNo;
1050    
1051      switch (getOpgroup(m_op))
1052      {
1053      case G_UNARY:
1054      case G_UNARY_P: return resolveNodeUnary(tid, sampleNo, roffset);
1055      case G_BINARY: return resolveNodeBinary(tid, sampleNo, roffset);
1056      case G_NP1OUT: return resolveNodeNP1OUT(tid, sampleNo, roffset);
1057      case G_NP1OUT_P: return resolveNodeNP1OUT_P(tid, sampleNo, roffset);
1058      case G_TENSORPROD: return resolveNodeTProd(tid, sampleNo, roffset);
1059      case G_NP1OUT_2P: return resolveNodeNP1OUT_2P(tid, sampleNo, roffset);
1060      case G_REDUCTION: return resolveNodeReduction(tid, sampleNo, roffset);
1061      case G_CONDEVAL: return resolveNodeCondEval(tid, sampleNo, roffset);
1062      default:
1063        throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");
1064      }
1065    }
1066    
1067    const DataTypes::ValueType*
1068    DataLazy::resolveNodeUnary(int tid, int sampleNo, size_t& roffset)
1069    {
1070        // we assume that any collapsing has been done before we get here
1071        // since we only have one argument we don't need to think about only
1072        // processing single points.
1073        // we will also know we won't get identity nodes
1074      if (m_readytype!='E')
1075      {
1076        throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
1077      }
1078      if (m_op==IDENTITY)
1079      {
1080        throw DataException("Programmer error - resolveNodeUnary should not be called on identity nodes.");
1081      }
1082      const DataTypes::ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, roffset);
1083      const double* left=&((*leftres)[roffset]);
1084      roffset=m_samplesize*tid;
1085      double* result=&(m_samples[roffset]);
1086      switch (m_op)
1087      {
1088        case SIN:  
1089        tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sin);
1090      break;      break;
1091      case COS:      case COS:
1092      tensor_unary_operation(m_samplesize, left, result, ::cos);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cos);
1093      break;      break;
1094      case TAN:      case TAN:
1095      tensor_unary_operation(m_samplesize, left, result, ::tan);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tan);
1096      break;      break;
1097      case ASIN:      case ASIN:
1098      tensor_unary_operation(m_samplesize, left, result, ::asin);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::asin);
1099      break;      break;
1100      case ACOS:      case ACOS:
1101      tensor_unary_operation(m_samplesize, left, result, ::acos);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::acos);
1102      break;      break;
1103      case ATAN:      case ATAN:
1104      tensor_unary_operation(m_samplesize, left, result, ::atan);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::atan);
1105      break;      break;
1106      case SINH:      case SINH:
1107      tensor_unary_operation(m_samplesize, left, result, ::sinh);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sinh);
1108      break;      break;
1109      case COSH:      case COSH:
1110      tensor_unary_operation(m_samplesize, left, result, ::cosh);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cosh);
1111      break;      break;
1112      case TANH:      case TANH:
1113      tensor_unary_operation(m_samplesize, left, result, ::tanh);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tanh);
1114      break;      break;
1115      case ERF:      case ERF:
1116  #ifdef _WIN32  #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1117      throw DataException("Error - Data:: erf function is not supported on _WIN32 platforms.");      throw DataException("Error - Data:: erf function is not supported on _WIN32 platforms.");
1118  #else  #else
1119      tensor_unary_operation(m_samplesize, left, result, ::erf);      tensor_unary_operation(m_samplesize, left, result, ::erf);
1120      break;      break;
1121  #endif  #endif
1122     case ASINH:     case ASINH:
1123  #ifdef _WIN32  #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1124      tensor_unary_operation(m_samplesize, left, result, escript::asinh_substitute);      tensor_unary_operation(m_samplesize, left, result, escript::asinh_substitute);
1125  #else  #else
1126      tensor_unary_operation(m_samplesize, left, result, ::asinh);      tensor_unary_operation(m_samplesize, left, result, ::asinh);
1127  #endif    #endif  
1128      break;      break;
1129     case ACOSH:     case ACOSH:
1130  #ifdef _WIN32  #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1131      tensor_unary_operation(m_samplesize, left, result, escript::acosh_substitute);      tensor_unary_operation(m_samplesize, left, result, escript::acosh_substitute);
1132  #else  #else
1133      tensor_unary_operation(m_samplesize, left, result, ::acosh);      tensor_unary_operation(m_samplesize, left, result, ::acosh);
1134  #endif    #endif  
1135      break;      break;
1136     case ATANH:     case ATANH:
1137  #ifdef _WIN32  #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1138      tensor_unary_operation(m_samplesize, left, result, escript::atanh_substitute);      tensor_unary_operation(m_samplesize, left, result, escript::atanh_substitute);
1139  #else  #else
1140      tensor_unary_operation(m_samplesize, left, result, ::atanh);      tensor_unary_operation(m_samplesize, left, result, ::atanh);
1141  #endif    #endif  
1142      break;      break;
1143      case LOG10:      case LOG10:
1144      tensor_unary_operation(m_samplesize, left, result, ::log10);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log10);
1145      break;      break;
1146      case LOG:      case LOG:
1147      tensor_unary_operation(m_samplesize, left, result, ::log);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log);
1148      break;      break;
1149      case SIGN:      case SIGN:
1150      tensor_unary_operation(m_samplesize, left, result, escript::fsign);      tensor_unary_operation(m_samplesize, left, result, escript::fsign);
1151      break;      break;
1152      case ABS:      case ABS:
1153      tensor_unary_operation(m_samplesize, left, result, ::fabs);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::fabs);
1154      break;      break;
1155      case NEG:      case NEG:
1156      tensor_unary_operation(m_samplesize, left, result, negate<double>());      tensor_unary_operation(m_samplesize, left, result, negate<double>());
# Line 357  DataLazy::resolveSample(ValueType& v,int Line 1161  DataLazy::resolveSample(ValueType& v,int
1161      throw DataException("Programmer error - POS not supported for lazy data.");      throw DataException("Programmer error - POS not supported for lazy data.");
1162      break;      break;
1163      case EXP:      case EXP:
1164      tensor_unary_operation(m_samplesize, left, result, ::exp);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::exp);
1165      break;      break;
1166      case SQRT:      case SQRT:
1167      tensor_unary_operation(m_samplesize, left, result, ::sqrt);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sqrt);
1168      break;      break;
1169      case RECIP:      case RECIP:
1170      tensor_unary_operation(m_samplesize, left, result, bind1st(divides<double>(),1.));      tensor_unary_operation(m_samplesize, left, result, bind1st(divides<double>(),1.));
# Line 377  DataLazy::resolveSample(ValueType& v,int Line 1181  DataLazy::resolveSample(ValueType& v,int
1181      case LEZ:      case LEZ:
1182      tensor_unary_operation(m_samplesize, left, result, bind2nd(less_equal<double>(),0.0));      tensor_unary_operation(m_samplesize, left, result, bind2nd(less_equal<double>(),0.0));
1183      break;      break;
1184    // There are actually G_UNARY_P but I don't see a compelling reason to treat them differently
1185        case NEZ:
1186        tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsGT(),m_tol));
1187        break;
1188        case EZ:
1189        tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsLTE(),m_tol));
1190        break;
1191    
1192      default:      default:
1193      throw DataException("Programmer error - do not know how to resolve operator "+opToString(m_op)+".");      throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");
1194      }    }
1195      return &(m_samples);
1196    }
1197    
1198    
1199    const DataTypes::ValueType*
1200    DataLazy::resolveNodeReduction(int tid, int sampleNo, size_t& roffset)
1201    {
1202        // we assume that any collapsing has been done before we get here
1203        // since we only have one argument we don't need to think about only
1204        // processing single points.
1205        // we will also know we won't get identity nodes
1206      if (m_readytype!='E')
1207      {
1208        throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
1209      }
1210      if (m_op==IDENTITY)
1211      {
1212        throw DataException("Programmer error - resolveNodeUnary should not be called on identity nodes.");
1213      }
1214      size_t loffset=0;
1215      const DataTypes::ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, loffset);
1216    
1217      roffset=m_samplesize*tid;
1218      unsigned int ndpps=getNumDPPSample();
1219      unsigned int psize=DataTypes::noValues(m_left->getShape());
1220      double* result=&(m_samples[roffset]);
1221      switch (m_op)
1222      {
1223        case MINVAL:
1224        {
1225          for (unsigned int z=0;z<ndpps;++z)
1226          {
1227            FMin op;
1228            *result=DataMaths::reductionOp(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max());
1229            loffset+=psize;
1230            result++;
1231          }
1232        }
1233        break;
1234        case MAXVAL:
1235        {
1236          for (unsigned int z=0;z<ndpps;++z)
1237          {
1238          FMax op;
1239          *result=DataMaths::reductionOp(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max()*-1);
1240          loffset+=psize;
1241          result++;
1242          }
1243        }
1244        break;
1245        default:
1246        throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");
1247      }
1248      return &(m_samples);
1249    }
1250    
1251    const DataTypes::ValueType*
1252    DataLazy::resolveNodeNP1OUT(int tid, int sampleNo, size_t& roffset)
1253    {
1254        // we assume that any collapsing has been done before we get here
1255        // since we only have one argument we don't need to think about only
1256        // processing single points.
1257      if (m_readytype!='E')
1258      {
1259        throw DataException("Programmer error - resolveNodeNP1OUT should only be called on expanded Data.");
1260    }    }
1261    return result;    if (m_op==IDENTITY)
1262      {
1263        throw DataException("Programmer error - resolveNodeNP1OUT should not be called on identity nodes.");
1264      }
1265      size_t subroffset;
1266      const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1267      roffset=m_samplesize*tid;
1268      size_t loop=0;
1269      size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1270      size_t step=getNoValues();
1271      size_t offset=roffset;
1272      switch (m_op)
1273      {
1274        case SYM:
1275        for (loop=0;loop<numsteps;++loop)
1276        {
1277            DataMaths::symmetric(*leftres,m_left->getShape(),subroffset, m_samples, getShape(), offset);
1278            subroffset+=step;
1279            offset+=step;
1280        }
1281        break;
1282        case NSYM:
1283        for (loop=0;loop<numsteps;++loop)
1284        {
1285            DataMaths::nonsymmetric(*leftres,m_left->getShape(),subroffset, m_samples, getShape(), offset);
1286            subroffset+=step;
1287            offset+=step;
1288        }
1289        break;
1290        default:
1291        throw DataException("Programmer error - resolveNP1OUT can not resolve operator "+opToString(m_op)+".");
1292      }
1293      return &m_samples;
1294    }
1295    
1296    const DataTypes::ValueType*
1297    DataLazy::resolveNodeNP1OUT_P(int tid, int sampleNo, size_t& roffset)
1298    {
1299        // we assume that any collapsing has been done before we get here
1300        // since we only have one argument we don't need to think about only
1301        // processing single points.
1302      if (m_readytype!='E')
1303      {
1304        throw DataException("Programmer error - resolveNodeNP1OUT_P should only be called on expanded Data.");
1305      }
1306      if (m_op==IDENTITY)
1307      {
1308        throw DataException("Programmer error - resolveNodeNP1OUT_P should not be called on identity nodes.");
1309      }
1310      size_t subroffset;
1311      size_t offset;
1312      const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1313      roffset=m_samplesize*tid;
1314      offset=roffset;
1315      size_t loop=0;
1316      size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1317      size_t outstep=getNoValues();
1318      size_t instep=m_left->getNoValues();
1319      switch (m_op)
1320      {
1321        case TRACE:
1322        for (loop=0;loop<numsteps;++loop)
1323        {
1324                DataMaths::trace(*leftres,m_left->getShape(),subroffset, m_samples ,getShape(),offset,m_axis_offset);
1325            subroffset+=instep;
1326            offset+=outstep;
1327        }
1328        break;
1329        case TRANS:
1330        for (loop=0;loop<numsteps;++loop)
1331        {
1332                DataMaths::transpose(*leftres,m_left->getShape(),subroffset, m_samples, getShape(),offset,m_axis_offset);
1333            subroffset+=instep;
1334            offset+=outstep;
1335        }
1336        break;
1337        default:
1338        throw DataException("Programmer error - resolveNP1OUTP can not resolve operator "+opToString(m_op)+".");
1339      }
1340      return &m_samples;
1341    }
1342    
1343    
1344    const DataTypes::ValueType*
1345    DataLazy::resolveNodeNP1OUT_2P(int tid, int sampleNo, size_t& roffset)
1346    {
1347      if (m_readytype!='E')
1348      {
1349        throw DataException("Programmer error - resolveNodeNP1OUT_2P should only be called on expanded Data.");
1350      }
1351      if (m_op==IDENTITY)
1352      {
1353        throw DataException("Programmer error - resolveNodeNP1OUT_2P should not be called on identity nodes.");
1354      }
1355      size_t subroffset;
1356      size_t offset;
1357      const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1358      roffset=m_samplesize*tid;
1359      offset=roffset;
1360      size_t loop=0;
1361      size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1362      size_t outstep=getNoValues();
1363      size_t instep=m_left->getNoValues();
1364      switch (m_op)
1365      {
1366        case SWAP:
1367        for (loop=0;loop<numsteps;++loop)
1368        {
1369                DataMaths::swapaxes(*leftres,m_left->getShape(),subroffset, m_samples, getShape(),offset, m_axis_offset, m_transpose);
1370            subroffset+=instep;
1371            offset+=outstep;
1372        }
1373        break;
1374        default:
1375        throw DataException("Programmer error - resolveNodeNP1OUT2P can not resolve operator "+opToString(m_op)+".");
1376      }
1377      return &m_samples;
1378    }
1379    
1380    const DataTypes::ValueType*
1381    DataLazy::resolveNodeCondEval(int tid, int sampleNo, size_t& roffset)
1382    {
1383      if (m_readytype!='E')
1384      {
1385        throw DataException("Programmer error - resolveNodeCondEval should only be called on expanded Data.");
1386      }
1387      if (m_op!=CONDEVAL)
1388      {
1389        throw DataException("Programmer error - resolveNodeCondEval should only be called on CONDEVAL nodes.");
1390      }
1391      size_t subroffset;
1392    
1393      const ValueType* maskres=m_mask->resolveNodeSample(tid, sampleNo, subroffset);
1394      const ValueType* srcres=0;
1395      if ((*maskres)[subroffset]>0)
1396      {
1397        srcres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1398      }
1399      else
1400      {
1401        srcres=m_right->resolveNodeSample(tid, sampleNo, subroffset);
1402      }
1403    
1404      // Now we need to copy the result
1405    
1406      roffset=m_samplesize*tid;
1407      for (int i=0;i<m_samplesize;++i)
1408      {
1409        m_samples[roffset+i]=(*srcres)[subroffset+i];  
1410      }
1411    
1412      return &m_samples;
1413    }
1414    
1415    // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1416    // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1417    // If both children are expanded, then we can process them in a single operation (we treat
1418    // the whole sample as one big datapoint.
1419    // If one of the children is not expanded, then we need to treat each point in the sample
1420    // individually.
1421    // There is an additional complication when scalar operations are considered.
1422    // For example, 2+Vector.
1423    // In this case each double within the point is treated individually
1424    const DataTypes::ValueType*
1425    DataLazy::resolveNodeBinary(int tid, int sampleNo, size_t& roffset)
1426    {
1427    LAZYDEBUG(cout << "Resolve binary: " << toString() << endl;)
1428    
1429      size_t lroffset=0, rroffset=0;    // offsets in the left and right result vectors
1430        // first work out which of the children are expanded
1431      bool leftExp=(m_left->m_readytype=='E');
1432      bool rightExp=(m_right->m_readytype=='E');
1433      if (!leftExp && !rightExp)
1434      {
1435        throw DataException("Programmer Error - please use collapse if neither argument has type 'E'.");
1436      }
1437      bool leftScalar=(m_left->getRank()==0);
1438      bool rightScalar=(m_right->getRank()==0);
1439      if ((m_left->getRank()!=m_right->getRank()) && (!leftScalar && !rightScalar))
1440      {
1441        throw DataException("resolveBinary - ranks of arguments must match unless one of them is scalar.");
1442      }
1443      size_t leftsize=m_left->getNoValues();
1444      size_t rightsize=m_right->getNoValues();
1445      size_t chunksize=1;           // how many doubles will be processed in one go
1446      int leftstep=0;       // how far should the left offset advance after each step
1447      int rightstep=0;
1448      int numsteps=0;       // total number of steps for the inner loop
1449      int oleftstep=0;  // the o variables refer to the outer loop
1450      int orightstep=0; // The outer loop is only required in cases where there is an extended scalar
1451      int onumsteps=1;
1452      
1453      bool LES=(leftExp && leftScalar); // Left is an expanded scalar
1454      bool RES=(rightExp && rightScalar);
1455      bool LS=(!leftExp && leftScalar); // left is a single scalar
1456      bool RS=(!rightExp && rightScalar);
1457      bool LN=(!leftExp && !leftScalar);    // left is a single non-scalar
1458      bool RN=(!rightExp && !rightScalar);
1459      bool LEN=(leftExp && !leftScalar);    // left is an expanded non-scalar
1460      bool REN=(rightExp && !rightScalar);
1461    
1462      if ((LES && RES) || (LEN && REN)) // both are Expanded scalars or both are expanded non-scalars
1463      {
1464        chunksize=m_left->getNumDPPSample()*leftsize;
1465        leftstep=0;
1466        rightstep=0;
1467        numsteps=1;
1468      }
1469      else if (LES || RES)
1470      {
1471        chunksize=1;
1472        if (LES)        // left is an expanded scalar
1473        {
1474            if (RS)
1475            {
1476               leftstep=1;
1477               rightstep=0;
1478               numsteps=m_left->getNumDPPSample();
1479            }
1480            else        // RN or REN
1481            {
1482               leftstep=0;
1483               oleftstep=1;
1484               rightstep=1;
1485               orightstep=(RN ? -(int)rightsize : 0);
1486               numsteps=rightsize;
1487               onumsteps=m_left->getNumDPPSample();
1488            }
1489        }
1490        else        // right is an expanded scalar
1491        {
1492            if (LS)
1493            {
1494               rightstep=1;
1495               leftstep=0;
1496               numsteps=m_right->getNumDPPSample();
1497            }
1498            else
1499            {
1500               rightstep=0;
1501               orightstep=1;
1502               leftstep=1;
1503               oleftstep=(LN ? -(int)leftsize : 0);
1504               numsteps=leftsize;
1505               onumsteps=m_right->getNumDPPSample();
1506            }
1507        }
1508      }
1509      else  // this leaves (LEN, RS), (LEN, RN) and their transposes
1510      {
1511        if (LEN)    // and Right will be a single value
1512        {
1513            chunksize=rightsize;
1514            leftstep=rightsize;
1515            rightstep=0;
1516            numsteps=m_left->getNumDPPSample();
1517            if (RS)
1518            {
1519               numsteps*=leftsize;
1520            }
1521        }
1522        else    // REN
1523        {
1524            chunksize=leftsize;
1525            rightstep=leftsize;
1526            leftstep=0;
1527            numsteps=m_right->getNumDPPSample();
1528            if (LS)
1529            {
1530               numsteps*=rightsize;
1531            }
1532        }
1533      }
1534    
1535      int resultStep=max(leftstep,rightstep);   // only one (at most) should be !=0
1536        // Get the values of sub-expressions
1537      const ValueType* left=m_left->resolveNodeSample(tid,sampleNo,lroffset);  
1538      const ValueType* right=m_right->resolveNodeSample(tid,sampleNo,rroffset);
1539    LAZYDEBUG(cout << "Post sub calls in " << toString() << endl;)
1540    LAZYDEBUG(cout << "shapes=" << DataTypes::shapeToString(m_left->getShape()) << "," << DataTypes::shapeToString(m_right->getShape()) << endl;)
1541    LAZYDEBUG(cout << "chunksize=" << chunksize << endl << "leftstep=" << leftstep << " rightstep=" << rightstep;)
1542    LAZYDEBUG(cout << " numsteps=" << numsteps << endl << "oleftstep=" << oleftstep << " orightstep=" << orightstep;)
1543    LAZYDEBUG(cout << "onumsteps=" << onumsteps << endl;)
1544    LAZYDEBUG(cout << " DPPS=" << m_left->getNumDPPSample() << "," <<m_right->getNumDPPSample() << endl;)
1545    LAZYDEBUG(cout << "" << LS << RS << LN << RN << LES << RES <<LEN << REN <<   endl;)
1546    
1547    LAZYDEBUG(cout << "Left res["<< lroffset<< "]=" << (*left)[lroffset] << endl;)
1548    LAZYDEBUG(cout << "Right res["<< rroffset<< "]=" << (*right)[rroffset] << endl;)
1549    
1550    
1551      roffset=m_samplesize*tid;
1552      double* resultp=&(m_samples[roffset]);        // results are stored at the vector offset we received
1553      switch(m_op)
1554      {
1555        case ADD:
1556            PROC_OP(NO_ARG,plus<double>());
1557        break;
1558        case SUB:
1559        PROC_OP(NO_ARG,minus<double>());
1560        break;
1561        case MUL:
1562        PROC_OP(NO_ARG,multiplies<double>());
1563        break;
1564        case DIV:
1565        PROC_OP(NO_ARG,divides<double>());
1566        break;
1567        case POW:
1568           PROC_OP(double (double,double),::pow);
1569        break;
1570        default:
1571        throw DataException("Programmer error - resolveBinary can not resolve operator "+opToString(m_op)+".");
1572      }
1573    LAZYDEBUG(cout << "Result res[" << roffset<< "]" << m_samples[roffset] << endl;)
1574      return &m_samples;
1575    }
1576    
1577    
1578    // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1579    // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1580    // unlike the other resolve helpers, we must treat these datapoints separately.
1581    const DataTypes::ValueType*
1582    DataLazy::resolveNodeTProd(int tid, int sampleNo, size_t& roffset)
1583    {
1584    LAZYDEBUG(cout << "Resolve TensorProduct: " << toString() << endl;)
1585    
1586      size_t lroffset=0, rroffset=0;    // offsets in the left and right result vectors
1587        // first work out which of the children are expanded
1588      bool leftExp=(m_left->m_readytype=='E');
1589      bool rightExp=(m_right->m_readytype=='E');
1590      int steps=getNumDPPSample();
1591      int leftStep=(leftExp? m_left->getNoValues() : 0);        // do not have scalars as input to this method
1592      int rightStep=(rightExp?m_right->getNoValues() : 0);
1593    
1594      int resultStep=getNoValues();
1595      roffset=m_samplesize*tid;
1596      size_t offset=roffset;
1597    
1598      const ValueType* left=m_left->resolveNodeSample(tid, sampleNo, lroffset);
1599    
1600      const ValueType* right=m_right->resolveNodeSample(tid, sampleNo, rroffset);
1601    
1602    LAZYDEBUG(cerr << "[Left shape]=" << DataTypes::shapeToString(m_left->getShape()) << "\n[Right shape]=" << DataTypes::shapeToString(m_right->getShape()) << " result=" <<DataTypes::shapeToString(getShape()) <<  endl;
1603    cout << getNoValues() << endl;)
1604    
1605    
1606    LAZYDEBUG(cerr << "Post sub calls: " << toString() << endl;)
1607    LAZYDEBUG(cout << "LeftExp=" << leftExp << " rightExp=" << rightExp << endl;)
1608    LAZYDEBUG(cout << "LeftR=" << m_left->getRank() << " rightExp=" << m_right->getRank() << endl;)
1609    LAZYDEBUG(cout << "LeftSize=" << m_left->getNoValues() << " RightSize=" << m_right->getNoValues() << endl;)
1610    LAZYDEBUG(cout << "m_samplesize=" << m_samplesize << endl;)
1611    LAZYDEBUG(cout << "outputshape=" << DataTypes::shapeToString(getShape()) << endl;)
1612    LAZYDEBUG(cout << "DPPS=" << m_right->getNumDPPSample() <<"."<<endl;)
1613    
1614      double* resultp=&(m_samples[offset]);     // results are stored at the vector offset we received
1615      switch(m_op)
1616      {
1617        case PROD:
1618        for (int i=0;i<steps;++i,resultp+=resultStep)
1619        {
1620              const double *ptr_0 = &((*left)[lroffset]);
1621              const double *ptr_1 = &((*right)[rroffset]);
1622    
1623    LAZYDEBUG(cout << DataTypes::pointToString(*left, m_left->getShape(),lroffset,"LEFT") << endl;)
1624    LAZYDEBUG(cout << DataTypes::pointToString(*right,m_right->getShape(),rroffset, "RIGHT") << endl;)
1625    
1626              matrix_matrix_product(m_SL, m_SM, m_SR, ptr_0, ptr_1, resultp, m_transpose);
1627    
1628          lroffset+=leftStep;
1629          rroffset+=rightStep;
1630        }
1631        break;
1632        default:
1633        throw DataException("Programmer error - resolveTProduct can not resolve operator "+opToString(m_op)+".");
1634      }
1635      roffset=offset;
1636      return &m_samples;
1637    }
1638    
1639    
1640    const DataTypes::ValueType*
1641    DataLazy::resolveSample(int sampleNo, size_t& roffset)
1642    {
1643    #ifdef _OPENMP
1644        int tid=omp_get_thread_num();
1645    #else
1646        int tid=0;
1647    #endif
1648    
1649    #ifdef LAZY_STACK_PROF
1650        stackstart[tid]=&tid;
1651        stackend[tid]=&tid;
1652        const DataTypes::ValueType* r=resolveNodeSample(tid, sampleNo, roffset);
1653        size_t d=(size_t)stackstart[tid]-(size_t)stackend[tid];
1654        #pragma omp critical
1655        if (d>maxstackuse)
1656        {
1657    cout << "Max resolve Stack use " << d << endl;
1658            maxstackuse=d;
1659        }
1660        return r;
1661    #else
1662        return resolveNodeSample(tid, sampleNo, roffset);
1663    #endif
1664    }
1665    
1666    
1667    // This needs to do the work of the identity constructor
1668    void
1669    DataLazy::resolveToIdentity()
1670    {
1671       if (m_op==IDENTITY)
1672        return;
1673       DataReady_ptr p=resolveNodeWorker();
1674       makeIdentity(p);
1675  }  }
1676    
1677    void DataLazy::makeIdentity(const DataReady_ptr& p)
1678    {
1679       m_op=IDENTITY;
1680       m_axis_offset=0;
1681       m_transpose=0;
1682       m_SL=m_SM=m_SR=0;
1683       m_children=m_height=0;
1684       m_id=p;
1685       if(p->isConstant()) {m_readytype='C';}
1686       else if(p->isExpanded()) {m_readytype='E';}
1687       else if (p->isTagged()) {m_readytype='T';}
1688       else {throw DataException("Unknown DataReady instance in convertToIdentity constructor.");}
1689       m_samplesize=p->getNumDPPSample()*p->getNoValues();
1690       m_left.reset();
1691       m_right.reset();
1692    }
1693    
1694    
1695  DataReady_ptr  DataReady_ptr
1696  DataLazy::resolve()  DataLazy::resolve()
1697  {  {
1698    // This is broken!     We need to have a buffer per thread!      resolveToIdentity();
1699    // so the allocation of v needs to move inside the loop somehow      return m_id;
1700    }
1701    
 cout << "Sample size=" << m_samplesize << endl;  
 cout << "Buffers=" << m_buffsRequired << endl;  
1702    
1703    size_t threadbuffersize=m_samplesize*(max(1,m_buffsRequired)+1);  /* This is really a static method but I think that caused problems in windows */
1704    int numthreads=1;  void
1705    DataLazy::resolveGroupWorker(std::vector<DataLazy*>& dats)
1706    {
1707      if (dats.empty())
1708      {
1709        return;
1710      }
1711      vector<DataLazy*> work;
1712      FunctionSpace fs=dats[0]->getFunctionSpace();
1713      bool match=true;
1714      for (int i=dats.size()-1;i>=0;--i)
1715      {
1716        if (dats[i]->m_readytype!='E')
1717        {
1718            dats[i]->collapse();
1719        }
1720        if (dats[i]->m_op!=IDENTITY)
1721        {
1722            work.push_back(dats[i]);
1723            if (fs!=dats[i]->getFunctionSpace())
1724            {
1725                match=false;
1726            }
1727        }
1728      }
1729      if (work.empty())
1730      {
1731        return;     // no work to do
1732      }
1733      if (match)    // all functionspaces match.  Yes I realise this is overly strict
1734      {     // it is possible that dats[0] is one of the objects which we discarded and
1735            // all the other functionspaces match.
1736        vector<DataExpanded*> dep;
1737        vector<ValueType*> vecs;
1738        for (int i=0;i<work.size();++i)
1739        {
1740            dep.push_back(new DataExpanded(fs,work[i]->getShape(), ValueType(work[i]->getNoValues())));
1741            vecs.push_back(&(dep[i]->getVectorRW()));
1742        }
1743        int totalsamples=work[0]->getNumSamples();
1744        const ValueType* res=0; // Storage for answer
1745        int sample;
1746        #pragma omp parallel private(sample, res)
1747        {
1748            size_t roffset=0;
1749            #pragma omp for schedule(static)
1750            for (sample=0;sample<totalsamples;++sample)
1751            {
1752            roffset=0;
1753            int j;
1754            for (j=work.size()-1;j>=0;--j)
1755            {
1756  #ifdef _OPENMP  #ifdef _OPENMP
1757    numthreads=omp_get_max_threads();                  res=work[j]->resolveNodeSample(omp_get_thread_num(),sample,roffset);
1758    int threadnum=0;  #else
1759  #endif                  res=work[j]->resolveNodeSample(0,sample,roffset);
1760    ValueType v(numthreads*threadbuffersize);  #endif
1761  cout << "Buffer created with size=" << v.size() << endl;                  DataVector::size_type outoffset=dep[j]->getPointOffset(sample,0);
1762    ValueType dummy(getNoValues());                  memcpy(&((*vecs[j])[outoffset]),&((*res)[roffset]),work[j]->m_samplesize*sizeof(DataVector::ElementType));
1763    DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(),dummy);          }
1764    ValueType& resvec=result->getVector();          }
1765        }
1766        // Now we need to load the new results as identity ops into the lazy nodes
1767        for (int i=work.size()-1;i>=0;--i)
1768        {
1769            work[i]->makeIdentity(boost::dynamic_pointer_cast<DataReady>(dep[i]->getPtr()));
1770        }
1771      }
1772      else  // functionspaces do not match
1773      {
1774        for (int i=0;i<work.size();++i)
1775        {
1776            work[i]->resolveToIdentity();
1777        }
1778      }
1779    }
1780    
1781    
1782    
1783    // This version of resolve uses storage in each node to hold results
1784    DataReady_ptr
1785    DataLazy::resolveNodeWorker()
1786    {
1787      if (m_readytype!='E')     // if the whole sub-expression is Constant or Tagged, then evaluate it normally
1788      {
1789        collapse();
1790      }
1791      if (m_op==IDENTITY)       // So a lazy expression of Constant or Tagged data will be returned here.
1792      {
1793        return m_id;
1794      }
1795        // from this point on we must have m_op!=IDENTITY and m_readytype=='E'
1796      DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(),  ValueType(getNoValues()));
1797      ValueType& resvec=result->getVectorRW();
1798    DataReady_ptr resptr=DataReady_ptr(result);    DataReady_ptr resptr=DataReady_ptr(result);
1799    
1800    int sample;    int sample;
   int resoffset;  
1801    int totalsamples=getNumSamples();    int totalsamples=getNumSamples();
1802    #pragma omp parallel for private(sample,resoffset,threadnum) schedule(static)    const ValueType* res=0;   // Storage for answer
1803    for (sample=0;sample<totalsamples;++sample)  LAZYDEBUG(cout << "Total number of samples=" <<totalsamples << endl;)
1804      #pragma omp parallel private(sample,res)
1805    {    {
1806        size_t roffset=0;
1807    #ifdef LAZY_STACK_PROF
1808        stackstart[omp_get_thread_num()]=&roffset;
1809        stackend[omp_get_thread_num()]=&roffset;
1810    #endif
1811        #pragma omp for schedule(static)
1812        for (sample=0;sample<totalsamples;++sample)
1813        {
1814            roffset=0;
1815  #ifdef _OPENMP  #ifdef _OPENMP
1816      const double* res=resolveSample(v,sample,threadbuffersize*omp_get_thread_num());              res=resolveNodeSample(omp_get_thread_num(),sample,roffset);
1817  #else  #else
1818      const double* res=resolveSample(v,sample,0);   // this would normally be v, but not if its a single IDENTITY op.              res=resolveNodeSample(0,sample,roffset);
1819  #endif  #endif
1820      resoffset=result->getPointOffset(sample,0);  LAZYDEBUG(cout << "Sample #" << sample << endl;)
1821      for (int i=0;i<m_samplesize;++i,++resoffset)    // copy values into the output vector  LAZYDEBUG(cout << "Final res[" << roffset<< "]=" << (*res)[roffset] << (*res)[roffset]<< endl; )
1822      {              DataVector::size_type outoffset=result->getPointOffset(sample,0);
1823      resvec[resoffset]=res[i];              memcpy(&(resvec[outoffset]),&((*res)[roffset]),m_samplesize*sizeof(DataVector::ElementType));
1824      }      }
1825      }
1826    #ifdef LAZY_STACK_PROF
1827      for (int i=0;i<getNumberOfThreads();++i)
1828      {
1829        size_t r=((size_t)stackstart[i] - (size_t)stackend[i]);
1830    //  cout << i << " " << stackstart[i] << " .. " << stackend[i] << " = " <<  r << endl;
1831        if (r>maxstackuse)
1832        {
1833            maxstackuse=r;
1834        }
1835    }    }
1836      cout << "Max resolve Stack use=" << maxstackuse << endl;
1837    #endif
1838    return resptr;    return resptr;
1839  }  }
1840    
# Line 430  std::string Line 1842  std::string
1842  DataLazy::toString() const  DataLazy::toString() const
1843  {  {
1844    ostringstream oss;    ostringstream oss;
1845    oss << "Lazy Data:";    oss << "Lazy Data: [depth=" << m_height<< "] ";
1846    intoString(oss);    switch (escriptParams.getLAZY_STR_FMT())
1847      {
1848      case 1:   // tree format
1849        oss << endl;
1850        intoTreeString(oss,"");
1851        break;
1852      case 2:   // just the depth
1853        break;
1854      default:
1855        intoString(oss);
1856        break;
1857      }
1858    return oss.str();    return oss.str();
1859  }  }
1860    
1861    
1862  void  void
1863  DataLazy::intoString(ostringstream& oss) const  DataLazy::intoString(ostringstream& oss) const
1864  {  {
1865    //    oss << "[" << m_children <<";"<<m_height <<"]";
1866    switch (getOpgroup(m_op))    switch (getOpgroup(m_op))
1867    {    {
1868    case G_IDENTITY:    case G_IDENTITY:
1869        if (m_id->isExpanded())
1870        {
1871           oss << "E";
1872        }
1873        else if (m_id->isTagged())
1874        {
1875          oss << "T";
1876        }
1877        else if (m_id->isConstant())
1878        {
1879          oss << "C";
1880        }
1881        else
1882        {
1883          oss << "?";
1884        }
1885      oss << '@' << m_id.get();      oss << '@' << m_id.get();
1886      break;      break;
1887    case G_BINARY:    case G_BINARY:
# Line 451  DataLazy::intoString(ostringstream& oss) Line 1892  DataLazy::intoString(ostringstream& oss)
1892      oss << ')';      oss << ')';
1893      break;      break;
1894    case G_UNARY:    case G_UNARY:
1895      case G_UNARY_P:
1896      case G_NP1OUT:
1897      case G_NP1OUT_P:
1898      case G_REDUCTION:
1899        oss << opToString(m_op) << '(';
1900        m_left->intoString(oss);
1901        oss << ')';
1902        break;
1903      case G_TENSORPROD:
1904        oss << opToString(m_op) << '(';
1905        m_left->intoString(oss);
1906        oss << ", ";
1907        m_right->intoString(oss);
1908        oss << ')';
1909        break;
1910      case G_NP1OUT_2P:
1911      oss << opToString(m_op) << '(';      oss << opToString(m_op) << '(';
1912      m_left->intoString(oss);      m_left->intoString(oss);
1913        oss << ", " << m_axis_offset << ", " << m_transpose;
1914        oss << ')';
1915        break;
1916      case G_CONDEVAL:
1917        oss << opToString(m_op)<< '(' ;
1918        m_mask->intoString(oss);
1919        oss << " ? ";
1920        m_left->intoString(oss);
1921        oss << " : ";
1922        m_right->intoString(oss);
1923      oss << ')';      oss << ')';
1924      break;      break;
1925    default:    default:
# Line 460  DataLazy::intoString(ostringstream& oss) Line 1927  DataLazy::intoString(ostringstream& oss)
1927    }    }
1928  }  }
1929    
1930  // Note that in this case, deepCopy does not make copies of the leaves.  
1931  // Hopefully copy on write (or whatever we end up using) will take care of this.  void
1932    DataLazy::intoTreeString(ostringstream& oss, string indent) const
1933    {
1934      oss << '[' << m_rank << ':' << setw(3) << m_samplesize << "] " << indent;
1935      switch (getOpgroup(m_op))
1936      {
1937      case G_IDENTITY:
1938        if (m_id->isExpanded())
1939        {
1940           oss << "E";
1941        }
1942        else if (m_id->isTagged())
1943        {
1944          oss << "T";
1945        }
1946        else if (m_id->isConstant())
1947        {
1948          oss << "C";
1949        }
1950        else
1951        {
1952          oss << "?";
1953        }
1954        oss << '@' << m_id.get() << endl;
1955        break;
1956      case G_BINARY:
1957        oss << opToString(m_op) << endl;
1958        indent+='.';
1959        m_left->intoTreeString(oss, indent);
1960        m_right->intoTreeString(oss, indent);
1961        break;
1962      case G_UNARY:
1963      case G_UNARY_P:
1964      case G_NP1OUT:
1965      case G_NP1OUT_P:
1966      case G_REDUCTION:
1967        oss << opToString(m_op) << endl;
1968        indent+='.';
1969        m_left->intoTreeString(oss, indent);
1970        break;
1971      case G_TENSORPROD:
1972        oss << opToString(m_op) << endl;
1973        indent+='.';
1974        m_left->intoTreeString(oss, indent);
1975        m_right->intoTreeString(oss, indent);
1976        break;
1977      case G_NP1OUT_2P:
1978        oss << opToString(m_op) << ", " << m_axis_offset << ", " << m_transpose<< endl;
1979        indent+='.';
1980        m_left->intoTreeString(oss, indent);
1981        break;
1982      default:
1983        oss << "UNKNOWN";
1984      }
1985    }
1986    
1987    
1988  DataAbstract*  DataAbstract*
1989  DataLazy::deepCopy()  DataLazy::deepCopy()
1990  {  {
1991    if (m_op==IDENTITY)    switch (getOpgroup(m_op))
1992    {    {
1993      return new DataLazy(m_left);    // we don't need to copy the child here    case G_IDENTITY:  return new DataLazy(m_id->deepCopy()->getPtr());
1994      case G_UNARY:
1995      case G_REDUCTION:      return new DataLazy(m_left->deepCopy()->getPtr(),m_op);
1996      case G_UNARY_P:   return new DataLazy(m_left->deepCopy()->getPtr(), m_op, m_tol);
1997      case G_BINARY:    return new DataLazy(m_left->deepCopy()->getPtr(),m_right->deepCopy()->getPtr(),m_op);
1998      case G_NP1OUT: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(),m_op);
1999      case G_TENSORPROD: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
2000      case G_NP1OUT_P:   return new DataLazy(m_left->deepCopy()->getPtr(),m_op,  m_axis_offset);
2001      case G_NP1OUT_2P:  return new DataLazy(m_left->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
2002      default:
2003        throw DataException("Programmer error - do not know how to deepcopy operator "+opToString(m_op)+".");
2004    }    }
   return new DataLazy(m_left->deepCopy()->getPtr(),m_right->deepCopy()->getPtr(),m_op);  
2005  }  }
2006    
2007    
2008    
2009    // There is no single, natural interpretation of getLength on DataLazy.
2010    // Instances of DataReady can look at the size of their vectors.
2011    // For lazy though, it could be the size the data would be if it were resolved;
2012    // or it could be some function of the lengths of the DataReady instances which
2013    // form part of the expression.
2014    // Rather than have people making assumptions, I have disabled the method.
2015  DataTypes::ValueType::size_type  DataTypes::ValueType::size_type
2016  DataLazy::getLength() const  DataLazy::getLength() const
2017  {  {
2018    return m_length;    throw DataException("getLength() does not make sense for lazy data.");
2019  }  }
2020    
2021    
# Line 486  DataLazy::getSlice(const DataTypes::Regi Line 2025  DataLazy::getSlice(const DataTypes::Regi
2025    throw DataException("getSlice - not implemented for Lazy objects.");    throw DataException("getSlice - not implemented for Lazy objects.");
2026  }  }
2027    
2028    
2029    // To do this we need to rely on our child nodes
2030    DataTypes::ValueType::size_type
2031    DataLazy::getPointOffset(int sampleNo,
2032                     int dataPointNo)
2033    {
2034      if (m_op==IDENTITY)
2035      {
2036        return m_id->getPointOffset(sampleNo,dataPointNo);
2037      }
2038      if (m_readytype!='E')
2039      {
2040        collapse();
2041        return m_id->getPointOffset(sampleNo,dataPointNo);
2042      }
2043      // at this point we do not have an identity node and the expression will be Expanded
2044      // so we only need to know which child to ask
2045      if (m_left->m_readytype=='E')
2046      {
2047        return m_left->getPointOffset(sampleNo,dataPointNo);
2048      }
2049      else
2050      {
2051        return m_right->getPointOffset(sampleNo,dataPointNo);
2052      }
2053    }
2054    
2055    // To do this we need to rely on our child nodes
2056  DataTypes::ValueType::size_type  DataTypes::ValueType::size_type
2057  DataLazy::getPointOffset(int sampleNo,  DataLazy::getPointOffset(int sampleNo,
2058                   int dataPointNo) const                   int dataPointNo) const
2059  {  {
2060    throw DataException("getPointOffset - not implemented for Lazy objects - yet.");    if (m_op==IDENTITY)
2061      {
2062        return m_id->getPointOffset(sampleNo,dataPointNo);
2063      }
2064      if (m_readytype=='E')
2065      {
2066        // at this point we do not have an identity node and the expression will be Expanded
2067        // so we only need to know which child to ask
2068        if (m_left->m_readytype=='E')
2069        {
2070        return m_left->getPointOffset(sampleNo,dataPointNo);
2071        }
2072        else
2073        {
2074        return m_right->getPointOffset(sampleNo,dataPointNo);
2075        }
2076      }
2077      if (m_readytype=='C')
2078      {
2079        return m_left->getPointOffset(sampleNo,dataPointNo); // which child doesn't matter
2080      }
2081      throw DataException("Programmer error - getPointOffset on lazy data may require collapsing (but this object is marked const).");
2082    }
2083    
2084    
2085    // I have decided to let Data:: handle this issue.
2086    void
2087    DataLazy::setToZero()
2088    {
2089    //   DataTypes::ValueType v(getNoValues(),0);
2090    //   m_id=DataReady_ptr(new DataConstant(getFunctionSpace(),getShape(),v));
2091    //   m_op=IDENTITY;
2092    //   m_right.reset();  
2093    //   m_left.reset();
2094    //   m_readytype='C';
2095    //   m_buffsRequired=1;
2096    
2097      privdebug=privdebug;  // to stop the compiler complaining about unused privdebug
2098      throw DataException("Programmer error - setToZero not supported for DataLazy (DataLazy objects should be read only).");
2099    }
2100    
2101    bool
2102    DataLazy::actsExpanded() const
2103    {
2104        return (m_readytype=='E');
2105  }  }
2106    
2107  }   // end namespace  }   // end namespace

Legend:
Removed from v.1888  
changed lines
  Added in v.4507

  ViewVC Help
Powered by ViewVC 1.1.26