/[escript]/branches/clazy/escriptcore/src/DataLazy.cpp
ViewVC logotype

Diff of /branches/clazy/escriptcore/src/DataLazy.cpp

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

branches/schroedinger/escript/src/DataLazy.cpp revision 1888 by jfenwick, Wed Oct 15 04:00:21 2008 UTC trunk/escript/src/DataLazy.cpp revision 3917 by jfenwick, Thu Jul 5 00:17:50 2012 UTC
# Line 1  Line 1 
1    
2  /*******************************************************  /*******************************************************
3  *  *
4  * Copyright (c) 2003-2008 by University of Queensland  * Copyright (c) 2003-2012 by University of Queensland
5  * Earth Systems Science Computational Center (ESSCC)  * Earth Systems Science Computational Center (ESSCC)
6  * http://www.uq.edu.au/esscc  * http://www.uq.edu.au/esscc
7  *  *
# Line 13  Line 13 
13    
14    
15  #include "DataLazy.h"  #include "DataLazy.h"
16  #ifdef USE_NETCDF  #include "esysUtils/Esys_MPI.h"
 #include <netcdfcpp.h>  
 #endif  
 #ifdef PASO_MPI  
 #include <mpi.h>  
 #endif  
17  #ifdef _OPENMP  #ifdef _OPENMP
18  #include <omp.h>  #include <omp.h>
19  #endif  #endif
# Line 26  Line 21 
21  #include "DataTypes.h"  #include "DataTypes.h"
22  #include "Data.h"  #include "Data.h"
23  #include "UnaryFuncs.h"     // for escript::fsign  #include "UnaryFuncs.h"     // for escript::fsign
24    #include "Utils.h"
25    
26    #include "EscriptParams.h"
27    
28    #ifdef USE_NETCDF
29    #include <netcdfcpp.h>
30    #endif
31    
32    #include <iomanip>      // for some fancy formatting in debug
33    
34    // #define LAZYDEBUG(X) if (privdebug){X;}
35    #define LAZYDEBUG(X)
36    namespace
37    {
38    bool privdebug=false;
39    
40    #define ENABLEDEBUG privdebug=true;
41    #define DISABLEDEBUG privdebug=false;
42    }
43    
44    // #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {cerr << "\n!!!!!!! SIZE LIMIT EXCEEDED " << m_children << ";" << m_height << endl << toString() << endl;resolveToIdentity();}
45    
46    // #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {cerr << "SIZE LIMIT EXCEEDED " << m_height << endl;resolveToIdentity();}
47    
48    
49    #define SIZELIMIT if (m_height>escript::escriptParams.getTOO_MANY_LEVELS())  {if (escript::escriptParams.getLAZY_VERBOSE()){cerr << "SIZE LIMIT EXCEEDED height=" << m_height << endl;}resolveToIdentity();}
50    
51    /*
52    How does DataLazy work?
53    ~~~~~~~~~~~~~~~~~~~~~~~
54    
55    Each instance represents a single operation on one or two other DataLazy instances. These arguments are normally
56    denoted left and right.
57    
58    A special operation, IDENTITY, stores an instance of DataReady in the m_id member.
59    This means that all "internal" nodes in the structure are instances of DataLazy.
60    
61    Each operation has a string representation as well as an opgroup - eg G_IDENTITY, G_BINARY, ...
62    Note that IDENITY is not considered a unary operation.
63    
64    I am avoiding calling the structure formed a tree because it is not guaranteed to be one (eg c=a+a).
65    It must however form a DAG (directed acyclic graph).
66    I will refer to individual DataLazy objects with the structure as nodes.
67    
68    Each node also stores:
69    - m_readytype \in {'E','T','C','?'} ~ indicates what sort of DataReady would be produced if the expression was
70        evaluated.
71    - m_buffsrequired ~ the larged number of samples which would need to be kept simultaneously in order to
72        evaluate the expression.
73    - m_samplesize ~ the number of doubles stored in a sample.
74    
75    When a new node is created, the above values are computed based on the values in the child nodes.
76    Eg: if left requires 4 samples and right requires 6 then left+right requires 7 samples.
77    
78    The resolve method, which produces a DataReady from a DataLazy, does the following:
79    1) Create a DataReady to hold the new result.
80    2) Allocate a vector (v) big enough to hold m_buffsrequired samples.
81    3) For each sample, call resolveSample with v, to get its values and copy them into the result object.
82    
83    (In the case of OMP, multiple samples are resolved in parallel so the vector needs to be larger.)
84    
85    resolveSample returns a Vector* and an offset within that vector where the result is stored.
86    Normally, this would be v, but for identity nodes their internal vector is returned instead.
87    
88    The convention that I use, is that the resolve methods should store their results starting at the offset they are passed.
89    
90    For expressions which evaluate to Constant or Tagged, there is a different evaluation method.
91    The collapse method invokes the (non-lazy) operations on the Data class to evaluate the expression.
92    
93    To add a new operator you need to do the following (plus anything I might have forgotten - adding a new group for example):
94    1) Add to the ES_optype.
95    2) determine what opgroup your operation belongs to (X)
96    3) add a string for the op to the end of ES_opstrings
97    4) increase ES_opcount
98    5) add an entry (X) to opgroups
99    6) add an entry to the switch in collapseToReady
100    7) add an entry to resolveX
101    */
102    
103    
104  using namespace std;  using namespace std;
105  using namespace boost;  using namespace boost;
# Line 33  using namespace boost; Line 107  using namespace boost;
107  namespace escript  namespace escript
108  {  {
109    
 const std::string&  
 opToString(ES_optype op);  
   
110  namespace  namespace
111  {  {
112    
113    
114    // enabling this will print out when ever the maximum stacksize used by resolve increases
115    // it assumes _OPENMP is also in use
116    //#define LAZY_STACK_PROF
117    
118    
119    
120    #ifndef _OPENMP
121      #ifdef LAZY_STACK_PROF
122      #undef LAZY_STACK_PROF
123      #endif
124    #endif
125    
126    
127    #ifdef LAZY_STACK_PROF
128    std::vector<void*> stackstart(getNumberOfThreads());
129    std::vector<void*> stackend(getNumberOfThreads());
130    size_t maxstackuse=0;
131    #endif
132    
133  enum ES_opgroup  enum ES_opgroup
134  {  {
135     G_UNKNOWN,     G_UNKNOWN,
136     G_IDENTITY,     G_IDENTITY,
137     G_BINARY,     G_BINARY,        // pointwise operations with two arguments
138     G_UNARY     G_UNARY,     // pointwise operations with one argument
139       G_UNARY_P,       // pointwise operations with one argument, requiring a parameter
140       G_NP1OUT,        // non-pointwise op with one output
141       G_NP1OUT_P,      // non-pointwise op with one output requiring a parameter
142       G_TENSORPROD,    // general tensor product
143       G_NP1OUT_2P,     // non-pointwise op with one output requiring two params
144       G_REDUCTION,     // non-pointwise unary op with a scalar output
145       G_CONDEVAL
146  };  };
147    
148    
149    
150    
151  string ES_opstrings[]={"UNKNOWN","IDENTITY","+","-","*","/","sin","cos","tan",  string ES_opstrings[]={"UNKNOWN","IDENTITY","+","-","*","/","^",
152                "sin","cos","tan",
153              "asin","acos","atan","sinh","cosh","tanh","erf",              "asin","acos","atan","sinh","cosh","tanh","erf",
154              "asinh","acosh","atanh",              "asinh","acosh","atanh",
155              "log10","log","sign","abs","neg","pos","exp","sqrt",              "log10","log","sign","abs","neg","pos","exp","sqrt",
156              "1/","where>0","where<0","where>=0","where<=0"};              "1/","where>0","where<0","where>=0","where<=0", "where<>0","where=0",
157  int ES_opcount=32;              "symmetric","nonsymmetric",
158  ES_opgroup opgroups[]={G_UNKNOWN,G_IDENTITY,G_BINARY,G_BINARY,G_BINARY,G_BINARY,G_UNARY,G_UNARY,G_UNARY, //9              "prod",
159              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,    // 16              "transpose", "trace",
160              G_UNARY,G_UNARY,G_UNARY,                    // 19              "swapaxes",
161              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,        // 27              "minval", "maxval",
162              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY};              "condEval"};
163    int ES_opcount=44;
164    ES_opgroup opgroups[]={G_UNKNOWN,G_IDENTITY,G_BINARY,G_BINARY,G_BINARY,G_BINARY, G_BINARY,
165                G_UNARY,G_UNARY,G_UNARY, //10
166                G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,    // 17
167                G_UNARY,G_UNARY,G_UNARY,                    // 20
168                G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,    // 28
169                G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY, G_UNARY_P, G_UNARY_P,      // 35
170                G_NP1OUT,G_NP1OUT,
171                G_TENSORPROD,
172                G_NP1OUT_P, G_NP1OUT_P,
173                G_NP1OUT_2P,
174                G_REDUCTION, G_REDUCTION,
175                G_CONDEVAL};
176  inline  inline
177  ES_opgroup  ES_opgroup
178  getOpgroup(ES_optype op)  getOpgroup(ES_optype op)
# Line 79  resultFS(DataAbstract_ptr left, DataAbst Line 189  resultFS(DataAbstract_ptr left, DataAbst
189      // that way, if interpolate is required in any other op we can just throw a      // that way, if interpolate is required in any other op we can just throw a
190      // programming error exception.      // programming error exception.
191    
192      FunctionSpace l=left->getFunctionSpace();
193      if (left->getFunctionSpace()!=right->getFunctionSpace())    FunctionSpace r=right->getFunctionSpace();
194      {    if (l!=r)
195          throw DataException("FunctionSpaces not equal - interpolation not supported on lazy data.");    {
196      }      if (r.probeInterpolation(l))
197      return left->getFunctionSpace();      {
198        return l;
199        }
200        if (l.probeInterpolation(r))
201        {
202        return r;
203        }
204        throw DataException("Cannot interpolate between the FunctionSpaces given for operation "+opToString(op)+".");
205      }
206      return l;
207  }  }
208    
209  // return the shape of the result of "left op right"  // return the shape of the result of "left op right"
210    // the shapes resulting from tensor product are more complex to compute so are worked out elsewhere
211  DataTypes::ShapeType  DataTypes::ShapeType
212  resultShape(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)  resultShape(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
213  {  {
214      if (left->getShape()!=right->getShape())      if (left->getShape()!=right->getShape())
215      {      {
216          throw DataException("Shapes not the same - shapes must match for lazy data.");        if ((getOpgroup(op)!=G_BINARY) && (getOpgroup(op)!=G_NP1OUT))
217          {
218            throw DataException("Shapes not the name - shapes must match for (point)binary operations.");
219          }
220    
221          if (left->getRank()==0)   // we need to allow scalar * anything
222          {
223            return right->getShape();
224          }
225          if (right->getRank()==0)
226          {
227            return left->getShape();
228          }
229          throw DataException("Shapes not the same - arguments must have matching shapes (or be scalars) for (point)binary operations on lazy data.");
230      }      }
231      return left->getShape();      return left->getShape();
232  }  }
233    
234  size_t  // return the shape for "op left"
235  resultLength(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)  
236    DataTypes::ShapeType
237    resultShape(DataAbstract_ptr left, ES_optype op, int axis_offset)
238  {  {
239     switch (getOpgroup(op))      switch(op)
240     {      {
241     case G_BINARY: return left->getLength();          case TRANS:
242     case G_UNARY: return left->getLength();         {            // for the scoping of variables
243     default:          const DataTypes::ShapeType& s=left->getShape();
244      throw DataException("Programmer Error - attempt to getLength() for operator "+opToString(op)+".");          DataTypes::ShapeType sh;
245     }          int rank=left->getRank();
246            if (axis_offset<0 || axis_offset>rank)
247            {
248                stringstream e;
249                e << "Error - Data::transpose must have 0 <= axis_offset <= rank=" << rank;
250                throw DataException(e.str());
251            }
252            for (int i=0; i<rank; i++)
253            {
254               int index = (axis_offset+i)%rank;
255               sh.push_back(s[index]); // Append to new shape
256            }
257            return sh;
258           }
259        break;
260        case TRACE:
261           {
262            int rank=left->getRank();
263            if (rank<2)
264            {
265               throw DataException("Trace can only be computed for objects with rank 2 or greater.");
266            }
267            if ((axis_offset>rank-2) || (axis_offset<0))
268            {
269               throw DataException("Trace: axis offset must lie between 0 and rank-2 inclusive.");
270            }
271            if (rank==2)
272            {
273               return DataTypes::scalarShape;
274            }
275            else if (rank==3)
276            {
277               DataTypes::ShapeType sh;
278                   if (axis_offset==0)
279               {
280                    sh.push_back(left->getShape()[2]);
281                   }
282                   else     // offset==1
283               {
284                sh.push_back(left->getShape()[0]);
285                   }
286               return sh;
287            }
288            else if (rank==4)
289            {
290               DataTypes::ShapeType sh;
291               const DataTypes::ShapeType& s=left->getShape();
292                   if (axis_offset==0)
293               {
294                    sh.push_back(s[2]);
295                    sh.push_back(s[3]);
296                   }
297                   else if (axis_offset==1)
298               {
299                    sh.push_back(s[0]);
300                    sh.push_back(s[3]);
301                   }
302               else     // offset==2
303               {
304                sh.push_back(s[0]);
305                sh.push_back(s[1]);
306               }
307               return sh;
308            }
309            else        // unknown rank
310            {
311               throw DataException("Error - Data::trace can only be calculated for rank 2, 3 or 4 object.");
312            }
313           }
314        break;
315            default:
316        throw DataException("Programmer error - resultShape(left,op) can't compute shapes for operator "+opToString(op)+".");
317        }
318  }  }
319    
320  int  DataTypes::ShapeType
321  calcBuffs(const DataLazy_ptr& left, const DataLazy_ptr& right, ES_optype op)  SwapShape(DataAbstract_ptr left, const int axis0, const int axis1)
322  {  {
323     switch(getOpgroup(op))       // This code taken from the Data.cpp swapaxes() method
324     {       // Some of the checks are probably redundant here
325     case G_IDENTITY: return 1;       int axis0_tmp,axis1_tmp;
326     case G_BINARY: return max(left->getBuffsRequired(),right->getBuffsRequired()+1);       const DataTypes::ShapeType& s=left->getShape();
327     case G_UNARY: return max(left->getBuffsRequired(),1);       DataTypes::ShapeType out_shape;
328     default:       // Here's the equivalent of python s_out=s[axis_offset:]+s[:axis_offset]
329      throw DataException("Programmer Error - attempt to calcBuffs() for operator "+opToString(op)+".");       // which goes thru all shape vector elements starting with axis_offset (at index=rank wrap around to 0)
330     }       int rank=left->getRank();
331         if (rank<2) {
332            throw DataException("Error - Data::swapaxes argument must have at least rank 2.");
333         }
334         if (axis0<0 || axis0>rank-1) {
335            stringstream e;
336            e << "Error - Data::swapaxes: axis0 must be between 0 and rank-1=" << (rank-1);
337            throw DataException(e.str());
338         }
339         if (axis1<0 || axis1>rank-1) {
340            stringstream e;
341            e << "Error - Data::swapaxes: axis1 must be between 0 and rank-1=" << (rank-1);
342            throw DataException(e.str());
343         }
344         if (axis0 == axis1) {
345             throw DataException("Error - Data::swapaxes: axis indices must be different.");
346         }
347         if (axis0 > axis1) {
348             axis0_tmp=axis1;
349             axis1_tmp=axis0;
350         } else {
351             axis0_tmp=axis0;
352             axis1_tmp=axis1;
353         }
354         for (int i=0; i<rank; i++) {
355           if (i == axis0_tmp) {
356              out_shape.push_back(s[axis1_tmp]);
357           } else if (i == axis1_tmp) {
358              out_shape.push_back(s[axis0_tmp]);
359           } else {
360              out_shape.push_back(s[i]);
361           }
362         }
363        return out_shape;
364  }  }
365    
366    
367    // determine the output shape for the general tensor product operation
368    // the additional parameters return information required later for the product
369    // the majority of this code is copy pasted from C_General_Tensor_Product
370    DataTypes::ShapeType
371    GTPShape(DataAbstract_ptr left, DataAbstract_ptr right, int axis_offset, int transpose, int& SL, int& SM, int& SR)
372    {
373        
374      // Get rank and shape of inputs
375      int rank0 = left->getRank();
376      int rank1 = right->getRank();
377      const DataTypes::ShapeType& shape0 = left->getShape();
378      const DataTypes::ShapeType& shape1 = right->getShape();
379    
380      // Prepare for the loops of the product and verify compatibility of shapes
381      int start0=0, start1=0;
382      if (transpose == 0)       {}
383      else if (transpose == 1)  { start0 = axis_offset; }
384      else if (transpose == 2)  { start1 = rank1-axis_offset; }
385      else              { throw DataException("DataLazy GeneralTensorProduct Constructor: Error - transpose should be 0, 1 or 2"); }
386    
387      if (rank0<axis_offset)
388      {
389        throw DataException("DataLazy GeneralTensorProduct Constructor: Error - rank of left < axisoffset");
390      }
391    
392      // Adjust the shapes for transpose
393      DataTypes::ShapeType tmpShape0(rank0);    // pre-sizing the vectors rather
394      DataTypes::ShapeType tmpShape1(rank1);    // than using push_back
395      for (int i=0; i<rank0; i++)   { tmpShape0[i]=shape0[(i+start0)%rank0]; }
396      for (int i=0; i<rank1; i++)   { tmpShape1[i]=shape1[(i+start1)%rank1]; }
397    
398      // Prepare for the loops of the product
399      SL=1, SM=1, SR=1;
400      for (int i=0; i<rank0-axis_offset; i++)   {
401        SL *= tmpShape0[i];
402      }
403      for (int i=rank0-axis_offset; i<rank0; i++)   {
404        if (tmpShape0[i] != tmpShape1[i-(rank0-axis_offset)]) {
405          throw DataException("C_GeneralTensorProduct: Error - incompatible shapes");
406        }
407        SM *= tmpShape0[i];
408      }
409      for (int i=axis_offset; i<rank1; i++)     {
410        SR *= tmpShape1[i];
411      }
412    
413      // Define the shape of the output (rank of shape is the sum of the loop ranges below)
414      DataTypes::ShapeType shape2(rank0+rank1-2*axis_offset);  
415      {         // block to limit the scope of out_index
416         int out_index=0;
417         for (int i=0; i<rank0-axis_offset; i++, ++out_index) { shape2[out_index]=tmpShape0[i]; } // First part of arg_0_Z
418         for (int i=axis_offset; i<rank1; i++, ++out_index)   { shape2[out_index]=tmpShape1[i]; } // Last part of arg_1_Z
419      }
420    
421      if (shape2.size()>ESCRIPT_MAX_DATA_RANK)
422      {
423         ostringstream os;
424         os << "C_GeneralTensorProduct: Error - Attempt to create a rank " << shape2.size() << " object. The maximum rank is " << ESCRIPT_MAX_DATA_RANK << ".";
425         throw DataException(os.str());
426      }
427    
428      return shape2;
429    }
430    
431  }   // end anonymous namespace  }   // end anonymous namespace
432    
433    
434    
435    // Return a string representing the operation
436  const std::string&  const std::string&
437  opToString(ES_optype op)  opToString(ES_optype op)
438  {  {
# Line 138  opToString(ES_optype op) Line 443  opToString(ES_optype op)
443    return ES_opstrings[op];    return ES_opstrings[op];
444  }  }
445    
446    void DataLazy::LazyNodeSetup()
447    {
448    #ifdef _OPENMP
449        int numthreads=omp_get_max_threads();
450        m_samples.resize(numthreads*m_samplesize);
451        m_sampleids=new int[numthreads];
452        for (int i=0;i<numthreads;++i)
453        {
454            m_sampleids[i]=-1;  
455        }
456    #else
457        m_samples.resize(m_samplesize);
458        m_sampleids=new int[1];
459        m_sampleids[0]=-1;
460    #endif  // _OPENMP
461    }
462    
463    
464    // Creates an identity node
465  DataLazy::DataLazy(DataAbstract_ptr p)  DataLazy::DataLazy(DataAbstract_ptr p)
466      : parent(p->getFunctionSpace(),p->getShape()),      : parent(p->getFunctionSpace(),p->getShape())
467      m_op(IDENTITY)      ,m_sampleids(0),
468        m_samples(1)
469  {  {
470     if (p->isLazy())     if (p->isLazy())
471     {     {
     // TODO: fix this.   We could make the new node a copy of p?  
472      // I don't want identity of Lazy.      // I don't want identity of Lazy.
473      // Question: Why would that be so bad?      // Question: Why would that be so bad?
474      // Answer: We assume that the child of ID is something we can call getVector on      // Answer: We assume that the child of ID is something we can call getVector on
# Line 153  DataLazy::DataLazy(DataAbstract_ptr p) Line 476  DataLazy::DataLazy(DataAbstract_ptr p)
476     }     }
477     else     else
478     {     {
479      m_id=dynamic_pointer_cast<DataReady>(p);      p->makeLazyShared();
480        DataReady_ptr dr=dynamic_pointer_cast<DataReady>(p);
481        makeIdentity(dr);
482    LAZYDEBUG(cout << "Wrapping " << dr.get() << " id=" << m_id.get() << endl;)
483     }     }
484     m_length=p->getLength();  LAZYDEBUG(cout << "(1)Lazy created with " << m_samplesize << endl;)
    m_buffsRequired=1;  
    m_samplesize=getNumDPPSample()*getNoValues();  
 cout << "(1)Lazy created with " << m_samplesize << endl;  
485  }  }
486    
487  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op)  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op)
488      : parent(left->getFunctionSpace(),left->getShape()),      : parent(left->getFunctionSpace(),(getOpgroup(op)!=G_REDUCTION)?left->getShape():DataTypes::scalarShape),
489      m_op(op)      m_op(op),
490        m_axis_offset(0),
491        m_transpose(0),
492        m_SL(0), m_SM(0), m_SR(0)
493  {  {
494     if (getOpgroup(op)!=G_UNARY)     if ((getOpgroup(op)!=G_UNARY) && (getOpgroup(op)!=G_NP1OUT) && (getOpgroup(op)!=G_REDUCTION))
495     {     {
496      throw DataException("Programmer error - constructor DataLazy(left, op) will only process UNARY operations.");      throw DataException("Programmer error - constructor DataLazy(left, op) will only process UNARY operations.");
497     }     }
498    
499     DataLazy_ptr lleft;     DataLazy_ptr lleft;
500     if (!left->isLazy())     if (!left->isLazy())
501     {     {
# Line 178  DataLazy::DataLazy(DataAbstract_ptr left Line 505  DataLazy::DataLazy(DataAbstract_ptr left
505     {     {
506      lleft=dynamic_pointer_cast<DataLazy>(left);      lleft=dynamic_pointer_cast<DataLazy>(left);
507     }     }
508     m_length=left->getLength();     m_readytype=lleft->m_readytype;
509     m_left=lleft;     m_left=lleft;
    m_buffsRequired=1;  
510     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
511       m_children=m_left->m_children+1;
512       m_height=m_left->m_height+1;
513       LazyNodeSetup();
514       SIZELIMIT
515  }  }
516    
517    
518  DataLazy::DataLazy(DataLazy_ptr left, DataLazy_ptr right, ES_optype op)  // In this constructor we need to consider interpolation
519    DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
520      : parent(resultFS(left,right,op), resultShape(left,right,op)),      : parent(resultFS(left,right,op), resultShape(left,right,op)),
521      m_left(left),      m_op(op),
522      m_right(right),      m_SL(0), m_SM(0), m_SR(0)
     m_op(op)  
523  {  {
524     if (getOpgroup(op)!=G_BINARY)  LAZYDEBUG(cout << "Forming operator with " << left.get() << " " << right.get() << endl;)
525       if ((getOpgroup(op)!=G_BINARY))
526     {     {
527      throw DataException("Programmer error - constructor DataLazy(left, right, op) will only process BINARY operations.");      throw DataException("Programmer error - constructor DataLazy(left, right, op) will only process BINARY operations.");
528     }     }
529     m_length=resultLength(m_left,m_right,m_op);  
530       if (getFunctionSpace()!=left->getFunctionSpace())    // left needs to be interpolated
531       {
532        FunctionSpace fs=getFunctionSpace();
533        Data ltemp(left);
534        Data tmp(ltemp,fs);
535        left=tmp.borrowDataPtr();
536       }
537       if (getFunctionSpace()!=right->getFunctionSpace())   // right needs to be interpolated
538       {
539        Data tmp(Data(right),getFunctionSpace());
540        right=tmp.borrowDataPtr();
541    LAZYDEBUG(cout << "Right interpolation required " << right.get() << endl;)
542       }
543       left->operandCheck(*right);
544    
545       if (left->isLazy())          // the children need to be DataLazy. Wrap them in IDENTITY if required
546       {
547        m_left=dynamic_pointer_cast<DataLazy>(left);
548    LAZYDEBUG(cout << "Left is " << m_left->toString() << endl;)
549       }
550       else
551       {
552        m_left=DataLazy_ptr(new DataLazy(left));
553    LAZYDEBUG(cout << "Left " << left.get() << " wrapped " << m_left->m_id.get() << endl;)
554       }
555       if (right->isLazy())
556       {
557        m_right=dynamic_pointer_cast<DataLazy>(right);
558    LAZYDEBUG(cout << "Right is " << m_right->toString() << endl;)
559       }
560       else
561       {
562        m_right=DataLazy_ptr(new DataLazy(right));
563    LAZYDEBUG(cout << "Right " << right.get() << " wrapped " << m_right->m_id.get() << endl;)
564       }
565       char lt=m_left->m_readytype;
566       char rt=m_right->m_readytype;
567       if (lt=='E' || rt=='E')
568       {
569        m_readytype='E';
570       }
571       else if (lt=='T' || rt=='T')
572       {
573        m_readytype='T';
574       }
575       else
576       {
577        m_readytype='C';
578       }
579     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
580     m_buffsRequired=calcBuffs(m_left, m_right, m_op);     m_children=m_left->m_children+m_right->m_children+2;
581  cout << "(2)Lazy created with " << m_samplesize << endl;     m_height=max(m_left->m_height,m_right->m_height)+1;
582       LazyNodeSetup();
583       SIZELIMIT
584    LAZYDEBUG(cout << "(3)Lazy created with " << m_samplesize << endl;)
585  }  }
586    
587  DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)  DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op, int axis_offset, int transpose)
588      : parent(resultFS(left,right,op), resultShape(left,right,op)),      : parent(resultFS(left,right,op), GTPShape(left,right, axis_offset, transpose, m_SL,m_SM, m_SR)),
589      m_op(op)      m_op(op),
590        m_axis_offset(axis_offset),
591        m_transpose(transpose)
592  {  {
593     if (getOpgroup(op)!=G_BINARY)     if ((getOpgroup(op)!=G_TENSORPROD))
594       {
595        throw DataException("Programmer error - constructor DataLazy(left, right, op, ax, tr) will only process BINARY operations which require parameters.");
596       }
597       if ((transpose>2) || (transpose<0))
598       {
599        throw DataException("DataLazy GeneralTensorProduct constructor: Error - transpose should be 0, 1 or 2");
600       }
601       if (getFunctionSpace()!=left->getFunctionSpace())    // left needs to be interpolated
602     {     {
603      throw DataException("Programmer error - constructor DataLazy(left, op) will only process BINARY operations.");      FunctionSpace fs=getFunctionSpace();
604        Data ltemp(left);
605        Data tmp(ltemp,fs);
606        left=tmp.borrowDataPtr();
607     }     }
608     if (left->isLazy())     if (getFunctionSpace()!=right->getFunctionSpace())   // right needs to be interpolated
609       {
610        Data tmp(Data(right),getFunctionSpace());
611        right=tmp.borrowDataPtr();
612       }
613    //    left->operandCheck(*right);
614    
615       if (left->isLazy())          // the children need to be DataLazy. Wrap them in IDENTITY if required
616     {     {
617      m_left=dynamic_pointer_cast<DataLazy>(left);      m_left=dynamic_pointer_cast<DataLazy>(left);
618     }     }
# Line 225  DataLazy::DataLazy(DataAbstract_ptr left Line 628  DataLazy::DataLazy(DataAbstract_ptr left
628     {     {
629      m_right=DataLazy_ptr(new DataLazy(right));      m_right=DataLazy_ptr(new DataLazy(right));
630     }     }
631       char lt=m_left->m_readytype;
632       char rt=m_right->m_readytype;
633       if (lt=='E' || rt=='E')
634       {
635        m_readytype='E';
636       }
637       else if (lt=='T' || rt=='T')
638       {
639        m_readytype='T';
640       }
641       else
642       {
643        m_readytype='C';
644       }
645       m_samplesize=getNumDPPSample()*getNoValues();
646       m_children=m_left->m_children+m_right->m_children+2;
647       m_height=max(m_left->m_height,m_right->m_height)+1;
648       LazyNodeSetup();
649       SIZELIMIT
650    LAZYDEBUG(cout << "(4)Lazy created with " << m_samplesize << endl;)
651    }
652    
653    
654    DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, int axis_offset)
655        : parent(left->getFunctionSpace(), resultShape(left,op, axis_offset)),
656        m_op(op),
657        m_axis_offset(axis_offset),
658        m_transpose(0),
659        m_tol(0)
660    {
661       if ((getOpgroup(op)!=G_NP1OUT_P))
662       {
663        throw DataException("Programmer error - constructor DataLazy(left, op, ax) will only process UNARY operations which require parameters.");
664       }
665       DataLazy_ptr lleft;
666       if (!left->isLazy())
667       {
668        lleft=DataLazy_ptr(new DataLazy(left));
669       }
670       else
671       {
672        lleft=dynamic_pointer_cast<DataLazy>(left);
673       }
674       m_readytype=lleft->m_readytype;
675       m_left=lleft;
676       m_samplesize=getNumDPPSample()*getNoValues();
677       m_children=m_left->m_children+1;
678       m_height=m_left->m_height+1;
679       LazyNodeSetup();
680       SIZELIMIT
681    LAZYDEBUG(cout << "(5)Lazy created with " << m_samplesize << endl;)
682    }
683    
684    DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, double tol)
685        : parent(left->getFunctionSpace(), left->getShape()),
686        m_op(op),
687        m_axis_offset(0),
688        m_transpose(0),
689        m_tol(tol)
690    {
691       if ((getOpgroup(op)!=G_UNARY_P))
692       {
693        throw DataException("Programmer error - constructor DataLazy(left, op, tol) will only process UNARY operations which require parameters.");
694       }
695       DataLazy_ptr lleft;
696       if (!left->isLazy())
697       {
698        lleft=DataLazy_ptr(new DataLazy(left));
699       }
700       else
701       {
702        lleft=dynamic_pointer_cast<DataLazy>(left);
703       }
704       m_readytype=lleft->m_readytype;
705       m_left=lleft;
706       m_samplesize=getNumDPPSample()*getNoValues();
707       m_children=m_left->m_children+1;
708       m_height=m_left->m_height+1;
709       LazyNodeSetup();
710       SIZELIMIT
711    LAZYDEBUG(cout << "(6)Lazy created with " << m_samplesize << endl;)
712    }
713    
714    
715     m_length=resultLength(m_left,m_right,m_op);  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, const int axis0, const int axis1)
716        : parent(left->getFunctionSpace(), SwapShape(left,axis0,axis1)),
717        m_op(op),
718        m_axis_offset(axis0),
719        m_transpose(axis1),
720        m_tol(0)
721    {
722       if ((getOpgroup(op)!=G_NP1OUT_2P))
723       {
724        throw DataException("Programmer error - constructor DataLazy(left, op, tol) will only process UNARY operations which require two integer parameters.");
725       }
726       DataLazy_ptr lleft;
727       if (!left->isLazy())
728       {
729        lleft=DataLazy_ptr(new DataLazy(left));
730       }
731       else
732       {
733        lleft=dynamic_pointer_cast<DataLazy>(left);
734       }
735       m_readytype=lleft->m_readytype;
736       m_left=lleft;
737     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
738     m_buffsRequired=calcBuffs(m_left, m_right,m_op);     m_children=m_left->m_children+1;
739  cout << "(3)Lazy created with " << m_samplesize << endl;     m_height=m_left->m_height+1;
740       LazyNodeSetup();
741       SIZELIMIT
742    LAZYDEBUG(cout << "(7)Lazy created with " << m_samplesize << endl;)
743  }  }
744    
745    
746  DataLazy::~DataLazy()  namespace
747  {  {
748    
749        inline int max3(int a, int b, int c)
750        {
751        int t=(a>b?a:b);
752        return (t>c?t:c);
753    
754        }
755    }
756    
757    DataLazy::DataLazy(DataAbstract_ptr mask, DataAbstract_ptr left, DataAbstract_ptr right/*, double tol*/)
758        : parent(left->getFunctionSpace(), left->getShape()),
759        m_op(CONDEVAL),
760        m_axis_offset(0),
761        m_transpose(0),
762        m_tol(0)
763    {
764    
765       DataLazy_ptr lmask;
766       DataLazy_ptr lleft;
767       DataLazy_ptr lright;
768       if (!mask->isLazy())
769       {
770        lmask=DataLazy_ptr(new DataLazy(mask));
771       }
772       else
773       {
774        lmask=dynamic_pointer_cast<DataLazy>(mask);
775       }
776       if (!left->isLazy())
777       {
778        lleft=DataLazy_ptr(new DataLazy(left));
779       }
780       else
781       {
782        lleft=dynamic_pointer_cast<DataLazy>(left);
783       }
784       if (!right->isLazy())
785       {
786        lright=DataLazy_ptr(new DataLazy(right));
787       }
788       else
789       {
790        lright=dynamic_pointer_cast<DataLazy>(right);
791       }
792       m_readytype=lmask->m_readytype;
793       if ((lleft->m_readytype!=lright->m_readytype) || (lmask->m_readytype!=lleft->m_readytype))
794       {
795        throw DataException("Programmer Error - condEval arguments must have the same readytype");
796       }
797       m_left=lleft;
798       m_right=lright;
799       m_mask=lmask;
800       m_samplesize=getNumDPPSample()*getNoValues();
801       m_children=m_left->m_children+m_right->m_children+m_mask->m_children+1;
802       m_height=max3(m_left->m_height,m_right->m_height,m_mask->m_height)+1;
803       LazyNodeSetup();
804       SIZELIMIT
805    LAZYDEBUG(cout << "(8)Lazy created with " << m_samplesize << endl;)
806  }  }
807    
808    
809  int  
810  DataLazy::getBuffsRequired() const  DataLazy::~DataLazy()
811  {  {
812      return m_buffsRequired;     delete[] m_sampleids;
813  }  }
814    
815    
816  // the vector and the offset are a place where the method could write its data if it wishes  /*
817  // it is not obligated to do so. For example, if it has its own storage already, it can use that.    \brief Evaluates the expression using methods on Data.
818  // Hence the return value to indicate where the data is actually stored.    This does the work for the collapse method.
819  // Regardless, the storage should be assumed to be used, even if it isn't.    For reasons of efficiency do not call this method on DataExpanded nodes.
820  const double*  */
821  DataLazy::resolveSample(ValueType& v,int sampleNo,  size_t offset ) const  DataReady_ptr
822    DataLazy::collapseToReady()
823  {  {
824    if (m_op==IDENTITY)      if (m_readytype=='E')
825      { // this is more an efficiency concern than anything else
826        throw DataException("Programmer Error - do not use collapse on Expanded data.");
827      }
828      if (m_op==IDENTITY)
829    {    {
830      const ValueType& vec=m_id->getVector();      return m_id;
     return &(vec[m_id->getPointOffset(sampleNo, 0)]);  
831    }    }
832    size_t rightoffset=offset+m_samplesize;    DataReady_ptr pleft=m_left->collapseToReady();
833    const double* left=m_left->resolveSample(v,sampleNo,offset);    Data left(pleft);
834    const double* right=0;    Data right;
835    if (getOpgroup(m_op)==G_BINARY)    if ((getOpgroup(m_op)==G_BINARY) || (getOpgroup(m_op)==G_TENSORPROD))
836    {    {
837      right=m_right->resolveSample(v,sampleNo,rightoffset);      right=Data(m_right->collapseToReady());
838    }    }
839    double* result=&(v[offset]);    Data result;
840      switch(m_op)
841    {    {
842      switch(m_op)      case ADD:
843      {      result=left+right;
     case ADD:       // since these are pointwise ops, pretend each sample is one point  
     tensor_binary_operation(m_samplesize, left, right, result, plus<double>());  
844      break;      break;
845      case SUB:            case SUB:      
846      tensor_binary_operation(m_samplesize, left, right, result, minus<double>());      result=left-right;
847      break;      break;
848      case MUL:            case MUL:      
849      tensor_binary_operation(m_samplesize, left, right, result, multiplies<double>());      result=left*right;
850      break;      break;
851      case DIV:            case DIV:      
852      tensor_binary_operation(m_samplesize, left, right, result, divides<double>());      result=left/right;
853      break;      break;
 // unary ops  
854      case SIN:      case SIN:
855      tensor_unary_operation(m_samplesize, left, result, ::sin);      result=left.sin();  
856      break;      break;
857      case COS:      case COS:
858      tensor_unary_operation(m_samplesize, left, result, ::cos);      result=left.cos();
859      break;      break;
860      case TAN:      case TAN:
861      tensor_unary_operation(m_samplesize, left, result, ::tan);      result=left.tan();
862      break;      break;
863      case ASIN:      case ASIN:
864      tensor_unary_operation(m_samplesize, left, result, ::asin);      result=left.asin();
865      break;      break;
866      case ACOS:      case ACOS:
867      tensor_unary_operation(m_samplesize, left, result, ::acos);      result=left.acos();
868      break;      break;
869      case ATAN:      case ATAN:
870      tensor_unary_operation(m_samplesize, left, result, ::atan);      result=left.atan();
871      break;      break;
872      case SINH:      case SINH:
873      tensor_unary_operation(m_samplesize, left, result, ::sinh);      result=left.sinh();
874      break;      break;
875      case COSH:      case COSH:
876      tensor_unary_operation(m_samplesize, left, result, ::cosh);      result=left.cosh();
877      break;      break;
878      case TANH:      case TANH:
879      tensor_unary_operation(m_samplesize, left, result, ::tanh);      result=left.tanh();
880      break;      break;
881      case ERF:      case ERF:
882  #ifdef _WIN32      result=left.erf();
883        break;
884       case ASINH:
885        result=left.asinh();
886        break;
887       case ACOSH:
888        result=left.acosh();
889        break;
890       case ATANH:
891        result=left.atanh();
892        break;
893        case LOG10:
894        result=left.log10();
895        break;
896        case LOG:
897        result=left.log();
898        break;
899        case SIGN:
900        result=left.sign();
901        break;
902        case ABS:
903        result=left.abs();
904        break;
905        case NEG:
906        result=left.neg();
907        break;
908        case POS:
909        // it doesn't mean anything for delayed.
910        // it will just trigger a deep copy of the lazy object
911        throw DataException("Programmer error - POS not supported for lazy data.");
912        break;
913        case EXP:
914        result=left.exp();
915        break;
916        case SQRT:
917        result=left.sqrt();
918        break;
919        case RECIP:
920        result=left.oneOver();
921        break;
922        case GZ:
923        result=left.wherePositive();
924        break;
925        case LZ:
926        result=left.whereNegative();
927        break;
928        case GEZ:
929        result=left.whereNonNegative();
930        break;
931        case LEZ:
932        result=left.whereNonPositive();
933        break;
934        case NEZ:
935        result=left.whereNonZero(m_tol);
936        break;
937        case EZ:
938        result=left.whereZero(m_tol);
939        break;
940        case SYM:
941        result=left.symmetric();
942        break;
943        case NSYM:
944        result=left.nonsymmetric();
945        break;
946        case PROD:
947        result=C_GeneralTensorProduct(left,right,m_axis_offset, m_transpose);
948        break;
949        case TRANS:
950        result=left.transpose(m_axis_offset);
951        break;
952        case TRACE:
953        result=left.trace(m_axis_offset);
954        break;
955        case SWAP:
956        result=left.swapaxes(m_axis_offset, m_transpose);
957        break;
958        case MINVAL:
959        result=left.minval();
960        break;
961        case MAXVAL:
962        result=left.minval();
963        break;
964        default:
965        throw DataException("Programmer error - collapseToReady does not know how to resolve operator "+opToString(m_op)+".");
966      }
967      return result.borrowReadyPtr();
968    }
969    
970    /*
971       \brief Converts the DataLazy into an IDENTITY storing the value of the expression.
972       This method uses the original methods on the Data class to evaluate the expressions.
973       For this reason, it should not be used on DataExpanded instances. (To do so would defeat
974       the purpose of using DataLazy in the first place).
975    */
976    void
977    DataLazy::collapse()
978    {
979      if (m_op==IDENTITY)
980      {
981        return;
982      }
983      if (m_readytype=='E')
984      { // this is more an efficiency concern than anything else
985        throw DataException("Programmer Error - do not use collapse on Expanded data.");
986      }
987      m_id=collapseToReady();
988      m_op=IDENTITY;
989    }
990    
991    
992    
993    
994    
995    
996    #define PROC_OP(TYPE,X)                               \
997        for (int j=0;j<onumsteps;++j)\
998        {\
999          for (int i=0;i<numsteps;++i,resultp+=resultStep) \
1000          { \
1001    LAZYDEBUG(cout << "[left,right]=[" << lroffset << "," << rroffset << "]" << endl;)\
1002    LAZYDEBUG(cout << "{left,right}={" << (*left)[lroffset] << "," << (*right)[rroffset] << "}\n";)\
1003             tensor_binary_operation< TYPE >(chunksize, &((*left)[lroffset]), &((*right)[rroffset]), resultp, X); \
1004    LAZYDEBUG(cout << " result=      " << resultp[0] << endl;) \
1005             lroffset+=leftstep; \
1006             rroffset+=rightstep; \
1007          }\
1008          lroffset+=oleftstep;\
1009          rroffset+=orightstep;\
1010        }
1011    
1012    
1013    // The result will be stored in m_samples
1014    // The return value is a pointer to the DataVector, offset is the offset within the return value
1015    const DataTypes::ValueType*
1016    DataLazy::resolveNodeSample(int tid, int sampleNo, size_t& roffset)
1017    {
1018    LAZYDEBUG(cout << "Resolve sample " << toString() << endl;)
1019        // collapse so we have a 'E' node or an IDENTITY for some other type
1020      if (m_readytype!='E' && m_op!=IDENTITY)
1021      {
1022        collapse();
1023      }
1024      if (m_op==IDENTITY)  
1025      {
1026        const ValueType& vec=m_id->getVectorRO();
1027        roffset=m_id->getPointOffset(sampleNo, 0);
1028    #ifdef LAZY_STACK_PROF
1029    int x;
1030    if (&x<stackend[omp_get_thread_num()])
1031    {
1032           stackend[omp_get_thread_num()]=&x;
1033    }
1034    #endif
1035        return &(vec);
1036      }
1037      if (m_readytype!='E')
1038      {
1039        throw DataException("Programmer Error - Collapse did not produce an expanded node.");
1040      }
1041      if (m_sampleids[tid]==sampleNo)
1042      {
1043        roffset=tid*m_samplesize;
1044        return &(m_samples);        // sample is already resolved
1045      }
1046      m_sampleids[tid]=sampleNo;
1047    
1048      switch (getOpgroup(m_op))
1049      {
1050      case G_UNARY:
1051      case G_UNARY_P: return resolveNodeUnary(tid, sampleNo, roffset);
1052      case G_BINARY: return resolveNodeBinary(tid, sampleNo, roffset);
1053      case G_NP1OUT: return resolveNodeNP1OUT(tid, sampleNo, roffset);
1054      case G_NP1OUT_P: return resolveNodeNP1OUT_P(tid, sampleNo, roffset);
1055      case G_TENSORPROD: return resolveNodeTProd(tid, sampleNo, roffset);
1056      case G_NP1OUT_2P: return resolveNodeNP1OUT_2P(tid, sampleNo, roffset);
1057      case G_REDUCTION: return resolveNodeReduction(tid, sampleNo, roffset);
1058      case G_CONDEVAL: return resolveNodeCondEval(tid, sampleNo, roffset);
1059      default:
1060        throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");
1061      }
1062    }
1063    
1064    const DataTypes::ValueType*
1065    DataLazy::resolveNodeUnary(int tid, int sampleNo, size_t& roffset)
1066    {
1067        // we assume that any collapsing has been done before we get here
1068        // since we only have one argument we don't need to think about only
1069        // processing single points.
1070        // we will also know we won't get identity nodes
1071      if (m_readytype!='E')
1072      {
1073        throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
1074      }
1075      if (m_op==IDENTITY)
1076      {
1077        throw DataException("Programmer error - resolveNodeUnary should not be called on identity nodes.");
1078      }
1079      const DataTypes::ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, roffset);
1080      const double* left=&((*leftres)[roffset]);
1081      roffset=m_samplesize*tid;
1082      double* result=&(m_samples[roffset]);
1083      switch (m_op)
1084      {
1085        case SIN:  
1086        tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sin);
1087        break;
1088        case COS:
1089        tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cos);
1090        break;
1091        case TAN:
1092        tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tan);
1093        break;
1094        case ASIN:
1095        tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::asin);
1096        break;
1097        case ACOS:
1098        tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::acos);
1099        break;
1100        case ATAN:
1101        tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::atan);
1102        break;
1103        case SINH:
1104        tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sinh);
1105        break;
1106        case COSH:
1107        tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cosh);
1108        break;
1109        case TANH:
1110        tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tanh);
1111        break;
1112        case ERF:
1113    #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1114      throw DataException("Error - Data:: erf function is not supported on _WIN32 platforms.");      throw DataException("Error - Data:: erf function is not supported on _WIN32 platforms.");
1115  #else  #else
1116      tensor_unary_operation(m_samplesize, left, result, ::erf);      tensor_unary_operation(m_samplesize, left, result, ::erf);
1117      break;      break;
1118  #endif  #endif
1119     case ASINH:     case ASINH:
1120  #ifdef _WIN32  #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1121      tensor_unary_operation(m_samplesize, left, result, escript::asinh_substitute);      tensor_unary_operation(m_samplesize, left, result, escript::asinh_substitute);
1122  #else  #else
1123      tensor_unary_operation(m_samplesize, left, result, ::asinh);      tensor_unary_operation(m_samplesize, left, result, ::asinh);
1124  #endif    #endif  
1125      break;      break;
1126     case ACOSH:     case ACOSH:
1127  #ifdef _WIN32  #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1128      tensor_unary_operation(m_samplesize, left, result, escript::acosh_substitute);      tensor_unary_operation(m_samplesize, left, result, escript::acosh_substitute);
1129  #else  #else
1130      tensor_unary_operation(m_samplesize, left, result, ::acosh);      tensor_unary_operation(m_samplesize, left, result, ::acosh);
1131  #endif    #endif  
1132      break;      break;
1133     case ATANH:     case ATANH:
1134  #ifdef _WIN32  #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1135      tensor_unary_operation(m_samplesize, left, result, escript::atanh_substitute);      tensor_unary_operation(m_samplesize, left, result, escript::atanh_substitute);
1136  #else  #else
1137      tensor_unary_operation(m_samplesize, left, result, ::atanh);      tensor_unary_operation(m_samplesize, left, result, ::atanh);
1138  #endif    #endif  
1139      break;      break;
1140      case LOG10:      case LOG10:
1141      tensor_unary_operation(m_samplesize, left, result, ::log10);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log10);
1142      break;      break;
1143      case LOG:      case LOG:
1144      tensor_unary_operation(m_samplesize, left, result, ::log);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log);
1145      break;      break;
1146      case SIGN:      case SIGN:
1147      tensor_unary_operation(m_samplesize, left, result, escript::fsign);      tensor_unary_operation(m_samplesize, left, result, escript::fsign);
1148      break;      break;
1149      case ABS:      case ABS:
1150      tensor_unary_operation(m_samplesize, left, result, ::fabs);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::fabs);
1151      break;      break;
1152      case NEG:      case NEG:
1153      tensor_unary_operation(m_samplesize, left, result, negate<double>());      tensor_unary_operation(m_samplesize, left, result, negate<double>());
# Line 357  DataLazy::resolveSample(ValueType& v,int Line 1158  DataLazy::resolveSample(ValueType& v,int
1158      throw DataException("Programmer error - POS not supported for lazy data.");      throw DataException("Programmer error - POS not supported for lazy data.");
1159      break;      break;
1160      case EXP:      case EXP:
1161      tensor_unary_operation(m_samplesize, left, result, ::exp);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::exp);
1162      break;      break;
1163      case SQRT:      case SQRT:
1164      tensor_unary_operation(m_samplesize, left, result, ::sqrt);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sqrt);
1165      break;      break;
1166      case RECIP:      case RECIP:
1167      tensor_unary_operation(m_samplesize, left, result, bind1st(divides<double>(),1.));      tensor_unary_operation(m_samplesize, left, result, bind1st(divides<double>(),1.));
# Line 377  DataLazy::resolveSample(ValueType& v,int Line 1178  DataLazy::resolveSample(ValueType& v,int
1178      case LEZ:      case LEZ:
1179      tensor_unary_operation(m_samplesize, left, result, bind2nd(less_equal<double>(),0.0));      tensor_unary_operation(m_samplesize, left, result, bind2nd(less_equal<double>(),0.0));
1180      break;      break;
1181    // There are actually G_UNARY_P but I don't see a compelling reason to treat them differently
1182        case NEZ:
1183        tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsGT(),m_tol));
1184        break;
1185        case EZ:
1186        tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsLTE(),m_tol));
1187        break;
1188    
1189      default:      default:
1190      throw DataException("Programmer error - do not know how to resolve operator "+opToString(m_op)+".");      throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");
1191      }    }
1192      return &(m_samples);
1193    }
1194    
1195    
1196    const DataTypes::ValueType*
1197    DataLazy::resolveNodeReduction(int tid, int sampleNo, size_t& roffset)
1198    {
1199        // we assume that any collapsing has been done before we get here
1200        // since we only have one argument we don't need to think about only
1201        // processing single points.
1202        // we will also know we won't get identity nodes
1203      if (m_readytype!='E')
1204      {
1205        throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
1206      }
1207      if (m_op==IDENTITY)
1208      {
1209        throw DataException("Programmer error - resolveNodeUnary should not be called on identity nodes.");
1210      }
1211      size_t loffset=0;
1212      const DataTypes::ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, loffset);
1213    
1214      roffset=m_samplesize*tid;
1215      unsigned int ndpps=getNumDPPSample();
1216      unsigned int psize=DataTypes::noValues(m_left->getShape());
1217      double* result=&(m_samples[roffset]);
1218      switch (m_op)
1219      {
1220        case MINVAL:
1221        {
1222          for (unsigned int z=0;z<ndpps;++z)
1223          {
1224            FMin op;
1225            *result=DataMaths::reductionOp(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max());
1226            loffset+=psize;
1227            result++;
1228          }
1229        }
1230        break;
1231        case MAXVAL:
1232        {
1233          for (unsigned int z=0;z<ndpps;++z)
1234          {
1235          FMax op;
1236          *result=DataMaths::reductionOp(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max()*-1);
1237          loffset+=psize;
1238          result++;
1239          }
1240        }
1241        break;
1242        default:
1243        throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");
1244      }
1245      return &(m_samples);
1246    }
1247    
1248    const DataTypes::ValueType*
1249    DataLazy::resolveNodeNP1OUT(int tid, int sampleNo, size_t& roffset)
1250    {
1251        // we assume that any collapsing has been done before we get here
1252        // since we only have one argument we don't need to think about only
1253        // processing single points.
1254      if (m_readytype!='E')
1255      {
1256        throw DataException("Programmer error - resolveNodeNP1OUT should only be called on expanded Data.");
1257      }
1258      if (m_op==IDENTITY)
1259      {
1260        throw DataException("Programmer error - resolveNodeNP1OUT should not be called on identity nodes.");
1261      }
1262      size_t subroffset;
1263      const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1264      roffset=m_samplesize*tid;
1265      size_t loop=0;
1266      size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1267      size_t step=getNoValues();
1268      size_t offset=roffset;
1269      switch (m_op)
1270      {
1271        case SYM:
1272        for (loop=0;loop<numsteps;++loop)
1273        {
1274            DataMaths::symmetric(*leftres,m_left->getShape(),subroffset, m_samples, getShape(), offset);
1275            subroffset+=step;
1276            offset+=step;
1277        }
1278        break;
1279        case NSYM:
1280        for (loop=0;loop<numsteps;++loop)
1281        {
1282            DataMaths::nonsymmetric(*leftres,m_left->getShape(),subroffset, m_samples, getShape(), offset);
1283            subroffset+=step;
1284            offset+=step;
1285        }
1286        break;
1287        default:
1288        throw DataException("Programmer error - resolveNP1OUT can not resolve operator "+opToString(m_op)+".");
1289      }
1290      return &m_samples;
1291    }
1292    
1293    const DataTypes::ValueType*
1294    DataLazy::resolveNodeNP1OUT_P(int tid, int sampleNo, size_t& roffset)
1295    {
1296        // we assume that any collapsing has been done before we get here
1297        // since we only have one argument we don't need to think about only
1298        // processing single points.
1299      if (m_readytype!='E')
1300      {
1301        throw DataException("Programmer error - resolveNodeNP1OUT_P should only be called on expanded Data.");
1302      }
1303      if (m_op==IDENTITY)
1304      {
1305        throw DataException("Programmer error - resolveNodeNP1OUT_P should not be called on identity nodes.");
1306      }
1307      size_t subroffset;
1308      size_t offset;
1309      const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1310      roffset=m_samplesize*tid;
1311      offset=roffset;
1312      size_t loop=0;
1313      size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1314      size_t outstep=getNoValues();
1315      size_t instep=m_left->getNoValues();
1316      switch (m_op)
1317      {
1318        case TRACE:
1319        for (loop=0;loop<numsteps;++loop)
1320        {
1321                DataMaths::trace(*leftres,m_left->getShape(),subroffset, m_samples ,getShape(),offset,m_axis_offset);
1322            subroffset+=instep;
1323            offset+=outstep;
1324        }
1325        break;
1326        case TRANS:
1327        for (loop=0;loop<numsteps;++loop)
1328        {
1329                DataMaths::transpose(*leftres,m_left->getShape(),subroffset, m_samples, getShape(),offset,m_axis_offset);
1330            subroffset+=instep;
1331            offset+=outstep;
1332        }
1333        break;
1334        default:
1335        throw DataException("Programmer error - resolveNP1OUTP can not resolve operator "+opToString(m_op)+".");
1336    }    }
1337    return result;    return &m_samples;
1338  }  }
1339    
1340    
1341    const DataTypes::ValueType*
1342    DataLazy::resolveNodeNP1OUT_2P(int tid, int sampleNo, size_t& roffset)
1343    {
1344      if (m_readytype!='E')
1345      {
1346        throw DataException("Programmer error - resolveNodeNP1OUT_2P should only be called on expanded Data.");
1347      }
1348      if (m_op==IDENTITY)
1349      {
1350        throw DataException("Programmer error - resolveNodeNP1OUT_2P should not be called on identity nodes.");
1351      }
1352      size_t subroffset;
1353      size_t offset;
1354      const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1355      roffset=m_samplesize*tid;
1356      offset=roffset;
1357      size_t loop=0;
1358      size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1359      size_t outstep=getNoValues();
1360      size_t instep=m_left->getNoValues();
1361      switch (m_op)
1362      {
1363        case SWAP:
1364        for (loop=0;loop<numsteps;++loop)
1365        {
1366                DataMaths::swapaxes(*leftres,m_left->getShape(),subroffset, m_samples, getShape(),offset, m_axis_offset, m_transpose);
1367            subroffset+=instep;
1368            offset+=outstep;
1369        }
1370        break;
1371        default:
1372        throw DataException("Programmer error - resolveNodeNP1OUT2P can not resolve operator "+opToString(m_op)+".");
1373      }
1374      return &m_samples;
1375    }
1376    
1377    const DataTypes::ValueType*
1378    DataLazy::resolveNodeCondEval(int tid, int sampleNo, size_t& roffset)
1379    {
1380      if (m_readytype!='E')
1381      {
1382        throw DataException("Programmer error - resolveNodeCondEval should only be called on expanded Data.");
1383      }
1384      if (m_op!=CONDEVAL)
1385      {
1386        throw DataException("Programmer error - resolveNodeCondEval should only be called on CONDEVAL nodes.");
1387      }
1388      size_t subroffset;
1389    
1390      const ValueType* maskres=m_mask->resolveNodeSample(tid, sampleNo, subroffset);
1391      const ValueType* srcres=0;
1392      if ((*maskres)[subroffset]>0)
1393      {
1394        srcres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1395      }
1396      else
1397      {
1398        srcres=m_right->resolveNodeSample(tid, sampleNo, subroffset);
1399      }
1400    
1401      // Now we need to copy the result
1402    
1403      roffset=m_samplesize*tid;
1404      for (int i=0;i<m_samplesize;++i)
1405      {
1406        m_samples[roffset+i]=(*srcres)[subroffset+i];  
1407      }
1408    
1409      return &m_samples;
1410    }
1411    
1412    // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1413    // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1414    // If both children are expanded, then we can process them in a single operation (we treat
1415    // the whole sample as one big datapoint.
1416    // If one of the children is not expanded, then we need to treat each point in the sample
1417    // individually.
1418    // There is an additional complication when scalar operations are considered.
1419    // For example, 2+Vector.
1420    // In this case each double within the point is treated individually
// Evaluate a binary element-wise operation (ADD/SUB/MUL/DIV/POW) for one sample.
// Precondition: at least one child must be Expanded ('E'); Constant/Tagged-only
// expressions must have been collapsed before calling this.
// The stepping variables below drive the PROC_OP macro, which iterates the
// operands at possibly different strides so that scalars (expanded or not) can
// be combined with non-scalars without materialising a broadcast copy.
// tid      - thread id selecting this thread's slice of m_samples.
// sampleNo - sample to resolve.
// roffset  - out-parameter: offset of the result within the returned vector.
// Returns a pointer to m_samples (results live at m_samples[roffset]).
const DataTypes::ValueType*
DataLazy::resolveNodeBinary(int tid, int sampleNo, size_t& roffset)
{
LAZYDEBUG(cout << "Resolve binary: " << toString() << endl;)

  size_t lroffset=0, rroffset=0;    // offsets in the left and right result vectors
    // first work out which of the children are expanded
  bool leftExp=(m_left->m_readytype=='E');
  bool rightExp=(m_right->m_readytype=='E');
  if (!leftExp && !rightExp)
  {
    throw DataException("Programmer Error - please use collapse if neither argument has type 'E'.");
  }
  // shapes must agree unless one side is scalar (which broadcasts over the other)
  bool leftScalar=(m_left->getRank()==0);
  bool rightScalar=(m_right->getRank()==0);
  if ((m_left->getRank()!=m_right->getRank()) && (!leftScalar && !rightScalar))
  {
    throw DataException("resolveBinary - ranks of arguments must match unless one of them is scalar.");
  }
  size_t leftsize=m_left->getNoValues();
  size_t rightsize=m_right->getNoValues();
  size_t chunksize=1;           // how many doubles will be processed in one go
  int leftstep=0;       // how far should the left offset advance after each step
  int rightstep=0;
  int numsteps=0;       // total number of steps for the inner loop
  int oleftstep=0;  // the o variables refer to the outer loop
  int orightstep=0; // The outer loop is only required in cases where there is an extended scalar
  int onumsteps=1;

  // classify each side by (expanded?, scalar?); exactly one flag per side is true
  bool LES=(leftExp && leftScalar); // Left is an expanded scalar
  bool RES=(rightExp && rightScalar);
  bool LS=(!leftExp && leftScalar); // left is a single scalar
  bool RS=(!rightExp && rightScalar);
  bool LN=(!leftExp && !leftScalar);    // left is a single non-scalar
  bool RN=(!rightExp && !rightScalar);
  bool LEN=(leftExp && !leftScalar);    // left is an expanded non-scalar
  bool REN=(rightExp && !rightScalar);

  if ((LES && RES) || (LEN && REN)) // both are Expanded scalars or both are expanded non-scalars
  {
    // matching layouts: process the whole sample in one chunk
    chunksize=m_left->getNumDPPSample()*leftsize;
    leftstep=0;
    rightstep=0;
    numsteps=1;
  }
  else if (LES || RES)
  {
    // one side is an expanded scalar: it must be replayed against every value
    // of the other side, hence the element-by-element chunking
    chunksize=1;
    if (LES)        // left is an expanded scalar
    {
        if (RS)
        {
           // expanded scalar op plain scalar: advance left only, once per data point
           leftstep=1;
           rightstep=0;
           numsteps=m_left->getNumDPPSample();
        }
        else        // RN or REN
        {
           // inner loop walks the right operand's values; outer loop advances
           // the expanded scalar one data point at a time.  For a non-expanded
           // right operand the negative outer step rewinds it each data point.
           leftstep=0;
           oleftstep=1;
           rightstep=1;
           orightstep=(RN ? -(int)rightsize : 0);
           numsteps=rightsize;
           onumsteps=m_left->getNumDPPSample();
        }
    }
    else        // right is an expanded scalar
    {
        if (LS)
        {
           rightstep=1;
           leftstep=0;
           numsteps=m_right->getNumDPPSample();
        }
        else
        {
           // mirror image of the LES case above
           rightstep=0;
           orightstep=1;
           leftstep=1;
           oleftstep=(LN ? -(int)leftsize : 0);
           numsteps=leftsize;
           onumsteps=m_right->getNumDPPSample();
        }
    }
  }
  else  // this leaves (LEN, RS), (LEN, RN) and their transposes
  {
    if (LEN)    // and Right will be a single value
    {
        // walk the expanded left one data point (rightsize doubles) at a time,
        // reusing the single right value/point for each step
        chunksize=rightsize;
        leftstep=rightsize;
        rightstep=0;
        numsteps=m_left->getNumDPPSample();
        if (RS)
        {
           // right is a lone scalar: it pairs with every double, not every point
           numsteps*=leftsize;
        }
    }
    else    // REN
    {
        chunksize=leftsize;
        rightstep=leftsize;
        leftstep=0;
        numsteps=m_right->getNumDPPSample();
        if (LS)
        {
           numsteps*=rightsize;
        }
    }
  }

  int resultStep=max(leftstep,rightstep);   // only one (at most) should be !=0
    // Get the values of sub-expressions
  const ValueType* left=m_left->resolveNodeSample(tid,sampleNo,lroffset);
  const ValueType* right=m_right->resolveNodeSample(tid,sampleNo,rroffset);
LAZYDEBUG(cout << "Post sub calls in " << toString() << endl;)
LAZYDEBUG(cout << "shapes=" << DataTypes::shapeToString(m_left->getShape()) << "," << DataTypes::shapeToString(m_right->getShape()) << endl;)
LAZYDEBUG(cout << "chunksize=" << chunksize << endl << "leftstep=" << leftstep << " rightstep=" << rightstep;)
LAZYDEBUG(cout << " numsteps=" << numsteps << endl << "oleftstep=" << oleftstep << " orightstep=" << orightstep;)
LAZYDEBUG(cout << "onumsteps=" << onumsteps << endl;)
LAZYDEBUG(cout << " DPPS=" << m_left->getNumDPPSample() << "," <<m_right->getNumDPPSample() << endl;)
LAZYDEBUG(cout << "" << LS << RS << LN << RN << LES << RES <<LEN << REN <<   endl;)

LAZYDEBUG(cout << "Left res["<< lroffset<< "]=" << (*left)[lroffset] << endl;)
LAZYDEBUG(cout << "Right res["<< rroffset<< "]=" << (*right)[rroffset] << endl;)


  // each thread owns a disjoint m_samplesize slice of m_samples
  roffset=m_samplesize*tid;
  double* resultp=&(m_samples[roffset]);        // results are stored at the vector offset we received
  // PROC_OP performs the chunked/stepped traversal set up above, applying the
  // supplied functor to each pair of operand elements
  switch(m_op)
  {
    case ADD:
        PROC_OP(NO_ARG,plus<double>());
    break;
    case SUB:
    PROC_OP(NO_ARG,minus<double>());
    break;
    case MUL:
    PROC_OP(NO_ARG,multiplies<double>());
    break;
    case DIV:
    PROC_OP(NO_ARG,divides<double>());
    break;
    case POW:
       PROC_OP(double (double,double),::pow);
    break;
    default:
    throw DataException("Programmer error - resolveBinary can not resolve operator "+opToString(m_op)+".");
  }
LAZYDEBUG(cout << "Result res[" << roffset<< "]" << m_samples[roffset] << endl;)
  return &m_samples;
}
1573    
1574    
1575    // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1576    // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1577    // unlike the other resolve helpers, we must treat these datapoints separately.
1578    const DataTypes::ValueType*
1579    DataLazy::resolveNodeTProd(int tid, int sampleNo, size_t& roffset)
1580    {
1581    LAZYDEBUG(cout << "Resolve TensorProduct: " << toString() << endl;)
1582    
1583      size_t lroffset=0, rroffset=0;    // offsets in the left and right result vectors
1584        // first work out which of the children are expanded
1585      bool leftExp=(m_left->m_readytype=='E');
1586      bool rightExp=(m_right->m_readytype=='E');
1587      int steps=getNumDPPSample();
1588      int leftStep=(leftExp? m_left->getNoValues() : 0);        // do not have scalars as input to this method
1589      int rightStep=(rightExp?m_right->getNoValues() : 0);
1590    
1591      int resultStep=getNoValues();
1592      roffset=m_samplesize*tid;
1593      size_t offset=roffset;
1594    
1595      const ValueType* left=m_left->resolveNodeSample(tid, sampleNo, lroffset);
1596    
1597      const ValueType* right=m_right->resolveNodeSample(tid, sampleNo, rroffset);
1598    
1599    LAZYDEBUG(cerr << "[Left shape]=" << DataTypes::shapeToString(m_left->getShape()) << "\n[Right shape]=" << DataTypes::shapeToString(m_right->getShape()) << " result=" <<DataTypes::shapeToString(getShape()) <<  endl;
1600    cout << getNoValues() << endl;)
1601    
1602    
1603    LAZYDEBUG(cerr << "Post sub calls: " << toString() << endl;)
1604    LAZYDEBUG(cout << "LeftExp=" << leftExp << " rightExp=" << rightExp << endl;)
1605    LAZYDEBUG(cout << "LeftR=" << m_left->getRank() << " rightExp=" << m_right->getRank() << endl;)
1606    LAZYDEBUG(cout << "LeftSize=" << m_left->getNoValues() << " RightSize=" << m_right->getNoValues() << endl;)
1607    LAZYDEBUG(cout << "m_samplesize=" << m_samplesize << endl;)
1608    LAZYDEBUG(cout << "outputshape=" << DataTypes::shapeToString(getShape()) << endl;)
1609    LAZYDEBUG(cout << "DPPS=" << m_right->getNumDPPSample() <<"."<<endl;)
1610    
1611      double* resultp=&(m_samples[offset]);     // results are stored at the vector offset we recieved
1612      switch(m_op)
1613      {
1614        case PROD:
1615        for (int i=0;i<steps;++i,resultp+=resultStep)
1616        {
1617              const double *ptr_0 = &((*left)[lroffset]);
1618              const double *ptr_1 = &((*right)[rroffset]);
1619    
1620    LAZYDEBUG(cout << DataTypes::pointToString(*left, m_left->getShape(),lroffset,"LEFT") << endl;)
1621    LAZYDEBUG(cout << DataTypes::pointToString(*right,m_right->getShape(),rroffset, "RIGHT") << endl;)
1622    
1623              matrix_matrix_product(m_SL, m_SM, m_SR, ptr_0, ptr_1, resultp, m_transpose);
1624    
1625          lroffset+=leftStep;
1626          rroffset+=rightStep;
1627        }
1628        break;
1629        default:
1630        throw DataException("Programmer error - resolveTProduct can not resolve operator "+opToString(m_op)+".");
1631      }
1632      roffset=offset;
1633      return &m_samples;
1634    }
1635    
1636    
1637    const DataTypes::ValueType*
1638    DataLazy::resolveSample(int sampleNo, size_t& roffset)
1639    {
1640    #ifdef _OPENMP
1641        int tid=omp_get_thread_num();
1642    #else
1643        int tid=0;
1644    #endif
1645    
1646    #ifdef LAZY_STACK_PROF
1647        stackstart[tid]=&tid;
1648        stackend[tid]=&tid;
1649        const DataTypes::ValueType* r=resolveNodeSample(tid, sampleNo, roffset);
1650        size_t d=(size_t)stackstart[tid]-(size_t)stackend[tid];
1651        #pragma omp critical
1652        if (d>maxstackuse)
1653        {
1654    cout << "Max resolve Stack use " << d << endl;
1655            maxstackuse=d;
1656        }
1657        return r;
1658    #else
1659        return resolveNodeSample(tid, sampleNo, roffset);
1660    #endif
1661    }
1662    
1663    
1664    // This needs to do the work of the identity constructor
1665    void
1666    DataLazy::resolveToIdentity()
1667    {
1668       if (m_op==IDENTITY)
1669        return;
1670       DataReady_ptr p=resolveNodeWorker();
1671       makeIdentity(p);
1672    }
1673    
1674    void DataLazy::makeIdentity(const DataReady_ptr& p)
1675    {
1676       m_op=IDENTITY;
1677       m_axis_offset=0;
1678       m_transpose=0;
1679       m_SL=m_SM=m_SR=0;
1680       m_children=m_height=0;
1681       m_id=p;
1682       if(p->isConstant()) {m_readytype='C';}
1683       else if(p->isExpanded()) {m_readytype='E';}
1684       else if (p->isTagged()) {m_readytype='T';}
1685       else {throw DataException("Unknown DataReady instance in convertToIdentity constructor.");}
1686       m_samplesize=p->getNumDPPSample()*p->getNoValues();
1687       m_left.reset();
1688       m_right.reset();
1689    }
1690    
1691    
1692  DataReady_ptr  DataReady_ptr
1693  DataLazy::resolve()  DataLazy::resolve()
1694  {  {
1695    // This is broken!     We need to have a buffer per thread!      resolveToIdentity();
1696    // so the allocation of v needs to move inside the loop somehow      return m_id;
1697    }
1698    
 cout << "Sample size=" << m_samplesize << endl;  
 cout << "Buffers=" << m_buffsRequired << endl;  
1699    
1700    size_t threadbuffersize=m_samplesize*(max(1,m_buffsRequired)+1);  /* This is really a static method but I think that caused problems in windows */
1701    int numthreads=1;  void
1702    DataLazy::resolveGroupWorker(std::vector<DataLazy*>& dats)
1703    {
1704      if (dats.empty())
1705      {
1706        return;
1707      }
1708      vector<DataLazy*> work;
1709      FunctionSpace fs=dats[0]->getFunctionSpace();
1710      bool match=true;
1711      for (int i=dats.size()-1;i>=0;--i)
1712      {
1713        if (dats[i]->m_readytype!='E')
1714        {
1715            dats[i]->collapse();
1716        }
1717        if (dats[i]->m_op!=IDENTITY)
1718        {
1719            work.push_back(dats[i]);
1720            if (fs!=dats[i]->getFunctionSpace())
1721            {
1722                match=false;
1723            }
1724        }
1725      }
1726      if (work.empty())
1727      {
1728        return;     // no work to do
1729      }
1730      if (match)    // all functionspaces match.  Yes I realise this is overly strict
1731      {     // it is possible that dats[0] is one of the objects which we discarded and
1732            // all the other functionspaces match.
1733        vector<DataExpanded*> dep;
1734        vector<ValueType*> vecs;
1735        for (int i=0;i<work.size();++i)
1736        {
1737            dep.push_back(new DataExpanded(fs,work[i]->getShape(), ValueType(work[i]->getNoValues())));
1738            vecs.push_back(&(dep[i]->getVectorRW()));
1739        }
1740        int totalsamples=work[0]->getNumSamples();
1741        const ValueType* res=0; // Storage for answer
1742        int sample;
1743        #pragma omp parallel private(sample, res)
1744        {
1745            size_t roffset=0;
1746            #pragma omp for schedule(static)
1747            for (sample=0;sample<totalsamples;++sample)
1748            {
1749            roffset=0;
1750            int j;
1751            for (j=work.size()-1;j>=0;--j)
1752            {
1753  #ifdef _OPENMP  #ifdef _OPENMP
1754    numthreads=omp_get_max_threads();                  res=work[j]->resolveNodeSample(omp_get_thread_num(),sample,roffset);
1755    int threadnum=0;  #else
1756  #endif                  res=work[j]->resolveNodeSample(0,sample,roffset);
1757    ValueType v(numthreads*threadbuffersize);  #endif
1758  cout << "Buffer created with size=" << v.size() << endl;                  DataVector::size_type outoffset=dep[j]->getPointOffset(sample,0);
1759    ValueType dummy(getNoValues());                  memcpy(&((*vecs[j])[outoffset]),&((*res)[roffset]),work[j]->m_samplesize*sizeof(DataVector::ElementType));
1760    DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(),dummy);          }
1761    ValueType& resvec=result->getVector();          }
1762        }
1763        // Now we need to load the new results as identity ops into the lazy nodes
1764        for (int i=work.size()-1;i>=0;--i)
1765        {
1766            work[i]->makeIdentity(boost::dynamic_pointer_cast<DataReady>(dep[i]->getPtr()));
1767        }
1768      }
1769      else  // functionspaces do not match
1770      {
1771        for (int i=0;i<work.size();++i)
1772        {
1773            work[i]->resolveToIdentity();
1774        }
1775      }
1776    }
1777    
1778    
1779    
1780    // This version of resolve uses storage in each node to hold results
1781    DataReady_ptr
1782    DataLazy::resolveNodeWorker()
1783    {
1784      if (m_readytype!='E')     // if the whole sub-expression is Constant or Tagged, then evaluate it normally
1785      {
1786        collapse();
1787      }
1788      if (m_op==IDENTITY)       // So a lazy expression of Constant or Tagged data will be returned here.
1789      {
1790        return m_id;
1791      }
1792        // from this point on we must have m_op!=IDENTITY and m_readytype=='E'
1793      DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(),  ValueType(getNoValues()));
1794      ValueType& resvec=result->getVectorRW();
1795    DataReady_ptr resptr=DataReady_ptr(result);    DataReady_ptr resptr=DataReady_ptr(result);
1796    
1797    int sample;    int sample;
   int resoffset;  
1798    int totalsamples=getNumSamples();    int totalsamples=getNumSamples();
1799    #pragma omp parallel for private(sample,resoffset,threadnum) schedule(static)    const ValueType* res=0;   // Storage for answer
1800    for (sample=0;sample<totalsamples;++sample)  LAZYDEBUG(cout << "Total number of samples=" <<totalsamples << endl;)
1801      #pragma omp parallel private(sample,res)
1802    {    {
1803        size_t roffset=0;
1804    #ifdef LAZY_STACK_PROF
1805        stackstart[omp_get_thread_num()]=&roffset;
1806        stackend[omp_get_thread_num()]=&roffset;
1807    #endif
1808        #pragma omp for schedule(static)
1809        for (sample=0;sample<totalsamples;++sample)
1810        {
1811            roffset=0;
1812  #ifdef _OPENMP  #ifdef _OPENMP
1813      const double* res=resolveSample(v,sample,threadbuffersize*omp_get_thread_num());              res=resolveNodeSample(omp_get_thread_num(),sample,roffset);
1814  #else  #else
1815      const double* res=resolveSample(v,sample,0);   // this would normally be v, but not if its a single IDENTITY op.              res=resolveNodeSample(0,sample,roffset);
1816  #endif  #endif
1817      resoffset=result->getPointOffset(sample,0);  LAZYDEBUG(cout << "Sample #" << sample << endl;)
1818      for (int i=0;i<m_samplesize;++i,++resoffset)    // copy values into the output vector  LAZYDEBUG(cout << "Final res[" << roffset<< "]=" << (*res)[roffset] << (*res)[roffset]<< endl; )
1819      {              DataVector::size_type outoffset=result->getPointOffset(sample,0);
1820      resvec[resoffset]=res[i];              memcpy(&(resvec[outoffset]),&((*res)[roffset]),m_samplesize*sizeof(DataVector::ElementType));
1821      }      }
1822      }
1823    #ifdef LAZY_STACK_PROF
1824      for (int i=0;i<getNumberOfThreads();++i)
1825      {
1826        size_t r=((size_t)stackstart[i] - (size_t)stackend[i]);
1827    //  cout << i << " " << stackstart[i] << " .. " << stackend[i] << " = " <<  r << endl;
1828        if (r>maxstackuse)
1829        {
1830            maxstackuse=r;
1831        }
1832    }    }
1833      cout << "Max resolve Stack use=" << maxstackuse << endl;
1834    #endif
1835    return resptr;    return resptr;
1836  }  }
1837    
# Line 430  std::string Line 1839  std::string
1839  DataLazy::toString() const  DataLazy::toString() const
1840  {  {
1841    ostringstream oss;    ostringstream oss;
1842    oss << "Lazy Data:";    oss << "Lazy Data: [depth=" << m_height<< "] ";
1843    intoString(oss);    switch (escriptParams.getLAZY_STR_FMT())
1844      {
1845      case 1:   // tree format
1846        oss << endl;
1847        intoTreeString(oss,"");
1848        break;
1849      case 2:   // just the depth
1850        break;
1851      default:
1852        intoString(oss);
1853        break;
1854      }
1855    return oss.str();    return oss.str();
1856  }  }
1857    
1858    
1859  void  void
1860  DataLazy::intoString(ostringstream& oss) const  DataLazy::intoString(ostringstream& oss) const
1861  {  {
1862    //    oss << "[" << m_children <<";"<<m_height <<"]";
1863    switch (getOpgroup(m_op))    switch (getOpgroup(m_op))
1864    {    {
1865    case G_IDENTITY:    case G_IDENTITY:
1866        if (m_id->isExpanded())
1867        {
1868           oss << "E";
1869        }
1870        else if (m_id->isTagged())
1871        {
1872          oss << "T";
1873        }
1874        else if (m_id->isConstant())
1875        {
1876          oss << "C";
1877        }
1878        else
1879        {
1880          oss << "?";
1881        }
1882      oss << '@' << m_id.get();      oss << '@' << m_id.get();
1883      break;      break;
1884    case G_BINARY:    case G_BINARY:
# Line 451  DataLazy::intoString(ostringstream& oss) Line 1889  DataLazy::intoString(ostringstream& oss)
1889      oss << ')';      oss << ')';
1890      break;      break;
1891    case G_UNARY:    case G_UNARY:
1892      case G_UNARY_P:
1893      case G_NP1OUT:
1894      case G_NP1OUT_P:
1895      case G_REDUCTION:
1896        oss << opToString(m_op) << '(';
1897        m_left->intoString(oss);
1898        oss << ')';
1899        break;
1900      case G_TENSORPROD:
1901        oss << opToString(m_op) << '(';
1902        m_left->intoString(oss);
1903        oss << ", ";
1904        m_right->intoString(oss);
1905        oss << ')';
1906        break;
1907      case G_NP1OUT_2P:
1908      oss << opToString(m_op) << '(';      oss << opToString(m_op) << '(';
1909      m_left->intoString(oss);      m_left->intoString(oss);
1910        oss << ", " << m_axis_offset << ", " << m_transpose;
1911        oss << ')';
1912        break;
1913      case G_CONDEVAL:
1914        oss << opToString(m_op)<< '(' ;
1915        m_mask->intoString(oss);
1916        oss << " ? ";
1917        m_left->intoString(oss);
1918        oss << " : ";
1919        m_right->intoString(oss);
1920      oss << ')';      oss << ')';
1921      break;      break;
1922    default:    default:
# Line 460  DataLazy::intoString(ostringstream& oss) Line 1924  DataLazy::intoString(ostringstream& oss)
1924    }    }
1925  }  }
1926    
1927  // Note that in this case, deepCopy does not make copies of the leaves.  
1928  // Hopefully copy on write (or whatever we end up using) will take care of this.  void
// Render this subtree one node per line, prefixed with [rank:samplesize] and
// an indent of one '.' per tree level.  Used when LAZY_STR_FMT selects the
// tree format in toString().
DataLazy::intoTreeString(ostringstream& oss, string indent) const
{
  oss << '[' << m_rank << ':' << setw(3) << m_samplesize << "] " << indent;
  switch (getOpgroup(m_op))
  {
  case G_IDENTITY:
    // leaf node: show the storage class of the wrapped DataReady and its address
    if (m_id->isExpanded())
    {
       oss << "E";
    }
    else if (m_id->isTagged())
    {
      oss << "T";
    }
    else if (m_id->isConstant())
    {
      oss << "C";
    }
    else
    {
      oss << "?";
    }
    oss << '@' << m_id.get() << endl;
    break;
  case G_BINARY:
    // two children, both indented one level deeper
    oss << opToString(m_op) << endl;
    indent+='.';
    m_left->intoTreeString(oss, indent);
    m_right->intoTreeString(oss, indent);
    break;
  case G_UNARY:
  case G_UNARY_P:
  case G_NP1OUT:
  case G_NP1OUT_P:
  case G_REDUCTION:
    // single-child operations
    oss << opToString(m_op) << endl;
    indent+='.';
    m_left->intoTreeString(oss, indent);
    break;
  case G_TENSORPROD:
    oss << opToString(m_op) << endl;
    indent+='.';
    m_left->intoTreeString(oss, indent);
    m_right->intoTreeString(oss, indent);
    break;
  case G_NP1OUT_2P:
    // single child plus the two integer parameters of the operation
    oss << opToString(m_op) << ", " << m_axis_offset << ", " << m_transpose<< endl;
    indent+='.';
    m_left->intoTreeString(oss, indent);
    break;
  default:
    oss << "UNKNOWN";
  }
}
1983    
1984    
1985  DataAbstract*  DataAbstract*
1986  DataLazy::deepCopy()  DataLazy::deepCopy()
1987  {  {
1988    if (m_op==IDENTITY)    switch (getOpgroup(m_op))
1989    {    {
1990      return new DataLazy(m_left);    // we don't need to copy the child here    case G_IDENTITY:  return new DataLazy(m_id->deepCopy()->getPtr());
1991      case G_UNARY:
1992      case G_REDUCTION:      return new DataLazy(m_left->deepCopy()->getPtr(),m_op);
1993      case G_UNARY_P:   return new DataLazy(m_left->deepCopy()->getPtr(), m_op, m_tol);
1994      case G_BINARY:    return new DataLazy(m_left->deepCopy()->getPtr(),m_right->deepCopy()->getPtr(),m_op);
1995      case G_NP1OUT: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(),m_op);
1996      case G_TENSORPROD: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
1997      case G_NP1OUT_P:   return new DataLazy(m_left->deepCopy()->getPtr(),m_op,  m_axis_offset);
1998      case G_NP1OUT_2P:  return new DataLazy(m_left->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
1999      default:
2000        throw DataException("Programmer error - do not know how to deepcopy operator "+opToString(m_op)+".");
2001    }    }
   return new DataLazy(m_left->deepCopy()->getPtr(),m_right->deepCopy()->getPtr(),m_op);  
2002  }  }
2003    
2004    
2005    
2006    // There is no single, natural interpretation of getLength on DataLazy.
2007    // Instances of DataReady can look at the size of their vectors.
2008    // For lazy though, it could be the size the data would be if it were resolved;
2009    // or it could be some function of the lengths of the DataReady instances which
2010    // form part of the expression.
2011    // Rather than have people making assumptions, I have disabled the method.
2012  DataTypes::ValueType::size_type  DataTypes::ValueType::size_type
2013  DataLazy::getLength() const  DataLazy::getLength() const
2014  {  {
2015    return m_length;    throw DataException("getLength() does not make sense for lazy data.");
2016  }  }
2017    
2018    
# Line 486  DataLazy::getSlice(const DataTypes::Regi Line 2022  DataLazy::getSlice(const DataTypes::Regi
2022    throw DataException("getSlice - not implemented for Lazy objects.");    throw DataException("getSlice - not implemented for Lazy objects.");
2023  }  }
2024    
2025    
2026    // To do this we need to rely on our child nodes
2027    DataTypes::ValueType::size_type
2028    DataLazy::getPointOffset(int sampleNo,
2029                     int dataPointNo)
2030    {
2031      if (m_op==IDENTITY)
2032      {
2033        return m_id->getPointOffset(sampleNo,dataPointNo);
2034      }
2035      if (m_readytype!='E')
2036      {
2037        collapse();
2038        return m_id->getPointOffset(sampleNo,dataPointNo);
2039      }
2040      // at this point we do not have an identity node and the expression will be Expanded
2041      // so we only need to know which child to ask
2042      if (m_left->m_readytype=='E')
2043      {
2044        return m_left->getPointOffset(sampleNo,dataPointNo);
2045      }
2046      else
2047      {
2048        return m_right->getPointOffset(sampleNo,dataPointNo);
2049      }
2050    }
2051    
2052    // To do this we need to rely on our child nodes
2053  DataTypes::ValueType::size_type  DataTypes::ValueType::size_type
2054  DataLazy::getPointOffset(int sampleNo,  DataLazy::getPointOffset(int sampleNo,
2055                   int dataPointNo) const                   int dataPointNo) const
2056  {  {
2057    throw DataException("getPointOffset - not implemented for Lazy objects - yet.");    if (m_op==IDENTITY)
2058      {
2059        return m_id->getPointOffset(sampleNo,dataPointNo);
2060      }
2061      if (m_readytype=='E')
2062      {
2063        // at this point we do not have an identity node and the expression will be Expanded
2064        // so we only need to know which child to ask
2065        if (m_left->m_readytype=='E')
2066        {
2067        return m_left->getPointOffset(sampleNo,dataPointNo);
2068        }
2069        else
2070        {
2071        return m_right->getPointOffset(sampleNo,dataPointNo);
2072        }
2073      }
2074      if (m_readytype=='C')
2075      {
2076        return m_left->getPointOffset(sampleNo,dataPointNo); // which child doesn't matter
2077      }
2078      throw DataException("Programmer error - getPointOffset on lazy data may require collapsing (but this object is marked const).");
2079    }
2080    
2081    
2082    // I have decided to let Data:: handle this issue.
2083    void
2084    DataLazy::setToZero()
2085    {
2086    //   DataTypes::ValueType v(getNoValues(),0);
2087    //   m_id=DataReady_ptr(new DataConstant(getFunctionSpace(),getShape(),v));
2088    //   m_op=IDENTITY;
2089    //   m_right.reset();  
2090    //   m_left.reset();
2091    //   m_readytype='C';
2092    //   m_buffsRequired=1;
2093    
2094      privdebug=privdebug;  // to stop the compiler complaining about unused privdebug
2095      throw DataException("Programmer error - setToZero not supported for DataLazy (DataLazy objects should be read only).");
2096    }
2097    
2098    bool
2099    DataLazy::actsExpanded() const
2100    {
2101        return (m_readytype=='E');
2102  }  }
2103    
2104  }   // end namespace  }   // end namespace

Legend:
Removed from v.1888  
changed lines
  Added in v.3917

  ViewVC Help
Powered by ViewVC 1.1.26