/[escript]/branches/clazy/escriptcore/src/DataLazy.cpp
ViewVC logotype

Diff of /branches/clazy/escriptcore/src/DataLazy.cpp

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

branches/schroedinger/escript/src/DataLazy.cpp revision 1888 by jfenwick, Wed Oct 15 04:00:21 2008 UTC trunk/escript/src/DataLazy.cpp revision 4154 by jfenwick, Tue Jan 22 09:30:23 2013 UTC
# Line 1  Line 1 
1    
2  /*******************************************************  /*****************************************************************************
3  *  *
4  * Copyright (c) 2003-2008 by University of Queensland  * Copyright (c) 2003-2013 by University of Queensland
5  * Earth Systems Science Computational Center (ESSCC)  * http://www.uq.edu.au
 * http://www.uq.edu.au/esscc  
6  *  *
7  * Primary Business: Queensland, Australia  * Primary Business: Queensland, Australia
8  * Licensed under the Open Software License version 3.0  * Licensed under the Open Software License version 3.0
9  * http://www.opensource.org/licenses/osl-3.0.php  * http://www.opensource.org/licenses/osl-3.0.php
10  *  *
11  *******************************************************/  * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12    * Development since 2012 by School of Earth Sciences
13    *
14    *****************************************************************************/
15    
16    
17  #include "DataLazy.h"  #include "DataLazy.h"
18  #ifdef USE_NETCDF  #include "esysUtils/Esys_MPI.h"
 #include <netcdfcpp.h>  
 #endif  
 #ifdef PASO_MPI  
 #include <mpi.h>  
 #endif  
19  #ifdef _OPENMP  #ifdef _OPENMP
20  #include <omp.h>  #include <omp.h>
21  #endif  #endif
# Line 26  Line 23 
23  #include "DataTypes.h"  #include "DataTypes.h"
24  #include "Data.h"  #include "Data.h"
25  #include "UnaryFuncs.h"     // for escript::fsign  #include "UnaryFuncs.h"     // for escript::fsign
26    #include "Utils.h"
27    
28    #include "EscriptParams.h"
29    
30    #ifdef USE_NETCDF
31    #include <netcdfcpp.h>
32    #endif
33    
34    #include <iomanip>      // for some fancy formatting in debug
35    
36    // #define LAZYDEBUG(X) if (privdebug){X;}
37    #define LAZYDEBUG(X)
38    namespace
39    {
40    bool privdebug=false;
41    
42    #define ENABLEDEBUG privdebug=true;
43    #define DISABLEDEBUG privdebug=false;
44    }
45    
46    // #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {cerr << "\n!!!!!!! SIZE LIMIT EXCEEDED " << m_children << ";" << m_height << endl << toString() << endl;resolveToIdentity();}
47    
48    // #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {cerr << "SIZE LIMIT EXCEEDED " << m_height << endl;resolveToIdentity();}
49    
50    
51    #define SIZELIMIT if (m_height>escript::escriptParams.getTOO_MANY_LEVELS())  {if (escript::escriptParams.getLAZY_VERBOSE()){cerr << "SIZE LIMIT EXCEEDED height=" << m_height << endl;}resolveToIdentity();}
52    
53    /*
54    How does DataLazy work?
55    ~~~~~~~~~~~~~~~~~~~~~~~
56    
57    Each instance represents a single operation on one or two other DataLazy instances. These arguments are normally
58    denoted left and right.
59    
60    A special operation, IDENTITY, stores an instance of DataReady in the m_id member.
61    This means that all "internal" nodes in the structure are instances of DataLazy.
62    
63    Each operation has a string representation as well as an opgroup - eg G_IDENTITY, G_BINARY, ...
64    Note that IDENITY is not considered a unary operation.
65    
66    I am avoiding calling the structure formed a tree because it is not guaranteed to be one (eg c=a+a).
67    It must however form a DAG (directed acyclic graph).
68    I will refer to individual DataLazy objects with the structure as nodes.
69    
70    Each node also stores:
71    - m_readytype \in {'E','T','C','?'} ~ indicates what sort of DataReady would be produced if the expression was
72        evaluated.
73    - m_buffsrequired ~ the larged number of samples which would need to be kept simultaneously in order to
74        evaluate the expression.
75    - m_samplesize ~ the number of doubles stored in a sample.
76    
77    When a new node is created, the above values are computed based on the values in the child nodes.
78    Eg: if left requires 4 samples and right requires 6 then left+right requires 7 samples.
79    
80    The resolve method, which produces a DataReady from a DataLazy, does the following:
81    1) Create a DataReady to hold the new result.
82    2) Allocate a vector (v) big enough to hold m_buffsrequired samples.
83    3) For each sample, call resolveSample with v, to get its values and copy them into the result object.
84    
85    (In the case of OMP, multiple samples are resolved in parallel so the vector needs to be larger.)
86    
87    resolveSample returns a Vector* and an offset within that vector where the result is stored.
88    Normally, this would be v, but for identity nodes their internal vector is returned instead.
89    
90    The convention that I use, is that the resolve methods should store their results starting at the offset they are passed.
91    
92    For expressions which evaluate to Constant or Tagged, there is a different evaluation method.
93    The collapse method invokes the (non-lazy) operations on the Data class to evaluate the expression.
94    
95    To add a new operator you need to do the following (plus anything I might have forgotten - adding a new group for example):
96    1) Add to the ES_optype.
97    2) determine what opgroup your operation belongs to (X)
98    3) add a string for the op to the end of ES_opstrings
99    4) increase ES_opcount
100    5) add an entry (X) to opgroups
101    6) add an entry to the switch in collapseToReady
102    7) add an entry to resolveX
103    */
104    
105    
106  using namespace std;  using namespace std;
107  using namespace boost;  using namespace boost;
# Line 33  using namespace boost; Line 109  using namespace boost;
109  namespace escript  namespace escript
110  {  {
111    
 const std::string&  
 opToString(ES_optype op);  
   
112  namespace  namespace
113  {  {
114    
115    
116    // enabling this will print out when ever the maximum stacksize used by resolve increases
117    // it assumes _OPENMP is also in use
118    //#define LAZY_STACK_PROF
119    
120    
121    
122    #ifndef _OPENMP
123      #ifdef LAZY_STACK_PROF
124      #undef LAZY_STACK_PROF
125      #endif
126    #endif
127    
128    
129    #ifdef LAZY_STACK_PROF
130    std::vector<void*> stackstart(getNumberOfThreads());
131    std::vector<void*> stackend(getNumberOfThreads());
132    size_t maxstackuse=0;
133    #endif
134    
135  enum ES_opgroup  enum ES_opgroup
136  {  {
137     G_UNKNOWN,     G_UNKNOWN,
138     G_IDENTITY,     G_IDENTITY,
139     G_BINARY,     G_BINARY,        // pointwise operations with two arguments
140     G_UNARY     G_UNARY,     // pointwise operations with one argument
141       G_UNARY_P,       // pointwise operations with one argument, requiring a parameter
142       G_NP1OUT,        // non-pointwise op with one output
143       G_NP1OUT_P,      // non-pointwise op with one output requiring a parameter
144       G_TENSORPROD,    // general tensor product
145       G_NP1OUT_2P,     // non-pointwise op with one output requiring two params
146       G_REDUCTION,     // non-pointwise unary op with a scalar output
147       G_CONDEVAL
148  };  };
149    
150    
151    
152    
153  string ES_opstrings[]={"UNKNOWN","IDENTITY","+","-","*","/","sin","cos","tan",  string ES_opstrings[]={"UNKNOWN","IDENTITY","+","-","*","/","^",
154                "sin","cos","tan",
155              "asin","acos","atan","sinh","cosh","tanh","erf",              "asin","acos","atan","sinh","cosh","tanh","erf",
156              "asinh","acosh","atanh",              "asinh","acosh","atanh",
157              "log10","log","sign","abs","neg","pos","exp","sqrt",              "log10","log","sign","abs","neg","pos","exp","sqrt",
158              "1/","where>0","where<0","where>=0","where<=0"};              "1/","where>0","where<0","where>=0","where<=0", "where<>0","where=0",
159  int ES_opcount=32;              "symmetric","nonsymmetric",
160  ES_opgroup opgroups[]={G_UNKNOWN,G_IDENTITY,G_BINARY,G_BINARY,G_BINARY,G_BINARY,G_UNARY,G_UNARY,G_UNARY, //9              "prod",
161              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,    // 16              "transpose", "trace",
162              G_UNARY,G_UNARY,G_UNARY,                    // 19              "swapaxes",
163              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,        // 27              "minval", "maxval",
164              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY};              "condEval"};
165    int ES_opcount=44;
166    ES_opgroup opgroups[]={G_UNKNOWN,G_IDENTITY,G_BINARY,G_BINARY,G_BINARY,G_BINARY, G_BINARY,
167                G_UNARY,G_UNARY,G_UNARY, //10
168                G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,    // 17
169                G_UNARY,G_UNARY,G_UNARY,                    // 20
170                G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,    // 28
171                G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY, G_UNARY_P, G_UNARY_P,      // 35
172                G_NP1OUT,G_NP1OUT,
173                G_TENSORPROD,
174                G_NP1OUT_P, G_NP1OUT_P,
175                G_NP1OUT_2P,
176                G_REDUCTION, G_REDUCTION,
177                G_CONDEVAL};
178  inline  inline
179  ES_opgroup  ES_opgroup
180  getOpgroup(ES_optype op)  getOpgroup(ES_optype op)
# Line 79  resultFS(DataAbstract_ptr left, DataAbst Line 191  resultFS(DataAbstract_ptr left, DataAbst
191      // that way, if interpolate is required in any other op we can just throw a      // that way, if interpolate is required in any other op we can just throw a
192      // programming error exception.      // programming error exception.
193    
194      FunctionSpace l=left->getFunctionSpace();
195      if (left->getFunctionSpace()!=right->getFunctionSpace())    FunctionSpace r=right->getFunctionSpace();
196      {    if (l!=r)
197          throw DataException("FunctionSpaces not equal - interpolation not supported on lazy data.");    {
198      }      if (r.probeInterpolation(l))
199      return left->getFunctionSpace();      {
200        return l;
201        }
202        if (l.probeInterpolation(r))
203        {
204        return r;
205        }
206        throw DataException("Cannot interpolate between the FunctionSpaces given for operation "+opToString(op)+".");
207      }
208      return l;
209  }  }
210    
211  // return the shape of the result of "left op right"  // return the shape of the result of "left op right"
212    // the shapes resulting from tensor product are more complex to compute so are worked out elsewhere
213  DataTypes::ShapeType  DataTypes::ShapeType
214  resultShape(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)  resultShape(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
215  {  {
216      if (left->getShape()!=right->getShape())      if (left->getShape()!=right->getShape())
217      {      {
218          throw DataException("Shapes not the same - shapes must match for lazy data.");        if ((getOpgroup(op)!=G_BINARY) && (getOpgroup(op)!=G_NP1OUT))
219          {
220            throw DataException("Shapes not the name - shapes must match for (point)binary operations.");
221          }
222    
223          if (left->getRank()==0)   // we need to allow scalar * anything
224          {
225            return right->getShape();
226          }
227          if (right->getRank()==0)
228          {
229            return left->getShape();
230          }
231          throw DataException("Shapes not the same - arguments must have matching shapes (or be scalars) for (point)binary operations on lazy data.");
232      }      }
233      return left->getShape();      return left->getShape();
234  }  }
235    
236  size_t  // return the shape for "op left"
237  resultLength(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)  
238    DataTypes::ShapeType
239    resultShape(DataAbstract_ptr left, ES_optype op, int axis_offset)
240  {  {
241     switch (getOpgroup(op))      switch(op)
242     {      {
243     case G_BINARY: return left->getLength();          case TRANS:
244     case G_UNARY: return left->getLength();         {            // for the scoping of variables
245     default:          const DataTypes::ShapeType& s=left->getShape();
246      throw DataException("Programmer Error - attempt to getLength() for operator "+opToString(op)+".");          DataTypes::ShapeType sh;
247     }          int rank=left->getRank();
248            if (axis_offset<0 || axis_offset>rank)
249            {
250                stringstream e;
251                e << "Error - Data::transpose must have 0 <= axis_offset <= rank=" << rank;
252                throw DataException(e.str());
253            }
254            for (int i=0; i<rank; i++)
255            {
256               int index = (axis_offset+i)%rank;
257               sh.push_back(s[index]); // Append to new shape
258            }
259            return sh;
260           }
261        break;
262        case TRACE:
263           {
264            int rank=left->getRank();
265            if (rank<2)
266            {
267               throw DataException("Trace can only be computed for objects with rank 2 or greater.");
268            }
269            if ((axis_offset>rank-2) || (axis_offset<0))
270            {
271               throw DataException("Trace: axis offset must lie between 0 and rank-2 inclusive.");
272            }
273            if (rank==2)
274            {
275               return DataTypes::scalarShape;
276            }
277            else if (rank==3)
278            {
279               DataTypes::ShapeType sh;
280                   if (axis_offset==0)
281               {
282                    sh.push_back(left->getShape()[2]);
283                   }
284                   else     // offset==1
285               {
286                sh.push_back(left->getShape()[0]);
287                   }
288               return sh;
289            }
290            else if (rank==4)
291            {
292               DataTypes::ShapeType sh;
293               const DataTypes::ShapeType& s=left->getShape();
294                   if (axis_offset==0)
295               {
296                    sh.push_back(s[2]);
297                    sh.push_back(s[3]);
298                   }
299                   else if (axis_offset==1)
300               {
301                    sh.push_back(s[0]);
302                    sh.push_back(s[3]);
303                   }
304               else     // offset==2
305               {
306                sh.push_back(s[0]);
307                sh.push_back(s[1]);
308               }
309               return sh;
310            }
311            else        // unknown rank
312            {
313               throw DataException("Error - Data::trace can only be calculated for rank 2, 3 or 4 object.");
314            }
315           }
316        break;
317            default:
318        throw DataException("Programmer error - resultShape(left,op) can't compute shapes for operator "+opToString(op)+".");
319        }
320  }  }
321    
322  int  DataTypes::ShapeType
323  calcBuffs(const DataLazy_ptr& left, const DataLazy_ptr& right, ES_optype op)  SwapShape(DataAbstract_ptr left, const int axis0, const int axis1)
324  {  {
325     switch(getOpgroup(op))       // This code taken from the Data.cpp swapaxes() method
326     {       // Some of the checks are probably redundant here
327     case G_IDENTITY: return 1;       int axis0_tmp,axis1_tmp;
328     case G_BINARY: return max(left->getBuffsRequired(),right->getBuffsRequired()+1);       const DataTypes::ShapeType& s=left->getShape();
329     case G_UNARY: return max(left->getBuffsRequired(),1);       DataTypes::ShapeType out_shape;
330     default:       // Here's the equivalent of python s_out=s[axis_offset:]+s[:axis_offset]
331      throw DataException("Programmer Error - attempt to calcBuffs() for operator "+opToString(op)+".");       // which goes thru all shape vector elements starting with axis_offset (at index=rank wrap around to 0)
332     }       int rank=left->getRank();
333         if (rank<2) {
334            throw DataException("Error - Data::swapaxes argument must have at least rank 2.");
335         }
336         if (axis0<0 || axis0>rank-1) {
337            stringstream e;
338            e << "Error - Data::swapaxes: axis0 must be between 0 and rank-1=" << (rank-1);
339            throw DataException(e.str());
340         }
341         if (axis1<0 || axis1>rank-1) {
342            stringstream e;
343            e << "Error - Data::swapaxes: axis1 must be between 0 and rank-1=" << (rank-1);
344            throw DataException(e.str());
345         }
346         if (axis0 == axis1) {
347             throw DataException("Error - Data::swapaxes: axis indices must be different.");
348         }
349         if (axis0 > axis1) {
350             axis0_tmp=axis1;
351             axis1_tmp=axis0;
352         } else {
353             axis0_tmp=axis0;
354             axis1_tmp=axis1;
355         }
356         for (int i=0; i<rank; i++) {
357           if (i == axis0_tmp) {
358              out_shape.push_back(s[axis1_tmp]);
359           } else if (i == axis1_tmp) {
360              out_shape.push_back(s[axis0_tmp]);
361           } else {
362              out_shape.push_back(s[i]);
363           }
364         }
365        return out_shape;
366  }  }
367    
368    
369    // determine the output shape for the general tensor product operation
370    // the additional parameters return information required later for the product
371    // the majority of this code is copy pasted from C_General_Tensor_Product
372    DataTypes::ShapeType
373    GTPShape(DataAbstract_ptr left, DataAbstract_ptr right, int axis_offset, int transpose, int& SL, int& SM, int& SR)
374    {
375        
376      // Get rank and shape of inputs
377      int rank0 = left->getRank();
378      int rank1 = right->getRank();
379      const DataTypes::ShapeType& shape0 = left->getShape();
380      const DataTypes::ShapeType& shape1 = right->getShape();
381    
382      // Prepare for the loops of the product and verify compatibility of shapes
383      int start0=0, start1=0;
384      if (transpose == 0)       {}
385      else if (transpose == 1)  { start0 = axis_offset; }
386      else if (transpose == 2)  { start1 = rank1-axis_offset; }
387      else              { throw DataException("DataLazy GeneralTensorProduct Constructor: Error - transpose should be 0, 1 or 2"); }
388    
389      if (rank0<axis_offset)
390      {
391        throw DataException("DataLazy GeneralTensorProduct Constructor: Error - rank of left < axisoffset");
392      }
393    
394      // Adjust the shapes for transpose
395      DataTypes::ShapeType tmpShape0(rank0);    // pre-sizing the vectors rather
396      DataTypes::ShapeType tmpShape1(rank1);    // than using push_back
397      for (int i=0; i<rank0; i++)   { tmpShape0[i]=shape0[(i+start0)%rank0]; }
398      for (int i=0; i<rank1; i++)   { tmpShape1[i]=shape1[(i+start1)%rank1]; }
399    
400      // Prepare for the loops of the product
401      SL=1, SM=1, SR=1;
402      for (int i=0; i<rank0-axis_offset; i++)   {
403        SL *= tmpShape0[i];
404      }
405      for (int i=rank0-axis_offset; i<rank0; i++)   {
406        if (tmpShape0[i] != tmpShape1[i-(rank0-axis_offset)]) {
407          throw DataException("C_GeneralTensorProduct: Error - incompatible shapes");
408        }
409        SM *= tmpShape0[i];
410      }
411      for (int i=axis_offset; i<rank1; i++)     {
412        SR *= tmpShape1[i];
413      }
414    
415      // Define the shape of the output (rank of shape is the sum of the loop ranges below)
416      DataTypes::ShapeType shape2(rank0+rank1-2*axis_offset);  
417      {         // block to limit the scope of out_index
418         int out_index=0;
419         for (int i=0; i<rank0-axis_offset; i++, ++out_index) { shape2[out_index]=tmpShape0[i]; } // First part of arg_0_Z
420         for (int i=axis_offset; i<rank1; i++, ++out_index)   { shape2[out_index]=tmpShape1[i]; } // Last part of arg_1_Z
421      }
422    
423      if (shape2.size()>ESCRIPT_MAX_DATA_RANK)
424      {
425         ostringstream os;
426         os << "C_GeneralTensorProduct: Error - Attempt to create a rank " << shape2.size() << " object. The maximum rank is " << ESCRIPT_MAX_DATA_RANK << ".";
427         throw DataException(os.str());
428      }
429    
430      return shape2;
431    }
432    
433  }   // end anonymous namespace  }   // end anonymous namespace
434    
435    
436    
437    // Return a string representing the operation
438  const std::string&  const std::string&
439  opToString(ES_optype op)  opToString(ES_optype op)
440  {  {
# Line 138  opToString(ES_optype op) Line 445  opToString(ES_optype op)
445    return ES_opstrings[op];    return ES_opstrings[op];
446  }  }
447    
448    void DataLazy::LazyNodeSetup()
449    {
450    #ifdef _OPENMP
451        int numthreads=omp_get_max_threads();
452        m_samples.resize(numthreads*m_samplesize);
453        m_sampleids=new int[numthreads];
454        for (int i=0;i<numthreads;++i)
455        {
456            m_sampleids[i]=-1;  
457        }
458    #else
459        m_samples.resize(m_samplesize);
460        m_sampleids=new int[1];
461        m_sampleids[0]=-1;
462    #endif  // _OPENMP
463    }
464    
465    
466    // Creates an identity node
467  DataLazy::DataLazy(DataAbstract_ptr p)  DataLazy::DataLazy(DataAbstract_ptr p)
468      : parent(p->getFunctionSpace(),p->getShape()),      : parent(p->getFunctionSpace(),p->getShape())
469      m_op(IDENTITY)      ,m_sampleids(0),
470        m_samples(1)
471  {  {
472     if (p->isLazy())     if (p->isLazy())
473     {     {
     // TODO: fix this.   We could make the new node a copy of p?  
474      // I don't want identity of Lazy.      // I don't want identity of Lazy.
475      // Question: Why would that be so bad?      // Question: Why would that be so bad?
476      // Answer: We assume that the child of ID is something we can call getVector on      // Answer: We assume that the child of ID is something we can call getVector on
# Line 153  DataLazy::DataLazy(DataAbstract_ptr p) Line 478  DataLazy::DataLazy(DataAbstract_ptr p)
478     }     }
479     else     else
480     {     {
481      m_id=dynamic_pointer_cast<DataReady>(p);      p->makeLazyShared();
482        DataReady_ptr dr=dynamic_pointer_cast<DataReady>(p);
483        makeIdentity(dr);
484    LAZYDEBUG(cout << "Wrapping " << dr.get() << " id=" << m_id.get() << endl;)
485     }     }
486     m_length=p->getLength();  LAZYDEBUG(cout << "(1)Lazy created with " << m_samplesize << endl;)
    m_buffsRequired=1;  
    m_samplesize=getNumDPPSample()*getNoValues();  
 cout << "(1)Lazy created with " << m_samplesize << endl;  
487  }  }
488    
489  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op)  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op)
490      : parent(left->getFunctionSpace(),left->getShape()),      : parent(left->getFunctionSpace(),(getOpgroup(op)!=G_REDUCTION)?left->getShape():DataTypes::scalarShape),
491      m_op(op)      m_op(op),
492        m_axis_offset(0),
493        m_transpose(0),
494        m_SL(0), m_SM(0), m_SR(0)
495  {  {
496     if (getOpgroup(op)!=G_UNARY)     if ((getOpgroup(op)!=G_UNARY) && (getOpgroup(op)!=G_NP1OUT) && (getOpgroup(op)!=G_REDUCTION))
497     {     {
498      throw DataException("Programmer error - constructor DataLazy(left, op) will only process UNARY operations.");      throw DataException("Programmer error - constructor DataLazy(left, op) will only process UNARY operations.");
499     }     }
500    
501     DataLazy_ptr lleft;     DataLazy_ptr lleft;
502     if (!left->isLazy())     if (!left->isLazy())
503     {     {
# Line 178  DataLazy::DataLazy(DataAbstract_ptr left Line 507  DataLazy::DataLazy(DataAbstract_ptr left
507     {     {
508      lleft=dynamic_pointer_cast<DataLazy>(left);      lleft=dynamic_pointer_cast<DataLazy>(left);
509     }     }
510     m_length=left->getLength();     m_readytype=lleft->m_readytype;
511     m_left=lleft;     m_left=lleft;
    m_buffsRequired=1;  
512     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
513       m_children=m_left->m_children+1;
514       m_height=m_left->m_height+1;
515       LazyNodeSetup();
516       SIZELIMIT
517  }  }
518    
519    
520  DataLazy::DataLazy(DataLazy_ptr left, DataLazy_ptr right, ES_optype op)  // In this constructor we need to consider interpolation
521    DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
522      : parent(resultFS(left,right,op), resultShape(left,right,op)),      : parent(resultFS(left,right,op), resultShape(left,right,op)),
523      m_left(left),      m_op(op),
524      m_right(right),      m_SL(0), m_SM(0), m_SR(0)
     m_op(op)  
525  {  {
526     if (getOpgroup(op)!=G_BINARY)  LAZYDEBUG(cout << "Forming operator with " << left.get() << " " << right.get() << endl;)
527       if ((getOpgroup(op)!=G_BINARY))
528     {     {
529      throw DataException("Programmer error - constructor DataLazy(left, right, op) will only process BINARY operations.");      throw DataException("Programmer error - constructor DataLazy(left, right, op) will only process BINARY operations.");
530     }     }
531     m_length=resultLength(m_left,m_right,m_op);  
532       if (getFunctionSpace()!=left->getFunctionSpace())    // left needs to be interpolated
533       {
534        FunctionSpace fs=getFunctionSpace();
535        Data ltemp(left);
536        Data tmp(ltemp,fs);
537        left=tmp.borrowDataPtr();
538       }
539       if (getFunctionSpace()!=right->getFunctionSpace())   // right needs to be interpolated
540       {
541        Data tmp(Data(right),getFunctionSpace());
542        right=tmp.borrowDataPtr();
543    LAZYDEBUG(cout << "Right interpolation required " << right.get() << endl;)
544       }
545       left->operandCheck(*right);
546    
547       if (left->isLazy())          // the children need to be DataLazy. Wrap them in IDENTITY if required
548       {
549        m_left=dynamic_pointer_cast<DataLazy>(left);
550    LAZYDEBUG(cout << "Left is " << m_left->toString() << endl;)
551       }
552       else
553       {
554        m_left=DataLazy_ptr(new DataLazy(left));
555    LAZYDEBUG(cout << "Left " << left.get() << " wrapped " << m_left->m_id.get() << endl;)
556       }
557       if (right->isLazy())
558       {
559        m_right=dynamic_pointer_cast<DataLazy>(right);
560    LAZYDEBUG(cout << "Right is " << m_right->toString() << endl;)
561       }
562       else
563       {
564        m_right=DataLazy_ptr(new DataLazy(right));
565    LAZYDEBUG(cout << "Right " << right.get() << " wrapped " << m_right->m_id.get() << endl;)
566       }
567       char lt=m_left->m_readytype;
568       char rt=m_right->m_readytype;
569       if (lt=='E' || rt=='E')
570       {
571        m_readytype='E';
572       }
573       else if (lt=='T' || rt=='T')
574       {
575        m_readytype='T';
576       }
577       else
578       {
579        m_readytype='C';
580       }
581     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
582     m_buffsRequired=calcBuffs(m_left, m_right, m_op);     m_children=m_left->m_children+m_right->m_children+2;
583  cout << "(2)Lazy created with " << m_samplesize << endl;     m_height=max(m_left->m_height,m_right->m_height)+1;
584       LazyNodeSetup();
585       SIZELIMIT
586    LAZYDEBUG(cout << "(3)Lazy created with " << m_samplesize << endl;)
587  }  }
588    
589  DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)  DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op, int axis_offset, int transpose)
590      : parent(resultFS(left,right,op), resultShape(left,right,op)),      : parent(resultFS(left,right,op), GTPShape(left,right, axis_offset, transpose, m_SL,m_SM, m_SR)),
591      m_op(op)      m_op(op),
592        m_axis_offset(axis_offset),
593        m_transpose(transpose)
594  {  {
595     if (getOpgroup(op)!=G_BINARY)     if ((getOpgroup(op)!=G_TENSORPROD))
596       {
597        throw DataException("Programmer error - constructor DataLazy(left, right, op, ax, tr) will only process BINARY operations which require parameters.");
598       }
599       if ((transpose>2) || (transpose<0))
600       {
601        throw DataException("DataLazy GeneralTensorProduct constructor: Error - transpose should be 0, 1 or 2");
602       }
603       if (getFunctionSpace()!=left->getFunctionSpace())    // left needs to be interpolated
604     {     {
605      throw DataException("Programmer error - constructor DataLazy(left, op) will only process BINARY operations.");      FunctionSpace fs=getFunctionSpace();
606        Data ltemp(left);
607        Data tmp(ltemp,fs);
608        left=tmp.borrowDataPtr();
609     }     }
610     if (left->isLazy())     if (getFunctionSpace()!=right->getFunctionSpace())   // right needs to be interpolated
611       {
612        Data tmp(Data(right),getFunctionSpace());
613        right=tmp.borrowDataPtr();
614       }
615    //    left->operandCheck(*right);
616    
617       if (left->isLazy())          // the children need to be DataLazy. Wrap them in IDENTITY if required
618     {     {
619      m_left=dynamic_pointer_cast<DataLazy>(left);      m_left=dynamic_pointer_cast<DataLazy>(left);
620     }     }
# Line 225  DataLazy::DataLazy(DataAbstract_ptr left Line 630  DataLazy::DataLazy(DataAbstract_ptr left
630     {     {
631      m_right=DataLazy_ptr(new DataLazy(right));      m_right=DataLazy_ptr(new DataLazy(right));
632     }     }
633       char lt=m_left->m_readytype;
634       char rt=m_right->m_readytype;
635       if (lt=='E' || rt=='E')
636       {
637        m_readytype='E';
638       }
639       else if (lt=='T' || rt=='T')
640       {
641        m_readytype='T';
642       }
643       else
644       {
645        m_readytype='C';
646       }
647       m_samplesize=getNumDPPSample()*getNoValues();
648       m_children=m_left->m_children+m_right->m_children+2;
649       m_height=max(m_left->m_height,m_right->m_height)+1;
650       LazyNodeSetup();
651       SIZELIMIT
652    LAZYDEBUG(cout << "(4)Lazy created with " << m_samplesize << endl;)
653    }
654    
655    
656    DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, int axis_offset)
657        : parent(left->getFunctionSpace(), resultShape(left,op, axis_offset)),
658        m_op(op),
659        m_axis_offset(axis_offset),
660        m_transpose(0),
661        m_tol(0)
662    {
663       if ((getOpgroup(op)!=G_NP1OUT_P))
664       {
665        throw DataException("Programmer error - constructor DataLazy(left, op, ax) will only process UNARY operations which require parameters.");
666       }
667       DataLazy_ptr lleft;
668       if (!left->isLazy())
669       {
670        lleft=DataLazy_ptr(new DataLazy(left));
671       }
672       else
673       {
674        lleft=dynamic_pointer_cast<DataLazy>(left);
675       }
676       m_readytype=lleft->m_readytype;
677       m_left=lleft;
678       m_samplesize=getNumDPPSample()*getNoValues();
679       m_children=m_left->m_children+1;
680       m_height=m_left->m_height+1;
681       LazyNodeSetup();
682       SIZELIMIT
683    LAZYDEBUG(cout << "(5)Lazy created with " << m_samplesize << endl;)
684    }
685    
686    DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, double tol)
687        : parent(left->getFunctionSpace(), left->getShape()),
688        m_op(op),
689        m_axis_offset(0),
690        m_transpose(0),
691        m_tol(tol)
692    {
693       if ((getOpgroup(op)!=G_UNARY_P))
694       {
695        throw DataException("Programmer error - constructor DataLazy(left, op, tol) will only process UNARY operations which require parameters.");
696       }
697       DataLazy_ptr lleft;
698       if (!left->isLazy())
699       {
700        lleft=DataLazy_ptr(new DataLazy(left));
701       }
702       else
703       {
704        lleft=dynamic_pointer_cast<DataLazy>(left);
705       }
706       m_readytype=lleft->m_readytype;
707       m_left=lleft;
708       m_samplesize=getNumDPPSample()*getNoValues();
709       m_children=m_left->m_children+1;
710       m_height=m_left->m_height+1;
711       LazyNodeSetup();
712       SIZELIMIT
713    LAZYDEBUG(cout << "(6)Lazy created with " << m_samplesize << endl;)
714    }
715    
716     m_length=resultLength(m_left,m_right,m_op);  
717    DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, const int axis0, const int axis1)
718        : parent(left->getFunctionSpace(), SwapShape(left,axis0,axis1)),
719        m_op(op),
720        m_axis_offset(axis0),
721        m_transpose(axis1),
722        m_tol(0)
723    {
724       if ((getOpgroup(op)!=G_NP1OUT_2P))
725       {
726        throw DataException("Programmer error - constructor DataLazy(left, op, tol) will only process UNARY operations which require two integer parameters.");
727       }
728       DataLazy_ptr lleft;
729       if (!left->isLazy())
730       {
731        lleft=DataLazy_ptr(new DataLazy(left));
732       }
733       else
734       {
735        lleft=dynamic_pointer_cast<DataLazy>(left);
736       }
737       m_readytype=lleft->m_readytype;
738       m_left=lleft;
739     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
740     m_buffsRequired=calcBuffs(m_left, m_right,m_op);     m_children=m_left->m_children+1;
741  cout << "(3)Lazy created with " << m_samplesize << endl;     m_height=m_left->m_height+1;
742       LazyNodeSetup();
743       SIZELIMIT
744    LAZYDEBUG(cout << "(7)Lazy created with " << m_samplesize << endl;)
745  }  }
746    
747    
748  DataLazy::~DataLazy()  namespace
749  {  {
750    
751        inline int max3(int a, int b, int c)
752        {
753        int t=(a>b?a:b);
754        return (t>c?t:c);
755    
756        }
757    }
758    
759    DataLazy::DataLazy(DataAbstract_ptr mask, DataAbstract_ptr left, DataAbstract_ptr right/*, double tol*/)
760        : parent(left->getFunctionSpace(), left->getShape()),
761        m_op(CONDEVAL),
762        m_axis_offset(0),
763        m_transpose(0),
764        m_tol(0)
765    {
766    
767       DataLazy_ptr lmask;
768       DataLazy_ptr lleft;
769       DataLazy_ptr lright;
770       if (!mask->isLazy())
771       {
772        lmask=DataLazy_ptr(new DataLazy(mask));
773       }
774       else
775       {
776        lmask=dynamic_pointer_cast<DataLazy>(mask);
777       }
778       if (!left->isLazy())
779       {
780        lleft=DataLazy_ptr(new DataLazy(left));
781       }
782       else
783       {
784        lleft=dynamic_pointer_cast<DataLazy>(left);
785       }
786       if (!right->isLazy())
787       {
788        lright=DataLazy_ptr(new DataLazy(right));
789       }
790       else
791       {
792        lright=dynamic_pointer_cast<DataLazy>(right);
793       }
794       m_readytype=lmask->m_readytype;
795       if ((lleft->m_readytype!=lright->m_readytype) || (lmask->m_readytype!=lleft->m_readytype))
796       {
797        throw DataException("Programmer Error - condEval arguments must have the same readytype");
798       }
799       m_left=lleft;
800       m_right=lright;
801       m_mask=lmask;
802       m_samplesize=getNumDPPSample()*getNoValues();
803       m_children=m_left->m_children+m_right->m_children+m_mask->m_children+1;
804       m_height=max3(m_left->m_height,m_right->m_height,m_mask->m_height)+1;
805       LazyNodeSetup();
806       SIZELIMIT
807    LAZYDEBUG(cout << "(8)Lazy created with " << m_samplesize << endl;)
808  }  }
809    
810    
811  int  
812  DataLazy::getBuffsRequired() const  DataLazy::~DataLazy()
813  {  {
814      return m_buffsRequired;     delete[] m_sampleids;
815  }  }
816    
817    
818  // the vector and the offset are a place where the method could write its data if it wishes  /*
819  // it is not obligated to do so. For example, if it has its own storage already, it can use that.    \brief Evaluates the expression using methods on Data.
820  // Hence the return value to indicate where the data is actually stored.    This does the work for the collapse method.
821  // Regardless, the storage should be assumed to be used, even if it isn't.    For reasons of efficiency do not call this method on DataExpanded nodes.
822  const double*  */
823  DataLazy::resolveSample(ValueType& v,int sampleNo,  size_t offset ) const  DataReady_ptr
824    DataLazy::collapseToReady()
825  {  {
826    if (m_op==IDENTITY)      if (m_readytype=='E')
827      { // this is more an efficiency concern than anything else
828        throw DataException("Programmer Error - do not use collapse on Expanded data.");
829      }
830      if (m_op==IDENTITY)
831    {    {
832      const ValueType& vec=m_id->getVector();      return m_id;
     return &(vec[m_id->getPointOffset(sampleNo, 0)]);  
833    }    }
834    size_t rightoffset=offset+m_samplesize;    DataReady_ptr pleft=m_left->collapseToReady();
835    const double* left=m_left->resolveSample(v,sampleNo,offset);    Data left(pleft);
836    const double* right=0;    Data right;
837    if (getOpgroup(m_op)==G_BINARY)    if ((getOpgroup(m_op)==G_BINARY) || (getOpgroup(m_op)==G_TENSORPROD))
838    {    {
839      right=m_right->resolveSample(v,sampleNo,rightoffset);      right=Data(m_right->collapseToReady());
840    }    }
841    double* result=&(v[offset]);    Data result;
842      switch(m_op)
843    {    {
844      switch(m_op)      case ADD:
845      {      result=left+right;
     case ADD:       // since these are pointwise ops, pretend each sample is one point  
     tensor_binary_operation(m_samplesize, left, right, result, plus<double>());  
846      break;      break;
847      case SUB:            case SUB:      
848      tensor_binary_operation(m_samplesize, left, right, result, minus<double>());      result=left-right;
849      break;      break;
850      case MUL:            case MUL:      
851      tensor_binary_operation(m_samplesize, left, right, result, multiplies<double>());      result=left*right;
852      break;      break;
853      case DIV:            case DIV:      
854      tensor_binary_operation(m_samplesize, left, right, result, divides<double>());      result=left/right;
855      break;      break;
 // unary ops  
856      case SIN:      case SIN:
857      tensor_unary_operation(m_samplesize, left, result, ::sin);      result=left.sin();  
858        break;
859        case COS:
860        result=left.cos();
861        break;
862        case TAN:
863        result=left.tan();
864        break;
865        case ASIN:
866        result=left.asin();
867        break;
868        case ACOS:
869        result=left.acos();
870        break;
871        case ATAN:
872        result=left.atan();
873        break;
874        case SINH:
875        result=left.sinh();
876        break;
877        case COSH:
878        result=left.cosh();
879        break;
880        case TANH:
881        result=left.tanh();
882        break;
883        case ERF:
884        result=left.erf();
885        break;
886       case ASINH:
887        result=left.asinh();
888        break;
889       case ACOSH:
890        result=left.acosh();
891        break;
892       case ATANH:
893        result=left.atanh();
894        break;
895        case LOG10:
896        result=left.log10();
897        break;
898        case LOG:
899        result=left.log();
900        break;
901        case SIGN:
902        result=left.sign();
903        break;
904        case ABS:
905        result=left.abs();
906        break;
907        case NEG:
908        result=left.neg();
909        break;
910        case POS:
911        // it doesn't mean anything for delayed.
912        // it will just trigger a deep copy of the lazy object
913        throw DataException("Programmer error - POS not supported for lazy data.");
914        break;
915        case EXP:
916        result=left.exp();
917        break;
918        case SQRT:
919        result=left.sqrt();
920        break;
921        case RECIP:
922        result=left.oneOver();
923        break;
924        case GZ:
925        result=left.wherePositive();
926        break;
927        case LZ:
928        result=left.whereNegative();
929        break;
930        case GEZ:
931        result=left.whereNonNegative();
932        break;
933        case LEZ:
934        result=left.whereNonPositive();
935        break;
936        case NEZ:
937        result=left.whereNonZero(m_tol);
938        break;
939        case EZ:
940        result=left.whereZero(m_tol);
941        break;
942        case SYM:
943        result=left.symmetric();
944        break;
945        case NSYM:
946        result=left.nonsymmetric();
947        break;
948        case PROD:
949        result=C_GeneralTensorProduct(left,right,m_axis_offset, m_transpose);
950        break;
951        case TRANS:
952        result=left.transpose(m_axis_offset);
953        break;
954        case TRACE:
955        result=left.trace(m_axis_offset);
956        break;
957        case SWAP:
958        result=left.swapaxes(m_axis_offset, m_transpose);
959        break;
960        case MINVAL:
961        result=left.minval();
962        break;
963        case MAXVAL:
964        result=left.minval();
965        break;
966        default:
967        throw DataException("Programmer error - collapseToReady does not know how to resolve operator "+opToString(m_op)+".");
968      }
969      return result.borrowReadyPtr();
970    }
971    
972    /*
973       \brief Converts the DataLazy into an IDENTITY storing the value of the expression.
974       This method uses the original methods on the Data class to evaluate the expressions.
975       For this reason, it should not be used on DataExpanded instances. (To do so would defeat
976       the purpose of using DataLazy in the first place).
977    */
978    void
979    DataLazy::collapse()
980    {
981      if (m_op==IDENTITY)
982      {
983        return;
984      }
985      if (m_readytype=='E')
986      { // this is more an efficiency concern than anything else
987        throw DataException("Programmer Error - do not use collapse on Expanded data.");
988      }
989      m_id=collapseToReady();
990      m_op=IDENTITY;
991    }
992    
993    
994    
995    
996    
997    
998    #define PROC_OP(TYPE,X)                               \
999        for (int j=0;j<onumsteps;++j)\
1000        {\
1001          for (int i=0;i<numsteps;++i,resultp+=resultStep) \
1002          { \
1003    LAZYDEBUG(cout << "[left,right]=[" << lroffset << "," << rroffset << "]" << endl;)\
1004    LAZYDEBUG(cout << "{left,right}={" << (*left)[lroffset] << "," << (*right)[rroffset] << "}\n";)\
1005             tensor_binary_operation< TYPE >(chunksize, &((*left)[lroffset]), &((*right)[rroffset]), resultp, X); \
1006    LAZYDEBUG(cout << " result=      " << resultp[0] << endl;) \
1007             lroffset+=leftstep; \
1008             rroffset+=rightstep; \
1009          }\
1010          lroffset+=oleftstep;\
1011          rroffset+=orightstep;\
1012        }
1013    
1014    
1015    // The result will be stored in m_samples
1016    // The return value is a pointer to the DataVector, offset is the offset within the return value
1017    const DataTypes::ValueType*
1018    DataLazy::resolveNodeSample(int tid, int sampleNo, size_t& roffset)
1019    {
1020    LAZYDEBUG(cout << "Resolve sample " << toString() << endl;)
1021        // collapse so we have a 'E' node or an IDENTITY for some other type
1022      if (m_readytype!='E' && m_op!=IDENTITY)
1023      {
1024        collapse();
1025      }
1026      if (m_op==IDENTITY)  
1027      {
1028        const ValueType& vec=m_id->getVectorRO();
1029        roffset=m_id->getPointOffset(sampleNo, 0);
1030    #ifdef LAZY_STACK_PROF
1031    int x;
1032    if (&x<stackend[omp_get_thread_num()])
1033    {
1034           stackend[omp_get_thread_num()]=&x;
1035    }
1036    #endif
1037        return &(vec);
1038      }
1039      if (m_readytype!='E')
1040      {
1041        throw DataException("Programmer Error - Collapse did not produce an expanded node.");
1042      }
1043      if (m_sampleids[tid]==sampleNo)
1044      {
1045        roffset=tid*m_samplesize;
1046        return &(m_samples);        // sample is already resolved
1047      }
1048      m_sampleids[tid]=sampleNo;
1049    
1050      switch (getOpgroup(m_op))
1051      {
1052      case G_UNARY:
1053      case G_UNARY_P: return resolveNodeUnary(tid, sampleNo, roffset);
1054      case G_BINARY: return resolveNodeBinary(tid, sampleNo, roffset);
1055      case G_NP1OUT: return resolveNodeNP1OUT(tid, sampleNo, roffset);
1056      case G_NP1OUT_P: return resolveNodeNP1OUT_P(tid, sampleNo, roffset);
1057      case G_TENSORPROD: return resolveNodeTProd(tid, sampleNo, roffset);
1058      case G_NP1OUT_2P: return resolveNodeNP1OUT_2P(tid, sampleNo, roffset);
1059      case G_REDUCTION: return resolveNodeReduction(tid, sampleNo, roffset);
1060      case G_CONDEVAL: return resolveNodeCondEval(tid, sampleNo, roffset);
1061      default:
1062        throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");
1063      }
1064    }
1065    
1066    const DataTypes::ValueType*
1067    DataLazy::resolveNodeUnary(int tid, int sampleNo, size_t& roffset)
1068    {
1069        // we assume that any collapsing has been done before we get here
1070        // since we only have one argument we don't need to think about only
1071        // processing single points.
1072        // we will also know we won't get identity nodes
1073      if (m_readytype!='E')
1074      {
1075        throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
1076      }
1077      if (m_op==IDENTITY)
1078      {
1079        throw DataException("Programmer error - resolveNodeUnary should not be called on identity nodes.");
1080      }
1081      const DataTypes::ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, roffset);
1082      const double* left=&((*leftres)[roffset]);
1083      roffset=m_samplesize*tid;
1084      double* result=&(m_samples[roffset]);
1085      switch (m_op)
1086      {
1087        case SIN:  
1088        tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sin);
1089      break;      break;
1090      case COS:      case COS:
1091      tensor_unary_operation(m_samplesize, left, result, ::cos);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cos);
1092      break;      break;
1093      case TAN:      case TAN:
1094      tensor_unary_operation(m_samplesize, left, result, ::tan);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tan);
1095      break;      break;
1096      case ASIN:      case ASIN:
1097      tensor_unary_operation(m_samplesize, left, result, ::asin);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::asin);
1098      break;      break;
1099      case ACOS:      case ACOS:
1100      tensor_unary_operation(m_samplesize, left, result, ::acos);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::acos);
1101      break;      break;
1102      case ATAN:      case ATAN:
1103      tensor_unary_operation(m_samplesize, left, result, ::atan);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::atan);
1104      break;      break;
1105      case SINH:      case SINH:
1106      tensor_unary_operation(m_samplesize, left, result, ::sinh);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sinh);
1107      break;      break;
1108      case COSH:      case COSH:
1109      tensor_unary_operation(m_samplesize, left, result, ::cosh);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cosh);
1110      break;      break;
1111      case TANH:      case TANH:
1112      tensor_unary_operation(m_samplesize, left, result, ::tanh);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tanh);
1113      break;      break;
1114      case ERF:      case ERF:
1115  #ifdef _WIN32  #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1116      throw DataException("Error - Data:: erf function is not supported on _WIN32 platforms.");      throw DataException("Error - Data:: erf function is not supported on _WIN32 platforms.");
1117  #else  #else
1118      tensor_unary_operation(m_samplesize, left, result, ::erf);      tensor_unary_operation(m_samplesize, left, result, ::erf);
1119      break;      break;
1120  #endif  #endif
1121     case ASINH:     case ASINH:
1122  #ifdef _WIN32  #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1123      tensor_unary_operation(m_samplesize, left, result, escript::asinh_substitute);      tensor_unary_operation(m_samplesize, left, result, escript::asinh_substitute);
1124  #else  #else
1125      tensor_unary_operation(m_samplesize, left, result, ::asinh);      tensor_unary_operation(m_samplesize, left, result, ::asinh);
1126  #endif    #endif  
1127      break;      break;
1128     case ACOSH:     case ACOSH:
1129  #ifdef _WIN32  #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1130      tensor_unary_operation(m_samplesize, left, result, escript::acosh_substitute);      tensor_unary_operation(m_samplesize, left, result, escript::acosh_substitute);
1131  #else  #else
1132      tensor_unary_operation(m_samplesize, left, result, ::acosh);      tensor_unary_operation(m_samplesize, left, result, ::acosh);
1133  #endif    #endif  
1134      break;      break;
1135     case ATANH:     case ATANH:
1136  #ifdef _WIN32  #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1137      tensor_unary_operation(m_samplesize, left, result, escript::atanh_substitute);      tensor_unary_operation(m_samplesize, left, result, escript::atanh_substitute);
1138  #else  #else
1139      tensor_unary_operation(m_samplesize, left, result, ::atanh);      tensor_unary_operation(m_samplesize, left, result, ::atanh);
1140  #endif    #endif  
1141      break;      break;
1142      case LOG10:      case LOG10:
1143      tensor_unary_operation(m_samplesize, left, result, ::log10);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log10);
1144      break;      break;
1145      case LOG:      case LOG:
1146      tensor_unary_operation(m_samplesize, left, result, ::log);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log);
1147      break;      break;
1148      case SIGN:      case SIGN:
1149      tensor_unary_operation(m_samplesize, left, result, escript::fsign);      tensor_unary_operation(m_samplesize, left, result, escript::fsign);
1150      break;      break;
1151      case ABS:      case ABS:
1152      tensor_unary_operation(m_samplesize, left, result, ::fabs);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::fabs);
1153      break;      break;
1154      case NEG:      case NEG:
1155      tensor_unary_operation(m_samplesize, left, result, negate<double>());      tensor_unary_operation(m_samplesize, left, result, negate<double>());
# Line 357  DataLazy::resolveSample(ValueType& v,int Line 1160  DataLazy::resolveSample(ValueType& v,int
1160      throw DataException("Programmer error - POS not supported for lazy data.");      throw DataException("Programmer error - POS not supported for lazy data.");
1161      break;      break;
1162      case EXP:      case EXP:
1163      tensor_unary_operation(m_samplesize, left, result, ::exp);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::exp);
1164      break;      break;
1165      case SQRT:      case SQRT:
1166      tensor_unary_operation(m_samplesize, left, result, ::sqrt);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sqrt);
1167      break;      break;
1168      case RECIP:      case RECIP:
1169      tensor_unary_operation(m_samplesize, left, result, bind1st(divides<double>(),1.));      tensor_unary_operation(m_samplesize, left, result, bind1st(divides<double>(),1.));
# Line 377  DataLazy::resolveSample(ValueType& v,int Line 1180  DataLazy::resolveSample(ValueType& v,int
1180      case LEZ:      case LEZ:
1181      tensor_unary_operation(m_samplesize, left, result, bind2nd(less_equal<double>(),0.0));      tensor_unary_operation(m_samplesize, left, result, bind2nd(less_equal<double>(),0.0));
1182      break;      break;
1183    // There are actually G_UNARY_P but I don't see a compelling reason to treat them differently
1184        case NEZ:
1185        tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsGT(),m_tol));
1186        break;
1187        case EZ:
1188        tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsLTE(),m_tol));
1189        break;
1190    
1191      default:      default:
1192      throw DataException("Programmer error - do not know how to resolve operator "+opToString(m_op)+".");      throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");
1193      }    }
1194      return &(m_samples);
1195    }
1196    
1197    
1198    const DataTypes::ValueType*
1199    DataLazy::resolveNodeReduction(int tid, int sampleNo, size_t& roffset)
1200    {
1201        // we assume that any collapsing has been done before we get here
1202        // since we only have one argument we don't need to think about only
1203        // processing single points.
1204        // we will also know we won't get identity nodes
1205      if (m_readytype!='E')
1206      {
1207        throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
1208      }
1209      if (m_op==IDENTITY)
1210      {
1211        throw DataException("Programmer error - resolveNodeUnary should not be called on identity nodes.");
1212      }
1213      size_t loffset=0;
1214      const DataTypes::ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, loffset);
1215    
1216      roffset=m_samplesize*tid;
1217      unsigned int ndpps=getNumDPPSample();
1218      unsigned int psize=DataTypes::noValues(m_left->getShape());
1219      double* result=&(m_samples[roffset]);
1220      switch (m_op)
1221      {
1222        case MINVAL:
1223        {
1224          for (unsigned int z=0;z<ndpps;++z)
1225          {
1226            FMin op;
1227            *result=DataMaths::reductionOp(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max());
1228            loffset+=psize;
1229            result++;
1230          }
1231        }
1232        break;
1233        case MAXVAL:
1234        {
1235          for (unsigned int z=0;z<ndpps;++z)
1236          {
1237          FMax op;
1238          *result=DataMaths::reductionOp(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max()*-1);
1239          loffset+=psize;
1240          result++;
1241          }
1242        }
1243        break;
1244        default:
1245        throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");
1246      }
1247      return &(m_samples);
1248    }
1249    
1250    const DataTypes::ValueType*
1251    DataLazy::resolveNodeNP1OUT(int tid, int sampleNo, size_t& roffset)
1252    {
1253        // we assume that any collapsing has been done before we get here
1254        // since we only have one argument we don't need to think about only
1255        // processing single points.
1256      if (m_readytype!='E')
1257      {
1258        throw DataException("Programmer error - resolveNodeNP1OUT should only be called on expanded Data.");
1259    }    }
1260    return result;    if (m_op==IDENTITY)
1261      {
1262        throw DataException("Programmer error - resolveNodeNP1OUT should not be called on identity nodes.");
1263      }
1264      size_t subroffset;
1265      const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1266      roffset=m_samplesize*tid;
1267      size_t loop=0;
1268      size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1269      size_t step=getNoValues();
1270      size_t offset=roffset;
1271      switch (m_op)
1272      {
1273        case SYM:
1274        for (loop=0;loop<numsteps;++loop)
1275        {
1276            DataMaths::symmetric(*leftres,m_left->getShape(),subroffset, m_samples, getShape(), offset);
1277            subroffset+=step;
1278            offset+=step;
1279        }
1280        break;
1281        case NSYM:
1282        for (loop=0;loop<numsteps;++loop)
1283        {
1284            DataMaths::nonsymmetric(*leftres,m_left->getShape(),subroffset, m_samples, getShape(), offset);
1285            subroffset+=step;
1286            offset+=step;
1287        }
1288        break;
1289        default:
1290        throw DataException("Programmer error - resolveNP1OUT can not resolve operator "+opToString(m_op)+".");
1291      }
1292      return &m_samples;
1293    }
1294    
1295    const DataTypes::ValueType*
1296    DataLazy::resolveNodeNP1OUT_P(int tid, int sampleNo, size_t& roffset)
1297    {
1298        // we assume that any collapsing has been done before we get here
1299        // since we only have one argument we don't need to think about only
1300        // processing single points.
1301      if (m_readytype!='E')
1302      {
1303        throw DataException("Programmer error - resolveNodeNP1OUT_P should only be called on expanded Data.");
1304      }
1305      if (m_op==IDENTITY)
1306      {
1307        throw DataException("Programmer error - resolveNodeNP1OUT_P should not be called on identity nodes.");
1308      }
1309      size_t subroffset;
1310      size_t offset;
1311      const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1312      roffset=m_samplesize*tid;
1313      offset=roffset;
1314      size_t loop=0;
1315      size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1316      size_t outstep=getNoValues();
1317      size_t instep=m_left->getNoValues();
1318      switch (m_op)
1319      {
1320        case TRACE:
1321        for (loop=0;loop<numsteps;++loop)
1322        {
1323                DataMaths::trace(*leftres,m_left->getShape(),subroffset, m_samples ,getShape(),offset,m_axis_offset);
1324            subroffset+=instep;
1325            offset+=outstep;
1326        }
1327        break;
1328        case TRANS:
1329        for (loop=0;loop<numsteps;++loop)
1330        {
1331                DataMaths::transpose(*leftres,m_left->getShape(),subroffset, m_samples, getShape(),offset,m_axis_offset);
1332            subroffset+=instep;
1333            offset+=outstep;
1334        }
1335        break;
1336        default:
1337        throw DataException("Programmer error - resolveNP1OUTP can not resolve operator "+opToString(m_op)+".");
1338      }
1339      return &m_samples;
1340    }
1341    
1342    
1343    const DataTypes::ValueType*
1344    DataLazy::resolveNodeNP1OUT_2P(int tid, int sampleNo, size_t& roffset)
1345    {
1346      if (m_readytype!='E')
1347      {
1348        throw DataException("Programmer error - resolveNodeNP1OUT_2P should only be called on expanded Data.");
1349      }
1350      if (m_op==IDENTITY)
1351      {
1352        throw DataException("Programmer error - resolveNodeNP1OUT_2P should not be called on identity nodes.");
1353      }
1354      size_t subroffset;
1355      size_t offset;
1356      const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1357      roffset=m_samplesize*tid;
1358      offset=roffset;
1359      size_t loop=0;
1360      size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1361      size_t outstep=getNoValues();
1362      size_t instep=m_left->getNoValues();
1363      switch (m_op)
1364      {
1365        case SWAP:
1366        for (loop=0;loop<numsteps;++loop)
1367        {
1368                DataMaths::swapaxes(*leftres,m_left->getShape(),subroffset, m_samples, getShape(),offset, m_axis_offset, m_transpose);
1369            subroffset+=instep;
1370            offset+=outstep;
1371        }
1372        break;
1373        default:
1374        throw DataException("Programmer error - resolveNodeNP1OUT2P can not resolve operator "+opToString(m_op)+".");
1375      }
1376      return &m_samples;
1377    }
1378    
1379    const DataTypes::ValueType*
1380    DataLazy::resolveNodeCondEval(int tid, int sampleNo, size_t& roffset)
1381    {
1382      if (m_readytype!='E')
1383      {
1384        throw DataException("Programmer error - resolveNodeCondEval should only be called on expanded Data.");
1385      }
1386      if (m_op!=CONDEVAL)
1387      {
1388        throw DataException("Programmer error - resolveNodeCondEval should only be called on CONDEVAL nodes.");
1389      }
1390      size_t subroffset;
1391    
1392      const ValueType* maskres=m_mask->resolveNodeSample(tid, sampleNo, subroffset);
1393      const ValueType* srcres=0;
1394      if ((*maskres)[subroffset]>0)
1395      {
1396        srcres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1397      }
1398      else
1399      {
1400        srcres=m_right->resolveNodeSample(tid, sampleNo, subroffset);
1401      }
1402    
1403      // Now we need to copy the result
1404    
1405      roffset=m_samplesize*tid;
1406      for (int i=0;i<m_samplesize;++i)
1407      {
1408        m_samples[roffset+i]=(*srcres)[subroffset+i];  
1409      }
1410    
1411      return &m_samples;
1412    }
1413    
1414    // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1415    // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1416    // If both children are expanded, then we can process them in a single operation (we treat
1417    // the whole sample as one big datapoint.
1418    // If one of the children is not expanded, then we need to treat each point in the sample
1419    // individually.
1420    // There is an additional complication when scalar operations are considered.
1421    // For example, 2+Vector.
1422    // In this case each double within the point is treated individually
1423    const DataTypes::ValueType*
1424    DataLazy::resolveNodeBinary(int tid, int sampleNo, size_t& roffset)
1425    {
1426    LAZYDEBUG(cout << "Resolve binary: " << toString() << endl;)
1427    
1428      size_t lroffset=0, rroffset=0;    // offsets in the left and right result vectors
1429        // first work out which of the children are expanded
1430      bool leftExp=(m_left->m_readytype=='E');
1431      bool rightExp=(m_right->m_readytype=='E');
1432      if (!leftExp && !rightExp)
1433      {
1434        throw DataException("Programmer Error - please use collapse if neither argument has type 'E'.");
1435      }
1436      bool leftScalar=(m_left->getRank()==0);
1437      bool rightScalar=(m_right->getRank()==0);
1438      if ((m_left->getRank()!=m_right->getRank()) && (!leftScalar && !rightScalar))
1439      {
1440        throw DataException("resolveBinary - ranks of arguments must match unless one of them is scalar.");
1441      }
1442      size_t leftsize=m_left->getNoValues();
1443      size_t rightsize=m_right->getNoValues();
1444      size_t chunksize=1;           // how many doubles will be processed in one go
1445      int leftstep=0;       // how far should the left offset advance after each step
1446      int rightstep=0;
1447      int numsteps=0;       // total number of steps for the inner loop
1448      int oleftstep=0;  // the o variables refer to the outer loop
1449      int orightstep=0; // The outer loop is only required in cases where there is an extended scalar
1450      int onumsteps=1;
1451      
1452      bool LES=(leftExp && leftScalar); // Left is an expanded scalar
1453      bool RES=(rightExp && rightScalar);
1454      bool LS=(!leftExp && leftScalar); // left is a single scalar
1455      bool RS=(!rightExp && rightScalar);
1456      bool LN=(!leftExp && !leftScalar);    // left is a single non-scalar
1457      bool RN=(!rightExp && !rightScalar);
1458      bool LEN=(leftExp && !leftScalar);    // left is an expanded non-scalar
1459      bool REN=(rightExp && !rightScalar);
1460    
1461      if ((LES && RES) || (LEN && REN)) // both are Expanded scalars or both are expanded non-scalars
1462      {
1463        chunksize=m_left->getNumDPPSample()*leftsize;
1464        leftstep=0;
1465        rightstep=0;
1466        numsteps=1;
1467      }
1468      else if (LES || RES)
1469      {
1470        chunksize=1;
1471        if (LES)        // left is an expanded scalar
1472        {
1473            if (RS)
1474            {
1475               leftstep=1;
1476               rightstep=0;
1477               numsteps=m_left->getNumDPPSample();
1478            }
1479            else        // RN or REN
1480            {
1481               leftstep=0;
1482               oleftstep=1;
1483               rightstep=1;
1484               orightstep=(RN ? -(int)rightsize : 0);
1485               numsteps=rightsize;
1486               onumsteps=m_left->getNumDPPSample();
1487            }
1488        }
1489        else        // right is an expanded scalar
1490        {
1491            if (LS)
1492            {
1493               rightstep=1;
1494               leftstep=0;
1495               numsteps=m_right->getNumDPPSample();
1496            }
1497            else
1498            {
1499               rightstep=0;
1500               orightstep=1;
1501               leftstep=1;
1502               oleftstep=(LN ? -(int)leftsize : 0);
1503               numsteps=leftsize;
1504               onumsteps=m_right->getNumDPPSample();
1505            }
1506        }
1507      }
1508      else  // this leaves (LEN, RS), (LEN, RN) and their transposes
1509      {
1510        if (LEN)    // and Right will be a single value
1511        {
1512            chunksize=rightsize;
1513            leftstep=rightsize;
1514            rightstep=0;
1515            numsteps=m_left->getNumDPPSample();
1516            if (RS)
1517            {
1518               numsteps*=leftsize;
1519            }
1520        }
1521        else    // REN
1522        {
1523            chunksize=leftsize;
1524            rightstep=leftsize;
1525            leftstep=0;
1526            numsteps=m_right->getNumDPPSample();
1527            if (LS)
1528            {
1529               numsteps*=rightsize;
1530            }
1531        }
1532      }
1533    
1534      int resultStep=max(leftstep,rightstep);   // only one (at most) should be !=0
1535        // Get the values of sub-expressions
1536      const ValueType* left=m_left->resolveNodeSample(tid,sampleNo,lroffset);  
1537      const ValueType* right=m_right->resolveNodeSample(tid,sampleNo,rroffset);
1538    LAZYDEBUG(cout << "Post sub calls in " << toString() << endl;)
1539    LAZYDEBUG(cout << "shapes=" << DataTypes::shapeToString(m_left->getShape()) << "," << DataTypes::shapeToString(m_right->getShape()) << endl;)
1540    LAZYDEBUG(cout << "chunksize=" << chunksize << endl << "leftstep=" << leftstep << " rightstep=" << rightstep;)
1541    LAZYDEBUG(cout << " numsteps=" << numsteps << endl << "oleftstep=" << oleftstep << " orightstep=" << orightstep;)
1542    LAZYDEBUG(cout << "onumsteps=" << onumsteps << endl;)
1543    LAZYDEBUG(cout << " DPPS=" << m_left->getNumDPPSample() << "," <<m_right->getNumDPPSample() << endl;)
1544    LAZYDEBUG(cout << "" << LS << RS << LN << RN << LES << RES <<LEN << REN <<   endl;)
1545    
1546    LAZYDEBUG(cout << "Left res["<< lroffset<< "]=" << (*left)[lroffset] << endl;)
1547    LAZYDEBUG(cout << "Right res["<< rroffset<< "]=" << (*right)[rroffset] << endl;)
1548    
1549    
1550      roffset=m_samplesize*tid;
1551      double* resultp=&(m_samples[roffset]);        // results are stored at the vector offset we recieved
1552      switch(m_op)
1553      {
1554        case ADD:
1555            PROC_OP(NO_ARG,plus<double>());
1556        break;
1557        case SUB:
1558        PROC_OP(NO_ARG,minus<double>());
1559        break;
1560        case MUL:
1561        PROC_OP(NO_ARG,multiplies<double>());
1562        break;
1563        case DIV:
1564        PROC_OP(NO_ARG,divides<double>());
1565        break;
1566        case POW:
1567           PROC_OP(double (double,double),::pow);
1568        break;
1569        default:
1570        throw DataException("Programmer error - resolveBinary can not resolve operator "+opToString(m_op)+".");
1571      }
1572    LAZYDEBUG(cout << "Result res[" << roffset<< "]" << m_samples[roffset] << endl;)
1573      return &m_samples;
1574    }
1575    
1576    
1577    // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1578    // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1579    // unlike the other resolve helpers, we must treat these datapoints separately.
1580    const DataTypes::ValueType*
1581    DataLazy::resolveNodeTProd(int tid, int sampleNo, size_t& roffset)
1582    {
1583    LAZYDEBUG(cout << "Resolve TensorProduct: " << toString() << endl;)
1584    
1585      size_t lroffset=0, rroffset=0;    // offsets in the left and right result vectors
1586        // first work out which of the children are expanded
1587      bool leftExp=(m_left->m_readytype=='E');
1588      bool rightExp=(m_right->m_readytype=='E');
1589      int steps=getNumDPPSample();
1590      int leftStep=(leftExp? m_left->getNoValues() : 0);        // do not have scalars as input to this method
1591      int rightStep=(rightExp?m_right->getNoValues() : 0);
1592    
1593      int resultStep=getNoValues();
1594      roffset=m_samplesize*tid;
1595      size_t offset=roffset;
1596    
1597      const ValueType* left=m_left->resolveNodeSample(tid, sampleNo, lroffset);
1598    
1599      const ValueType* right=m_right->resolveNodeSample(tid, sampleNo, rroffset);
1600    
1601    LAZYDEBUG(cerr << "[Left shape]=" << DataTypes::shapeToString(m_left->getShape()) << "\n[Right shape]=" << DataTypes::shapeToString(m_right->getShape()) << " result=" <<DataTypes::shapeToString(getShape()) <<  endl;
1602    cout << getNoValues() << endl;)
1603    
1604    
1605    LAZYDEBUG(cerr << "Post sub calls: " << toString() << endl;)
1606    LAZYDEBUG(cout << "LeftExp=" << leftExp << " rightExp=" << rightExp << endl;)
1607    LAZYDEBUG(cout << "LeftR=" << m_left->getRank() << " rightExp=" << m_right->getRank() << endl;)
1608    LAZYDEBUG(cout << "LeftSize=" << m_left->getNoValues() << " RightSize=" << m_right->getNoValues() << endl;)
1609    LAZYDEBUG(cout << "m_samplesize=" << m_samplesize << endl;)
1610    LAZYDEBUG(cout << "outputshape=" << DataTypes::shapeToString(getShape()) << endl;)
1611    LAZYDEBUG(cout << "DPPS=" << m_right->getNumDPPSample() <<"."<<endl;)
1612    
1613      double* resultp=&(m_samples[offset]);     // results are stored at the vector offset we recieved
1614      switch(m_op)
1615      {
1616        case PROD:
1617        for (int i=0;i<steps;++i,resultp+=resultStep)
1618        {
1619              const double *ptr_0 = &((*left)[lroffset]);
1620              const double *ptr_1 = &((*right)[rroffset]);
1621    
1622    LAZYDEBUG(cout << DataTypes::pointToString(*left, m_left->getShape(),lroffset,"LEFT") << endl;)
1623    LAZYDEBUG(cout << DataTypes::pointToString(*right,m_right->getShape(),rroffset, "RIGHT") << endl;)
1624    
1625              matrix_matrix_product(m_SL, m_SM, m_SR, ptr_0, ptr_1, resultp, m_transpose);
1626    
1627          lroffset+=leftStep;
1628          rroffset+=rightStep;
1629        }
1630        break;
1631        default:
1632        throw DataException("Programmer error - resolveTProduct can not resolve operator "+opToString(m_op)+".");
1633      }
1634      roffset=offset;
1635      return &m_samples;
1636    }
1637    
1638    
1639    const DataTypes::ValueType*
1640    DataLazy::resolveSample(int sampleNo, size_t& roffset)
1641    {
1642    #ifdef _OPENMP
1643        int tid=omp_get_thread_num();
1644    #else
1645        int tid=0;
1646    #endif
1647    
1648    #ifdef LAZY_STACK_PROF
1649        stackstart[tid]=&tid;
1650        stackend[tid]=&tid;
1651        const DataTypes::ValueType* r=resolveNodeSample(tid, sampleNo, roffset);
1652        size_t d=(size_t)stackstart[tid]-(size_t)stackend[tid];
1653        #pragma omp critical
1654        if (d>maxstackuse)
1655        {
1656    cout << "Max resolve Stack use " << d << endl;
1657            maxstackuse=d;
1658        }
1659        return r;
1660    #else
1661        return resolveNodeSample(tid, sampleNo, roffset);
1662    #endif
1663    }
1664    
1665    
1666    // This needs to do the work of the identity constructor
1667    void
1668    DataLazy::resolveToIdentity()
1669    {
1670       if (m_op==IDENTITY)
1671        return;
1672       DataReady_ptr p=resolveNodeWorker();
1673       makeIdentity(p);
1674  }  }
1675    
1676    void DataLazy::makeIdentity(const DataReady_ptr& p)
1677    {
1678       m_op=IDENTITY;
1679       m_axis_offset=0;
1680       m_transpose=0;
1681       m_SL=m_SM=m_SR=0;
1682       m_children=m_height=0;
1683       m_id=p;
1684       if(p->isConstant()) {m_readytype='C';}
1685       else if(p->isExpanded()) {m_readytype='E';}
1686       else if (p->isTagged()) {m_readytype='T';}
1687       else {throw DataException("Unknown DataReady instance in convertToIdentity constructor.");}
1688       m_samplesize=p->getNumDPPSample()*p->getNoValues();
1689       m_left.reset();
1690       m_right.reset();
1691    }
1692    
1693    
1694  DataReady_ptr  DataReady_ptr
1695  DataLazy::resolve()  DataLazy::resolve()
1696  {  {
1697    // This is broken!     We need to have a buffer per thread!      resolveToIdentity();
1698    // so the allocation of v needs to move inside the loop somehow      return m_id;
1699    }
1700    
 cout << "Sample size=" << m_samplesize << endl;  
 cout << "Buffers=" << m_buffsRequired << endl;  
1701    
1702    size_t threadbuffersize=m_samplesize*(max(1,m_buffsRequired)+1);  /* This is really a static method but I think that caused problems in windows */
1703    int numthreads=1;  void
1704    DataLazy::resolveGroupWorker(std::vector<DataLazy*>& dats)
1705    {
1706      if (dats.empty())
1707      {
1708        return;
1709      }
1710      vector<DataLazy*> work;
1711      FunctionSpace fs=dats[0]->getFunctionSpace();
1712      bool match=true;
1713      for (int i=dats.size()-1;i>=0;--i)
1714      {
1715        if (dats[i]->m_readytype!='E')
1716        {
1717            dats[i]->collapse();
1718        }
1719        if (dats[i]->m_op!=IDENTITY)
1720        {
1721            work.push_back(dats[i]);
1722            if (fs!=dats[i]->getFunctionSpace())
1723            {
1724                match=false;
1725            }
1726        }
1727      }
1728      if (work.empty())
1729      {
1730        return;     // no work to do
1731      }
1732      if (match)    // all functionspaces match.  Yes I realise this is overly strict
1733      {     // it is possible that dats[0] is one of the objects which we discarded and
1734            // all the other functionspaces match.
1735        vector<DataExpanded*> dep;
1736        vector<ValueType*> vecs;
1737        for (int i=0;i<work.size();++i)
1738        {
1739            dep.push_back(new DataExpanded(fs,work[i]->getShape(), ValueType(work[i]->getNoValues())));
1740            vecs.push_back(&(dep[i]->getVectorRW()));
1741        }
1742        int totalsamples=work[0]->getNumSamples();
1743        const ValueType* res=0; // Storage for answer
1744        int sample;
1745        #pragma omp parallel private(sample, res)
1746        {
1747            size_t roffset=0;
1748            #pragma omp for schedule(static)
1749            for (sample=0;sample<totalsamples;++sample)
1750            {
1751            roffset=0;
1752            int j;
1753            for (j=work.size()-1;j>=0;--j)
1754            {
1755  #ifdef _OPENMP  #ifdef _OPENMP
1756    numthreads=omp_get_max_threads();                  res=work[j]->resolveNodeSample(omp_get_thread_num(),sample,roffset);
1757    int threadnum=0;  #else
1758  #endif                  res=work[j]->resolveNodeSample(0,sample,roffset);
1759    ValueType v(numthreads*threadbuffersize);  #endif
1760  cout << "Buffer created with size=" << v.size() << endl;                  DataVector::size_type outoffset=dep[j]->getPointOffset(sample,0);
1761    ValueType dummy(getNoValues());                  memcpy(&((*vecs[j])[outoffset]),&((*res)[roffset]),work[j]->m_samplesize*sizeof(DataVector::ElementType));
1762    DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(),dummy);          }
1763    ValueType& resvec=result->getVector();          }
1764        }
1765        // Now we need to load the new results as identity ops into the lazy nodes
1766        for (int i=work.size()-1;i>=0;--i)
1767        {
1768            work[i]->makeIdentity(boost::dynamic_pointer_cast<DataReady>(dep[i]->getPtr()));
1769        }
1770      }
1771      else  // functionspaces do not match
1772      {
1773        for (int i=0;i<work.size();++i)
1774        {
1775            work[i]->resolveToIdentity();
1776        }
1777      }
1778    }
1779    
1780    
1781    
1782    // This version of resolve uses storage in each node to hold results
1783    DataReady_ptr
1784    DataLazy::resolveNodeWorker()
1785    {
1786      if (m_readytype!='E')     // if the whole sub-expression is Constant or Tagged, then evaluate it normally
1787      {
1788        collapse();
1789      }
1790      if (m_op==IDENTITY)       // So a lazy expression of Constant or Tagged data will be returned here.
1791      {
1792        return m_id;
1793      }
1794        // from this point on we must have m_op!=IDENTITY and m_readytype=='E'
1795      DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(),  ValueType(getNoValues()));
1796      ValueType& resvec=result->getVectorRW();
1797    DataReady_ptr resptr=DataReady_ptr(result);    DataReady_ptr resptr=DataReady_ptr(result);
1798    
1799    int sample;    int sample;
   int resoffset;  
1800    int totalsamples=getNumSamples();    int totalsamples=getNumSamples();
1801    #pragma omp parallel for private(sample,resoffset,threadnum) schedule(static)    const ValueType* res=0;   // Storage for answer
1802    for (sample=0;sample<totalsamples;++sample)  LAZYDEBUG(cout << "Total number of samples=" <<totalsamples << endl;)
1803      #pragma omp parallel private(sample,res)
1804    {    {
1805        size_t roffset=0;
1806    #ifdef LAZY_STACK_PROF
1807        stackstart[omp_get_thread_num()]=&roffset;
1808        stackend[omp_get_thread_num()]=&roffset;
1809    #endif
1810        #pragma omp for schedule(static)
1811        for (sample=0;sample<totalsamples;++sample)
1812        {
1813            roffset=0;
1814  #ifdef _OPENMP  #ifdef _OPENMP
1815      const double* res=resolveSample(v,sample,threadbuffersize*omp_get_thread_num());              res=resolveNodeSample(omp_get_thread_num(),sample,roffset);
1816  #else  #else
1817      const double* res=resolveSample(v,sample,0);   // this would normally be v, but not if its a single IDENTITY op.              res=resolveNodeSample(0,sample,roffset);
1818  #endif  #endif
1819      resoffset=result->getPointOffset(sample,0);  LAZYDEBUG(cout << "Sample #" << sample << endl;)
1820      for (int i=0;i<m_samplesize;++i,++resoffset)    // copy values into the output vector  LAZYDEBUG(cout << "Final res[" << roffset<< "]=" << (*res)[roffset] << (*res)[roffset]<< endl; )
1821      {              DataVector::size_type outoffset=result->getPointOffset(sample,0);
1822      resvec[resoffset]=res[i];              memcpy(&(resvec[outoffset]),&((*res)[roffset]),m_samplesize*sizeof(DataVector::ElementType));
1823      }      }
1824      }
1825    #ifdef LAZY_STACK_PROF
1826      for (int i=0;i<getNumberOfThreads();++i)
1827      {
1828        size_t r=((size_t)stackstart[i] - (size_t)stackend[i]);
1829    //  cout << i << " " << stackstart[i] << " .. " << stackend[i] << " = " <<  r << endl;
1830        if (r>maxstackuse)
1831        {
1832            maxstackuse=r;
1833        }
1834    }    }
1835      cout << "Max resolve Stack use=" << maxstackuse << endl;
1836    #endif
1837    return resptr;    return resptr;
1838  }  }
1839    
# Line 430  std::string Line 1841  std::string
1841  DataLazy::toString() const  DataLazy::toString() const
1842  {  {
1843    ostringstream oss;    ostringstream oss;
1844    oss << "Lazy Data:";    oss << "Lazy Data: [depth=" << m_height<< "] ";
1845    intoString(oss);    switch (escriptParams.getLAZY_STR_FMT())
1846      {
1847      case 1:   // tree format
1848        oss << endl;
1849        intoTreeString(oss,"");
1850        break;
1851      case 2:   // just the depth
1852        break;
1853      default:
1854        intoString(oss);
1855        break;
1856      }
1857    return oss.str();    return oss.str();
1858  }  }
1859    
1860    
1861  void  void
1862  DataLazy::intoString(ostringstream& oss) const  DataLazy::intoString(ostringstream& oss) const
1863  {  {
1864    //    oss << "[" << m_children <<";"<<m_height <<"]";
1865    switch (getOpgroup(m_op))    switch (getOpgroup(m_op))
1866    {    {
1867    case G_IDENTITY:    case G_IDENTITY:
1868        if (m_id->isExpanded())
1869        {
1870           oss << "E";
1871        }
1872        else if (m_id->isTagged())
1873        {
1874          oss << "T";
1875        }
1876        else if (m_id->isConstant())
1877        {
1878          oss << "C";
1879        }
1880        else
1881        {
1882          oss << "?";
1883        }
1884      oss << '@' << m_id.get();      oss << '@' << m_id.get();
1885      break;      break;
1886    case G_BINARY:    case G_BINARY:
# Line 451  DataLazy::intoString(ostringstream& oss) Line 1891  DataLazy::intoString(ostringstream& oss)
1891      oss << ')';      oss << ')';
1892      break;      break;
1893    case G_UNARY:    case G_UNARY:
1894      case G_UNARY_P:
1895      case G_NP1OUT:
1896      case G_NP1OUT_P:
1897      case G_REDUCTION:
1898        oss << opToString(m_op) << '(';
1899        m_left->intoString(oss);
1900        oss << ')';
1901        break;
1902      case G_TENSORPROD:
1903        oss << opToString(m_op) << '(';
1904        m_left->intoString(oss);
1905        oss << ", ";
1906        m_right->intoString(oss);
1907        oss << ')';
1908        break;
1909      case G_NP1OUT_2P:
1910      oss << opToString(m_op) << '(';      oss << opToString(m_op) << '(';
1911      m_left->intoString(oss);      m_left->intoString(oss);
1912        oss << ", " << m_axis_offset << ", " << m_transpose;
1913        oss << ')';
1914        break;
1915      case G_CONDEVAL:
1916        oss << opToString(m_op)<< '(' ;
1917        m_mask->intoString(oss);
1918        oss << " ? ";
1919        m_left->intoString(oss);
1920        oss << " : ";
1921        m_right->intoString(oss);
1922      oss << ')';      oss << ')';
1923      break;      break;
1924    default:    default:
# Line 460  DataLazy::intoString(ostringstream& oss) Line 1926  DataLazy::intoString(ostringstream& oss)
1926    }    }
1927  }  }
1928    
1929  // Note that in this case, deepCopy does not make copies of the leaves.  
1930  // Hopefully copy on write (or whatever we end up using) will take care of this.  void
1931    DataLazy::intoTreeString(ostringstream& oss, string indent) const
1932    {
1933      oss << '[' << m_rank << ':' << setw(3) << m_samplesize << "] " << indent;
1934      switch (getOpgroup(m_op))
1935      {
1936      case G_IDENTITY:
1937        if (m_id->isExpanded())
1938        {
1939           oss << "E";
1940        }
1941        else if (m_id->isTagged())
1942        {
1943          oss << "T";
1944        }
1945        else if (m_id->isConstant())
1946        {
1947          oss << "C";
1948        }
1949        else
1950        {
1951          oss << "?";
1952        }
1953        oss << '@' << m_id.get() << endl;
1954        break;
1955      case G_BINARY:
1956        oss << opToString(m_op) << endl;
1957        indent+='.';
1958        m_left->intoTreeString(oss, indent);
1959        m_right->intoTreeString(oss, indent);
1960        break;
1961      case G_UNARY:
1962      case G_UNARY_P:
1963      case G_NP1OUT:
1964      case G_NP1OUT_P:
1965      case G_REDUCTION:
1966        oss << opToString(m_op) << endl;
1967        indent+='.';
1968        m_left->intoTreeString(oss, indent);
1969        break;
1970      case G_TENSORPROD:
1971        oss << opToString(m_op) << endl;
1972        indent+='.';
1973        m_left->intoTreeString(oss, indent);
1974        m_right->intoTreeString(oss, indent);
1975        break;
1976      case G_NP1OUT_2P:
1977        oss << opToString(m_op) << ", " << m_axis_offset << ", " << m_transpose<< endl;
1978        indent+='.';
1979        m_left->intoTreeString(oss, indent);
1980        break;
1981      default:
1982        oss << "UNKNOWN";
1983      }
1984    }
1985    
1986    
1987  DataAbstract*  DataAbstract*
1988  DataLazy::deepCopy()  DataLazy::deepCopy()
1989  {  {
1990    if (m_op==IDENTITY)    switch (getOpgroup(m_op))
1991    {    {
1992      return new DataLazy(m_left);    // we don't need to copy the child here    case G_IDENTITY:  return new DataLazy(m_id->deepCopy()->getPtr());
1993      case G_UNARY:
1994      case G_REDUCTION:      return new DataLazy(m_left->deepCopy()->getPtr(),m_op);
1995      case G_UNARY_P:   return new DataLazy(m_left->deepCopy()->getPtr(), m_op, m_tol);
1996      case G_BINARY:    return new DataLazy(m_left->deepCopy()->getPtr(),m_right->deepCopy()->getPtr(),m_op);
1997      case G_NP1OUT: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(),m_op);
1998      case G_TENSORPROD: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
1999      case G_NP1OUT_P:   return new DataLazy(m_left->deepCopy()->getPtr(),m_op,  m_axis_offset);
2000      case G_NP1OUT_2P:  return new DataLazy(m_left->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
2001      default:
2002        throw DataException("Programmer error - do not know how to deepcopy operator "+opToString(m_op)+".");
2003    }    }
   return new DataLazy(m_left->deepCopy()->getPtr(),m_right->deepCopy()->getPtr(),m_op);  
2004  }  }
2005    
2006    
2007    
2008    // There is no single, natural interpretation of getLength on DataLazy.
2009    // Instances of DataReady can look at the size of their vectors.
2010    // For lazy though, it could be the size the data would be if it were resolved;
2011    // or it could be some function of the lengths of the DataReady instances which
2012    // form part of the expression.
2013    // Rather than have people making assumptions, I have disabled the method.
2014  DataTypes::ValueType::size_type  DataTypes::ValueType::size_type
2015  DataLazy::getLength() const  DataLazy::getLength() const
2016  {  {
2017    return m_length;    throw DataException("getLength() does not make sense for lazy data.");
2018  }  }
2019    
2020    
# Line 486  DataLazy::getSlice(const DataTypes::Regi Line 2024  DataLazy::getSlice(const DataTypes::Regi
2024    throw DataException("getSlice - not implemented for Lazy objects.");    throw DataException("getSlice - not implemented for Lazy objects.");
2025  }  }
2026    
2027    
2028    // To do this we need to rely on our child nodes
2029    DataTypes::ValueType::size_type
2030    DataLazy::getPointOffset(int sampleNo,
2031                     int dataPointNo)
2032    {
2033      if (m_op==IDENTITY)
2034      {
2035        return m_id->getPointOffset(sampleNo,dataPointNo);
2036      }
2037      if (m_readytype!='E')
2038      {
2039        collapse();
2040        return m_id->getPointOffset(sampleNo,dataPointNo);
2041      }
2042      // at this point we do not have an identity node and the expression will be Expanded
2043      // so we only need to know which child to ask
2044      if (m_left->m_readytype=='E')
2045      {
2046        return m_left->getPointOffset(sampleNo,dataPointNo);
2047      }
2048      else
2049      {
2050        return m_right->getPointOffset(sampleNo,dataPointNo);
2051      }
2052    }
2053    
2054    // To do this we need to rely on our child nodes
2055  DataTypes::ValueType::size_type  DataTypes::ValueType::size_type
2056  DataLazy::getPointOffset(int sampleNo,  DataLazy::getPointOffset(int sampleNo,
2057                   int dataPointNo) const                   int dataPointNo) const
2058  {  {
2059    throw DataException("getPointOffset - not implemented for Lazy objects - yet.");    if (m_op==IDENTITY)
2060      {
2061        return m_id->getPointOffset(sampleNo,dataPointNo);
2062      }
2063      if (m_readytype=='E')
2064      {
2065        // at this point we do not have an identity node and the expression will be Expanded
2066        // so we only need to know which child to ask
2067        if (m_left->m_readytype=='E')
2068        {
2069        return m_left->getPointOffset(sampleNo,dataPointNo);
2070        }
2071        else
2072        {
2073        return m_right->getPointOffset(sampleNo,dataPointNo);
2074        }
2075      }
2076      if (m_readytype=='C')
2077      {
2078        return m_left->getPointOffset(sampleNo,dataPointNo); // which child doesn't matter
2079      }
2080      throw DataException("Programmer error - getPointOffset on lazy data may require collapsing (but this object is marked const).");
2081    }
2082    
2083    
2084    // I have decided to let Data:: handle this issue.
2085    void
2086    DataLazy::setToZero()
2087    {
2088    //   DataTypes::ValueType v(getNoValues(),0);
2089    //   m_id=DataReady_ptr(new DataConstant(getFunctionSpace(),getShape(),v));
2090    //   m_op=IDENTITY;
2091    //   m_right.reset();  
2092    //   m_left.reset();
2093    //   m_readytype='C';
2094    //   m_buffsRequired=1;
2095    
2096      privdebug=privdebug;  // to stop the compiler complaining about unused privdebug
2097      throw DataException("Programmer error - setToZero not supported for DataLazy (DataLazy objects should be read only).");
2098    }
2099    
2100    bool
2101    DataLazy::actsExpanded() const
2102    {
2103        return (m_readytype=='E');
2104  }  }
2105    
2106  }   // end namespace  }   // end namespace

Legend:
Removed from v.1888  
changed lines
  Added in v.4154

  ViewVC Help
Powered by ViewVC 1.1.26