/[escript]/branches/clazy/escriptcore/src/DataLazy.cpp
ViewVC logotype

Diff of /branches/clazy/escriptcore/src/DataLazy.cpp

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

branches/schroedinger_upto1946/escript/src/DataLazy.cpp revision 1950 by jfenwick, Thu Oct 30 00:59:34 2008 UTC trunk/escriptcore/src/DataLazy.cpp revision 5464 by jfenwick, Sat Feb 14 00:25:03 2015 UTC
# Line 1  Line 1 
1    
2  /*******************************************************  /*****************************************************************************
3  *  *
4  * Copyright (c) 2003-2008 by University of Queensland  * Copyright (c) 2003-2015 by University of Queensland
5  * Earth Systems Science Computational Center (ESSCC)  * http://www.uq.edu.au
 * http://www.uq.edu.au/esscc  
6  *  *
7  * Primary Business: Queensland, Australia  * Primary Business: Queensland, Australia
8  * Licensed under the Open Software License version 3.0  * Licensed under the Open Software License version 3.0
9  * http://www.opensource.org/licenses/osl-3.0.php  * http://www.opensource.org/licenses/osl-3.0.php
10  *  *
11  *******************************************************/  * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12    * Development 2012-2013 by School of Earth Sciences
13    * Development from 2014 by Centre for Geoscience Computing (GeoComp)
14    *
15    *****************************************************************************/
16    
17    #define ESNEEDPYTHON
18    #include "esysUtils/first.h"
19    
20  #include "DataLazy.h"  #include "DataLazy.h"
21  #ifdef USE_NETCDF  #include "esysUtils/Esys_MPI.h"
 #include <netcdfcpp.h>  
 #endif  
 #ifdef PASO_MPI  
 #include <mpi.h>  
 #endif  
22  #ifdef _OPENMP  #ifdef _OPENMP
23  #include <omp.h>  #include <omp.h>
24  #endif  #endif
# Line 28  Line 28 
28  #include "UnaryFuncs.h"     // for escript::fsign  #include "UnaryFuncs.h"     // for escript::fsign
29  #include "Utils.h"  #include "Utils.h"
30    
31    #include "EscriptParams.h"
32    
33    #ifdef USE_NETCDF
34    #include <netcdfcpp.h>
35    #endif
36    
37    #include <iomanip>      // for some fancy formatting in debug
38    
39    // #define LAZYDEBUG(X) if (privdebug){X;}
40    #define LAZYDEBUG(X)
41    namespace
42    {
43    bool privdebug=false;
44    
45    #define ENABLEDEBUG privdebug=true;
46    #define DISABLEDEBUG privdebug=false;
47    }
48    
49    // #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {cerr << "\n!!!!!!! SIZE LIMIT EXCEEDED " << m_children << ";" << m_height << endl << toString() << endl;resolveToIdentity();}
50    
51    // #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {cerr << "SIZE LIMIT EXCEEDED " << m_height << endl;resolveToIdentity();}
52    
53    
54    #define SIZELIMIT if (m_height>escript::escriptParams.getTOO_MANY_LEVELS())  {if (escript::escriptParams.getLAZY_VERBOSE()){cerr << "SIZE LIMIT EXCEEDED height=" << m_height << endl;}resolveToIdentity();}
55    
56  /*  /*
57  How does DataLazy work?  How does DataLazy work?
58  ~~~~~~~~~~~~~~~~~~~~~~~  ~~~~~~~~~~~~~~~~~~~~~~~
# Line 39  A special operation, IDENTITY, stores an Line 64  A special operation, IDENTITY, stores an
64  This means that all "internal" nodes in the structure are instances of DataLazy.  This means that all "internal" nodes in the structure are instances of DataLazy.
65    
66  Each operation has a string representation as well as an opgroup - eg G_IDENTITY, G_BINARY, ...  Each operation has a string representation as well as an opgroup - eg G_IDENTITY, G_BINARY, ...
67  Note that IDENITY is not considered a unary operation.  Note that IDENTITY is not considered a unary operation.
68    
69  I am avoiding calling the structure formed a tree because it is not guaranteed to be one (eg c=a+a).  I am avoiding calling the structure formed a tree because it is not guaranteed to be one (eg c=a+a).
70  It must however form a DAG (directed acyclic graph).  It must however form a DAG (directed acyclic graph).
# Line 48  I will refer to individual DataLazy obje Line 73  I will refer to individual DataLazy obje
73  Each node also stores:  Each node also stores:
74  - m_readytype \in {'E','T','C','?'} ~ indicates what sort of DataReady would be produced if the expression was  - m_readytype \in {'E','T','C','?'} ~ indicates what sort of DataReady would be produced if the expression was
75      evaluated.      evaluated.
76  - m_length ~ how many values would be stored in the answer if the expression was evaluated.  - m_buffsrequired ~ the large number of samples which would need to be kept simultaneously in order to
 - m_buffsrequired ~ the larged number of samples which would need to be kept simultaneously in order to  
77      evaluate the expression.      evaluate the expression.
78  - m_samplesize ~ the number of doubles stored in a sample.  - m_samplesize ~ the number of doubles stored in a sample.
79    
# Line 70  The convention that I use, is that the r Line 94  The convention that I use, is that the r
94    
95  For expressions which evaluate to Constant or Tagged, there is a different evaluation method.  For expressions which evaluate to Constant or Tagged, there is a different evaluation method.
96  The collapse method invokes the (non-lazy) operations on the Data class to evaluate the expression.  The collapse method invokes the (non-lazy) operations on the Data class to evaluate the expression.
97    
98    To add a new operator you need to do the following (plus anything I might have forgotten - adding a new group for example):
99    1) Add to the ES_optype.
100    2) determine what opgroup your operation belongs to (X)
101    3) add a string for the op to the end of ES_opstrings
102    4) increase ES_opcount
103    5) add an entry (X) to opgroups
104    6) add an entry to the switch in collapseToReady
105    7) add an entry to resolveX
106  */  */
107    
108    
# Line 79  using namespace boost; Line 112  using namespace boost;
112  namespace escript  namespace escript
113  {  {
114    
 const std::string&  
 opToString(ES_optype op);  
   
115  namespace  namespace
116  {  {
117    
118    
119    // enabling this will print out when ever the maximum stacksize used by resolve increases
120    // it assumes _OPENMP is also in use
121    //#define LAZY_STACK_PROF
122    
123    
124    
125    #ifndef _OPENMP
126      #ifdef LAZY_STACK_PROF
127      #undef LAZY_STACK_PROF
128      #endif
129    #endif
130    
131    
132    #ifdef LAZY_STACK_PROF
133    std::vector<void*> stackstart(getNumberOfThreads());
134    std::vector<void*> stackend(getNumberOfThreads());
135    size_t maxstackuse=0;
136    #endif
137    
138  enum ES_opgroup  enum ES_opgroup
139  {  {
140     G_UNKNOWN,     G_UNKNOWN,
141     G_IDENTITY,     G_IDENTITY,
142     G_BINARY,        // pointwise operations with two arguments     G_BINARY,        // pointwise operations with two arguments
143     G_UNARY      // pointwise operations with one argument     G_UNARY,     // pointwise operations with one argument
144       G_UNARY_P,       // pointwise operations with one argument, requiring a parameter
145       G_NP1OUT,        // non-pointwise op with one output
146       G_NP1OUT_P,      // non-pointwise op with one output requiring a parameter
147       G_TENSORPROD,    // general tensor product
148       G_NP1OUT_2P,     // non-pointwise op with one output requiring two params
149       G_REDUCTION,     // non-pointwise unary op with a scalar output
150       G_CONDEVAL
151  };  };
152    
153    
# Line 101  string ES_opstrings[]={"UNKNOWN","IDENTI Line 158  string ES_opstrings[]={"UNKNOWN","IDENTI
158              "asin","acos","atan","sinh","cosh","tanh","erf",              "asin","acos","atan","sinh","cosh","tanh","erf",
159              "asinh","acosh","atanh",              "asinh","acosh","atanh",
160              "log10","log","sign","abs","neg","pos","exp","sqrt",              "log10","log","sign","abs","neg","pos","exp","sqrt",
161              "1/","where>0","where<0","where>=0","where<=0"};              "1/","where>0","where<0","where>=0","where<=0", "where<>0","where=0",
162  int ES_opcount=33;              "symmetric","nonsymmetric",
163                "prod",
164                "transpose", "trace",
165                "swapaxes",
166                "minval", "maxval",
167                "condEval"};
168    int ES_opcount=44;
169  ES_opgroup opgroups[]={G_UNKNOWN,G_IDENTITY,G_BINARY,G_BINARY,G_BINARY,G_BINARY, G_BINARY,  ES_opgroup opgroups[]={G_UNKNOWN,G_IDENTITY,G_BINARY,G_BINARY,G_BINARY,G_BINARY, G_BINARY,
170              G_UNARY,G_UNARY,G_UNARY, //10              G_UNARY,G_UNARY,G_UNARY, //10
171              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,    // 17              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,    // 17
172              G_UNARY,G_UNARY,G_UNARY,                    // 20              G_UNARY,G_UNARY,G_UNARY,                    // 20
173              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,        // 28              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,    // 28
174              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY};              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY, G_UNARY_P, G_UNARY_P,      // 35
175                G_NP1OUT,G_NP1OUT,
176                G_TENSORPROD,
177                G_NP1OUT_P, G_NP1OUT_P,
178                G_NP1OUT_2P,
179                G_REDUCTION, G_REDUCTION,
180                G_CONDEVAL};
181  inline  inline
182  ES_opgroup  ES_opgroup
183  getOpgroup(ES_optype op)  getOpgroup(ES_optype op)
# Line 129  resultFS(DataAbstract_ptr left, DataAbst Line 198  resultFS(DataAbstract_ptr left, DataAbst
198    FunctionSpace r=right->getFunctionSpace();    FunctionSpace r=right->getFunctionSpace();
199    if (l!=r)    if (l!=r)
200    {    {
201      if (r.probeInterpolation(l))      signed char res=r.getDomain()->preferredInterpolationOnDomain(r.getTypeCode(), l.getTypeCode());
202        if (res==1)
203      {      {
204      return l;      return l;
205      }      }
206      if (l.probeInterpolation(r))      if (res==-1)
207      {      {
208      return r;      return r;
209      }      }
# Line 143  resultFS(DataAbstract_ptr left, DataAbst Line 213  resultFS(DataAbstract_ptr left, DataAbst
213  }  }
214    
215  // return the shape of the result of "left op right"  // return the shape of the result of "left op right"
216    // the shapes resulting from tensor product are more complex to compute so are worked out elsewhere
217  DataTypes::ShapeType  DataTypes::ShapeType
218  resultShape(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)  resultShape(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
219  {  {
220      if (left->getShape()!=right->getShape())      if (left->getShape()!=right->getShape())
221      {      {
222        if (getOpgroup(op)!=G_BINARY)        if ((getOpgroup(op)!=G_BINARY) && (getOpgroup(op)!=G_NP1OUT))
223        {        {
224          throw DataException("Shapes not the name - shapes must match for (point)binary operations.");          throw DataException("Shapes not the name - shapes must match for (point)binary operations.");
225        }        }
226    
227        if (left->getRank()==0)   // we need to allow scalar * anything        if (left->getRank()==0)   // we need to allow scalar * anything
228        {        {
229          return right->getShape();          return right->getShape();
# Line 165  resultShape(DataAbstract_ptr left, DataA Line 237  resultShape(DataAbstract_ptr left, DataA
237      return left->getShape();      return left->getShape();
238  }  }
239    
240  // determine the number of points in the result of "left op right"  // return the shape for "op left"
241  size_t  
242  resultLength(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)  DataTypes::ShapeType
243    resultShape(DataAbstract_ptr left, ES_optype op, int axis_offset)
244  {  {
245     switch (getOpgroup(op))      switch(op)
246     {      {
247     case G_BINARY: return left->getLength();          case TRANS:
248     case G_UNARY: return left->getLength();         {            // for the scoping of variables
249     default:          const DataTypes::ShapeType& s=left->getShape();
250      throw DataException("Programmer Error - attempt to getLength() for operator "+opToString(op)+".");          DataTypes::ShapeType sh;
251     }          int rank=left->getRank();
252            if (axis_offset<0 || axis_offset>rank)
253            {
254                stringstream e;
255                e << "Error - Data::transpose must have 0 <= axis_offset <= rank=" << rank;
256                throw DataException(e.str());
257            }
258            for (int i=0; i<rank; i++)
259            {
260               int index = (axis_offset+i)%rank;
261               sh.push_back(s[index]); // Append to new shape
262            }
263            return sh;
264           }
265        break;
266        case TRACE:
267           {
268            int rank=left->getRank();
269            if (rank<2)
270            {
271               throw DataException("Trace can only be computed for objects with rank 2 or greater.");
272            }
273            if ((axis_offset>rank-2) || (axis_offset<0))
274            {
275               throw DataException("Trace: axis offset must lie between 0 and rank-2 inclusive.");
276            }
277            if (rank==2)
278            {
279               return DataTypes::scalarShape;
280            }
281            else if (rank==3)
282            {
283               DataTypes::ShapeType sh;
284                   if (axis_offset==0)
285               {
286                    sh.push_back(left->getShape()[2]);
287                   }
288                   else     // offset==1
289               {
290                sh.push_back(left->getShape()[0]);
291                   }
292               return sh;
293            }
294            else if (rank==4)
295            {
296               DataTypes::ShapeType sh;
297               const DataTypes::ShapeType& s=left->getShape();
298                   if (axis_offset==0)
299               {
300                    sh.push_back(s[2]);
301                    sh.push_back(s[3]);
302                   }
303                   else if (axis_offset==1)
304               {
305                    sh.push_back(s[0]);
306                    sh.push_back(s[3]);
307                   }
308               else     // offset==2
309               {
310                sh.push_back(s[0]);
311                sh.push_back(s[1]);
312               }
313               return sh;
314            }
315            else        // unknown rank
316            {
317               throw DataException("Error - Data::trace can only be calculated for rank 2, 3 or 4 object.");
318            }
319           }
320        break;
321            default:
322        throw DataException("Programmer error - resultShape(left,op) can't compute shapes for operator "+opToString(op)+".");
323        }
324  }  }
325    
326  // determine the number of samples requires to evaluate an expression combining left and right  DataTypes::ShapeType
327  int  SwapShape(DataAbstract_ptr left, const int axis0, const int axis1)
 calcBuffs(const DataLazy_ptr& left, const DataLazy_ptr& right, ES_optype op)  
328  {  {
329     switch(getOpgroup(op))       // This code taken from the Data.cpp swapaxes() method
330     {       // Some of the checks are probably redundant here
331     case G_IDENTITY: return 1;       int axis0_tmp,axis1_tmp;
332     case G_BINARY: return max(left->getBuffsRequired(),right->getBuffsRequired()+1);       const DataTypes::ShapeType& s=left->getShape();
333     case G_UNARY: return max(left->getBuffsRequired(),1);       DataTypes::ShapeType out_shape;
334     default:       // Here's the equivalent of python s_out=s[axis_offset:]+s[:axis_offset]
335      throw DataException("Programmer Error - attempt to calcBuffs() for operator "+opToString(op)+".");       // which goes thru all shape vector elements starting with axis_offset (at index=rank wrap around to 0)
336     }       int rank=left->getRank();
337         if (rank<2) {
338            throw DataException("Error - Data::swapaxes argument must have at least rank 2.");
339         }
340         if (axis0<0 || axis0>rank-1) {
341            stringstream e;
342            e << "Error - Data::swapaxes: axis0 must be between 0 and rank-1=" << (rank-1);
343            throw DataException(e.str());
344         }
345         if (axis1<0 || axis1>rank-1) {
346            stringstream e;
347            e << "Error - Data::swapaxes: axis1 must be between 0 and rank-1=" << (rank-1);
348            throw DataException(e.str());
349         }
350         if (axis0 == axis1) {
351             throw DataException("Error - Data::swapaxes: axis indices must be different.");
352         }
353         if (axis0 > axis1) {
354             axis0_tmp=axis1;
355             axis1_tmp=axis0;
356         } else {
357             axis0_tmp=axis0;
358             axis1_tmp=axis1;
359         }
360         for (int i=0; i<rank; i++) {
361           if (i == axis0_tmp) {
362              out_shape.push_back(s[axis1_tmp]);
363           } else if (i == axis1_tmp) {
364              out_shape.push_back(s[axis0_tmp]);
365           } else {
366              out_shape.push_back(s[i]);
367           }
368         }
369        return out_shape;
370  }  }
371    
372    
373    // determine the output shape for the general tensor product operation
374    // the additional parameters return information required later for the product
375    // the majority of this code is copy pasted from C_General_Tensor_Product
376    DataTypes::ShapeType
377    GTPShape(DataAbstract_ptr left, DataAbstract_ptr right, int axis_offset, int transpose, int& SL, int& SM, int& SR)
378    {
379        
380      // Get rank and shape of inputs
381      int rank0 = left->getRank();
382      int rank1 = right->getRank();
383      const DataTypes::ShapeType& shape0 = left->getShape();
384      const DataTypes::ShapeType& shape1 = right->getShape();
385    
386      // Prepare for the loops of the product and verify compatibility of shapes
387      int start0=0, start1=0;
388      if (transpose == 0)       {}
389      else if (transpose == 1)  { start0 = axis_offset; }
390      else if (transpose == 2)  { start1 = rank1-axis_offset; }
391      else              { throw DataException("DataLazy GeneralTensorProduct Constructor: Error - transpose should be 0, 1 or 2"); }
392    
393      if (rank0<axis_offset)
394      {
395        throw DataException("DataLazy GeneralTensorProduct Constructor: Error - rank of left < axisoffset");
396      }
397    
398      // Adjust the shapes for transpose
399      DataTypes::ShapeType tmpShape0(rank0);    // pre-sizing the vectors rather
400      DataTypes::ShapeType tmpShape1(rank1);    // than using push_back
401      for (int i=0; i<rank0; i++)   { tmpShape0[i]=shape0[(i+start0)%rank0]; }
402      for (int i=0; i<rank1; i++)   { tmpShape1[i]=shape1[(i+start1)%rank1]; }
403    
404      // Prepare for the loops of the product
405      SL=1, SM=1, SR=1;
406      for (int i=0; i<rank0-axis_offset; i++)   {
407        SL *= tmpShape0[i];
408      }
409      for (int i=rank0-axis_offset; i<rank0; i++)   {
410        if (tmpShape0[i] != tmpShape1[i-(rank0-axis_offset)]) {
411          throw DataException("C_GeneralTensorProduct: Error - incompatible shapes");
412        }
413        SM *= tmpShape0[i];
414      }
415      for (int i=axis_offset; i<rank1; i++)     {
416        SR *= tmpShape1[i];
417      }
418    
419      // Define the shape of the output (rank of shape is the sum of the loop ranges below)
420      DataTypes::ShapeType shape2(rank0+rank1-2*axis_offset);  
421      {         // block to limit the scope of out_index
422         int out_index=0;
423         for (int i=0; i<rank0-axis_offset; i++, ++out_index) { shape2[out_index]=tmpShape0[i]; } // First part of arg_0_Z
424         for (int i=axis_offset; i<rank1; i++, ++out_index)   { shape2[out_index]=tmpShape1[i]; } // Last part of arg_1_Z
425      }
426    
427      if (shape2.size()>ESCRIPT_MAX_DATA_RANK)
428      {
429         ostringstream os;
430         os << "C_GeneralTensorProduct: Error - Attempt to create a rank " << shape2.size() << " object. The maximum rank is " << ESCRIPT_MAX_DATA_RANK << ".";
431         throw DataException(os.str());
432      }
433    
434      return shape2;
435    }
436    
437  }   // end anonymous namespace  }   // end anonymous namespace
438    
439    
# Line 208  opToString(ES_optype op) Line 449  opToString(ES_optype op)
449    return ES_opstrings[op];    return ES_opstrings[op];
450  }  }
451    
452    void DataLazy::LazyNodeSetup()
453    {
454    #ifdef _OPENMP
455        int numthreads=omp_get_max_threads();
456        m_samples.resize(numthreads*m_samplesize);
457        m_sampleids=new int[numthreads];
458        for (int i=0;i<numthreads;++i)
459        {
460            m_sampleids[i]=-1;  
461        }
462    #else
463        m_samples.resize(m_samplesize);
464        m_sampleids=new int[1];
465        m_sampleids[0]=-1;
466    #endif  // _OPENMP
467    }
468    
469    
470    // Creates an identity node
471  DataLazy::DataLazy(DataAbstract_ptr p)  DataLazy::DataLazy(DataAbstract_ptr p)
472      : parent(p->getFunctionSpace(),p->getShape()),      : parent(p->getFunctionSpace(),p->getShape())
473      m_op(IDENTITY)      ,m_sampleids(0),
474        m_samples(1)
475  {  {
476     if (p->isLazy())     if (p->isLazy())
477     {     {
# Line 222  DataLazy::DataLazy(DataAbstract_ptr p) Line 482  DataLazy::DataLazy(DataAbstract_ptr p)
482     }     }
483     else     else
484     {     {
485      m_id=dynamic_pointer_cast<DataReady>(p);      p->makeLazyShared();
486      if(p->isConstant()) {m_readytype='C';}      DataReady_ptr dr=dynamic_pointer_cast<DataReady>(p);
487      else if(p->isExpanded()) {m_readytype='E';}      makeIdentity(dr);
488      else if (p->isTagged()) {m_readytype='T';}  LAZYDEBUG(cout << "Wrapping " << dr.get() << " id=" << m_id.get() << endl;)
     else {throw DataException("Unknown DataReady instance in DataLazy constructor.");}  
489     }     }
490     m_length=p->getLength();  LAZYDEBUG(cout << "(1)Lazy created with " << m_samplesize << endl;)
    m_buffsRequired=1;  
    m_samplesize=getNumDPPSample()*getNoValues();  
 cout << "(1)Lazy created with " << m_samplesize << endl;  
491  }  }
492    
   
   
   
493  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op)  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op)
494      : parent(left->getFunctionSpace(),left->getShape()),      : parent(left->getFunctionSpace(),(getOpgroup(op)!=G_REDUCTION)?left->getShape():DataTypes::scalarShape),
495      m_op(op)      m_op(op),
496        m_axis_offset(0),
497        m_transpose(0),
498        m_SL(0), m_SM(0), m_SR(0)
499  {  {
500     if (getOpgroup(op)!=G_UNARY)     if ((getOpgroup(op)!=G_UNARY) && (getOpgroup(op)!=G_NP1OUT) && (getOpgroup(op)!=G_REDUCTION))
501     {     {
502      throw DataException("Programmer error - constructor DataLazy(left, op) will only process UNARY operations.");      throw DataException("Programmer error - constructor DataLazy(left, op) will only process UNARY operations.");
503     }     }
504    
505     DataLazy_ptr lleft;     DataLazy_ptr lleft;
506     if (!left->isLazy())     if (!left->isLazy())
507     {     {
# Line 255  DataLazy::DataLazy(DataAbstract_ptr left Line 512  DataLazy::DataLazy(DataAbstract_ptr left
512      lleft=dynamic_pointer_cast<DataLazy>(left);      lleft=dynamic_pointer_cast<DataLazy>(left);
513     }     }
514     m_readytype=lleft->m_readytype;     m_readytype=lleft->m_readytype;
    m_length=left->getLength();  
515     m_left=lleft;     m_left=lleft;
    m_buffsRequired=1;  
516     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
517       m_children=m_left->m_children+1;
518       m_height=m_left->m_height+1;
519       LazyNodeSetup();
520       SIZELIMIT
521  }  }
522    
523    
524  // In this constructor we need to consider interpolation  // In this constructor we need to consider interpolation
525  DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)  DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
526      : parent(resultFS(left,right,op), resultShape(left,right,op)),      : parent(resultFS(left,right,op), resultShape(left,right,op)),
527      m_op(op)      m_op(op),
528        m_SL(0), m_SM(0), m_SR(0)
529  {  {
530     if (getOpgroup(op)!=G_BINARY)  LAZYDEBUG(cout << "Forming operator with " << left.get() << " " << right.get() << endl;)
531       if ((getOpgroup(op)!=G_BINARY))
532     {     {
533      throw DataException("Programmer error - constructor DataLazy(left, right, op) will only process BINARY operations.");      throw DataException("Programmer error - constructor DataLazy(left, right, op) will only process BINARY operations.");
534     }     }
# Line 279  DataLazy::DataLazy(DataAbstract_ptr left Line 540  DataLazy::DataLazy(DataAbstract_ptr left
540      Data tmp(ltemp,fs);      Data tmp(ltemp,fs);
541      left=tmp.borrowDataPtr();      left=tmp.borrowDataPtr();
542     }     }
543     if (getFunctionSpace()!=right->getFunctionSpace())   // left needs to be interpolated     if (getFunctionSpace()!=right->getFunctionSpace())   // right needs to be interpolated
544     {     {
545      Data tmp(Data(right),getFunctionSpace());      Data tmp(Data(right),getFunctionSpace());
546      right=tmp.borrowDataPtr();      right=tmp.borrowDataPtr();
547    LAZYDEBUG(cout << "Right interpolation required " << right.get() << endl;)
548     }     }
549     left->operandCheck(*right);     left->operandCheck(*right);
550    
551     if (left->isLazy())          // the children need to be DataLazy. Wrap them in IDENTITY if required     if (left->isLazy())          // the children need to be DataLazy. Wrap them in IDENTITY if required
552     {     {
553      m_left=dynamic_pointer_cast<DataLazy>(left);      m_left=dynamic_pointer_cast<DataLazy>(left);
554    LAZYDEBUG(cout << "Left is " << m_left->toString() << endl;)
555     }     }
556     else     else
557     {     {
558      m_left=DataLazy_ptr(new DataLazy(left));      m_left=DataLazy_ptr(new DataLazy(left));
559    LAZYDEBUG(cout << "Left " << left.get() << " wrapped " << m_left->m_id.get() << endl;)
560     }     }
561     if (right->isLazy())     if (right->isLazy())
562     {     {
563      m_right=dynamic_pointer_cast<DataLazy>(right);      m_right=dynamic_pointer_cast<DataLazy>(right);
564    LAZYDEBUG(cout << "Right is " << m_right->toString() << endl;)
565     }     }
566     else     else
567     {     {
568      m_right=DataLazy_ptr(new DataLazy(right));      m_right=DataLazy_ptr(new DataLazy(right));
569    LAZYDEBUG(cout << "Right " << right.get() << " wrapped " << m_right->m_id.get() << endl;)
570     }     }
571     char lt=m_left->m_readytype;     char lt=m_left->m_readytype;
572     char rt=m_right->m_readytype;     char rt=m_right->m_readytype;
# Line 316  DataLazy::DataLazy(DataAbstract_ptr left Line 582  DataLazy::DataLazy(DataAbstract_ptr left
582     {     {
583      m_readytype='C';      m_readytype='C';
584     }     }
585     m_length=resultLength(m_left,m_right,m_op);     m_samplesize=getNumDPPSample()*getNoValues();
586     m_samplesize=getNumDPPSample()*getNoValues();         m_children=m_left->m_children+m_right->m_children+2;
587     m_buffsRequired=calcBuffs(m_left, m_right,m_op);     m_height=max(m_left->m_height,m_right->m_height)+1;
588  cout << "(3)Lazy created with " << m_samplesize << endl;     LazyNodeSetup();
589       SIZELIMIT
590    LAZYDEBUG(cout << "(3)Lazy created with " << m_samplesize << endl;)
591  }  }
592    
593    DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op, int axis_offset, int transpose)
594        : parent(resultFS(left,right,op), GTPShape(left,right, axis_offset, transpose, m_SL,m_SM, m_SR)),
595        m_op(op),
596        m_axis_offset(axis_offset),
597        m_transpose(transpose)
598    {
599       if ((getOpgroup(op)!=G_TENSORPROD))
600       {
601        throw DataException("Programmer error - constructor DataLazy(left, right, op, ax, tr) will only process BINARY operations which require parameters.");
602       }
603       if ((transpose>2) || (transpose<0))
604       {
605        throw DataException("DataLazy GeneralTensorProduct constructor: Error - transpose should be 0, 1 or 2");
606       }
607       if (getFunctionSpace()!=left->getFunctionSpace())    // left needs to be interpolated
608       {
609        FunctionSpace fs=getFunctionSpace();
610        Data ltemp(left);
611        Data tmp(ltemp,fs);
612        left=tmp.borrowDataPtr();
613       }
614       if (getFunctionSpace()!=right->getFunctionSpace())   // right needs to be interpolated
615       {
616        Data tmp(Data(right),getFunctionSpace());
617        right=tmp.borrowDataPtr();
618       }
619    //    left->operandCheck(*right);
620    
621  DataLazy::~DataLazy()     if (left->isLazy())          // the children need to be DataLazy. Wrap them in IDENTITY if required
622       {
623        m_left=dynamic_pointer_cast<DataLazy>(left);
624       }
625       else
626       {
627        m_left=DataLazy_ptr(new DataLazy(left));
628       }
629       if (right->isLazy())
630       {
631        m_right=dynamic_pointer_cast<DataLazy>(right);
632       }
633       else
634       {
635        m_right=DataLazy_ptr(new DataLazy(right));
636       }
637       char lt=m_left->m_readytype;
638       char rt=m_right->m_readytype;
639       if (lt=='E' || rt=='E')
640       {
641        m_readytype='E';
642       }
643       else if (lt=='T' || rt=='T')
644       {
645        m_readytype='T';
646       }
647       else
648       {
649        m_readytype='C';
650       }
651       m_samplesize=getNumDPPSample()*getNoValues();
652       m_children=m_left->m_children+m_right->m_children+2;
653       m_height=max(m_left->m_height,m_right->m_height)+1;
654       LazyNodeSetup();
655       SIZELIMIT
656    LAZYDEBUG(cout << "(4)Lazy created with " << m_samplesize << endl;)
657    }
658    
659    
660    DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, int axis_offset)
661        : parent(left->getFunctionSpace(), resultShape(left,op, axis_offset)),
662        m_op(op),
663        m_axis_offset(axis_offset),
664        m_transpose(0),
665        m_tol(0)
666  {  {
667       if ((getOpgroup(op)!=G_NP1OUT_P))
668       {
669        throw DataException("Programmer error - constructor DataLazy(left, op, ax) will only process UNARY operations which require parameters.");
670       }
671       DataLazy_ptr lleft;
672       if (!left->isLazy())
673       {
674        lleft=DataLazy_ptr(new DataLazy(left));
675       }
676       else
677       {
678        lleft=dynamic_pointer_cast<DataLazy>(left);
679       }
680       m_readytype=lleft->m_readytype;
681       m_left=lleft;
682       m_samplesize=getNumDPPSample()*getNoValues();
683       m_children=m_left->m_children+1;
684       m_height=m_left->m_height+1;
685       LazyNodeSetup();
686       SIZELIMIT
687    LAZYDEBUG(cout << "(5)Lazy created with " << m_samplesize << endl;)
688    }
689    
690    DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, double tol)
691        : parent(left->getFunctionSpace(), left->getShape()),
692        m_op(op),
693        m_axis_offset(0),
694        m_transpose(0),
695        m_tol(tol)
696    {
697       if ((getOpgroup(op)!=G_UNARY_P))
698       {
699        throw DataException("Programmer error - constructor DataLazy(left, op, tol) will only process UNARY operations which require parameters.");
700       }
701       DataLazy_ptr lleft;
702       if (!left->isLazy())
703       {
704        lleft=DataLazy_ptr(new DataLazy(left));
705       }
706       else
707       {
708        lleft=dynamic_pointer_cast<DataLazy>(left);
709       }
710       m_readytype=lleft->m_readytype;
711       m_left=lleft;
712       m_samplesize=getNumDPPSample()*getNoValues();
713       m_children=m_left->m_children+1;
714       m_height=m_left->m_height+1;
715       LazyNodeSetup();
716       SIZELIMIT
717    LAZYDEBUG(cout << "(6)Lazy created with " << m_samplesize << endl;)
718  }  }
719    
720    
721  int  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, const int axis0, const int axis1)
722  DataLazy::getBuffsRequired() const      : parent(left->getFunctionSpace(), SwapShape(left,axis0,axis1)),
723        m_op(op),
724        m_axis_offset(axis0),
725        m_transpose(axis1),
726        m_tol(0)
727  {  {
728      return m_buffsRequired;     if ((getOpgroup(op)!=G_NP1OUT_2P))
729       {
730        throw DataException("Programmer error - constructor DataLazy(left, op, tol) will only process UNARY operations which require two integer parameters.");
731       }
732       DataLazy_ptr lleft;
733       if (!left->isLazy())
734       {
735        lleft=DataLazy_ptr(new DataLazy(left));
736       }
737       else
738       {
739        lleft=dynamic_pointer_cast<DataLazy>(left);
740       }
741       m_readytype=lleft->m_readytype;
742       m_left=lleft;
743       m_samplesize=getNumDPPSample()*getNoValues();
744       m_children=m_left->m_children+1;
745       m_height=m_left->m_height+1;
746       LazyNodeSetup();
747       SIZELIMIT
748    LAZYDEBUG(cout << "(7)Lazy created with " << m_samplesize << endl;)
749    }
750    
751    
752    namespace
753    {
754    
755        inline int max3(int a, int b, int c)
756        {
757        int t=(a>b?a:b);
758        return (t>c?t:c);
759    
760        }
761    }
762    
763    DataLazy::DataLazy(DataAbstract_ptr mask, DataAbstract_ptr left, DataAbstract_ptr right/*, double tol*/)
764        : parent(left->getFunctionSpace(), left->getShape()),
765        m_op(CONDEVAL),
766        m_axis_offset(0),
767        m_transpose(0),
768        m_tol(0)
769    {
770    
771       DataLazy_ptr lmask;
772       DataLazy_ptr lleft;
773       DataLazy_ptr lright;
774       if (!mask->isLazy())
775       {
776        lmask=DataLazy_ptr(new DataLazy(mask));
777       }
778       else
779       {
780        lmask=dynamic_pointer_cast<DataLazy>(mask);
781       }
782       if (!left->isLazy())
783       {
784        lleft=DataLazy_ptr(new DataLazy(left));
785       }
786       else
787       {
788        lleft=dynamic_pointer_cast<DataLazy>(left);
789       }
790       if (!right->isLazy())
791       {
792        lright=DataLazy_ptr(new DataLazy(right));
793       }
794       else
795       {
796        lright=dynamic_pointer_cast<DataLazy>(right);
797       }
798       m_readytype=lmask->m_readytype;
799       if ((lleft->m_readytype!=lright->m_readytype) || (lmask->m_readytype!=lleft->m_readytype))
800       {
801        throw DataException("Programmer Error - condEval arguments must have the same readytype");
802       }
803       m_left=lleft;
804       m_right=lright;
805       m_mask=lmask;
806       m_samplesize=getNumDPPSample()*getNoValues();
807       m_children=m_left->m_children+m_right->m_children+m_mask->m_children+1;
808       m_height=max3(m_left->m_height,m_right->m_height,m_mask->m_height)+1;
809       LazyNodeSetup();
810       SIZELIMIT
811    LAZYDEBUG(cout << "(8)Lazy created with " << m_samplesize << endl;)
812    }
813    
814    
815    
816    DataLazy::~DataLazy()
817    {
818       delete[] m_sampleids;
819  }  }
820    
821    
# Line 341  DataLazy::getBuffsRequired() const Line 825  DataLazy::getBuffsRequired() const
825    For reasons of efficiency do not call this method on DataExpanded nodes.    For reasons of efficiency do not call this method on DataExpanded nodes.
826  */  */
827  DataReady_ptr  DataReady_ptr
828  DataLazy::collapseToReady()  DataLazy::collapseToReady() const
829  {  {
830    if (m_readytype=='E')    if (m_readytype=='E')
831    { // this is more an efficiency concern than anything else    { // this is more an efficiency concern than anything else
# Line 354  DataLazy::collapseToReady() Line 838  DataLazy::collapseToReady()
838    DataReady_ptr pleft=m_left->collapseToReady();    DataReady_ptr pleft=m_left->collapseToReady();
839    Data left(pleft);    Data left(pleft);
840    Data right;    Data right;
841    if (getOpgroup(m_op)==G_BINARY)    if ((getOpgroup(m_op)==G_BINARY) || (getOpgroup(m_op)==G_TENSORPROD))
842    {    {
843      right=Data(m_right->collapseToReady());      right=Data(m_right->collapseToReady());
844    }    }
# Line 453  DataLazy::collapseToReady() Line 937  DataLazy::collapseToReady()
937      case LEZ:      case LEZ:
938      result=left.whereNonPositive();      result=left.whereNonPositive();
939      break;      break;
940        case NEZ:
941        result=left.whereNonZero(m_tol);
942        break;
943        case EZ:
944        result=left.whereZero(m_tol);
945        break;
946        case SYM:
947        result=left.symmetric();
948        break;
949        case NSYM:
950        result=left.nonsymmetric();
951        break;
952        case PROD:
953        result=C_GeneralTensorProduct(left,right,m_axis_offset, m_transpose);
954        break;
955        case TRANS:
956        result=left.transpose(m_axis_offset);
957        break;
958        case TRACE:
959        result=left.trace(m_axis_offset);
960        break;
961        case SWAP:
962        result=left.swapaxes(m_axis_offset, m_transpose);
963        break;
964        case MINVAL:
965        result=left.minval();
966        break;
967        case MAXVAL:
968        result=left.minval();
969        break;
970      default:      default:
971      throw DataException("Programmer error - do not know how to resolve operator "+opToString(m_op)+".");      throw DataException("Programmer error - collapseToReady does not know how to resolve operator "+opToString(m_op)+".");
972    }    }
973    return result.borrowReadyPtr();    return result.borrowReadyPtr();
974  }  }
# Line 466  DataLazy::collapseToReady() Line 980  DataLazy::collapseToReady()
980     the purpose of using DataLazy in the first place).     the purpose of using DataLazy in the first place).
981  */  */
982  void  void
983  DataLazy::collapse()  DataLazy::collapse() const
984  {  {
985    if (m_op==IDENTITY)    if (m_op==IDENTITY)
986    {    {
# Line 480  DataLazy::collapse() Line 994  DataLazy::collapse()
994    m_op=IDENTITY;    m_op=IDENTITY;
995  }  }
996    
997  /*  
998    \brief Compute the value of the expression (binary operation) for the given sample.  
999    \return Vector which stores the value of the subexpression for the given sample.  
1000    \param v A vector to store intermediate results.  
1001    \param offset Index in v to begin storing results.  
1002    \param sampleNo Sample number to evaluate.  #define PROC_OP(TYPE,X)                               \
1003    \param roffset (output parameter) the offset in the return vector where the result begins.      for (int j=0;j<onumsteps;++j)\
1004        {\
1005    The return value will be an existing vector so do not deallocate it.        for (int i=0;i<numsteps;++i,resultp+=resultStep) \
1006    If the result is stored in v it should be stored at the offset given.        { \
1007    Everything from offset to the end of v should be considered available for this method to use.  LAZYDEBUG(cout << "[left,right]=[" << lroffset << "," << rroffset << "]" << endl;)\
1008  */  LAZYDEBUG(cout << "{left,right}={" << (*left)[lroffset] << "," << (*right)[rroffset] << "}\n";)\
1009  DataTypes::ValueType*           tensor_binary_operation< TYPE >(chunksize, &((*left)[lroffset]), &((*right)[rroffset]), resultp, X); \
1010  DataLazy::resolveUnary(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const  LAZYDEBUG(cout << " result=      " << resultp[0] << endl;) \
1011             lroffset+=leftstep; \
1012             rroffset+=rightstep; \
1013          }\
1014          lroffset+=oleftstep;\
1015          rroffset+=orightstep;\
1016        }
1017    
1018    
1019    // The result will be stored in m_samples
1020    // The return value is a pointer to the DataVector, offset is the offset within the return value
1021    const DataTypes::ValueType*
1022    DataLazy::resolveNodeSample(int tid, int sampleNo, size_t& roffset) const
1023    {
1024    LAZYDEBUG(cout << "Resolve sample " << toString() << endl;)
1025        // collapse so we have a 'E' node or an IDENTITY for some other type
1026      if (m_readytype!='E' && m_op!=IDENTITY)
1027      {
1028        collapse();
1029      }
1030      if (m_op==IDENTITY)  
1031      {
1032        const ValueType& vec=m_id->getVectorRO();
1033        roffset=m_id->getPointOffset(sampleNo, 0);
1034    #ifdef LAZY_STACK_PROF
1035    int x;
1036    if (&x<stackend[omp_get_thread_num()])
1037    {
1038           stackend[omp_get_thread_num()]=&x;
1039    }
1040    #endif
1041        return &(vec);
1042      }
1043      if (m_readytype!='E')
1044      {
1045        throw DataException("Programmer Error - Collapse did not produce an expanded node.");
1046      }
1047      if (m_sampleids[tid]==sampleNo)
1048      {
1049        roffset=tid*m_samplesize;
1050        return &(m_samples);        // sample is already resolved
1051      }
1052      m_sampleids[tid]=sampleNo;
1053    
1054      switch (getOpgroup(m_op))
1055      {
1056      case G_UNARY:
1057      case G_UNARY_P: return resolveNodeUnary(tid, sampleNo, roffset);
1058      case G_BINARY: return resolveNodeBinary(tid, sampleNo, roffset);
1059      case G_NP1OUT: return resolveNodeNP1OUT(tid, sampleNo, roffset);
1060      case G_NP1OUT_P: return resolveNodeNP1OUT_P(tid, sampleNo, roffset);
1061      case G_TENSORPROD: return resolveNodeTProd(tid, sampleNo, roffset);
1062      case G_NP1OUT_2P: return resolveNodeNP1OUT_2P(tid, sampleNo, roffset);
1063      case G_REDUCTION: return resolveNodeReduction(tid, sampleNo, roffset);
1064      case G_CONDEVAL: return resolveNodeCondEval(tid, sampleNo, roffset);
1065      default:
1066        throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");
1067      }
1068    }
1069    
1070    const DataTypes::ValueType*
1071    DataLazy::resolveNodeUnary(int tid, int sampleNo, size_t& roffset) const
1072  {  {
1073      // we assume that any collapsing has been done before we get here      // we assume that any collapsing has been done before we get here
1074      // since we only have one argument we don't need to think about only      // since we only have one argument we don't need to think about only
1075      // processing single points.      // processing single points.
1076        // we will also know we won't get identity nodes
1077    if (m_readytype!='E')    if (m_readytype!='E')
1078    {    {
1079      throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");      throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
1080    }    }
1081    const ValueType* vleft=m_left->resolveSample(v,offset,sampleNo,roffset);    if (m_op==IDENTITY)
1082    const double* left=&((*vleft)[roffset]);    {
1083    double* result=&(v[offset]);      throw DataException("Programmer error - resolveNodeUnary should not be called on identity nodes.");
1084    roffset=offset;    }
1085      const DataTypes::ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, roffset);
1086      const double* left=&((*leftres)[roffset]);
1087      roffset=m_samplesize*tid;
1088      double* result=&(m_samples[roffset]);
1089    switch (m_op)    switch (m_op)
1090    {    {
1091      case SIN:        case SIN:  
1092      tensor_unary_operation(m_samplesize, left, result, ::sin);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sin);
1093      break;      break;
1094      case COS:      case COS:
1095      tensor_unary_operation(m_samplesize, left, result, ::cos);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cos);
1096      break;      break;
1097      case TAN:      case TAN:
1098      tensor_unary_operation(m_samplesize, left, result, ::tan);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tan);
1099      break;      break;
1100      case ASIN:      case ASIN:
1101      tensor_unary_operation(m_samplesize, left, result, ::asin);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::asin);
1102      break;      break;
1103      case ACOS:      case ACOS:
1104      tensor_unary_operation(m_samplesize, left, result, ::acos);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::acos);
1105      break;      break;
1106      case ATAN:      case ATAN:
1107      tensor_unary_operation(m_samplesize, left, result, ::atan);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::atan);
1108      break;      break;
1109      case SINH:      case SINH:
1110      tensor_unary_operation(m_samplesize, left, result, ::sinh);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sinh);
1111      break;      break;
1112      case COSH:      case COSH:
1113      tensor_unary_operation(m_samplesize, left, result, ::cosh);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cosh);
1114      break;      break;
1115      case TANH:      case TANH:
1116      tensor_unary_operation(m_samplesize, left, result, ::tanh);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tanh);
1117      break;      break;
1118      case ERF:      case ERF:
1119  #ifdef _WIN32  #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1120      throw DataException("Error - Data:: erf function is not supported on _WIN32 platforms.");      throw DataException("Error - Data:: erf function is not supported on _WIN32 platforms.");
1121  #else  #else
1122      tensor_unary_operation(m_samplesize, left, result, ::erf);      tensor_unary_operation(m_samplesize, left, result, ::erf);
1123      break;      break;
1124  #endif  #endif
1125     case ASINH:     case ASINH:
1126  #ifdef _WIN32  #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1127      tensor_unary_operation(m_samplesize, left, result, escript::asinh_substitute);      tensor_unary_operation(m_samplesize, left, result, escript::asinh_substitute);
1128  #else  #else
1129      tensor_unary_operation(m_samplesize, left, result, ::asinh);      tensor_unary_operation(m_samplesize, left, result, ::asinh);
1130  #endif    #endif  
1131      break;      break;
1132     case ACOSH:     case ACOSH:
1133  #ifdef _WIN32  #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1134      tensor_unary_operation(m_samplesize, left, result, escript::acosh_substitute);      tensor_unary_operation(m_samplesize, left, result, escript::acosh_substitute);
1135  #else  #else
1136      tensor_unary_operation(m_samplesize, left, result, ::acosh);      tensor_unary_operation(m_samplesize, left, result, ::acosh);
1137  #endif    #endif  
1138      break;      break;
1139     case ATANH:     case ATANH:
1140  #ifdef _WIN32  #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1141      tensor_unary_operation(m_samplesize, left, result, escript::atanh_substitute);      tensor_unary_operation(m_samplesize, left, result, escript::atanh_substitute);
1142  #else  #else
1143      tensor_unary_operation(m_samplesize, left, result, ::atanh);      tensor_unary_operation(m_samplesize, left, result, ::atanh);
1144  #endif    #endif  
1145      break;      break;
1146      case LOG10:      case LOG10:
1147      tensor_unary_operation(m_samplesize, left, result, ::log10);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log10);
1148      break;      break;
1149      case LOG:      case LOG:
1150      tensor_unary_operation(m_samplesize, left, result, ::log);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log);
1151      break;      break;
1152      case SIGN:      case SIGN:
1153      tensor_unary_operation(m_samplesize, left, result, escript::fsign);      tensor_unary_operation(m_samplesize, left, result, escript::fsign);
1154      break;      break;
1155      case ABS:      case ABS:
1156      tensor_unary_operation(m_samplesize, left, result, ::fabs);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::fabs);
1157      break;      break;
1158      case NEG:      case NEG:
1159      tensor_unary_operation(m_samplesize, left, result, negate<double>());      tensor_unary_operation(m_samplesize, left, result, negate<double>());
# Line 584  DataLazy::resolveUnary(ValueType& v, siz Line 1164  DataLazy::resolveUnary(ValueType& v, siz
1164      throw DataException("Programmer error - POS not supported for lazy data.");      throw DataException("Programmer error - POS not supported for lazy data.");
1165      break;      break;
1166      case EXP:      case EXP:
1167      tensor_unary_operation(m_samplesize, left, result, ::exp);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::exp);
1168      break;      break;
1169      case SQRT:      case SQRT:
1170      tensor_unary_operation(m_samplesize, left, result, ::sqrt);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sqrt);
1171      break;      break;
1172      case RECIP:      case RECIP:
1173      tensor_unary_operation(m_samplesize, left, result, bind1st(divides<double>(),1.));      tensor_unary_operation(m_samplesize, left, result, bind1st(divides<double>(),1.));
# Line 604  DataLazy::resolveUnary(ValueType& v, siz Line 1184  DataLazy::resolveUnary(ValueType& v, siz
1184      case LEZ:      case LEZ:
1185      tensor_unary_operation(m_samplesize, left, result, bind2nd(less_equal<double>(),0.0));      tensor_unary_operation(m_samplesize, left, result, bind2nd(less_equal<double>(),0.0));
1186      break;      break;
1187    // There are actually G_UNARY_P but I don't see a compelling reason to treat them differently
1188        case NEZ:
1189        tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsGT(),m_tol));
1190        break;
1191        case EZ:
1192        tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsLTE(),m_tol));
1193        break;
1194    
1195      default:      default:
1196      throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");      throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");
1197    }    }
1198    return &v;    return &(m_samples);
1199  }  }
1200    
1201    
1202    const DataTypes::ValueType*
1203    DataLazy::resolveNodeReduction(int tid, int sampleNo, size_t& roffset) const
1204    {
1205        // we assume that any collapsing has been done before we get here
1206        // since we only have one argument we don't need to think about only
1207        // processing single points.
1208        // we will also know we won't get identity nodes
1209      if (m_readytype!='E')
1210      {
1211        throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
1212      }
1213      if (m_op==IDENTITY)
1214      {
1215        throw DataException("Programmer error - resolveNodeUnary should not be called on identity nodes.");
1216      }
1217      size_t loffset=0;
1218      const DataTypes::ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, loffset);
1219    
1220      roffset=m_samplesize*tid;
1221      unsigned int ndpps=getNumDPPSample();
1222      unsigned int psize=DataTypes::noValues(m_left->getShape());
1223      double* result=&(m_samples[roffset]);
1224      switch (m_op)
1225      {
1226        case MINVAL:
1227        {
1228          for (unsigned int z=0;z<ndpps;++z)
1229          {
1230            FMin op;
1231            *result=DataMaths::reductionOp(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max());
1232            loffset+=psize;
1233            result++;
1234          }
1235        }
1236        break;
1237        case MAXVAL:
1238        {
1239          for (unsigned int z=0;z<ndpps;++z)
1240          {
1241          FMax op;
1242          *result=DataMaths::reductionOp(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max()*-1);
1243          loffset+=psize;
1244          result++;
1245          }
1246        }
1247        break;
1248        default:
1249        throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");
1250      }
1251      return &(m_samples);
1252    }
1253    
1254    const DataTypes::ValueType*
1255    DataLazy::resolveNodeNP1OUT(int tid, int sampleNo, size_t& roffset) const
1256    {
1257        // we assume that any collapsing has been done before we get here
1258        // since we only have one argument we don't need to think about only
1259        // processing single points.
1260      if (m_readytype!='E')
1261      {
1262        throw DataException("Programmer error - resolveNodeNP1OUT should only be called on expanded Data.");
1263      }
1264      if (m_op==IDENTITY)
1265      {
1266        throw DataException("Programmer error - resolveNodeNP1OUT should not be called on identity nodes.");
1267      }
1268      size_t subroffset;
1269      const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1270      roffset=m_samplesize*tid;
1271      size_t loop=0;
1272      size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1273      size_t step=getNoValues();
1274      size_t offset=roffset;
1275      switch (m_op)
1276      {
1277        case SYM:
1278        for (loop=0;loop<numsteps;++loop)
1279        {
1280            DataMaths::symmetric(*leftres,m_left->getShape(),subroffset, m_samples, getShape(), offset);
1281            subroffset+=step;
1282            offset+=step;
1283        }
1284        break;
1285        case NSYM:
1286        for (loop=0;loop<numsteps;++loop)
1287        {
1288            DataMaths::nonsymmetric(*leftres,m_left->getShape(),subroffset, m_samples, getShape(), offset);
1289            subroffset+=step;
1290            offset+=step;
1291        }
1292        break;
1293        default:
1294        throw DataException("Programmer error - resolveNP1OUT can not resolve operator "+opToString(m_op)+".");
1295      }
1296      return &m_samples;
1297    }
1298    
1299    const DataTypes::ValueType*
1300    DataLazy::resolveNodeNP1OUT_P(int tid, int sampleNo, size_t& roffset) const
1301    {
1302        // we assume that any collapsing has been done before we get here
1303        // since we only have one argument we don't need to think about only
1304        // processing single points.
1305      if (m_readytype!='E')
1306      {
1307        throw DataException("Programmer error - resolveNodeNP1OUT_P should only be called on expanded Data.");
1308      }
1309      if (m_op==IDENTITY)
1310      {
1311        throw DataException("Programmer error - resolveNodeNP1OUT_P should not be called on identity nodes.");
1312      }
1313      size_t subroffset;
1314      size_t offset;
1315      const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1316      roffset=m_samplesize*tid;
1317      offset=roffset;
1318      size_t loop=0;
1319      size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1320      size_t outstep=getNoValues();
1321      size_t instep=m_left->getNoValues();
1322      switch (m_op)
1323      {
1324        case TRACE:
1325        for (loop=0;loop<numsteps;++loop)
1326        {
1327                DataMaths::trace(*leftres,m_left->getShape(),subroffset, m_samples ,getShape(),offset,m_axis_offset);
1328            subroffset+=instep;
1329            offset+=outstep;
1330        }
1331        break;
1332        case TRANS:
1333        for (loop=0;loop<numsteps;++loop)
1334        {
1335                DataMaths::transpose(*leftres,m_left->getShape(),subroffset, m_samples, getShape(),offset,m_axis_offset);
1336            subroffset+=instep;
1337            offset+=outstep;
1338        }
1339        break;
1340        default:
1341        throw DataException("Programmer error - resolveNP1OUTP can not resolve operator "+opToString(m_op)+".");
1342      }
1343      return &m_samples;
1344    }
1345    
1346    
1347  #define PROC_OP(X) \  const DataTypes::ValueType*
1348      for (int i=0;i<steps;++i,resultp+=resultStep) \  DataLazy::resolveNodeNP1OUT_2P(int tid, int sampleNo, size_t& roffset) const
1349      { \  {
1350         tensor_binary_operation(chunksize, &((*left)[lroffset]), &((*right)[rroffset]), resultp, X); \    if (m_readytype!='E')
1351         lroffset+=leftStep; \    {
1352         rroffset+=rightStep; \      throw DataException("Programmer error - resolveNodeNP1OUT_2P should only be called on expanded Data.");
1353      }
1354      if (m_op==IDENTITY)
1355      {
1356        throw DataException("Programmer error - resolveNodeNP1OUT_2P should not be called on identity nodes.");
1357      }
1358      size_t subroffset;
1359      size_t offset;
1360      const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1361      roffset=m_samplesize*tid;
1362      offset=roffset;
1363      size_t loop=0;
1364      size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1365      size_t outstep=getNoValues();
1366      size_t instep=m_left->getNoValues();
1367      switch (m_op)
1368      {
1369        case SWAP:
1370        for (loop=0;loop<numsteps;++loop)
1371        {
1372                DataMaths::swapaxes(*leftres,m_left->getShape(),subroffset, m_samples, getShape(),offset, m_axis_offset, m_transpose);
1373            subroffset+=instep;
1374            offset+=outstep;
1375      }      }
1376        break;
1377        default:
1378        throw DataException("Programmer error - resolveNodeNP1OUT2P can not resolve operator "+opToString(m_op)+".");
1379      }
1380      return &m_samples;
1381    }
1382    
1383    const DataTypes::ValueType*
1384    DataLazy::resolveNodeCondEval(int tid, int sampleNo, size_t& roffset) const
1385    {
1386      if (m_readytype!='E')
1387      {
1388        throw DataException("Programmer error - resolveNodeCondEval should only be called on expanded Data.");
1389      }
1390      if (m_op!=CONDEVAL)
1391      {
1392        throw DataException("Programmer error - resolveNodeCondEval should only be called on CONDEVAL nodes.");
1393      }
1394      size_t subroffset;
1395    
1396      const ValueType* maskres=m_mask->resolveNodeSample(tid, sampleNo, subroffset);
1397      const ValueType* srcres=0;
1398      if ((*maskres)[subroffset]>0)
1399      {
1400        srcres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1401      }
1402      else
1403      {
1404        srcres=m_right->resolveNodeSample(tid, sampleNo, subroffset);
1405      }
1406    
1407      // Now we need to copy the result
1408    
1409      roffset=m_samplesize*tid;
1410      for (int i=0;i<m_samplesize;++i)
1411      {
1412        m_samples[roffset+i]=(*srcres)[subroffset+i];  
1413      }
1414    
1415      return &m_samples;
1416    }
1417    
 /*  
   \brief Compute the value of the expression (binary operation) for the given sample.  
   \return Vector which stores the value of the subexpression for the given sample.  
   \param v A vector to store intermediate results.  
   \param offset Index in v to begin storing results.  
   \param sampleNo Sample number to evaluate.  
   \param roffset (output parameter) the offset in the return vector where the result begins.  
   
   The return value will be an existing vector so do not deallocate it.  
   If the result is stored in v it should be stored at the offset given.  
   Everything from offset to the end of v should be considered available for this method to use.  
 */  
1418  // This method assumes that any subexpressions which evaluate to Constant or Tagged Data  // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1419  // have already been collapsed to IDENTITY. So we must have at least one expanded child.  // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1420  // If both children are expanded, then we can process them in a single operation (we treat  // If both children are expanded, then we can process them in a single operation (we treat
# Line 644  DataLazy::resolveUnary(ValueType& v, siz Line 1424  DataLazy::resolveUnary(ValueType& v, siz
1424  // There is an additional complication when scalar operations are considered.  // There is an additional complication when scalar operations are considered.
1425  // For example, 2+Vector.  // For example, 2+Vector.
1426  // In this case each double within the point is treated individually  // In this case each double within the point is treated individually
1427  DataTypes::ValueType*  const DataTypes::ValueType*
1428  DataLazy::resolveBinary(ValueType& v,  size_t offset, int sampleNo, size_t& roffset) const  DataLazy::resolveNodeBinary(int tid, int sampleNo, size_t& roffset) const
1429  {  {
1430  cout << "Resolve binary: " << toString() << endl;  LAZYDEBUG(cout << "Resolve binary: " << toString() << endl;)
1431    
1432    size_t lroffset=0, rroffset=0;    // offsets in the left and right result vectors    size_t lroffset=0, rroffset=0;    // offsets in the left and right result vectors
1433      // first work out which of the children are expanded      // first work out which of the children are expanded
1434    bool leftExp=(m_left->m_readytype=='E');    bool leftExp=(m_left->m_readytype=='E');
1435    bool rightExp=(m_right->m_readytype=='E');    bool rightExp=(m_right->m_readytype=='E');
1436    bool bigloops=((leftExp && rightExp) || (!leftExp && !rightExp)); // is processing in single step?    if (!leftExp && !rightExp)
1437    int steps=(bigloops?1:getNumDPPSample());    {
1438    size_t chunksize=(bigloops? m_samplesize : getNoValues());    // if bigloops, pretend the whole sample is a datapoint      throw DataException("Programmer Error - please use collapse if neither argument has type 'E'.");
1439    if (m_left->getRank()!=m_right->getRank())    // need to deal with scalar * ? ops    }
1440    {    bool leftScalar=(m_left->getRank()==0);
1441      EsysAssert((m_left->getRank()==0) || (m_right->getRank()==0), "Error - Ranks must match unless one is 0.");    bool rightScalar=(m_right->getRank()==0);
1442      steps=getNumDPPSample()*max(m_left->getNoValues(),m_right->getNoValues());    if ((m_left->getRank()!=m_right->getRank()) && (!leftScalar && !rightScalar))
1443      chunksize=1;    // for scalar    {
1444    }          throw DataException("resolveBinary - ranks of arguments must match unless one of them is scalar.");
1445    int leftStep=((leftExp && !rightExp)? m_right->getNoValues() : 0);    }
1446    int rightStep=((rightExp && !leftExp)? m_left->getNoValues() : 0);    size_t leftsize=m_left->getNoValues();
1447    int resultStep=max(leftStep,rightStep);   // only one (at most) should be !=0    size_t rightsize=m_right->getNoValues();
1448      size_t chunksize=1;           // how many doubles will be processed in one go
1449      int leftstep=0;       // how far should the left offset advance after each step
1450      int rightstep=0;
1451      int numsteps=0;       // total number of steps for the inner loop
1452      int oleftstep=0;  // the o variables refer to the outer loop
1453      int orightstep=0; // The outer loop is only required in cases where there is an extended scalar
1454      int onumsteps=1;
1455      
1456      bool LES=(leftExp && leftScalar); // Left is an expanded scalar
1457      bool RES=(rightExp && rightScalar);
1458      bool LS=(!leftExp && leftScalar); // left is a single scalar
1459      bool RS=(!rightExp && rightScalar);
1460      bool LN=(!leftExp && !leftScalar);    // left is a single non-scalar
1461      bool RN=(!rightExp && !rightScalar);
1462      bool LEN=(leftExp && !leftScalar);    // left is an expanded non-scalar
1463      bool REN=(rightExp && !rightScalar);
1464    
1465      if ((LES && RES) || (LEN && REN)) // both are Expanded scalars or both are expanded non-scalars
1466      {
1467        chunksize=m_left->getNumDPPSample()*leftsize;
1468        leftstep=0;
1469        rightstep=0;
1470        numsteps=1;
1471      }
1472      else if (LES || RES)
1473      {
1474        chunksize=1;
1475        if (LES)        // left is an expanded scalar
1476        {
1477            if (RS)
1478            {
1479               leftstep=1;
1480               rightstep=0;
1481               numsteps=m_left->getNumDPPSample();
1482            }
1483            else        // RN or REN
1484            {
1485               leftstep=0;
1486               oleftstep=1;
1487               rightstep=1;
1488               orightstep=(RN ? -(int)rightsize : 0);
1489               numsteps=rightsize;
1490               onumsteps=m_left->getNumDPPSample();
1491            }
1492        }
1493        else        // right is an expanded scalar
1494        {
1495            if (LS)
1496            {
1497               rightstep=1;
1498               leftstep=0;
1499               numsteps=m_right->getNumDPPSample();
1500            }
1501            else
1502            {
1503               rightstep=0;
1504               orightstep=1;
1505               leftstep=1;
1506               oleftstep=(LN ? -(int)leftsize : 0);
1507               numsteps=leftsize;
1508               onumsteps=m_right->getNumDPPSample();
1509            }
1510        }
1511      }
1512      else  // this leaves (LEN, RS), (LEN, RN) and their transposes
1513      {
1514        if (LEN)    // and Right will be a single value
1515        {
1516            chunksize=rightsize;
1517            leftstep=rightsize;
1518            rightstep=0;
1519            numsteps=m_left->getNumDPPSample();
1520            if (RS)
1521            {
1522               numsteps*=leftsize;
1523            }
1524        }
1525        else    // REN
1526        {
1527            chunksize=leftsize;
1528            rightstep=leftsize;
1529            leftstep=0;
1530            numsteps=m_right->getNumDPPSample();
1531            if (LS)
1532            {
1533               numsteps*=rightsize;
1534            }
1535        }
1536      }
1537    
1538      int resultStep=max(leftstep,rightstep);   // only one (at most) should be !=0
1539      // Get the values of sub-expressions      // Get the values of sub-expressions
1540    const ValueType* left=m_left->resolveSample(v,offset,sampleNo,lroffset);    const ValueType* left=m_left->resolveNodeSample(tid,sampleNo,lroffset);  
1541    const ValueType* right=m_right->resolveSample(v,offset+m_samplesize,sampleNo,rroffset); // Note    const ValueType* right=m_right->resolveNodeSample(tid,sampleNo,rroffset);
1542      // the right child starts further along.  LAZYDEBUG(cout << "Post sub calls in " << toString() << endl;)
1543    double* resultp=&(v[offset]);     // results are stored at the vector offset we recieved  LAZYDEBUG(cout << "shapes=" << DataTypes::shapeToString(m_left->getShape()) << "," << DataTypes::shapeToString(m_right->getShape()) << endl;)
1544    LAZYDEBUG(cout << "chunksize=" << chunksize << endl << "leftstep=" << leftstep << " rightstep=" << rightstep;)
1545    LAZYDEBUG(cout << " numsteps=" << numsteps << endl << "oleftstep=" << oleftstep << " orightstep=" << orightstep;)
1546    LAZYDEBUG(cout << "onumsteps=" << onumsteps << endl;)
1547    LAZYDEBUG(cout << " DPPS=" << m_left->getNumDPPSample() << "," <<m_right->getNumDPPSample() << endl;)
1548    LAZYDEBUG(cout << "" << LS << RS << LN << RN << LES << RES <<LEN << REN <<   endl;)
1549    
1550    LAZYDEBUG(cout << "Left res["<< lroffset<< "]=" << (*left)[lroffset] << endl;)
1551    LAZYDEBUG(cout << "Right res["<< rroffset<< "]=" << (*right)[rroffset] << endl;)
1552    
1553    
1554      roffset=m_samplesize*tid;
1555      double* resultp=&(m_samples[roffset]);        // results are stored at the vector offset we received
1556    switch(m_op)    switch(m_op)
1557    {    {
1558      case ADD:      case ADD:
1559      PROC_OP(plus<double>());          PROC_OP(NO_ARG,plus<double>());
1560      break;      break;
1561      case SUB:      case SUB:
1562      PROC_OP(minus<double>());      PROC_OP(NO_ARG,minus<double>());
1563      break;      break;
1564      case MUL:      case MUL:
1565      PROC_OP(multiplies<double>());      PROC_OP(NO_ARG,multiplies<double>());
1566      break;      break;
1567      case DIV:      case DIV:
1568      PROC_OP(divides<double>());      PROC_OP(NO_ARG,divides<double>());
1569      break;      break;
1570      case POW:      case POW:
1571      PROC_OP(::pow);         PROC_OP(double (double,double),::pow);
1572      break;      break;
1573      default:      default:
1574      throw DataException("Programmer error - resolveBinary can not resolve operator "+opToString(m_op)+".");      throw DataException("Programmer error - resolveBinary can not resolve operator "+opToString(m_op)+".");
1575    }    }
1576    roffset=offset;    LAZYDEBUG(cout << "Result res[" << roffset<< "]" << m_samples[roffset] << endl;)
1577    return &v;    return &m_samples;
1578  }  }
1579    
1580    
1581    // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1582    // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1583    // unlike the other resolve helpers, we must treat these datapoints separately.
1584    const DataTypes::ValueType*
1585    DataLazy::resolveNodeTProd(int tid, int sampleNo, size_t& roffset) const
1586    {
1587    LAZYDEBUG(cout << "Resolve TensorProduct: " << toString() << endl;)
1588    
1589  /*    size_t lroffset=0, rroffset=0;    // offsets in the left and right result vectors
1590    \brief Compute the value of the expression for the given sample.      // first work out which of the children are expanded
1591    \return Vector which stores the value of the subexpression for the given sample.    bool leftExp=(m_left->m_readytype=='E');
1592    \param v A vector to store intermediate results.    bool rightExp=(m_right->m_readytype=='E');
1593    \param offset Index in v to begin storing results.    int steps=getNumDPPSample();
1594    \param sampleNo Sample number to evaluate.    int leftStep=(leftExp? m_left->getNoValues() : 0);        // do not have scalars as input to this method
1595    \param roffset (output parameter) the offset in the return vector where the result begins.    int rightStep=(rightExp?m_right->getNoValues() : 0);
1596    
1597      int resultStep=getNoValues();
1598      roffset=m_samplesize*tid;
1599      size_t offset=roffset;
1600    
1601      const ValueType* left=m_left->resolveNodeSample(tid, sampleNo, lroffset);
1602    
1603      const ValueType* right=m_right->resolveNodeSample(tid, sampleNo, rroffset);
1604    
1605    LAZYDEBUG(cerr << "[Left shape]=" << DataTypes::shapeToString(m_left->getShape()) << "\n[Right shape]=" << DataTypes::shapeToString(m_right->getShape()) << " result=" <<DataTypes::shapeToString(getShape()) <<  endl;
1606    cout << getNoValues() << endl;)
1607    
1608    
1609    LAZYDEBUG(cerr << "Post sub calls: " << toString() << endl;)
1610    LAZYDEBUG(cout << "LeftExp=" << leftExp << " rightExp=" << rightExp << endl;)
1611    LAZYDEBUG(cout << "LeftR=" << m_left->getRank() << " rightExp=" << m_right->getRank() << endl;)
1612    LAZYDEBUG(cout << "LeftSize=" << m_left->getNoValues() << " RightSize=" << m_right->getNoValues() << endl;)
1613    LAZYDEBUG(cout << "m_samplesize=" << m_samplesize << endl;)
1614    LAZYDEBUG(cout << "outputshape=" << DataTypes::shapeToString(getShape()) << endl;)
1615    LAZYDEBUG(cout << "DPPS=" << m_right->getNumDPPSample() <<"."<<endl;)
1616    
1617      double* resultp=&(m_samples[offset]);     // results are stored at the vector offset we received
1618      switch(m_op)
1619      {
1620        case PROD:
1621        for (int i=0;i<steps;++i,resultp+=resultStep)
1622        {
1623              const double *ptr_0 = &((*left)[lroffset]);
1624              const double *ptr_1 = &((*right)[rroffset]);
1625    
1626    LAZYDEBUG(cout << DataTypes::pointToString(*left, m_left->getShape(),lroffset,"LEFT") << endl;)
1627    LAZYDEBUG(cout << DataTypes::pointToString(*right,m_right->getShape(),rroffset, "RIGHT") << endl;)
1628    
1629              matrix_matrix_product(m_SL, m_SM, m_SR, ptr_0, ptr_1, resultp, m_transpose);
1630    
1631          lroffset+=leftStep;
1632          rroffset+=rightStep;
1633        }
1634        break;
1635        default:
1636        throw DataException("Programmer error - resolveTProduct can not resolve operator "+opToString(m_op)+".");
1637      }
1638      roffset=offset;
1639      return &m_samples;
1640    }
1641    
   The return value will be an existing vector so do not deallocate it.  
 */  
 // the vector and the offset are a place where the method could write its data if it wishes  
 // it is not obligated to do so. For example, if it has its own storage already, it can use that.  
 // Hence the return value to indicate where the data is actually stored.  
 // Regardless, the storage should be assumed to be used, even if it isn't.  
1642    
 // the roffset is the offset within the returned vector where the data begins  
1643  const DataTypes::ValueType*  const DataTypes::ValueType*
1644  DataLazy::resolveSample(ValueType& v, size_t offset, int sampleNo, size_t& roffset)  DataLazy::resolveSample(int sampleNo, size_t& roffset) const
1645  {  {
1646  cout << "Resolve sample " << toString() << endl;  #ifdef _OPENMP
1647      // collapse so we have a 'E' node or an IDENTITY for some other type      int tid=omp_get_thread_num();
1648    if (m_readytype!='E' && m_op!=IDENTITY)  #else
1649        int tid=0;
1650    #endif
1651    
1652    #ifdef LAZY_STACK_PROF
1653        stackstart[tid]=&tid;
1654        stackend[tid]=&tid;
1655        const DataTypes::ValueType* r=resolveNodeSample(tid, sampleNo, roffset);
1656        size_t d=(size_t)stackstart[tid]-(size_t)stackend[tid];
1657        #pragma omp critical
1658        if (d>maxstackuse)
1659        {
1660    cout << "Max resolve Stack use " << d << endl;
1661            maxstackuse=d;
1662        }
1663        return r;
1664    #else
1665        return resolveNodeSample(tid, sampleNo, roffset);
1666    #endif
1667    }
1668    
1669    
1670    // This needs to do the work of the identity constructor
1671    void
1672    DataLazy::resolveToIdentity()
1673    {
1674       if (m_op==IDENTITY)
1675        return;
1676       DataReady_ptr p=resolveNodeWorker();
1677       makeIdentity(p);
1678    }
1679    
1680    void DataLazy::makeIdentity(const DataReady_ptr& p)
1681    {
1682       m_op=IDENTITY;
1683       m_axis_offset=0;
1684       m_transpose=0;
1685       m_SL=m_SM=m_SR=0;
1686       m_children=m_height=0;
1687       m_id=p;
1688       if(p->isConstant()) {m_readytype='C';}
1689       else if(p->isExpanded()) {m_readytype='E';}
1690       else if (p->isTagged()) {m_readytype='T';}
1691       else {throw DataException("Unknown DataReady instance in convertToIdentity constructor.");}
1692       m_samplesize=p->getNumDPPSample()*p->getNoValues();
1693       m_left.reset();
1694       m_right.reset();
1695    }
1696    
1697    
1698    DataReady_ptr
1699    DataLazy::resolve()
1700    {
1701        resolveToIdentity();
1702        return m_id;
1703    }
1704    
1705    
1706    /* This is really a static method but I think that caused problems in windows */
1707    void
1708    DataLazy::resolveGroupWorker(std::vector<DataLazy*>& dats)
1709    {
1710      if (dats.empty())
1711    {    {
1712      collapse();      return;
1713    }    }
1714    if (m_op==IDENTITY)      vector<DataLazy*> work;
1715      FunctionSpace fs=dats[0]->getFunctionSpace();
1716      bool match=true;
1717      for (int i=dats.size()-1;i>=0;--i)
1718    {    {
1719      const ValueType& vec=m_id->getVector();      if (dats[i]->m_readytype!='E')
1720      if (m_readytype=='C')      {
1721      {          dats[i]->collapse();
1722      roffset=0;      }
1723      return &(vec);      if (dats[i]->m_op!=IDENTITY)
1724      }      {
1725      roffset=m_id->getPointOffset(sampleNo, 0);          work.push_back(dats[i]);
1726      return &(vec);          if (fs!=dats[i]->getFunctionSpace())
1727            {
1728                match=false;
1729            }
1730        }
1731    }    }
1732    if (m_readytype!='E')    if (work.empty())
1733    {    {
1734      throw DataException("Programmer Error - Collapse did not produce an expanded node.");      return;     // no work to do
1735    }    }
1736    switch (getOpgroup(m_op))    if (match)    // all functionspaces match.  Yes I realise this is overly strict
1737      {     // it is possible that dats[0] is one of the objects which we discarded and
1738            // all the other functionspaces match.
1739        vector<DataExpanded*> dep;
1740        vector<ValueType*> vecs;
1741        for (int i=0;i<work.size();++i)
1742        {
1743            dep.push_back(new DataExpanded(fs,work[i]->getShape(), ValueType(work[i]->getNoValues())));
1744            vecs.push_back(&(dep[i]->getVectorRW()));
1745        }
1746        int totalsamples=work[0]->getNumSamples();
1747        const ValueType* res=0; // Storage for answer
1748        int sample;
1749        #pragma omp parallel private(sample, res)
1750        {
1751            size_t roffset=0;
1752            #pragma omp for schedule(static)
1753            for (sample=0;sample<totalsamples;++sample)
1754            {
1755            roffset=0;
1756            int j;
1757            for (j=work.size()-1;j>=0;--j)
1758            {
1759    #ifdef _OPENMP
1760                    res=work[j]->resolveNodeSample(omp_get_thread_num(),sample,roffset);
1761    #else
1762                    res=work[j]->resolveNodeSample(0,sample,roffset);
1763    #endif
1764                    DataVector::size_type outoffset=dep[j]->getPointOffset(sample,0);
1765                    memcpy(&((*vecs[j])[outoffset]),&((*res)[roffset]),work[j]->m_samplesize*sizeof(DataVector::ElementType));
1766            }
1767            }
1768        }
1769        // Now we need to load the new results as identity ops into the lazy nodes
1770        for (int i=work.size()-1;i>=0;--i)
1771        {
1772            work[i]->makeIdentity(boost::dynamic_pointer_cast<DataReady>(dep[i]->getPtr()));
1773        }
1774      }
1775      else  // functionspaces do not match
1776    {    {
1777    case G_UNARY: return resolveUnary(v, offset,sampleNo,roffset);      for (int i=0;i<work.size();++i)
1778    case G_BINARY: return resolveBinary(v, offset,sampleNo,roffset);      {
1779    default:          work[i]->resolveToIdentity();
1780      throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");      }
1781    }    }
1782  }  }
1783    
1784    
1785  // To simplify the memory management, all threads operate on one large vector, rather than one each.  
1786  // Each sample is evaluated independently and copied into the result DataExpanded.  // This version of resolve uses storage in each node to hold results
1787  DataReady_ptr  DataReady_ptr
1788  DataLazy::resolve()  DataLazy::resolveNodeWorker()
1789  {  {
   
 cout << "Sample size=" << m_samplesize << endl;  
 cout << "Buffers=" << m_buffsRequired << endl;  
   
1790    if (m_readytype!='E')     // if the whole sub-expression is Constant or Tagged, then evaluate it normally    if (m_readytype!='E')     // if the whole sub-expression is Constant or Tagged, then evaluate it normally
1791    {    {
1792      collapse();      collapse();
# Line 764  cout << "Buffers=" << m_buffsRequired << Line 1796  cout << "Buffers=" << m_buffsRequired <<
1796      return m_id;      return m_id;
1797    }    }
1798      // from this point on we must have m_op!=IDENTITY and m_readytype=='E'      // from this point on we must have m_op!=IDENTITY and m_readytype=='E'
   size_t threadbuffersize=m_samplesize*(max(1,m_buffsRequired));    // Each thread needs to have enough  
     // storage to evaluate its expression  
   int numthreads=1;  
 #ifdef _OPENMP  
   numthreads=getNumberOfThreads();  
   int threadnum=0;  
 #endif  
   ValueType v(numthreads*threadbuffersize);  
 cout << "Buffer created with size=" << v.size() << endl;  
1799    DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(),  ValueType(getNoValues()));    DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(),  ValueType(getNoValues()));
1800    ValueType& resvec=result->getVector();    ValueType& resvec=result->getVectorRW();
1801    DataReady_ptr resptr=DataReady_ptr(result);    DataReady_ptr resptr=DataReady_ptr(result);
1802    
1803    int sample;    int sample;
   size_t outoffset;     // offset in the output data  
1804    int totalsamples=getNumSamples();    int totalsamples=getNumSamples();
1805    const ValueType* res=0;   // Vector storing the answer    const ValueType* res=0;   // Storage for answer
1806    size_t resoffset=0;       // where in the vector to find the answer  LAZYDEBUG(cout << "Total number of samples=" <<totalsamples << endl;)
1807    #pragma omp parallel for private(sample,resoffset,outoffset,threadnum,res) schedule(static)    #pragma omp parallel private(sample,res)
1808    for (sample=0;sample<totalsamples;++sample)    {
1809    {      size_t roffset=0;
1810  cout << "################################# " << sample << endl;  #ifdef LAZY_STACK_PROF
1811        stackstart[omp_get_thread_num()]=&roffset;
1812        stackend[omp_get_thread_num()]=&roffset;
1813    #endif
1814        #pragma omp for schedule(static)
1815        for (sample=0;sample<totalsamples;++sample)
1816        {
1817            roffset=0;
1818  #ifdef _OPENMP  #ifdef _OPENMP
1819      res=resolveSample(v,threadbuffersize*omp_get_thread_num(),sample,resoffset);              res=resolveNodeSample(omp_get_thread_num(),sample,roffset);
1820  #else  #else
1821      res=resolveSample(v,0,sample,resoffset);   // res would normally be v, but not if its a single IDENTITY op.              res=resolveNodeSample(0,sample,roffset);
1822  #endif  #endif
1823  cerr << "-------------------------------- " << endl;  LAZYDEBUG(cout << "Sample #" << sample << endl;)
1824      outoffset=result->getPointOffset(sample,0);  LAZYDEBUG(cout << "Final res[" << roffset<< "]=" << (*res)[roffset] << (*res)[roffset]<< endl; )
1825  cerr << "offset=" << outoffset << endl;              DataVector::size_type outoffset=result->getPointOffset(sample,0);
1826      for (unsigned int i=0;i<m_samplesize;++i,++outoffset,++resoffset)   // copy values into the output vector              memcpy(&(resvec[outoffset]),&((*res)[roffset]),m_samplesize*sizeof(DataVector::ElementType));
1827      {      }
1828      resvec[outoffset]=(*res)[resoffset];    }
1829      }  #ifdef LAZY_STACK_PROF
1830  cerr << "*********************************" << endl;    for (int i=0;i<getNumberOfThreads();++i)
1831      {
1832        size_t r=((size_t)stackstart[i] - (size_t)stackend[i]);
1833    //  cout << i << " " << stackstart[i] << " .. " << stackend[i] << " = " <<  r << endl;
1834        if (r>maxstackuse)
1835        {
1836            maxstackuse=r;
1837        }
1838    }    }
1839      cout << "Max resolve Stack use=" << maxstackuse << endl;
1840    #endif
1841    return resptr;    return resptr;
1842  }  }
1843    
# Line 806  std::string Line 1845  std::string
1845  DataLazy::toString() const  DataLazy::toString() const
1846  {  {
1847    ostringstream oss;    ostringstream oss;
1848    oss << "Lazy Data:";    oss << "Lazy Data: [depth=" << m_height<< "] ";
1849    intoString(oss);    switch (escriptParams.getLAZY_STR_FMT())
1850      {
1851      case 1:   // tree format
1852        oss << endl;
1853        intoTreeString(oss,"");
1854        break;
1855      case 2:   // just the depth
1856        break;
1857      default:
1858        intoString(oss);
1859        break;
1860      }
1861    return oss.str();    return oss.str();
1862  }  }
1863    
# Line 815  DataLazy::toString() const Line 1865  DataLazy::toString() const
1865  void  void
1866  DataLazy::intoString(ostringstream& oss) const  DataLazy::intoString(ostringstream& oss) const
1867  {  {
1868    //    oss << "[" << m_children <<";"<<m_height <<"]";
1869    switch (getOpgroup(m_op))    switch (getOpgroup(m_op))
1870    {    {
1871    case G_IDENTITY:    case G_IDENTITY:
# Line 844  DataLazy::intoString(ostringstream& oss) Line 1895  DataLazy::intoString(ostringstream& oss)
1895      oss << ')';      oss << ')';
1896      break;      break;
1897    case G_UNARY:    case G_UNARY:
1898      case G_UNARY_P:
1899      case G_NP1OUT:
1900      case G_NP1OUT_P:
1901      case G_REDUCTION:
1902      oss << opToString(m_op) << '(';      oss << opToString(m_op) << '(';
1903      m_left->intoString(oss);      m_left->intoString(oss);
1904      oss << ')';      oss << ')';
1905      break;      break;
1906      case G_TENSORPROD:
1907        oss << opToString(m_op) << '(';
1908        m_left->intoString(oss);
1909        oss << ", ";
1910        m_right->intoString(oss);
1911        oss << ')';
1912        break;
1913      case G_NP1OUT_2P:
1914        oss << opToString(m_op) << '(';
1915        m_left->intoString(oss);
1916        oss << ", " << m_axis_offset << ", " << m_transpose;
1917        oss << ')';
1918        break;
1919      case G_CONDEVAL:
1920        oss << opToString(m_op)<< '(' ;
1921        m_mask->intoString(oss);
1922        oss << " ? ";
1923        m_left->intoString(oss);
1924        oss << " : ";
1925        m_right->intoString(oss);
1926        oss << ')';
1927        break;
1928    default:    default:
1929      oss << "UNKNOWN";      oss << "UNKNOWN";
1930    }    }
1931  }  }
1932    
1933    
1934    void
1935    DataLazy::intoTreeString(ostringstream& oss, string indent) const
1936    {
1937      oss << '[' << m_rank << ':' << setw(3) << m_samplesize << "] " << indent;
1938      switch (getOpgroup(m_op))
1939      {
1940      case G_IDENTITY:
1941        if (m_id->isExpanded())
1942        {
1943           oss << "E";
1944        }
1945        else if (m_id->isTagged())
1946        {
1947          oss << "T";
1948        }
1949        else if (m_id->isConstant())
1950        {
1951          oss << "C";
1952        }
1953        else
1954        {
1955          oss << "?";
1956        }
1957        oss << '@' << m_id.get() << endl;
1958        break;
1959      case G_BINARY:
1960        oss << opToString(m_op) << endl;
1961        indent+='.';
1962        m_left->intoTreeString(oss, indent);
1963        m_right->intoTreeString(oss, indent);
1964        break;
1965      case G_UNARY:
1966      case G_UNARY_P:
1967      case G_NP1OUT:
1968      case G_NP1OUT_P:
1969      case G_REDUCTION:
1970        oss << opToString(m_op) << endl;
1971        indent+='.';
1972        m_left->intoTreeString(oss, indent);
1973        break;
1974      case G_TENSORPROD:
1975        oss << opToString(m_op) << endl;
1976        indent+='.';
1977        m_left->intoTreeString(oss, indent);
1978        m_right->intoTreeString(oss, indent);
1979        break;
1980      case G_NP1OUT_2P:
1981        oss << opToString(m_op) << ", " << m_axis_offset << ", " << m_transpose<< endl;
1982        indent+='.';
1983        m_left->intoTreeString(oss, indent);
1984        break;
1985      default:
1986        oss << "UNKNOWN";
1987      }
1988    }
1989    
1990    
1991  DataAbstract*  DataAbstract*
1992  DataLazy::deepCopy()  DataLazy::deepCopy()
1993  {  {
1994    switch (getOpgroup(m_op))    switch (getOpgroup(m_op))
1995    {    {
1996    case G_IDENTITY:  return new DataLazy(m_id->deepCopy()->getPtr());    case G_IDENTITY:  return new DataLazy(m_id->deepCopy()->getPtr());
1997    case G_UNARY: return new DataLazy(m_left->deepCopy()->getPtr(),m_op);    case G_UNARY:
1998      case G_REDUCTION:      return new DataLazy(m_left->deepCopy()->getPtr(),m_op);
1999      case G_UNARY_P:   return new DataLazy(m_left->deepCopy()->getPtr(), m_op, m_tol);
2000    case G_BINARY:    return new DataLazy(m_left->deepCopy()->getPtr(),m_right->deepCopy()->getPtr(),m_op);    case G_BINARY:    return new DataLazy(m_left->deepCopy()->getPtr(),m_right->deepCopy()->getPtr(),m_op);
2001      case G_NP1OUT: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(),m_op);
2002      case G_TENSORPROD: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
2003      case G_NP1OUT_P:   return new DataLazy(m_left->deepCopy()->getPtr(),m_op,  m_axis_offset);
2004      case G_NP1OUT_2P:  return new DataLazy(m_left->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
2005    default:    default:
2006      throw DataException("Programmer error - do not know how to deepcopy operator "+opToString(m_op)+".");      throw DataException("Programmer error - do not know how to deepcopy operator "+opToString(m_op)+".");
2007    }    }
2008  }  }
2009    
2010    
2011    
2012    // There is no single, natural interpretation of getLength on DataLazy.
2013    // Instances of DataReady can look at the size of their vectors.
2014    // For lazy though, it could be the size the data would be if it were resolved;
2015    // or it could be some function of the lengths of the DataReady instances which
2016    // form part of the expression.
2017    // Rather than have people making assumptions, I have disabled the method.
2018  DataTypes::ValueType::size_type  DataTypes::ValueType::size_type
2019  DataLazy::getLength() const  DataLazy::getLength() const
2020  {  {
2021    return m_length;    throw DataException("getLength() does not make sense for lazy data.");
2022  }  }
2023    
2024    
# Line 936  DataLazy::getPointOffset(int sampleNo, Line 2084  DataLazy::getPointOffset(int sampleNo,
2084    throw DataException("Programmer error - getPointOffset on lazy data may require collapsing (but this object is marked const).");    throw DataException("Programmer error - getPointOffset on lazy data may require collapsing (but this object is marked const).");
2085  }  }
2086    
2087  // It would seem that DataTagged will need to be treated differently since even after setting all tags  
2088  // to zero, all the tags from all the DataTags would be in the result.  // I have decided to let Data:: handle this issue.
 // However since they all have the same value (0) whether they are there or not should not matter.  
 // So I have decided that for all types this method will create a constant 0.  
 // It can be promoted up as required.  
 // A possible efficiency concern might be expanded->constant->expanded which has an extra memory management  
 // but we can deal with that if it arrises.  
2089  void  void
2090  DataLazy::setToZero()  DataLazy::setToZero()
2091  {  {
2092    DataTypes::ValueType v(getNoValues(),0);  //   DataTypes::ValueType v(getNoValues(),0);
2093    m_id=DataReady_ptr(new DataConstant(getFunctionSpace(),getShape(),v));  //   m_id=DataReady_ptr(new DataConstant(getFunctionSpace(),getShape(),v));
2094    m_op=IDENTITY;  //   m_op=IDENTITY;
2095    m_right.reset();    //   m_right.reset();  
2096    m_left.reset();  //   m_left.reset();
2097    m_readytype='C';  //   m_readytype='C';
2098    m_buffsRequired=1;  //   m_buffsRequired=1;
2099    
2100      (void)privdebug;  // to stop the compiler complaining about unused privdebug
2101      throw DataException("Programmer error - setToZero not supported for DataLazy (DataLazy objects should be read only).");
2102    }
2103    
2104    bool
2105    DataLazy::actsExpanded() const
2106    {
2107        return (m_readytype=='E');
2108  }  }
2109    
2110  }   // end namespace  }   // end namespace

Legend:
Removed from v.1950  
changed lines
  Added in v.5464

  ViewVC Help
Powered by ViewVC 1.1.26