/[escript]/branches/clazy/escriptcore/src/DataLazy.cpp
ViewVC logotype

Diff of /branches/clazy/escriptcore/src/DataLazy.cpp

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

trunk/escript/src/DataLazy.cpp revision 2066 by jfenwick, Thu Nov 20 05:31:33 2008 UTC trunk/escriptcore/src/DataLazy.cpp revision 5448 by jfenwick, Fri Feb 6 05:31:37 2015 UTC
# Line 1  Line 1 
1    
2  /*******************************************************  /*****************************************************************************
3  *  *
4  * Copyright (c) 2003-2008 by University of Queensland  * Copyright (c) 2003-2015 by University of Queensland
5  * Earth Systems Science Computational Center (ESSCC)  * http://www.uq.edu.au
 * http://www.uq.edu.au/esscc  
6  *  *
7  * Primary Business: Queensland, Australia  * Primary Business: Queensland, Australia
8  * Licensed under the Open Software License version 3.0  * Licensed under the Open Software License version 3.0
9  * http://www.opensource.org/licenses/osl-3.0.php  * http://www.opensource.org/licenses/osl-3.0.php
10  *  *
11  *******************************************************/  * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12    * Development 2012-2013 by School of Earth Sciences
13    * Development from 2014 by Centre for Geoscience Computing (GeoComp)
14    *
15    *****************************************************************************/
16    
17    
18  #include "DataLazy.h"  #include "DataLazy.h"
19  #ifdef USE_NETCDF  #include "esysUtils/Esys_MPI.h"
 #include <netcdfcpp.h>  
 #endif  
 #ifdef PASO_MPI  
 #include <mpi.h>  
 #endif  
20  #ifdef _OPENMP  #ifdef _OPENMP
21  #include <omp.h>  #include <omp.h>
22  #endif  #endif
# Line 28  Line 26 
26  #include "UnaryFuncs.h"     // for escript::fsign  #include "UnaryFuncs.h"     // for escript::fsign
27  #include "Utils.h"  #include "Utils.h"
28    
29    #include "EscriptParams.h"
30    
31    #ifdef USE_NETCDF
32    #include <netcdfcpp.h>
33    #endif
34    
35    #include <iomanip>      // for some fancy formatting in debug
36    
37    // #define LAZYDEBUG(X) if (privdebug){X;}
38    #define LAZYDEBUG(X)
39    namespace
40    {
41    bool privdebug=false;
42    
43    #define ENABLEDEBUG privdebug=true;
44    #define DISABLEDEBUG privdebug=false;
45    }
46    
47    // #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {cerr << "\n!!!!!!! SIZE LIMIT EXCEEDED " << m_children << ";" << m_height << endl << toString() << endl;resolveToIdentity();}
48    
49    // #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {cerr << "SIZE LIMIT EXCEEDED " << m_height << endl;resolveToIdentity();}
50    
51    
52    #define SIZELIMIT if (m_height>escript::escriptParams.getTOO_MANY_LEVELS())  {if (escript::escriptParams.getLAZY_VERBOSE()){cerr << "SIZE LIMIT EXCEEDED height=" << m_height << endl;}resolveToIdentity();}
53    
54  /*  /*
55  How does DataLazy work?  How does DataLazy work?
56  ~~~~~~~~~~~~~~~~~~~~~~~  ~~~~~~~~~~~~~~~~~~~~~~~
# Line 39  A special operation, IDENTITY, stores an Line 62  A special operation, IDENTITY, stores an
62  This means that all "internal" nodes in the structure are instances of DataLazy.  This means that all "internal" nodes in the structure are instances of DataLazy.
63    
64  Each operation has a string representation as well as an opgroup - eg G_IDENTITY, G_BINARY, ...  Each operation has a string representation as well as an opgroup - eg G_IDENTITY, G_BINARY, ...
65  Note that IDENITY is not considered a unary operation.  Note that IDENTITY is not considered a unary operation.
66    
67  I am avoiding calling the structure formed a tree because it is not guaranteed to be one (eg c=a+a).  I am avoiding calling the structure formed a tree because it is not guaranteed to be one (eg c=a+a).
68  It must however form a DAG (directed acyclic graph).  It must however form a DAG (directed acyclic graph).
# Line 48  I will refer to individual DataLazy obje Line 71  I will refer to individual DataLazy obje
71  Each node also stores:  Each node also stores:
72  - m_readytype \in {'E','T','C','?'} ~ indicates what sort of DataReady would be produced if the expression was  - m_readytype \in {'E','T','C','?'} ~ indicates what sort of DataReady would be produced if the expression was
73      evaluated.      evaluated.
74  - m_buffsrequired ~ the larged number of samples which would need to be kept simultaneously in order to  - m_buffsrequired ~ the large number of samples which would need to be kept simultaneously in order to
75      evaluate the expression.      evaluate the expression.
76  - m_samplesize ~ the number of doubles stored in a sample.  - m_samplesize ~ the number of doubles stored in a sample.
77    
# Line 70  The convention that I use, is that the r Line 93  The convention that I use, is that the r
93  For expressions which evaluate to Constant or Tagged, there is a different evaluation method.  For expressions which evaluate to Constant or Tagged, there is a different evaluation method.
94  The collapse method invokes the (non-lazy) operations on the Data class to evaluate the expression.  The collapse method invokes the (non-lazy) operations on the Data class to evaluate the expression.
95    
96  To add a new operator you need to do the following (plus anything I might have forgotten):  To add a new operator you need to do the following (plus anything I might have forgotten - adding a new group for example):
97  1) Add to the ES_optype.  1) Add to the ES_optype.
98  2) determine what opgroup your operation belongs to (X)  2) determine what opgroup your operation belongs to (X)
99  3) add a string for the op to the end of ES_opstrings  3) add a string for the op to the end of ES_opstrings
# Line 90  namespace escript Line 113  namespace escript
113  namespace  namespace
114  {  {
115    
116    
117    // enabling this will print out when ever the maximum stacksize used by resolve increases
118    // it assumes _OPENMP is also in use
119    //#define LAZY_STACK_PROF
120    
121    
122    
123    #ifndef _OPENMP
124      #ifdef LAZY_STACK_PROF
125      #undef LAZY_STACK_PROF
126      #endif
127    #endif
128    
129    
130    #ifdef LAZY_STACK_PROF
131    std::vector<void*> stackstart(getNumberOfThreads());
132    std::vector<void*> stackend(getNumberOfThreads());
133    size_t maxstackuse=0;
134    #endif
135    
136  enum ES_opgroup  enum ES_opgroup
137  {  {
138     G_UNKNOWN,     G_UNKNOWN,
139     G_IDENTITY,     G_IDENTITY,
140     G_BINARY,        // pointwise operations with two arguments     G_BINARY,        // pointwise operations with two arguments
141     G_UNARY,     // pointwise operations with one argument     G_UNARY,     // pointwise operations with one argument
142       G_UNARY_P,       // pointwise operations with one argument, requiring a parameter
143     G_NP1OUT,        // non-pointwise op with one output     G_NP1OUT,        // non-pointwise op with one output
144     G_TENSORPROD     // general tensor product     G_NP1OUT_P,      // non-pointwise op with one output requiring a parameter
145       G_TENSORPROD,    // general tensor product
146       G_NP1OUT_2P,     // non-pointwise op with one output requiring two params
147       G_REDUCTION,     // non-pointwise unary op with a scalar output
148       G_CONDEVAL
149  };  };
150    
151    
# Line 108  string ES_opstrings[]={"UNKNOWN","IDENTI Line 156  string ES_opstrings[]={"UNKNOWN","IDENTI
156              "asin","acos","atan","sinh","cosh","tanh","erf",              "asin","acos","atan","sinh","cosh","tanh","erf",
157              "asinh","acosh","atanh",              "asinh","acosh","atanh",
158              "log10","log","sign","abs","neg","pos","exp","sqrt",              "log10","log","sign","abs","neg","pos","exp","sqrt",
159              "1/","where>0","where<0","where>=0","where<=0",              "1/","where>0","where<0","where>=0","where<=0", "where<>0","where=0",
160              "symmetric","nonsymmetric",              "symmetric","nonsymmetric",
161              "prod"};              "prod",
162  int ES_opcount=36;              "transpose", "trace",
163                "swapaxes",
164                "minval", "maxval",
165                "condEval"};
166    int ES_opcount=44;
167  ES_opgroup opgroups[]={G_UNKNOWN,G_IDENTITY,G_BINARY,G_BINARY,G_BINARY,G_BINARY, G_BINARY,  ES_opgroup opgroups[]={G_UNKNOWN,G_IDENTITY,G_BINARY,G_BINARY,G_BINARY,G_BINARY, G_BINARY,
168              G_UNARY,G_UNARY,G_UNARY, //10              G_UNARY,G_UNARY,G_UNARY, //10
169              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,    // 17              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,    // 17
170              G_UNARY,G_UNARY,G_UNARY,                    // 20              G_UNARY,G_UNARY,G_UNARY,                    // 20
171              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,        // 28              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,    // 28
172              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,            // 33              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY, G_UNARY_P, G_UNARY_P,      // 35
173              G_NP1OUT,G_NP1OUT,              G_NP1OUT,G_NP1OUT,
174              G_TENSORPROD};              G_TENSORPROD,
175                G_NP1OUT_P, G_NP1OUT_P,
176                G_NP1OUT_2P,
177                G_REDUCTION, G_REDUCTION,
178                G_CONDEVAL};
179  inline  inline
180  ES_opgroup  ES_opgroup
181  getOpgroup(ES_optype op)  getOpgroup(ES_optype op)
# Line 140  resultFS(DataAbstract_ptr left, DataAbst Line 196  resultFS(DataAbstract_ptr left, DataAbst
196    FunctionSpace r=right->getFunctionSpace();    FunctionSpace r=right->getFunctionSpace();
197    if (l!=r)    if (l!=r)
198    {    {
199      if (r.probeInterpolation(l))      signed char res=r.getDomain()->preferredInterpolationOnDomain(r.getTypeCode(), l.getTypeCode());
200        if (res==1)
201      {      {
202      return l;      return l;
203      }      }
204      if (l.probeInterpolation(r))      if (res==-1)
205      {      {
206      return r;      return r;
207      }      }
# Line 164  resultShape(DataAbstract_ptr left, DataA Line 221  resultShape(DataAbstract_ptr left, DataA
221        {        {
222          throw DataException("Shapes not the name - shapes must match for (point)binary operations.");          throw DataException("Shapes not the name - shapes must match for (point)binary operations.");
223        }        }
224    
225        if (left->getRank()==0)   // we need to allow scalar * anything        if (left->getRank()==0)   // we need to allow scalar * anything
226        {        {
227          return right->getShape();          return right->getShape();
# Line 177  resultShape(DataAbstract_ptr left, DataA Line 235  resultShape(DataAbstract_ptr left, DataA
235      return left->getShape();      return left->getShape();
236  }  }
237    
238    // return the shape for "op left"
239    
240    DataTypes::ShapeType
241    resultShape(DataAbstract_ptr left, ES_optype op, int axis_offset)
242    {
243        switch(op)
244        {
245            case TRANS:
246           {            // for the scoping of variables
247            const DataTypes::ShapeType& s=left->getShape();
248            DataTypes::ShapeType sh;
249            int rank=left->getRank();
250            if (axis_offset<0 || axis_offset>rank)
251            {
252                stringstream e;
253                e << "Error - Data::transpose must have 0 <= axis_offset <= rank=" << rank;
254                throw DataException(e.str());
255            }
256            for (int i=0; i<rank; i++)
257            {
258               int index = (axis_offset+i)%rank;
259               sh.push_back(s[index]); // Append to new shape
260            }
261            return sh;
262           }
263        break;
264        case TRACE:
265           {
266            int rank=left->getRank();
267            if (rank<2)
268            {
269               throw DataException("Trace can only be computed for objects with rank 2 or greater.");
270            }
271            if ((axis_offset>rank-2) || (axis_offset<0))
272            {
273               throw DataException("Trace: axis offset must lie between 0 and rank-2 inclusive.");
274            }
275            if (rank==2)
276            {
277               return DataTypes::scalarShape;
278            }
279            else if (rank==3)
280            {
281               DataTypes::ShapeType sh;
282                   if (axis_offset==0)
283               {
284                    sh.push_back(left->getShape()[2]);
285                   }
286                   else     // offset==1
287               {
288                sh.push_back(left->getShape()[0]);
289                   }
290               return sh;
291            }
292            else if (rank==4)
293            {
294               DataTypes::ShapeType sh;
295               const DataTypes::ShapeType& s=left->getShape();
296                   if (axis_offset==0)
297               {
298                    sh.push_back(s[2]);
299                    sh.push_back(s[3]);
300                   }
301                   else if (axis_offset==1)
302               {
303                    sh.push_back(s[0]);
304                    sh.push_back(s[3]);
305                   }
306               else     // offset==2
307               {
308                sh.push_back(s[0]);
309                sh.push_back(s[1]);
310               }
311               return sh;
312            }
313            else        // unknown rank
314            {
315               throw DataException("Error - Data::trace can only be calculated for rank 2, 3 or 4 object.");
316            }
317           }
318        break;
319            default:
320        throw DataException("Programmer error - resultShape(left,op) can't compute shapes for operator "+opToString(op)+".");
321        }
322    }
323    
324    DataTypes::ShapeType
325    SwapShape(DataAbstract_ptr left, const int axis0, const int axis1)
326    {
327         // This code taken from the Data.cpp swapaxes() method
328         // Some of the checks are probably redundant here
329         int axis0_tmp,axis1_tmp;
330         const DataTypes::ShapeType& s=left->getShape();
331         DataTypes::ShapeType out_shape;
332         // Here's the equivalent of python s_out=s[axis_offset:]+s[:axis_offset]
333         // which goes thru all shape vector elements starting with axis_offset (at index=rank wrap around to 0)
334         int rank=left->getRank();
335         if (rank<2) {
336            throw DataException("Error - Data::swapaxes argument must have at least rank 2.");
337         }
338         if (axis0<0 || axis0>rank-1) {
339            stringstream e;
340            e << "Error - Data::swapaxes: axis0 must be between 0 and rank-1=" << (rank-1);
341            throw DataException(e.str());
342         }
343         if (axis1<0 || axis1>rank-1) {
344            stringstream e;
345            e << "Error - Data::swapaxes: axis1 must be between 0 and rank-1=" << (rank-1);
346            throw DataException(e.str());
347         }
348         if (axis0 == axis1) {
349             throw DataException("Error - Data::swapaxes: axis indices must be different.");
350         }
351         if (axis0 > axis1) {
352             axis0_tmp=axis1;
353             axis1_tmp=axis0;
354         } else {
355             axis0_tmp=axis0;
356             axis1_tmp=axis1;
357         }
358         for (int i=0; i<rank; i++) {
359           if (i == axis0_tmp) {
360              out_shape.push_back(s[axis1_tmp]);
361           } else if (i == axis1_tmp) {
362              out_shape.push_back(s[axis0_tmp]);
363           } else {
364              out_shape.push_back(s[i]);
365           }
366         }
367        return out_shape;
368    }
369    
370    
371  // determine the output shape for the general tensor product operation  // determine the output shape for the general tensor product operation
372  // the additional parameters return information required later for the product  // the additional parameters return information required later for the product
373  // the majority of this code is copy pasted from C_General_Tensor_Product  // the majority of this code is copy pasted from C_General_Tensor_Product
# Line 197  GTPShape(DataAbstract_ptr left, DataAbst Line 388  GTPShape(DataAbstract_ptr left, DataAbst
388    else if (transpose == 2)  { start1 = rank1-axis_offset; }    else if (transpose == 2)  { start1 = rank1-axis_offset; }
389    else              { throw DataException("DataLazy GeneralTensorProduct Constructor: Error - transpose should be 0, 1 or 2"); }    else              { throw DataException("DataLazy GeneralTensorProduct Constructor: Error - transpose should be 0, 1 or 2"); }
390    
391      if (rank0<axis_offset)
392      {
393        throw DataException("DataLazy GeneralTensorProduct Constructor: Error - rank of left < axisoffset");
394      }
395    
396    // Adjust the shapes for transpose    // Adjust the shapes for transpose
397    DataTypes::ShapeType tmpShape0(rank0);    // pre-sizing the vectors rather    DataTypes::ShapeType tmpShape0(rank0);    // pre-sizing the vectors rather
# Line 226  GTPShape(DataAbstract_ptr left, DataAbst Line 421  GTPShape(DataAbstract_ptr left, DataAbst
421       for (int i=0; i<rank0-axis_offset; i++, ++out_index) { shape2[out_index]=tmpShape0[i]; } // First part of arg_0_Z       for (int i=0; i<rank0-axis_offset; i++, ++out_index) { shape2[out_index]=tmpShape0[i]; } // First part of arg_0_Z
422       for (int i=axis_offset; i<rank1; i++, ++out_index)   { shape2[out_index]=tmpShape1[i]; } // Last part of arg_1_Z       for (int i=axis_offset; i<rank1; i++, ++out_index)   { shape2[out_index]=tmpShape1[i]; } // Last part of arg_1_Z
423    }    }
   return shape2;  
 }  
424    
425      if (shape2.size()>ESCRIPT_MAX_DATA_RANK)
426      {
427         ostringstream os;
428         os << "C_GeneralTensorProduct: Error - Attempt to create a rank " << shape2.size() << " object. The maximum rank is " << ESCRIPT_MAX_DATA_RANK << ".";
429         throw DataException(os.str());
430      }
431    
432  // determine the number of points in the result of "left op right"    return shape2;
 // note that determining the resultLength for G_TENSORPROD is more complex and will not be processed here  
 // size_t  
 // resultLength(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)  
 // {  
 //    switch (getOpgroup(op))  
 //    {  
 //    case G_BINARY: return left->getLength();  
 //    case G_UNARY: return left->getLength();  
 //    case G_NP1OUT: return left->getLength();  
 //    default:  
 //  throw DataException("Programmer Error - attempt to getLength() for operator "+opToString(op)+".");  
 //    }  
 // }  
   
 // determine the number of samples requires to evaluate an expression combining left and right  
 // NP1OUT needs an extra buffer because we can't write the answers over the top of the input.  
 // The same goes for G_TENSORPROD  
 int  
 calcBuffs(const DataLazy_ptr& left, const DataLazy_ptr& right, ES_optype op)  
 {  
    switch(getOpgroup(op))  
    {  
    case G_IDENTITY: return 1;  
    case G_BINARY: return max(left->getBuffsRequired(),right->getBuffsRequired()+1);  
    case G_UNARY: return max(left->getBuffsRequired(),1);  
    case G_NP1OUT: return 1+max(left->getBuffsRequired(),1);  
    case G_TENSORPROD: return 1+max(left->getBuffsRequired(),right->getBuffsRequired()+1);  
    default:  
     throw DataException("Programmer Error - attempt to calcBuffs() for operator "+opToString(op)+".");  
    }  
433  }  }
434    
   
435  }   // end anonymous namespace  }   // end anonymous namespace
436    
437    
# Line 279  opToString(ES_optype op) Line 447  opToString(ES_optype op)
447    return ES_opstrings[op];    return ES_opstrings[op];
448  }  }
449    
450    void DataLazy::LazyNodeSetup()
451    {
452    #ifdef _OPENMP
453        int numthreads=omp_get_max_threads();
454        m_samples.resize(numthreads*m_samplesize);
455        m_sampleids=new int[numthreads];
456        for (int i=0;i<numthreads;++i)
457        {
458            m_sampleids[i]=-1;  
459        }
460    #else
461        m_samples.resize(m_samplesize);
462        m_sampleids=new int[1];
463        m_sampleids[0]=-1;
464    #endif  // _OPENMP
465    }
466    
467    
468    // Creates an identity node
469  DataLazy::DataLazy(DataAbstract_ptr p)  DataLazy::DataLazy(DataAbstract_ptr p)
470      : parent(p->getFunctionSpace(),p->getShape()),      : parent(p->getFunctionSpace(),p->getShape())
471      m_op(IDENTITY),      ,m_sampleids(0),
472      m_axis_offset(0),      m_samples(1)
     m_transpose(0),  
     m_SL(0), m_SM(0), m_SR(0)  
473  {  {
474     if (p->isLazy())     if (p->isLazy())
475     {     {
# Line 296  DataLazy::DataLazy(DataAbstract_ptr p) Line 480  DataLazy::DataLazy(DataAbstract_ptr p)
480     }     }
481     else     else
482     {     {
483      m_id=dynamic_pointer_cast<DataReady>(p);      p->makeLazyShared();
484      if(p->isConstant()) {m_readytype='C';}      DataReady_ptr dr=dynamic_pointer_cast<DataReady>(p);
485      else if(p->isExpanded()) {m_readytype='E';}      makeIdentity(dr);
486      else if (p->isTagged()) {m_readytype='T';}  LAZYDEBUG(cout << "Wrapping " << dr.get() << " id=" << m_id.get() << endl;)
     else {throw DataException("Unknown DataReady instance in DataLazy constructor.");}  
487     }     }
488     m_buffsRequired=1;  LAZYDEBUG(cout << "(1)Lazy created with " << m_samplesize << endl;)
    m_samplesize=getNumDPPSample()*getNoValues();  
    m_maxsamplesize=m_samplesize;  
 cout << "(1)Lazy created with " << m_samplesize << endl;  
489  }  }
490    
   
   
   
491  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op)  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op)
492      : parent(left->getFunctionSpace(),left->getShape()),      : parent(left->getFunctionSpace(),(getOpgroup(op)!=G_REDUCTION)?left->getShape():DataTypes::scalarShape),
493      m_op(op),      m_op(op),
494      m_axis_offset(0),      m_axis_offset(0),
495      m_transpose(0),      m_transpose(0),
496      m_SL(0), m_SM(0), m_SR(0)      m_SL(0), m_SM(0), m_SR(0)
497  {  {
498     if ((getOpgroup(op)!=G_UNARY) && (getOpgroup(op)!=G_NP1OUT))     if ((getOpgroup(op)!=G_UNARY) && (getOpgroup(op)!=G_NP1OUT) && (getOpgroup(op)!=G_REDUCTION))
499     {     {
500      throw DataException("Programmer error - constructor DataLazy(left, op) will only process UNARY operations.");      throw DataException("Programmer error - constructor DataLazy(left, op) will only process UNARY operations.");
501     }     }
# Line 334  DataLazy::DataLazy(DataAbstract_ptr left Line 511  DataLazy::DataLazy(DataAbstract_ptr left
511     }     }
512     m_readytype=lleft->m_readytype;     m_readytype=lleft->m_readytype;
513     m_left=lleft;     m_left=lleft;
    m_buffsRequired=calcBuffs(m_left, m_right,m_op); // yeah m_right will be null at this point  
514     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
515     m_maxsamplesize=max(m_samplesize,m_left->getMaxSampleSize());     m_children=m_left->m_children+1;
516       m_height=m_left->m_height+1;
517       LazyNodeSetup();
518       SIZELIMIT
519  }  }
520    
521    
# Line 346  DataLazy::DataLazy(DataAbstract_ptr left Line 525  DataLazy::DataLazy(DataAbstract_ptr left
525      m_op(op),      m_op(op),
526      m_SL(0), m_SM(0), m_SR(0)      m_SL(0), m_SM(0), m_SR(0)
527  {  {
528    LAZYDEBUG(cout << "Forming operator with " << left.get() << " " << right.get() << endl;)
529     if ((getOpgroup(op)!=G_BINARY))     if ((getOpgroup(op)!=G_BINARY))
530     {     {
531      throw DataException("Programmer error - constructor DataLazy(left, right, op) will only process BINARY operations.");      throw DataException("Programmer error - constructor DataLazy(left, right, op) will only process BINARY operations.");
# Line 362  DataLazy::DataLazy(DataAbstract_ptr left Line 542  DataLazy::DataLazy(DataAbstract_ptr left
542     {     {
543      Data tmp(Data(right),getFunctionSpace());      Data tmp(Data(right),getFunctionSpace());
544      right=tmp.borrowDataPtr();      right=tmp.borrowDataPtr();
545    LAZYDEBUG(cout << "Right interpolation required " << right.get() << endl;)
546     }     }
547     left->operandCheck(*right);     left->operandCheck(*right);
548    
549     if (left->isLazy())          // the children need to be DataLazy. Wrap them in IDENTITY if required     if (left->isLazy())          // the children need to be DataLazy. Wrap them in IDENTITY if required
550     {     {
551      m_left=dynamic_pointer_cast<DataLazy>(left);      m_left=dynamic_pointer_cast<DataLazy>(left);
552    LAZYDEBUG(cout << "Left is " << m_left->toString() << endl;)
553     }     }
554     else     else
555     {     {
556      m_left=DataLazy_ptr(new DataLazy(left));      m_left=DataLazy_ptr(new DataLazy(left));
557    LAZYDEBUG(cout << "Left " << left.get() << " wrapped " << m_left->m_id.get() << endl;)
558     }     }
559     if (right->isLazy())     if (right->isLazy())
560     {     {
561      m_right=dynamic_pointer_cast<DataLazy>(right);      m_right=dynamic_pointer_cast<DataLazy>(right);
562    LAZYDEBUG(cout << "Right is " << m_right->toString() << endl;)
563     }     }
564     else     else
565     {     {
566      m_right=DataLazy_ptr(new DataLazy(right));      m_right=DataLazy_ptr(new DataLazy(right));
567    LAZYDEBUG(cout << "Right " << right.get() << " wrapped " << m_right->m_id.get() << endl;)
568     }     }
569     char lt=m_left->m_readytype;     char lt=m_left->m_readytype;
570     char rt=m_right->m_readytype;     char rt=m_right->m_readytype;
# Line 396  DataLazy::DataLazy(DataAbstract_ptr left Line 581  DataLazy::DataLazy(DataAbstract_ptr left
581      m_readytype='C';      m_readytype='C';
582     }     }
583     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
584     m_maxsamplesize=max(max(m_samplesize,m_right->getMaxSampleSize()),m_left->getMaxSampleSize());       m_children=m_left->m_children+m_right->m_children+2;
585     m_buffsRequired=calcBuffs(m_left, m_right,m_op);     m_height=max(m_left->m_height,m_right->m_height)+1;
586  cout << "(3)Lazy created with " << m_samplesize << endl;     LazyNodeSetup();
587       SIZELIMIT
588    LAZYDEBUG(cout << "(3)Lazy created with " << m_samplesize << endl;)
589  }  }
590    
591  DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op, int axis_offset, int transpose)  DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op, int axis_offset, int transpose)
# Line 427  DataLazy::DataLazy(DataAbstract_ptr left Line 614  DataLazy::DataLazy(DataAbstract_ptr left
614      Data tmp(Data(right),getFunctionSpace());      Data tmp(Data(right),getFunctionSpace());
615      right=tmp.borrowDataPtr();      right=tmp.borrowDataPtr();
616     }     }
617     left->operandCheck(*right);  //    left->operandCheck(*right);
618    
619     if (left->isLazy())          // the children need to be DataLazy. Wrap them in IDENTITY if required     if (left->isLazy())          // the children need to be DataLazy. Wrap them in IDENTITY if required
620     {     {
# Line 460  DataLazy::DataLazy(DataAbstract_ptr left Line 647  DataLazy::DataLazy(DataAbstract_ptr left
647      m_readytype='C';      m_readytype='C';
648     }     }
649     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
650     m_maxsamplesize=max(max(m_samplesize,m_right->getMaxSampleSize()),m_left->getMaxSampleSize());       m_children=m_left->m_children+m_right->m_children+2;
651     m_buffsRequired=calcBuffs(m_left, m_right,m_op);     m_height=max(m_left->m_height,m_right->m_height)+1;
652  cout << "(4)Lazy created with " << m_samplesize << endl;     LazyNodeSetup();
653       SIZELIMIT
654    LAZYDEBUG(cout << "(4)Lazy created with " << m_samplesize << endl;)
655  }  }
656    
657    
658  DataLazy::~DataLazy()  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, int axis_offset)
659        : parent(left->getFunctionSpace(), resultShape(left,op, axis_offset)),
660        m_op(op),
661        m_axis_offset(axis_offset),
662        m_transpose(0),
663        m_tol(0)
664    {
665       if ((getOpgroup(op)!=G_NP1OUT_P))
666       {
667        throw DataException("Programmer error - constructor DataLazy(left, op, ax) will only process UNARY operations which require parameters.");
668       }
669       DataLazy_ptr lleft;
670       if (!left->isLazy())
671       {
672        lleft=DataLazy_ptr(new DataLazy(left));
673       }
674       else
675       {
676        lleft=dynamic_pointer_cast<DataLazy>(left);
677       }
678       m_readytype=lleft->m_readytype;
679       m_left=lleft;
680       m_samplesize=getNumDPPSample()*getNoValues();
681       m_children=m_left->m_children+1;
682       m_height=m_left->m_height+1;
683       LazyNodeSetup();
684       SIZELIMIT
685    LAZYDEBUG(cout << "(5)Lazy created with " << m_samplesize << endl;)
686    }
687    
688    DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, double tol)
689        : parent(left->getFunctionSpace(), left->getShape()),
690        m_op(op),
691        m_axis_offset(0),
692        m_transpose(0),
693        m_tol(tol)
694  {  {
695       if ((getOpgroup(op)!=G_UNARY_P))
696       {
697        throw DataException("Programmer error - constructor DataLazy(left, op, tol) will only process UNARY operations which require parameters.");
698       }
699       DataLazy_ptr lleft;
700       if (!left->isLazy())
701       {
702        lleft=DataLazy_ptr(new DataLazy(left));
703       }
704       else
705       {
706        lleft=dynamic_pointer_cast<DataLazy>(left);
707       }
708       m_readytype=lleft->m_readytype;
709       m_left=lleft;
710       m_samplesize=getNumDPPSample()*getNoValues();
711       m_children=m_left->m_children+1;
712       m_height=m_left->m_height+1;
713       LazyNodeSetup();
714       SIZELIMIT
715    LAZYDEBUG(cout << "(6)Lazy created with " << m_samplesize << endl;)
716  }  }
717    
718    
719  int  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, const int axis0, const int axis1)
720  DataLazy::getBuffsRequired() const      : parent(left->getFunctionSpace(), SwapShape(left,axis0,axis1)),
721        m_op(op),
722        m_axis_offset(axis0),
723        m_transpose(axis1),
724        m_tol(0)
725  {  {
726      return m_buffsRequired;     if ((getOpgroup(op)!=G_NP1OUT_2P))
727       {
728        throw DataException("Programmer error - constructor DataLazy(left, op, tol) will only process UNARY operations which require two integer parameters.");
729       }
730       DataLazy_ptr lleft;
731       if (!left->isLazy())
732       {
733        lleft=DataLazy_ptr(new DataLazy(left));
734       }
735       else
736       {
737        lleft=dynamic_pointer_cast<DataLazy>(left);
738       }
739       m_readytype=lleft->m_readytype;
740       m_left=lleft;
741       m_samplesize=getNumDPPSample()*getNoValues();
742       m_children=m_left->m_children+1;
743       m_height=m_left->m_height+1;
744       LazyNodeSetup();
745       SIZELIMIT
746    LAZYDEBUG(cout << "(7)Lazy created with " << m_samplesize << endl;)
747  }  }
748    
749    
750  size_t  namespace
 DataLazy::getMaxSampleSize() const  
751  {  {
752      return m_maxsamplesize;  
753        inline int max3(int a, int b, int c)
754        {
755        int t=(a>b?a:b);
756        return (t>c?t:c);
757    
758        }
759    }
760    
761    DataLazy::DataLazy(DataAbstract_ptr mask, DataAbstract_ptr left, DataAbstract_ptr right/*, double tol*/)
762        : parent(left->getFunctionSpace(), left->getShape()),
763        m_op(CONDEVAL),
764        m_axis_offset(0),
765        m_transpose(0),
766        m_tol(0)
767    {
768    
769       DataLazy_ptr lmask;
770       DataLazy_ptr lleft;
771       DataLazy_ptr lright;
772       if (!mask->isLazy())
773       {
774        lmask=DataLazy_ptr(new DataLazy(mask));
775       }
776       else
777       {
778        lmask=dynamic_pointer_cast<DataLazy>(mask);
779       }
780       if (!left->isLazy())
781       {
782        lleft=DataLazy_ptr(new DataLazy(left));
783       }
784       else
785       {
786        lleft=dynamic_pointer_cast<DataLazy>(left);
787       }
788       if (!right->isLazy())
789       {
790        lright=DataLazy_ptr(new DataLazy(right));
791       }
792       else
793       {
794        lright=dynamic_pointer_cast<DataLazy>(right);
795       }
796       m_readytype=lmask->m_readytype;
797       if ((lleft->m_readytype!=lright->m_readytype) || (lmask->m_readytype!=lleft->m_readytype))
798       {
799        throw DataException("Programmer Error - condEval arguments must have the same readytype");
800       }
801       m_left=lleft;
802       m_right=lright;
803       m_mask=lmask;
804       m_samplesize=getNumDPPSample()*getNoValues();
805       m_children=m_left->m_children+m_right->m_children+m_mask->m_children+1;
806       m_height=max3(m_left->m_height,m_right->m_height,m_mask->m_height)+1;
807       LazyNodeSetup();
808       SIZELIMIT
809    LAZYDEBUG(cout << "(8)Lazy created with " << m_samplesize << endl;)
810    }
811    
812    
813    
// Destructor.  Releases the per-thread sample-id array; presumably
// allocated by LazyNodeSetup() -- confirm against that method.
DataLazy::~DataLazy()
{
   delete[] m_sampleids;
}
818    
819    
820  /*  /*
821    \brief Evaluates the expression using methods on Data.    \brief Evaluates the expression using methods on Data.
822    This does the work for the collapse method.    This does the work for the collapse method.
823    For reasons of efficiency do not call this method on DataExpanded nodes.    For reasons of efficiency do not call this method on DataExpanded nodes.
824  */  */
825  DataReady_ptr  DataReady_ptr
826  DataLazy::collapseToReady()  DataLazy::collapseToReady() const
827  {  {
828    if (m_readytype=='E')    if (m_readytype=='E')
829    { // this is more an efficiency concern than anything else    { // this is more an efficiency concern than anything else
# Line 602  DataLazy::collapseToReady() Line 935  DataLazy::collapseToReady()
935      case LEZ:      case LEZ:
936      result=left.whereNonPositive();      result=left.whereNonPositive();
937      break;      break;
938        case NEZ:
939        result=left.whereNonZero(m_tol);
940        break;
941        case EZ:
942        result=left.whereZero(m_tol);
943        break;
944      case SYM:      case SYM:
945      result=left.symmetric();      result=left.symmetric();
946      break;      break;
# Line 611  DataLazy::collapseToReady() Line 950  DataLazy::collapseToReady()
950      case PROD:      case PROD:
951      result=C_GeneralTensorProduct(left,right,m_axis_offset, m_transpose);      result=C_GeneralTensorProduct(left,right,m_axis_offset, m_transpose);
952      break;      break;
953        case TRANS:
954        result=left.transpose(m_axis_offset);
955        break;
956        case TRACE:
957        result=left.trace(m_axis_offset);
958        break;
959        case SWAP:
960        result=left.swapaxes(m_axis_offset, m_transpose);
961        break;
962        case MINVAL:
963        result=left.minval();
964        break;
965        case MAXVAL:
966        result=left.minval();
967        break;
968      default:      default:
969      throw DataException("Programmer error - collapseToReady does not know how to resolve operator "+opToString(m_op)+".");      throw DataException("Programmer error - collapseToReady does not know how to resolve operator "+opToString(m_op)+".");
970    }    }
# Line 624  DataLazy::collapseToReady() Line 978  DataLazy::collapseToReady()
978     the purpose of using DataLazy in the first place).     the purpose of using DataLazy in the first place).
979  */  */
980  void  void
981  DataLazy::collapse()  DataLazy::collapse() const
982  {  {
983    if (m_op==IDENTITY)    if (m_op==IDENTITY)
984    {    {
# Line 638  DataLazy::collapse() Line 992  DataLazy::collapse()
992    m_op=IDENTITY;    m_op=IDENTITY;
993  }  }
994    
995  /*  
996    \brief Compute the value of the expression (unary operation) for the given sample.  
997    \return Vector which stores the value of the subexpression for the given sample.  
998    \param v A vector to store intermediate results.  
999    \param offset Index in v to begin storing results.  
// Apply the binary operation X (element type TYPE) over the left/right
// sample buffers in strided chunks.  This macro expands inside
// resolveNodeBinary and relies on these names being in scope there:
//   chunksize             - doubles processed per tensor_binary_operation call
//   numsteps / onumsteps  - inner / outer loop trip counts
//   leftstep, rightstep   - per-inner-step advance of the input offsets
//   oleftstep, orightstep - per-outer-step advance of the input offsets
//   left, right           - input vectors; lroffset, rroffset - offsets into them
//   resultp, resultStep   - output cursor and its per-step advance
#define PROC_OP(TYPE,X)                               \
	for (int j=0;j<onumsteps;++j)\
	{\
	  for (int i=0;i<numsteps;++i,resultp+=resultStep) \
	  { \
LAZYDEBUG(cout << "[left,right]=[" << lroffset << "," << rroffset << "]" << endl;)\
LAZYDEBUG(cout << "{left,right}={" << (*left)[lroffset] << "," << (*right)[rroffset] << "}\n";)\
	     tensor_binary_operation< TYPE >(chunksize, &((*left)[lroffset]), &((*right)[rroffset]), resultp, X); \
LAZYDEBUG(cout << " result=      " << resultp[0] << endl;) \
	     lroffset+=leftstep; \
	     rroffset+=rightstep; \
	  }\
	  lroffset+=oleftstep;\
	  rroffset+=orightstep;\
	}
1015    
1016    
// The result will be stored in m_samples
// The return value is a pointer to the DataVector, offset is the offset within the return value
//
// Resolves this node for one sample on one thread:
//  - a non-expanded, non-identity subtree is collapsed to IDENTITY first;
//  - an IDENTITY node returns a pointer directly into its ready data;
//  - otherwise the per-thread cache (m_sampleids / m_samples) is checked
//    and, on a miss, work is dispatched to the resolver for this op group.
const DataTypes::ValueType*
DataLazy::resolveNodeSample(int tid, int sampleNo, size_t& roffset) const
{
LAZYDEBUG(cout << "Resolve sample " << toString() << endl;)
	// collapse so we have a 'E' node or an IDENTITY for some other type
  if (m_readytype!='E' && m_op!=IDENTITY)
  {
	collapse();
  }
  if (m_op==IDENTITY)
  {
    // Leaf (or collapsed) node: no copy, just point into the stored vector.
    const ValueType& vec=m_id->getVectorRO();
    roffset=m_id->getPointOffset(sampleNo, 0);
#ifdef LAZY_STACK_PROF
// Record the deepest stack address this thread reached during resolution.
int x;
if (&x<stackend[omp_get_thread_num()])
{
       stackend[omp_get_thread_num()]=&x;
}
#endif
    return &(vec);
  }
  if (m_readytype!='E')
  {
    throw DataException("Programmer Error - Collapse did not produce an expanded node.");
  }
  if (m_sampleids[tid]==sampleNo)
  {
	roffset=tid*m_samplesize;
	return &(m_samples);		// sample is already resolved
  }
  m_sampleids[tid]=sampleNo;	// mark which sample this thread's slot will hold

  switch (getOpgroup(m_op))
  {
  case G_UNARY:
  case G_UNARY_P: return resolveNodeUnary(tid, sampleNo, roffset);
  case G_BINARY: return resolveNodeBinary(tid, sampleNo, roffset);
  case G_NP1OUT: return resolveNodeNP1OUT(tid, sampleNo, roffset);
  case G_NP1OUT_P: return resolveNodeNP1OUT_P(tid, sampleNo, roffset);
  case G_TENSORPROD: return resolveNodeTProd(tid, sampleNo, roffset);
  case G_NP1OUT_2P: return resolveNodeNP1OUT_2P(tid, sampleNo, roffset);
  case G_REDUCTION: return resolveNodeReduction(tid, sampleNo, roffset);
  case G_CONDEVAL: return resolveNodeCondEval(tid, sampleNo, roffset);
  default:
    throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");
  }
}
1067    
1068    const DataTypes::ValueType*
1069    DataLazy::resolveNodeUnary(int tid, int sampleNo, size_t& roffset) const
1070  {  {
1071      // we assume that any collapsing has been done before we get here      // we assume that any collapsing has been done before we get here
1072      // since we only have one argument we don't need to think about only      // since we only have one argument we don't need to think about only
1073      // processing single points.      // processing single points.
1074        // we will also know we won't get identity nodes
1075    if (m_readytype!='E')    if (m_readytype!='E')
1076    {    {
1077      throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");      throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
1078    }    }
1079    const ValueType* vleft=m_left->resolveSample(v,offset,sampleNo,roffset);    if (m_op==IDENTITY)
1080    const double* left=&((*vleft)[roffset]);    {
1081    double* result=&(v[offset]);      throw DataException("Programmer error - resolveNodeUnary should not be called on identity nodes.");
1082    roffset=offset;    }
1083      const DataTypes::ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, roffset);
1084      const double* left=&((*leftres)[roffset]);
1085      roffset=m_samplesize*tid;
1086      double* result=&(m_samples[roffset]);
1087    switch (m_op)    switch (m_op)
1088    {    {
1089      case SIN:        case SIN:  
# Line 762  DataLazy::resolveUnary(ValueType& v, siz Line 1182  DataLazy::resolveUnary(ValueType& v, siz
1182      case LEZ:      case LEZ:
1183      tensor_unary_operation(m_samplesize, left, result, bind2nd(less_equal<double>(),0.0));      tensor_unary_operation(m_samplesize, left, result, bind2nd(less_equal<double>(),0.0));
1184      break;      break;
1185    // There are actually G_UNARY_P but I don't see a compelling reason to treat them differently
1186        case NEZ:
1187        tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsGT(),m_tol));
1188        break;
1189        case EZ:
1190        tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsLTE(),m_tol));
1191        break;
1192    
1193      default:      default:
1194      throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");      throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");
1195    }    }
1196    return &v;    return &(m_samples);
1197  }  }
1198    
1199    
1200  /*  const DataTypes::ValueType*
1201    \brief Compute the value of the expression (unary operation) for the given sample.  DataLazy::resolveNodeReduction(int tid, int sampleNo, size_t& roffset) const
   \return Vector which stores the value of the subexpression for the given sample.  
   \param v A vector to store intermediate results.  
   \param offset Index in v to begin storing results.  
   \param sampleNo Sample number to evaluate.  
   \param roffset (output parameter) the offset in the return vector where the result begins.  
   
   The return value will be an existing vector so do not deallocate it.  
   If the result is stored in v it should be stored at the offset given.  
   Everything from offset to the end of v should be considered available for this method to use.  
 */  
 DataTypes::ValueType*  
 DataLazy::resolveNP1OUT(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const  
1202  {  {
1203      // we assume that any collapsing has been done before we get here      // we assume that any collapsing has been done before we get here
1204      // since we only have one argument we don't need to think about only      // since we only have one argument we don't need to think about only
1205      // processing single points.      // processing single points.
1206        // we will also know we won't get identity nodes
1207    if (m_readytype!='E')    if (m_readytype!='E')
1208    {    {
1209      throw DataException("Programmer error - resolveNP1OUT should only be called on expanded Data.");      throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
1210    }    }
1211      // since we can't write the result over the input, we need a result offset further along    if (m_op==IDENTITY)
1212    size_t subroffset=roffset+m_samplesize;    {
1213    const ValueType* vleft=m_left->resolveSample(v,offset,sampleNo,subroffset);      throw DataException("Programmer error - resolveNodeUnary should not be called on identity nodes.");
1214    roffset=offset;    }
1215      size_t loffset=0;
1216      const DataTypes::ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, loffset);
1217    
1218      roffset=m_samplesize*tid;
1219      unsigned int ndpps=getNumDPPSample();
1220      unsigned int psize=DataTypes::noValues(m_left->getShape());
1221      double* result=&(m_samples[roffset]);
1222      switch (m_op)
1223      {
1224        case MINVAL:
1225        {
1226          for (unsigned int z=0;z<ndpps;++z)
1227          {
1228            FMin op;
1229            *result=DataMaths::reductionOp(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max());
1230            loffset+=psize;
1231            result++;
1232          }
1233        }
1234        break;
1235        case MAXVAL:
1236        {
1237          for (unsigned int z=0;z<ndpps;++z)
1238          {
1239          FMax op;
1240          *result=DataMaths::reductionOp(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max()*-1);
1241          loffset+=psize;
1242          result++;
1243          }
1244        }
1245        break;
1246        default:
1247        throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");
1248      }
1249      return &(m_samples);
1250    }
1251    
1252    const DataTypes::ValueType*
1253    DataLazy::resolveNodeNP1OUT(int tid, int sampleNo, size_t& roffset) const
1254    {
1255        // we assume that any collapsing has been done before we get here
1256        // since we only have one argument we don't need to think about only
1257        // processing single points.
1258      if (m_readytype!='E')
1259      {
1260        throw DataException("Programmer error - resolveNodeNP1OUT should only be called on expanded Data.");
1261      }
1262      if (m_op==IDENTITY)
1263      {
1264        throw DataException("Programmer error - resolveNodeNP1OUT should not be called on identity nodes.");
1265      }
1266      size_t subroffset;
1267      const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1268      roffset=m_samplesize*tid;
1269      size_t loop=0;
1270      size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1271      size_t step=getNoValues();
1272      size_t offset=roffset;
1273    switch (m_op)    switch (m_op)
1274    {    {
1275      case SYM:      case SYM:
1276      DataMaths::symmetric(*vleft,m_left->getShape(),subroffset, v, getShape(), offset);      for (loop=0;loop<numsteps;++loop)
1277        {
1278            DataMaths::symmetric(*leftres,m_left->getShape(),subroffset, m_samples, getShape(), offset);
1279            subroffset+=step;
1280            offset+=step;
1281        }
1282      break;      break;
1283      case NSYM:      case NSYM:
1284      DataMaths::nonsymmetric(*vleft,m_left->getShape(),subroffset, v, getShape(), offset);      for (loop=0;loop<numsteps;++loop)
1285        {
1286            DataMaths::nonsymmetric(*leftres,m_left->getShape(),subroffset, m_samples, getShape(), offset);
1287            subroffset+=step;
1288            offset+=step;
1289        }
1290      break;      break;
1291      default:      default:
1292      throw DataException("Programmer error - resolveNP1OUT can not resolve operator "+opToString(m_op)+".");      throw DataException("Programmer error - resolveNP1OUT can not resolve operator "+opToString(m_op)+".");
1293    }    }
1294    return &v;    return &m_samples;
1295  }  }
1296    
1297    const DataTypes::ValueType*
1298    DataLazy::resolveNodeNP1OUT_P(int tid, int sampleNo, size_t& roffset) const
1299    {
1300        // we assume that any collapsing has been done before we get here
1301        // since we only have one argument we don't need to think about only
1302        // processing single points.
1303      if (m_readytype!='E')
1304      {
1305        throw DataException("Programmer error - resolveNodeNP1OUT_P should only be called on expanded Data.");
1306      }
1307      if (m_op==IDENTITY)
1308      {
1309        throw DataException("Programmer error - resolveNodeNP1OUT_P should not be called on identity nodes.");
1310      }
1311      size_t subroffset;
1312      size_t offset;
1313      const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1314      roffset=m_samplesize*tid;
1315      offset=roffset;
1316      size_t loop=0;
1317      size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1318      size_t outstep=getNoValues();
1319      size_t instep=m_left->getNoValues();
1320      switch (m_op)
1321      {
1322        case TRACE:
1323        for (loop=0;loop<numsteps;++loop)
1324        {
1325                DataMaths::trace(*leftres,m_left->getShape(),subroffset, m_samples ,getShape(),offset,m_axis_offset);
1326            subroffset+=instep;
1327            offset+=outstep;
1328        }
1329        break;
1330        case TRANS:
1331        for (loop=0;loop<numsteps;++loop)
1332        {
1333                DataMaths::transpose(*leftres,m_left->getShape(),subroffset, m_samples, getShape(),offset,m_axis_offset);
1334            subroffset+=instep;
1335            offset+=outstep;
1336        }
1337        break;
1338        default:
1339        throw DataException("Programmer error - resolveNP1OUTP can not resolve operator "+opToString(m_op)+".");
1340      }
1341      return &m_samples;
1342    }
1343    
1344    
1345    const DataTypes::ValueType*
1346  #define PROC_OP(TYPE,X)                               \  DataLazy::resolveNodeNP1OUT_2P(int tid, int sampleNo, size_t& roffset) const
1347      for (int i=0;i<steps;++i,resultp+=resultStep) \  {
1348      { \    if (m_readytype!='E')
1349         tensor_binary_operation< TYPE >(chunksize, &((*left)[lroffset]), &((*right)[rroffset]), resultp, X); \    {
1350         lroffset+=leftStep; \      throw DataException("Programmer error - resolveNodeNP1OUT_2P should only be called on expanded Data.");
1351         rroffset+=rightStep; \    }
1352      if (m_op==IDENTITY)
1353      {
1354        throw DataException("Programmer error - resolveNodeNP1OUT_2P should not be called on identity nodes.");
1355      }
1356      size_t subroffset;
1357      size_t offset;
1358      const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1359      roffset=m_samplesize*tid;
1360      offset=roffset;
1361      size_t loop=0;
1362      size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1363      size_t outstep=getNoValues();
1364      size_t instep=m_left->getNoValues();
1365      switch (m_op)
1366      {
1367        case SWAP:
1368        for (loop=0;loop<numsteps;++loop)
1369        {
1370                DataMaths::swapaxes(*leftres,m_left->getShape(),subroffset, m_samples, getShape(),offset, m_axis_offset, m_transpose);
1371            subroffset+=instep;
1372            offset+=outstep;
1373      }      }
1374        break;
1375        default:
1376        throw DataException("Programmer error - resolveNodeNP1OUT2P can not resolve operator "+opToString(m_op)+".");
1377      }
1378      return &m_samples;
1379    }
1380    
1381    const DataTypes::ValueType*
1382    DataLazy::resolveNodeCondEval(int tid, int sampleNo, size_t& roffset) const
1383    {
1384      if (m_readytype!='E')
1385      {
1386        throw DataException("Programmer error - resolveNodeCondEval should only be called on expanded Data.");
1387      }
1388      if (m_op!=CONDEVAL)
1389      {
1390        throw DataException("Programmer error - resolveNodeCondEval should only be called on CONDEVAL nodes.");
1391      }
1392      size_t subroffset;
1393    
1394      const ValueType* maskres=m_mask->resolveNodeSample(tid, sampleNo, subroffset);
1395      const ValueType* srcres=0;
1396      if ((*maskres)[subroffset]>0)
1397      {
1398        srcres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1399      }
1400      else
1401      {
1402        srcres=m_right->resolveNodeSample(tid, sampleNo, subroffset);
1403      }
1404    
1405      // Now we need to copy the result
1406    
1407      roffset=m_samplesize*tid;
1408      for (int i=0;i<m_samplesize;++i)
1409      {
1410        m_samples[roffset+i]=(*srcres)[subroffset+i];  
1411      }
1412    
1413      return &m_samples;
1414    }
1415    
 /*  
   \brief Compute the value of the expression (binary operation) for the given sample.  
   \return Vector which stores the value of the subexpression for the given sample.  
   \param v A vector to store intermediate results.  
   \param offset Index in v to begin storing results.  
   \param sampleNo Sample number to evaluate.  
   \param roffset (output parameter) the offset in the return vector where the result begins.  
   
   The return value will be an existing vector so do not deallocate it.  
   If the result is stored in v it should be stored at the offset given.  
   Everything from offset to the end of v should be considered available for this method to use.  
 */  
1416  // This method assumes that any subexpressions which evaluate to Constant or Tagged Data  // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1417  // have already been collapsed to IDENTITY. So we must have at least one expanded child.  // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1418  // If both children are expanded, then we can process them in a single operation (we treat  // If both children are expanded, then we can process them in a single operation (we treat
# Line 842  DataLazy::resolveNP1OUT(ValueType& v, si Line 1422  DataLazy::resolveNP1OUT(ValueType& v, si
1422  // There is an additional complication when scalar operations are considered.  // There is an additional complication when scalar operations are considered.
1423  // For example, 2+Vector.  // For example, 2+Vector.
1424  // In this case each double within the point is treated individually  // In this case each double within the point is treated individually
1425  DataTypes::ValueType*  const DataTypes::ValueType*
1426  DataLazy::resolveBinary(ValueType& v,  size_t offset, int sampleNo, size_t& roffset) const  DataLazy::resolveNodeBinary(int tid, int sampleNo, size_t& roffset) const
1427  {  {
1428  cout << "Resolve binary: " << toString() << endl;  LAZYDEBUG(cout << "Resolve binary: " << toString() << endl;)
1429    
1430    size_t lroffset=0, rroffset=0;    // offsets in the left and right result vectors    size_t lroffset=0, rroffset=0;    // offsets in the left and right result vectors
1431      // first work out which of the children are expanded      // first work out which of the children are expanded
1432    bool leftExp=(m_left->m_readytype=='E');    bool leftExp=(m_left->m_readytype=='E');
1433    bool rightExp=(m_right->m_readytype=='E');    bool rightExp=(m_right->m_readytype=='E');
1434    bool bigloops=((leftExp && rightExp) || (!leftExp && !rightExp)); // is processing in single step?    if (!leftExp && !rightExp)
1435    int steps=(bigloops?1:getNumDPPSample());    {
1436    size_t chunksize=(bigloops? m_samplesize : getNoValues());    // if bigloops, pretend the whole sample is a datapoint      throw DataException("Programmer Error - please use collapse if neither argument has type 'E'.");
1437    if (m_left->getRank()!=m_right->getRank())    // need to deal with scalar * ? ops    }
1438    {    bool leftScalar=(m_left->getRank()==0);
1439      EsysAssert((m_left->getRank()==0) || (m_right->getRank()==0), "Error - Ranks must match unless one is 0.");    bool rightScalar=(m_right->getRank()==0);
1440      steps=getNumDPPSample()*max(m_left->getNoValues(),m_right->getNoValues());    if ((m_left->getRank()!=m_right->getRank()) && (!leftScalar && !rightScalar))
1441      chunksize=1;    // for scalar    {
1442    }          throw DataException("resolveBinary - ranks of arguments must match unless one of them is scalar.");
1443    int leftStep=((leftExp && !rightExp)? m_right->getNoValues() : 0);    }
1444    int rightStep=((rightExp && !leftExp)? m_left->getNoValues() : 0);    size_t leftsize=m_left->getNoValues();
1445    int resultStep=max(leftStep,rightStep);   // only one (at most) should be !=0    size_t rightsize=m_right->getNoValues();
1446      size_t chunksize=1;           // how many doubles will be processed in one go
1447      int leftstep=0;       // how far should the left offset advance after each step
1448      int rightstep=0;
1449      int numsteps=0;       // total number of steps for the inner loop
1450      int oleftstep=0;  // the o variables refer to the outer loop
1451      int orightstep=0; // The outer loop is only required in cases where there is an extended scalar
1452      int onumsteps=1;
1453      
1454      bool LES=(leftExp && leftScalar); // Left is an expanded scalar
1455      bool RES=(rightExp && rightScalar);
1456      bool LS=(!leftExp && leftScalar); // left is a single scalar
1457      bool RS=(!rightExp && rightScalar);
1458      bool LN=(!leftExp && !leftScalar);    // left is a single non-scalar
1459      bool RN=(!rightExp && !rightScalar);
1460      bool LEN=(leftExp && !leftScalar);    // left is an expanded non-scalar
1461      bool REN=(rightExp && !rightScalar);
1462    
1463      if ((LES && RES) || (LEN && REN)) // both are Expanded scalars or both are expanded non-scalars
1464      {
1465        chunksize=m_left->getNumDPPSample()*leftsize;
1466        leftstep=0;
1467        rightstep=0;
1468        numsteps=1;
1469      }
1470      else if (LES || RES)
1471      {
1472        chunksize=1;
1473        if (LES)        // left is an expanded scalar
1474        {
1475            if (RS)
1476            {
1477               leftstep=1;
1478               rightstep=0;
1479               numsteps=m_left->getNumDPPSample();
1480            }
1481            else        // RN or REN
1482            {
1483               leftstep=0;
1484               oleftstep=1;
1485               rightstep=1;
1486               orightstep=(RN ? -(int)rightsize : 0);
1487               numsteps=rightsize;
1488               onumsteps=m_left->getNumDPPSample();
1489            }
1490        }
1491        else        // right is an expanded scalar
1492        {
1493            if (LS)
1494            {
1495               rightstep=1;
1496               leftstep=0;
1497               numsteps=m_right->getNumDPPSample();
1498            }
1499            else
1500            {
1501               rightstep=0;
1502               orightstep=1;
1503               leftstep=1;
1504               oleftstep=(LN ? -(int)leftsize : 0);
1505               numsteps=leftsize;
1506               onumsteps=m_right->getNumDPPSample();
1507            }
1508        }
1509      }
1510      else  // this leaves (LEN, RS), (LEN, RN) and their transposes
1511      {
1512        if (LEN)    // and Right will be a single value
1513        {
1514            chunksize=rightsize;
1515            leftstep=rightsize;
1516            rightstep=0;
1517            numsteps=m_left->getNumDPPSample();
1518            if (RS)
1519            {
1520               numsteps*=leftsize;
1521            }
1522        }
1523        else    // REN
1524        {
1525            chunksize=leftsize;
1526            rightstep=leftsize;
1527            leftstep=0;
1528            numsteps=m_right->getNumDPPSample();
1529            if (LS)
1530            {
1531               numsteps*=rightsize;
1532            }
1533        }
1534      }
1535    
1536      int resultStep=max(leftstep,rightstep);   // only one (at most) should be !=0
1537      // Get the values of sub-expressions      // Get the values of sub-expressions
1538    const ValueType* left=m_left->resolveSample(v,offset,sampleNo,lroffset);    const ValueType* left=m_left->resolveNodeSample(tid,sampleNo,lroffset);  
1539    const ValueType* right=m_right->resolveSample(v,offset+m_samplesize,sampleNo,rroffset); // Note    const ValueType* right=m_right->resolveNodeSample(tid,sampleNo,rroffset);
1540      // the right child starts further along.  LAZYDEBUG(cout << "Post sub calls in " << toString() << endl;)
1541    double* resultp=&(v[offset]);     // results are stored at the vector offset we recieved  LAZYDEBUG(cout << "shapes=" << DataTypes::shapeToString(m_left->getShape()) << "," << DataTypes::shapeToString(m_right->getShape()) << endl;)
1542    LAZYDEBUG(cout << "chunksize=" << chunksize << endl << "leftstep=" << leftstep << " rightstep=" << rightstep;)
1543    LAZYDEBUG(cout << " numsteps=" << numsteps << endl << "oleftstep=" << oleftstep << " orightstep=" << orightstep;)
1544    LAZYDEBUG(cout << "onumsteps=" << onumsteps << endl;)
1545    LAZYDEBUG(cout << " DPPS=" << m_left->getNumDPPSample() << "," <<m_right->getNumDPPSample() << endl;)
1546    LAZYDEBUG(cout << "" << LS << RS << LN << RN << LES << RES <<LEN << REN <<   endl;)
1547    
1548    LAZYDEBUG(cout << "Left res["<< lroffset<< "]=" << (*left)[lroffset] << endl;)
1549    LAZYDEBUG(cout << "Right res["<< rroffset<< "]=" << (*right)[rroffset] << endl;)
1550    
1551    
1552      roffset=m_samplesize*tid;
1553      double* resultp=&(m_samples[roffset]);        // results are stored at the vector offset we received
1554    switch(m_op)    switch(m_op)
1555    {    {
1556      case ADD:      case ADD:
# Line 888  cout << "Resolve binary: " << toString() Line 1571  cout << "Resolve binary: " << toString()
1571      default:      default:
1572      throw DataException("Programmer error - resolveBinary can not resolve operator "+opToString(m_op)+".");      throw DataException("Programmer error - resolveBinary can not resolve operator "+opToString(m_op)+".");
1573    }    }
1574    roffset=offset;    LAZYDEBUG(cout << "Result res[" << roffset<< "]" << m_samples[roffset] << endl;)
1575    return &v;    return &m_samples;
1576  }  }
1577    
1578    
 /*  
   \brief Compute the value of the expression (tensor product) for the given sample.  
   \return Vector which stores the value of the subexpression for the given sample.  
   \param v A vector to store intermediate results.  
   \param offset Index in v to begin storing results.  
   \param sampleNo Sample number to evaluate.  
   \param roffset (output parameter) the offset in the return vector where the result begins.  
   
   The return value will be an existing vector so do not deallocate it.  
   If the result is stored in v it should be stored at the offset given.  
   Everything from offset to the end of v should be considered available for this method to use.  
 */  
1579  // This method assumes that any subexpressions which evaluate to Constant or Tagged Data  // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1580  // have already been collapsed to IDENTITY. So we must have at least one expanded child.  // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1581  // unlike the other resolve helpers, we must treat these datapoints separately.  // unlike the other resolve helpers, we must treat these datapoints separately.
1582  DataTypes::ValueType*  const DataTypes::ValueType*
1583  DataLazy::resolveTProd(ValueType& v,  size_t offset, int sampleNo, size_t& roffset) const  DataLazy::resolveNodeTProd(int tid, int sampleNo, size_t& roffset) const
1584  {  {
1585  cout << "Resolve TensorProduct: " << toString() << endl;  LAZYDEBUG(cout << "Resolve TensorProduct: " << toString() << endl;)
1586    
1587    size_t lroffset=0, rroffset=0;    // offsets in the left and right result vectors    size_t lroffset=0, rroffset=0;    // offsets in the left and right result vectors
1588      // first work out which of the children are expanded      // first work out which of the children are expanded
1589    bool leftExp=(m_left->m_readytype=='E');    bool leftExp=(m_left->m_readytype=='E');
1590    bool rightExp=(m_right->m_readytype=='E');    bool rightExp=(m_right->m_readytype=='E');
1591    int steps=getNumDPPSample();    int steps=getNumDPPSample();
1592    int leftStep=((leftExp && !rightExp)? m_right->getNoValues() : 0);    int leftStep=(leftExp? m_left->getNoValues() : 0);        // do not have scalars as input to this method
1593    int rightStep=((rightExp && !leftExp)? m_left->getNoValues() : 0);    int rightStep=(rightExp?m_right->getNoValues() : 0);
1594    int resultStep=max(leftStep,rightStep);   // only one (at most) should be !=0  
1595      // Get the values of sub-expressions (leave a gap of one sample for the result).    int resultStep=getNoValues();
1596    const ValueType* left=m_left->resolveSample(v,offset+m_samplesize,sampleNo,lroffset);    roffset=m_samplesize*tid;
1597    const ValueType* right=m_right->resolveSample(v,offset+2*m_samplesize,sampleNo,rroffset);    size_t offset=roffset;
1598    double* resultp=&(v[offset]);     // results are stored at the vector offset we recieved  
1599      const ValueType* left=m_left->resolveNodeSample(tid, sampleNo, lroffset);
1600    
1601      const ValueType* right=m_right->resolveNodeSample(tid, sampleNo, rroffset);
1602    
1603    LAZYDEBUG(cerr << "[Left shape]=" << DataTypes::shapeToString(m_left->getShape()) << "\n[Right shape]=" << DataTypes::shapeToString(m_right->getShape()) << " result=" <<DataTypes::shapeToString(getShape()) <<  endl;
1604    cout << getNoValues() << endl;)
1605    
1606    
1607    LAZYDEBUG(cerr << "Post sub calls: " << toString() << endl;)
1608    LAZYDEBUG(cout << "LeftExp=" << leftExp << " rightExp=" << rightExp << endl;)
1609    LAZYDEBUG(cout << "LeftR=" << m_left->getRank() << " rightExp=" << m_right->getRank() << endl;)
1610    LAZYDEBUG(cout << "LeftSize=" << m_left->getNoValues() << " RightSize=" << m_right->getNoValues() << endl;)
1611    LAZYDEBUG(cout << "m_samplesize=" << m_samplesize << endl;)
1612    LAZYDEBUG(cout << "outputshape=" << DataTypes::shapeToString(getShape()) << endl;)
1613    LAZYDEBUG(cout << "DPPS=" << m_right->getNumDPPSample() <<"."<<endl;)
1614    
1615      double* resultp=&(m_samples[offset]);     // results are stored at the vector offset we received
1616    switch(m_op)    switch(m_op)
1617    {    {
1618      case PROD:      case PROD:
# Line 932  cout << "Resolve TensorProduct: " << toS Line 1620  cout << "Resolve TensorProduct: " << toS
1620      {      {
1621            const double *ptr_0 = &((*left)[lroffset]);            const double *ptr_0 = &((*left)[lroffset]);
1622            const double *ptr_1 = &((*right)[rroffset]);            const double *ptr_1 = &((*right)[rroffset]);
1623    
1624    LAZYDEBUG(cout << DataTypes::pointToString(*left, m_left->getShape(),lroffset,"LEFT") << endl;)
1625    LAZYDEBUG(cout << DataTypes::pointToString(*right,m_right->getShape(),rroffset, "RIGHT") << endl;)
1626    
1627            matrix_matrix_product(m_SL, m_SM, m_SR, ptr_0, ptr_1, resultp, m_transpose);            matrix_matrix_product(m_SL, m_SM, m_SR, ptr_0, ptr_1, resultp, m_transpose);
1628    
1629        lroffset+=leftStep;        lroffset+=leftStep;
1630        rroffset+=rightStep;        rroffset+=rightStep;
1631      }      }
# Line 941  cout << "Resolve TensorProduct: " << toS Line 1634  cout << "Resolve TensorProduct: " << toS
1634      throw DataException("Programmer error - resolveTProduct can not resolve operator "+opToString(m_op)+".");      throw DataException("Programmer error - resolveTProduct can not resolve operator "+opToString(m_op)+".");
1635    }    }
1636    roffset=offset;    roffset=offset;
1637    return &v;    return &m_samples;
1638  }  }
1639    
1640    
1641    const DataTypes::ValueType*
1642    DataLazy::resolveSample(int sampleNo, size_t& roffset) const
1643    {
1644    #ifdef _OPENMP
1645        int tid=omp_get_thread_num();
1646    #else
1647        int tid=0;
1648    #endif
1649    
1650  /*  #ifdef LAZY_STACK_PROF
1651    \brief Compute the value of the expression for the given sample.      stackstart[tid]=&tid;
1652    \return Vector which stores the value of the subexpression for the given sample.      stackend[tid]=&tid;
1653    \param v A vector to store intermediate results.      const DataTypes::ValueType* r=resolveNodeSample(tid, sampleNo, roffset);
1654    \param offset Index in v to begin storing results.      size_t d=(size_t)stackstart[tid]-(size_t)stackend[tid];
1655    \param sampleNo Sample number to evaluate.      #pragma omp critical
1656    \param roffset (output parameter) the offset in the return vector where the result begins.      if (d>maxstackuse)
1657        {
1658    cout << "Max resolve Stack use " << d << endl;
1659            maxstackuse=d;
1660        }
1661        return r;
1662    #else
1663        return resolveNodeSample(tid, sampleNo, roffset);
1664    #endif
1665    }
1666    
   The return value will be an existing vector so do not deallocate it.  
 */  
 // the vector and the offset are a place where the method could write its data if it wishes  
 // it is not obligated to do so. For example, if it has its own storage already, it can use that.  
 // Hence the return value to indicate where the data is actually stored.  
 // Regardless, the storage should be assumed to be used, even if it isn't.  
1667    
1668  // the roffset is the offset within the returned vector where the data begins  // This needs to do the work of the identity constructor
1669  const DataTypes::ValueType*  void
1670  DataLazy::resolveSample(ValueType& v, size_t offset, int sampleNo, size_t& roffset)  DataLazy::resolveToIdentity()
1671  {  {
1672  cout << "Resolve sample " << toString() << endl;     if (m_op==IDENTITY)
1673      // collapse so we have a 'E' node or an IDENTITY for some other type      return;
1674    if (m_readytype!='E' && m_op!=IDENTITY)     DataReady_ptr p=resolveNodeWorker();
1675       makeIdentity(p);
1676    }
1677    
1678    void DataLazy::makeIdentity(const DataReady_ptr& p)
1679    {
1680       m_op=IDENTITY;
1681       m_axis_offset=0;
1682       m_transpose=0;
1683       m_SL=m_SM=m_SR=0;
1684       m_children=m_height=0;
1685       m_id=p;
1686       if(p->isConstant()) {m_readytype='C';}
1687       else if(p->isExpanded()) {m_readytype='E';}
1688       else if (p->isTagged()) {m_readytype='T';}
1689       else {throw DataException("Unknown DataReady instance in convertToIdentity constructor.");}
1690       m_samplesize=p->getNumDPPSample()*p->getNoValues();
1691       m_left.reset();
1692       m_right.reset();
1693    }
1694    
1695    
1696    DataReady_ptr
1697    DataLazy::resolve()
1698    {
1699        resolveToIdentity();
1700        return m_id;
1701    }
1702    
1703    
1704    /* This is really a static method but I think that caused problems in windows */
1705    void
1706    DataLazy::resolveGroupWorker(std::vector<DataLazy*>& dats)
1707    {
1708      if (dats.empty())
1709    {    {
1710      collapse();      return;
1711    }    }
1712    if (m_op==IDENTITY)      vector<DataLazy*> work;
1713      FunctionSpace fs=dats[0]->getFunctionSpace();
1714      bool match=true;
1715      for (int i=dats.size()-1;i>=0;--i)
1716    {    {
1717      const ValueType& vec=m_id->getVector();      if (dats[i]->m_readytype!='E')
1718      if (m_readytype=='C')      {
1719      {          dats[i]->collapse();
1720      roffset=0;      }
1721      return &(vec);      if (dats[i]->m_op!=IDENTITY)
1722      }      {
1723      roffset=m_id->getPointOffset(sampleNo, 0);          work.push_back(dats[i]);
1724      return &(vec);          if (fs!=dats[i]->getFunctionSpace())
1725            {
1726                match=false;
1727            }
1728        }
1729    }    }
1730    if (m_readytype!='E')    if (work.empty())
1731    {    {
1732      throw DataException("Programmer Error - Collapse did not produce an expanded node.");      return;     // no work to do
1733    }    }
1734    switch (getOpgroup(m_op))    if (match)    // all functionspaces match.  Yes I realise this is overly strict
1735      {     // it is possible that dats[0] is one of the objects which we discarded and
1736            // all the other functionspaces match.
1737        vector<DataExpanded*> dep;
1738        vector<ValueType*> vecs;
1739        for (int i=0;i<work.size();++i)
1740        {
1741            dep.push_back(new DataExpanded(fs,work[i]->getShape(), ValueType(work[i]->getNoValues())));
1742            vecs.push_back(&(dep[i]->getVectorRW()));
1743        }
1744        int totalsamples=work[0]->getNumSamples();
1745        const ValueType* res=0; // Storage for answer
1746        int sample;
1747        #pragma omp parallel private(sample, res)
1748        {
1749            size_t roffset=0;
1750            #pragma omp for schedule(static)
1751            for (sample=0;sample<totalsamples;++sample)
1752            {
1753            roffset=0;
1754            int j;
1755            for (j=work.size()-1;j>=0;--j)
1756            {
1757    #ifdef _OPENMP
1758                    res=work[j]->resolveNodeSample(omp_get_thread_num(),sample,roffset);
1759    #else
1760                    res=work[j]->resolveNodeSample(0,sample,roffset);
1761    #endif
1762                    DataVector::size_type outoffset=dep[j]->getPointOffset(sample,0);
1763                    memcpy(&((*vecs[j])[outoffset]),&((*res)[roffset]),work[j]->m_samplesize*sizeof(DataVector::ElementType));
1764            }
1765            }
1766        }
1767        // Now we need to load the new results as identity ops into the lazy nodes
1768        for (int i=work.size()-1;i>=0;--i)
1769        {
1770            work[i]->makeIdentity(boost::dynamic_pointer_cast<DataReady>(dep[i]->getPtr()));
1771        }
1772      }
1773      else  // functionspaces do not match
1774    {    {
1775    case G_UNARY: return resolveUnary(v, offset,sampleNo,roffset);      for (int i=0;i<work.size();++i)
1776    case G_BINARY: return resolveBinary(v, offset,sampleNo,roffset);      {
1777    case G_NP1OUT: return resolveNP1OUT(v, offset, sampleNo,roffset);          work[i]->resolveToIdentity();
1778    case G_TENSORPROD: return resolveTProd(v,offset, sampleNo,roffset);      }
   default:  
     throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");  
1779    }    }
1780  }  }
1781    
1782    
1783  // To simplify the memory management, all threads operate on one large vector, rather than one each.  
1784  // Each sample is evaluated independently and copied into the result DataExpanded.  // This version of resolve uses storage in each node to hold results
1785  DataReady_ptr  DataReady_ptr
1786  DataLazy::resolve()  DataLazy::resolveNodeWorker()
1787  {  {
   
 cout << "Sample size=" << m_samplesize << endl;  
 cout << "Buffers=" << m_buffsRequired << endl;  
   
1788    if (m_readytype!='E')     // if the whole sub-expression is Constant or Tagged, then evaluate it normally    if (m_readytype!='E')     // if the whole sub-expression is Constant or Tagged, then evaluate it normally
1789    {    {
1790      collapse();      collapse();
# Line 1016  cout << "Buffers=" << m_buffsRequired << Line 1794  cout << "Buffers=" << m_buffsRequired <<
1794      return m_id;      return m_id;
1795    }    }
1796      // from this point on we must have m_op!=IDENTITY and m_readytype=='E'      // from this point on we must have m_op!=IDENTITY and m_readytype=='E'
   size_t threadbuffersize=m_maxsamplesize*(max(1,m_buffsRequired)); // Each thread needs to have enough  
     // storage to evaluate its expression  
   int numthreads=1;  
 #ifdef _OPENMP  
   numthreads=getNumberOfThreads();  
   int threadnum=0;  
 #endif  
   ValueType v(numthreads*threadbuffersize);  
 cout << "Buffer created with size=" << v.size() << endl;  
1797    DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(),  ValueType(getNoValues()));    DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(),  ValueType(getNoValues()));
1798    ValueType& resvec=result->getVector();    ValueType& resvec=result->getVectorRW();
1799    DataReady_ptr resptr=DataReady_ptr(result);    DataReady_ptr resptr=DataReady_ptr(result);
1800    
1801    int sample;    int sample;
   size_t outoffset;     // offset in the output data  
1802    int totalsamples=getNumSamples();    int totalsamples=getNumSamples();
1803    const ValueType* res=0;   // Vector storing the answer    const ValueType* res=0;   // Storage for answer
1804    size_t resoffset=0;       // where in the vector to find the answer  LAZYDEBUG(cout << "Total number of samples=" <<totalsamples << endl;)
1805    #pragma omp parallel for private(sample,resoffset,outoffset,threadnum,res) schedule(static)    #pragma omp parallel private(sample,res)
1806    for (sample=0;sample<totalsamples;++sample)    {
1807    {      size_t roffset=0;
1808  cout << "################################# " << sample << endl;  #ifdef LAZY_STACK_PROF
1809        stackstart[omp_get_thread_num()]=&roffset;
1810        stackend[omp_get_thread_num()]=&roffset;
1811    #endif
1812        #pragma omp for schedule(static)
1813        for (sample=0;sample<totalsamples;++sample)
1814        {
1815            roffset=0;
1816  #ifdef _OPENMP  #ifdef _OPENMP
1817      res=resolveSample(v,threadbuffersize*omp_get_thread_num(),sample,resoffset);              res=resolveNodeSample(omp_get_thread_num(),sample,roffset);
1818  #else  #else
1819      res=resolveSample(v,0,sample,resoffset);   // res would normally be v, but not if its a single IDENTITY op.              res=resolveNodeSample(0,sample,roffset);
1820  #endif  #endif
1821  cerr << "-------------------------------- " << endl;  LAZYDEBUG(cout << "Sample #" << sample << endl;)
1822      outoffset=result->getPointOffset(sample,0);  LAZYDEBUG(cout << "Final res[" << roffset<< "]=" << (*res)[roffset] << (*res)[roffset]<< endl; )
1823  cerr << "offset=" << outoffset << endl;              DataVector::size_type outoffset=result->getPointOffset(sample,0);
1824      for (unsigned int i=0;i<m_samplesize;++i,++outoffset,++resoffset)   // copy values into the output vector              memcpy(&(resvec[outoffset]),&((*res)[roffset]),m_samplesize*sizeof(DataVector::ElementType));
1825      {      }
1826      resvec[outoffset]=(*res)[resoffset];    }
1827      }  #ifdef LAZY_STACK_PROF
1828  cerr << "*********************************" << endl;    for (int i=0;i<getNumberOfThreads();++i)
1829      {
1830        size_t r=((size_t)stackstart[i] - (size_t)stackend[i]);
1831    //  cout << i << " " << stackstart[i] << " .. " << stackend[i] << " = " <<  r << endl;
1832        if (r>maxstackuse)
1833        {
1834            maxstackuse=r;
1835        }
1836    }    }
1837      cout << "Max resolve Stack use=" << maxstackuse << endl;
1838    #endif
1839    return resptr;    return resptr;
1840  }  }
1841    
# Line 1058  std::string Line 1843  std::string
1843  DataLazy::toString() const  DataLazy::toString() const
1844  {  {
1845    ostringstream oss;    ostringstream oss;
1846    oss << "Lazy Data:";    oss << "Lazy Data: [depth=" << m_height<< "] ";
1847    intoString(oss);    switch (escriptParams.getLAZY_STR_FMT())
1848      {
1849      case 1:   // tree format
1850        oss << endl;
1851        intoTreeString(oss,"");
1852        break;
1853      case 2:   // just the depth
1854        break;
1855      default:
1856        intoString(oss);
1857        break;
1858      }
1859    return oss.str();    return oss.str();
1860  }  }
1861    
# Line 1067  DataLazy::toString() const Line 1863  DataLazy::toString() const
1863  void  void
1864  DataLazy::intoString(ostringstream& oss) const  DataLazy::intoString(ostringstream& oss) const
1865  {  {
1866    //    oss << "[" << m_children <<";"<<m_height <<"]";
1867    switch (getOpgroup(m_op))    switch (getOpgroup(m_op))
1868    {    {
1869    case G_IDENTITY:    case G_IDENTITY:
# Line 1096  DataLazy::intoString(ostringstream& oss) Line 1893  DataLazy::intoString(ostringstream& oss)
1893      oss << ')';      oss << ')';
1894      break;      break;
1895    case G_UNARY:    case G_UNARY:
1896      case G_UNARY_P:
1897    case G_NP1OUT:    case G_NP1OUT:
1898      case G_NP1OUT_P:
1899      case G_REDUCTION:
1900      oss << opToString(m_op) << '(';      oss << opToString(m_op) << '(';
1901      m_left->intoString(oss);      m_left->intoString(oss);
1902      oss << ')';      oss << ')';
# Line 1108  DataLazy::intoString(ostringstream& oss) Line 1908  DataLazy::intoString(ostringstream& oss)
1908      m_right->intoString(oss);      m_right->intoString(oss);
1909      oss << ')';      oss << ')';
1910      break;      break;
1911      case G_NP1OUT_2P:
1912        oss << opToString(m_op) << '(';
1913        m_left->intoString(oss);
1914        oss << ", " << m_axis_offset << ", " << m_transpose;
1915        oss << ')';
1916        break;
1917      case G_CONDEVAL:
1918        oss << opToString(m_op)<< '(' ;
1919        m_mask->intoString(oss);
1920        oss << " ? ";
1921        m_left->intoString(oss);
1922        oss << " : ";
1923        m_right->intoString(oss);
1924        oss << ')';
1925        break;
1926      default:
1927        oss << "UNKNOWN";
1928      }
1929    }
1930    
1931    
1932    void
1933    DataLazy::intoTreeString(ostringstream& oss, string indent) const
1934    {
1935      oss << '[' << m_rank << ':' << setw(3) << m_samplesize << "] " << indent;
1936      switch (getOpgroup(m_op))
1937      {
1938      case G_IDENTITY:
1939        if (m_id->isExpanded())
1940        {
1941           oss << "E";
1942        }
1943        else if (m_id->isTagged())
1944        {
1945          oss << "T";
1946        }
1947        else if (m_id->isConstant())
1948        {
1949          oss << "C";
1950        }
1951        else
1952        {
1953          oss << "?";
1954        }
1955        oss << '@' << m_id.get() << endl;
1956        break;
1957      case G_BINARY:
1958        oss << opToString(m_op) << endl;
1959        indent+='.';
1960        m_left->intoTreeString(oss, indent);
1961        m_right->intoTreeString(oss, indent);
1962        break;
1963      case G_UNARY:
1964      case G_UNARY_P:
1965      case G_NP1OUT:
1966      case G_NP1OUT_P:
1967      case G_REDUCTION:
1968        oss << opToString(m_op) << endl;
1969        indent+='.';
1970        m_left->intoTreeString(oss, indent);
1971        break;
1972      case G_TENSORPROD:
1973        oss << opToString(m_op) << endl;
1974        indent+='.';
1975        m_left->intoTreeString(oss, indent);
1976        m_right->intoTreeString(oss, indent);
1977        break;
1978      case G_NP1OUT_2P:
1979        oss << opToString(m_op) << ", " << m_axis_offset << ", " << m_transpose<< endl;
1980        indent+='.';
1981        m_left->intoTreeString(oss, indent);
1982        break;
1983    default:    default:
1984      oss << "UNKNOWN";      oss << "UNKNOWN";
1985    }    }
1986  }  }
1987    
1988    
1989  DataAbstract*  DataAbstract*
1990  DataLazy::deepCopy()  DataLazy::deepCopy()
1991  {  {
1992    switch (getOpgroup(m_op))    switch (getOpgroup(m_op))
1993    {    {
1994    case G_IDENTITY:  return new DataLazy(m_id->deepCopy()->getPtr());    case G_IDENTITY:  return new DataLazy(m_id->deepCopy()->getPtr());
1995    case G_UNARY: return new DataLazy(m_left->deepCopy()->getPtr(),m_op);    case G_UNARY:
1996      case G_REDUCTION:      return new DataLazy(m_left->deepCopy()->getPtr(),m_op);
1997      case G_UNARY_P:   return new DataLazy(m_left->deepCopy()->getPtr(), m_op, m_tol);
1998    case G_BINARY:    return new DataLazy(m_left->deepCopy()->getPtr(),m_right->deepCopy()->getPtr(),m_op);    case G_BINARY:    return new DataLazy(m_left->deepCopy()->getPtr(),m_right->deepCopy()->getPtr(),m_op);
1999    case G_NP1OUT: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(),m_op);    case G_NP1OUT: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(),m_op);
2000    case G_TENSORPROD: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);    case G_TENSORPROD: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
2001      case G_NP1OUT_P:   return new DataLazy(m_left->deepCopy()->getPtr(),m_op,  m_axis_offset);
2002      case G_NP1OUT_2P:  return new DataLazy(m_left->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
2003    default:    default:
2004      throw DataException("Programmer error - do not know how to deepcopy operator "+opToString(m_op)+".");      throw DataException("Programmer error - do not know how to deepcopy operator "+opToString(m_op)+".");
2005    }    }
2006  }  }
2007    
2008    
2009    
2010  // There is no single, natural interpretation of getLength on DataLazy.  // There is no single, natural interpretation of getLength on DataLazy.
2011  // Instances of DataReady can look at the size of their vectors.  // Instances of DataReady can look at the size of their vectors.
2012  // For lazy though, it could be the size the data would be if it were resolved;  // For lazy though, it could be the size the data would be if it were resolved;
# Line 1204  DataLazy::getPointOffset(int sampleNo, Line 2082  DataLazy::getPointOffset(int sampleNo,
2082    throw DataException("Programmer error - getPointOffset on lazy data may require collapsing (but this object is marked const).");    throw DataException("Programmer error - getPointOffset on lazy data may require collapsing (but this object is marked const).");
2083  }  }
2084    
2085  // It would seem that DataTagged will need to be treated differently since even after setting all tags  
2086  // to zero, all the tags from all the DataTags would be in the result.  // I have decided to let Data:: handle this issue.
 // However since they all have the same value (0) whether they are there or not should not matter.  
 // So I have decided that for all types this method will create a constant 0.  
 // It can be promoted up as required.  
 // A possible efficiency concern might be expanded->constant->expanded which has an extra memory management  
 // but we can deal with that if it arrises.  
2087  void  void
2088  DataLazy::setToZero()  DataLazy::setToZero()
2089  {  {
2090    DataTypes::ValueType v(getNoValues(),0);  //   DataTypes::ValueType v(getNoValues(),0);
2091    m_id=DataReady_ptr(new DataConstant(getFunctionSpace(),getShape(),v));  //   m_id=DataReady_ptr(new DataConstant(getFunctionSpace(),getShape(),v));
2092    m_op=IDENTITY;  //   m_op=IDENTITY;
2093    m_right.reset();    //   m_right.reset();  
2094    m_left.reset();  //   m_left.reset();
2095    m_readytype='C';  //   m_readytype='C';
2096    m_buffsRequired=1;  //   m_buffsRequired=1;
2097    
2098      (void)privdebug;  // to stop the compiler complaining about unused privdebug
2099      throw DataException("Programmer error - setToZero not supported for DataLazy (DataLazy objects should be read only).");
2100    }
2101    
2102    bool
2103    DataLazy::actsExpanded() const
2104    {
2105        return (m_readytype=='E');
2106  }  }
2107    
2108  }   // end namespace  }   // end namespace

Legend:
Removed from v.2066  
changed lines
  Added in v.5448

  ViewVC Help
Powered by ViewVC 1.1.26