
Diff of /branches/clazy/escriptcore/src/DataLazy.cpp


From: trunk/escript/src/DataLazy.cpp revision 2147 by jfenwick, Wed Dec 10 04:41:26 2008 UTC
To:   trunk/escriptcore/src/DataLazy.cpp revision 4657 by jfenwick, Thu Feb 6 06:12:20 2014 UTC
# Line 1  Line 1 
1    
2  /*******************************************************  /*****************************************************************************
3  *  *
4  * Copyright (c) 2003-2008 by University of Queensland  * Copyright (c) 2003-2014 by University of Queensland
5  * Earth Systems Science Computational Center (ESSCC)  * http://www.uq.edu.au
 * http://www.uq.edu.au/esscc  
6  *  *
7  * Primary Business: Queensland, Australia  * Primary Business: Queensland, Australia
8  * Licensed under the Open Software License version 3.0  * Licensed under the Open Software License version 3.0
9  * http://www.opensource.org/licenses/osl-3.0.php  * http://www.opensource.org/licenses/osl-3.0.php
10  *  *
11  *******************************************************/  * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12    * Development 2012-2013 by School of Earth Sciences
13    * Development from 2014 by Centre for Geoscience Computing (GeoComp)
14    *
15    *****************************************************************************/
16    
17    
18  #include "DataLazy.h"  #include "DataLazy.h"
19  #ifdef USE_NETCDF  #include "esysUtils/Esys_MPI.h"
 #include <netcdfcpp.h>  
 #endif  
 #ifdef PASO_MPI  
 #include <mpi.h>  
 #endif  
20  #ifdef _OPENMP  #ifdef _OPENMP
21  #include <omp.h>  #include <omp.h>
22  #endif  #endif
# Line 28  Line 26 
26  #include "UnaryFuncs.h"     // for escript::fsign  #include "UnaryFuncs.h"     // for escript::fsign
27  #include "Utils.h"  #include "Utils.h"
28    
29  // #define LAZYDEBUG(X) X;  #include "EscriptParams.h"
30    
31    #ifdef USE_NETCDF
32    #include <netcdfcpp.h>
33    #endif
34    
35    #include <iomanip>      // for some fancy formatting in debug
36    
37    // #define LAZYDEBUG(X) if (privdebug){X;}
38  #define LAZYDEBUG(X)  #define LAZYDEBUG(X)
39    namespace
40    {
41    bool privdebug=false;
42    
43    #define ENABLEDEBUG privdebug=true;
44    #define DISABLEDEBUG privdebug=false;
45    }
46    
47    // #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {cerr << "\n!!!!!!! SIZE LIMIT EXCEEDED " << m_children << ";" << m_height << endl << toString() << endl;resolveToIdentity();}
48    
49    // #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {cerr << "SIZE LIMIT EXCEEDED " << m_height << endl;resolveToIdentity();}
50    
51    
52    #define SIZELIMIT if (m_height>escript::escriptParams.getTOO_MANY_LEVELS())  {if (escript::escriptParams.getLAZY_VERBOSE()){cerr << "SIZE LIMIT EXCEEDED height=" << m_height << endl;}resolveToIdentity();}
53    
54  /*  /*
55  How does DataLazy work?  How does DataLazy work?
# Line 42  A special operation, IDENTITY, stores an Line 62  A special operation, IDENTITY, stores an
62  This means that all "internal" nodes in the structure are instances of DataLazy.  This means that all "internal" nodes in the structure are instances of DataLazy.
63    
64  Each operation has a string representation as well as an opgroup - eg G_IDENTITY, G_BINARY, ...  Each operation has a string representation as well as an opgroup - eg G_IDENTITY, G_BINARY, ...
65  Note that IDENITY is not considered a unary operation.  Note that IDENTITY is not considered a unary operation.
66    
67  I am avoiding calling the structure formed a tree because it is not guaranteed to be one (eg c=a+a).  I am avoiding calling the structure formed a tree because it is not guaranteed to be one (eg c=a+a).
68  It must however form a DAG (directed acyclic graph).  It must however form a DAG (directed acyclic graph).
# Line 51  I will refer to individual DataLazy obje Line 71  I will refer to individual DataLazy obje
71  Each node also stores:  Each node also stores:
72  - m_readytype \in {'E','T','C','?'} ~ indicates what sort of DataReady would be produced if the expression was  - m_readytype \in {'E','T','C','?'} ~ indicates what sort of DataReady would be produced if the expression was
73      evaluated.      evaluated.
74  - m_buffsrequired ~ the larged number of samples which would need to be kept simultaneously in order to  - m_buffsrequired ~ the largest number of samples which would need to be kept simultaneously in order to
75      evaluate the expression.      evaluate the expression.
76  - m_samplesize ~ the number of doubles stored in a sample.  - m_samplesize ~ the number of doubles stored in a sample.
77    
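// A minimal standalone sketch of the point above (simplified stand-in types, not the
// escript classes): c = a + a produces a DAG rather than a tree, because both
// operands of the ADD node are the very same shared child. The readytype-combining
// rule below is a simplification of what the real constructors do.
#include <iostream>
#include <memory>

struct Node;
typedef std::shared_ptr<Node> NodePtr;

struct Node
{
    char readytype;        // 'E' expanded, 'T' tagged, 'C' constant (cf. m_readytype)
    NodePtr left, right;   // children; empty for leaf (IDENTITY) nodes

    explicit Node(char rt) : readytype(rt) {}
    Node(NodePtr l, NodePtr r)
      : readytype((l->readytype=='E' || r->readytype=='E') ? 'E' : 'C'),
        left(l), right(r) {}
};

int main()
{
    NodePtr a(new Node('E'));     // an IDENTITY node wrapping some ready data
    NodePtr c(new Node(a, a));    // c = a + a : both children are the same node
    std::cout << (c->left == c->right) << std::endl;   // prints 1 - a DAG, not a tree
    return 0;
}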
# Line 93  namespace escript Line 113  namespace escript
113  namespace  namespace
114  {  {
115    
116    
117    // enabling this will print out when ever the maximum stacksize used by resolve increases
118    // it assumes _OPENMP is also in use
119    //#define LAZY_STACK_PROF
120    
121    
122    
123    #ifndef _OPENMP
124      #ifdef LAZY_STACK_PROF
125      #undef LAZY_STACK_PROF
126      #endif
127    #endif
128    
129    
130    #ifdef LAZY_STACK_PROF
131    std::vector<void*> stackstart(getNumberOfThreads());
132    std::vector<void*> stackend(getNumberOfThreads());
133    size_t maxstackuse=0;
134    #endif
135    
136  enum ES_opgroup  enum ES_opgroup
137  {  {
138     G_UNKNOWN,     G_UNKNOWN,
# Line 102  enum ES_opgroup Line 142  enum ES_opgroup
142     G_UNARY_P,       // pointwise operations with one argument, requiring a parameter     G_UNARY_P,       // pointwise operations with one argument, requiring a parameter
143     G_NP1OUT,        // non-pointwise op with one output     G_NP1OUT,        // non-pointwise op with one output
144     G_NP1OUT_P,      // non-pointwise op with one output requiring a parameter     G_NP1OUT_P,      // non-pointwise op with one output requiring a parameter
145     G_TENSORPROD     // general tensor product     G_TENSORPROD,    // general tensor product
146       G_NP1OUT_2P,     // non-pointwise op with one output requiring two params
147       G_REDUCTION,     // non-pointwise unary op with a scalar output
148       G_CONDEVAL
149  };  };
150    
151    
# Line 116  string ES_opstrings[]={"UNKNOWN","IDENTI Line 159  string ES_opstrings[]={"UNKNOWN","IDENTI
159              "1/","where>0","where<0","where>=0","where<=0", "where<>0","where=0",              "1/","where>0","where<0","where>=0","where<=0", "where<>0","where=0",
160              "symmetric","nonsymmetric",              "symmetric","nonsymmetric",
161              "prod",              "prod",
162              "transpose", "trace"};              "transpose", "trace",
163  int ES_opcount=40;              "swapaxes",
164                "minval", "maxval",
165                "condEval"};
166    int ES_opcount=44;
167  ES_opgroup opgroups[]={G_UNKNOWN,G_IDENTITY,G_BINARY,G_BINARY,G_BINARY,G_BINARY, G_BINARY,  ES_opgroup opgroups[]={G_UNKNOWN,G_IDENTITY,G_BINARY,G_BINARY,G_BINARY,G_BINARY, G_BINARY,
168              G_UNARY,G_UNARY,G_UNARY, //10              G_UNARY,G_UNARY,G_UNARY, //10
169              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,    // 17              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,    // 17
# Line 126  ES_opgroup opgroups[]={G_UNKNOWN,G_IDENT Line 172  ES_opgroup opgroups[]={G_UNKNOWN,G_IDENT
172              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY, G_UNARY_P, G_UNARY_P,      // 35              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY, G_UNARY_P, G_UNARY_P,      // 35
173              G_NP1OUT,G_NP1OUT,              G_NP1OUT,G_NP1OUT,
174              G_TENSORPROD,              G_TENSORPROD,
175              G_NP1OUT_P, G_NP1OUT_P};              G_NP1OUT_P, G_NP1OUT_P,
176                G_NP1OUT_2P,
177                G_REDUCTION, G_REDUCTION,
178                G_CONDEVAL};
179  inline  inline
180  ES_opgroup  ES_opgroup
181  getOpgroup(ES_optype op)  getOpgroup(ES_optype op)
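// The body of getOpgroup falls outside this hunk; a minimal self-contained sketch,
// assuming the real function is a straight lookup of the ES_optype value in the
// parallel opgroups[] table declared above (which is why ES_opstrings, opgroups and
// ES_opcount must be kept in sync). Names below are illustrative stand-ins.
#include <cassert>

enum SketchOp    { SK_UNKNOWN, SK_IDENTITY, SK_ADD };        // stand-in for ES_optype
enum SketchGroup { SKG_UNKNOWN, SKG_IDENTITY, SKG_BINARY };  // stand-in for ES_opgroup

const SketchGroup sketchGroups[] = { SKG_UNKNOWN, SKG_IDENTITY, SKG_BINARY };

inline SketchGroup getOpgroupSketch(SketchOp op)
{
    return sketchGroups[op];      // the op value indexes the group table directly
}

int main()
{
    assert(getOpgroupSketch(SK_ADD) == SKG_BINARY);
    return 0;
}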
# Line 147  resultFS(DataAbstract_ptr left, DataAbst Line 196  resultFS(DataAbstract_ptr left, DataAbst
196    FunctionSpace r=right->getFunctionSpace();    FunctionSpace r=right->getFunctionSpace();
197    if (l!=r)    if (l!=r)
198    {    {
199      if (r.probeInterpolation(l))      signed char res=r.getDomain()->preferredInterpolationOnDomain(r.getTypeCode(), l.getTypeCode());
200        if (res==1)
201      {      {
202      return l;      return l;
203      }      }
204      if (l.probeInterpolation(r))      if (res==-1)
205      {      {
206      return r;      return r;
207      }      }
# Line 171  resultShape(DataAbstract_ptr left, DataA Line 221  resultShape(DataAbstract_ptr left, DataA
221        {        {
222          throw DataException("Shapes not the same - shapes must match for (point)binary operations.");          throw DataException("Shapes not the same - shapes must match for (point)binary operations.");
223        }        }
224    
225        if (left->getRank()==0)   // we need to allow scalar * anything        if (left->getRank()==0)   // we need to allow scalar * anything
226        {        {
227          return right->getShape();          return right->getShape();
# Line 187  resultShape(DataAbstract_ptr left, DataA Line 238  resultShape(DataAbstract_ptr left, DataA
238  // return the shape for "op left"  // return the shape for "op left"
239    
240  DataTypes::ShapeType  DataTypes::ShapeType
241  resultShape(DataAbstract_ptr left, ES_optype op)  resultShape(DataAbstract_ptr left, ES_optype op, int axis_offset)
242  {  {
243      switch(op)      switch(op)
244      {      {
245          case TRANS:          case TRANS:
246          return left->getShape();         {            // for the scoping of variables
247            const DataTypes::ShapeType& s=left->getShape();
248            DataTypes::ShapeType sh;
249            int rank=left->getRank();
250            if (axis_offset<0 || axis_offset>rank)
251            {
252                stringstream e;
253                e << "Error - Data::transpose must have 0 <= axis_offset <= rank=" << rank;
254                throw DataException(e.str());
255            }
256            for (int i=0; i<rank; i++)
257            {
258               int index = (axis_offset+i)%rank;
259               sh.push_back(s[index]); // Append to new shape
260            }
261            return sh;
262           }
263      break;      break;
264      case TRACE:      case TRACE:
265          return DataTypes::scalarShape;         {
266            int rank=left->getRank();
267            if (rank<2)
268            {
269               throw DataException("Trace can only be computed for objects with rank 2 or greater.");
270            }
271            if ((axis_offset>rank-2) || (axis_offset<0))
272            {
273               throw DataException("Trace: axis offset must lie between 0 and rank-2 inclusive.");
274            }
275            if (rank==2)
276            {
277               return DataTypes::scalarShape;
278            }
279            else if (rank==3)
280            {
281               DataTypes::ShapeType sh;
282                   if (axis_offset==0)
283               {
284                    sh.push_back(left->getShape()[2]);
285                   }
286                   else     // offset==1
287               {
288                sh.push_back(left->getShape()[0]);
289                   }
290               return sh;
291            }
292            else if (rank==4)
293            {
294               DataTypes::ShapeType sh;
295               const DataTypes::ShapeType& s=left->getShape();
296                   if (axis_offset==0)
297               {
298                    sh.push_back(s[2]);
299                    sh.push_back(s[3]);
300                   }
301                   else if (axis_offset==1)
302               {
303                    sh.push_back(s[0]);
304                    sh.push_back(s[3]);
305                   }
306               else     // offset==2
307               {
308                sh.push_back(s[0]);
309                sh.push_back(s[1]);
310               }
311               return sh;
312            }
313            else        // unknown rank
314            {
315               throw DataException("Error - Data::trace can only be calculated for rank 2, 3 or 4 object.");
316            }
317           }
318      break;      break;
319          default:          default:
320      throw DataException("Programmer error - resultShape(left,op) can't compute shapes for operator "+opToString(op)+".");      throw DataException("Programmer error - resultShape(left,op) can't compute shapes for operator "+opToString(op)+".");
321      }      }
322  }  }
323    
324    DataTypes::ShapeType
325    SwapShape(DataAbstract_ptr left, const int axis0, const int axis1)
326    {
327         // This code taken from the Data.cpp swapaxes() method
328         // Some of the checks are probably redundant here
329         int axis0_tmp,axis1_tmp;
330         const DataTypes::ShapeType& s=left->getShape();
331         DataTypes::ShapeType out_shape;
332         // Build the output shape by exchanging the entries at axis0 and axis1
333         // and copying every other entry of the input shape across unchanged.
334         int rank=left->getRank();
335         if (rank<2) {
336            throw DataException("Error - Data::swapaxes argument must have at least rank 2.");
337         }
338         if (axis0<0 || axis0>rank-1) {
339            stringstream e;
340            e << "Error - Data::swapaxes: axis0 must be between 0 and rank-1=" << (rank-1);
341            throw DataException(e.str());
342         }
343         if (axis1<0 || axis1>rank-1) {
344            stringstream e;
345            e << "Error - Data::swapaxes: axis1 must be between 0 and rank-1=" << (rank-1);
346            throw DataException(e.str());
347         }
348         if (axis0 == axis1) {
349             throw DataException("Error - Data::swapaxes: axis indices must be different.");
350         }
351         if (axis0 > axis1) {
352             axis0_tmp=axis1;
353             axis1_tmp=axis0;
354         } else {
355             axis0_tmp=axis0;
356             axis1_tmp=axis1;
357         }
358         for (int i=0; i<rank; i++) {
359           if (i == axis0_tmp) {
360              out_shape.push_back(s[axis1_tmp]);
361           } else if (i == axis1_tmp) {
362              out_shape.push_back(s[axis0_tmp]);
363           } else {
364              out_shape.push_back(s[i]);
365           }
366         }
367        return out_shape;
368    }
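// A small self-contained check of the swap logic above (plain ints stand in for
// DataTypes::ShapeType entries; the shape values are illustrative only).
#include <cassert>
#include <utility>
#include <vector>

std::vector<int> swapShapeSketch(std::vector<int> s, int axis0, int axis1)
{
    std::swap(s[axis0], s[axis1]);   // same outcome as the element-wise copy above
    return s;
}

int main()
{
    std::vector<int> in;
    in.push_back(2); in.push_back(3); in.push_back(4);
    std::vector<int> out = swapShapeSketch(in, 0, 2);
    assert(out[0]==4 && out[1]==3 && out[2]==2);     // (2,3,4) -> (4,3,2)
    return 0;
}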
369    
370    
371  // determine the output shape for the general tensor product operation  // determine the output shape for the general tensor product operation
372  // the additional parameters return information required later for the product  // the additional parameters return information required later for the product
373  // the majority of this code is copy pasted from C_General_Tensor_Product  // the majority of this code is copy pasted from C_General_Tensor_Product
# Line 266  GTPShape(DataAbstract_ptr left, DataAbst Line 432  GTPShape(DataAbstract_ptr left, DataAbst
432    return shape2;    return shape2;
433  }  }
434    
   
 // determine the number of points in the result of "left op right"  
 // note that determining the resultLength for G_TENSORPROD is more complex and will not be processed here  
 // size_t  
 // resultLength(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)  
 // {  
 //    switch (getOpgroup(op))  
 //    {  
 //    case G_BINARY: return left->getLength();  
 //    case G_UNARY: return left->getLength();  
 //    case G_NP1OUT: return left->getLength();  
 //    default:  
 //  throw DataException("Programmer Error - attempt to getLength() for operator "+opToString(op)+".");  
 //    }  
 // }  
   
 // determine the number of samples requires to evaluate an expression combining left and right  
 // NP1OUT needs an extra buffer because we can't write the answers over the top of the input.  
 // The same goes for G_TENSORPROD  
 int  
 calcBuffs(const DataLazy_ptr& left, const DataLazy_ptr& right, ES_optype op)  
 {  
    switch(getOpgroup(op))  
    {  
    case G_IDENTITY: return 1;  
    case G_BINARY: return max(left->getBuffsRequired(),right->getBuffsRequired()+1);  
    case G_UNARY:  
    case G_UNARY_P: return max(left->getBuffsRequired(),1);  
    case G_NP1OUT: return 1+max(left->getBuffsRequired(),1);  
    case G_NP1OUT_P: return 1+max(left->getBuffsRequired(),1);  
    case G_TENSORPROD: return 1+max(left->getBuffsRequired(),right->getBuffsRequired()+1);  
    default:  
     throw DataException("Programmer Error - attempt to calcBuffs() for operator "+opToString(op)+".");  
    }  
 }  
   
   
435  }   // end anonymous namespace  }   // end anonymous namespace
436    
437    
# Line 318  opToString(ES_optype op) Line 447  opToString(ES_optype op)
447    return ES_opstrings[op];    return ES_opstrings[op];
448  }  }
449    
450    void DataLazy::LazyNodeSetup()
451    {
452    #ifdef _OPENMP
453        int numthreads=omp_get_max_threads();
454        m_samples.resize(numthreads*m_samplesize);
455        m_sampleids=new int[numthreads];
456        for (int i=0;i<numthreads;++i)
457        {
458            m_sampleids[i]=-1;  
459        }
460    #else
461        m_samples.resize(m_samplesize);
462        m_sampleids=new int[1];
463        m_sampleids[0]=-1;
464    #endif  // _OPENMP
465    }
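// A minimal sketch of the buffer layout LazyNodeSetup establishes: m_samples is
// carved into one m_samplesize-long slot per OpenMP thread, and m_sampleids[tid]
// remembers which sample currently occupies thread tid's slot (-1 means empty).
// Names here are illustrative stand-ins, not the real members.
#ifdef _OPENMP
#include <omp.h>
#endif
#include <cstddef>
#include <vector>

struct PerThreadScratchSketch
{
    std::vector<double> samples;    // numthreads * samplesize doubles
    std::vector<int>    sampleids;  // one cached-sample id per thread
    std::size_t samplesize;

    explicit PerThreadScratchSketch(std::size_t ssize) : samplesize(ssize)
    {
#ifdef _OPENMP
        int numthreads = omp_get_max_threads();
#else
        int numthreads = 1;
#endif
        samples.resize(numthreads * samplesize);
        sampleids.assign(numthreads, -1);
    }

    // Where thread tid writes its resolved sample (cf. roffset = m_samplesize*tid).
    double* slot(int tid) { return &samples[tid * samplesize]; }
};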
466    
467    
468    // Creates an identity node
469  DataLazy::DataLazy(DataAbstract_ptr p)  DataLazy::DataLazy(DataAbstract_ptr p)
470      : parent(p->getFunctionSpace(),p->getShape()),      : parent(p->getFunctionSpace(),p->getShape())
471      m_op(IDENTITY),      ,m_sampleids(0),
472      m_axis_offset(0),      m_samples(1)
     m_transpose(0),  
     m_SL(0), m_SM(0), m_SR(0)  
473  {  {
474     if (p->isLazy())     if (p->isLazy())
475     {     {
# Line 335  DataLazy::DataLazy(DataAbstract_ptr p) Line 480  DataLazy::DataLazy(DataAbstract_ptr p)
480     }     }
481     else     else
482     {     {
483      m_id=dynamic_pointer_cast<DataReady>(p);      p->makeLazyShared();
484      if(p->isConstant()) {m_readytype='C';}      DataReady_ptr dr=dynamic_pointer_cast<DataReady>(p);
485      else if(p->isExpanded()) {m_readytype='E';}      makeIdentity(dr);
486      else if (p->isTagged()) {m_readytype='T';}  LAZYDEBUG(cout << "Wrapping " << dr.get() << " id=" << m_id.get() << endl;)
     else {throw DataException("Unknown DataReady instance in DataLazy constructor.");}  
487     }     }
    m_buffsRequired=1;  
    m_samplesize=getNumDPPSample()*getNoValues();  
    m_maxsamplesize=m_samplesize;  
488  LAZYDEBUG(cout << "(1)Lazy created with " << m_samplesize << endl;)  LAZYDEBUG(cout << "(1)Lazy created with " << m_samplesize << endl;)
489  }  }
490    
   
   
   
491  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op)  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op)
492      : parent(left->getFunctionSpace(),left->getShape()),      : parent(left->getFunctionSpace(),(getOpgroup(op)!=G_REDUCTION)?left->getShape():DataTypes::scalarShape),
493      m_op(op),      m_op(op),
494      m_axis_offset(0),      m_axis_offset(0),
495      m_transpose(0),      m_transpose(0),
496      m_SL(0), m_SM(0), m_SR(0)      m_SL(0), m_SM(0), m_SR(0)
497  {  {
498     if ((getOpgroup(op)!=G_UNARY) && (getOpgroup(op)!=G_NP1OUT))     if ((getOpgroup(op)!=G_UNARY) && (getOpgroup(op)!=G_NP1OUT) && (getOpgroup(op)!=G_REDUCTION))
499     {     {
500      throw DataException("Programmer error - constructor DataLazy(left, op) will only process UNARY operations.");      throw DataException("Programmer error - constructor DataLazy(left, op) will only process UNARY operations.");
501     }     }
# Line 373  DataLazy::DataLazy(DataAbstract_ptr left Line 511  DataLazy::DataLazy(DataAbstract_ptr left
511     }     }
512     m_readytype=lleft->m_readytype;     m_readytype=lleft->m_readytype;
513     m_left=lleft;     m_left=lleft;
    m_buffsRequired=calcBuffs(m_left, m_right,m_op); // yeah m_right will be null at this point  
514     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
515     m_maxsamplesize=max(m_samplesize,m_left->getMaxSampleSize());     m_children=m_left->m_children+1;
516       m_height=m_left->m_height+1;
517       LazyNodeSetup();
518       SIZELIMIT
519  }  }
520    
521    
# Line 385  DataLazy::DataLazy(DataAbstract_ptr left Line 525  DataLazy::DataLazy(DataAbstract_ptr left
525      m_op(op),      m_op(op),
526      m_SL(0), m_SM(0), m_SR(0)      m_SL(0), m_SM(0), m_SR(0)
527  {  {
528    LAZYDEBUG(cout << "Forming operator with " << left.get() << " " << right.get() << endl;)
529     if ((getOpgroup(op)!=G_BINARY))     if ((getOpgroup(op)!=G_BINARY))
530     {     {
531      throw DataException("Programmer error - constructor DataLazy(left, right, op) will only process BINARY operations.");      throw DataException("Programmer error - constructor DataLazy(left, right, op) will only process BINARY operations.");
# Line 401  DataLazy::DataLazy(DataAbstract_ptr left Line 542  DataLazy::DataLazy(DataAbstract_ptr left
542     {     {
543      Data tmp(Data(right),getFunctionSpace());      Data tmp(Data(right),getFunctionSpace());
544      right=tmp.borrowDataPtr();      right=tmp.borrowDataPtr();
545    LAZYDEBUG(cout << "Right interpolation required " << right.get() << endl;)
546     }     }
547     left->operandCheck(*right);     left->operandCheck(*right);
548    
549     if (left->isLazy())          // the children need to be DataLazy. Wrap them in IDENTITY if required     if (left->isLazy())          // the children need to be DataLazy. Wrap them in IDENTITY if required
550     {     {
551      m_left=dynamic_pointer_cast<DataLazy>(left);      m_left=dynamic_pointer_cast<DataLazy>(left);
552    LAZYDEBUG(cout << "Left is " << m_left->toString() << endl;)
553     }     }
554     else     else
555     {     {
556      m_left=DataLazy_ptr(new DataLazy(left));      m_left=DataLazy_ptr(new DataLazy(left));
557    LAZYDEBUG(cout << "Left " << left.get() << " wrapped " << m_left->m_id.get() << endl;)
558     }     }
559     if (right->isLazy())     if (right->isLazy())
560     {     {
561      m_right=dynamic_pointer_cast<DataLazy>(right);      m_right=dynamic_pointer_cast<DataLazy>(right);
562    LAZYDEBUG(cout << "Right is " << m_right->toString() << endl;)
563     }     }
564     else     else
565     {     {
566      m_right=DataLazy_ptr(new DataLazy(right));      m_right=DataLazy_ptr(new DataLazy(right));
567    LAZYDEBUG(cout << "Right " << right.get() << " wrapped " << m_right->m_id.get() << endl;)
568     }     }
569     char lt=m_left->m_readytype;     char lt=m_left->m_readytype;
570     char rt=m_right->m_readytype;     char rt=m_right->m_readytype;
# Line 435  DataLazy::DataLazy(DataAbstract_ptr left Line 581  DataLazy::DataLazy(DataAbstract_ptr left
581      m_readytype='C';      m_readytype='C';
582     }     }
583     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
584     m_maxsamplesize=max(max(m_samplesize,m_right->getMaxSampleSize()),m_left->getMaxSampleSize());       m_children=m_left->m_children+m_right->m_children+2;
585     m_buffsRequired=calcBuffs(m_left, m_right,m_op);     m_height=max(m_left->m_height,m_right->m_height)+1;
586       LazyNodeSetup();
587       SIZELIMIT
588  LAZYDEBUG(cout << "(3)Lazy created with " << m_samplesize << endl;)  LAZYDEBUG(cout << "(3)Lazy created with " << m_samplesize << endl;)
589  }  }
590    
# Line 466  DataLazy::DataLazy(DataAbstract_ptr left Line 614  DataLazy::DataLazy(DataAbstract_ptr left
614      Data tmp(Data(right),getFunctionSpace());      Data tmp(Data(right),getFunctionSpace());
615      right=tmp.borrowDataPtr();      right=tmp.borrowDataPtr();
616     }     }
617     left->operandCheck(*right);  //    left->operandCheck(*right);
618    
619     if (left->isLazy())          // the children need to be DataLazy. Wrap them in IDENTITY if required     if (left->isLazy())          // the children need to be DataLazy. Wrap them in IDENTITY if required
620     {     {
# Line 499  DataLazy::DataLazy(DataAbstract_ptr left Line 647  DataLazy::DataLazy(DataAbstract_ptr left
647      m_readytype='C';      m_readytype='C';
648     }     }
649     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
650     m_maxsamplesize=max(max(m_samplesize,m_right->getMaxSampleSize()),m_left->getMaxSampleSize());       m_children=m_left->m_children+m_right->m_children+2;
651     m_buffsRequired=calcBuffs(m_left, m_right,m_op);     m_height=max(m_left->m_height,m_right->m_height)+1;
652       LazyNodeSetup();
653       SIZELIMIT
654  LAZYDEBUG(cout << "(4)Lazy created with " << m_samplesize << endl;)  LAZYDEBUG(cout << "(4)Lazy created with " << m_samplesize << endl;)
655  }  }
656    
657    
658  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, int axis_offset)  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, int axis_offset)
659      : parent(left->getFunctionSpace(), resultShape(left,op)),      : parent(left->getFunctionSpace(), resultShape(left,op, axis_offset)),
660      m_op(op),      m_op(op),
661      m_axis_offset(axis_offset),      m_axis_offset(axis_offset),
662      m_transpose(0),      m_transpose(0),
# Line 527  DataLazy::DataLazy(DataAbstract_ptr left Line 677  DataLazy::DataLazy(DataAbstract_ptr left
677     }     }
678     m_readytype=lleft->m_readytype;     m_readytype=lleft->m_readytype;
679     m_left=lleft;     m_left=lleft;
    m_buffsRequired=calcBuffs(m_left, m_right,m_op); // yeah m_right will be null at this point  
680     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
681     m_maxsamplesize=max(m_samplesize,m_left->getMaxSampleSize());     m_children=m_left->m_children+1;
682       m_height=m_left->m_height+1;
683       LazyNodeSetup();
684       SIZELIMIT
685  LAZYDEBUG(cout << "(5)Lazy created with " << m_samplesize << endl;)  LAZYDEBUG(cout << "(5)Lazy created with " << m_samplesize << endl;)
686  }  }
687    
# Line 555  DataLazy::DataLazy(DataAbstract_ptr left Line 707  DataLazy::DataLazy(DataAbstract_ptr left
707     }     }
708     m_readytype=lleft->m_readytype;     m_readytype=lleft->m_readytype;
709     m_left=lleft;     m_left=lleft;
    m_buffsRequired=calcBuffs(m_left, m_right,m_op); // yeah m_right will be null at this point  
710     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
711     m_maxsamplesize=max(m_samplesize,m_left->getMaxSampleSize());     m_children=m_left->m_children+1;
712       m_height=m_left->m_height+1;
713       LazyNodeSetup();
714       SIZELIMIT
715  LAZYDEBUG(cout << "(6)Lazy created with " << m_samplesize << endl;)  LAZYDEBUG(cout << "(6)Lazy created with " << m_samplesize << endl;)
716  }  }
717    
718  DataLazy::~DataLazy()  
719    DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, const int axis0, const int axis1)
720        : parent(left->getFunctionSpace(), SwapShape(left,axis0,axis1)),
721        m_op(op),
722        m_axis_offset(axis0),
723        m_transpose(axis1),
724        m_tol(0)
725  {  {
726       if ((getOpgroup(op)!=G_NP1OUT_2P))
727       {
728        throw DataException("Programmer error - constructor DataLazy(left, op, axis0, axis1) will only process UNARY operations which require two integer parameters.");
729       }
730       DataLazy_ptr lleft;
731       if (!left->isLazy())
732       {
733        lleft=DataLazy_ptr(new DataLazy(left));
734       }
735       else
736       {
737        lleft=dynamic_pointer_cast<DataLazy>(left);
738       }
739       m_readytype=lleft->m_readytype;
740       m_left=lleft;
741       m_samplesize=getNumDPPSample()*getNoValues();
742       m_children=m_left->m_children+1;
743       m_height=m_left->m_height+1;
744       LazyNodeSetup();
745       SIZELIMIT
746    LAZYDEBUG(cout << "(7)Lazy created with " << m_samplesize << endl;)
747  }  }
748    
749    
750  int  namespace
751  DataLazy::getBuffsRequired() const  {
752    
753        inline int max3(int a, int b, int c)
754        {
755        int t=(a>b?a:b);
756        return (t>c?t:c);
757    
758        }
759    }
760    
761    DataLazy::DataLazy(DataAbstract_ptr mask, DataAbstract_ptr left, DataAbstract_ptr right/*, double tol*/)
762        : parent(left->getFunctionSpace(), left->getShape()),
763        m_op(CONDEVAL),
764        m_axis_offset(0),
765        m_transpose(0),
766        m_tol(0)
767  {  {
768      return m_buffsRequired;  
769       DataLazy_ptr lmask;
770       DataLazy_ptr lleft;
771       DataLazy_ptr lright;
772       if (!mask->isLazy())
773       {
774        lmask=DataLazy_ptr(new DataLazy(mask));
775       }
776       else
777       {
778        lmask=dynamic_pointer_cast<DataLazy>(mask);
779       }
780       if (!left->isLazy())
781       {
782        lleft=DataLazy_ptr(new DataLazy(left));
783       }
784       else
785       {
786        lleft=dynamic_pointer_cast<DataLazy>(left);
787       }
788       if (!right->isLazy())
789       {
790        lright=DataLazy_ptr(new DataLazy(right));
791       }
792       else
793       {
794        lright=dynamic_pointer_cast<DataLazy>(right);
795       }
796       m_readytype=lmask->m_readytype;
797       if ((lleft->m_readytype!=lright->m_readytype) || (lmask->m_readytype!=lleft->m_readytype))
798       {
799        throw DataException("Programmer Error - condEval arguments must have the same readytype");
800       }
801       m_left=lleft;
802       m_right=lright;
803       m_mask=lmask;
804       m_samplesize=getNumDPPSample()*getNoValues();
805       m_children=m_left->m_children+m_right->m_children+m_mask->m_children+1;
806       m_height=max3(m_left->m_height,m_right->m_height,m_mask->m_height)+1;
807       LazyNodeSetup();
808       SIZELIMIT
809    LAZYDEBUG(cout << "(8)Lazy created with " << m_samplesize << endl;)
810  }  }
811    
812    
813  size_t  
814  DataLazy::getMaxSampleSize() const  DataLazy::~DataLazy()
815  {  {
816      return m_maxsamplesize;     delete[] m_sampleids;
817  }  }
818    
819    
820  /*  /*
821    \brief Evaluates the expression using methods on Data.    \brief Evaluates the expression using methods on Data.
822    This does the work for the collapse method.    This does the work for the collapse method.
823    For reasons of efficiency do not call this method on DataExpanded nodes.    For reasons of efficiency do not call this method on DataExpanded nodes.
824  */  */
825  DataReady_ptr  DataReady_ptr
826  DataLazy::collapseToReady()  DataLazy::collapseToReady() const
827  {  {
828    if (m_readytype=='E')    if (m_readytype=='E')
829    { // this is more an efficiency concern than anything else    { // this is more an efficiency concern than anything else
# Line 718  DataLazy::collapseToReady() Line 956  DataLazy::collapseToReady()
956      case TRACE:      case TRACE:
957      result=left.trace(m_axis_offset);      result=left.trace(m_axis_offset);
958      break;      break;
959        case SWAP:
960        result=left.swapaxes(m_axis_offset, m_transpose);
961        break;
962        case MINVAL:
963        result=left.minval();
964        break;
965        case MAXVAL:
966        result=left.minval();
967        break;
968      default:      default:
969      throw DataException("Programmer error - collapseToReady does not know how to resolve operator "+opToString(m_op)+".");      throw DataException("Programmer error - collapseToReady does not know how to resolve operator "+opToString(m_op)+".");
970    }    }
# Line 731  DataLazy::collapseToReady() Line 978  DataLazy::collapseToReady()
978     the purpose of using DataLazy in the first place).     the purpose of using DataLazy in the first place).
979  */  */
980  void  void
981  DataLazy::collapse()  DataLazy::collapse() const
982  {  {
983    if (m_op==IDENTITY)    if (m_op==IDENTITY)
984    {    {
# Line 745  DataLazy::collapse() Line 992  DataLazy::collapse()
992    m_op=IDENTITY;    m_op=IDENTITY;
993  }  }
994    
995  /*  
996    \brief Compute the value of the expression (unary operation) for the given sample.  
997    \return Vector which stores the value of the subexpression for the given sample.  
998    \param v A vector to store intermediate results.  
999    \param offset Index in v to begin storing results.  
1000    \param sampleNo Sample number to evaluate.  #define PROC_OP(TYPE,X)                               \
1001    \param roffset (output parameter) the offset in the return vector where the result begins.      for (int j=0;j<onumsteps;++j)\
1002        {\
1003    The return value will be an existing vector so do not deallocate it.        for (int i=0;i<numsteps;++i,resultp+=resultStep) \
1004    If the result is stored in v it should be stored at the offset given.        { \
1005    Everything from offset to the end of v should be considered available for this method to use.  LAZYDEBUG(cout << "[left,right]=[" << lroffset << "," << rroffset << "]" << endl;)\
1006  */  LAZYDEBUG(cout << "{left,right}={" << (*left)[lroffset] << "," << (*right)[rroffset] << "}\n";)\
1007  DataTypes::ValueType*           tensor_binary_operation< TYPE >(chunksize, &((*left)[lroffset]), &((*right)[rroffset]), resultp, X); \
1008  DataLazy::resolveUnary(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const  LAZYDEBUG(cout << " result=      " << resultp[0] << endl;) \
1009             lroffset+=leftstep; \
1010             rroffset+=rightstep; \
1011          }\
1012          lroffset+=oleftstep;\
1013          rroffset+=orightstep;\
1014        }
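// A standalone sketch of the loop structure the PROC_OP macro expands to
// (illustrative parameter names; the real macro operates on DataVectors through
// escript's tensor_binary_operation). For ADD, for example, the functor would be
// std::plus<double>() applied to chunksize doubles at a time.
#include <algorithm>
#include <cstddef>

template <typename BinOp>
void procOpSketch(const double* left, const double* right, double* resultp,
                  int chunksize, int numsteps, int onumsteps,
                  int leftstep, int rightstep, int resultStep,
                  int oleftstep, int orightstep, BinOp op)
{
    std::ptrdiff_t lroffset = 0, rroffset = 0;    // read positions into left/right
    for (int j = 0; j < onumsteps; ++j)           // outer loop: only >1 for extended scalars
    {
        for (int i = 0; i < numsteps; ++i, resultp += resultStep)
        {
            // stands in for tensor_binary_operation<double>(chunksize, ..., op)
            std::transform(left + lroffset, left + lroffset + chunksize,
                           right + rroffset, resultp, op);
            lroffset += leftstep;
            rroffset += rightstep;
        }
        lroffset += oleftstep;                    // may step backwards to re-read a point
        rroffset += orightstep;
    }
}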
1015    
1016    
1017    // The result will be stored in m_samples
1018    // The return value is a pointer to the DataVector, offset is the offset within the return value
1019    const DataTypes::ValueType*
1020    DataLazy::resolveNodeSample(int tid, int sampleNo, size_t& roffset) const
1021    {
1022    LAZYDEBUG(cout << "Resolve sample " << toString() << endl;)
1023        // collapse so we have a 'E' node or an IDENTITY for some other type
1024      if (m_readytype!='E' && m_op!=IDENTITY)
1025      {
1026        collapse();
1027      }
1028      if (m_op==IDENTITY)  
1029      {
1030        const ValueType& vec=m_id->getVectorRO();
1031        roffset=m_id->getPointOffset(sampleNo, 0);
1032    #ifdef LAZY_STACK_PROF
1033    int x;
1034    if (&x<stackend[omp_get_thread_num()])
1035    {
1036           stackend[omp_get_thread_num()]=&x;
1037    }
1038    #endif
1039        return &(vec);
1040      }
1041      if (m_readytype!='E')
1042      {
1043        throw DataException("Programmer Error - Collapse did not produce an expanded node.");
1044      }
1045      if (m_sampleids[tid]==sampleNo)
1046      {
1047        roffset=tid*m_samplesize;
1048        return &(m_samples);        // sample is already resolved
1049      }
1050      m_sampleids[tid]=sampleNo;
1051    
1052      switch (getOpgroup(m_op))
1053      {
1054      case G_UNARY:
1055      case G_UNARY_P: return resolveNodeUnary(tid, sampleNo, roffset);
1056      case G_BINARY: return resolveNodeBinary(tid, sampleNo, roffset);
1057      case G_NP1OUT: return resolveNodeNP1OUT(tid, sampleNo, roffset);
1058      case G_NP1OUT_P: return resolveNodeNP1OUT_P(tid, sampleNo, roffset);
1059      case G_TENSORPROD: return resolveNodeTProd(tid, sampleNo, roffset);
1060      case G_NP1OUT_2P: return resolveNodeNP1OUT_2P(tid, sampleNo, roffset);
1061      case G_REDUCTION: return resolveNodeReduction(tid, sampleNo, roffset);
1062      case G_CONDEVAL: return resolveNodeCondEval(tid, sampleNo, roffset);
1063      default:
1064        throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");
1065      }
1066    }
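// The loop that drives resolution over all samples lies outside this hunk; a minimal
// self-contained sketch of the calling pattern implied here: each OpenMP thread
// resolves whole samples, passes its own tid so writes land in its private slot, then
// copies samplesize doubles starting at roffset out of the returned vector.
// Resolver is a stand-in for a call to resolveNodeSample; names are illustrative.
#ifdef _OPENMP
#include <omp.h>
#endif
#include <algorithm>
#include <cstddef>
#include <vector>

template <typename Resolver>
void resolveAllSamplesSketch(Resolver resolve, int totalsamples,
                             std::size_t samplesize, std::vector<double>& out)
{
    out.resize(static_cast<std::size_t>(totalsamples) * samplesize);
    #pragma omp parallel for schedule(static)
    for (int sampleNo = 0; sampleNo < totalsamples; ++sampleNo)
    {
        int tid = 0;
#ifdef _OPENMP
        tid = omp_get_thread_num();
#endif
        std::size_t roffset = 0;
        const std::vector<double>* res = resolve(tid, sampleNo, roffset);  // cf. resolveNodeSample
        std::copy(res->begin() + roffset, res->begin() + roffset + samplesize,
                  out.begin() + static_cast<std::size_t>(sampleNo) * samplesize);
    }
}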
1067    
1068    const DataTypes::ValueType*
1069    DataLazy::resolveNodeUnary(int tid, int sampleNo, size_t& roffset) const
1070  {  {
1071      // we assume that any collapsing has been done before we get here      // we assume that any collapsing has been done before we get here
1072      // since we only have one argument we don't need to think about only      // since we only have one argument we don't need to think about only
1073      // processing single points.      // processing single points.
1074        // we will also know we won't get identity nodes
1075    if (m_readytype!='E')    if (m_readytype!='E')
1076    {    {
1077      throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");      throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
1078    }    }
1079    const ValueType* vleft=m_left->resolveSample(v,offset,sampleNo,roffset);    if (m_op==IDENTITY)
1080    const double* left=&((*vleft)[roffset]);    {
1081    double* result=&(v[offset]);      throw DataException("Programmer error - resolveNodeUnary should not be called on identity nodes.");
1082    roffset=offset;    }
1083      const DataTypes::ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, roffset);
1084      const double* left=&((*leftres)[roffset]);
1085      roffset=m_samplesize*tid;
1086      double* result=&(m_samples[roffset]);
1087    switch (m_op)    switch (m_op)
1088    {    {
1089      case SIN:        case SIN:  
# Line 880  DataLazy::resolveUnary(ValueType& v, siz Line 1193  DataLazy::resolveUnary(ValueType& v, siz
1193      default:      default:
1194      throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");      throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");
1195    }    }
1196    return &v;    return &(m_samples);
1197  }  }
1198    
1199    
1200    const DataTypes::ValueType*
1201    DataLazy::resolveNodeReduction(int tid, int sampleNo, size_t& roffset) const
1202    {
1203        // we assume that any collapsing has been done before we get here
1204        // since we only have one argument we don't need to think about only
1205        // processing single points.
1206        // we will also know we won't get identity nodes
1207      if (m_readytype!='E')
1208      {
1209      throw DataException("Programmer error - resolveNodeReduction should only be called on expanded Data.");
1210      }
1211      if (m_op==IDENTITY)
1212      {
1213      throw DataException("Programmer error - resolveNodeReduction should not be called on identity nodes.");
1214      }
1215      size_t loffset=0;
1216      const DataTypes::ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, loffset);
1217    
1218      roffset=m_samplesize*tid;
1219      unsigned int ndpps=getNumDPPSample();
1220      unsigned int psize=DataTypes::noValues(m_left->getShape());
1221      double* result=&(m_samples[roffset]);
1222      switch (m_op)
1223      {
1224        case MINVAL:
1225        {
1226          for (unsigned int z=0;z<ndpps;++z)
1227          {
1228            FMin op;
1229            *result=DataMaths::reductionOp(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max());
1230            loffset+=psize;
1231            result++;
1232          }
1233        }
1234        break;
1235        case MAXVAL:
1236        {
1237          for (unsigned int z=0;z<ndpps;++z)
1238          {
1239          FMax op;
1240          *result=DataMaths::reductionOp(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max()*-1);
1241          loffset+=psize;
1242          result++;
1243          }
1244        }
1245        break;
1246        default:
1247      throw DataException("Programmer error - resolveNodeReduction can not resolve operator "+opToString(m_op)+".");
1248      }
1249      return &(m_samples);
1250    }
1251    
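// A standalone sketch of the per-data-point fold performed above through
// DataMaths::reductionOp with FMin/FMax: start from the initial value and fold every
// value of the point into the accumulator (plain doubles here, not a DataVector).
#include <algorithm>
#include <cstddef>
#include <limits>

double minvalOfPointSketch(const double* point, std::size_t nvalues)
{
    double acc = std::numeric_limits<double>::max();        // initial value used for MINVAL
    for (std::size_t i = 0; i < nvalues; ++i)
        acc = std::min(acc, point[i]);                       // what FMin amounts to
    return acc;
}

double maxvalOfPointSketch(const double* point, std::size_t nvalues)
{
    double acc = -std::numeric_limits<double>::max();        // note: -max(), as above
    for (std::size_t i = 0; i < nvalues; ++i)
        acc = std::max(acc, point[i]);                       // what FMax amounts to
    return acc;
}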
1252    const DataTypes::ValueType*
1253    DataLazy::resolveNodeNP1OUT(int tid, int sampleNo, size_t& roffset) const
 /*  
   \brief Compute the value of the expression (unary operation) for the given sample.  
   \return Vector which stores the value of the subexpression for the given sample.  
   \param v A vector to store intermediate results.  
   \param offset Index in v to begin storing results.  
   \param sampleNo Sample number to evaluate.  
   \param roffset (output parameter) the offset in the return vector where the result begins.  
   
   The return value will be an existing vector so do not deallocate it.  
   If the result is stored in v it should be stored at the offset given.  
   Everything from offset to the end of v should be considered available for this method to use.  
 */  
 DataTypes::ValueType*  
 DataLazy::resolveNP1OUT(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const  
1254  {  {
1255      // we assume that any collapsing has been done before we get here      // we assume that any collapsing has been done before we get here
1256      // since we only have one argument we don't need to think about only      // since we only have one argument we don't need to think about only
1257      // processing single points.      // processing single points.
1258    if (m_readytype!='E')    if (m_readytype!='E')
1259    {    {
1260      throw DataException("Programmer error - resolveNP1OUT should only be called on expanded Data.");      throw DataException("Programmer error - resolveNodeNP1OUT should only be called on expanded Data.");
1261    }    }
1262      // since we can't write the result over the input, we need a result offset further along    if (m_op==IDENTITY)
1263    size_t subroffset=roffset+m_samplesize;    {
1264    const ValueType* vleft=m_left->resolveSample(v,offset,sampleNo,subroffset);      throw DataException("Programmer error - resolveNodeNP1OUT should not be called on identity nodes.");
1265    roffset=offset;    }
1266      size_t subroffset;
1267      const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1268      roffset=m_samplesize*tid;
1269      size_t loop=0;
1270      size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1271      size_t step=getNoValues();
1272      size_t offset=roffset;
1273    switch (m_op)    switch (m_op)
1274    {    {
1275      case SYM:      case SYM:
1276      DataMaths::symmetric(*vleft,m_left->getShape(),subroffset, v, getShape(), offset);      for (loop=0;loop<numsteps;++loop)
1277        {
1278            DataMaths::symmetric(*leftres,m_left->getShape(),subroffset, m_samples, getShape(), offset);
1279            subroffset+=step;
1280            offset+=step;
1281        }
1282      break;      break;
1283      case NSYM:      case NSYM:
1284      DataMaths::nonsymmetric(*vleft,m_left->getShape(),subroffset, v, getShape(), offset);      for (loop=0;loop<numsteps;++loop)
1285        {
1286            DataMaths::nonsymmetric(*leftres,m_left->getShape(),subroffset, m_samples, getShape(), offset);
1287            subroffset+=step;
1288            offset+=step;
1289        }
1290      break;      break;
1291      default:      default:
1292      throw DataException("Programmer error - resolveNP1OUT can not resolve operator "+opToString(m_op)+".");      throw DataException("Programmer error - resolveNP1OUT can not resolve operator "+opToString(m_op)+".");
1293    }    }
1294    return &v;    return &m_samples;
1295  }  }
1296    
1297  /*  const DataTypes::ValueType*
1298    \brief Compute the value of the expression (unary operation) for the given sample.  DataLazy::resolveNodeNP1OUT_P(int tid, int sampleNo, size_t& roffset) const
   \return Vector which stores the value of the subexpression for the given sample.  
   \param v A vector to store intermediate results.  
   \param offset Index in v to begin storing results.  
   \param sampleNo Sample number to evaluate.  
   \param roffset (output parameter) the offset in the return vector where the result begins.  
   
   The return value will be an existing vector so do not deallocate it.  
   If the result is stored in v it should be stored at the offset given.  
   Everything from offset to the end of v should be considered available for this method to use.  
 */  
 DataTypes::ValueType*  
 DataLazy::resolveNP1OUT_P(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const  
1299  {  {
1300      // we assume that any collapsing has been done before we get here      // we assume that any collapsing has been done before we get here
1301      // since we only have one argument we don't need to think about only      // since we only have one argument we don't need to think about only
1302      // processing single points.      // processing single points.
1303    if (m_readytype!='E')    if (m_readytype!='E')
1304    {    {
1305      throw DataException("Programmer error - resolveNP1OUT_P should only be called on expanded Data.");      throw DataException("Programmer error - resolveNodeNP1OUT_P should only be called on expanded Data.");
1306    }    }
1307      // since we can't write the result over the input, we need a result offset further along    if (m_op==IDENTITY)
1308    size_t subroffset=roffset+m_samplesize;    {
1309    const ValueType* vleft=m_left->resolveSample(v,offset,sampleNo,subroffset);      throw DataException("Programmer error - resolveNodeNP1OUT_P should not be called on identity nodes.");
1310    roffset=offset;    }
1311      size_t subroffset;
1312      size_t offset;
1313      const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1314      roffset=m_samplesize*tid;
1315      offset=roffset;
1316      size_t loop=0;
1317      size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1318      size_t outstep=getNoValues();
1319      size_t instep=m_left->getNoValues();
1320    switch (m_op)    switch (m_op)
1321    {    {
1322      case TRACE:      case TRACE:
1323           DataMaths::trace(*vleft,m_left->getShape(),subroffset, v,getShape(),offset,m_axis_offset);      for (loop=0;loop<numsteps;++loop)
1324        {
1325                DataMaths::trace(*leftres,m_left->getShape(),subroffset, m_samples ,getShape(),offset,m_axis_offset);
1326            subroffset+=instep;
1327            offset+=outstep;
1328        }
1329      break;      break;
1330      case TRANS:      case TRANS:
1331           DataMaths::transpose(*vleft,m_left->getShape(),subroffset, v,getShape(),offset,m_axis_offset);      for (loop=0;loop<numsteps;++loop)
1332        {
1333                DataMaths::transpose(*leftres,m_left->getShape(),subroffset, m_samples, getShape(),offset,m_axis_offset);
1334            subroffset+=instep;
1335            offset+=outstep;
1336        }
1337      break;      break;
1338      default:      default:
1339      throw DataException("Programmer error - resolveNP1OUTP can not resolve operator "+opToString(m_op)+".");      throw DataException("Programmer error - resolveNP1OUTP can not resolve operator "+opToString(m_op)+".");
1340    }    }
1341    return &v;    return &m_samples;
1342  }  }
1343    
1344    
1345  #define PROC_OP(TYPE,X)                               \  const DataTypes::ValueType*
1346      for (int i=0;i<steps;++i,resultp+=resultStep) \  DataLazy::resolveNodeNP1OUT_2P(int tid, int sampleNo, size_t& roffset) const
1347      { \  {
1348         tensor_binary_operation< TYPE >(chunksize, &((*left)[lroffset]), &((*right)[rroffset]), resultp, X); \    if (m_readytype!='E')
1349         lroffset+=leftStep; \    {
1350         rroffset+=rightStep; \      throw DataException("Programmer error - resolveNodeNP1OUT_2P should only be called on expanded Data.");
1351      }
1352      if (m_op==IDENTITY)
1353      {
1354        throw DataException("Programmer error - resolveNodeNP1OUT_2P should not be called on identity nodes.");
1355      }
1356      size_t subroffset;
1357      size_t offset;
1358      const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1359      roffset=m_samplesize*tid;
1360      offset=roffset;
1361      size_t loop=0;
1362      size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1363      size_t outstep=getNoValues();
1364      size_t instep=m_left->getNoValues();
1365      switch (m_op)
1366      {
1367        case SWAP:
1368        for (loop=0;loop<numsteps;++loop)
1369        {
1370                DataMaths::swapaxes(*leftres,m_left->getShape(),subroffset, m_samples, getShape(),offset, m_axis_offset, m_transpose);
1371            subroffset+=instep;
1372            offset+=outstep;
1373      }      }
1374        break;
1375        default:
1376        throw DataException("Programmer error - resolveNodeNP1OUT2P can not resolve operator "+opToString(m_op)+".");
1377      }
1378      return &m_samples;
1379    }
1380    
1381    const DataTypes::ValueType*
1382    DataLazy::resolveNodeCondEval(int tid, int sampleNo, size_t& roffset) const
1383    {
1384      if (m_readytype!='E')
1385      {
1386        throw DataException("Programmer error - resolveNodeCondEval should only be called on expanded Data.");
1387      }
1388      if (m_op!=CONDEVAL)
1389      {
1390        throw DataException("Programmer error - resolveNodeCondEval should only be called on CONDEVAL nodes.");
1391      }
1392      size_t subroffset;
1393    
1394      const ValueType* maskres=m_mask->resolveNodeSample(tid, sampleNo, subroffset);
1395      const ValueType* srcres=0;
1396      if ((*maskres)[subroffset]>0)
1397      {
1398        srcres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1399      }
1400      else
1401      {
1402        srcres=m_right->resolveNodeSample(tid, sampleNo, subroffset);
1403      }
1404    
1405      // Now we need to copy the result
1406    
1407      roffset=m_samplesize*tid;
1408      for (int i=0;i<m_samplesize;++i)
1409      {
1410        m_samples[roffset+i]=(*srcres)[subroffset+i];  
1411      }
1412    
1413      return &m_samples;
1414    }
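// A simplified standalone sketch of the per-sample selection above (illustrative
// types and names; the real code obtains mask and source offsets from separate
// resolveNodeSample calls): the first mask value of the sample decides which source
// sample is copied wholesale into the per-thread result slot.
#include <cstddef>
#include <vector>

void condEvalSampleSketch(const std::vector<double>& mask,
                          const std::vector<double>& left,
                          const std::vector<double>& right,
                          double* out, std::size_t offset, std::size_t samplesize)
{
    const std::vector<double>& src = (mask[offset] > 0) ? left : right;
    for (std::size_t i = 0; i < samplesize; ++i)
        out[i] = src[offset + i];
}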
1415    
 /*  
   \brief Compute the value of the expression (binary operation) for the given sample.  
   \return Vector which stores the value of the subexpression for the given sample.  
   \param v A vector to store intermediate results.  
   \param offset Index in v to begin storing results.  
   \param sampleNo Sample number to evaluate.  
   \param roffset (output parameter) the offset in the return vector where the result begins.  
   
   The return value will be an existing vector so do not deallocate it.  
   If the result is stored in v it should be stored at the offset given.  
   Everything from offset to the end of v should be considered available for this method to use.  
 */  
1416  // This method assumes that any subexpressions which evaluate to Constant or Tagged Data  // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1417  // have already been collapsed to IDENTITY. So we must have at least one expanded child.  // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1418  // If both children are expanded, then we can process them in a single operation (we treat  // If both children are expanded, then we can process them in a single operation (we treat
# Line 998  DataLazy::resolveNP1OUT_P(ValueType& v, Line 1422  DataLazy::resolveNP1OUT_P(ValueType& v,
1422  // There is an additional complication when scalar operations are considered.  // There is an additional complication when scalar operations are considered.
1423  // For example, 2+Vector.  // For example, 2+Vector.
1424  // In this case each double within the point is treated individually  // In this case each double within the point is treated individually
1425  DataTypes::ValueType*  const DataTypes::ValueType*
1426  DataLazy::resolveBinary(ValueType& v,  size_t offset, int sampleNo, size_t& roffset) const  DataLazy::resolveNodeBinary(int tid, int sampleNo, size_t& roffset) const
1427  {  {
1428  LAZYDEBUG(cout << "Resolve binary: " << toString() << endl;)  LAZYDEBUG(cout << "Resolve binary: " << toString() << endl;)
1429    
# Line 1013  LAZYDEBUG(cout << "Resolve binary: " << Line 1437  LAZYDEBUG(cout << "Resolve binary: " <<
1437    }    }
1438    bool leftScalar=(m_left->getRank()==0);    bool leftScalar=(m_left->getRank()==0);
1439    bool rightScalar=(m_right->getRank()==0);    bool rightScalar=(m_right->getRank()==0);
1440    bool bigloops=((leftExp && rightExp) || (!leftExp && !rightExp)); // is processing in single step?    if ((m_left->getRank()!=m_right->getRank()) && (!leftScalar && !rightScalar))
1441    int steps=(bigloops?1:getNumDPPSample());    {
1442    size_t chunksize=(bigloops? m_samplesize : getNoValues());    // if bigloops, pretend the whole sample is a datapoint      throw DataException("resolveBinary - ranks of arguments must match unless one of them is scalar.");
1443    if (m_left->getRank()!=m_right->getRank())    // need to deal with scalar * ? ops    }
1444    {    size_t leftsize=m_left->getNoValues();
1445      if (!leftScalar && !rightScalar)    size_t rightsize=m_right->getNoValues();
1446      {    size_t chunksize=1;           // how many doubles will be processed in one go
1447         throw DataException("resolveBinary - ranks of arguments must match unless one of them is scalar.");    int leftstep=0;       // how far should the left offset advance after each step
1448      }    int rightstep=0;
1449      steps=getNumDPPSample()*max(m_left->getNoValues(),m_right->getNoValues());    int numsteps=0;       // total number of steps for the inner loop
1450      chunksize=1;    // for scalar    int oleftstep=0;  // the o variables refer to the outer loop
1451    }        int orightstep=0; // The outer loop is only required in cases where there is an extended scalar
1452    int leftStep=((leftExp && (!rightExp || rightScalar))? m_right->getNoValues() : 0);    int onumsteps=1;
1453    int rightStep=((rightExp && (!leftExp || leftScalar))? m_left->getNoValues() : 0);    
1454    int resultStep=max(leftStep,rightStep);   // only one (at most) should be !=0    bool LES=(leftExp && leftScalar); // Left is an expanded scalar
1455      bool RES=(rightExp && rightScalar);
1456      bool LS=(!leftExp && leftScalar); // left is a single scalar
1457      bool RS=(!rightExp && rightScalar);
1458      bool LN=(!leftExp && !leftScalar);    // left is a single non-scalar
1459      bool RN=(!rightExp && !rightScalar);
1460      bool LEN=(leftExp && !leftScalar);    // left is an expanded non-scalar
1461      bool REN=(rightExp && !rightScalar);
1462    
1463      if ((LES && RES) || (LEN && REN)) // both are Expanded scalars or both are expanded non-scalars
1464      {
1465        chunksize=m_left->getNumDPPSample()*leftsize;
1466        leftstep=0;
1467        rightstep=0;
1468        numsteps=1;
1469      }
1470      else if (LES || RES)
1471      {
1472        chunksize=1;
1473        if (LES)        // left is an expanded scalar
1474        {
1475            if (RS)
1476            {
1477               leftstep=1;
1478               rightstep=0;
1479               numsteps=m_left->getNumDPPSample();
1480            }
1481            else        // RN or REN
1482            {
1483               leftstep=0;
1484               oleftstep=1;
1485               rightstep=1;
1486               orightstep=(RN ? -(int)rightsize : 0);
1487               numsteps=rightsize;
1488               onumsteps=m_left->getNumDPPSample();
1489            }
1490        }
1491        else        // right is an expanded scalar
1492        {
1493            if (LS)
1494            {
1495               rightstep=1;
1496               leftstep=0;
1497               numsteps=m_right->getNumDPPSample();
1498            }
1499            else
1500            {
1501               rightstep=0;
1502               orightstep=1;
1503               leftstep=1;
1504               oleftstep=(LN ? -(int)leftsize : 0);
1505               numsteps=leftsize;
1506               onumsteps=m_right->getNumDPPSample();
1507            }
1508        }
1509      }
1510      else  // this leaves (LEN, RS), (LEN, RN) and their transposes
1511      {
1512        if (LEN)    // and Right will be a single value
1513        {
1514            chunksize=rightsize;
1515            leftstep=rightsize;
1516            rightstep=0;
1517            numsteps=m_left->getNumDPPSample();
1518            if (RS)
1519            {
1520               numsteps*=leftsize;
1521            }
1522        }
1523        else    // REN
1524        {
1525            chunksize=leftsize;
1526            rightstep=leftsize;
1527            leftstep=0;
1528            numsteps=m_right->getNumDPPSample();
1529            if (LS)
1530            {
1531               numsteps*=rightsize;
1532            }
1533        }
1534      }
1535    
1536      int resultStep=max(leftstep,rightstep);   // only one (at most) should be !=0
1537      // Get the values of sub-expressions      // Get the values of sub-expressions
1538    const ValueType* left=m_left->resolveSample(v,offset,sampleNo,lroffset);    const ValueType* left=m_left->resolveNodeSample(tid,sampleNo,lroffset);  
1539    const ValueType* right=m_right->resolveSample(v,offset+m_samplesize,sampleNo,rroffset); // Note    const ValueType* right=m_right->resolveNodeSample(tid,sampleNo,rroffset);
1540      // the right child starts further along.  LAZYDEBUG(cout << "Post sub calls in " << toString() << endl;)
1541    double* resultp=&(v[offset]);     // results are stored at the vector offset we received  LAZYDEBUG(cout << "shapes=" << DataTypes::shapeToString(m_left->getShape()) << "," << DataTypes::shapeToString(m_right->getShape()) << endl;)
1542    LAZYDEBUG(cout << "chunksize=" << chunksize << endl << "leftstep=" << leftstep << " rightstep=" << rightstep;)
1543    LAZYDEBUG(cout << " numsteps=" << numsteps << endl << "oleftstep=" << oleftstep << " orightstep=" << orightstep;)
1544    LAZYDEBUG(cout << "onumsteps=" << onumsteps << endl;)
1545    LAZYDEBUG(cout << " DPPS=" << m_left->getNumDPPSample() << "," <<m_right->getNumDPPSample() << endl;)
1546    LAZYDEBUG(cout << "" << LS << RS << LN << RN << LES << RES <<LEN << REN <<   endl;)
1547    
1548    LAZYDEBUG(cout << "Left res["<< lroffset<< "]=" << (*left)[lroffset] << endl;)
1549    LAZYDEBUG(cout << "Right res["<< rroffset<< "]=" << (*right)[rroffset] << endl;)
1550    
1551    
1552      roffset=m_samplesize*tid;
1553      double* resultp=&(m_samples[roffset]);        // results are stored at the vector offset we received
1554    switch(m_op)    switch(m_op)
1555    {    {
1556      case ADD:      case ADD:
# Line 1053  LAZYDEBUG(cout << "Resolve binary: " << Line 1571  LAZYDEBUG(cout << "Resolve binary: " <<
1571      default:      default:
1572      throw DataException("Programmer error - resolveBinary can not resolve operator "+opToString(m_op)+".");      throw DataException("Programmer error - resolveBinary can not resolve operator "+opToString(m_op)+".");
1573    }    }
1574    roffset=offset;    LAZYDEBUG(cout << "Result res[" << roffset<< "]" << m_samples[roffset] << endl;)
1575    return &v;    return &m_samples;
1576  }  }
1577    
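The chunksize/step bookkeeping above lets one pair of nested loops cover every combination of expanded, constant, scalar and non-scalar operands: the inner loop walks numsteps chunks of chunksize doubles, each operand advancing by its own stride, while the outer loop (onumsteps iterations) applies a possibly negative correction so an expanded scalar can be replayed against every value of the other operand. The following is a minimal standalone sketch of that traversal only; it uses plain std::vector<double> and a generic functor in place of escript's ValueType and operator dispatch, and the name applyBinaryChunks and its signature are inventions of this illustration, not part of escript.

    #include <algorithm>
    #include <cstddef>
    #include <vector>

    // Minimal sketch of the chunked binary traversal: the inner loop processes
    // 'chunksize' doubles at a time, each operand advances by its own stride,
    // and the outer loop rewinds/advances operands for the expanded-scalar cases.
    template <typename BinOp>
    void applyBinaryChunks(const std::vector<double>& left,  std::ptrdiff_t lroffset,
                           const std::vector<double>& right, std::ptrdiff_t rroffset,
                           std::vector<double>& result,      std::ptrdiff_t roffset,
                           std::size_t chunksize,
                           int numsteps,  int onumsteps,
                           int leftstep,  int rightstep, int resultstep,
                           int oleftstep, int orightstep,
                           BinOp op)
    {
        for (int outer = 0; outer < onumsteps; ++outer)
        {
            for (int inner = 0; inner < numsteps; ++inner)
            {
                std::transform(left.begin()  + lroffset,
                               left.begin()  + lroffset + chunksize,
                               right.begin() + rroffset,
                               result.begin() + roffset, op);
                lroffset += leftstep;       // per-chunk strides
                rroffset += rightstep;
                roffset  += resultstep;
            }
            lroffset += oleftstep;          // outer corrections, possibly negative
            rroffset += orightstep;
        }
    }

For example, the "expanded scalar with unexpanded scalar" branch above corresponds to chunksize=1, numsteps=getNumDPPSample(), leftstep=resultstep=1, onumsteps=1 and all other steps 0, with op being the operator selected by the switch on m_op.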
1578    
 /*  
   \brief Compute the value of the expression (tensor product) for the given sample.  
   \return Vector which stores the value of the subexpression for the given sample.  
   \param v A vector to store intermediate results.  
   \param offset Index in v to begin storing results.  
   \param sampleNo Sample number to evaluate.  
   \param roffset (output parameter) the offset in the return vector where the result begins.  
   
   The return value will be an existing vector so do not deallocate it.  
   If the result is stored in v it should be stored at the offset given.  
   Everything from offset to the end of v should be considered available for this method to use.  
 */  
1579  // This method assumes that any subexpressions which evaluate to Constant or Tagged Data  // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1580  // have already been collapsed to IDENTITY. So we must have at least one expanded child.  // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1581  // unlike the other resolve helpers, we must treat these datapoints separately.  // unlike the other resolve helpers, we must treat these datapoints separately.
1582  DataTypes::ValueType*  const DataTypes::ValueType*
1583  DataLazy::resolveTProd(ValueType& v,  size_t offset, int sampleNo, size_t& roffset) const  DataLazy::resolveNodeTProd(int tid, int sampleNo, size_t& roffset) const
1584  {  {
1585  LAZYDEBUG(cout << "Resolve TensorProduct: " << toString() << endl;)  LAZYDEBUG(cout << "Resolve TensorProduct: " << toString() << endl;)
1586    
# Line 1083  LAZYDEBUG(cout << "Resolve TensorProduct Line 1589  LAZYDEBUG(cout << "Resolve TensorProduct
1589    bool leftExp=(m_left->m_readytype=='E');    bool leftExp=(m_left->m_readytype=='E');
1590    bool rightExp=(m_right->m_readytype=='E');    bool rightExp=(m_right->m_readytype=='E');
1591    int steps=getNumDPPSample();    int steps=getNumDPPSample();
1592    int leftStep=((leftExp && !rightExp)? m_right->getNoValues() : 0);    int leftStep=(leftExp? m_left->getNoValues() : 0);        // do not have scalars as input to this method
1593    int rightStep=((rightExp && !leftExp)? m_left->getNoValues() : 0);    int rightStep=(rightExp?m_right->getNoValues() : 0);
1594    int resultStep=max(leftStep,rightStep);   // only one (at most) should be !=0  
1595      // Get the values of sub-expressions (leave a gap of one sample for the result).    int resultStep=getNoValues();
1596    const ValueType* left=m_left->resolveSample(v,offset+m_samplesize,sampleNo,lroffset);    roffset=m_samplesize*tid;
1597    const ValueType* right=m_right->resolveSample(v,offset+2*m_samplesize,sampleNo,rroffset);    size_t offset=roffset;
1598    double* resultp=&(v[offset]);     // results are stored at the vector offset we received  
1599      const ValueType* left=m_left->resolveNodeSample(tid, sampleNo, lroffset);
1600    
1601      const ValueType* right=m_right->resolveNodeSample(tid, sampleNo, rroffset);
1602    
1603    LAZYDEBUG(cerr << "[Left shape]=" << DataTypes::shapeToString(m_left->getShape()) << "\n[Right shape]=" << DataTypes::shapeToString(m_right->getShape()) << " result=" <<DataTypes::shapeToString(getShape()) <<  endl;
1604    cout << getNoValues() << endl;)
1605    
1606    
1607    LAZYDEBUG(cerr << "Post sub calls: " << toString() << endl;)
1608    LAZYDEBUG(cout << "LeftExp=" << leftExp << " rightExp=" << rightExp << endl;)
1609    LAZYDEBUG(cout << "LeftR=" << m_left->getRank() << " rightExp=" << m_right->getRank() << endl;)
1610    LAZYDEBUG(cout << "LeftSize=" << m_left->getNoValues() << " RightSize=" << m_right->getNoValues() << endl;)
1611    LAZYDEBUG(cout << "m_samplesize=" << m_samplesize << endl;)
1612    LAZYDEBUG(cout << "outputshape=" << DataTypes::shapeToString(getShape()) << endl;)
1613    LAZYDEBUG(cout << "DPPS=" << m_right->getNumDPPSample() <<"."<<endl;)
1614    
1615      double* resultp=&(m_samples[offset]);     // results are stored at the vector offset we received
1616    switch(m_op)    switch(m_op)
1617    {    {
1618      case PROD:      case PROD:
# Line 1097  LAZYDEBUG(cout << "Resolve TensorProduct Line 1620  LAZYDEBUG(cout << "Resolve TensorProduct
1620      {      {
1621            const double *ptr_0 = &((*left)[lroffset]);            const double *ptr_0 = &((*left)[lroffset]);
1622            const double *ptr_1 = &((*right)[rroffset]);            const double *ptr_1 = &((*right)[rroffset]);
1623    
1624    LAZYDEBUG(cout << DataTypes::pointToString(*left, m_left->getShape(),lroffset,"LEFT") << endl;)
1625    LAZYDEBUG(cout << DataTypes::pointToString(*right,m_right->getShape(),rroffset, "RIGHT") << endl;)
1626    
1627            matrix_matrix_product(m_SL, m_SM, m_SR, ptr_0, ptr_1, resultp, m_transpose);            matrix_matrix_product(m_SL, m_SM, m_SR, ptr_0, ptr_1, resultp, m_transpose);
1628    
1629        lroffset+=leftStep;        lroffset+=leftStep;
1630        rroffset+=rightStep;        rroffset+=rightStep;
1631      }      }
# Line 1106  LAZYDEBUG(cout << "Resolve TensorProduct Line 1634  LAZYDEBUG(cout << "Resolve TensorProduct
1634      throw DataException("Programmer error - resolveTProduct can not resolve operator "+opToString(m_op)+".");      throw DataException("Programmer error - resolveTProduct can not resolve operator "+opToString(m_op)+".");
1635    }    }
1636    roffset=offset;    roffset=offset;
1637    return &v;    return &m_samples;
1638    }
1639    
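For each pair of data points, PROD delegates to matrix_matrix_product with the extents m_SL, m_SM and m_SR. Assuming the usual reading of those extents (left outer size, summed size, right outer size) and ignoring the transpose argument, the core contraction looks like the sketch below. The function name, and the column-major indexing, are assumptions of this illustration rather than a statement about escript's actual storage order.

    #include <cstddef>

    // Minimal sketch of a generalised tensor product with SL left values,
    // SM summed values and SR right values (transposes ignored), i.e. the
    // shape of work done for each pair of data points in the loop above.
    void generalTensorProduct(std::size_t SL, std::size_t SM, std::size_t SR,
                              const double* left,   // SL x SM block
                              const double* right,  // SM x SR block
                              double* out)          // SL x SR block
    {
        for (std::size_t i = 0; i < SL; ++i)
        {
            for (std::size_t j = 0; j < SR; ++j)
            {
                double sum = 0.0;
                for (std::size_t k = 0; k < SM; ++k)
                {
                    sum += left[i + SL * k] * right[k + SM * j];
                }
                out[i + SL * j] = sum;
            }
        }
    }

In the code above this contraction is repeated getNumDPPSample() times per sample, with lroffset/rroffset advanced only for expanded operands.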
1640    
1641    const DataTypes::ValueType*
1642    DataLazy::resolveSample(int sampleNo, size_t& roffset) const
1643    {
1644    #ifdef _OPENMP
1645        int tid=omp_get_thread_num();
1646    #else
1647        int tid=0;
1648    #endif
1649    
1650    #ifdef LAZY_STACK_PROF
1651        stackstart[tid]=&tid;
1652        stackend[tid]=&tid;
1653        const DataTypes::ValueType* r=resolveNodeSample(tid, sampleNo, roffset);
1654        size_t d=(size_t)stackstart[tid]-(size_t)stackend[tid];
1655        #pragma omp critical
1656        if (d>maxstackuse)
1657        {
1658    cout << "Max resolve Stack use " << d << endl;
1659            maxstackuse=d;
1660        }
1661        return r;
1662    #else
1663        return resolveNodeSample(tid, sampleNo, roffset);
1664    #endif
1665  }  }
1666    
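The LAZY_STACK_PROF branch above estimates how much stack a resolve consumed by recording the address of a local variable at the entry point and comparing it with the address recorded by the deepest frame reached (the corresponding updates inside the deeper resolveNode* helpers are not shown in this diff). A standalone sketch of the idea follows; it assumes a contiguous, downward-growing call stack, which is platform-dependent, and the names are invented for the illustration.

    #include <cstddef>
    #include <iostream>

    static char* stack_entry = nullptr;   // address of a local in the entry frame
    static char* stack_deep  = nullptr;   // address of a local in the deepest frame

    void recurse(int depth)
    {
        char marker;                      // only its address matters
        stack_deep = &marker;             // every frame writes; the deepest writes last
        if (depth > 0)
        {
            recurse(depth - 1);
        }
    }

    int main()
    {
        char marker;
        stack_entry = &marker;
        recurse(50);
        std::size_t used = reinterpret_cast<std::size_t>(stack_entry)
                         - reinterpret_cast<std::size_t>(stack_deep);
        std::cout << "approx stack use: " << used << " bytes" << std::endl;
        return 0;
    }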
1667    
1668    // This needs to do the work of the identity constructor
1669    void
1670    DataLazy::resolveToIdentity()
1671    {
1672       if (m_op==IDENTITY)
1673        return;
1674       DataReady_ptr p=resolveNodeWorker();
1675       makeIdentity(p);
1676    }
1677    
1678  /*  void DataLazy::makeIdentity(const DataReady_ptr& p)
1679    \brief Compute the value of the expression for the given sample.  {
1680    \return Vector which stores the value of the subexpression for the given sample.     m_op=IDENTITY;
1681    \param v A vector to store intermediate results.     m_axis_offset=0;
1682    \param offset Index in v to begin storing results.     m_transpose=0;
1683    \param sampleNo Sample number to evaluate.     m_SL=m_SM=m_SR=0;
1684    \param roffset (output parameter) the offset in the return vector where the result begins.     m_children=m_height=0;
1685       m_id=p;
1686       if(p->isConstant()) {m_readytype='C';}
1687       else if(p->isExpanded()) {m_readytype='E';}
1688       else if (p->isTagged()) {m_readytype='T';}
1689       else {throw DataException("Unknown DataReady instance in convertToIdentity constructor.");}
1690       m_samplesize=p->getNumDPPSample()*p->getNoValues();
1691       m_left.reset();
1692       m_right.reset();
1693    }
1694    
   The return value will be an existing vector so do not deallocate it.  
 */  
 // the vector and the offset are a place where the method could write its data if it wishes;  
 // it is not obligated to do so. For example, if it has its own storage already, it can use that.  
 // Hence the return value to indicate where the data is actually stored.  
 // Regardless, the storage should be assumed to be used, even if it isn't.  
1695    
1696  // the roffset is the offset within the returned vector where the data begins  DataReady_ptr
1697  const DataTypes::ValueType*  DataLazy::resolve()
 DataLazy::resolveSample(ValueType& v, size_t offset, int sampleNo, size_t& roffset)  
1698  {  {
1699  LAZYDEBUG(cout << "Resolve sample " << toString() << endl;)      resolveToIdentity();
1700      // collapse so we have a 'E' node or an IDENTITY for some other type      return m_id;
1701    if (m_readytype!='E' && m_op!=IDENTITY)  }
1702    
1703    
1704    /* This is really a static method but I think that caused problems in windows */
1705    void
1706    DataLazy::resolveGroupWorker(std::vector<DataLazy*>& dats)
1707    {
1708      if (dats.empty())
1709    {    {
1710      collapse();      return;
1711    }    }
1712    if (m_op==IDENTITY)      vector<DataLazy*> work;
1713      FunctionSpace fs=dats[0]->getFunctionSpace();
1714      bool match=true;
1715      for (int i=dats.size()-1;i>=0;--i)
1716    {    {
1717      const ValueType& vec=m_id->getVector();      if (dats[i]->m_readytype!='E')
1718      if (m_readytype=='C')      {
1719      {          dats[i]->collapse();
1720      roffset=0;      }
1721      return &(vec);      if (dats[i]->m_op!=IDENTITY)
1722      }      {
1723      roffset=m_id->getPointOffset(sampleNo, 0);          work.push_back(dats[i]);
1724      return &(vec);          if (fs!=dats[i]->getFunctionSpace())
1725            {
1726                match=false;
1727            }
1728        }
1729    }    }
1730    if (m_readytype!='E')    if (work.empty())
1731    {    {
1732      throw DataException("Programmer Error - Collapse did not produce an expanded node.");      return;     // no work to do
1733    }    }
1734    switch (getOpgroup(m_op))    if (match)    // all functionspaces match.  Yes I realise this is overly strict
1735      {     // it is possible that dats[0] is one of the objects which we discarded and
1736            // all the other functionspaces match.
1737        vector<DataExpanded*> dep;
1738        vector<ValueType*> vecs;
1739        for (int i=0;i<work.size();++i)
1740        {
1741            dep.push_back(new DataExpanded(fs,work[i]->getShape(), ValueType(work[i]->getNoValues())));
1742            vecs.push_back(&(dep[i]->getVectorRW()));
1743        }
1744        int totalsamples=work[0]->getNumSamples();
1745        const ValueType* res=0; // Storage for answer
1746        int sample;
1747        #pragma omp parallel private(sample, res)
1748        {
1749            size_t roffset=0;
1750            #pragma omp for schedule(static)
1751            for (sample=0;sample<totalsamples;++sample)
1752            {
1753            roffset=0;
1754            int j;
1755            for (j=work.size()-1;j>=0;--j)
1756            {
1757    #ifdef _OPENMP
1758                    res=work[j]->resolveNodeSample(omp_get_thread_num(),sample,roffset);
1759    #else
1760                    res=work[j]->resolveNodeSample(0,sample,roffset);
1761    #endif
1762                    DataVector::size_type outoffset=dep[j]->getPointOffset(sample,0);
1763                    memcpy(&((*vecs[j])[outoffset]),&((*res)[roffset]),work[j]->m_samplesize*sizeof(DataVector::ElementType));
1764            }
1765            }
1766        }
1767        // Now we need to load the new results as identity ops into the lazy nodes
1768        for (int i=work.size()-1;i>=0;--i)
1769        {
1770            work[i]->makeIdentity(boost::dynamic_pointer_cast<DataReady>(dep[i]->getPtr()));
1771        }
1772      }
1773      else  // functionspaces do not match
1774    {    {
1775    case G_UNARY:      for (int i=0;i<work.size();++i)
1776    case G_UNARY_P: return resolveUnary(v, offset,sampleNo,roffset);      {
1777    case G_BINARY: return resolveBinary(v, offset,sampleNo,roffset);          work[i]->resolveToIdentity();
1778    case G_NP1OUT: return resolveNP1OUT(v, offset, sampleNo,roffset);      }
   case G_NP1OUT_P: return resolveNP1OUT_P(v, offset, sampleNo,roffset);  
   case G_TENSORPROD: return resolveTProd(v,offset, sampleNo,roffset);  
   default:  
     throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");  
1779    }    }
1780  }  }
1781    
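resolveGroupWorker batches several lazy nodes that share a FunctionSpace and resolves them sample by sample inside a single parallel region, so one pass over the samples serves the whole group and each result is written straight into its own DataExpanded. The sketch below shows only that loop structure; WorkItem, evaluateSample and resolveBatch are placeholder names invented for the illustration and do not exist in escript.

    #include <cstddef>
    #include <vector>

    // Structural sketch of the group resolve: parallelise over samples, and let
    // each thread evaluate every work item for its sample, writing directly into
    // that item's pre-allocated output storage.
    struct WorkItem
    {
        std::size_t samplesize;           // doubles per sample
        std::vector<double> output;       // totalsamples * samplesize doubles

        WorkItem(std::size_t ss, int totalsamples)
            : samplesize(ss), output(ss * static_cast<std::size_t>(totalsamples)) {}

        void evaluateSample(int sample)   // stand-in for resolveNodeSample + memcpy
        {
            double* out = &output[static_cast<std::size_t>(sample) * samplesize];
            for (std::size_t i = 0; i < samplesize; ++i)
            {
                out[i] = static_cast<double>(sample);   // placeholder computation
            }
        }
    };

    void resolveBatch(std::vector<WorkItem>& work, int totalsamples)
    {
        #pragma omp parallel for schedule(static)
        for (int sample = 0; sample < totalsamples; ++sample)
        {
            for (std::size_t j = 0; j < work.size(); ++j)
            {
                work[j].evaluateSample(sample);
            }
        }
    }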
1782    
1783  // To simplify the memory management, all threads operate on one large vector, rather than one each.  
1784  // Each sample is evaluated independently and copied into the result DataExpanded.  // This version of resolve uses storage in each node to hold results
1785  DataReady_ptr  DataReady_ptr
1786  DataLazy::resolve()  DataLazy::resolveNodeWorker()
1787  {  {
   
 LAZYDEBUG(cout << "Sample size=" << m_samplesize << endl;)  
 LAZYDEBUG(cout << "Buffers=" << m_buffsRequired << endl;)  
   
1788    if (m_readytype!='E')     // if the whole sub-expression is Constant or Tagged, then evaluate it normally    if (m_readytype!='E')     // if the whole sub-expression is Constant or Tagged, then evaluate it normally
1789    {    {
1790      collapse();      collapse();
# Line 1183  LAZYDEBUG(cout << "Buffers=" << m_buffsR Line 1794  LAZYDEBUG(cout << "Buffers=" << m_buffsR
1794      return m_id;      return m_id;
1795    }    }
1796      // from this point on we must have m_op!=IDENTITY and m_readytype=='E'      // from this point on we must have m_op!=IDENTITY and m_readytype=='E'
   size_t threadbuffersize=m_maxsamplesize*(max(1,m_buffsRequired)); // Each thread needs to have enough  
     // storage to evaluate its expression  
   int numthreads=1;  
 #ifdef _OPENMP  
   numthreads=getNumberOfThreads();  
 #endif  
   ValueType v(numthreads*threadbuffersize);  
 LAZYDEBUG(cout << "Buffer created with size=" << v.size() << endl;)  
1797    DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(),  ValueType(getNoValues()));    DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(),  ValueType(getNoValues()));
1798    ValueType& resvec=result->getVector();    ValueType& resvec=result->getVectorRW();
1799    DataReady_ptr resptr=DataReady_ptr(result);    DataReady_ptr resptr=DataReady_ptr(result);
1800    
1801    int sample;    int sample;
   size_t outoffset;     // offset in the output data  
1802    int totalsamples=getNumSamples();    int totalsamples=getNumSamples();
1803    const ValueType* res=0;   // Vector storing the answer    const ValueType* res=0;   // Storage for answer
1804    size_t resoffset=0;       // where in the vector to find the answer  LAZYDEBUG(cout << "Total number of samples=" <<totalsamples << endl;)
1805    #pragma omp parallel for private(sample,resoffset,outoffset,res) schedule(static)    #pragma omp parallel private(sample,res)
1806    for (sample=0;sample<totalsamples;++sample)    {
1807    {      size_t roffset=0;
1808  LAZYDEBUG(cout << "################################# " << sample << endl;)  #ifdef LAZY_STACK_PROF
1809        stackstart[omp_get_thread_num()]=&roffset;
1810        stackend[omp_get_thread_num()]=&roffset;
1811    #endif
1812        #pragma omp for schedule(static)
1813        for (sample=0;sample<totalsamples;++sample)
1814        {
1815            roffset=0;
1816  #ifdef _OPENMP  #ifdef _OPENMP
1817      res=resolveSample(v,threadbuffersize*omp_get_thread_num(),sample,resoffset);              res=resolveNodeSample(omp_get_thread_num(),sample,roffset);
1818  #else  #else
1819      res=resolveSample(v,0,sample,resoffset);   // res would normally be v, but not if its a single IDENTITY op.              res=resolveNodeSample(0,sample,roffset);
1820  #endif  #endif
1821  LAZYDEBUG(cerr << "-------------------------------- " << endl;)  LAZYDEBUG(cout << "Sample #" << sample << endl;)
1822      outoffset=result->getPointOffset(sample,0);  LAZYDEBUG(cout << "Final res[" << roffset<< "]=" << (*res)[roffset] << (*res)[roffset]<< endl; )
1823  LAZYDEBUG(cerr << "offset=" << outoffset << endl;)              DataVector::size_type outoffset=result->getPointOffset(sample,0);
1824      for (unsigned int i=0;i<m_samplesize;++i,++outoffset,++resoffset)   // copy values into the output vector              memcpy(&(resvec[outoffset]),&((*res)[roffset]),m_samplesize*sizeof(DataVector::ElementType));
1825      {      }
1826      resvec[outoffset]=(*res)[resoffset];    }
1827      }  #ifdef LAZY_STACK_PROF
1828  LAZYDEBUG(cerr << "*********************************" << endl;)    for (int i=0;i<getNumberOfThreads();++i)
1829      {
1830        size_t r=((size_t)stackstart[i] - (size_t)stackend[i]);
1831    //  cout << i << " " << stackstart[i] << " .. " << stackend[i] << " = " <<  r << endl;
1832        if (r>maxstackuse)
1833        {
1834            maxstackuse=r;
1835        }
1836    }    }
1837      cout << "Max resolve Stack use=" << maxstackuse << endl;
1838    #endif
1839    return resptr;    return resptr;
1840  }  }
1841    
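Unlike the old scheme of one large shared buffer, resolveNodeWorker relies on each node keeping one sample-sized slot per thread (m_samples, indexed by m_samplesize*tid), so threads never contend for the same memory and a finished sample can be memcpy'd straight into the output vector at its point offset. The following is a simplified standalone sketch of that layout; the class and function names are invented for the illustration.

    #include <cstddef>
    #include <cstring>
    #include <vector>

    // Per-node scratch with one sample-sized slot per OpenMP thread, mirroring
    // the m_samples / m_samplesize*tid layout used by the resolveNode* helpers.
    class NodeScratch
    {
    public:
        NodeScratch(std::size_t samplesize, int numthreads)
            : m_samplesize(samplesize),
              m_samples(samplesize * static_cast<std::size_t>(numthreads)) {}

        double* slot(int tid)             // this thread's private slot
        {
            return &m_samples[m_samplesize * static_cast<std::size_t>(tid)];
        }

        std::size_t samplesize() const { return m_samplesize; }

    private:
        std::size_t m_samplesize;
        std::vector<double> m_samples;
    };

    // Copy one resolved sample into its place in the flat result vector,
    // mirroring the memcpy in the worker loop above.
    inline void storeSample(std::vector<double>& result, std::size_t outoffset,
                            const double* sample, std::size_t samplesize)
    {
        std::memcpy(&result[outoffset], sample, samplesize * sizeof(double));
    }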
# Line 1224  std::string Line 1843  std::string
1843  DataLazy::toString() const  DataLazy::toString() const
1844  {  {
1845    ostringstream oss;    ostringstream oss;
1846    oss << "Lazy Data:";    oss << "Lazy Data: [depth=" << m_height<< "] ";
1847    intoString(oss);    switch (escriptParams.getLAZY_STR_FMT())
1848      {
1849      case 1:   // tree format
1850        oss << endl;
1851        intoTreeString(oss,"");
1852        break;
1853      case 2:   // just the depth
1854        break;
1855      default:
1856        intoString(oss);
1857        break;
1858      }
1859    return oss.str();    return oss.str();
1860  }  }
1861    
# Line 1233  DataLazy::toString() const Line 1863  DataLazy::toString() const
1863  void  void
1864  DataLazy::intoString(ostringstream& oss) const  DataLazy::intoString(ostringstream& oss) const
1865  {  {
1866    //    oss << "[" << m_children <<";"<<m_height <<"]";
1867    switch (getOpgroup(m_op))    switch (getOpgroup(m_op))
1868    {    {
1869    case G_IDENTITY:    case G_IDENTITY:
# Line 1265  DataLazy::intoString(ostringstream& oss) Line 1896  DataLazy::intoString(ostringstream& oss)
1896    case G_UNARY_P:    case G_UNARY_P:
1897    case G_NP1OUT:    case G_NP1OUT:
1898    case G_NP1OUT_P:    case G_NP1OUT_P:
1899      case G_REDUCTION:
1900      oss << opToString(m_op) << '(';      oss << opToString(m_op) << '(';
1901      m_left->intoString(oss);      m_left->intoString(oss);
1902      oss << ')';      oss << ')';
# Line 1276  DataLazy::intoString(ostringstream& oss) Line 1908  DataLazy::intoString(ostringstream& oss)
1908      m_right->intoString(oss);      m_right->intoString(oss);
1909      oss << ')';      oss << ')';
1910      break;      break;
1911      case G_NP1OUT_2P:
1912        oss << opToString(m_op) << '(';
1913        m_left->intoString(oss);
1914        oss << ", " << m_axis_offset << ", " << m_transpose;
1915        oss << ')';
1916        break;
1917      case G_CONDEVAL:
1918        oss << opToString(m_op)<< '(' ;
1919        m_mask->intoString(oss);
1920        oss << " ? ";
1921        m_left->intoString(oss);
1922        oss << " : ";
1923        m_right->intoString(oss);
1924        oss << ')';
1925        break;
1926      default:
1927        oss << "UNKNOWN";
1928      }
1929    }
1930    
1931    
1932    void
1933    DataLazy::intoTreeString(ostringstream& oss, string indent) const
1934    {
1935      oss << '[' << m_rank << ':' << setw(3) << m_samplesize << "] " << indent;
1936      switch (getOpgroup(m_op))
1937      {
1938      case G_IDENTITY:
1939        if (m_id->isExpanded())
1940        {
1941           oss << "E";
1942        }
1943        else if (m_id->isTagged())
1944        {
1945          oss << "T";
1946        }
1947        else if (m_id->isConstant())
1948        {
1949          oss << "C";
1950        }
1951        else
1952        {
1953          oss << "?";
1954        }
1955        oss << '@' << m_id.get() << endl;
1956        break;
1957      case G_BINARY:
1958        oss << opToString(m_op) << endl;
1959        indent+='.';
1960        m_left->intoTreeString(oss, indent);
1961        m_right->intoTreeString(oss, indent);
1962        break;
1963      case G_UNARY:
1964      case G_UNARY_P:
1965      case G_NP1OUT:
1966      case G_NP1OUT_P:
1967      case G_REDUCTION:
1968        oss << opToString(m_op) << endl;
1969        indent+='.';
1970        m_left->intoTreeString(oss, indent);
1971        break;
1972      case G_TENSORPROD:
1973        oss << opToString(m_op) << endl;
1974        indent+='.';
1975        m_left->intoTreeString(oss, indent);
1976        m_right->intoTreeString(oss, indent);
1977        break;
1978      case G_NP1OUT_2P:
1979        oss << opToString(m_op) << ", " << m_axis_offset << ", " << m_transpose<< endl;
1980        indent+='.';
1981        m_left->intoTreeString(oss, indent);
1982        break;
1983    default:    default:
1984      oss << "UNKNOWN";      oss << "UNKNOWN";
1985    }    }
1986  }  }
1987    
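intoTreeString renders the expression tree by growing the indent with one '.' per level of recursion, so a node's depth is visible in the left margin of the printout. A minimal standalone sketch of the same scheme follows; the Node type and the labels are invented for the illustration.

    #include <iostream>
    #include <memory>
    #include <sstream>
    #include <string>

    // Each recursive call appends one '.' to the indent before visiting children.
    struct Node
    {
        std::string label;
        std::shared_ptr<Node> left;
        std::shared_ptr<Node> right;

        void intoTreeString(std::ostringstream& oss, std::string indent) const
        {
            oss << indent << label << '\n';
            indent += '.';
            if (left)  { left->intoTreeString(oss, indent); }
            if (right) { right->intoTreeString(oss, indent); }
        }
    };

    int main()
    {
        Node add;
        add.label = "add";
        add.left  = std::make_shared<Node>(Node{"C (constant leaf)", nullptr, nullptr});
        add.right = std::make_shared<Node>(Node{"E (expanded leaf)", nullptr, nullptr});
        std::ostringstream oss;
        add.intoTreeString(oss, "");
        std::cout << oss.str();   // add, then .C (constant leaf), then .E (expanded leaf)
        return 0;
    }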
1988    
1989  DataAbstract*  DataAbstract*
1990  DataLazy::deepCopy()  DataLazy::deepCopy()
1991  {  {
1992    switch (getOpgroup(m_op))    switch (getOpgroup(m_op))
1993    {    {
1994    case G_IDENTITY:  return new DataLazy(m_id->deepCopy()->getPtr());    case G_IDENTITY:  return new DataLazy(m_id->deepCopy()->getPtr());
1995    case G_UNARY: return new DataLazy(m_left->deepCopy()->getPtr(),m_op);    case G_UNARY:
1996      case G_REDUCTION:      return new DataLazy(m_left->deepCopy()->getPtr(),m_op);
1997      case G_UNARY_P:   return new DataLazy(m_left->deepCopy()->getPtr(), m_op, m_tol);
1998    case G_BINARY:    return new DataLazy(m_left->deepCopy()->getPtr(),m_right->deepCopy()->getPtr(),m_op);    case G_BINARY:    return new DataLazy(m_left->deepCopy()->getPtr(),m_right->deepCopy()->getPtr(),m_op);
1999    case G_NP1OUT: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(),m_op);    case G_NP1OUT: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(),m_op);
2000    case G_TENSORPROD: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);    case G_TENSORPROD: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
2001      case G_NP1OUT_P:   return new DataLazy(m_left->deepCopy()->getPtr(),m_op,  m_axis_offset);
2002      case G_NP1OUT_2P:  return new DataLazy(m_left->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
2003    default:    default:
2004      throw DataException("Programmer error - do not know how to deepcopy operator "+opToString(m_op)+".");      throw DataException("Programmer error - do not know how to deepcopy operator "+opToString(m_op)+".");
2005    }    }
2006  }  }
2007    
2008    
2009    
2010  // There is no single, natural interpretation of getLength on DataLazy.  // There is no single, natural interpretation of getLength on DataLazy.
2011  // Instances of DataReady can look at the size of their vectors.  // Instances of DataReady can look at the size of their vectors.
2012  // For lazy though, it could be the size the data would be if it were resolved;  // For lazy though, it could be the size the data would be if it were resolved;
# Line 1372  DataLazy::getPointOffset(int sampleNo, Line 2082  DataLazy::getPointOffset(int sampleNo,
2082    throw DataException("Programmer error - getPointOffset on lazy data may require collapsing (but this object is marked const).");    throw DataException("Programmer error - getPointOffset on lazy data may require collapsing (but this object is marked const).");
2083  }  }
2084    
2085  // It would seem that DataTagged will need to be treated differently since even after setting all tags  
2086  // to zero, all the tags from all the DataTags would be in the result.  // I have decided to let Data:: handle this issue.
 // However, since they all have the same value (0), whether they are there or not should not matter.  
 // So I have decided that for all types this method will create a constant 0.  
 // It can be promoted up as required.  
 // A possible efficiency concern might be expanded->constant->expanded, which involves extra memory management,  
 // but we can deal with that if it arises.  
2087  void  void
2088  DataLazy::setToZero()  DataLazy::setToZero()
2089  {  {
2090    DataTypes::ValueType v(getNoValues(),0);  //   DataTypes::ValueType v(getNoValues(),0);
2091    m_id=DataReady_ptr(new DataConstant(getFunctionSpace(),getShape(),v));  //   m_id=DataReady_ptr(new DataConstant(getFunctionSpace(),getShape(),v));
2092    m_op=IDENTITY;  //   m_op=IDENTITY;
2093    m_right.reset();    //   m_right.reset();  
2094    m_left.reset();  //   m_left.reset();
2095    m_readytype='C';  //   m_readytype='C';
2096    m_buffsRequired=1;  //   m_buffsRequired=1;
2097    
2098      (void)privdebug;  // to stop the compiler complaining about unused privdebug
2099      throw DataException("Programmer error - setToZero not supported for DataLazy (DataLazy objects should be read only).");
2100    }
2101    
2102    bool
2103    DataLazy::actsExpanded() const
2104    {
2105        return (m_readytype=='E');
2106  }  }
2107    
2108  }   // end namespace  }   // end namespace
