/[escript]/branches/clazy/escriptcore/src/DataLazy.cpp
ViewVC logotype

Diff of /branches/clazy/escriptcore/src/DataLazy.cpp

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

trunk/escript/src/DataLazy.cpp revision 2082 by caltinay, Fri Nov 21 01:46:05 2008 UTC trunk/escriptcore/src/DataLazy.cpp revision 5464 by jfenwick, Sat Feb 14 00:25:03 2015 UTC
# Line 1  Line 1 
1    
2  /*******************************************************  /*****************************************************************************
3  *  *
4  * Copyright (c) 2003-2008 by University of Queensland  * Copyright (c) 2003-2015 by University of Queensland
5  * Earth Systems Science Computational Center (ESSCC)  * http://www.uq.edu.au
 * http://www.uq.edu.au/esscc  
6  *  *
7  * Primary Business: Queensland, Australia  * Primary Business: Queensland, Australia
8  * Licensed under the Open Software License version 3.0  * Licensed under the Open Software License version 3.0
9  * http://www.opensource.org/licenses/osl-3.0.php  * http://www.opensource.org/licenses/osl-3.0.php
10  *  *
11  *******************************************************/  * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12    * Development 2012-2013 by School of Earth Sciences
13    * Development from 2014 by Centre for Geoscience Computing (GeoComp)
14    *
15    *****************************************************************************/
16    
17    #define ESNEEDPYTHON
18    #include "esysUtils/first.h"
19    
20  #include "DataLazy.h"  #include "DataLazy.h"
21  #ifdef USE_NETCDF  #include "esysUtils/Esys_MPI.h"
 #include <netcdfcpp.h>  
 #endif  
 #ifdef PASO_MPI  
 #include <mpi.h>  
 #endif  
22  #ifdef _OPENMP  #ifdef _OPENMP
23  #include <omp.h>  #include <omp.h>
24  #endif  #endif
# Line 28  Line 28 
28  #include "UnaryFuncs.h"     // for escript::fsign  #include "UnaryFuncs.h"     // for escript::fsign
29  #include "Utils.h"  #include "Utils.h"
30    
31    #include "EscriptParams.h"
32    
33    #ifdef USE_NETCDF
34    #include <netcdfcpp.h>
35    #endif
36    
37    #include <iomanip>      // for some fancy formatting in debug
38    
39    // #define LAZYDEBUG(X) if (privdebug){X;}
40    #define LAZYDEBUG(X)
41    namespace
42    {
43    bool privdebug=false;
44    
45    #define ENABLEDEBUG privdebug=true;
46    #define DISABLEDEBUG privdebug=false;
47    }
48    
49    // #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {cerr << "\n!!!!!!! SIZE LIMIT EXCEEDED " << m_children << ";" << m_height << endl << toString() << endl;resolveToIdentity();}
50    
51    // #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {cerr << "SIZE LIMIT EXCEEDED " << m_height << endl;resolveToIdentity();}
52    
53    
54    #define SIZELIMIT if (m_height>escript::escriptParams.getTOO_MANY_LEVELS())  {if (escript::escriptParams.getLAZY_VERBOSE()){cerr << "SIZE LIMIT EXCEEDED height=" << m_height << endl;}resolveToIdentity();}
55    
56  /*  /*
57  How does DataLazy work?  How does DataLazy work?
58  ~~~~~~~~~~~~~~~~~~~~~~~  ~~~~~~~~~~~~~~~~~~~~~~~
# Line 39  A special operation, IDENTITY, stores an Line 64  A special operation, IDENTITY, stores an
64  This means that all "internal" nodes in the structure are instances of DataLazy.  This means that all "internal" nodes in the structure are instances of DataLazy.
65    
66  Each operation has a string representation as well as an opgroup - eg G_IDENTITY, G_BINARY, ...  Each operation has a string representation as well as an opgroup - eg G_IDENTITY, G_BINARY, ...
67  Note that IDENITY is not considered a unary operation.  Note that IDENTITY is not considered a unary operation.
68    
69  I am avoiding calling the structure formed a tree because it is not guaranteed to be one (eg c=a+a).  I am avoiding calling the structure formed a tree because it is not guaranteed to be one (eg c=a+a).
70  It must however form a DAG (directed acyclic graph).  It must however form a DAG (directed acyclic graph).
# Line 48  I will refer to individual DataLazy obje Line 73  I will refer to individual DataLazy obje
73  Each node also stores:  Each node also stores:
74  - m_readytype \in {'E','T','C','?'} ~ indicates what sort of DataReady would be produced if the expression was  - m_readytype \in {'E','T','C','?'} ~ indicates what sort of DataReady would be produced if the expression was
75      evaluated.      evaluated.
76  - m_buffsrequired ~ the larged number of samples which would need to be kept simultaneously in order to  - m_buffsrequired ~ the large number of samples which would need to be kept simultaneously in order to
77      evaluate the expression.      evaluate the expression.
78  - m_samplesize ~ the number of doubles stored in a sample.  - m_samplesize ~ the number of doubles stored in a sample.
79    
# Line 70  The convention that I use, is that the r Line 95  The convention that I use, is that the r
95  For expressions which evaluate to Constant or Tagged, there is a different evaluation method.  For expressions which evaluate to Constant or Tagged, there is a different evaluation method.
96  The collapse method invokes the (non-lazy) operations on the Data class to evaluate the expression.  The collapse method invokes the (non-lazy) operations on the Data class to evaluate the expression.
97    
98  To add a new operator you need to do the following (plus anything I might have forgotten):  To add a new operator you need to do the following (plus anything I might have forgotten - adding a new group for example):
99  1) Add to the ES_optype.  1) Add to the ES_optype.
100  2) determine what opgroup your operation belongs to (X)  2) determine what opgroup your operation belongs to (X)
101  3) add a string for the op to the end of ES_opstrings  3) add a string for the op to the end of ES_opstrings
# Line 90  namespace escript Line 115  namespace escript
115  namespace  namespace
116  {  {
117    
118    
119    // enabling this will print out when ever the maximum stacksize used by resolve increases
120    // it assumes _OPENMP is also in use
121    //#define LAZY_STACK_PROF
122    
123    
124    
125    #ifndef _OPENMP
126      #ifdef LAZY_STACK_PROF
127      #undef LAZY_STACK_PROF
128      #endif
129    #endif
130    
131    
132    #ifdef LAZY_STACK_PROF
133    std::vector<void*> stackstart(getNumberOfThreads());
134    std::vector<void*> stackend(getNumberOfThreads());
135    size_t maxstackuse=0;
136    #endif
137    
138  enum ES_opgroup  enum ES_opgroup
139  {  {
140     G_UNKNOWN,     G_UNKNOWN,
141     G_IDENTITY,     G_IDENTITY,
142     G_BINARY,        // pointwise operations with two arguments     G_BINARY,        // pointwise operations with two arguments
143     G_UNARY,     // pointwise operations with one argument     G_UNARY,     // pointwise operations with one argument
144       G_UNARY_P,       // pointwise operations with one argument, requiring a parameter
145     G_NP1OUT,        // non-pointwise op with one output     G_NP1OUT,        // non-pointwise op with one output
146     G_TENSORPROD     // general tensor product     G_NP1OUT_P,      // non-pointwise op with one output requiring a parameter
147       G_TENSORPROD,    // general tensor product
148       G_NP1OUT_2P,     // non-pointwise op with one output requiring two params
149       G_REDUCTION,     // non-pointwise unary op with a scalar output
150       G_CONDEVAL
151  };  };
152    
153    
# Line 108  string ES_opstrings[]={"UNKNOWN","IDENTI Line 158  string ES_opstrings[]={"UNKNOWN","IDENTI
158              "asin","acos","atan","sinh","cosh","tanh","erf",              "asin","acos","atan","sinh","cosh","tanh","erf",
159              "asinh","acosh","atanh",              "asinh","acosh","atanh",
160              "log10","log","sign","abs","neg","pos","exp","sqrt",              "log10","log","sign","abs","neg","pos","exp","sqrt",
161              "1/","where>0","where<0","where>=0","where<=0",              "1/","where>0","where<0","where>=0","where<=0", "where<>0","where=0",
162              "symmetric","nonsymmetric",              "symmetric","nonsymmetric",
163              "prod"};              "prod",
164  int ES_opcount=36;              "transpose", "trace",
165                "swapaxes",
166                "minval", "maxval",
167                "condEval"};
168    int ES_opcount=44;
169  ES_opgroup opgroups[]={G_UNKNOWN,G_IDENTITY,G_BINARY,G_BINARY,G_BINARY,G_BINARY, G_BINARY,  ES_opgroup opgroups[]={G_UNKNOWN,G_IDENTITY,G_BINARY,G_BINARY,G_BINARY,G_BINARY, G_BINARY,
170              G_UNARY,G_UNARY,G_UNARY, //10              G_UNARY,G_UNARY,G_UNARY, //10
171              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,    // 17              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,    // 17
172              G_UNARY,G_UNARY,G_UNARY,                    // 20              G_UNARY,G_UNARY,G_UNARY,                    // 20
173              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,        // 28              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,    // 28
174              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,            // 33              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY, G_UNARY_P, G_UNARY_P,      // 35
175              G_NP1OUT,G_NP1OUT,              G_NP1OUT,G_NP1OUT,
176              G_TENSORPROD};              G_TENSORPROD,
177                G_NP1OUT_P, G_NP1OUT_P,
178                G_NP1OUT_2P,
179                G_REDUCTION, G_REDUCTION,
180                G_CONDEVAL};
181  inline  inline
182  ES_opgroup  ES_opgroup
183  getOpgroup(ES_optype op)  getOpgroup(ES_optype op)
# Line 140  resultFS(DataAbstract_ptr left, DataAbst Line 198  resultFS(DataAbstract_ptr left, DataAbst
198    FunctionSpace r=right->getFunctionSpace();    FunctionSpace r=right->getFunctionSpace();
199    if (l!=r)    if (l!=r)
200    {    {
201      if (r.probeInterpolation(l))      signed char res=r.getDomain()->preferredInterpolationOnDomain(r.getTypeCode(), l.getTypeCode());
202        if (res==1)
203      {      {
204      return l;      return l;
205      }      }
206      if (l.probeInterpolation(r))      if (res==-1)
207      {      {
208      return r;      return r;
209      }      }
# Line 164  resultShape(DataAbstract_ptr left, DataA Line 223  resultShape(DataAbstract_ptr left, DataA
223        {        {
224          throw DataException("Shapes not the name - shapes must match for (point)binary operations.");          throw DataException("Shapes not the name - shapes must match for (point)binary operations.");
225        }        }
226    
227        if (left->getRank()==0)   // we need to allow scalar * anything        if (left->getRank()==0)   // we need to allow scalar * anything
228        {        {
229          return right->getShape();          return right->getShape();
# Line 177  resultShape(DataAbstract_ptr left, DataA Line 237  resultShape(DataAbstract_ptr left, DataA
237      return left->getShape();      return left->getShape();
238  }  }
239    
240    // return the shape for "op left"
241    
242    DataTypes::ShapeType
243    resultShape(DataAbstract_ptr left, ES_optype op, int axis_offset)
244    {
245        switch(op)
246        {
247            case TRANS:
248           {            // for the scoping of variables
249            const DataTypes::ShapeType& s=left->getShape();
250            DataTypes::ShapeType sh;
251            int rank=left->getRank();
252            if (axis_offset<0 || axis_offset>rank)
253            {
254                stringstream e;
255                e << "Error - Data::transpose must have 0 <= axis_offset <= rank=" << rank;
256                throw DataException(e.str());
257            }
258            for (int i=0; i<rank; i++)
259            {
260               int index = (axis_offset+i)%rank;
261               sh.push_back(s[index]); // Append to new shape
262            }
263            return sh;
264           }
265        break;
266        case TRACE:
267           {
268            int rank=left->getRank();
269            if (rank<2)
270            {
271               throw DataException("Trace can only be computed for objects with rank 2 or greater.");
272            }
273            if ((axis_offset>rank-2) || (axis_offset<0))
274            {
275               throw DataException("Trace: axis offset must lie between 0 and rank-2 inclusive.");
276            }
277            if (rank==2)
278            {
279               return DataTypes::scalarShape;
280            }
281            else if (rank==3)
282            {
283               DataTypes::ShapeType sh;
284                   if (axis_offset==0)
285               {
286                    sh.push_back(left->getShape()[2]);
287                   }
288                   else     // offset==1
289               {
290                sh.push_back(left->getShape()[0]);
291                   }
292               return sh;
293            }
294            else if (rank==4)
295            {
296               DataTypes::ShapeType sh;
297               const DataTypes::ShapeType& s=left->getShape();
298                   if (axis_offset==0)
299               {
300                    sh.push_back(s[2]);
301                    sh.push_back(s[3]);
302                   }
303                   else if (axis_offset==1)
304               {
305                    sh.push_back(s[0]);
306                    sh.push_back(s[3]);
307                   }
308               else     // offset==2
309               {
310                sh.push_back(s[0]);
311                sh.push_back(s[1]);
312               }
313               return sh;
314            }
315            else        // unknown rank
316            {
317               throw DataException("Error - Data::trace can only be calculated for rank 2, 3 or 4 object.");
318            }
319           }
320        break;
321            default:
322        throw DataException("Programmer error - resultShape(left,op) can't compute shapes for operator "+opToString(op)+".");
323        }
324    }
325    
326    DataTypes::ShapeType
327    SwapShape(DataAbstract_ptr left, const int axis0, const int axis1)
328    {
329         // This code taken from the Data.cpp swapaxes() method
330         // Some of the checks are probably redundant here
331         int axis0_tmp,axis1_tmp;
332         const DataTypes::ShapeType& s=left->getShape();
333         DataTypes::ShapeType out_shape;
334         // Here's the equivalent of python s_out=s[axis_offset:]+s[:axis_offset]
335         // which goes thru all shape vector elements starting with axis_offset (at index=rank wrap around to 0)
336         int rank=left->getRank();
337         if (rank<2) {
338            throw DataException("Error - Data::swapaxes argument must have at least rank 2.");
339         }
340         if (axis0<0 || axis0>rank-1) {
341            stringstream e;
342            e << "Error - Data::swapaxes: axis0 must be between 0 and rank-1=" << (rank-1);
343            throw DataException(e.str());
344         }
345         if (axis1<0 || axis1>rank-1) {
346            stringstream e;
347            e << "Error - Data::swapaxes: axis1 must be between 0 and rank-1=" << (rank-1);
348            throw DataException(e.str());
349         }
350         if (axis0 == axis1) {
351             throw DataException("Error - Data::swapaxes: axis indices must be different.");
352         }
353         if (axis0 > axis1) {
354             axis0_tmp=axis1;
355             axis1_tmp=axis0;
356         } else {
357             axis0_tmp=axis0;
358             axis1_tmp=axis1;
359         }
360         for (int i=0; i<rank; i++) {
361           if (i == axis0_tmp) {
362              out_shape.push_back(s[axis1_tmp]);
363           } else if (i == axis1_tmp) {
364              out_shape.push_back(s[axis0_tmp]);
365           } else {
366              out_shape.push_back(s[i]);
367           }
368         }
369        return out_shape;
370    }
371    
372    
373  // determine the output shape for the general tensor product operation  // determine the output shape for the general tensor product operation
374  // the additional parameters return information required later for the product  // the additional parameters return information required later for the product
375  // the majority of this code is copy pasted from C_General_Tensor_Product  // the majority of this code is copy pasted from C_General_Tensor_Product
# Line 197  GTPShape(DataAbstract_ptr left, DataAbst Line 390  GTPShape(DataAbstract_ptr left, DataAbst
390    else if (transpose == 2)  { start1 = rank1-axis_offset; }    else if (transpose == 2)  { start1 = rank1-axis_offset; }
391    else              { throw DataException("DataLazy GeneralTensorProduct Constructor: Error - transpose should be 0, 1 or 2"); }    else              { throw DataException("DataLazy GeneralTensorProduct Constructor: Error - transpose should be 0, 1 or 2"); }
392    
393      if (rank0<axis_offset)
394      {
395        throw DataException("DataLazy GeneralTensorProduct Constructor: Error - rank of left < axisoffset");
396      }
397    
398    // Adjust the shapes for transpose    // Adjust the shapes for transpose
399    DataTypes::ShapeType tmpShape0(rank0);    // pre-sizing the vectors rather    DataTypes::ShapeType tmpShape0(rank0);    // pre-sizing the vectors rather
# Line 226  GTPShape(DataAbstract_ptr left, DataAbst Line 423  GTPShape(DataAbstract_ptr left, DataAbst
423       for (int i=0; i<rank0-axis_offset; i++, ++out_index) { shape2[out_index]=tmpShape0[i]; } // First part of arg_0_Z       for (int i=0; i<rank0-axis_offset; i++, ++out_index) { shape2[out_index]=tmpShape0[i]; } // First part of arg_0_Z
424       for (int i=axis_offset; i<rank1; i++, ++out_index)   { shape2[out_index]=tmpShape1[i]; } // Last part of arg_1_Z       for (int i=axis_offset; i<rank1; i++, ++out_index)   { shape2[out_index]=tmpShape1[i]; } // Last part of arg_1_Z
425    }    }
   return shape2;  
 }  
426    
427      if (shape2.size()>ESCRIPT_MAX_DATA_RANK)
428      {
429         ostringstream os;
430         os << "C_GeneralTensorProduct: Error - Attempt to create a rank " << shape2.size() << " object. The maximum rank is " << ESCRIPT_MAX_DATA_RANK << ".";
431         throw DataException(os.str());
432      }
433    
434  // determine the number of points in the result of "left op right"    return shape2;
 // note that determining the resultLength for G_TENSORPROD is more complex and will not be processed here  
 // size_t  
 // resultLength(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)  
 // {  
 //    switch (getOpgroup(op))  
 //    {  
 //    case G_BINARY: return left->getLength();  
 //    case G_UNARY: return left->getLength();  
 //    case G_NP1OUT: return left->getLength();  
 //    default:  
 //  throw DataException("Programmer Error - attempt to getLength() for operator "+opToString(op)+".");  
 //    }  
 // }  
   
 // determine the number of samples requires to evaluate an expression combining left and right  
 // NP1OUT needs an extra buffer because we can't write the answers over the top of the input.  
 // The same goes for G_TENSORPROD  
 int  
 calcBuffs(const DataLazy_ptr& left, const DataLazy_ptr& right, ES_optype op)  
 {  
    switch(getOpgroup(op))  
    {  
    case G_IDENTITY: return 1;  
    case G_BINARY: return max(left->getBuffsRequired(),right->getBuffsRequired()+1);  
    case G_UNARY: return max(left->getBuffsRequired(),1);  
    case G_NP1OUT: return 1+max(left->getBuffsRequired(),1);  
    case G_TENSORPROD: return 1+max(left->getBuffsRequired(),right->getBuffsRequired()+1);  
    default:  
     throw DataException("Programmer Error - attempt to calcBuffs() for operator "+opToString(op)+".");  
    }  
435  }  }
436    
   
437  }   // end anonymous namespace  }   // end anonymous namespace
438    
439    
# Line 279  opToString(ES_optype op) Line 449  opToString(ES_optype op)
449    return ES_opstrings[op];    return ES_opstrings[op];
450  }  }
451    
452    void DataLazy::LazyNodeSetup()
453    {
454    #ifdef _OPENMP
455        int numthreads=omp_get_max_threads();
456        m_samples.resize(numthreads*m_samplesize);
457        m_sampleids=new int[numthreads];
458        for (int i=0;i<numthreads;++i)
459        {
460            m_sampleids[i]=-1;  
461        }
462    #else
463        m_samples.resize(m_samplesize);
464        m_sampleids=new int[1];
465        m_sampleids[0]=-1;
466    #endif  // _OPENMP
467    }
468    
469    
470    // Creates an identity node
471  DataLazy::DataLazy(DataAbstract_ptr p)  DataLazy::DataLazy(DataAbstract_ptr p)
472      : parent(p->getFunctionSpace(),p->getShape()),      : parent(p->getFunctionSpace(),p->getShape())
473      m_op(IDENTITY),      ,m_sampleids(0),
474      m_axis_offset(0),      m_samples(1)
     m_transpose(0),  
     m_SL(0), m_SM(0), m_SR(0)  
475  {  {
476     if (p->isLazy())     if (p->isLazy())
477     {     {
# Line 296  DataLazy::DataLazy(DataAbstract_ptr p) Line 482  DataLazy::DataLazy(DataAbstract_ptr p)
482     }     }
483     else     else
484     {     {
485      m_id=dynamic_pointer_cast<DataReady>(p);      p->makeLazyShared();
486      if(p->isConstant()) {m_readytype='C';}      DataReady_ptr dr=dynamic_pointer_cast<DataReady>(p);
487      else if(p->isExpanded()) {m_readytype='E';}      makeIdentity(dr);
488      else if (p->isTagged()) {m_readytype='T';}  LAZYDEBUG(cout << "Wrapping " << dr.get() << " id=" << m_id.get() << endl;)
     else {throw DataException("Unknown DataReady instance in DataLazy constructor.");}  
489     }     }
490     m_buffsRequired=1;  LAZYDEBUG(cout << "(1)Lazy created with " << m_samplesize << endl;)
    m_samplesize=getNumDPPSample()*getNoValues();  
    m_maxsamplesize=m_samplesize;  
 cout << "(1)Lazy created with " << m_samplesize << endl;  
491  }  }
492    
   
   
   
493  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op)  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op)
494      : parent(left->getFunctionSpace(),left->getShape()),      : parent(left->getFunctionSpace(),(getOpgroup(op)!=G_REDUCTION)?left->getShape():DataTypes::scalarShape),
495      m_op(op),      m_op(op),
496      m_axis_offset(0),      m_axis_offset(0),
497      m_transpose(0),      m_transpose(0),
498      m_SL(0), m_SM(0), m_SR(0)      m_SL(0), m_SM(0), m_SR(0)
499  {  {
500     if ((getOpgroup(op)!=G_UNARY) && (getOpgroup(op)!=G_NP1OUT))     if ((getOpgroup(op)!=G_UNARY) && (getOpgroup(op)!=G_NP1OUT) && (getOpgroup(op)!=G_REDUCTION))
501     {     {
502      throw DataException("Programmer error - constructor DataLazy(left, op) will only process UNARY operations.");      throw DataException("Programmer error - constructor DataLazy(left, op) will only process UNARY operations.");
503     }     }
# Line 334  DataLazy::DataLazy(DataAbstract_ptr left Line 513  DataLazy::DataLazy(DataAbstract_ptr left
513     }     }
514     m_readytype=lleft->m_readytype;     m_readytype=lleft->m_readytype;
515     m_left=lleft;     m_left=lleft;
    m_buffsRequired=calcBuffs(m_left, m_right,m_op); // yeah m_right will be null at this point  
516     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
517     m_maxsamplesize=max(m_samplesize,m_left->getMaxSampleSize());     m_children=m_left->m_children+1;
518       m_height=m_left->m_height+1;
519       LazyNodeSetup();
520       SIZELIMIT
521  }  }
522    
523    
# Line 346  DataLazy::DataLazy(DataAbstract_ptr left Line 527  DataLazy::DataLazy(DataAbstract_ptr left
527      m_op(op),      m_op(op),
528      m_SL(0), m_SM(0), m_SR(0)      m_SL(0), m_SM(0), m_SR(0)
529  {  {
530    LAZYDEBUG(cout << "Forming operator with " << left.get() << " " << right.get() << endl;)
531     if ((getOpgroup(op)!=G_BINARY))     if ((getOpgroup(op)!=G_BINARY))
532     {     {
533      throw DataException("Programmer error - constructor DataLazy(left, right, op) will only process BINARY operations.");      throw DataException("Programmer error - constructor DataLazy(left, right, op) will only process BINARY operations.");
# Line 362  DataLazy::DataLazy(DataAbstract_ptr left Line 544  DataLazy::DataLazy(DataAbstract_ptr left
544     {     {
545      Data tmp(Data(right),getFunctionSpace());      Data tmp(Data(right),getFunctionSpace());
546      right=tmp.borrowDataPtr();      right=tmp.borrowDataPtr();
547    LAZYDEBUG(cout << "Right interpolation required " << right.get() << endl;)
548     }     }
549     left->operandCheck(*right);     left->operandCheck(*right);
550    
551     if (left->isLazy())          // the children need to be DataLazy. Wrap them in IDENTITY if required     if (left->isLazy())          // the children need to be DataLazy. Wrap them in IDENTITY if required
552     {     {
553      m_left=dynamic_pointer_cast<DataLazy>(left);      m_left=dynamic_pointer_cast<DataLazy>(left);
554    LAZYDEBUG(cout << "Left is " << m_left->toString() << endl;)
555     }     }
556     else     else
557     {     {
558      m_left=DataLazy_ptr(new DataLazy(left));      m_left=DataLazy_ptr(new DataLazy(left));
559    LAZYDEBUG(cout << "Left " << left.get() << " wrapped " << m_left->m_id.get() << endl;)
560     }     }
561     if (right->isLazy())     if (right->isLazy())
562     {     {
563      m_right=dynamic_pointer_cast<DataLazy>(right);      m_right=dynamic_pointer_cast<DataLazy>(right);
564    LAZYDEBUG(cout << "Right is " << m_right->toString() << endl;)
565     }     }
566     else     else
567     {     {
568      m_right=DataLazy_ptr(new DataLazy(right));      m_right=DataLazy_ptr(new DataLazy(right));
569    LAZYDEBUG(cout << "Right " << right.get() << " wrapped " << m_right->m_id.get() << endl;)
570     }     }
571     char lt=m_left->m_readytype;     char lt=m_left->m_readytype;
572     char rt=m_right->m_readytype;     char rt=m_right->m_readytype;
# Line 396  DataLazy::DataLazy(DataAbstract_ptr left Line 583  DataLazy::DataLazy(DataAbstract_ptr left
583      m_readytype='C';      m_readytype='C';
584     }     }
585     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
586     m_maxsamplesize=max(max(m_samplesize,m_right->getMaxSampleSize()),m_left->getMaxSampleSize());       m_children=m_left->m_children+m_right->m_children+2;
587     m_buffsRequired=calcBuffs(m_left, m_right,m_op);     m_height=max(m_left->m_height,m_right->m_height)+1;
588  cout << "(3)Lazy created with " << m_samplesize << endl;     LazyNodeSetup();
589       SIZELIMIT
590    LAZYDEBUG(cout << "(3)Lazy created with " << m_samplesize << endl;)
591  }  }
592    
593  DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op, int axis_offset, int transpose)  DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op, int axis_offset, int transpose)
# Line 427  DataLazy::DataLazy(DataAbstract_ptr left Line 616  DataLazy::DataLazy(DataAbstract_ptr left
616      Data tmp(Data(right),getFunctionSpace());      Data tmp(Data(right),getFunctionSpace());
617      right=tmp.borrowDataPtr();      right=tmp.borrowDataPtr();
618     }     }
619     left->operandCheck(*right);  //    left->operandCheck(*right);
620    
621     if (left->isLazy())          // the children need to be DataLazy. Wrap them in IDENTITY if required     if (left->isLazy())          // the children need to be DataLazy. Wrap them in IDENTITY if required
622     {     {
# Line 460  DataLazy::DataLazy(DataAbstract_ptr left Line 649  DataLazy::DataLazy(DataAbstract_ptr left
649      m_readytype='C';      m_readytype='C';
650     }     }
651     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
652     m_maxsamplesize=max(max(m_samplesize,m_right->getMaxSampleSize()),m_left->getMaxSampleSize());       m_children=m_left->m_children+m_right->m_children+2;
653     m_buffsRequired=calcBuffs(m_left, m_right,m_op);     m_height=max(m_left->m_height,m_right->m_height)+1;
654  cout << "(4)Lazy created with " << m_samplesize << endl;     LazyNodeSetup();
655       SIZELIMIT
656    LAZYDEBUG(cout << "(4)Lazy created with " << m_samplesize << endl;)
657  }  }
658    
659    
660  DataLazy::~DataLazy()  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, int axis_offset)
661        : parent(left->getFunctionSpace(), resultShape(left,op, axis_offset)),
662        m_op(op),
663        m_axis_offset(axis_offset),
664        m_transpose(0),
665        m_tol(0)
666    {
667       if ((getOpgroup(op)!=G_NP1OUT_P))
668       {
669        throw DataException("Programmer error - constructor DataLazy(left, op, ax) will only process UNARY operations which require parameters.");
670       }
671       DataLazy_ptr lleft;
672       if (!left->isLazy())
673       {
674        lleft=DataLazy_ptr(new DataLazy(left));
675       }
676       else
677       {
678        lleft=dynamic_pointer_cast<DataLazy>(left);
679       }
680       m_readytype=lleft->m_readytype;
681       m_left=lleft;
682       m_samplesize=getNumDPPSample()*getNoValues();
683       m_children=m_left->m_children+1;
684       m_height=m_left->m_height+1;
685       LazyNodeSetup();
686       SIZELIMIT
687    LAZYDEBUG(cout << "(5)Lazy created with " << m_samplesize << endl;)
688    }
689    
690    DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, double tol)
691        : parent(left->getFunctionSpace(), left->getShape()),
692        m_op(op),
693        m_axis_offset(0),
694        m_transpose(0),
695        m_tol(tol)
696  {  {
697       if ((getOpgroup(op)!=G_UNARY_P))
698       {
699        throw DataException("Programmer error - constructor DataLazy(left, op, tol) will only process UNARY operations which require parameters.");
700       }
701       DataLazy_ptr lleft;
702       if (!left->isLazy())
703       {
704        lleft=DataLazy_ptr(new DataLazy(left));
705       }
706       else
707       {
708        lleft=dynamic_pointer_cast<DataLazy>(left);
709       }
710       m_readytype=lleft->m_readytype;
711       m_left=lleft;
712       m_samplesize=getNumDPPSample()*getNoValues();
713       m_children=m_left->m_children+1;
714       m_height=m_left->m_height+1;
715       LazyNodeSetup();
716       SIZELIMIT
717    LAZYDEBUG(cout << "(6)Lazy created with " << m_samplesize << endl;)
718  }  }
719    
720    
721  int  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, const int axis0, const int axis1)
722  DataLazy::getBuffsRequired() const      : parent(left->getFunctionSpace(), SwapShape(left,axis0,axis1)),
723        m_op(op),
724        m_axis_offset(axis0),
725        m_transpose(axis1),
726        m_tol(0)
727  {  {
728      return m_buffsRequired;     if ((getOpgroup(op)!=G_NP1OUT_2P))
729       {
730        throw DataException("Programmer error - constructor DataLazy(left, op, tol) will only process UNARY operations which require two integer parameters.");
731       }
732       DataLazy_ptr lleft;
733       if (!left->isLazy())
734       {
735        lleft=DataLazy_ptr(new DataLazy(left));
736       }
737       else
738       {
739        lleft=dynamic_pointer_cast<DataLazy>(left);
740       }
741       m_readytype=lleft->m_readytype;
742       m_left=lleft;
743       m_samplesize=getNumDPPSample()*getNoValues();
744       m_children=m_left->m_children+1;
745       m_height=m_left->m_height+1;
746       LazyNodeSetup();
747       SIZELIMIT
748    LAZYDEBUG(cout << "(7)Lazy created with " << m_samplesize << endl;)
749  }  }
750    
751    
752  size_t  namespace
 DataLazy::getMaxSampleSize() const  
753  {  {
754      return m_maxsamplesize;  
755        inline int max3(int a, int b, int c)
756        {
757        int t=(a>b?a:b);
758        return (t>c?t:c);
759    
760        }
761    }
762    
763    DataLazy::DataLazy(DataAbstract_ptr mask, DataAbstract_ptr left, DataAbstract_ptr right/*, double tol*/)
764        : parent(left->getFunctionSpace(), left->getShape()),
765        m_op(CONDEVAL),
766        m_axis_offset(0),
767        m_transpose(0),
768        m_tol(0)
769    {
770    
771       DataLazy_ptr lmask;
772       DataLazy_ptr lleft;
773       DataLazy_ptr lright;
774       if (!mask->isLazy())
775       {
776        lmask=DataLazy_ptr(new DataLazy(mask));
777       }
778       else
779       {
780        lmask=dynamic_pointer_cast<DataLazy>(mask);
781       }
782       if (!left->isLazy())
783       {
784        lleft=DataLazy_ptr(new DataLazy(left));
785       }
786       else
787       {
788        lleft=dynamic_pointer_cast<DataLazy>(left);
789       }
790       if (!right->isLazy())
791       {
792        lright=DataLazy_ptr(new DataLazy(right));
793       }
794       else
795       {
796        lright=dynamic_pointer_cast<DataLazy>(right);
797       }
798       m_readytype=lmask->m_readytype;
799       if ((lleft->m_readytype!=lright->m_readytype) || (lmask->m_readytype!=lleft->m_readytype))
800       {
801        throw DataException("Programmer Error - condEval arguments must have the same readytype");
802       }
803       m_left=lleft;
804       m_right=lright;
805       m_mask=lmask;
806       m_samplesize=getNumDPPSample()*getNoValues();
807       m_children=m_left->m_children+m_right->m_children+m_mask->m_children+1;
808       m_height=max3(m_left->m_height,m_right->m_height,m_mask->m_height)+1;
809       LazyNodeSetup();
810       SIZELIMIT
811    LAZYDEBUG(cout << "(8)Lazy created with " << m_samplesize << endl;)
812    }
813    
814    
815    
// Destructor: releases the per-thread sample-id cache used by
// resolveNodeSample().  (Presumably allocated in LazyNodeSetup() — the
// allocation site is not visible in this chunk; confirm.)
DataLazy::~DataLazy()
{
   delete[] m_sampleids;
}
820    
821    
822  /*  /*
823    \brief Evaluates the expression using methods on Data.    \brief Evaluates the expression using methods on Data.
824    This does the work for the collapse method.    This does the work for the collapse method.
825    For reasons of efficiency do not call this method on DataExpanded nodes.    For reasons of efficiency do not call this method on DataExpanded nodes.
826  */  */
827  DataReady_ptr  DataReady_ptr
828  DataLazy::collapseToReady()  DataLazy::collapseToReady() const
829  {  {
830    if (m_readytype=='E')    if (m_readytype=='E')
831    { // this is more an efficiency concern than anything else    { // this is more an efficiency concern than anything else
# Line 602  DataLazy::collapseToReady() Line 937  DataLazy::collapseToReady()
937      case LEZ:      case LEZ:
938      result=left.whereNonPositive();      result=left.whereNonPositive();
939      break;      break;
940        case NEZ:
941        result=left.whereNonZero(m_tol);
942        break;
943        case EZ:
944        result=left.whereZero(m_tol);
945        break;
946      case SYM:      case SYM:
947      result=left.symmetric();      result=left.symmetric();
948      break;      break;
# Line 611  DataLazy::collapseToReady() Line 952  DataLazy::collapseToReady()
952      case PROD:      case PROD:
953      result=C_GeneralTensorProduct(left,right,m_axis_offset, m_transpose);      result=C_GeneralTensorProduct(left,right,m_axis_offset, m_transpose);
954      break;      break;
955        case TRANS:
956        result=left.transpose(m_axis_offset);
957        break;
958        case TRACE:
959        result=left.trace(m_axis_offset);
960        break;
961        case SWAP:
962        result=left.swapaxes(m_axis_offset, m_transpose);
963        break;
964        case MINVAL:
965        result=left.minval();
966        break;
967        case MAXVAL:
968        result=left.minval();
969        break;
970      default:      default:
971      throw DataException("Programmer error - collapseToReady does not know how to resolve operator "+opToString(m_op)+".");      throw DataException("Programmer error - collapseToReady does not know how to resolve operator "+opToString(m_op)+".");
972    }    }
# Line 624  DataLazy::collapseToReady() Line 980  DataLazy::collapseToReady()
980     the purpose of using DataLazy in the first place).     the purpose of using DataLazy in the first place).
981  */  */
982  void  void
983  DataLazy::collapse()  DataLazy::collapse() const
984  {  {
985    if (m_op==IDENTITY)    if (m_op==IDENTITY)
986    {    {
# Line 638  DataLazy::collapse() Line 994  DataLazy::collapse()
994    m_op=IDENTITY;    m_op=IDENTITY;
995  }  }
996    
997  /*  
998    \brief Compute the value of the expression (unary operation) for the given sample.  
999    \return Vector which stores the value of the subexpression for the given sample.  
1000    \param v A vector to store intermediate results.  
1001    \param offset Index in v to begin storing results.  
1002    \param sampleNo Sample number to evaluate.  #define PROC_OP(TYPE,X)                               \
1003    \param roffset (output parameter) the offset in the return vector where the result begins.      for (int j=0;j<onumsteps;++j)\
1004        {\
1005    The return value will be an existing vector so do not deallocate it.        for (int i=0;i<numsteps;++i,resultp+=resultStep) \
1006    If the result is stored in v it should be stored at the offset given.        { \
1007    Everything from offset to the end of v should be considered available for this method to use.  LAZYDEBUG(cout << "[left,right]=[" << lroffset << "," << rroffset << "]" << endl;)\
1008  */  LAZYDEBUG(cout << "{left,right}={" << (*left)[lroffset] << "," << (*right)[rroffset] << "}\n";)\
1009  DataTypes::ValueType*           tensor_binary_operation< TYPE >(chunksize, &((*left)[lroffset]), &((*right)[rroffset]), resultp, X); \
1010  DataLazy::resolveUnary(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const  LAZYDEBUG(cout << " result=      " << resultp[0] << endl;) \
1011             lroffset+=leftstep; \
1012             rroffset+=rightstep; \
1013          }\
1014          lroffset+=oleftstep;\
1015          rroffset+=orightstep;\
1016        }
1017    
1018    
1019    // The result will be stored in m_samples
1020    // The return value is a pointer to the DataVector, offset is the offset within the return value
// The result will be stored in m_samples
// The return value is a pointer to the DataVector, offset is the offset within the return value
//
// Entry point for resolving one sample of the expression tree.
// tid is the calling thread's index: each thread owns slot tid*m_samplesize
// of m_samples and its own entry in the m_sampleids cache, so concurrent
// callers do not interfere.
const DataTypes::ValueType*
DataLazy::resolveNodeSample(int tid, int sampleNo, size_t& roffset) const
{
LAZYDEBUG(cout << "Resolve sample " << toString() << endl;)
	// collapse so we have a 'E' node or an IDENTITY for some other type
  if (m_readytype!='E' && m_op!=IDENTITY)
  {
	collapse();
  }
  if (m_op==IDENTITY)
  {
    // Leaf node: answer straight out of the underlying DataReady's vector.
    const ValueType& vec=m_id->getVectorRO();
    roffset=m_id->getPointOffset(sampleNo, 0);
#ifdef LAZY_STACK_PROF
// Record the deepest stack address seen by this thread (stack-use profiling).
int x;
if (&x<stackend[omp_get_thread_num()])
{
       stackend[omp_get_thread_num()]=&x;
}
#endif
    return &(vec);
  }
  if (m_readytype!='E')
  {
    throw DataException("Programmer Error - Collapse did not produce an expanded node.");
  }
  // Per-thread memoisation: if this thread last resolved the same sample,
  // its result is still sitting in this thread's slot of m_samples.
  if (m_sampleids[tid]==sampleNo)
  {
	roffset=tid*m_samplesize;
	return &(m_samples);		// sample is already resolved
  }
  m_sampleids[tid]=sampleNo;

  // Dispatch on the operator's group to the matching resolve helper.
  switch (getOpgroup(m_op))
  {
  case G_UNARY:
  case G_UNARY_P: return resolveNodeUnary(tid, sampleNo, roffset);
  case G_BINARY: return resolveNodeBinary(tid, sampleNo, roffset);
  case G_NP1OUT: return resolveNodeNP1OUT(tid, sampleNo, roffset);
  case G_NP1OUT_P: return resolveNodeNP1OUT_P(tid, sampleNo, roffset);
  case G_TENSORPROD: return resolveNodeTProd(tid, sampleNo, roffset);
  case G_NP1OUT_2P: return resolveNodeNP1OUT_2P(tid, sampleNo, roffset);
  case G_REDUCTION: return resolveNodeReduction(tid, sampleNo, roffset);
  case G_CONDEVAL: return resolveNodeCondEval(tid, sampleNo, roffset);
  default:
	throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");
  }
}
1069    
1070    const DataTypes::ValueType*
1071    DataLazy::resolveNodeUnary(int tid, int sampleNo, size_t& roffset) const
1072  {  {
1073      // we assume that any collapsing has been done before we get here      // we assume that any collapsing has been done before we get here
1074      // since we only have one argument we don't need to think about only      // since we only have one argument we don't need to think about only
1075      // processing single points.      // processing single points.
1076        // we will also know we won't get identity nodes
1077    if (m_readytype!='E')    if (m_readytype!='E')
1078    {    {
1079      throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");      throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
1080    }    }
1081    const ValueType* vleft=m_left->resolveSample(v,offset,sampleNo,roffset);    if (m_op==IDENTITY)
1082    const double* left=&((*vleft)[roffset]);    {
1083    double* result=&(v[offset]);      throw DataException("Programmer error - resolveNodeUnary should not be called on identity nodes.");
1084    roffset=offset;    }
1085      const DataTypes::ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, roffset);
1086      const double* left=&((*leftres)[roffset]);
1087      roffset=m_samplesize*tid;
1088      double* result=&(m_samples[roffset]);
1089    switch (m_op)    switch (m_op)
1090    {    {
1091      case SIN:        case SIN:  
# Line 762  DataLazy::resolveUnary(ValueType& v, siz Line 1184  DataLazy::resolveUnary(ValueType& v, siz
1184      case LEZ:      case LEZ:
1185      tensor_unary_operation(m_samplesize, left, result, bind2nd(less_equal<double>(),0.0));      tensor_unary_operation(m_samplesize, left, result, bind2nd(less_equal<double>(),0.0));
1186      break;      break;
1187    // There are actually G_UNARY_P but I don't see a compelling reason to treat them differently
1188        case NEZ:
1189        tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsGT(),m_tol));
1190        break;
1191        case EZ:
1192        tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsLTE(),m_tol));
1193        break;
1194    
1195      default:      default:
1196      throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");      throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");
1197    }    }
1198    return &v;    return &(m_samples);
1199  }  }
1200    
1201    
1202  /*  const DataTypes::ValueType*
1203    \brief Compute the value of the expression (unary operation) for the given sample.  DataLazy::resolveNodeReduction(int tid, int sampleNo, size_t& roffset) const
   \return Vector which stores the value of the subexpression for the given sample.  
   \param v A vector to store intermediate results.  
   \param offset Index in v to begin storing results.  
   \param sampleNo Sample number to evaluate.  
   \param roffset (output parameter) the offset in the return vector where the result begins.  
   
   The return value will be an existing vector so do not deallocate it.  
   If the result is stored in v it should be stored at the offset given.  
   Everything from offset to the end of v should be considered available for this method to use.  
 */  
 DataTypes::ValueType*  
 DataLazy::resolveNP1OUT(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const  
1204  {  {
1205      // we assume that any collapsing has been done before we get here      // we assume that any collapsing has been done before we get here
1206      // since we only have one argument we don't need to think about only      // since we only have one argument we don't need to think about only
1207      // processing single points.      // processing single points.
1208        // we will also know we won't get identity nodes
1209    if (m_readytype!='E')    if (m_readytype!='E')
1210    {    {
1211      throw DataException("Programmer error - resolveNP1OUT should only be called on expanded Data.");      throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
1212    }    }
1213      // since we can't write the result over the input, we need a result offset further along    if (m_op==IDENTITY)
1214    size_t subroffset=roffset+m_samplesize;    {
1215    const ValueType* vleft=m_left->resolveSample(v,offset,sampleNo,subroffset);      throw DataException("Programmer error - resolveNodeUnary should not be called on identity nodes.");
1216    roffset=offset;    }
1217      size_t loffset=0;
1218      const DataTypes::ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, loffset);
1219    
1220      roffset=m_samplesize*tid;
1221      unsigned int ndpps=getNumDPPSample();
1222      unsigned int psize=DataTypes::noValues(m_left->getShape());
1223      double* result=&(m_samples[roffset]);
1224      switch (m_op)
1225      {
1226        case MINVAL:
1227        {
1228          for (unsigned int z=0;z<ndpps;++z)
1229          {
1230            FMin op;
1231            *result=DataMaths::reductionOp(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max());
1232            loffset+=psize;
1233            result++;
1234          }
1235        }
1236        break;
1237        case MAXVAL:
1238        {
1239          for (unsigned int z=0;z<ndpps;++z)
1240          {
1241          FMax op;
1242          *result=DataMaths::reductionOp(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max()*-1);
1243          loffset+=psize;
1244          result++;
1245          }
1246        }
1247        break;
1248        default:
1249        throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");
1250      }
1251      return &(m_samples);
1252    }
1253    
1254    const DataTypes::ValueType*
1255    DataLazy::resolveNodeNP1OUT(int tid, int sampleNo, size_t& roffset) const
1256    {
1257        // we assume that any collapsing has been done before we get here
1258        // since we only have one argument we don't need to think about only
1259        // processing single points.
1260      if (m_readytype!='E')
1261      {
1262        throw DataException("Programmer error - resolveNodeNP1OUT should only be called on expanded Data.");
1263      }
1264      if (m_op==IDENTITY)
1265      {
1266        throw DataException("Programmer error - resolveNodeNP1OUT should not be called on identity nodes.");
1267      }
1268      size_t subroffset;
1269      const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1270      roffset=m_samplesize*tid;
1271      size_t loop=0;
1272      size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1273      size_t step=getNoValues();
1274      size_t offset=roffset;
1275    switch (m_op)    switch (m_op)
1276    {    {
1277      case SYM:      case SYM:
1278      DataMaths::symmetric(*vleft,m_left->getShape(),subroffset, v, getShape(), offset);      for (loop=0;loop<numsteps;++loop)
1279        {
1280            DataMaths::symmetric(*leftres,m_left->getShape(),subroffset, m_samples, getShape(), offset);
1281            subroffset+=step;
1282            offset+=step;
1283        }
1284      break;      break;
1285      case NSYM:      case NSYM:
1286      DataMaths::nonsymmetric(*vleft,m_left->getShape(),subroffset, v, getShape(), offset);      for (loop=0;loop<numsteps;++loop)
1287        {
1288            DataMaths::nonsymmetric(*leftres,m_left->getShape(),subroffset, m_samples, getShape(), offset);
1289            subroffset+=step;
1290            offset+=step;
1291        }
1292      break;      break;
1293      default:      default:
1294      throw DataException("Programmer error - resolveNP1OUT can not resolve operator "+opToString(m_op)+".");      throw DataException("Programmer error - resolveNP1OUT can not resolve operator "+opToString(m_op)+".");
1295    }    }
1296    return &v;    return &m_samples;
1297  }  }
1298    
1299    const DataTypes::ValueType*
1300    DataLazy::resolveNodeNP1OUT_P(int tid, int sampleNo, size_t& roffset) const
1301    {
1302        // we assume that any collapsing has been done before we get here
1303        // since we only have one argument we don't need to think about only
1304        // processing single points.
1305      if (m_readytype!='E')
1306      {
1307        throw DataException("Programmer error - resolveNodeNP1OUT_P should only be called on expanded Data.");
1308      }
1309      if (m_op==IDENTITY)
1310      {
1311        throw DataException("Programmer error - resolveNodeNP1OUT_P should not be called on identity nodes.");
1312      }
1313      size_t subroffset;
1314      size_t offset;
1315      const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1316      roffset=m_samplesize*tid;
1317      offset=roffset;
1318      size_t loop=0;
1319      size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1320      size_t outstep=getNoValues();
1321      size_t instep=m_left->getNoValues();
1322      switch (m_op)
1323      {
1324        case TRACE:
1325        for (loop=0;loop<numsteps;++loop)
1326        {
1327                DataMaths::trace(*leftres,m_left->getShape(),subroffset, m_samples ,getShape(),offset,m_axis_offset);
1328            subroffset+=instep;
1329            offset+=outstep;
1330        }
1331        break;
1332        case TRANS:
1333        for (loop=0;loop<numsteps;++loop)
1334        {
1335                DataMaths::transpose(*leftres,m_left->getShape(),subroffset, m_samples, getShape(),offset,m_axis_offset);
1336            subroffset+=instep;
1337            offset+=outstep;
1338        }
1339        break;
1340        default:
1341        throw DataException("Programmer error - resolveNP1OUTP can not resolve operator "+opToString(m_op)+".");
1342      }
1343      return &m_samples;
1344    }
1345    
1346    
1347    const DataTypes::ValueType*
1348  #define PROC_OP(TYPE,X)                               \  DataLazy::resolveNodeNP1OUT_2P(int tid, int sampleNo, size_t& roffset) const
1349      for (int i=0;i<steps;++i,resultp+=resultStep) \  {
1350      { \    if (m_readytype!='E')
1351         tensor_binary_operation< TYPE >(chunksize, &((*left)[lroffset]), &((*right)[rroffset]), resultp, X); \    {
1352         lroffset+=leftStep; \      throw DataException("Programmer error - resolveNodeNP1OUT_2P should only be called on expanded Data.");
1353         rroffset+=rightStep; \    }
1354      if (m_op==IDENTITY)
1355      {
1356        throw DataException("Programmer error - resolveNodeNP1OUT_2P should not be called on identity nodes.");
1357      }
1358      size_t subroffset;
1359      size_t offset;
1360      const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1361      roffset=m_samplesize*tid;
1362      offset=roffset;
1363      size_t loop=0;
1364      size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1365      size_t outstep=getNoValues();
1366      size_t instep=m_left->getNoValues();
1367      switch (m_op)
1368      {
1369        case SWAP:
1370        for (loop=0;loop<numsteps;++loop)
1371        {
1372                DataMaths::swapaxes(*leftres,m_left->getShape(),subroffset, m_samples, getShape(),offset, m_axis_offset, m_transpose);
1373            subroffset+=instep;
1374            offset+=outstep;
1375      }      }
1376        break;
1377        default:
1378        throw DataException("Programmer error - resolveNodeNP1OUT2P can not resolve operator "+opToString(m_op)+".");
1379      }
1380      return &m_samples;
1381    }
1382    
1383    const DataTypes::ValueType*
1384    DataLazy::resolveNodeCondEval(int tid, int sampleNo, size_t& roffset) const
1385    {
1386      if (m_readytype!='E')
1387      {
1388        throw DataException("Programmer error - resolveNodeCondEval should only be called on expanded Data.");
1389      }
1390      if (m_op!=CONDEVAL)
1391      {
1392        throw DataException("Programmer error - resolveNodeCondEval should only be called on CONDEVAL nodes.");
1393      }
1394      size_t subroffset;
1395    
1396      const ValueType* maskres=m_mask->resolveNodeSample(tid, sampleNo, subroffset);
1397      const ValueType* srcres=0;
1398      if ((*maskres)[subroffset]>0)
1399      {
1400        srcres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1401      }
1402      else
1403      {
1404        srcres=m_right->resolveNodeSample(tid, sampleNo, subroffset);
1405      }
1406    
1407      // Now we need to copy the result
1408    
1409      roffset=m_samplesize*tid;
1410      for (int i=0;i<m_samplesize;++i)
1411      {
1412        m_samples[roffset+i]=(*srcres)[subroffset+i];  
1413      }
1414    
1415      return &m_samples;
1416    }
1417    
 /*  
   \brief Compute the value of the expression (binary operation) for the given sample.  
   \return Vector which stores the value of the subexpression for the given sample.  
   \param v A vector to store intermediate results.  
   \param offset Index in v to begin storing results.  
   \param sampleNo Sample number to evaluate.  
   \param roffset (output parameter) the offset in the return vector where the result begins.  
   
   The return value will be an existing vector so do not deallocate it.  
   If the result is stored in v it should be stored at the offset given.  
   Everything from offset to the end of v should be considered available for this method to use.  
 */  
1418  // This method assumes that any subexpressions which evaluate to Constant or Tagged Data  // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1419  // have already been collapsed to IDENTITY. So we must have at least one expanded child.  // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1420  // If both children are expanded, then we can process them in a single operation (we treat  // If both children are expanded, then we can process them in a single operation (we treat
# Line 842  DataLazy::resolveNP1OUT(ValueType& v, si Line 1424  DataLazy::resolveNP1OUT(ValueType& v, si
1424  // There is an additional complication when scalar operations are considered.  // There is an additional complication when scalar operations are considered.
1425  // For example, 2+Vector.  // For example, 2+Vector.
1426  // In this case each double within the point is treated individually  // In this case each double within the point is treated individually
1427  DataTypes::ValueType*  const DataTypes::ValueType*
1428  DataLazy::resolveBinary(ValueType& v,  size_t offset, int sampleNo, size_t& roffset) const  DataLazy::resolveNodeBinary(int tid, int sampleNo, size_t& roffset) const
1429  {  {
1430  cout << "Resolve binary: " << toString() << endl;  LAZYDEBUG(cout << "Resolve binary: " << toString() << endl;)
1431    
1432    size_t lroffset=0, rroffset=0;    // offsets in the left and right result vectors    size_t lroffset=0, rroffset=0;    // offsets in the left and right result vectors
1433      // first work out which of the children are expanded      // first work out which of the children are expanded
1434    bool leftExp=(m_left->m_readytype=='E');    bool leftExp=(m_left->m_readytype=='E');
1435    bool rightExp=(m_right->m_readytype=='E');    bool rightExp=(m_right->m_readytype=='E');
1436    bool bigloops=((leftExp && rightExp) || (!leftExp && !rightExp)); // is processing in single step?    if (!leftExp && !rightExp)
1437    int steps=(bigloops?1:getNumDPPSample());    {
1438    size_t chunksize=(bigloops? m_samplesize : getNoValues());    // if bigloops, pretend the whole sample is a datapoint      throw DataException("Programmer Error - please use collapse if neither argument has type 'E'.");
1439    if (m_left->getRank()!=m_right->getRank())    // need to deal with scalar * ? ops    }
1440    {    bool leftScalar=(m_left->getRank()==0);
1441      EsysAssert((m_left->getRank()==0) || (m_right->getRank()==0), "Error - Ranks must match unless one is 0.");    bool rightScalar=(m_right->getRank()==0);
1442      steps=getNumDPPSample()*max(m_left->getNoValues(),m_right->getNoValues());    if ((m_left->getRank()!=m_right->getRank()) && (!leftScalar && !rightScalar))
1443      chunksize=1;    // for scalar    {
1444    }          throw DataException("resolveBinary - ranks of arguments must match unless one of them is scalar.");
1445    int leftStep=((leftExp && !rightExp)? m_right->getNoValues() : 0);    }
1446    int rightStep=((rightExp && !leftExp)? m_left->getNoValues() : 0);    size_t leftsize=m_left->getNoValues();
1447    int resultStep=max(leftStep,rightStep);   // only one (at most) should be !=0    size_t rightsize=m_right->getNoValues();
1448      size_t chunksize=1;           // how many doubles will be processed in one go
1449      int leftstep=0;       // how far should the left offset advance after each step
1450      int rightstep=0;
1451      int numsteps=0;       // total number of steps for the inner loop
1452      int oleftstep=0;  // the o variables refer to the outer loop
1453      int orightstep=0; // The outer loop is only required in cases where there is an extended scalar
1454      int onumsteps=1;
1455      
1456      bool LES=(leftExp && leftScalar); // Left is an expanded scalar
1457      bool RES=(rightExp && rightScalar);
1458      bool LS=(!leftExp && leftScalar); // left is a single scalar
1459      bool RS=(!rightExp && rightScalar);
1460      bool LN=(!leftExp && !leftScalar);    // left is a single non-scalar
1461      bool RN=(!rightExp && !rightScalar);
1462      bool LEN=(leftExp && !leftScalar);    // left is an expanded non-scalar
1463      bool REN=(rightExp && !rightScalar);
1464    
1465      if ((LES && RES) || (LEN && REN)) // both are Expanded scalars or both are expanded non-scalars
1466      {
1467        chunksize=m_left->getNumDPPSample()*leftsize;
1468        leftstep=0;
1469        rightstep=0;
1470        numsteps=1;
1471      }
1472      else if (LES || RES)
1473      {
1474        chunksize=1;
1475        if (LES)        // left is an expanded scalar
1476        {
1477            if (RS)
1478            {
1479               leftstep=1;
1480               rightstep=0;
1481               numsteps=m_left->getNumDPPSample();
1482            }
1483            else        // RN or REN
1484            {
1485               leftstep=0;
1486               oleftstep=1;
1487               rightstep=1;
1488               orightstep=(RN ? -(int)rightsize : 0);
1489               numsteps=rightsize;
1490               onumsteps=m_left->getNumDPPSample();
1491            }
1492        }
1493        else        // right is an expanded scalar
1494        {
1495            if (LS)
1496            {
1497               rightstep=1;
1498               leftstep=0;
1499               numsteps=m_right->getNumDPPSample();
1500            }
1501            else
1502            {
1503               rightstep=0;
1504               orightstep=1;
1505               leftstep=1;
1506               oleftstep=(LN ? -(int)leftsize : 0);
1507               numsteps=leftsize;
1508               onumsteps=m_right->getNumDPPSample();
1509            }
1510        }
1511      }
1512      else  // this leaves (LEN, RS), (LEN, RN) and their transposes
1513      {
1514        if (LEN)    // and Right will be a single value
1515        {
1516            chunksize=rightsize;
1517            leftstep=rightsize;
1518            rightstep=0;
1519            numsteps=m_left->getNumDPPSample();
1520            if (RS)
1521            {
1522               numsteps*=leftsize;
1523            }
1524        }
1525        else    // REN
1526        {
1527            chunksize=leftsize;
1528            rightstep=leftsize;
1529            leftstep=0;
1530            numsteps=m_right->getNumDPPSample();
1531            if (LS)
1532            {
1533               numsteps*=rightsize;
1534            }
1535        }
1536      }
1537    
1538      int resultStep=max(leftstep,rightstep);   // only one (at most) should be !=0
1539      // Get the values of sub-expressions      // Get the values of sub-expressions
1540    const ValueType* left=m_left->resolveSample(v,offset,sampleNo,lroffset);    const ValueType* left=m_left->resolveNodeSample(tid,sampleNo,lroffset);  
1541    const ValueType* right=m_right->resolveSample(v,offset+m_samplesize,sampleNo,rroffset); // Note    const ValueType* right=m_right->resolveNodeSample(tid,sampleNo,rroffset);
1542      // the right child starts further along.  LAZYDEBUG(cout << "Post sub calls in " << toString() << endl;)
1543    double* resultp=&(v[offset]);     // results are stored at the vector offset we recieved  LAZYDEBUG(cout << "shapes=" << DataTypes::shapeToString(m_left->getShape()) << "," << DataTypes::shapeToString(m_right->getShape()) << endl;)
1544    LAZYDEBUG(cout << "chunksize=" << chunksize << endl << "leftstep=" << leftstep << " rightstep=" << rightstep;)
1545    LAZYDEBUG(cout << " numsteps=" << numsteps << endl << "oleftstep=" << oleftstep << " orightstep=" << orightstep;)
1546    LAZYDEBUG(cout << "onumsteps=" << onumsteps << endl;)
1547    LAZYDEBUG(cout << " DPPS=" << m_left->getNumDPPSample() << "," <<m_right->getNumDPPSample() << endl;)
1548    LAZYDEBUG(cout << "" << LS << RS << LN << RN << LES << RES <<LEN << REN <<   endl;)
1549    
1550    LAZYDEBUG(cout << "Left res["<< lroffset<< "]=" << (*left)[lroffset] << endl;)
1551    LAZYDEBUG(cout << "Right res["<< rroffset<< "]=" << (*right)[rroffset] << endl;)
1552    
1553    
1554      roffset=m_samplesize*tid;
1555      double* resultp=&(m_samples[roffset]);        // results are stored at the vector offset we received
1556    switch(m_op)    switch(m_op)
1557    {    {
1558      case ADD:      case ADD:
# Line 888  cout << "Resolve binary: " << toString() Line 1573  cout << "Resolve binary: " << toString()
1573      default:      default:
1574      throw DataException("Programmer error - resolveBinary can not resolve operator "+opToString(m_op)+".");      throw DataException("Programmer error - resolveBinary can not resolve operator "+opToString(m_op)+".");
1575    }    }
1576    roffset=offset;    LAZYDEBUG(cout << "Result res[" << roffset<< "]" << m_samples[roffset] << endl;)
1577    return &v;    return &m_samples;
1578  }  }
1579    
1580    
 /*  
   \brief Compute the value of the expression (tensor product) for the given sample.  
   \return Vector which stores the value of the subexpression for the given sample.  
   \param v A vector to store intermediate results.  
   \param offset Index in v to begin storing results.  
   \param sampleNo Sample number to evaluate.  
   \param roffset (output parameter) the offset in the return vector where the result begins.  
   
   The return value will be an existing vector so do not deallocate it.  
   If the result is stored in v it should be stored at the offset given.  
   Everything from offset to the end of v should be considered available for this method to use.  
 */  
1581  // This method assumes that any subexpressions which evaluate to Constant or Tagged Data  // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1582  // have already been collapsed to IDENTITY. So we must have at least one expanded child.  // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1583  // unlike the other resolve helpers, we must treat these datapoints separately.  // unlike the other resolve helpers, we must treat these datapoints separately.
1584  DataTypes::ValueType*  const DataTypes::ValueType*
1585  DataLazy::resolveTProd(ValueType& v,  size_t offset, int sampleNo, size_t& roffset) const  DataLazy::resolveNodeTProd(int tid, int sampleNo, size_t& roffset) const
1586  {  {
1587  cout << "Resolve TensorProduct: " << toString() << endl;  LAZYDEBUG(cout << "Resolve TensorProduct: " << toString() << endl;)
1588    
1589    size_t lroffset=0, rroffset=0;    // offsets in the left and right result vectors    size_t lroffset=0, rroffset=0;    // offsets in the left and right result vectors
1590      // first work out which of the children are expanded      // first work out which of the children are expanded
1591    bool leftExp=(m_left->m_readytype=='E');    bool leftExp=(m_left->m_readytype=='E');
1592    bool rightExp=(m_right->m_readytype=='E');    bool rightExp=(m_right->m_readytype=='E');
1593    int steps=getNumDPPSample();    int steps=getNumDPPSample();
1594    int leftStep=((leftExp && !rightExp)? m_right->getNoValues() : 0);    int leftStep=(leftExp? m_left->getNoValues() : 0);        // do not have scalars as input to this method
1595    int rightStep=((rightExp && !leftExp)? m_left->getNoValues() : 0);    int rightStep=(rightExp?m_right->getNoValues() : 0);
1596    int resultStep=max(leftStep,rightStep);   // only one (at most) should be !=0  
1597      // Get the values of sub-expressions (leave a gap of one sample for the result).    int resultStep=getNoValues();
1598    const ValueType* left=m_left->resolveSample(v,offset+m_samplesize,sampleNo,lroffset);    roffset=m_samplesize*tid;
1599    const ValueType* right=m_right->resolveSample(v,offset+2*m_samplesize,sampleNo,rroffset);    size_t offset=roffset;
1600    double* resultp=&(v[offset]);     // results are stored at the vector offset we recieved  
1601      const ValueType* left=m_left->resolveNodeSample(tid, sampleNo, lroffset);
1602    
1603      const ValueType* right=m_right->resolveNodeSample(tid, sampleNo, rroffset);
1604    
1605    LAZYDEBUG(cerr << "[Left shape]=" << DataTypes::shapeToString(m_left->getShape()) << "\n[Right shape]=" << DataTypes::shapeToString(m_right->getShape()) << " result=" <<DataTypes::shapeToString(getShape()) <<  endl;
1606    cout << getNoValues() << endl;)
1607    
1608    
1609    LAZYDEBUG(cerr << "Post sub calls: " << toString() << endl;)
1610    LAZYDEBUG(cout << "LeftExp=" << leftExp << " rightExp=" << rightExp << endl;)
1611    LAZYDEBUG(cout << "LeftR=" << m_left->getRank() << " rightExp=" << m_right->getRank() << endl;)
1612    LAZYDEBUG(cout << "LeftSize=" << m_left->getNoValues() << " RightSize=" << m_right->getNoValues() << endl;)
1613    LAZYDEBUG(cout << "m_samplesize=" << m_samplesize << endl;)
1614    LAZYDEBUG(cout << "outputshape=" << DataTypes::shapeToString(getShape()) << endl;)
1615    LAZYDEBUG(cout << "DPPS=" << m_right->getNumDPPSample() <<"."<<endl;)
1616    
1617      double* resultp=&(m_samples[offset]);     // results are stored at the vector offset we received
1618    switch(m_op)    switch(m_op)
1619    {    {
1620      case PROD:      case PROD:
# Line 932  cout << "Resolve TensorProduct: " << toS Line 1622  cout << "Resolve TensorProduct: " << toS
1622      {      {
1623            const double *ptr_0 = &((*left)[lroffset]);            const double *ptr_0 = &((*left)[lroffset]);
1624            const double *ptr_1 = &((*right)[rroffset]);            const double *ptr_1 = &((*right)[rroffset]);
1625    
1626    LAZYDEBUG(cout << DataTypes::pointToString(*left, m_left->getShape(),lroffset,"LEFT") << endl;)
1627    LAZYDEBUG(cout << DataTypes::pointToString(*right,m_right->getShape(),rroffset, "RIGHT") << endl;)
1628    
1629            matrix_matrix_product(m_SL, m_SM, m_SR, ptr_0, ptr_1, resultp, m_transpose);            matrix_matrix_product(m_SL, m_SM, m_SR, ptr_0, ptr_1, resultp, m_transpose);
1630    
1631        lroffset+=leftStep;        lroffset+=leftStep;
1632        rroffset+=rightStep;        rroffset+=rightStep;
1633      }      }
# Line 941  cout << "Resolve TensorProduct: " << toS Line 1636  cout << "Resolve TensorProduct: " << toS
1636      throw DataException("Programmer error - resolveTProduct can not resolve operator "+opToString(m_op)+".");      throw DataException("Programmer error - resolveTProduct can not resolve operator "+opToString(m_op)+".");
1637    }    }
1638    roffset=offset;    roffset=offset;
1639    return &v;    return &m_samples;
1640  }  }
1641    
1642    
1643    const DataTypes::ValueType*
1644    DataLazy::resolveSample(int sampleNo, size_t& roffset) const
1645    {
1646    #ifdef _OPENMP
1647        int tid=omp_get_thread_num();
1648    #else
1649        int tid=0;
1650    #endif
1651    
1652  /*  #ifdef LAZY_STACK_PROF
1653    \brief Compute the value of the expression for the given sample.      stackstart[tid]=&tid;
1654    \return Vector which stores the value of the subexpression for the given sample.      stackend[tid]=&tid;
1655    \param v A vector to store intermediate results.      const DataTypes::ValueType* r=resolveNodeSample(tid, sampleNo, roffset);
1656    \param offset Index in v to begin storing results.      size_t d=(size_t)stackstart[tid]-(size_t)stackend[tid];
1657    \param sampleNo Sample number to evaluate.      #pragma omp critical
1658    \param roffset (output parameter) the offset in the return vector where the result begins.      if (d>maxstackuse)
1659        {
1660    cout << "Max resolve Stack use " << d << endl;
1661            maxstackuse=d;
1662        }
1663        return r;
1664    #else
1665        return resolveNodeSample(tid, sampleNo, roffset);
1666    #endif
1667    }
1668    
   The return value will be an existing vector so do not deallocate it.  
 */  
 // the vector and the offset are a place where the method could write its data if it wishes  
 // it is not obligated to do so. For example, if it has its own storage already, it can use that.  
 // Hence the return value to indicate where the data is actually stored.  
 // Regardless, the storage should be assumed to be used, even if it isn't.  
1669    
1670  // the roffset is the offset within the returned vector where the data begins  // This needs to do the work of the identity constructor
1671  const DataTypes::ValueType*  void
1672  DataLazy::resolveSample(ValueType& v, size_t offset, int sampleNo, size_t& roffset)  DataLazy::resolveToIdentity()
1673  {  {
1674  cout << "Resolve sample " << toString() << endl;     if (m_op==IDENTITY)
1675      // collapse so we have a 'E' node or an IDENTITY for some other type      return;
1676    if (m_readytype!='E' && m_op!=IDENTITY)     DataReady_ptr p=resolveNodeWorker();
1677       makeIdentity(p);
1678    }
1679    
1680    void DataLazy::makeIdentity(const DataReady_ptr& p)
1681    {
1682       m_op=IDENTITY;
1683       m_axis_offset=0;
1684       m_transpose=0;
1685       m_SL=m_SM=m_SR=0;
1686       m_children=m_height=0;
1687       m_id=p;
1688       if(p->isConstant()) {m_readytype='C';}
1689       else if(p->isExpanded()) {m_readytype='E';}
1690       else if (p->isTagged()) {m_readytype='T';}
1691       else {throw DataException("Unknown DataReady instance in convertToIdentity constructor.");}
1692       m_samplesize=p->getNumDPPSample()*p->getNoValues();
1693       m_left.reset();
1694       m_right.reset();
1695    }
1696    
1697    
1698    DataReady_ptr
1699    DataLazy::resolve()
1700    {
1701        resolveToIdentity();
1702        return m_id;
1703    }
1704    
1705    
1706    /* This is really a static method but I think that caused problems in windows */
1707    void
1708    DataLazy::resolveGroupWorker(std::vector<DataLazy*>& dats)
1709    {
1710      if (dats.empty())
1711    {    {
1712      collapse();      return;
1713    }    }
1714    if (m_op==IDENTITY)      vector<DataLazy*> work;
1715      FunctionSpace fs=dats[0]->getFunctionSpace();
1716      bool match=true;
1717      for (int i=dats.size()-1;i>=0;--i)
1718    {    {
1719      const ValueType& vec=m_id->getVector();      if (dats[i]->m_readytype!='E')
1720      if (m_readytype=='C')      {
1721      {          dats[i]->collapse();
1722      roffset=0;      }
1723      return &(vec);      if (dats[i]->m_op!=IDENTITY)
1724      }      {
1725      roffset=m_id->getPointOffset(sampleNo, 0);          work.push_back(dats[i]);
1726      return &(vec);          if (fs!=dats[i]->getFunctionSpace())
1727            {
1728                match=false;
1729            }
1730        }
1731    }    }
1732    if (m_readytype!='E')    if (work.empty())
1733    {    {
1734      throw DataException("Programmer Error - Collapse did not produce an expanded node.");      return;     // no work to do
1735    }    }
1736    switch (getOpgroup(m_op))    if (match)    // all functionspaces match.  Yes I realise this is overly strict
1737      {     // it is possible that dats[0] is one of the objects which we discarded and
1738            // all the other functionspaces match.
1739        vector<DataExpanded*> dep;
1740        vector<ValueType*> vecs;
1741        for (int i=0;i<work.size();++i)
1742        {
1743            dep.push_back(new DataExpanded(fs,work[i]->getShape(), ValueType(work[i]->getNoValues())));
1744            vecs.push_back(&(dep[i]->getVectorRW()));
1745        }
1746        int totalsamples=work[0]->getNumSamples();
1747        const ValueType* res=0; // Storage for answer
1748        int sample;
1749        #pragma omp parallel private(sample, res)
1750        {
1751            size_t roffset=0;
1752            #pragma omp for schedule(static)
1753            for (sample=0;sample<totalsamples;++sample)
1754            {
1755            roffset=0;
1756            int j;
1757            for (j=work.size()-1;j>=0;--j)
1758            {
1759    #ifdef _OPENMP
1760                    res=work[j]->resolveNodeSample(omp_get_thread_num(),sample,roffset);
1761    #else
1762                    res=work[j]->resolveNodeSample(0,sample,roffset);
1763    #endif
1764                    DataVector::size_type outoffset=dep[j]->getPointOffset(sample,0);
1765                    memcpy(&((*vecs[j])[outoffset]),&((*res)[roffset]),work[j]->m_samplesize*sizeof(DataVector::ElementType));
1766            }
1767            }
1768        }
1769        // Now we need to load the new results as identity ops into the lazy nodes
1770        for (int i=work.size()-1;i>=0;--i)
1771        {
1772            work[i]->makeIdentity(boost::dynamic_pointer_cast<DataReady>(dep[i]->getPtr()));
1773        }
1774      }
1775      else  // functionspaces do not match
1776    {    {
1777    case G_UNARY: return resolveUnary(v, offset,sampleNo,roffset);      for (int i=0;i<work.size();++i)
1778    case G_BINARY: return resolveBinary(v, offset,sampleNo,roffset);      {
1779    case G_NP1OUT: return resolveNP1OUT(v, offset, sampleNo,roffset);          work[i]->resolveToIdentity();
1780    case G_TENSORPROD: return resolveTProd(v,offset, sampleNo,roffset);      }
   default:  
     throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");  
1781    }    }
1782  }  }
1783    
1784    
1785  // To simplify the memory management, all threads operate on one large vector, rather than one each.  
1786  // Each sample is evaluated independently and copied into the result DataExpanded.  // This version of resolve uses storage in each node to hold results
1787  DataReady_ptr  DataReady_ptr
1788  DataLazy::resolve()  DataLazy::resolveNodeWorker()
1789  {  {
   
 cout << "Sample size=" << m_samplesize << endl;  
 cout << "Buffers=" << m_buffsRequired << endl;  
   
1790    if (m_readytype!='E')     // if the whole sub-expression is Constant or Tagged, then evaluate it normally    if (m_readytype!='E')     // if the whole sub-expression is Constant or Tagged, then evaluate it normally
1791    {    {
1792      collapse();      collapse();
# Line 1016  cout << "Buffers=" << m_buffsRequired << Line 1796  cout << "Buffers=" << m_buffsRequired <<
1796      return m_id;      return m_id;
1797    }    }
1798      // from this point on we must have m_op!=IDENTITY and m_readytype=='E'      // from this point on we must have m_op!=IDENTITY and m_readytype=='E'
   size_t threadbuffersize=m_maxsamplesize*(max(1,m_buffsRequired)); // Each thread needs to have enough  
     // storage to evaluate its expression  
   int numthreads=1;  
 #ifdef _OPENMP  
   numthreads=getNumberOfThreads();  
 #endif  
   ValueType v(numthreads*threadbuffersize);  
 cout << "Buffer created with size=" << v.size() << endl;  
1799    DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(),  ValueType(getNoValues()));    DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(),  ValueType(getNoValues()));
1800    ValueType& resvec=result->getVector();    ValueType& resvec=result->getVectorRW();
1801    DataReady_ptr resptr=DataReady_ptr(result);    DataReady_ptr resptr=DataReady_ptr(result);
1802    
1803    int sample;    int sample;
   size_t outoffset;     // offset in the output data  
1804    int totalsamples=getNumSamples();    int totalsamples=getNumSamples();
1805    const ValueType* res=0;   // Vector storing the answer    const ValueType* res=0;   // Storage for answer
1806    size_t resoffset=0;       // where in the vector to find the answer  LAZYDEBUG(cout << "Total number of samples=" <<totalsamples << endl;)
1807    #pragma omp parallel for private(sample,resoffset,outoffset,res) schedule(static)    #pragma omp parallel private(sample,res)
1808    for (sample=0;sample<totalsamples;++sample)    {
1809    {      size_t roffset=0;
1810  cout << "################################# " << sample << endl;  #ifdef LAZY_STACK_PROF
1811        stackstart[omp_get_thread_num()]=&roffset;
1812        stackend[omp_get_thread_num()]=&roffset;
1813    #endif
1814        #pragma omp for schedule(static)
1815        for (sample=0;sample<totalsamples;++sample)
1816        {
1817            roffset=0;
1818  #ifdef _OPENMP  #ifdef _OPENMP
1819      res=resolveSample(v,threadbuffersize*omp_get_thread_num(),sample,resoffset);              res=resolveNodeSample(omp_get_thread_num(),sample,roffset);
1820  #else  #else
1821      res=resolveSample(v,0,sample,resoffset);   // res would normally be v, but not if its a single IDENTITY op.              res=resolveNodeSample(0,sample,roffset);
1822  #endif  #endif
1823  cerr << "-------------------------------- " << endl;  LAZYDEBUG(cout << "Sample #" << sample << endl;)
1824      outoffset=result->getPointOffset(sample,0);  LAZYDEBUG(cout << "Final res[" << roffset<< "]=" << (*res)[roffset] << (*res)[roffset]<< endl; )
1825  cerr << "offset=" << outoffset << endl;              DataVector::size_type outoffset=result->getPointOffset(sample,0);
1826      for (unsigned int i=0;i<m_samplesize;++i,++outoffset,++resoffset)   // copy values into the output vector              memcpy(&(resvec[outoffset]),&((*res)[roffset]),m_samplesize*sizeof(DataVector::ElementType));
1827      {      }
1828      resvec[outoffset]=(*res)[resoffset];    }
1829      }  #ifdef LAZY_STACK_PROF
1830  cerr << "*********************************" << endl;    for (int i=0;i<getNumberOfThreads();++i)
1831      {
1832        size_t r=((size_t)stackstart[i] - (size_t)stackend[i]);
1833    //  cout << i << " " << stackstart[i] << " .. " << stackend[i] << " = " <<  r << endl;
1834        if (r>maxstackuse)
1835        {
1836            maxstackuse=r;
1837        }
1838    }    }
1839      cout << "Max resolve Stack use=" << maxstackuse << endl;
1840    #endif
1841    return resptr;    return resptr;
1842  }  }
1843    
# Line 1057  std::string Line 1845  std::string
1845  DataLazy::toString() const  DataLazy::toString() const
1846  {  {
1847    ostringstream oss;    ostringstream oss;
1848    oss << "Lazy Data:";    oss << "Lazy Data: [depth=" << m_height<< "] ";
1849    intoString(oss);    switch (escriptParams.getLAZY_STR_FMT())
1850      {
1851      case 1:   // tree format
1852        oss << endl;
1853        intoTreeString(oss,"");
1854        break;
1855      case 2:   // just the depth
1856        break;
1857      default:
1858        intoString(oss);
1859        break;
1860      }
1861    return oss.str();    return oss.str();
1862  }  }
1863    
# Line 1066  DataLazy::toString() const Line 1865  DataLazy::toString() const
1865  void  void
1866  DataLazy::intoString(ostringstream& oss) const  DataLazy::intoString(ostringstream& oss) const
1867  {  {
1868    //    oss << "[" << m_children <<";"<<m_height <<"]";
1869    switch (getOpgroup(m_op))    switch (getOpgroup(m_op))
1870    {    {
1871    case G_IDENTITY:    case G_IDENTITY:
# Line 1095  DataLazy::intoString(ostringstream& oss) Line 1895  DataLazy::intoString(ostringstream& oss)
1895      oss << ')';      oss << ')';
1896      break;      break;
1897    case G_UNARY:    case G_UNARY:
1898      case G_UNARY_P:
1899    case G_NP1OUT:    case G_NP1OUT:
1900      case G_NP1OUT_P:
1901      case G_REDUCTION:
1902      oss << opToString(m_op) << '(';      oss << opToString(m_op) << '(';
1903      m_left->intoString(oss);      m_left->intoString(oss);
1904      oss << ')';      oss << ')';
# Line 1107  DataLazy::intoString(ostringstream& oss) Line 1910  DataLazy::intoString(ostringstream& oss)
1910      m_right->intoString(oss);      m_right->intoString(oss);
1911      oss << ')';      oss << ')';
1912      break;      break;
1913      case G_NP1OUT_2P:
1914        oss << opToString(m_op) << '(';
1915        m_left->intoString(oss);
1916        oss << ", " << m_axis_offset << ", " << m_transpose;
1917        oss << ')';
1918        break;
1919      case G_CONDEVAL:
1920        oss << opToString(m_op)<< '(' ;
1921        m_mask->intoString(oss);
1922        oss << " ? ";
1923        m_left->intoString(oss);
1924        oss << " : ";
1925        m_right->intoString(oss);
1926        oss << ')';
1927        break;
1928      default:
1929        oss << "UNKNOWN";
1930      }
1931    }
1932    
1933    
1934    void
1935    DataLazy::intoTreeString(ostringstream& oss, string indent) const
1936    {
1937      oss << '[' << m_rank << ':' << setw(3) << m_samplesize << "] " << indent;
1938      switch (getOpgroup(m_op))
1939      {
1940      case G_IDENTITY:
1941        if (m_id->isExpanded())
1942        {
1943           oss << "E";
1944        }
1945        else if (m_id->isTagged())
1946        {
1947          oss << "T";
1948        }
1949        else if (m_id->isConstant())
1950        {
1951          oss << "C";
1952        }
1953        else
1954        {
1955          oss << "?";
1956        }
1957        oss << '@' << m_id.get() << endl;
1958        break;
1959      case G_BINARY:
1960        oss << opToString(m_op) << endl;
1961        indent+='.';
1962        m_left->intoTreeString(oss, indent);
1963        m_right->intoTreeString(oss, indent);
1964        break;
1965      case G_UNARY:
1966      case G_UNARY_P:
1967      case G_NP1OUT:
1968      case G_NP1OUT_P:
1969      case G_REDUCTION:
1970        oss << opToString(m_op) << endl;
1971        indent+='.';
1972        m_left->intoTreeString(oss, indent);
1973        break;
1974      case G_TENSORPROD:
1975        oss << opToString(m_op) << endl;
1976        indent+='.';
1977        m_left->intoTreeString(oss, indent);
1978        m_right->intoTreeString(oss, indent);
1979        break;
1980      case G_NP1OUT_2P:
1981        oss << opToString(m_op) << ", " << m_axis_offset << ", " << m_transpose<< endl;
1982        indent+='.';
1983        m_left->intoTreeString(oss, indent);
1984        break;
1985    default:    default:
1986      oss << "UNKNOWN";      oss << "UNKNOWN";
1987    }    }
1988  }  }
1989    
1990    
1991  DataAbstract*  DataAbstract*
1992  DataLazy::deepCopy()  DataLazy::deepCopy()
1993  {  {
1994    switch (getOpgroup(m_op))    switch (getOpgroup(m_op))
1995    {    {
1996    case G_IDENTITY:  return new DataLazy(m_id->deepCopy()->getPtr());    case G_IDENTITY:  return new DataLazy(m_id->deepCopy()->getPtr());
1997    case G_UNARY: return new DataLazy(m_left->deepCopy()->getPtr(),m_op);    case G_UNARY:
1998      case G_REDUCTION:      return new DataLazy(m_left->deepCopy()->getPtr(),m_op);
1999      case G_UNARY_P:   return new DataLazy(m_left->deepCopy()->getPtr(), m_op, m_tol);
2000    case G_BINARY:    return new DataLazy(m_left->deepCopy()->getPtr(),m_right->deepCopy()->getPtr(),m_op);    case G_BINARY:    return new DataLazy(m_left->deepCopy()->getPtr(),m_right->deepCopy()->getPtr(),m_op);
2001    case G_NP1OUT: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(),m_op);    case G_NP1OUT: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(),m_op);
2002    case G_TENSORPROD: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);    case G_TENSORPROD: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
2003      case G_NP1OUT_P:   return new DataLazy(m_left->deepCopy()->getPtr(),m_op,  m_axis_offset);
2004      case G_NP1OUT_2P:  return new DataLazy(m_left->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
2005    default:    default:
2006      throw DataException("Programmer error - do not know how to deepcopy operator "+opToString(m_op)+".");      throw DataException("Programmer error - do not know how to deepcopy operator "+opToString(m_op)+".");
2007    }    }
2008  }  }
2009    
2010    
2011    
2012  // There is no single, natural interpretation of getLength on DataLazy.  // There is no single, natural interpretation of getLength on DataLazy.
2013  // Instances of DataReady can look at the size of their vectors.  // Instances of DataReady can look at the size of their vectors.
2014  // For lazy though, it could be the size the data would be if it were resolved;  // For lazy though, it could be the size the data would be if it were resolved;
# Line 1203  DataLazy::getPointOffset(int sampleNo, Line 2084  DataLazy::getPointOffset(int sampleNo,
2084    throw DataException("Programmer error - getPointOffset on lazy data may require collapsing (but this object is marked const).");    throw DataException("Programmer error - getPointOffset on lazy data may require collapsing (but this object is marked const).");
2085  }  }
2086    
2087  // It would seem that DataTagged will need to be treated differently since even after setting all tags  
2088  // to zero, all the tags from all the DataTags would be in the result.  // I have decided to let Data:: handle this issue.
 // However since they all have the same value (0) whether they are there or not should not matter.  
 // So I have decided that for all types this method will create a constant 0.  
 // It can be promoted up as required.  
 // A possible efficiency concern might be expanded->constant->expanded which has an extra memory management  
 // but we can deal with that if it arrises.  
2089  void  void
2090  DataLazy::setToZero()  DataLazy::setToZero()
2091  {  {
2092    DataTypes::ValueType v(getNoValues(),0);  //   DataTypes::ValueType v(getNoValues(),0);
2093    m_id=DataReady_ptr(new DataConstant(getFunctionSpace(),getShape(),v));  //   m_id=DataReady_ptr(new DataConstant(getFunctionSpace(),getShape(),v));
2094    m_op=IDENTITY;  //   m_op=IDENTITY;
2095    m_right.reset();    //   m_right.reset();  
2096    m_left.reset();  //   m_left.reset();
2097    m_readytype='C';  //   m_readytype='C';
2098    m_buffsRequired=1;  //   m_buffsRequired=1;
2099    
2100      (void)privdebug;  // to stop the compiler complaining about unused privdebug
2101      throw DataException("Programmer error - setToZero not supported for DataLazy (DataLazy objects should be read only).");
2102    }
2103    
2104    bool
2105    DataLazy::actsExpanded() const
2106    {
2107        return (m_readytype=='E');
2108  }  }
2109    
2110  }   // end namespace  }   // end namespace

Legend:
Removed from v.2082  
changed lines
  Added in v.5464

  ViewVC Help
Powered by ViewVC 1.1.26