/[escript]/branches/clazy/escriptcore/src/DataLazy.cpp
ViewVC logotype

Diff of /branches/clazy/escriptcore/src/DataLazy.cpp

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

trunk/escript/src/DataLazy.cpp revision 2082 by caltinay, Fri Nov 21 01:46:05 2008 UTC trunk/escriptcore/src/DataLazy.cpp revision 5938 by jfenwick, Thu Feb 18 06:30:35 2016 UTC
# Line 1  Line 1 
1    
2  /*******************************************************  /*****************************************************************************
3  *  *
4  * Copyright (c) 2003-2008 by University of Queensland  * Copyright (c) 2003-2016 by The University of Queensland
5  * Earth Systems Science Computational Center (ESSCC)  * http://www.uq.edu.au
 * http://www.uq.edu.au/esscc  
6  *  *
7  * Primary Business: Queensland, Australia  * Primary Business: Queensland, Australia
8  * Licensed under the Open Software License version 3.0  * Licensed under the Open Software License version 3.0
9  * http://www.opensource.org/licenses/osl-3.0.php  * http://www.opensource.org/licenses/osl-3.0.php
10  *  *
11  *******************************************************/  * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12    * Development 2012-2013 by School of Earth Sciences
13    * Development from 2014 by Centre for Geoscience Computing (GeoComp)
14    *
15    *****************************************************************************/
16    
17    #define ESNEEDPYTHON
18    #include "esysUtils/first.h"
19    
20  #include "DataLazy.h"  #include "DataLazy.h"
21  #ifdef USE_NETCDF  #include "esysUtils/Esys_MPI.h"
 #include <netcdfcpp.h>  
 #endif  
 #ifdef PASO_MPI  
 #include <mpi.h>  
 #endif  
22  #ifdef _OPENMP  #ifdef _OPENMP
23  #include <omp.h>  #include <omp.h>
24  #endif  #endif
# Line 28  Line 28 
28  #include "UnaryFuncs.h"     // for escript::fsign  #include "UnaryFuncs.h"     // for escript::fsign
29  #include "Utils.h"  #include "Utils.h"
30    
31    #include "EscriptParams.h"
32    
33    #ifdef USE_NETCDF
34    #include <netcdfcpp.h>
35    #endif
36    
37    #include <iomanip>      // for some fancy formatting in debug
38    
39    using namespace escript::DataTypes;
40    
41    // #define LAZYDEBUG(X) if (privdebug){X;}
42    #define LAZYDEBUG(X)
43    namespace
44    {
45    bool privdebug=false;
46    
47    #define ENABLEDEBUG privdebug=true;
48    #define DISABLEDEBUG privdebug=false;
49    }
50    
51    // #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {cerr << "\n!!!!!!! SIZE LIMIT EXCEEDED " << m_children << ";" << m_height << endl << toString() << endl;resolveToIdentity();}
52    
53    // #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {cerr << "SIZE LIMIT EXCEEDED " << m_height << endl;resolveToIdentity();}
54    
55    
56    #define SIZELIMIT if (m_height>escript::escriptParams.getTOO_MANY_LEVELS())  {if (escript::escriptParams.getLAZY_VERBOSE()){cerr << "SIZE LIMIT EXCEEDED height=" << m_height << endl;}resolveToIdentity();}
57    
58  /*  /*
59  How does DataLazy work?  How does DataLazy work?
60  ~~~~~~~~~~~~~~~~~~~~~~~  ~~~~~~~~~~~~~~~~~~~~~~~
# Line 39  A special operation, IDENTITY, stores an Line 66  A special operation, IDENTITY, stores an
66  This means that all "internal" nodes in the structure are instances of DataLazy.  This means that all "internal" nodes in the structure are instances of DataLazy.
67    
68  Each operation has a string representation as well as an opgroup - eg G_IDENTITY, G_BINARY, ...  Each operation has a string representation as well as an opgroup - eg G_IDENTITY, G_BINARY, ...
69  Note that IDENITY is not considered a unary operation.  Note that IDENTITY is not considered a unary operation.
70    
71  I am avoiding calling the structure formed a tree because it is not guaranteed to be one (eg c=a+a).  I am avoiding calling the structure formed a tree because it is not guaranteed to be one (eg c=a+a).
72  It must however form a DAG (directed acyclic graph).  It must however form a DAG (directed acyclic graph).
# Line 48  I will refer to individual DataLazy obje Line 75  I will refer to individual DataLazy obje
75  Each node also stores:  Each node also stores:
76  - m_readytype \in {'E','T','C','?'} ~ indicates what sort of DataReady would be produced if the expression was  - m_readytype \in {'E','T','C','?'} ~ indicates what sort of DataReady would be produced if the expression was
77      evaluated.      evaluated.
78  - m_buffsrequired ~ the larged number of samples which would need to be kept simultaneously in order to  - m_buffsrequired ~ the large number of samples which would need to be kept simultaneously in order to
79      evaluate the expression.      evaluate the expression.
80  - m_samplesize ~ the number of doubles stored in a sample.  - m_samplesize ~ the number of doubles stored in a sample.
81    
# Line 70  The convention that I use, is that the r Line 97  The convention that I use, is that the r
97  For expressions which evaluate to Constant or Tagged, there is a different evaluation method.  For expressions which evaluate to Constant or Tagged, there is a different evaluation method.
98  The collapse method invokes the (non-lazy) operations on the Data class to evaluate the expression.  The collapse method invokes the (non-lazy) operations on the Data class to evaluate the expression.
99    
100  To add a new operator you need to do the following (plus anything I might have forgotten):  To add a new operator you need to do the following (plus anything I might have forgotten - adding a new group for example):
101  1) Add to the ES_optype.  1) Add to the ES_optype.
102  2) determine what opgroup your operation belongs to (X)  2) determine what opgroup your operation belongs to (X)
103  3) add a string for the op to the end of ES_opstrings  3) add a string for the op to the end of ES_opstrings
# Line 90  namespace escript Line 117  namespace escript
117  namespace  namespace
118  {  {
119    
120    
121    // enabling this will print out when ever the maximum stacksize used by resolve increases
122    // it assumes _OPENMP is also in use
123    //#define LAZY_STACK_PROF
124    
125    
126    
127    #ifndef _OPENMP
128      #ifdef LAZY_STACK_PROF
129      #undef LAZY_STACK_PROF
130      #endif
131    #endif
132    
133    
134    #ifdef LAZY_STACK_PROF
135    std::vector<void*> stackstart(getNumberOfThreads());
136    std::vector<void*> stackend(getNumberOfThreads());
137    size_t maxstackuse=0;
138    #endif
139    
140  enum ES_opgroup  enum ES_opgroup
141  {  {
142     G_UNKNOWN,     G_UNKNOWN,
143     G_IDENTITY,     G_IDENTITY,
144     G_BINARY,        // pointwise operations with two arguments     G_BINARY,        // pointwise operations with two arguments
145     G_UNARY,     // pointwise operations with one argument     G_UNARY,     // pointwise operations with one argument
146       G_UNARY_P,       // pointwise operations with one argument, requiring a parameter
147     G_NP1OUT,        // non-pointwise op with one output     G_NP1OUT,        // non-pointwise op with one output
148     G_TENSORPROD     // general tensor product     G_NP1OUT_P,      // non-pointwise op with one output requiring a parameter
149       G_TENSORPROD,    // general tensor product
150       G_NP1OUT_2P,     // non-pointwise op with one output requiring two params
151       G_REDUCTION,     // non-pointwise unary op with a scalar output
152       G_CONDEVAL
153  };  };
154    
155    
# Line 108  string ES_opstrings[]={"UNKNOWN","IDENTI Line 160  string ES_opstrings[]={"UNKNOWN","IDENTI
160              "asin","acos","atan","sinh","cosh","tanh","erf",              "asin","acos","atan","sinh","cosh","tanh","erf",
161              "asinh","acosh","atanh",              "asinh","acosh","atanh",
162              "log10","log","sign","abs","neg","pos","exp","sqrt",              "log10","log","sign","abs","neg","pos","exp","sqrt",
163              "1/","where>0","where<0","where>=0","where<=0",              "1/","where>0","where<0","where>=0","where<=0", "where<>0","where=0",
164              "symmetric","nonsymmetric",              "symmetric","nonsymmetric",
165              "prod"};              "prod",
166  int ES_opcount=36;              "transpose", "trace",
167                "swapaxes",
168                "minval", "maxval",
169                "condEval"};
170    int ES_opcount=44;
171  ES_opgroup opgroups[]={G_UNKNOWN,G_IDENTITY,G_BINARY,G_BINARY,G_BINARY,G_BINARY, G_BINARY,  ES_opgroup opgroups[]={G_UNKNOWN,G_IDENTITY,G_BINARY,G_BINARY,G_BINARY,G_BINARY, G_BINARY,
172              G_UNARY,G_UNARY,G_UNARY, //10              G_UNARY,G_UNARY,G_UNARY, //10
173              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,    // 17              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,    // 17
174              G_UNARY,G_UNARY,G_UNARY,                    // 20              G_UNARY,G_UNARY,G_UNARY,                    // 20
175              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,        // 28              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,    // 28
176              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,            // 33              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY, G_UNARY_P, G_UNARY_P,      // 35
177              G_NP1OUT,G_NP1OUT,              G_NP1OUT,G_NP1OUT,
178              G_TENSORPROD};              G_TENSORPROD,
179                G_NP1OUT_P, G_NP1OUT_P,
180                G_NP1OUT_2P,
181                G_REDUCTION, G_REDUCTION,
182                G_CONDEVAL};
183  inline  inline
184  ES_opgroup  ES_opgroup
185  getOpgroup(ES_optype op)  getOpgroup(ES_optype op)
# Line 140  resultFS(DataAbstract_ptr left, DataAbst Line 200  resultFS(DataAbstract_ptr left, DataAbst
200    FunctionSpace r=right->getFunctionSpace();    FunctionSpace r=right->getFunctionSpace();
201    if (l!=r)    if (l!=r)
202    {    {
203      if (r.probeInterpolation(l))      signed char res=r.getDomain()->preferredInterpolationOnDomain(r.getTypeCode(), l.getTypeCode());
204        if (res==1)
205      {      {
206      return l;      return l;
207      }      }
208      if (l.probeInterpolation(r))      if (res==-1)
209      {      {
210      return r;      return r;
211      }      }
# Line 164  resultShape(DataAbstract_ptr left, DataA Line 225  resultShape(DataAbstract_ptr left, DataA
225        {        {
226          throw DataException("Shapes not the name - shapes must match for (point)binary operations.");          throw DataException("Shapes not the name - shapes must match for (point)binary operations.");
227        }        }
228    
229        if (left->getRank()==0)   // we need to allow scalar * anything        if (left->getRank()==0)   // we need to allow scalar * anything
230        {        {
231          return right->getShape();          return right->getShape();
# Line 177  resultShape(DataAbstract_ptr left, DataA Line 239  resultShape(DataAbstract_ptr left, DataA
239      return left->getShape();      return left->getShape();
240  }  }
241    
242    // return the shape for "op left"
243    
244    DataTypes::ShapeType
245    resultShape(DataAbstract_ptr left, ES_optype op, int axis_offset)
246    {
247        switch(op)
248        {
249            case TRANS:
250           {            // for the scoping of variables
251            const DataTypes::ShapeType& s=left->getShape();
252            DataTypes::ShapeType sh;
253            int rank=left->getRank();
254            if (axis_offset<0 || axis_offset>rank)
255            {
256                stringstream e;
257                e << "Error - Data::transpose must have 0 <= axis_offset <= rank=" << rank;
258                throw DataException(e.str());
259            }
260            for (int i=0; i<rank; i++)
261            {
262               int index = (axis_offset+i)%rank;
263               sh.push_back(s[index]); // Append to new shape
264            }
265            return sh;
266           }
267        break;
268        case TRACE:
269           {
270            int rank=left->getRank();
271            if (rank<2)
272            {
273               throw DataException("Trace can only be computed for objects with rank 2 or greater.");
274            }
275            if ((axis_offset>rank-2) || (axis_offset<0))
276            {
277               throw DataException("Trace: axis offset must lie between 0 and rank-2 inclusive.");
278            }
279            if (rank==2)
280            {
281               return DataTypes::scalarShape;
282            }
283            else if (rank==3)
284            {
285               DataTypes::ShapeType sh;
286                   if (axis_offset==0)
287               {
288                    sh.push_back(left->getShape()[2]);
289                   }
290                   else     // offset==1
291               {
292                sh.push_back(left->getShape()[0]);
293                   }
294               return sh;
295            }
296            else if (rank==4)
297            {
298               DataTypes::ShapeType sh;
299               const DataTypes::ShapeType& s=left->getShape();
300                   if (axis_offset==0)
301               {
302                    sh.push_back(s[2]);
303                    sh.push_back(s[3]);
304                   }
305                   else if (axis_offset==1)
306               {
307                    sh.push_back(s[0]);
308                    sh.push_back(s[3]);
309                   }
310               else     // offset==2
311               {
312                sh.push_back(s[0]);
313                sh.push_back(s[1]);
314               }
315               return sh;
316            }
317            else        // unknown rank
318            {
319               throw DataException("Error - Data::trace can only be calculated for rank 2, 3 or 4 object.");
320            }
321           }
322        break;
323            default:
324        throw DataException("Programmer error - resultShape(left,op) can't compute shapes for operator "+opToString(op)+".");
325        }
326    }
327    
328    DataTypes::ShapeType
329    SwapShape(DataAbstract_ptr left, const int axis0, const int axis1)
330    {
331         // This code taken from the Data.cpp swapaxes() method
332         // Some of the checks are probably redundant here
333         int axis0_tmp,axis1_tmp;
334         const DataTypes::ShapeType& s=left->getShape();
335         DataTypes::ShapeType out_shape;
336         // Here's the equivalent of python s_out=s[axis_offset:]+s[:axis_offset]
337         // which goes thru all shape vector elements starting with axis_offset (at index=rank wrap around to 0)
338         int rank=left->getRank();
339         if (rank<2) {
340            throw DataException("Error - Data::swapaxes argument must have at least rank 2.");
341         }
342         if (axis0<0 || axis0>rank-1) {
343            stringstream e;
344            e << "Error - Data::swapaxes: axis0 must be between 0 and rank-1=" << (rank-1);
345            throw DataException(e.str());
346         }
347         if (axis1<0 || axis1>rank-1) {
348            stringstream e;
349            e << "Error - Data::swapaxes: axis1 must be between 0 and rank-1=" << (rank-1);
350            throw DataException(e.str());
351         }
352         if (axis0 == axis1) {
353             throw DataException("Error - Data::swapaxes: axis indices must be different.");
354         }
355         if (axis0 > axis1) {
356             axis0_tmp=axis1;
357             axis1_tmp=axis0;
358         } else {
359             axis0_tmp=axis0;
360             axis1_tmp=axis1;
361         }
362         for (int i=0; i<rank; i++) {
363           if (i == axis0_tmp) {
364              out_shape.push_back(s[axis1_tmp]);
365           } else if (i == axis1_tmp) {
366              out_shape.push_back(s[axis0_tmp]);
367           } else {
368              out_shape.push_back(s[i]);
369           }
370         }
371        return out_shape;
372    }
373    
374    
375  // determine the output shape for the general tensor product operation  // determine the output shape for the general tensor product operation
376  // the additional parameters return information required later for the product  // the additional parameters return information required later for the product
377  // the majority of this code is copy pasted from C_General_Tensor_Product  // the majority of this code is copy pasted from C_General_Tensor_Product
# Line 197  GTPShape(DataAbstract_ptr left, DataAbst Line 392  GTPShape(DataAbstract_ptr left, DataAbst
392    else if (transpose == 2)  { start1 = rank1-axis_offset; }    else if (transpose == 2)  { start1 = rank1-axis_offset; }
393    else              { throw DataException("DataLazy GeneralTensorProduct Constructor: Error - transpose should be 0, 1 or 2"); }    else              { throw DataException("DataLazy GeneralTensorProduct Constructor: Error - transpose should be 0, 1 or 2"); }
394    
395      if (rank0<axis_offset)
396      {
397        throw DataException("DataLazy GeneralTensorProduct Constructor: Error - rank of left < axisoffset");
398      }
399    
400    // Adjust the shapes for transpose    // Adjust the shapes for transpose
401    DataTypes::ShapeType tmpShape0(rank0);    // pre-sizing the vectors rather    DataTypes::ShapeType tmpShape0(rank0);    // pre-sizing the vectors rather
# Line 226  GTPShape(DataAbstract_ptr left, DataAbst Line 425  GTPShape(DataAbstract_ptr left, DataAbst
425       for (int i=0; i<rank0-axis_offset; i++, ++out_index) { shape2[out_index]=tmpShape0[i]; } // First part of arg_0_Z       for (int i=0; i<rank0-axis_offset; i++, ++out_index) { shape2[out_index]=tmpShape0[i]; } // First part of arg_0_Z
426       for (int i=axis_offset; i<rank1; i++, ++out_index)   { shape2[out_index]=tmpShape1[i]; } // Last part of arg_1_Z       for (int i=axis_offset; i<rank1; i++, ++out_index)   { shape2[out_index]=tmpShape1[i]; } // Last part of arg_1_Z
427    }    }
   return shape2;  
 }  
428    
429      if (shape2.size()>ESCRIPT_MAX_DATA_RANK)
430      {
431         ostringstream os;
432         os << "C_GeneralTensorProduct: Error - Attempt to create a rank " << shape2.size() << " object. The maximum rank is " << ESCRIPT_MAX_DATA_RANK << ".";
433         throw DataException(os.str());
434      }
435    
436  // determine the number of points in the result of "left op right"    return shape2;
 // note that determining the resultLength for G_TENSORPROD is more complex and will not be processed here  
 // size_t  
 // resultLength(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)  
 // {  
 //    switch (getOpgroup(op))  
 //    {  
 //    case G_BINARY: return left->getLength();  
 //    case G_UNARY: return left->getLength();  
 //    case G_NP1OUT: return left->getLength();  
 //    default:  
 //  throw DataException("Programmer Error - attempt to getLength() for operator "+opToString(op)+".");  
 //    }  
 // }  
   
 // determine the number of samples requires to evaluate an expression combining left and right  
 // NP1OUT needs an extra buffer because we can't write the answers over the top of the input.  
 // The same goes for G_TENSORPROD  
 int  
 calcBuffs(const DataLazy_ptr& left, const DataLazy_ptr& right, ES_optype op)  
 {  
    switch(getOpgroup(op))  
    {  
    case G_IDENTITY: return 1;  
    case G_BINARY: return max(left->getBuffsRequired(),right->getBuffsRequired()+1);  
    case G_UNARY: return max(left->getBuffsRequired(),1);  
    case G_NP1OUT: return 1+max(left->getBuffsRequired(),1);  
    case G_TENSORPROD: return 1+max(left->getBuffsRequired(),right->getBuffsRequired()+1);  
    default:  
     throw DataException("Programmer Error - attempt to calcBuffs() for operator "+opToString(op)+".");  
    }  
437  }  }
438    
   
439  }   // end anonymous namespace  }   // end anonymous namespace
440    
441    
# Line 279  opToString(ES_optype op) Line 451  opToString(ES_optype op)
451    return ES_opstrings[op];    return ES_opstrings[op];
452  }  }
453    
454    void DataLazy::LazyNodeSetup()
455    {
456    #ifdef _OPENMP
457        int numthreads=omp_get_max_threads();
458        m_samples.resize(numthreads*m_samplesize);
459        m_sampleids=new int[numthreads];
460        for (int i=0;i<numthreads;++i)
461        {
462            m_sampleids[i]=-1;  
463        }
464    #else
465        m_samples.resize(m_samplesize);
466        m_sampleids=new int[1];
467        m_sampleids[0]=-1;
468    #endif  // _OPENMP
469    }
470    
471    
472    // Creates an identity node
473  DataLazy::DataLazy(DataAbstract_ptr p)  DataLazy::DataLazy(DataAbstract_ptr p)
474      : parent(p->getFunctionSpace(),p->getShape()),      : parent(p->getFunctionSpace(),p->getShape())
475      m_op(IDENTITY),      ,m_sampleids(0),
476      m_axis_offset(0),      m_samples(1)
     m_transpose(0),  
     m_SL(0), m_SM(0), m_SR(0)  
477  {  {
478     if (p->isLazy())     if (p->isLazy())
479     {     {
# Line 296  DataLazy::DataLazy(DataAbstract_ptr p) Line 484  DataLazy::DataLazy(DataAbstract_ptr p)
484     }     }
485     else     else
486     {     {
487      m_id=dynamic_pointer_cast<DataReady>(p);      p->makeLazyShared();
488      if(p->isConstant()) {m_readytype='C';}      DataReady_ptr dr=dynamic_pointer_cast<DataReady>(p);
489      else if(p->isExpanded()) {m_readytype='E';}      makeIdentity(dr);
490      else if (p->isTagged()) {m_readytype='T';}  LAZYDEBUG(cout << "Wrapping " << dr.get() << " id=" << m_id.get() << endl;)
     else {throw DataException("Unknown DataReady instance in DataLazy constructor.");}  
491     }     }
492     m_buffsRequired=1;  LAZYDEBUG(cout << "(1)Lazy created with " << m_samplesize << endl;)
    m_samplesize=getNumDPPSample()*getNoValues();  
    m_maxsamplesize=m_samplesize;  
 cout << "(1)Lazy created with " << m_samplesize << endl;  
493  }  }
494    
   
   
   
495  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op)  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op)
496      : parent(left->getFunctionSpace(),left->getShape()),      : parent(left->getFunctionSpace(),(getOpgroup(op)!=G_REDUCTION)?left->getShape():DataTypes::scalarShape),
497      m_op(op),      m_op(op),
498      m_axis_offset(0),      m_axis_offset(0),
499      m_transpose(0),      m_transpose(0),
500      m_SL(0), m_SM(0), m_SR(0)      m_SL(0), m_SM(0), m_SR(0)
501  {  {
502     if ((getOpgroup(op)!=G_UNARY) && (getOpgroup(op)!=G_NP1OUT))     if ((getOpgroup(op)!=G_UNARY) && (getOpgroup(op)!=G_NP1OUT) && (getOpgroup(op)!=G_REDUCTION))
503     {     {
504      throw DataException("Programmer error - constructor DataLazy(left, op) will only process UNARY operations.");      throw DataException("Programmer error - constructor DataLazy(left, op) will only process UNARY operations.");
505     }     }
# Line 334  DataLazy::DataLazy(DataAbstract_ptr left Line 515  DataLazy::DataLazy(DataAbstract_ptr left
515     }     }
516     m_readytype=lleft->m_readytype;     m_readytype=lleft->m_readytype;
517     m_left=lleft;     m_left=lleft;
    m_buffsRequired=calcBuffs(m_left, m_right,m_op); // yeah m_right will be null at this point  
518     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
519     m_maxsamplesize=max(m_samplesize,m_left->getMaxSampleSize());     m_children=m_left->m_children+1;
520       m_height=m_left->m_height+1;
521       LazyNodeSetup();
522       SIZELIMIT
523  }  }
524    
525    
# Line 346  DataLazy::DataLazy(DataAbstract_ptr left Line 529  DataLazy::DataLazy(DataAbstract_ptr left
529      m_op(op),      m_op(op),
530      m_SL(0), m_SM(0), m_SR(0)      m_SL(0), m_SM(0), m_SR(0)
531  {  {
532    LAZYDEBUG(cout << "Forming operator with " << left.get() << " " << right.get() << endl;)
533     if ((getOpgroup(op)!=G_BINARY))     if ((getOpgroup(op)!=G_BINARY))
534     {     {
535      throw DataException("Programmer error - constructor DataLazy(left, right, op) will only process BINARY operations.");      throw DataException("Programmer error - constructor DataLazy(left, right, op) will only process BINARY operations.");
# Line 362  DataLazy::DataLazy(DataAbstract_ptr left Line 546  DataLazy::DataLazy(DataAbstract_ptr left
546     {     {
547      Data tmp(Data(right),getFunctionSpace());      Data tmp(Data(right),getFunctionSpace());
548      right=tmp.borrowDataPtr();      right=tmp.borrowDataPtr();
549    LAZYDEBUG(cout << "Right interpolation required " << right.get() << endl;)
550     }     }
551     left->operandCheck(*right);     left->operandCheck(*right);
552    
553     if (left->isLazy())          // the children need to be DataLazy. Wrap them in IDENTITY if required     if (left->isLazy())          // the children need to be DataLazy. Wrap them in IDENTITY if required
554     {     {
555      m_left=dynamic_pointer_cast<DataLazy>(left);      m_left=dynamic_pointer_cast<DataLazy>(left);
556    LAZYDEBUG(cout << "Left is " << m_left->toString() << endl;)
557     }     }
558     else     else
559     {     {
560      m_left=DataLazy_ptr(new DataLazy(left));      m_left=DataLazy_ptr(new DataLazy(left));
561    LAZYDEBUG(cout << "Left " << left.get() << " wrapped " << m_left->m_id.get() << endl;)
562     }     }
563     if (right->isLazy())     if (right->isLazy())
564     {     {
565      m_right=dynamic_pointer_cast<DataLazy>(right);      m_right=dynamic_pointer_cast<DataLazy>(right);
566    LAZYDEBUG(cout << "Right is " << m_right->toString() << endl;)
567     }     }
568     else     else
569     {     {
570      m_right=DataLazy_ptr(new DataLazy(right));      m_right=DataLazy_ptr(new DataLazy(right));
571    LAZYDEBUG(cout << "Right " << right.get() << " wrapped " << m_right->m_id.get() << endl;)
572     }     }
573     char lt=m_left->m_readytype;     char lt=m_left->m_readytype;
574     char rt=m_right->m_readytype;     char rt=m_right->m_readytype;
# Line 396  DataLazy::DataLazy(DataAbstract_ptr left Line 585  DataLazy::DataLazy(DataAbstract_ptr left
585      m_readytype='C';      m_readytype='C';
586     }     }
587     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
588     m_maxsamplesize=max(max(m_samplesize,m_right->getMaxSampleSize()),m_left->getMaxSampleSize());       m_children=m_left->m_children+m_right->m_children+2;
589     m_buffsRequired=calcBuffs(m_left, m_right,m_op);     m_height=max(m_left->m_height,m_right->m_height)+1;
590  cout << "(3)Lazy created with " << m_samplesize << endl;     LazyNodeSetup();
591       SIZELIMIT
592    LAZYDEBUG(cout << "(3)Lazy created with " << m_samplesize << endl;)
593  }  }
594    
595  DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op, int axis_offset, int transpose)  DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op, int axis_offset, int transpose)
# Line 427  DataLazy::DataLazy(DataAbstract_ptr left Line 618  DataLazy::DataLazy(DataAbstract_ptr left
618      Data tmp(Data(right),getFunctionSpace());      Data tmp(Data(right),getFunctionSpace());
619      right=tmp.borrowDataPtr();      right=tmp.borrowDataPtr();
620     }     }
621     left->operandCheck(*right);  //    left->operandCheck(*right);
622    
623     if (left->isLazy())          // the children need to be DataLazy. Wrap them in IDENTITY if required     if (left->isLazy())          // the children need to be DataLazy. Wrap them in IDENTITY if required
624     {     {
# Line 460  DataLazy::DataLazy(DataAbstract_ptr left Line 651  DataLazy::DataLazy(DataAbstract_ptr left
651      m_readytype='C';      m_readytype='C';
652     }     }
653     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
654     m_maxsamplesize=max(max(m_samplesize,m_right->getMaxSampleSize()),m_left->getMaxSampleSize());       m_children=m_left->m_children+m_right->m_children+2;
655     m_buffsRequired=calcBuffs(m_left, m_right,m_op);     m_height=max(m_left->m_height,m_right->m_height)+1;
656  cout << "(4)Lazy created with " << m_samplesize << endl;     LazyNodeSetup();
657       SIZELIMIT
658    LAZYDEBUG(cout << "(4)Lazy created with " << m_samplesize << endl;)
659  }  }
660    
661    
662  DataLazy::~DataLazy()  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, int axis_offset)
663        : parent(left->getFunctionSpace(), resultShape(left,op, axis_offset)),
664        m_op(op),
665        m_axis_offset(axis_offset),
666        m_transpose(0),
667        m_tol(0)
668  {  {
669       if ((getOpgroup(op)!=G_NP1OUT_P))
670       {
671        throw DataException("Programmer error - constructor DataLazy(left, op, ax) will only process UNARY operations which require parameters.");
672       }
673       DataLazy_ptr lleft;
674       if (!left->isLazy())
675       {
676        lleft=DataLazy_ptr(new DataLazy(left));
677       }
678       else
679       {
680        lleft=dynamic_pointer_cast<DataLazy>(left);
681       }
682       m_readytype=lleft->m_readytype;
683       m_left=lleft;
684       m_samplesize=getNumDPPSample()*getNoValues();
685       m_children=m_left->m_children+1;
686       m_height=m_left->m_height+1;
687       LazyNodeSetup();
688       SIZELIMIT
689    LAZYDEBUG(cout << "(5)Lazy created with " << m_samplesize << endl;)
690    }
691    
692    DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, double tol)
693        : parent(left->getFunctionSpace(), left->getShape()),
694        m_op(op),
695        m_axis_offset(0),
696        m_transpose(0),
697        m_tol(tol)
698    {
699       if ((getOpgroup(op)!=G_UNARY_P))
700       {
701        throw DataException("Programmer error - constructor DataLazy(left, op, tol) will only process UNARY operations which require parameters.");
702       }
703       DataLazy_ptr lleft;
704       if (!left->isLazy())
705       {
706        lleft=DataLazy_ptr(new DataLazy(left));
707       }
708       else
709       {
710        lleft=dynamic_pointer_cast<DataLazy>(left);
711       }
712       m_readytype=lleft->m_readytype;
713       m_left=lleft;
714       m_samplesize=getNumDPPSample()*getNoValues();
715       m_children=m_left->m_children+1;
716       m_height=m_left->m_height+1;
717       LazyNodeSetup();
718       SIZELIMIT
719    LAZYDEBUG(cout << "(6)Lazy created with " << m_samplesize << endl;)
720    }
721    
722    
723    DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, const int axis0, const int axis1)
724        : parent(left->getFunctionSpace(), SwapShape(left,axis0,axis1)),
725        m_op(op),
726        m_axis_offset(axis0),
727        m_transpose(axis1),
728        m_tol(0)
729    {
730       if ((getOpgroup(op)!=G_NP1OUT_2P))
731       {
732        throw DataException("Programmer error - constructor DataLazy(left, op, tol) will only process UNARY operations which require two integer parameters.");
733       }
734       DataLazy_ptr lleft;
735       if (!left->isLazy())
736       {
737        lleft=DataLazy_ptr(new DataLazy(left));
738       }
739       else
740       {
741        lleft=dynamic_pointer_cast<DataLazy>(left);
742       }
743       m_readytype=lleft->m_readytype;
744       m_left=lleft;
745       m_samplesize=getNumDPPSample()*getNoValues();
746       m_children=m_left->m_children+1;
747       m_height=m_left->m_height+1;
748       LazyNodeSetup();
749       SIZELIMIT
750    LAZYDEBUG(cout << "(7)Lazy created with " << m_samplesize << endl;)
751  }  }
752    
753    
754  int  namespace
 DataLazy::getBuffsRequired() const  
755  {  {
756      return m_buffsRequired;  
757        inline int max3(int a, int b, int c)
758        {
759        int t=(a>b?a:b);
760        return (t>c?t:c);
761    
762        }
763  }  }
764    
765    DataLazy::DataLazy(DataAbstract_ptr mask, DataAbstract_ptr left, DataAbstract_ptr right/*, double tol*/)
766        : parent(left->getFunctionSpace(), left->getShape()),
767        m_op(CONDEVAL),
768        m_axis_offset(0),
769        m_transpose(0),
770        m_tol(0)
771    {
772    
773  size_t     DataLazy_ptr lmask;
774  DataLazy::getMaxSampleSize() const     DataLazy_ptr lleft;
775       DataLazy_ptr lright;
776       if (!mask->isLazy())
777       {
778        lmask=DataLazy_ptr(new DataLazy(mask));
779       }
780       else
781       {
782        lmask=dynamic_pointer_cast<DataLazy>(mask);
783       }
784       if (!left->isLazy())
785       {
786        lleft=DataLazy_ptr(new DataLazy(left));
787       }
788       else
789       {
790        lleft=dynamic_pointer_cast<DataLazy>(left);
791       }
792       if (!right->isLazy())
793       {
794        lright=DataLazy_ptr(new DataLazy(right));
795       }
796       else
797       {
798        lright=dynamic_pointer_cast<DataLazy>(right);
799       }
800       m_readytype=lmask->m_readytype;
801       if ((lleft->m_readytype!=lright->m_readytype) || (lmask->m_readytype!=lleft->m_readytype))
802       {
803        throw DataException("Programmer Error - condEval arguments must have the same readytype");
804       }
805       m_left=lleft;
806       m_right=lright;
807       m_mask=lmask;
808       m_samplesize=getNumDPPSample()*getNoValues();
809       m_children=m_left->m_children+m_right->m_children+m_mask->m_children+1;
810       m_height=max3(m_left->m_height,m_right->m_height,m_mask->m_height)+1;
811       LazyNodeSetup();
812       SIZELIMIT
813    LAZYDEBUG(cout << "(8)Lazy created with " << m_samplesize << endl;)
814    }
815    
816    
817    
818    DataLazy::~DataLazy()
819  {  {
820      return m_maxsamplesize;     delete[] m_sampleids;
821  }  }
822    
823    
824  /*  /*
825    \brief Evaluates the expression using methods on Data.    \brief Evaluates the expression using methods on Data.
826    This does the work for the collapse method.    This does the work for the collapse method.
827    For reasons of efficiency do not call this method on DataExpanded nodes.    For reasons of efficiency do not call this method on DataExpanded nodes.
828  */  */
829  DataReady_ptr  DataReady_ptr
830  DataLazy::collapseToReady()  DataLazy::collapseToReady() const
831  {  {
832    if (m_readytype=='E')    if (m_readytype=='E')
833    { // this is more an efficiency concern than anything else    { // this is more an efficiency concern than anything else
# Line 602  DataLazy::collapseToReady() Line 939  DataLazy::collapseToReady()
939      case LEZ:      case LEZ:
940      result=left.whereNonPositive();      result=left.whereNonPositive();
941      break;      break;
942        case NEZ:
943        result=left.whereNonZero(m_tol);
944        break;
945        case EZ:
946        result=left.whereZero(m_tol);
947        break;
948      case SYM:      case SYM:
949      result=left.symmetric();      result=left.symmetric();
950      break;      break;
# Line 611  DataLazy::collapseToReady() Line 954  DataLazy::collapseToReady()
954      case PROD:      case PROD:
955      result=C_GeneralTensorProduct(left,right,m_axis_offset, m_transpose);      result=C_GeneralTensorProduct(left,right,m_axis_offset, m_transpose);
956      break;      break;
957        case TRANS:
958        result=left.transpose(m_axis_offset);
959        break;
960        case TRACE:
961        result=left.trace(m_axis_offset);
962        break;
963        case SWAP:
964        result=left.swapaxes(m_axis_offset, m_transpose);
965        break;
966        case MINVAL:
967        result=left.minval();
968        break;
969        case MAXVAL:
970        result=left.minval();
971        break;
972      default:      default:
973      throw DataException("Programmer error - collapseToReady does not know how to resolve operator "+opToString(m_op)+".");      throw DataException("Programmer error - collapseToReady does not know how to resolve operator "+opToString(m_op)+".");
974    }    }
# Line 624  DataLazy::collapseToReady() Line 982  DataLazy::collapseToReady()
982     the purpose of using DataLazy in the first place).     the purpose of using DataLazy in the first place).
983  */  */
984  void  void
985  DataLazy::collapse()  DataLazy::collapse() const
986  {  {
987    if (m_op==IDENTITY)    if (m_op==IDENTITY)
988    {    {
# Line 638  DataLazy::collapse() Line 996  DataLazy::collapse()
996    m_op=IDENTITY;    m_op=IDENTITY;
997  }  }
998    
999  /*  
1000    \brief Compute the value of the expression (unary operation) for the given sample.  
1001    \return Vector which stores the value of the subexpression for the given sample.  
1002    \param v A vector to store intermediate results.  
1003    \param offset Index in v to begin storing results.  
1004    \param sampleNo Sample number to evaluate.  #define PROC_OP(TYPE,X)                               \
1005    \param roffset (output parameter) the offset in the return vector where the result begins.      for (int j=0;j<onumsteps;++j)\
1006        {\
1007    The return value will be an existing vector so do not deallocate it.        for (int i=0;i<numsteps;++i,resultp+=resultStep) \
1008    If the result is stored in v it should be stored at the offset given.        { \
1009    Everything from offset to the end of v should be considered available for this method to use.  LAZYDEBUG(cout << "[left,right]=[" << lroffset << "," << rroffset << "]" << endl;)\
1010  */  LAZYDEBUG(cout << "{left,right}={" << (*left)[lroffset] << "," << (*right)[rroffset] << "}\n";)\
1011  DataTypes::ValueType*           tensor_binary_operation< TYPE >(chunksize, &((*left)[lroffset]), &((*right)[rroffset]), resultp, X); \
1012  DataLazy::resolveUnary(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const  LAZYDEBUG(cout << " result=      " << resultp[0] << endl;) \
1013             lroffset+=leftstep; \
1014             rroffset+=rightstep; \
1015          }\
1016          lroffset+=oleftstep;\
1017          rroffset+=orightstep;\
1018        }
1019    
1020    
1021    // The result will be stored in m_samples
1022    // The return value is a pointer to the DataVector, offset is the offset within the return value
1023    const DataTypes::RealVectorType*
1024    DataLazy::resolveNodeSample(int tid, int sampleNo, size_t& roffset) const
1025    {
1026    LAZYDEBUG(cout << "Resolve sample " << toString() << endl;)
1027        // collapse so we have a 'E' node or an IDENTITY for some other type
1028      if (m_readytype!='E' && m_op!=IDENTITY)
1029      {
1030        collapse();
1031      }
1032      if (m_op==IDENTITY)  
1033      {
1034        const ValueType& vec=m_id->getVectorRO();
1035        roffset=m_id->getPointOffset(sampleNo, 0);
1036    #ifdef LAZY_STACK_PROF
1037    int x;
1038    if (&x<stackend[omp_get_thread_num()])
1039    {
1040           stackend[omp_get_thread_num()]=&x;
1041    }
1042    #endif
1043        return &(vec);
1044      }
1045      if (m_readytype!='E')
1046      {
1047        throw DataException("Programmer Error - Collapse did not produce an expanded node.");
1048      }
1049      if (m_sampleids[tid]==sampleNo)
1050      {
1051        roffset=tid*m_samplesize;
1052        return &(m_samples);        // sample is already resolved
1053      }
1054      m_sampleids[tid]=sampleNo;
1055    
1056      switch (getOpgroup(m_op))
1057      {
1058      case G_UNARY:
1059      case G_UNARY_P: return resolveNodeUnary(tid, sampleNo, roffset);
1060      case G_BINARY: return resolveNodeBinary(tid, sampleNo, roffset);
1061      case G_NP1OUT: return resolveNodeNP1OUT(tid, sampleNo, roffset);
1062      case G_NP1OUT_P: return resolveNodeNP1OUT_P(tid, sampleNo, roffset);
1063      case G_TENSORPROD: return resolveNodeTProd(tid, sampleNo, roffset);
1064      case G_NP1OUT_2P: return resolveNodeNP1OUT_2P(tid, sampleNo, roffset);
1065      case G_REDUCTION: return resolveNodeReduction(tid, sampleNo, roffset);
1066      case G_CONDEVAL: return resolveNodeCondEval(tid, sampleNo, roffset);
1067      default:
1068        throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");
1069      }
1070    }
1071    
1072    const DataTypes::RealVectorType*
1073    DataLazy::resolveNodeUnary(int tid, int sampleNo, size_t& roffset) const
1074  {  {
1075      // we assume that any collapsing has been done before we get here      // we assume that any collapsing has been done before we get here
1076      // since we only have one argument we don't need to think about only      // since we only have one argument we don't need to think about only
1077      // processing single points.      // processing single points.
1078        // we will also know we won't get identity nodes
1079    if (m_readytype!='E')    if (m_readytype!='E')
1080    {    {
1081      throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");      throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
1082    }    }
1083    const ValueType* vleft=m_left->resolveSample(v,offset,sampleNo,roffset);    if (m_op==IDENTITY)
1084    const double* left=&((*vleft)[roffset]);    {
1085    double* result=&(v[offset]);      throw DataException("Programmer error - resolveNodeUnary should not be called on identity nodes.");
1086    roffset=offset;    }
1087      const DataTypes::RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, roffset);
1088      const double* left=&((*leftres)[roffset]);
1089      roffset=m_samplesize*tid;
1090      double* result=&(m_samples[roffset]);
1091    switch (m_op)    switch (m_op)
1092    {    {
1093      case SIN:        case SIN:  
# Line 762  DataLazy::resolveUnary(ValueType& v, siz Line 1186  DataLazy::resolveUnary(ValueType& v, siz
1186      case LEZ:      case LEZ:
1187      tensor_unary_operation(m_samplesize, left, result, bind2nd(less_equal<double>(),0.0));      tensor_unary_operation(m_samplesize, left, result, bind2nd(less_equal<double>(),0.0));
1188      break;      break;
1189    // There are actually G_UNARY_P but I don't see a compelling reason to treat them differently
1190        case NEZ:
1191        tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsGT(),m_tol));
1192        break;
1193        case EZ:
1194        tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsLTE(),m_tol));
1195        break;
1196    
1197      default:      default:
1198      throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");      throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");
1199    }    }
1200    return &v;    return &(m_samples);
1201  }  }
1202    
1203    
1204  /*  const DataTypes::RealVectorType*
1205    \brief Compute the value of the expression (unary operation) for the given sample.  DataLazy::resolveNodeReduction(int tid, int sampleNo, size_t& roffset) const
   \return Vector which stores the value of the subexpression for the given sample.  
   \param v A vector to store intermediate results.  
   \param offset Index in v to begin storing results.  
   \param sampleNo Sample number to evaluate.  
   \param roffset (output parameter) the offset in the return vector where the result begins.  
   
   The return value will be an existing vector so do not deallocate it.  
   If the result is stored in v it should be stored at the offset given.  
   Everything from offset to the end of v should be considered available for this method to use.  
 */  
 DataTypes::ValueType*  
 DataLazy::resolveNP1OUT(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const  
1206  {  {
1207      // we assume that any collapsing has been done before we get here      // we assume that any collapsing has been done before we get here
1208      // since we only have one argument we don't need to think about only      // since we only have one argument we don't need to think about only
1209      // processing single points.      // processing single points.
1210        // we will also know we won't get identity nodes
1211    if (m_readytype!='E')    if (m_readytype!='E')
1212    {    {
1213      throw DataException("Programmer error - resolveNP1OUT should only be called on expanded Data.");      throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
1214    }    }
1215      // since we can't write the result over the input, we need a result offset further along    if (m_op==IDENTITY)
1216    size_t subroffset=roffset+m_samplesize;    {
1217    const ValueType* vleft=m_left->resolveSample(v,offset,sampleNo,subroffset);      throw DataException("Programmer error - resolveNodeUnary should not be called on identity nodes.");
1218    roffset=offset;    }
1219      size_t loffset=0;
1220      const DataTypes::RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, loffset);
1221    
1222      roffset=m_samplesize*tid;
1223      unsigned int ndpps=getNumDPPSample();
1224      unsigned int psize=DataTypes::noValues(m_left->getShape());
1225      double* result=&(m_samples[roffset]);
1226      switch (m_op)
1227      {
1228        case MINVAL:
1229        {
1230          for (unsigned int z=0;z<ndpps;++z)
1231          {
1232            FMin op;
1233            *result=DataMaths::reductionOp(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max());
1234            loffset+=psize;
1235            result++;
1236          }
1237        }
1238        break;
1239        case MAXVAL:
1240        {
1241          for (unsigned int z=0;z<ndpps;++z)
1242          {
1243          FMax op;
1244          *result=DataMaths::reductionOp(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max()*-1);
1245          loffset+=psize;
1246          result++;
1247          }
1248        }
1249        break;
1250        default:
1251        throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");
1252      }
1253      return &(m_samples);
1254    }
1255    
1256    const DataTypes::RealVectorType*
1257    DataLazy::resolveNodeNP1OUT(int tid, int sampleNo, size_t& roffset) const
1258    {
1259        // we assume that any collapsing has been done before we get here
1260        // since we only have one argument we don't need to think about only
1261        // processing single points.
1262      if (m_readytype!='E')
1263      {
1264        throw DataException("Programmer error - resolveNodeNP1OUT should only be called on expanded Data.");
1265      }
1266      if (m_op==IDENTITY)
1267      {
1268        throw DataException("Programmer error - resolveNodeNP1OUT should not be called on identity nodes.");
1269      }
1270      size_t subroffset;
1271      const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1272      roffset=m_samplesize*tid;
1273      size_t loop=0;
1274      size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1275      size_t step=getNoValues();
1276      size_t offset=roffset;
1277    switch (m_op)    switch (m_op)
1278    {    {
1279      case SYM:      case SYM:
1280      DataMaths::symmetric(*vleft,m_left->getShape(),subroffset, v, getShape(), offset);      for (loop=0;loop<numsteps;++loop)
1281        {
1282            DataMaths::symmetric(*leftres,m_left->getShape(),subroffset, m_samples, getShape(), offset);
1283            subroffset+=step;
1284            offset+=step;
1285        }
1286      break;      break;
1287      case NSYM:      case NSYM:
1288      DataMaths::nonsymmetric(*vleft,m_left->getShape(),subroffset, v, getShape(), offset);      for (loop=0;loop<numsteps;++loop)
1289        {
1290            DataMaths::nonsymmetric(*leftres,m_left->getShape(),subroffset, m_samples, getShape(), offset);
1291            subroffset+=step;
1292            offset+=step;
1293        }
1294      break;      break;
1295      default:      default:
1296      throw DataException("Programmer error - resolveNP1OUT can not resolve operator "+opToString(m_op)+".");      throw DataException("Programmer error - resolveNP1OUT can not resolve operator "+opToString(m_op)+".");
1297    }    }
1298    return &v;    return &m_samples;
1299  }  }
1300    
1301    const DataTypes::RealVectorType*
1302    DataLazy::resolveNodeNP1OUT_P(int tid, int sampleNo, size_t& roffset) const
1303    {
1304        // we assume that any collapsing has been done before we get here
1305        // since we only have one argument we don't need to think about only
1306        // processing single points.
1307      if (m_readytype!='E')
1308      {
1309        throw DataException("Programmer error - resolveNodeNP1OUT_P should only be called on expanded Data.");
1310      }
1311      if (m_op==IDENTITY)
1312      {
1313        throw DataException("Programmer error - resolveNodeNP1OUT_P should not be called on identity nodes.");
1314      }
1315      size_t subroffset;
1316      size_t offset;
1317      const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1318      roffset=m_samplesize*tid;
1319      offset=roffset;
1320      size_t loop=0;
1321      size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1322      size_t outstep=getNoValues();
1323      size_t instep=m_left->getNoValues();
1324      switch (m_op)
1325      {
1326        case TRACE:
1327        for (loop=0;loop<numsteps;++loop)
1328        {
1329                DataMaths::trace(*leftres,m_left->getShape(),subroffset, m_samples ,getShape(),offset,m_axis_offset);
1330            subroffset+=instep;
1331            offset+=outstep;
1332        }
1333        break;
1334        case TRANS:
1335        for (loop=0;loop<numsteps;++loop)
1336        {
1337                DataMaths::transpose(*leftres,m_left->getShape(),subroffset, m_samples, getShape(),offset,m_axis_offset);
1338            subroffset+=instep;
1339            offset+=outstep;
1340        }
1341        break;
1342        default:
1343        throw DataException("Programmer error - resolveNP1OUTP can not resolve operator "+opToString(m_op)+".");
1344      }
1345      return &m_samples;
1346    }
1347    
1348    
1349    const DataTypes::RealVectorType*
1350  #define PROC_OP(TYPE,X)                               \  DataLazy::resolveNodeNP1OUT_2P(int tid, int sampleNo, size_t& roffset) const
1351      for (int i=0;i<steps;++i,resultp+=resultStep) \  {
1352      { \    if (m_readytype!='E')
1353         tensor_binary_operation< TYPE >(chunksize, &((*left)[lroffset]), &((*right)[rroffset]), resultp, X); \    {
1354         lroffset+=leftStep; \      throw DataException("Programmer error - resolveNodeNP1OUT_2P should only be called on expanded Data.");
1355         rroffset+=rightStep; \    }
1356      if (m_op==IDENTITY)
1357      {
1358        throw DataException("Programmer error - resolveNodeNP1OUT_2P should not be called on identity nodes.");
1359      }
1360      size_t subroffset;
1361      size_t offset;
1362      const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1363      roffset=m_samplesize*tid;
1364      offset=roffset;
1365      size_t loop=0;
1366      size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1367      size_t outstep=getNoValues();
1368      size_t instep=m_left->getNoValues();
1369      switch (m_op)
1370      {
1371        case SWAP:
1372        for (loop=0;loop<numsteps;++loop)
1373        {
1374                DataMaths::swapaxes(*leftres,m_left->getShape(),subroffset, m_samples, getShape(),offset, m_axis_offset, m_transpose);
1375            subroffset+=instep;
1376            offset+=outstep;
1377      }      }
1378        break;
1379        default:
1380        throw DataException("Programmer error - resolveNodeNP1OUT2P can not resolve operator "+opToString(m_op)+".");
1381      }
1382      return &m_samples;
1383    }
1384    
1385    const DataTypes::RealVectorType*
1386    DataLazy::resolveNodeCondEval(int tid, int sampleNo, size_t& roffset) const
1387    {
1388      if (m_readytype!='E')
1389      {
1390        throw DataException("Programmer error - resolveNodeCondEval should only be called on expanded Data.");
1391      }
1392      if (m_op!=CONDEVAL)
1393      {
1394        throw DataException("Programmer error - resolveNodeCondEval should only be called on CONDEVAL nodes.");
1395      }
1396      size_t subroffset;
1397    
1398      const ValueType* maskres=m_mask->resolveNodeSample(tid, sampleNo, subroffset);
1399      const ValueType* srcres=0;
1400      if ((*maskres)[subroffset]>0)
1401      {
1402        srcres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1403      }
1404      else
1405      {
1406        srcres=m_right->resolveNodeSample(tid, sampleNo, subroffset);
1407      }
1408    
1409      // Now we need to copy the result
1410    
1411      roffset=m_samplesize*tid;
1412      for (int i=0;i<m_samplesize;++i)
1413      {
1414        m_samples[roffset+i]=(*srcres)[subroffset+i];  
1415      }
1416    
1417      return &m_samples;
1418    }
1419    
 /*  
   \brief Compute the value of the expression (binary operation) for the given sample.  
   \return Vector which stores the value of the subexpression for the given sample.  
   \param v A vector to store intermediate results.  
   \param offset Index in v to begin storing results.  
   \param sampleNo Sample number to evaluate.  
   \param roffset (output parameter) the offset in the return vector where the result begins.  
   
   The return value will be an existing vector so do not deallocate it.  
   If the result is stored in v it should be stored at the offset given.  
   Everything from offset to the end of v should be considered available for this method to use.  
 */  
1420  // This method assumes that any subexpressions which evaluate to Constant or Tagged Data  // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1421  // have already been collapsed to IDENTITY. So we must have at least one expanded child.  // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1422  // If both children are expanded, then we can process them in a single operation (we treat  // If both children are expanded, then we can process them in a single operation (we treat
# Line 842  DataLazy::resolveNP1OUT(ValueType& v, si Line 1426  DataLazy::resolveNP1OUT(ValueType& v, si
1426  // There is an additional complication when scalar operations are considered.  // There is an additional complication when scalar operations are considered.
1427  // For example, 2+Vector.  // For example, 2+Vector.
1428  // In this case each double within the point is treated individually  // In this case each double within the point is treated individually
1429  DataTypes::ValueType*  const DataTypes::RealVectorType*
1430  DataLazy::resolveBinary(ValueType& v,  size_t offset, int sampleNo, size_t& roffset) const  DataLazy::resolveNodeBinary(int tid, int sampleNo, size_t& roffset) const
1431  {  {
1432  cout << "Resolve binary: " << toString() << endl;  LAZYDEBUG(cout << "Resolve binary: " << toString() << endl;)
1433    
1434    size_t lroffset=0, rroffset=0;    // offsets in the left and right result vectors    size_t lroffset=0, rroffset=0;    // offsets in the left and right result vectors
1435      // first work out which of the children are expanded      // first work out which of the children are expanded
1436    bool leftExp=(m_left->m_readytype=='E');    bool leftExp=(m_left->m_readytype=='E');
1437    bool rightExp=(m_right->m_readytype=='E');    bool rightExp=(m_right->m_readytype=='E');
1438    bool bigloops=((leftExp && rightExp) || (!leftExp && !rightExp)); // is processing in single step?    if (!leftExp && !rightExp)
1439    int steps=(bigloops?1:getNumDPPSample());    {
1440    size_t chunksize=(bigloops? m_samplesize : getNoValues());    // if bigloops, pretend the whole sample is a datapoint      throw DataException("Programmer Error - please use collapse if neither argument has type 'E'.");
1441    if (m_left->getRank()!=m_right->getRank())    // need to deal with scalar * ? ops    }
1442    {    bool leftScalar=(m_left->getRank()==0);
1443      EsysAssert((m_left->getRank()==0) || (m_right->getRank()==0), "Error - Ranks must match unless one is 0.");    bool rightScalar=(m_right->getRank()==0);
1444      steps=getNumDPPSample()*max(m_left->getNoValues(),m_right->getNoValues());    if ((m_left->getRank()!=m_right->getRank()) && (!leftScalar && !rightScalar))
1445      chunksize=1;    // for scalar    {
1446    }          throw DataException("resolveBinary - ranks of arguments must match unless one of them is scalar.");
1447    int leftStep=((leftExp && !rightExp)? m_right->getNoValues() : 0);    }
1448    int rightStep=((rightExp && !leftExp)? m_left->getNoValues() : 0);    size_t leftsize=m_left->getNoValues();
1449    int resultStep=max(leftStep,rightStep);   // only one (at most) should be !=0    size_t rightsize=m_right->getNoValues();
1450      size_t chunksize=1;           // how many doubles will be processed in one go
1451      int leftstep=0;       // how far should the left offset advance after each step
1452      int rightstep=0;
1453      int numsteps=0;       // total number of steps for the inner loop
1454      int oleftstep=0;  // the o variables refer to the outer loop
1455      int orightstep=0; // The outer loop is only required in cases where there is an extended scalar
1456      int onumsteps=1;
1457      
1458      bool LES=(leftExp && leftScalar); // Left is an expanded scalar
1459      bool RES=(rightExp && rightScalar);
1460      bool LS=(!leftExp && leftScalar); // left is a single scalar
1461      bool RS=(!rightExp && rightScalar);
1462      bool LN=(!leftExp && !leftScalar);    // left is a single non-scalar
1463      bool RN=(!rightExp && !rightScalar);
1464      bool LEN=(leftExp && !leftScalar);    // left is an expanded non-scalar
1465      bool REN=(rightExp && !rightScalar);
1466    
1467      if ((LES && RES) || (LEN && REN)) // both are Expanded scalars or both are expanded non-scalars
1468      {
1469        chunksize=m_left->getNumDPPSample()*leftsize;
1470        leftstep=0;
1471        rightstep=0;
1472        numsteps=1;
1473      }
1474      else if (LES || RES)
1475      {
1476        chunksize=1;
1477        if (LES)        // left is an expanded scalar
1478        {
1479            if (RS)
1480            {
1481               leftstep=1;
1482               rightstep=0;
1483               numsteps=m_left->getNumDPPSample();
1484            }
1485            else        // RN or REN
1486            {
1487               leftstep=0;
1488               oleftstep=1;
1489               rightstep=1;
1490               orightstep=(RN ? -(int)rightsize : 0);
1491               numsteps=rightsize;
1492               onumsteps=m_left->getNumDPPSample();
1493            }
1494        }
1495        else        // right is an expanded scalar
1496        {
1497            if (LS)
1498            {
1499               rightstep=1;
1500               leftstep=0;
1501               numsteps=m_right->getNumDPPSample();
1502            }
1503            else
1504            {
1505               rightstep=0;
1506               orightstep=1;
1507               leftstep=1;
1508               oleftstep=(LN ? -(int)leftsize : 0);
1509               numsteps=leftsize;
1510               onumsteps=m_right->getNumDPPSample();
1511            }
1512        }
1513      }
1514      else  // this leaves (LEN, RS), (LEN, RN) and their transposes
1515      {
1516        if (LEN)    // and Right will be a single value
1517        {
1518            chunksize=rightsize;
1519            leftstep=rightsize;
1520            rightstep=0;
1521            numsteps=m_left->getNumDPPSample();
1522            if (RS)
1523            {
1524               numsteps*=leftsize;
1525            }
1526        }
1527        else    // REN
1528        {
1529            chunksize=leftsize;
1530            rightstep=leftsize;
1531            leftstep=0;
1532            numsteps=m_right->getNumDPPSample();
1533            if (LS)
1534            {
1535               numsteps*=rightsize;
1536            }
1537        }
1538      }
1539    
1540      int resultStep=max(leftstep,rightstep);   // only one (at most) should be !=0
1541      // Get the values of sub-expressions      // Get the values of sub-expressions
1542    const ValueType* left=m_left->resolveSample(v,offset,sampleNo,lroffset);    const ValueType* left=m_left->resolveNodeSample(tid,sampleNo,lroffset);  
1543    const ValueType* right=m_right->resolveSample(v,offset+m_samplesize,sampleNo,rroffset); // Note    const ValueType* right=m_right->resolveNodeSample(tid,sampleNo,rroffset);
1544      // the right child starts further along.  LAZYDEBUG(cout << "Post sub calls in " << toString() << endl;)
1545    double* resultp=&(v[offset]);     // results are stored at the vector offset we recieved  LAZYDEBUG(cout << "shapes=" << DataTypes::shapeToString(m_left->getShape()) << "," << DataTypes::shapeToString(m_right->getShape()) << endl;)
1546    LAZYDEBUG(cout << "chunksize=" << chunksize << endl << "leftstep=" << leftstep << " rightstep=" << rightstep;)
1547    LAZYDEBUG(cout << " numsteps=" << numsteps << endl << "oleftstep=" << oleftstep << " orightstep=" << orightstep;)
1548    LAZYDEBUG(cout << "onumsteps=" << onumsteps << endl;)
1549    LAZYDEBUG(cout << " DPPS=" << m_left->getNumDPPSample() << "," <<m_right->getNumDPPSample() << endl;)
1550    LAZYDEBUG(cout << "" << LS << RS << LN << RN << LES << RES <<LEN << REN <<   endl;)
1551    
1552    LAZYDEBUG(cout << "Left res["<< lroffset<< "]=" << (*left)[lroffset] << endl;)
1553    LAZYDEBUG(cout << "Right res["<< rroffset<< "]=" << (*right)[rroffset] << endl;)
1554    
1555    
1556      roffset=m_samplesize*tid;
1557      double* resultp=&(m_samples[roffset]);        // results are stored at the vector offset we received
1558    switch(m_op)    switch(m_op)
1559    {    {
1560      case ADD:      case ADD:
# Line 888  cout << "Resolve binary: " << toString() Line 1575  cout << "Resolve binary: " << toString()
1575      default:      default:
1576      throw DataException("Programmer error - resolveBinary can not resolve operator "+opToString(m_op)+".");      throw DataException("Programmer error - resolveBinary can not resolve operator "+opToString(m_op)+".");
1577    }    }
1578    roffset=offset;    LAZYDEBUG(cout << "Result res[" << roffset<< "]" << m_samples[roffset] << endl;)
1579    return &v;    return &m_samples;
1580  }  }
1581    
1582    
 /*  
   \brief Compute the value of the expression (tensor product) for the given sample.  
   \return Vector which stores the value of the subexpression for the given sample.  
   \param v A vector to store intermediate results.  
   \param offset Index in v to begin storing results.  
   \param sampleNo Sample number to evaluate.  
   \param roffset (output parameter) the offset in the return vector where the result begins.  
   
   The return value will be an existing vector so do not deallocate it.  
   If the result is stored in v it should be stored at the offset given.  
   Everything from offset to the end of v should be considered available for this method to use.  
 */  
1583  // This method assumes that any subexpressions which evaluate to Constant or Tagged Data  // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1584  // have already been collapsed to IDENTITY. So we must have at least one expanded child.  // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1585  // unlike the other resolve helpers, we must treat these datapoints separately.  // unlike the other resolve helpers, we must treat these datapoints separately.
1586  DataTypes::ValueType*  const DataTypes::RealVectorType*
1587  DataLazy::resolveTProd(ValueType& v,  size_t offset, int sampleNo, size_t& roffset) const  DataLazy::resolveNodeTProd(int tid, int sampleNo, size_t& roffset) const
1588  {  {
1589  cout << "Resolve TensorProduct: " << toString() << endl;  LAZYDEBUG(cout << "Resolve TensorProduct: " << toString() << endl;)
1590    
1591    size_t lroffset=0, rroffset=0;    // offsets in the left and right result vectors    size_t lroffset=0, rroffset=0;    // offsets in the left and right result vectors
1592      // first work out which of the children are expanded      // first work out which of the children are expanded
1593    bool leftExp=(m_left->m_readytype=='E');    bool leftExp=(m_left->m_readytype=='E');
1594    bool rightExp=(m_right->m_readytype=='E');    bool rightExp=(m_right->m_readytype=='E');
1595    int steps=getNumDPPSample();    int steps=getNumDPPSample();
1596    int leftStep=((leftExp && !rightExp)? m_right->getNoValues() : 0);    int leftStep=(leftExp? m_left->getNoValues() : 0);        // do not have scalars as input to this method
1597    int rightStep=((rightExp && !leftExp)? m_left->getNoValues() : 0);    int rightStep=(rightExp?m_right->getNoValues() : 0);
1598    int resultStep=max(leftStep,rightStep);   // only one (at most) should be !=0  
1599      // Get the values of sub-expressions (leave a gap of one sample for the result).    int resultStep=getNoValues();
1600    const ValueType* left=m_left->resolveSample(v,offset+m_samplesize,sampleNo,lroffset);    roffset=m_samplesize*tid;
1601    const ValueType* right=m_right->resolveSample(v,offset+2*m_samplesize,sampleNo,rroffset);    size_t offset=roffset;
1602    double* resultp=&(v[offset]);     // results are stored at the vector offset we recieved  
1603      const ValueType* left=m_left->resolveNodeSample(tid, sampleNo, lroffset);
1604    
1605      const ValueType* right=m_right->resolveNodeSample(tid, sampleNo, rroffset);
1606    
1607    LAZYDEBUG(cerr << "[Left shape]=" << DataTypes::shapeToString(m_left->getShape()) << "\n[Right shape]=" << DataTypes::shapeToString(m_right->getShape()) << " result=" <<DataTypes::shapeToString(getShape()) <<  endl;
1608    cout << getNoValues() << endl;)
1609    
1610    
1611    LAZYDEBUG(cerr << "Post sub calls: " << toString() << endl;)
1612    LAZYDEBUG(cout << "LeftExp=" << leftExp << " rightExp=" << rightExp << endl;)
1613    LAZYDEBUG(cout << "LeftR=" << m_left->getRank() << " rightExp=" << m_right->getRank() << endl;)
1614    LAZYDEBUG(cout << "LeftSize=" << m_left->getNoValues() << " RightSize=" << m_right->getNoValues() << endl;)
1615    LAZYDEBUG(cout << "m_samplesize=" << m_samplesize << endl;)
1616    LAZYDEBUG(cout << "outputshape=" << DataTypes::shapeToString(getShape()) << endl;)
1617    LAZYDEBUG(cout << "DPPS=" << m_right->getNumDPPSample() <<"."<<endl;)
1618    
1619      double* resultp=&(m_samples[offset]);     // results are stored at the vector offset we received
1620    switch(m_op)    switch(m_op)
1621    {    {
1622      case PROD:      case PROD:
# Line 932  cout << "Resolve TensorProduct: " << toS Line 1624  cout << "Resolve TensorProduct: " << toS
1624      {      {
1625            const double *ptr_0 = &((*left)[lroffset]);            const double *ptr_0 = &((*left)[lroffset]);
1626            const double *ptr_1 = &((*right)[rroffset]);            const double *ptr_1 = &((*right)[rroffset]);
1627    
1628    LAZYDEBUG(cout << DataTypes::pointToString(*left, m_left->getShape(),lroffset,"LEFT") << endl;)
1629    LAZYDEBUG(cout << DataTypes::pointToString(*right,m_right->getShape(),rroffset, "RIGHT") << endl;)
1630    
1631            matrix_matrix_product(m_SL, m_SM, m_SR, ptr_0, ptr_1, resultp, m_transpose);            matrix_matrix_product(m_SL, m_SM, m_SR, ptr_0, ptr_1, resultp, m_transpose);
1632    
1633        lroffset+=leftStep;        lroffset+=leftStep;
1634        rroffset+=rightStep;        rroffset+=rightStep;
1635      }      }
# Line 941  cout << "Resolve TensorProduct: " << toS Line 1638  cout << "Resolve TensorProduct: " << toS
1638      throw DataException("Programmer error - resolveTProduct can not resolve operator "+opToString(m_op)+".");      throw DataException("Programmer error - resolveTProduct can not resolve operator "+opToString(m_op)+".");
1639    }    }
1640    roffset=offset;    roffset=offset;
1641    return &v;    return &m_samples;
1642  }  }
1643    
1644    
1645    const DataTypes::RealVectorType*
1646    DataLazy::resolveSample(int sampleNo, size_t& roffset) const
1647    {
1648    #ifdef _OPENMP
1649        int tid=omp_get_thread_num();
1650    #else
1651        int tid=0;
1652    #endif
1653    
1654  /*  #ifdef LAZY_STACK_PROF
1655    \brief Compute the value of the expression for the given sample.      stackstart[tid]=&tid;
1656    \return Vector which stores the value of the subexpression for the given sample.      stackend[tid]=&tid;
1657    \param v A vector to store intermediate results.      const DataTypes::RealVectorType* r=resolveNodeSample(tid, sampleNo, roffset);
1658    \param offset Index in v to begin storing results.      size_t d=(size_t)stackstart[tid]-(size_t)stackend[tid];
1659    \param sampleNo Sample number to evaluate.      #pragma omp critical
1660    \param roffset (output parameter) the offset in the return vector where the result begins.      if (d>maxstackuse)
1661        {
1662    cout << "Max resolve Stack use " << d << endl;
1663            maxstackuse=d;
1664        }
1665        return r;
1666    #else
1667        return resolveNodeSample(tid, sampleNo, roffset);
1668    #endif
1669    }
1670    
1671    The return value will be an existing vector so do not deallocate it.  
1672  */  // This needs to do the work of the identity constructor
1673  // the vector and the offset are a place where the method could write its data if it wishes  void
1674  // it is not obligated to do so. For example, if it has its own storage already, it can use that.  DataLazy::resolveToIdentity()
 // Hence the return value to indicate where the data is actually stored.  
 // Regardless, the storage should be assumed to be used, even if it isn't.  
   
 // the roffset is the offset within the returned vector where the data begins  
 const DataTypes::ValueType*  
 DataLazy::resolveSample(ValueType& v, size_t offset, int sampleNo, size_t& roffset)  
1675  {  {
1676  cout << "Resolve sample " << toString() << endl;     if (m_op==IDENTITY)
1677      // collapse so we have a 'E' node or an IDENTITY for some other type      return;
1678    if (m_readytype!='E' && m_op!=IDENTITY)     DataReady_ptr p=resolveNodeWorker();
1679       makeIdentity(p);
1680    }
1681    
1682    void DataLazy::makeIdentity(const DataReady_ptr& p)
1683    {
1684       m_op=IDENTITY;
1685       m_axis_offset=0;
1686       m_transpose=0;
1687       m_SL=m_SM=m_SR=0;
1688       m_children=m_height=0;
1689       m_id=p;
1690       if(p->isConstant()) {m_readytype='C';}
1691       else if(p->isExpanded()) {m_readytype='E';}
1692       else if (p->isTagged()) {m_readytype='T';}
1693       else {throw DataException("Unknown DataReady instance in convertToIdentity constructor.");}
1694       m_samplesize=p->getNumDPPSample()*p->getNoValues();
1695       m_left.reset();
1696       m_right.reset();
1697    }
1698    
1699    
1700    DataReady_ptr
1701    DataLazy::resolve()
1702    {
1703        resolveToIdentity();
1704        return m_id;
1705    }
1706    
1707    
1708    /* This is really a static method but I think that caused problems in windows */
1709    void
1710    DataLazy::resolveGroupWorker(std::vector<DataLazy*>& dats)
1711    {
1712      if (dats.empty())
1713    {    {
1714      collapse();      return;
1715    }    }
1716    if (m_op==IDENTITY)      vector<DataLazy*> work;
1717      FunctionSpace fs=dats[0]->getFunctionSpace();
1718      bool match=true;
1719      for (int i=dats.size()-1;i>=0;--i)
1720    {    {
1721      const ValueType& vec=m_id->getVector();      if (dats[i]->m_readytype!='E')
1722      if (m_readytype=='C')      {
1723      {          dats[i]->collapse();
1724      roffset=0;      }
1725      return &(vec);      if (dats[i]->m_op!=IDENTITY)
1726      }      {
1727      roffset=m_id->getPointOffset(sampleNo, 0);          work.push_back(dats[i]);
1728      return &(vec);          if (fs!=dats[i]->getFunctionSpace())
1729            {
1730                match=false;
1731            }
1732        }
1733    }    }
1734    if (m_readytype!='E')    if (work.empty())
1735    {    {
1736      throw DataException("Programmer Error - Collapse did not produce an expanded node.");      return;     // no work to do
1737    }    }
1738    switch (getOpgroup(m_op))    if (match)    // all functionspaces match.  Yes I realise this is overly strict
1739      {     // it is possible that dats[0] is one of the objects which we discarded and
1740            // all the other functionspaces match.
1741        vector<DataExpanded*> dep;
1742        vector<ValueType*> vecs;
1743        for (int i=0;i<work.size();++i)
1744        {
1745            dep.push_back(new DataExpanded(fs,work[i]->getShape(), ValueType(work[i]->getNoValues())));
1746            vecs.push_back(&(dep[i]->getVectorRW()));
1747        }
1748        int totalsamples=work[0]->getNumSamples();
1749        const ValueType* res=0; // Storage for answer
1750        int sample;
1751        #pragma omp parallel private(sample, res)
1752        {
1753            size_t roffset=0;
1754            #pragma omp for schedule(static)
1755            for (sample=0;sample<totalsamples;++sample)
1756            {
1757            roffset=0;
1758            int j;
1759            for (j=work.size()-1;j>=0;--j)
1760            {
1761    #ifdef _OPENMP
1762                    res=work[j]->resolveNodeSample(omp_get_thread_num(),sample,roffset);
1763    #else
1764                    res=work[j]->resolveNodeSample(0,sample,roffset);
1765    #endif
1766                    RealVectorType::size_type outoffset=dep[j]->getPointOffset(sample,0);
1767                    memcpy(&((*vecs[j])[outoffset]),&((*res)[roffset]),work[j]->m_samplesize*sizeof(RealVectorType::ElementType));
1768            }
1769            }
1770        }
1771        // Now we need to load the new results as identity ops into the lazy nodes
1772        for (int i=work.size()-1;i>=0;--i)
1773        {
1774            work[i]->makeIdentity(boost::dynamic_pointer_cast<DataReady>(dep[i]->getPtr()));
1775        }
1776      }
1777      else  // functionspaces do not match
1778    {    {
1779    case G_UNARY: return resolveUnary(v, offset,sampleNo,roffset);      for (int i=0;i<work.size();++i)
1780    case G_BINARY: return resolveBinary(v, offset,sampleNo,roffset);      {
1781    case G_NP1OUT: return resolveNP1OUT(v, offset, sampleNo,roffset);          work[i]->resolveToIdentity();
1782    case G_TENSORPROD: return resolveTProd(v,offset, sampleNo,roffset);      }
   default:  
     throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");  
1783    }    }
1784  }  }
1785    
1786    
1787  // To simplify the memory management, all threads operate on one large vector, rather than one each.  
1788  // Each sample is evaluated independently and copied into the result DataExpanded.  // This version of resolve uses storage in each node to hold results
1789  DataReady_ptr  DataReady_ptr
1790  DataLazy::resolve()  DataLazy::resolveNodeWorker()
1791  {  {
   
 cout << "Sample size=" << m_samplesize << endl;  
 cout << "Buffers=" << m_buffsRequired << endl;  
   
1792    if (m_readytype!='E')     // if the whole sub-expression is Constant or Tagged, then evaluate it normally    if (m_readytype!='E')     // if the whole sub-expression is Constant or Tagged, then evaluate it normally
1793    {    {
1794      collapse();      collapse();
# Line 1016  cout << "Buffers=" << m_buffsRequired << Line 1798  cout << "Buffers=" << m_buffsRequired <<
1798      return m_id;      return m_id;
1799    }    }
1800      // from this point on we must have m_op!=IDENTITY and m_readytype=='E'      // from this point on we must have m_op!=IDENTITY and m_readytype=='E'
   size_t threadbuffersize=m_maxsamplesize*(max(1,m_buffsRequired)); // Each thread needs to have enough  
     // storage to evaluate its expression  
   int numthreads=1;  
 #ifdef _OPENMP  
   numthreads=getNumberOfThreads();  
 #endif  
   ValueType v(numthreads*threadbuffersize);  
 cout << "Buffer created with size=" << v.size() << endl;  
1801    DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(),  ValueType(getNoValues()));    DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(),  ValueType(getNoValues()));
1802    ValueType& resvec=result->getVector();    ValueType& resvec=result->getVectorRW();
1803    DataReady_ptr resptr=DataReady_ptr(result);    DataReady_ptr resptr=DataReady_ptr(result);
1804    
1805    int sample;    int sample;
   size_t outoffset;     // offset in the output data  
1806    int totalsamples=getNumSamples();    int totalsamples=getNumSamples();
1807    const ValueType* res=0;   // Vector storing the answer    const ValueType* res=0;   // Storage for answer
1808    size_t resoffset=0;       // where in the vector to find the answer  LAZYDEBUG(cout << "Total number of samples=" <<totalsamples << endl;)
1809    #pragma omp parallel for private(sample,resoffset,outoffset,res) schedule(static)    #pragma omp parallel private(sample,res)
1810    for (sample=0;sample<totalsamples;++sample)    {
1811    {      size_t roffset=0;
1812  cout << "################################# " << sample << endl;  #ifdef LAZY_STACK_PROF
1813        stackstart[omp_get_thread_num()]=&roffset;
1814        stackend[omp_get_thread_num()]=&roffset;
1815    #endif
1816        #pragma omp for schedule(static)
1817        for (sample=0;sample<totalsamples;++sample)
1818        {
1819            roffset=0;
1820  #ifdef _OPENMP  #ifdef _OPENMP
1821      res=resolveSample(v,threadbuffersize*omp_get_thread_num(),sample,resoffset);              res=resolveNodeSample(omp_get_thread_num(),sample,roffset);
1822  #else  #else
1823      res=resolveSample(v,0,sample,resoffset);   // res would normally be v, but not if its a single IDENTITY op.              res=resolveNodeSample(0,sample,roffset);
1824  #endif  #endif
1825  cerr << "-------------------------------- " << endl;  LAZYDEBUG(cout << "Sample #" << sample << endl;)
1826      outoffset=result->getPointOffset(sample,0);  LAZYDEBUG(cout << "Final res[" << roffset<< "]=" << (*res)[roffset] << (*res)[roffset]<< endl; )
1827  cerr << "offset=" << outoffset << endl;              RealVectorType::size_type outoffset=result->getPointOffset(sample,0);
1828      for (unsigned int i=0;i<m_samplesize;++i,++outoffset,++resoffset)   // copy values into the output vector              memcpy(&(resvec[outoffset]),&((*res)[roffset]),m_samplesize*sizeof(RealVectorType::ElementType));
1829      {      }
1830      resvec[outoffset]=(*res)[resoffset];    }
1831      }  #ifdef LAZY_STACK_PROF
1832  cerr << "*********************************" << endl;    for (int i=0;i<getNumberOfThreads();++i)
1833      {
1834        size_t r=((size_t)stackstart[i] - (size_t)stackend[i]);
1835    //  cout << i << " " << stackstart[i] << " .. " << stackend[i] << " = " <<  r << endl;
1836        if (r>maxstackuse)
1837        {
1838            maxstackuse=r;
1839        }
1840    }    }
1841      cout << "Max resolve Stack use=" << maxstackuse << endl;
1842    #endif
1843    return resptr;    return resptr;
1844  }  }
1845    
# Line 1057  std::string Line 1847  std::string
1847  DataLazy::toString() const  DataLazy::toString() const
1848  {  {
1849    ostringstream oss;    ostringstream oss;
1850    oss << "Lazy Data:";    oss << "Lazy Data: [depth=" << m_height<< "] ";
1851    intoString(oss);    switch (escriptParams.getLAZY_STR_FMT())
1852      {
1853      case 1:   // tree format
1854        oss << endl;
1855        intoTreeString(oss,"");
1856        break;
1857      case 2:   // just the depth
1858        break;
1859      default:
1860        intoString(oss);
1861        break;
1862      }
1863    return oss.str();    return oss.str();
1864  }  }
1865    
# Line 1066  DataLazy::toString() const Line 1867  DataLazy::toString() const
1867  void  void
1868  DataLazy::intoString(ostringstream& oss) const  DataLazy::intoString(ostringstream& oss) const
1869  {  {
1870    //    oss << "[" << m_children <<";"<<m_height <<"]";
1871    switch (getOpgroup(m_op))    switch (getOpgroup(m_op))
1872    {    {
1873    case G_IDENTITY:    case G_IDENTITY:
# Line 1095  DataLazy::intoString(ostringstream& oss) Line 1897  DataLazy::intoString(ostringstream& oss)
1897      oss << ')';      oss << ')';
1898      break;      break;
1899    case G_UNARY:    case G_UNARY:
1900      case G_UNARY_P:
1901    case G_NP1OUT:    case G_NP1OUT:
1902      case G_NP1OUT_P:
1903      case G_REDUCTION:
1904      oss << opToString(m_op) << '(';      oss << opToString(m_op) << '(';
1905      m_left->intoString(oss);      m_left->intoString(oss);
1906      oss << ')';      oss << ')';
# Line 1107  DataLazy::intoString(ostringstream& oss) Line 1912  DataLazy::intoString(ostringstream& oss)
1912      m_right->intoString(oss);      m_right->intoString(oss);
1913      oss << ')';      oss << ')';
1914      break;      break;
1915      case G_NP1OUT_2P:
1916        oss << opToString(m_op) << '(';
1917        m_left->intoString(oss);
1918        oss << ", " << m_axis_offset << ", " << m_transpose;
1919        oss << ')';
1920        break;
1921      case G_CONDEVAL:
1922        oss << opToString(m_op)<< '(' ;
1923        m_mask->intoString(oss);
1924        oss << " ? ";
1925        m_left->intoString(oss);
1926        oss << " : ";
1927        m_right->intoString(oss);
1928        oss << ')';
1929        break;
1930    default:    default:
1931      oss << "UNKNOWN";      oss << "UNKNOWN";
1932    }    }
1933  }  }
1934    
1935    
1936    void
1937    DataLazy::intoTreeString(ostringstream& oss, string indent) const
1938    {
1939      oss << '[' << m_rank << ':' << setw(3) << m_samplesize << "] " << indent;
1940      switch (getOpgroup(m_op))
1941      {
1942      case G_IDENTITY:
1943        if (m_id->isExpanded())
1944        {
1945           oss << "E";
1946        }
1947        else if (m_id->isTagged())
1948        {
1949          oss << "T";
1950        }
1951        else if (m_id->isConstant())
1952        {
1953          oss << "C";
1954        }
1955        else
1956        {
1957          oss << "?";
1958        }
1959        oss << '@' << m_id.get() << endl;
1960        break;
1961      case G_BINARY:
1962        oss << opToString(m_op) << endl;
1963        indent+='.';
1964        m_left->intoTreeString(oss, indent);
1965        m_right->intoTreeString(oss, indent);
1966        break;
1967      case G_UNARY:
1968      case G_UNARY_P:
1969      case G_NP1OUT:
1970      case G_NP1OUT_P:
1971      case G_REDUCTION:
1972        oss << opToString(m_op) << endl;
1973        indent+='.';
1974        m_left->intoTreeString(oss, indent);
1975        break;
1976      case G_TENSORPROD:
1977        oss << opToString(m_op) << endl;
1978        indent+='.';
1979        m_left->intoTreeString(oss, indent);
1980        m_right->intoTreeString(oss, indent);
1981        break;
1982      case G_NP1OUT_2P:
1983        oss << opToString(m_op) << ", " << m_axis_offset << ", " << m_transpose<< endl;
1984        indent+='.';
1985        m_left->intoTreeString(oss, indent);
1986        break;
1987      default:
1988        oss << "UNKNOWN";
1989      }
1990    }
1991    
1992    
1993  DataAbstract*  DataAbstract*
1994  DataLazy::deepCopy()  DataLazy::deepCopy() const
1995  {  {
1996    switch (getOpgroup(m_op))    switch (getOpgroup(m_op))
1997    {    {
1998    case G_IDENTITY:  return new DataLazy(m_id->deepCopy()->getPtr());    case G_IDENTITY:  return new DataLazy(m_id->deepCopy()->getPtr());
1999    case G_UNARY: return new DataLazy(m_left->deepCopy()->getPtr(),m_op);    case G_UNARY:
2000      case G_REDUCTION:      return new DataLazy(m_left->deepCopy()->getPtr(),m_op);
2001      case G_UNARY_P:   return new DataLazy(m_left->deepCopy()->getPtr(), m_op, m_tol);
2002    case G_BINARY:    return new DataLazy(m_left->deepCopy()->getPtr(),m_right->deepCopy()->getPtr(),m_op);    case G_BINARY:    return new DataLazy(m_left->deepCopy()->getPtr(),m_right->deepCopy()->getPtr(),m_op);
2003    case G_NP1OUT: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(),m_op);    case G_NP1OUT: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(),m_op);
2004    case G_TENSORPROD: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);    case G_TENSORPROD: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
2005      case G_NP1OUT_P:   return new DataLazy(m_left->deepCopy()->getPtr(),m_op,  m_axis_offset);
2006      case G_NP1OUT_2P:  return new DataLazy(m_left->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
2007    default:    default:
2008      throw DataException("Programmer error - do not know how to deepcopy operator "+opToString(m_op)+".");      throw DataException("Programmer error - do not know how to deepcopy operator "+opToString(m_op)+".");
2009    }    }
2010  }  }
2011    
2012    
2013    
2014  // There is no single, natural interpretation of getLength on DataLazy.  // There is no single, natural interpretation of getLength on DataLazy.
2015  // Instances of DataReady can look at the size of their vectors.  // Instances of DataReady can look at the size of their vectors.
2016  // For lazy though, it could be the size the data would be if it were resolved;  // For lazy though, it could be the size the data would be if it were resolved;
2017  // or it could be some function of the lengths of the DataReady instances which  // or it could be some function of the lengths of the DataReady instances which
2018  // form part of the expression.  // form part of the expression.
2019  // Rather than have people making assumptions, I have disabled the method.  // Rather than have people making assumptions, I have disabled the method.
2020  DataTypes::ValueType::size_type  DataTypes::RealVectorType::size_type
2021  DataLazy::getLength() const  DataLazy::getLength() const
2022  {  {
2023    throw DataException("getLength() does not make sense for lazy data.");    throw DataException("getLength() does not make sense for lazy data.");
# Line 1149  DataLazy::getSlice(const DataTypes::Regi Line 2032  DataLazy::getSlice(const DataTypes::Regi
2032    
2033    
2034  // To do this we need to rely on our child nodes  // To do this we need to rely on our child nodes
2035  DataTypes::ValueType::size_type  DataTypes::RealVectorType::size_type
2036  DataLazy::getPointOffset(int sampleNo,  DataLazy::getPointOffset(int sampleNo,
2037                   int dataPointNo)                   int dataPointNo)
2038  {  {
# Line 1175  DataLazy::getPointOffset(int sampleNo, Line 2058  DataLazy::getPointOffset(int sampleNo,
2058  }  }
2059    
2060  // To do this we need to rely on our child nodes  // To do this we need to rely on our child nodes
2061  DataTypes::ValueType::size_type  DataTypes::RealVectorType::size_type
2062  DataLazy::getPointOffset(int sampleNo,  DataLazy::getPointOffset(int sampleNo,
2063                   int dataPointNo) const                   int dataPointNo) const
2064  {  {
# Line 1203  DataLazy::getPointOffset(int sampleNo, Line 2086  DataLazy::getPointOffset(int sampleNo,
2086    throw DataException("Programmer error - getPointOffset on lazy data may require collapsing (but this object is marked const).");    throw DataException("Programmer error - getPointOffset on lazy data may require collapsing (but this object is marked const).");
2087  }  }
2088    
2089  // It would seem that DataTagged will need to be treated differently since even after setting all tags  
2090  // to zero, all the tags from all the DataTags would be in the result.  // I have decided to let Data:: handle this issue.
 // However since they all have the same value (0) whether they are there or not should not matter.  
 // So I have decided that for all types this method will create a constant 0.  
 // It can be promoted up as required.  
 // A possible efficiency concern might be expanded->constant->expanded which has an extra memory management  
 // but we can deal with that if it arrises.  
2091  void  void
2092  DataLazy::setToZero()  DataLazy::setToZero()
2093  {  {
2094    DataTypes::ValueType v(getNoValues(),0);  //   DataTypes::RealVectorType v(getNoValues(),0);
2095    m_id=DataReady_ptr(new DataConstant(getFunctionSpace(),getShape(),v));  //   m_id=DataReady_ptr(new DataConstant(getFunctionSpace(),getShape(),v));
2096    m_op=IDENTITY;  //   m_op=IDENTITY;
2097    m_right.reset();    //   m_right.reset();  
2098    m_left.reset();  //   m_left.reset();
2099    m_readytype='C';  //   m_readytype='C';
2100    m_buffsRequired=1;  //   m_buffsRequired=1;
2101    
2102      (void)privdebug;  // to stop the compiler complaining about unused privdebug
2103      throw DataException("Programmer error - setToZero not supported for DataLazy (DataLazy objects should be read only).");
2104    }
2105    
2106    bool
2107    DataLazy::actsExpanded() const
2108    {
2109        return (m_readytype=='E');
2110  }  }
2111    
2112  }   // end namespace  }   // end namespace

Legend:
Removed from v.2082  
changed lines
  Added in v.5938

  ViewVC Help
Powered by ViewVC 1.1.26