/[escript]/branches/clazy/escriptcore/src/DataLazy.cpp
ViewVC logotype

Diff of /branches/clazy/escriptcore/src/DataLazy.cpp

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

branches/schroedinger/escript/src/DataLazy.cpp revision 1910 by jfenwick, Thu Oct 23 03:05:28 2008 UTC trunk/escript/src/DataLazy.cpp revision 2791 by jfenwick, Tue Dec 1 03:36:36 2009 UTC
# Line 1  Line 1 
1    
2  /*******************************************************  /*******************************************************
3  *  *
4  * Copyright (c) 2003-2008 by University of Queensland  * Copyright (c) 2003-2009 by University of Queensland
5  * Earth Systems Science Computational Center (ESSCC)  * Earth Systems Science Computational Center (ESSCC)
6  * http://www.uq.edu.au/esscc  * http://www.uq.edu.au/esscc
7  *  *
# Line 26  Line 26 
26  #include "DataTypes.h"  #include "DataTypes.h"
27  #include "Data.h"  #include "Data.h"
28  #include "UnaryFuncs.h"     // for escript::fsign  #include "UnaryFuncs.h"     // for escript::fsign
29    #include "Utils.h"
30    
31    #include "EscriptParams.h"
32    
33    #include <iomanip>      // for some fancy formatting in debug
34    
35    // #define LAZYDEBUG(X) if (privdebug){X;}
36    #define LAZYDEBUG(X)
37    namespace
38    {
39    bool privdebug=false;
40    
41    #define ENABLEDEBUG privdebug=true;
42    #define DISABLEDEBUG privdebug=false;
43    }
44    
45    // #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {cerr << "\n!!!!!!! SIZE LIMIT EXCEEDED " << m_children << ";" << m_height << endl << toString() << endl;resolveToIdentity();}
46    
47    #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {resolveToIdentity();}
48    
49    
50  /*  /*
51  How does DataLazy work?  How does DataLazy work?
# Line 47  I will refer to individual DataLazy obje Line 67  I will refer to individual DataLazy obje
67  Each node also stores:  Each node also stores:
68  - m_readytype \in {'E','T','C','?'} ~ indicates what sort of DataReady would be produced if the expression was  - m_readytype \in {'E','T','C','?'} ~ indicates what sort of DataReady would be produced if the expression was
69      evaluated.      evaluated.
 - m_length ~ how many values would be stored in the answer if the expression was evaluated.  
70  - m_buffsrequired ~ the larged number of samples which would need to be kept simultaneously in order to  - m_buffsrequired ~ the larged number of samples which would need to be kept simultaneously in order to
71      evaluate the expression.      evaluate the expression.
72  - m_samplesize ~ the number of doubles stored in a sample.  - m_samplesize ~ the number of doubles stored in a sample.
# Line 69  The convention that I use, is that the r Line 88  The convention that I use, is that the r
88    
89  For expressions which evaluate to Constant or Tagged, there is a different evaluation method.  For expressions which evaluate to Constant or Tagged, there is a different evaluation method.
90  The collapse method invokes the (non-lazy) operations on the Data class to evaluate the expression.  The collapse method invokes the (non-lazy) operations on the Data class to evaluate the expression.
91    
92    To add a new operator you need to do the following (plus anything I might have forgotten - adding a new group for example):
93    1) Add to the ES_optype.
94    2) determine what opgroup your operation belongs to (X)
95    3) add a string for the op to the end of ES_opstrings
96    4) increase ES_opcount
97    5) add an entry (X) to opgroups
98    6) add an entry to the switch in collapseToReady
99    7) add an entry to resolveX
100  */  */
101    
102    
# Line 78  using namespace boost; Line 106  using namespace boost;
106  namespace escript  namespace escript
107  {  {
108    
 const std::string&  
 opToString(ES_optype op);  
   
109  namespace  namespace
110  {  {
111    
112    
113    // enabling this will print out when ever the maximum stacksize used by resolve increases
114    // it assumes _OPENMP is also in use
115    //#define LAZY_STACK_PROF
116    
117    
118    
119    #ifndef _OPENMP
120      #ifdef LAZY_STACK_PROF
121      #undef LAZY_STACK_PROF
122      #endif
123    #endif
124    
125    
126    #ifdef LAZY_STACK_PROF
127    std::vector<void*> stackstart(getNumberOfThreads());
128    std::vector<void*> stackend(getNumberOfThreads());
129    size_t maxstackuse=0;
130    #endif
131    
132  enum ES_opgroup  enum ES_opgroup
133  {  {
134     G_UNKNOWN,     G_UNKNOWN,
135     G_IDENTITY,     G_IDENTITY,
136     G_BINARY,        // pointwise operations with two arguments     G_BINARY,        // pointwise operations with two arguments
137     G_UNARY      // pointwise operations with one argument     G_UNARY,     // pointwise operations with one argument
138       G_UNARY_P,       // pointwise operations with one argument, requiring a parameter
139       G_NP1OUT,        // non-pointwise op with one output
140       G_NP1OUT_P,      // non-pointwise op with one output requiring a parameter
141       G_TENSORPROD,    // general tensor product
142       G_NP1OUT_2P,     // non-pointwise op with one output requiring two params
143       G_REDUCTION      // non-pointwise unary op with a scalar output
144  };  };
145    
146    
# Line 100  string ES_opstrings[]={"UNKNOWN","IDENTI Line 151  string ES_opstrings[]={"UNKNOWN","IDENTI
151              "asin","acos","atan","sinh","cosh","tanh","erf",              "asin","acos","atan","sinh","cosh","tanh","erf",
152              "asinh","acosh","atanh",              "asinh","acosh","atanh",
153              "log10","log","sign","abs","neg","pos","exp","sqrt",              "log10","log","sign","abs","neg","pos","exp","sqrt",
154              "1/","where>0","where<0","where>=0","where<=0"};              "1/","where>0","where<0","where>=0","where<=0", "where<>0","where=0",
155  int ES_opcount=32;              "symmetric","nonsymmetric",
156                "prod",
157                "transpose", "trace",
158                "swapaxes",
159                "minval", "maxval"};
160    int ES_opcount=43;
161  ES_opgroup opgroups[]={G_UNKNOWN,G_IDENTITY,G_BINARY,G_BINARY,G_BINARY,G_BINARY, G_BINARY,  ES_opgroup opgroups[]={G_UNKNOWN,G_IDENTITY,G_BINARY,G_BINARY,G_BINARY,G_BINARY, G_BINARY,
162              G_UNARY,G_UNARY,G_UNARY, //10              G_UNARY,G_UNARY,G_UNARY, //10
163              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,    // 17              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,    // 17
164              G_UNARY,G_UNARY,G_UNARY,                    // 20              G_UNARY,G_UNARY,G_UNARY,                    // 20
165              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,        // 28              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,    // 28
166              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY};              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY, G_UNARY_P, G_UNARY_P,      // 35
167                G_NP1OUT,G_NP1OUT,
168                G_TENSORPROD,
169                G_NP1OUT_P, G_NP1OUT_P,
170                G_NP1OUT_2P,
171                G_REDUCTION, G_REDUCTION};
172  inline  inline
173  ES_opgroup  ES_opgroup
174  getOpgroup(ES_optype op)  getOpgroup(ES_optype op)
# Line 124  resultFS(DataAbstract_ptr left, DataAbst Line 185  resultFS(DataAbstract_ptr left, DataAbst
185      // that way, if interpolate is required in any other op we can just throw a      // that way, if interpolate is required in any other op we can just throw a
186      // programming error exception.      // programming error exception.
187    
188      FunctionSpace l=left->getFunctionSpace();
189      if (left->getFunctionSpace()!=right->getFunctionSpace())    FunctionSpace r=right->getFunctionSpace();
190      {    if (l!=r)
191          throw DataException("FunctionSpaces not equal - interpolation not supported on lazy data.");    {
192      }      if (r.probeInterpolation(l))
193      return left->getFunctionSpace();      {
194        return l;
195        }
196        if (l.probeInterpolation(r))
197        {
198        return r;
199        }
200        throw DataException("Cannot interpolate between the FunctionSpaces given for operation "+opToString(op)+".");
201      }
202      return l;
203  }  }
204    
205  // return the shape of the result of "left op right"  // return the shape of the result of "left op right"
206    // the shapes resulting from tensor product are more complex to compute so are worked out elsewhere
207  DataTypes::ShapeType  DataTypes::ShapeType
208  resultShape(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)  resultShape(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
209  {  {
210      if (left->getShape()!=right->getShape())      if (left->getShape()!=right->getShape())
211      {      {
212        if (getOpgroup(op)!=G_BINARY)        if ((getOpgroup(op)!=G_BINARY) && (getOpgroup(op)!=G_NP1OUT))
213        {        {
214          throw DataException("Shapes not the name - shapes must match for (point)binary operations.");          throw DataException("Shapes not the name - shapes must match for (point)binary operations.");
215        }        }
216    
217        if (left->getRank()==0)   // we need to allow scalar * anything        if (left->getRank()==0)   // we need to allow scalar * anything
218        {        {
219          return right->getShape();          return right->getShape();
# Line 155  resultShape(DataAbstract_ptr left, DataA Line 227  resultShape(DataAbstract_ptr left, DataA
227      return left->getShape();      return left->getShape();
228  }  }
229    
230  // determine the number of points in the result of "left op right"  // return the shape for "op left"
231  size_t  
232  resultLength(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)  DataTypes::ShapeType
233    resultShape(DataAbstract_ptr left, ES_optype op, int axis_offset)
234  {  {
235     switch (getOpgroup(op))      switch(op)
236     {      {
237     case G_BINARY: return left->getLength();          case TRANS:
238     case G_UNARY: return left->getLength();         {            // for the scoping of variables
239     default:          const DataTypes::ShapeType& s=left->getShape();
240      throw DataException("Programmer Error - attempt to getLength() for operator "+opToString(op)+".");          DataTypes::ShapeType sh;
241     }          int rank=left->getRank();
242            if (axis_offset<0 || axis_offset>rank)
243            {
244                stringstream e;
245                e << "Error - Data::transpose must have 0 <= axis_offset <= rank=" << rank;
246                throw DataException(e.str());
247            }
248            for (int i=0; i<rank; i++)
249            {
250               int index = (axis_offset+i)%rank;
251               sh.push_back(s[index]); // Append to new shape
252            }
253            return sh;
254           }
255        break;
256        case TRACE:
257           {
258            int rank=left->getRank();
259            if (rank<2)
260            {
261               throw DataException("Trace can only be computed for objects with rank 2 or greater.");
262            }
263            if ((axis_offset>rank-2) || (axis_offset<0))
264            {
265               throw DataException("Trace: axis offset must lie between 0 and rank-2 inclusive.");
266            }
267            if (rank==2)
268            {
269               return DataTypes::scalarShape;
270            }
271            else if (rank==3)
272            {
273               DataTypes::ShapeType sh;
274                   if (axis_offset==0)
275               {
276                    sh.push_back(left->getShape()[2]);
277                   }
278                   else     // offset==1
279               {
280                sh.push_back(left->getShape()[0]);
281                   }
282               return sh;
283            }
284            else if (rank==4)
285            {
286               DataTypes::ShapeType sh;
287               const DataTypes::ShapeType& s=left->getShape();
288                   if (axis_offset==0)
289               {
290                    sh.push_back(s[2]);
291                    sh.push_back(s[3]);
292                   }
293                   else if (axis_offset==1)
294               {
295                    sh.push_back(s[0]);
296                    sh.push_back(s[3]);
297                   }
298               else     // offset==2
299               {
300                sh.push_back(s[0]);
301                sh.push_back(s[1]);
302               }
303               return sh;
304            }
305            else        // unknown rank
306            {
307               throw DataException("Error - Data::trace can only be calculated for rank 2, 3 or 4 object.");
308            }
309           }
310        break;
311            default:
312        throw DataException("Programmer error - resultShape(left,op) can't compute shapes for operator "+opToString(op)+".");
313        }
314  }  }
315    
316  // determine the number of samples requires to evaluate an expression combining left and right  DataTypes::ShapeType
317  int  SwapShape(DataAbstract_ptr left, const int axis0, const int axis1)
 calcBuffs(const DataLazy_ptr& left, const DataLazy_ptr& right, ES_optype op)  
318  {  {
319     switch(getOpgroup(op))       // This code taken from the Data.cpp swapaxes() method
320     {       // Some of the checks are probably redundant here
321     case G_IDENTITY: return 1;       int axis0_tmp,axis1_tmp;
322     case G_BINARY: return max(left->getBuffsRequired(),right->getBuffsRequired()+1);       const DataTypes::ShapeType& s=left->getShape();
323     case G_UNARY: return max(left->getBuffsRequired(),1);       DataTypes::ShapeType out_shape;
324     default:       // Here's the equivalent of python s_out=s[axis_offset:]+s[:axis_offset]
325      throw DataException("Programmer Error - attempt to calcBuffs() for operator "+opToString(op)+".");       // which goes thru all shape vector elements starting with axis_offset (at index=rank wrap around to 0)
326     }       int rank=left->getRank();
327         if (rank<2) {
328            throw DataException("Error - Data::swapaxes argument must have at least rank 2.");
329         }
330         if (axis0<0 || axis0>rank-1) {
331            stringstream e;
332            e << "Error - Data::swapaxes: axis0 must be between 0 and rank-1=" << (rank-1);
333            throw DataException(e.str());
334         }
335         if (axis1<0 || axis1>rank-1) {
336            stringstream e;
337            e << "Error - Data::swapaxes: axis1 must be between 0 and rank-1=" << (rank-1);
338            throw DataException(e.str());
339         }
340         if (axis0 == axis1) {
341             throw DataException("Error - Data::swapaxes: axis indices must be different.");
342         }
343         if (axis0 > axis1) {
344             axis0_tmp=axis1;
345             axis1_tmp=axis0;
346         } else {
347             axis0_tmp=axis0;
348             axis1_tmp=axis1;
349         }
350         for (int i=0; i<rank; i++) {
351           if (i == axis0_tmp) {
352              out_shape.push_back(s[axis1_tmp]);
353           } else if (i == axis1_tmp) {
354              out_shape.push_back(s[axis0_tmp]);
355           } else {
356              out_shape.push_back(s[i]);
357           }
358         }
359        return out_shape;
360  }  }
361    
362    
363    // determine the output shape for the general tensor product operation
364    // the additional parameters return information required later for the product
365    // the majority of this code is copy pasted from C_General_Tensor_Product
366    DataTypes::ShapeType
367    GTPShape(DataAbstract_ptr left, DataAbstract_ptr right, int axis_offset, int transpose, int& SL, int& SM, int& SR)
368    {
369        
370      // Get rank and shape of inputs
371      int rank0 = left->getRank();
372      int rank1 = right->getRank();
373      const DataTypes::ShapeType& shape0 = left->getShape();
374      const DataTypes::ShapeType& shape1 = right->getShape();
375    
376      // Prepare for the loops of the product and verify compatibility of shapes
377      int start0=0, start1=0;
378      if (transpose == 0)       {}
379      else if (transpose == 1)  { start0 = axis_offset; }
380      else if (transpose == 2)  { start1 = rank1-axis_offset; }
381      else              { throw DataException("DataLazy GeneralTensorProduct Constructor: Error - transpose should be 0, 1 or 2"); }
382    
383      if (rank0<axis_offset)
384      {
385        throw DataException("DataLazy GeneralTensorProduct Constructor: Error - rank of left < axisoffset");
386      }
387    
388      // Adjust the shapes for transpose
389      DataTypes::ShapeType tmpShape0(rank0);    // pre-sizing the vectors rather
390      DataTypes::ShapeType tmpShape1(rank1);    // than using push_back
391      for (int i=0; i<rank0; i++)   { tmpShape0[i]=shape0[(i+start0)%rank0]; }
392      for (int i=0; i<rank1; i++)   { tmpShape1[i]=shape1[(i+start1)%rank1]; }
393    
394      // Prepare for the loops of the product
395      SL=1, SM=1, SR=1;
396      for (int i=0; i<rank0-axis_offset; i++)   {
397        SL *= tmpShape0[i];
398      }
399      for (int i=rank0-axis_offset; i<rank0; i++)   {
400        if (tmpShape0[i] != tmpShape1[i-(rank0-axis_offset)]) {
401          throw DataException("C_GeneralTensorProduct: Error - incompatible shapes");
402        }
403        SM *= tmpShape0[i];
404      }
405      for (int i=axis_offset; i<rank1; i++)     {
406        SR *= tmpShape1[i];
407      }
408    
409      // Define the shape of the output (rank of shape is the sum of the loop ranges below)
410      DataTypes::ShapeType shape2(rank0+rank1-2*axis_offset);  
411      {         // block to limit the scope of out_index
412         int out_index=0;
413         for (int i=0; i<rank0-axis_offset; i++, ++out_index) { shape2[out_index]=tmpShape0[i]; } // First part of arg_0_Z
414         for (int i=axis_offset; i<rank1; i++, ++out_index)   { shape2[out_index]=tmpShape1[i]; } // Last part of arg_1_Z
415      }
416    
417      if (shape2.size()>ESCRIPT_MAX_DATA_RANK)
418      {
419         ostringstream os;
420         os << "C_GeneralTensorProduct: Error - Attempt to create a rank " << shape2.size() << " object. The maximum rank is " << ESCRIPT_MAX_DATA_RANK << ".";
421         throw DataException(os.str());
422      }
423    
424      return shape2;
425    }
426    
427  }   // end anonymous namespace  }   // end anonymous namespace
428    
429    
# Line 198  opToString(ES_optype op) Line 439  opToString(ES_optype op)
439    return ES_opstrings[op];    return ES_opstrings[op];
440  }  }
441    
442    void DataLazy::LazyNodeSetup()
443    {
444    #ifdef _OPENMP
445        int numthreads=omp_get_max_threads();
446        m_samples.resize(numthreads*m_samplesize);
447        m_sampleids=new int[numthreads];
448        for (int i=0;i<numthreads;++i)
449        {
450            m_sampleids[i]=-1;  
451        }
452    #else
453        m_samples.resize(m_samplesize);
454        m_sampleids=new int[1];
455        m_sampleids[0]=-1;
456    #endif  // _OPENMP
457    }
458    
459    
460    // Creates an identity node
461  DataLazy::DataLazy(DataAbstract_ptr p)  DataLazy::DataLazy(DataAbstract_ptr p)
462      : parent(p->getFunctionSpace(),p->getShape()),      : parent(p->getFunctionSpace(),p->getShape())
463      m_op(IDENTITY)      ,m_sampleids(0),
464        m_samples(1)
465  {  {
466     if (p->isLazy())     if (p->isLazy())
467     {     {
# Line 212  DataLazy::DataLazy(DataAbstract_ptr p) Line 472  DataLazy::DataLazy(DataAbstract_ptr p)
472     }     }
473     else     else
474     {     {
475      m_id=dynamic_pointer_cast<DataReady>(p);      p->makeLazyShared();
476      if(p->isConstant()) {m_readytype='C';}      DataReady_ptr dr=dynamic_pointer_cast<DataReady>(p);
477      else if(p->isExpanded()) {m_readytype='E';}      makeIdentity(dr);
478      else if (p->isTagged()) {m_readytype='T';}  LAZYDEBUG(cout << "Wrapping " << dr.get() << " id=" << m_id.get() << endl;)
     else {throw DataException("Unknown DataReady instance in DataLazy constructor.");}  
479     }     }
480     m_length=p->getLength();  LAZYDEBUG(cout << "(1)Lazy created with " << m_samplesize << endl;)
    m_buffsRequired=1;  
    m_samplesize=getNumDPPSample()*getNoValues();  
 cout << "(1)Lazy created with " << m_samplesize << endl;  
481  }  }
482    
   
   
   
483  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op)  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op)
484      : parent(left->getFunctionSpace(),left->getShape()),      : parent(left->getFunctionSpace(),(getOpgroup(op)!=G_REDUCTION)?left->getShape():DataTypes::scalarShape),
485      m_op(op)      m_op(op),
486        m_axis_offset(0),
487        m_transpose(0),
488        m_SL(0), m_SM(0), m_SR(0)
489  {  {
490     if (getOpgroup(op)!=G_UNARY)     if ((getOpgroup(op)!=G_UNARY) && (getOpgroup(op)!=G_NP1OUT) && (getOpgroup(op)!=G_REDUCTION))
491     {     {
492      throw DataException("Programmer error - constructor DataLazy(left, op) will only process UNARY operations.");      throw DataException("Programmer error - constructor DataLazy(left, op) will only process UNARY operations.");
493     }     }
494    
495     DataLazy_ptr lleft;     DataLazy_ptr lleft;
496     if (!left->isLazy())     if (!left->isLazy())
497     {     {
# Line 245  DataLazy::DataLazy(DataAbstract_ptr left Line 502  DataLazy::DataLazy(DataAbstract_ptr left
502      lleft=dynamic_pointer_cast<DataLazy>(left);      lleft=dynamic_pointer_cast<DataLazy>(left);
503     }     }
504     m_readytype=lleft->m_readytype;     m_readytype=lleft->m_readytype;
    m_length=left->getLength();  
505     m_left=lleft;     m_left=lleft;
    m_buffsRequired=1;  
506     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
507       m_children=m_left->m_children+1;
508       m_height=m_left->m_height+1;
509       LazyNodeSetup();
510       SIZELIMIT
511  }  }
512    
513    
514    // In this constructor we need to consider interpolation
515  DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)  DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
516      : parent(resultFS(left,right,op), resultShape(left,right,op)),      : parent(resultFS(left,right,op), resultShape(left,right,op)),
517      m_op(op)      m_op(op),
518        m_SL(0), m_SM(0), m_SR(0)
519  {  {
520     if (getOpgroup(op)!=G_BINARY)  LAZYDEBUG(cout << "Forming operator with " << left.get() << " " << right.get() << endl;)
521       if ((getOpgroup(op)!=G_BINARY))
522     {     {
523      throw DataException("Programmer error - constructor DataLazy(left, right, op) will only process BINARY operations.");      throw DataException("Programmer error - constructor DataLazy(left, right, op) will only process BINARY operations.");
524     }     }
525    
526       if (getFunctionSpace()!=left->getFunctionSpace())    // left needs to be interpolated
527       {
528        FunctionSpace fs=getFunctionSpace();
529        Data ltemp(left);
530        Data tmp(ltemp,fs);
531        left=tmp.borrowDataPtr();
532       }
533       if (getFunctionSpace()!=right->getFunctionSpace())   // right needs to be interpolated
534       {
535        Data tmp(Data(right),getFunctionSpace());
536        right=tmp.borrowDataPtr();
537    LAZYDEBUG(cout << "Right interpolation required " << right.get() << endl;)
538       }
539       left->operandCheck(*right);
540    
541     if (left->isLazy())          // the children need to be DataLazy. Wrap them in IDENTITY if required     if (left->isLazy())          // the children need to be DataLazy. Wrap them in IDENTITY if required
542     {     {
543      m_left=dynamic_pointer_cast<DataLazy>(left);      m_left=dynamic_pointer_cast<DataLazy>(left);
544    LAZYDEBUG(cout << "Left is " << m_left->toString() << endl;)
545     }     }
546     else     else
547     {     {
548      m_left=DataLazy_ptr(new DataLazy(left));      m_left=DataLazy_ptr(new DataLazy(left));
549    LAZYDEBUG(cout << "Left " << left.get() << " wrapped " << m_left->m_id.get() << endl;)
550     }     }
551     if (right->isLazy())     if (right->isLazy())
552     {     {
553      m_right=dynamic_pointer_cast<DataLazy>(right);      m_right=dynamic_pointer_cast<DataLazy>(right);
554    LAZYDEBUG(cout << "Right is " << m_right->toString() << endl;)
555     }     }
556     else     else
557     {     {
558      m_right=DataLazy_ptr(new DataLazy(right));      m_right=DataLazy_ptr(new DataLazy(right));
559    LAZYDEBUG(cout << "Right " << right.get() << " wrapped " << m_right->m_id.get() << endl;)
560     }     }
561     char lt=m_left->m_readytype;     char lt=m_left->m_readytype;
562     char rt=m_right->m_readytype;     char rt=m_right->m_readytype;
# Line 290  DataLazy::DataLazy(DataAbstract_ptr left Line 572  DataLazy::DataLazy(DataAbstract_ptr left
572     {     {
573      m_readytype='C';      m_readytype='C';
574     }     }
575     m_length=resultLength(m_left,m_right,m_op);     m_samplesize=getNumDPPSample()*getNoValues();
576     m_samplesize=getNumDPPSample()*getNoValues();         m_children=m_left->m_children+m_right->m_children+2;
577     m_buffsRequired=calcBuffs(m_left, m_right,m_op);     m_height=max(m_left->m_height,m_right->m_height)+1;
578  cout << "(3)Lazy created with " << m_samplesize << endl;     LazyNodeSetup();
579       SIZELIMIT
580    LAZYDEBUG(cout << "(3)Lazy created with " << m_samplesize << endl;)
581  }  }
582    
583    DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op, int axis_offset, int transpose)
584        : parent(resultFS(left,right,op), GTPShape(left,right, axis_offset, transpose, m_SL,m_SM, m_SR)),
585        m_op(op),
586        m_axis_offset(axis_offset),
587        m_transpose(transpose)
588    {
589       if ((getOpgroup(op)!=G_TENSORPROD))
590       {
591        throw DataException("Programmer error - constructor DataLazy(left, right, op, ax, tr) will only process BINARY operations which require parameters.");
592       }
593       if ((transpose>2) || (transpose<0))
594       {
595        throw DataException("DataLazy GeneralTensorProduct constructor: Error - transpose should be 0, 1 or 2");
596       }
597       if (getFunctionSpace()!=left->getFunctionSpace())    // left needs to be interpolated
598       {
599        FunctionSpace fs=getFunctionSpace();
600        Data ltemp(left);
601        Data tmp(ltemp,fs);
602        left=tmp.borrowDataPtr();
603       }
604       if (getFunctionSpace()!=right->getFunctionSpace())   // right needs to be interpolated
605       {
606        Data tmp(Data(right),getFunctionSpace());
607        right=tmp.borrowDataPtr();
608       }
609    //    left->operandCheck(*right);
610    
611  DataLazy::~DataLazy()     if (left->isLazy())          // the children need to be DataLazy. Wrap them in IDENTITY if required
612       {
613        m_left=dynamic_pointer_cast<DataLazy>(left);
614       }
615       else
616       {
617        m_left=DataLazy_ptr(new DataLazy(left));
618       }
619       if (right->isLazy())
620       {
621        m_right=dynamic_pointer_cast<DataLazy>(right);
622       }
623       else
624       {
625        m_right=DataLazy_ptr(new DataLazy(right));
626       }
627       char lt=m_left->m_readytype;
628       char rt=m_right->m_readytype;
629       if (lt=='E' || rt=='E')
630       {
631        m_readytype='E';
632       }
633       else if (lt=='T' || rt=='T')
634       {
635        m_readytype='T';
636       }
637       else
638       {
639        m_readytype='C';
640       }
641       m_samplesize=getNumDPPSample()*getNoValues();
642       m_children=m_left->m_children+m_right->m_children+2;
643       m_height=max(m_left->m_height,m_right->m_height)+1;
644       LazyNodeSetup();
645       SIZELIMIT
646    LAZYDEBUG(cout << "(4)Lazy created with " << m_samplesize << endl;)
647    }
648    
649    
650    DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, int axis_offset)
651        : parent(left->getFunctionSpace(), resultShape(left,op, axis_offset)),
652        m_op(op),
653        m_axis_offset(axis_offset),
654        m_transpose(0),
655        m_tol(0)
656    {
657       if ((getOpgroup(op)!=G_NP1OUT_P))
658       {
659        throw DataException("Programmer error - constructor DataLazy(left, op, ax) will only process UNARY operations which require parameters.");
660       }
661       DataLazy_ptr lleft;
662       if (!left->isLazy())
663       {
664        lleft=DataLazy_ptr(new DataLazy(left));
665       }
666       else
667       {
668        lleft=dynamic_pointer_cast<DataLazy>(left);
669       }
670       m_readytype=lleft->m_readytype;
671       m_left=lleft;
672       m_samplesize=getNumDPPSample()*getNoValues();
673       m_children=m_left->m_children+1;
674       m_height=m_left->m_height+1;
675       LazyNodeSetup();
676       SIZELIMIT
677    LAZYDEBUG(cout << "(5)Lazy created with " << m_samplesize << endl;)
678    }
679    
680    DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, double tol)
681        : parent(left->getFunctionSpace(), left->getShape()),
682        m_op(op),
683        m_axis_offset(0),
684        m_transpose(0),
685        m_tol(tol)
686  {  {
687       if ((getOpgroup(op)!=G_UNARY_P))
688       {
689        throw DataException("Programmer error - constructor DataLazy(left, op, tol) will only process UNARY operations which require parameters.");
690       }
691       DataLazy_ptr lleft;
692       if (!left->isLazy())
693       {
694        lleft=DataLazy_ptr(new DataLazy(left));
695       }
696       else
697       {
698        lleft=dynamic_pointer_cast<DataLazy>(left);
699       }
700       m_readytype=lleft->m_readytype;
701       m_left=lleft;
702       m_samplesize=getNumDPPSample()*getNoValues();
703       m_children=m_left->m_children+1;
704       m_height=m_left->m_height+1;
705       LazyNodeSetup();
706       SIZELIMIT
707    LAZYDEBUG(cout << "(6)Lazy created with " << m_samplesize << endl;)
708  }  }
709    
710    
711  int  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, const int axis0, const int axis1)
712  DataLazy::getBuffsRequired() const      : parent(left->getFunctionSpace(), SwapShape(left,axis0,axis1)),
713        m_op(op),
714        m_axis_offset(axis0),
715        m_transpose(axis1),
716        m_tol(0)
717    {
718       if ((getOpgroup(op)!=G_NP1OUT_2P))
719       {
720        throw DataException("Programmer error - constructor DataLazy(left, op, tol) will only process UNARY operations which require two integer parameters.");
721       }
722       DataLazy_ptr lleft;
723       if (!left->isLazy())
724       {
725        lleft=DataLazy_ptr(new DataLazy(left));
726       }
727       else
728       {
729        lleft=dynamic_pointer_cast<DataLazy>(left);
730       }
731       m_readytype=lleft->m_readytype;
732       m_left=lleft;
733       m_samplesize=getNumDPPSample()*getNoValues();
734       m_children=m_left->m_children+1;
735       m_height=m_left->m_height+1;
736       LazyNodeSetup();
737       SIZELIMIT
738    LAZYDEBUG(cout << "(7)Lazy created with " << m_samplesize << endl;)
739    }
740    
741    DataLazy::~DataLazy()
742  {  {
743      return m_buffsRequired;     delete[] m_sampleids;
744  }  }
745    
746    
# Line 328  DataLazy::collapseToReady() Line 763  DataLazy::collapseToReady()
763    DataReady_ptr pleft=m_left->collapseToReady();    DataReady_ptr pleft=m_left->collapseToReady();
764    Data left(pleft);    Data left(pleft);
765    Data right;    Data right;
766    if (getOpgroup(m_op)==G_BINARY)    if ((getOpgroup(m_op)==G_BINARY) || (getOpgroup(m_op)==G_TENSORPROD))
767    {    {
768      right=Data(m_right->collapseToReady());      right=Data(m_right->collapseToReady());
769    }    }
# Line 427  DataLazy::collapseToReady() Line 862  DataLazy::collapseToReady()
862      case LEZ:      case LEZ:
863      result=left.whereNonPositive();      result=left.whereNonPositive();
864      break;      break;
865        case NEZ:
866        result=left.whereNonZero(m_tol);
867        break;
868        case EZ:
869        result=left.whereZero(m_tol);
870        break;
871        case SYM:
872        result=left.symmetric();
873        break;
874        case NSYM:
875        result=left.nonsymmetric();
876        break;
877        case PROD:
878        result=C_GeneralTensorProduct(left,right,m_axis_offset, m_transpose);
879        break;
880        case TRANS:
881        result=left.transpose(m_axis_offset);
882        break;
883        case TRACE:
884        result=left.trace(m_axis_offset);
885        break;
886        case SWAP:
887        result=left.swapaxes(m_axis_offset, m_transpose);
888        break;
889        case MINVAL:
890        result=left.minval();
891        break;
892        case MAXVAL:
893        result=left.minval();
894        break;
895      default:      default:
896      throw DataException("Programmer error - do not know how to resolve operator "+opToString(m_op)+".");      throw DataException("Programmer error - collapseToReady does not know how to resolve operator "+opToString(m_op)+".");
897    }    }
898    return result.borrowReadyPtr();    return result.borrowReadyPtr();
899  }  }
# Line 454  DataLazy::collapse() Line 919  DataLazy::collapse()
919    m_op=IDENTITY;    m_op=IDENTITY;
920  }  }
921    
922  /*  
923    \brief Compute the value of the expression (binary operation) for the given sample.  
924    \return Vector which stores the value of the subexpression for the given sample.  
925    \param v A vector to store intermediate results.  
926    \param offset Index in v to begin storing results.  
927    \param sampleNo Sample number to evaluate.  #define PROC_OP(TYPE,X)                               \
928    \param roffset (output parameter) the offset in the return vector where the result begins.      for (int j=0;j<onumsteps;++j)\
929        {\
930    The return value will be an existing vector so do not deallocate it.        for (int i=0;i<numsteps;++i,resultp+=resultStep) \
931    If the result is stored in v it should be stored at the offset given.        { \
932    Everything from offset to the end of v should be considered available for this method to use.  LAZYDEBUG(cout << "[left,right]=[" << lroffset << "," << rroffset << "]" << endl;)\
933  */  LAZYDEBUG(cout << "{left,right}={" << (*left)[lroffset] << "," << (*right)[rroffset] << "}\n";)\
934  DataTypes::ValueType*           tensor_binary_operation< TYPE >(chunksize, &((*left)[lroffset]), &((*right)[rroffset]), resultp, X); \
935  DataLazy::resolveUnary(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const  LAZYDEBUG(cout << " result=      " << resultp[0] << endl;) \
936             lroffset+=leftstep; \
937             rroffset+=rightstep; \
938          }\
939          lroffset+=oleftstep;\
940          rroffset+=orightstep;\
941        }
942    
943    
944    // The result will be stored in m_samples
945    // The return value is a pointer to the DataVector, offset is the offset within the return value
946    const DataTypes::ValueType*
947    DataLazy::resolveNodeSample(int tid, int sampleNo, size_t& roffset)
948    {
949    LAZYDEBUG(cout << "Resolve sample " << toString() << endl;)
950        // collapse so we have a 'E' node or an IDENTITY for some other type
951      if (m_readytype!='E' && m_op!=IDENTITY)
952      {
953        collapse();
954      }
955      if (m_op==IDENTITY)  
956      {
957        const ValueType& vec=m_id->getVectorRO();
958        roffset=m_id->getPointOffset(sampleNo, 0);
959    #ifdef LAZY_STACK_PROF
960    int x;
961    if (&x<stackend[omp_get_thread_num()])
962    {
963           stackend[omp_get_thread_num()]=&x;
964    }
965    #endif
966        return &(vec);
967      }
968      if (m_readytype!='E')
969      {
970        throw DataException("Programmer Error - Collapse did not produce an expanded node.");
971      }
972      if (m_sampleids[tid]==sampleNo)
973      {
974        roffset=tid*m_samplesize;
975        return &(m_samples);        // sample is already resolved
976      }
977      m_sampleids[tid]=sampleNo;
978      switch (getOpgroup(m_op))
979      {
980      case G_UNARY:
981      case G_UNARY_P: return resolveNodeUnary(tid, sampleNo, roffset);
982      case G_BINARY: return resolveNodeBinary(tid, sampleNo, roffset);
983      case G_NP1OUT: return resolveNodeNP1OUT(tid, sampleNo, roffset);
984      case G_NP1OUT_P: return resolveNodeNP1OUT_P(tid, sampleNo, roffset);
985      case G_TENSORPROD: return resolveNodeTProd(tid, sampleNo, roffset);
986      case G_NP1OUT_2P: return resolveNodeNP1OUT_2P(tid, sampleNo, roffset);
987      case G_REDUCTION: return resolveNodeReduction(tid, sampleNo, roffset);
988      default:
989        throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");
990      }
991    }
992    
993    const DataTypes::ValueType*
994    DataLazy::resolveNodeUnary(int tid, int sampleNo, size_t& roffset)
995  {  {
996      // we assume that any collapsing has been done before we get here      // we assume that any collapsing has been done before we get here
997      // since we only have one argument we don't need to think about only      // since we only have one argument we don't need to think about only
998      // processing single points.      // processing single points.
999        // we will also know we won't get identity nodes
1000    if (m_readytype!='E')    if (m_readytype!='E')
1001    {    {
1002      throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");      throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
1003    }    }
1004    const ValueType* vleft=m_left->resolveSample(v,offset,sampleNo,roffset);    if (m_op==IDENTITY)
1005    const double* left=&((*vleft)[roffset]);    {
1006    double* result=&(v[offset]);      throw DataException("Programmer error - resolveNodeUnary should not be called on identity nodes.");
1007    roffset=offset;    }
1008      const DataTypes::ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, roffset);
1009      const double* left=&((*leftres)[roffset]);
1010      roffset=m_samplesize*tid;
1011      double* result=&(m_samples[roffset]);
1012    switch (m_op)    switch (m_op)
1013    {    {
1014      case SIN:        case SIN:  
1015      tensor_unary_operation(m_samplesize, left, result, ::sin);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sin);
1016      break;      break;
1017      case COS:      case COS:
1018      tensor_unary_operation(m_samplesize, left, result, ::cos);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cos);
1019      break;      break;
1020      case TAN:      case TAN:
1021      tensor_unary_operation(m_samplesize, left, result, ::tan);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tan);
1022      break;      break;
1023      case ASIN:      case ASIN:
1024      tensor_unary_operation(m_samplesize, left, result, ::asin);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::asin);
1025      break;      break;
1026      case ACOS:      case ACOS:
1027      tensor_unary_operation(m_samplesize, left, result, ::acos);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::acos);
1028      break;      break;
1029      case ATAN:      case ATAN:
1030      tensor_unary_operation(m_samplesize, left, result, ::atan);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::atan);
1031      break;      break;
1032      case SINH:      case SINH:
1033      tensor_unary_operation(m_samplesize, left, result, ::sinh);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sinh);
1034      break;      break;
1035      case COSH:      case COSH:
1036      tensor_unary_operation(m_samplesize, left, result, ::cosh);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cosh);
1037      break;      break;
1038      case TANH:      case TANH:
1039      tensor_unary_operation(m_samplesize, left, result, ::tanh);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tanh);
1040      break;      break;
1041      case ERF:      case ERF:
1042  #ifdef _WIN32  #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1043      throw DataException("Error - Data:: erf function is not supported on _WIN32 platforms.");      throw DataException("Error - Data:: erf function is not supported on _WIN32 platforms.");
1044  #else  #else
1045      tensor_unary_operation(m_samplesize, left, result, ::erf);      tensor_unary_operation(m_samplesize, left, result, ::erf);
1046      break;      break;
1047  #endif  #endif
1048     case ASINH:     case ASINH:
1049  #ifdef _WIN32  #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1050      tensor_unary_operation(m_samplesize, left, result, escript::asinh_substitute);      tensor_unary_operation(m_samplesize, left, result, escript::asinh_substitute);
1051  #else  #else
1052      tensor_unary_operation(m_samplesize, left, result, ::asinh);      tensor_unary_operation(m_samplesize, left, result, ::asinh);
1053  #endif    #endif  
1054      break;      break;
1055     case ACOSH:     case ACOSH:
1056  #ifdef _WIN32  #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1057      tensor_unary_operation(m_samplesize, left, result, escript::acosh_substitute);      tensor_unary_operation(m_samplesize, left, result, escript::acosh_substitute);
1058  #else  #else
1059      tensor_unary_operation(m_samplesize, left, result, ::acosh);      tensor_unary_operation(m_samplesize, left, result, ::acosh);
1060  #endif    #endif  
1061      break;      break;
1062     case ATANH:     case ATANH:
1063  #ifdef _WIN32  #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1064      tensor_unary_operation(m_samplesize, left, result, escript::atanh_substitute);      tensor_unary_operation(m_samplesize, left, result, escript::atanh_substitute);
1065  #else  #else
1066      tensor_unary_operation(m_samplesize, left, result, ::atanh);      tensor_unary_operation(m_samplesize, left, result, ::atanh);
1067  #endif    #endif  
1068      break;      break;
1069      case LOG10:      case LOG10:
1070      tensor_unary_operation(m_samplesize, left, result, ::log10);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log10);
1071      break;      break;
1072      case LOG:      case LOG:
1073      tensor_unary_operation(m_samplesize, left, result, ::log);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log);
1074      break;      break;
1075      case SIGN:      case SIGN:
1076      tensor_unary_operation(m_samplesize, left, result, escript::fsign);      tensor_unary_operation(m_samplesize, left, result, escript::fsign);
1077      break;      break;
1078      case ABS:      case ABS:
1079      tensor_unary_operation(m_samplesize, left, result, ::fabs);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::fabs);
1080      break;      break;
1081      case NEG:      case NEG:
1082      tensor_unary_operation(m_samplesize, left, result, negate<double>());      tensor_unary_operation(m_samplesize, left, result, negate<double>());
# Line 558  DataLazy::resolveUnary(ValueType& v, siz Line 1087  DataLazy::resolveUnary(ValueType& v, siz
1087      throw DataException("Programmer error - POS not supported for lazy data.");      throw DataException("Programmer error - POS not supported for lazy data.");
1088      break;      break;
1089      case EXP:      case EXP:
1090      tensor_unary_operation(m_samplesize, left, result, ::exp);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::exp);
1091      break;      break;
1092      case SQRT:      case SQRT:
1093      tensor_unary_operation(m_samplesize, left, result, ::sqrt);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sqrt);
1094      break;      break;
1095      case RECIP:      case RECIP:
1096      tensor_unary_operation(m_samplesize, left, result, bind1st(divides<double>(),1.));      tensor_unary_operation(m_samplesize, left, result, bind1st(divides<double>(),1.));
# Line 578  DataLazy::resolveUnary(ValueType& v, siz Line 1107  DataLazy::resolveUnary(ValueType& v, siz
1107      case LEZ:      case LEZ:
1108      tensor_unary_operation(m_samplesize, left, result, bind2nd(less_equal<double>(),0.0));      tensor_unary_operation(m_samplesize, left, result, bind2nd(less_equal<double>(),0.0));
1109      break;      break;
1110    // There are actually G_UNARY_P but I don't see a compelling reason to treat them differently
1111        case NEZ:
1112        tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsGT(),m_tol));
1113        break;
1114        case EZ:
1115        tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsLTE(),m_tol));
1116        break;
1117    
1118      default:      default:
1119      throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");      throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");
1120    }    }
1121    return &v;    return &(m_samples);
1122  }  }
1123    
1124    
1125    const DataTypes::ValueType*
1126    DataLazy::resolveNodeReduction(int tid, int sampleNo, size_t& roffset)
1127    {
1128        // we assume that any collapsing has been done before we get here
1129        // since we only have one argument we don't need to think about only
1130        // processing single points.
1131        // we will also know we won't get identity nodes
1132      if (m_readytype!='E')
1133      {
1134        throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
1135      }
1136      if (m_op==IDENTITY)
1137      {
1138        throw DataException("Programmer error - resolveNodeUnary should not be called on identity nodes.");
1139      }
1140      size_t loffset=0;
1141      const DataTypes::ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, loffset);
1142    
1143      roffset=m_samplesize*tid;
1144      unsigned int ndpps=getNumDPPSample();
1145      unsigned int psize=DataTypes::noValues(getShape());
1146      double* result=&(m_samples[roffset]);
1147      switch (m_op)
1148      {
1149        case MINVAL:
1150        {
1151          for (unsigned int z=0;z<ndpps;++z)
1152          {
1153            FMin op;
1154            *result=DataMaths::reductionOp(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max());
1155            loffset+=psize;
1156            result++;
1157          }
1158        }
1159        break;
1160        case MAXVAL:
1161        {
1162          for (unsigned int z=0;z<ndpps;++z)
1163          {
1164          FMax op;
1165          *result=DataMaths::reductionOp(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max()*-1);
1166          loffset+=psize;
1167          result++;
1168          }
1169        }
1170        break;
1171        default:
1172        throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");
1173      }
1174      return &(m_samples);
1175    }
1176    
1177    const DataTypes::ValueType*
1178    DataLazy::resolveNodeNP1OUT(int tid, int sampleNo, size_t& roffset)
1179    {
1180        // we assume that any collapsing has been done before we get here
1181        // since we only have one argument we don't need to think about only
1182        // processing single points.
1183      if (m_readytype!='E')
1184      {
1185        throw DataException("Programmer error - resolveNodeNP1OUT should only be called on expanded Data.");
1186      }
1187      if (m_op==IDENTITY)
1188      {
1189        throw DataException("Programmer error - resolveNodeNP1OUT should not be called on identity nodes.");
1190      }
1191      size_t subroffset;
1192      const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1193      roffset=m_samplesize*tid;
1194      size_t loop=0;
1195      size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1196      size_t step=getNoValues();
1197      size_t offset=roffset;
1198      switch (m_op)
1199      {
1200        case SYM:
1201        for (loop=0;loop<numsteps;++loop)
1202        {
1203            DataMaths::symmetric(*leftres,m_left->getShape(),subroffset, m_samples, getShape(), offset);
1204            subroffset+=step;
1205            offset+=step;
1206        }
1207        break;
1208        case NSYM:
1209        for (loop=0;loop<numsteps;++loop)
1210        {
1211            DataMaths::nonsymmetric(*leftres,m_left->getShape(),subroffset, m_samples, getShape(), offset);
1212            subroffset+=step;
1213            offset+=step;
1214        }
1215        break;
1216        default:
1217        throw DataException("Programmer error - resolveNP1OUT can not resolve operator "+opToString(m_op)+".");
1218      }
1219      return &m_samples;
1220    }
1221    
1222    const DataTypes::ValueType*
1223    DataLazy::resolveNodeNP1OUT_P(int tid, int sampleNo, size_t& roffset)
1224    {
1225        // we assume that any collapsing has been done before we get here
1226        // since we only have one argument we don't need to think about only
1227        // processing single points.
1228      if (m_readytype!='E')
1229      {
1230        throw DataException("Programmer error - resolveNodeNP1OUT_P should only be called on expanded Data.");
1231      }
1232      if (m_op==IDENTITY)
1233      {
1234        throw DataException("Programmer error - resolveNodeNP1OUT_P should not be called on identity nodes.");
1235      }
1236      size_t subroffset;
1237      size_t offset;
1238      const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1239      roffset=m_samplesize*tid;
1240      offset=roffset;
1241      size_t loop=0;
1242      size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1243      size_t outstep=getNoValues();
1244      size_t instep=m_left->getNoValues();
1245      switch (m_op)
1246      {
1247        case TRACE:
1248        for (loop=0;loop<numsteps;++loop)
1249        {
1250                DataMaths::trace(*leftres,m_left->getShape(),subroffset, m_samples ,getShape(),offset,m_axis_offset);
1251            subroffset+=instep;
1252            offset+=outstep;
1253        }
1254        break;
1255        case TRANS:
1256        for (loop=0;loop<numsteps;++loop)
1257        {
1258                DataMaths::transpose(*leftres,m_left->getShape(),subroffset, m_samples, getShape(),offset,m_axis_offset);
1259            subroffset+=instep;
1260            offset+=outstep;
1261        }
1262        break;
1263        default:
1264        throw DataException("Programmer error - resolveNP1OUTP can not resolve operator "+opToString(m_op)+".");
1265      }
1266      return &m_samples;
1267    }
1268    
1269  #define PROC_OP(X) \  
1270      for (int i=0;i<steps;++i,resultp+=resultStep) \  const DataTypes::ValueType*
1271      { \  DataLazy::resolveNodeNP1OUT_2P(int tid, int sampleNo, size_t& roffset)
1272         tensor_binary_operation(chunksize, &((*left)[lroffset]), &((*right)[rroffset]), resultp, X); \  {
1273         lroffset+=leftStep; \    if (m_readytype!='E')
1274         rroffset+=rightStep; \    {
1275        throw DataException("Programmer error - resolveNodeNP1OUT_2P should only be called on expanded Data.");
1276      }
1277      if (m_op==IDENTITY)
1278      {
1279        throw DataException("Programmer error - resolveNodeNP1OUT_2P should not be called on identity nodes.");
1280      }
1281      size_t subroffset;
1282      size_t offset;
1283      const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1284      roffset=m_samplesize*tid;
1285      offset=roffset;
1286      size_t loop=0;
1287      size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1288      size_t outstep=getNoValues();
1289      size_t instep=m_left->getNoValues();
1290      switch (m_op)
1291      {
1292        case SWAP:
1293        for (loop=0;loop<numsteps;++loop)
1294        {
1295                DataMaths::swapaxes(*leftres,m_left->getShape(),subroffset, m_samples, getShape(),offset, m_axis_offset, m_transpose);
1296            subroffset+=instep;
1297            offset+=outstep;
1298      }      }
1299        break;
1300        default:
1301        throw DataException("Programmer error - resolveNodeNP1OUT2P can not resolve operator "+opToString(m_op)+".");
1302      }
1303      return &m_samples;
1304    }
1305    
1306    
1307    
 /*  
   \brief Compute the value of the expression (binary operation) for the given sample.  
   \return Vector which stores the value of the subexpression for the given sample.  
   \param v A vector to store intermediate results.  
   \param offset Index in v to begin storing results.  
   \param sampleNo Sample number to evaluate.  
   \param roffset (output parameter) the offset in the return vector where the result begins.  
   
   The return value will be an existing vector so do not deallocate it.  
   If the result is stored in v it should be stored at the offset given.  
   Everything from offset to the end of v should be considered available for this method to use.  
 */  
1308  // This method assumes that any subexpressions which evaluate to Constant or Tagged Data  // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1309  // have already been collapsed to IDENTITY. So we must have at least one expanded child.  // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1310  // If both children are expanded, then we can process them in a single operation (we treat  // If both children are expanded, then we can process them in a single operation (we treat
# Line 618  DataLazy::resolveUnary(ValueType& v, siz Line 1314  DataLazy::resolveUnary(ValueType& v, siz
1314  // There is an additional complication when scalar operations are considered.  // There is an additional complication when scalar operations are considered.
1315  // For example, 2+Vector.  // For example, 2+Vector.
1316  // In this case each double within the point is treated individually  // In this case each double within the point is treated individually
1317  DataTypes::ValueType*  const DataTypes::ValueType*
1318  DataLazy::resolveBinary(ValueType& v,  size_t offset, int sampleNo, size_t& roffset) const  DataLazy::resolveNodeBinary(int tid, int sampleNo, size_t& roffset)
1319  {  {
1320  cout << "Resolve binary: " << toString() << endl;  LAZYDEBUG(cout << "Resolve binary: " << toString() << endl;)
1321    
1322    size_t lroffset=0, rroffset=0;    // offsets in the left and right result vectors    size_t lroffset=0, rroffset=0;    // offsets in the left and right result vectors
1323      // first work out which of the children are expanded      // first work out which of the children are expanded
1324    bool leftExp=(m_left->m_readytype=='E');    bool leftExp=(m_left->m_readytype=='E');
1325    bool rightExp=(m_right->m_readytype=='E');    bool rightExp=(m_right->m_readytype=='E');
1326    bool bigloops=((leftExp && rightExp) || (!leftExp && !rightExp)); // is processing in single step?    if (!leftExp && !rightExp)
1327    int steps=(bigloops?1:getNumDPPSample());    {
1328    size_t chunksize=(bigloops? m_samplesize : getNoValues());    // if bigloops, pretend the whole sample is a datapoint      throw DataException("Programmer Error - please use collapse if neither argument has type 'E'.");
1329    if (m_left->getRank()!=m_right->getRank())    // need to deal with scalar * ? ops    }
1330    {    bool leftScalar=(m_left->getRank()==0);
1331      EsysAssert((m_left->getRank()==0) || (m_right->getRank()==0), "Error - Ranks must match unless one is 0.");    bool rightScalar=(m_right->getRank()==0);
1332      steps=getNumDPPSample()*max(m_left->getNoValues(),m_right->getNoValues());    if ((m_left->getRank()!=m_right->getRank()) && (!leftScalar && !rightScalar))
1333      chunksize=1;    // for scalar    {
1334    }          throw DataException("resolveBinary - ranks of arguments must match unless one of them is scalar.");
1335    int leftStep=((leftExp && !rightExp)? m_right->getNoValues() : 0);    }
1336    int rightStep=((rightExp && !leftExp)? m_left->getNoValues() : 0);    size_t leftsize=m_left->getNoValues();
1337    int resultStep=max(leftStep,rightStep);   // only one (at most) should be !=0    size_t rightsize=m_right->getNoValues();
1338      size_t chunksize=1;           // how many doubles will be processed in one go
1339      int leftstep=0;       // how far should the left offset advance after each step
1340      int rightstep=0;
1341      int numsteps=0;       // total number of steps for the inner loop
1342      int oleftstep=0;  // the o variables refer to the outer loop
1343      int orightstep=0; // The outer loop is only required in cases where there is an extended scalar
1344      int onumsteps=1;
1345      
1346      bool LES=(leftExp && leftScalar); // Left is an expanded scalar
1347      bool RES=(rightExp && rightScalar);
1348      bool LS=(!leftExp && leftScalar); // left is a single scalar
1349      bool RS=(!rightExp && rightScalar);
1350      bool LN=(!leftExp && !leftScalar);    // left is a single non-scalar
1351      bool RN=(!rightExp && !rightScalar);
1352      bool LEN=(leftExp && !leftScalar);    // left is an expanded non-scalar
1353      bool REN=(rightExp && !rightScalar);
1354    
1355      if ((LES && RES) || (LEN && REN)) // both are Expanded scalars or both are expanded non-scalars
1356      {
1357        chunksize=m_left->getNumDPPSample()*leftsize;
1358        leftstep=0;
1359        rightstep=0;
1360        numsteps=1;
1361      }
1362      else if (LES || RES)
1363      {
1364        chunksize=1;
1365        if (LES)        // left is an expanded scalar
1366        {
1367            if (RS)
1368            {
1369               leftstep=1;
1370               rightstep=0;
1371               numsteps=m_left->getNumDPPSample();
1372            }
1373            else        // RN or REN
1374            {
1375               leftstep=0;
1376               oleftstep=1;
1377               rightstep=1;
1378               orightstep=(RN ? -(int)rightsize : 0);
1379               numsteps=rightsize;
1380               onumsteps=m_left->getNumDPPSample();
1381            }
1382        }
1383        else        // right is an expanded scalar
1384        {
1385            if (LS)
1386            {
1387               rightstep=1;
1388               leftstep=0;
1389               numsteps=m_right->getNumDPPSample();
1390            }
1391            else
1392            {
1393               rightstep=0;
1394               orightstep=1;
1395               leftstep=1;
1396               oleftstep=(LN ? -(int)leftsize : 0);
1397               numsteps=leftsize;
1398               onumsteps=m_right->getNumDPPSample();
1399            }
1400        }
1401      }
1402      else  // this leaves (LEN, RS), (LEN, RN) and their transposes
1403      {
1404        if (LEN)    // and Right will be a single value
1405        {
1406            chunksize=rightsize;
1407            leftstep=rightsize;
1408            rightstep=0;
1409            numsteps=m_left->getNumDPPSample();
1410            if (RS)
1411            {
1412               numsteps*=leftsize;
1413            }
1414        }
1415        else    // REN
1416        {
1417            chunksize=leftsize;
1418            rightstep=leftsize;
1419            leftstep=0;
1420            numsteps=m_right->getNumDPPSample();
1421            if (LS)
1422            {
1423               numsteps*=rightsize;
1424            }
1425        }
1426      }
1427    
1428      int resultStep=max(leftstep,rightstep);   // only one (at most) should be !=0
1429      // Get the values of sub-expressions      // Get the values of sub-expressions
1430    const ValueType* left=m_left->resolveSample(v,offset,sampleNo,lroffset);    const ValueType* left=m_left->resolveNodeSample(tid,sampleNo,lroffset);  
1431    const ValueType* right=m_right->resolveSample(v,offset+m_samplesize,sampleNo,rroffset); // Note    const ValueType* right=m_right->resolveNodeSample(tid,sampleNo,rroffset);
1432      // the right child starts further along.  LAZYDEBUG(cout << "Post sub calls in " << toString() << endl;)
1433    double* resultp=&(v[offset]);     // results are stored at the vector offset we recieved  LAZYDEBUG(cout << "shapes=" << DataTypes::shapeToString(m_left->getShape()) << "," << DataTypes::shapeToString(m_right->getShape()) << endl;)
1434    LAZYDEBUG(cout << "chunksize=" << chunksize << endl << "leftstep=" << leftstep << " rightstep=" << rightstep;)
1435    LAZYDEBUG(cout << " numsteps=" << numsteps << endl << "oleftstep=" << oleftstep << " orightstep=" << orightstep;)
1436    LAZYDEBUG(cout << "onumsteps=" << onumsteps << endl;)
1437    LAZYDEBUG(cout << " DPPS=" << m_left->getNumDPPSample() << "," <<m_right->getNumDPPSample() << endl;)
1438    LAZYDEBUG(cout << "" << LS << RS << LN << RN << LES << RES <<LEN << REN <<   endl;)
1439    
1440    LAZYDEBUG(cout << "Left res["<< lroffset<< "]=" << (*left)[lroffset] << endl;)
1441    LAZYDEBUG(cout << "Right res["<< rroffset<< "]=" << (*right)[rroffset] << endl;)
1442    
1443    
1444      roffset=m_samplesize*tid;
1445      double* resultp=&(m_samples[roffset]);        // results are stored at the vector offset we recieved
1446    switch(m_op)    switch(m_op)
1447    {    {
1448      case ADD:      case ADD:
1449      PROC_OP(plus<double>());          PROC_OP(NO_ARG,plus<double>());
1450      break;      break;
1451      case SUB:      case SUB:
1452      PROC_OP(minus<double>());      PROC_OP(NO_ARG,minus<double>());
1453      break;      break;
1454      case MUL:      case MUL:
1455      PROC_OP(multiplies<double>());      PROC_OP(NO_ARG,multiplies<double>());
1456      break;      break;
1457      case DIV:      case DIV:
1458      PROC_OP(divides<double>());      PROC_OP(NO_ARG,divides<double>());
1459      break;      break;
1460      case POW:      case POW:
1461      PROC_OP(::pow);         PROC_OP(double (double,double),::pow);
1462      break;      break;
1463      default:      default:
1464      throw DataException("Programmer error - resolveBinary can not resolve operator "+opToString(m_op)+".");      throw DataException("Programmer error - resolveBinary can not resolve operator "+opToString(m_op)+".");
1465    }    }
1466    roffset=offset;    LAZYDEBUG(cout << "Result res[" << roffset<< "]" << m_samples[roffset] << endl;)
1467    return &v;    return &m_samples;
1468  }  }
1469    
1470    
1471    // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1472    // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1473    // unlike the other resolve helpers, we must treat these datapoints separately.
1474    const DataTypes::ValueType*
1475    DataLazy::resolveNodeTProd(int tid, int sampleNo, size_t& roffset)
1476    {
1477    LAZYDEBUG(cout << "Resolve TensorProduct: " << toString() << endl;)
1478    
1479  /*    size_t lroffset=0, rroffset=0;    // offsets in the left and right result vectors
1480    \brief Compute the value of the expression for the given sample.      // first work out which of the children are expanded
1481    \return Vector which stores the value of the subexpression for the given sample.    bool leftExp=(m_left->m_readytype=='E');
1482    \param v A vector to store intermediate results.    bool rightExp=(m_right->m_readytype=='E');
1483    \param offset Index in v to begin storing results.    int steps=getNumDPPSample();
1484    \param sampleNo Sample number to evaluate.    int leftStep=(leftExp? m_left->getNoValues() : 0);        // do not have scalars as input to this method
1485    \param roffset (output parameter) the offset in the return vector where the result begins.    int rightStep=(rightExp?m_right->getNoValues() : 0);
1486    
1487    The return value will be an existing vector so do not deallocate it.    int resultStep=getNoValues();
1488  */    roffset=m_samplesize*tid;
1489  // the vector and the offset are a place where the method could write its data if it wishes    size_t offset=roffset;
 // it is not obligated to do so. For example, if it has its own storage already, it can use that.  
 // Hence the return value to indicate where the data is actually stored.  
 // Regardless, the storage should be assumed to be used, even if it isn't.  
1490    
1491  // the roffset is the offset within the returned vector where the data begins    const ValueType* left=m_left->resolveNodeSample(tid, sampleNo, lroffset);
1492  const DataTypes::ValueType*  
1493  DataLazy::resolveSample(ValueType& v, size_t offset, int sampleNo, size_t& roffset)    const ValueType* right=m_right->resolveNodeSample(tid, sampleNo, rroffset);
1494  {  
1495  cout << "Resolve sample " << toString() << endl;  LAZYDEBUG(cerr << "[Left shape]=" << DataTypes::shapeToString(m_left->getShape()) << "\n[Right shape]=" << DataTypes::shapeToString(m_right->getShape()) << " result=" <<DataTypes::shapeToString(getShape()) <<  endl;
1496      // collapse so we have a 'E' node or an IDENTITY for some other type  cout << getNoValues() << endl;)
1497    if (m_readytype!='E' && m_op!=IDENTITY)  
1498    {  
1499      collapse();  LAZYDEBUG(cerr << "Post sub calls: " << toString() << endl;)
1500    }  LAZYDEBUG(cout << "LeftExp=" << leftExp << " rightExp=" << rightExp << endl;)
1501    if (m_op==IDENTITY)    LAZYDEBUG(cout << "LeftR=" << m_left->getRank() << " rightExp=" << m_right->getRank() << endl;)
1502    {  LAZYDEBUG(cout << "LeftSize=" << m_left->getNoValues() << " RightSize=" << m_right->getNoValues() << endl;)
1503      const ValueType& vec=m_id->getVector();  LAZYDEBUG(cout << "m_samplesize=" << m_samplesize << endl;)
1504      if (m_readytype=='C')  LAZYDEBUG(cout << "outputshape=" << DataTypes::shapeToString(getShape()) << endl;)
1505      {  LAZYDEBUG(cout << "DPPS=" << m_right->getNumDPPSample() <<"."<<endl;)
1506      roffset=0;  
1507      return &(vec);    double* resultp=&(m_samples[offset]);     // results are stored at the vector offset we recieved
1508      }    switch(m_op)
     roffset=m_id->getPointOffset(sampleNo, 0);  
     return &(vec);  
   }  
   if (m_readytype!='E')  
   {  
     throw DataException("Programmer Error - Collapse did not produce an expanded node.");  
   }  
   switch (getOpgroup(m_op))  
1509    {    {
1510    case G_UNARY: return resolveUnary(v, offset,sampleNo,roffset);      case PROD:
1511    case G_BINARY: return resolveBinary(v, offset,sampleNo,roffset);      for (int i=0;i<steps;++i,resultp+=resultStep)
1512    default:      {
1513      throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");            const double *ptr_0 = &((*left)[lroffset]);
1514              const double *ptr_1 = &((*right)[rroffset]);
1515    
1516    LAZYDEBUG(cout << DataTypes::pointToString(*left, m_left->getShape(),lroffset,"LEFT") << endl;)
1517    LAZYDEBUG(cout << DataTypes::pointToString(*right,m_right->getShape(),rroffset, "RIGHT") << endl;)
1518    
1519              matrix_matrix_product(m_SL, m_SM, m_SR, ptr_0, ptr_1, resultp, m_transpose);
1520    
1521          lroffset+=leftStep;
1522          rroffset+=rightStep;
1523        }
1524        break;
1525        default:
1526        throw DataException("Programmer error - resolveTProduct can not resolve operator "+opToString(m_op)+".");
1527    }    }
1528      roffset=offset;
1529      return &m_samples;
1530    }
1531    
1532    
1533    const DataTypes::ValueType*
1534    DataLazy::resolveSample(int sampleNo, size_t& roffset)
1535    {
1536    #ifdef _OPENMP
1537        int tid=omp_get_thread_num();
1538    #else
1539        int tid=0;
1540    #endif
1541    
1542    #ifdef LAZY_STACK_PROF
1543        stackstart[tid]=&tid;
1544        stackend[tid]=&tid;
1545        const DataTypes::ValueType* r=resolveNodeSample(tid, sampleNo, roffset);
1546        size_t d=(size_t)stackstart[tid]-(size_t)stackend[tid];
1547        #pragma omp critical
1548        if (d>maxstackuse)
1549        {
1550    cout << "Max resolve Stack use " << d << endl;
1551            maxstackuse=d;
1552        }
1553        return r;
1554    #else
1555        return resolveNodeSample(tid, sampleNo, roffset);
1556    #endif
1557    }
1558    
1559    
1560    // This needs to do the work of the identity constructor
1561    void
1562    DataLazy::resolveToIdentity()
1563    {
1564       if (m_op==IDENTITY)
1565        return;
1566       DataReady_ptr p=resolveNodeWorker();
1567       makeIdentity(p);
1568    }
1569    
1570    void DataLazy::makeIdentity(const DataReady_ptr& p)
1571    {
1572       m_op=IDENTITY;
1573       m_axis_offset=0;
1574       m_transpose=0;
1575       m_SL=m_SM=m_SR=0;
1576       m_children=m_height=0;
1577       m_id=p;
1578       if(p->isConstant()) {m_readytype='C';}
1579       else if(p->isExpanded()) {m_readytype='E';}
1580       else if (p->isTagged()) {m_readytype='T';}
1581       else {throw DataException("Unknown DataReady instance in convertToIdentity constructor.");}
1582       m_samplesize=p->getNumDPPSample()*p->getNoValues();
1583       m_left.reset();
1584       m_right.reset();
1585  }  }
1586    
1587    
 // To simplify the memory management, all threads operate on one large vector, rather than one each.  
 // Each sample is evaluated independently and copied into the result DataExpanded.  
1588  DataReady_ptr  DataReady_ptr
1589  DataLazy::resolve()  DataLazy::resolve()
1590  {  {
1591        resolveToIdentity();
1592        return m_id;
1593    }
1594    
1595  cout << "Sample size=" << m_samplesize << endl;  // This version of resolve uses storage in each node to hold results
1596  cout << "Buffers=" << m_buffsRequired << endl;  DataReady_ptr
1597    DataLazy::resolveNodeWorker()
1598    {
1599    if (m_readytype!='E')     // if the whole sub-expression is Constant or Tagged, then evaluate it normally    if (m_readytype!='E')     // if the whole sub-expression is Constant or Tagged, then evaluate it normally
1600    {    {
1601      collapse();      collapse();
# Line 738  cout << "Buffers=" << m_buffsRequired << Line 1605  cout << "Buffers=" << m_buffsRequired <<
1605      return m_id;      return m_id;
1606    }    }
1607      // from this point on we must have m_op!=IDENTITY and m_readytype=='E'      // from this point on we must have m_op!=IDENTITY and m_readytype=='E'
   size_t threadbuffersize=m_samplesize*(max(1,m_buffsRequired));    // Each thread needs to have enough  
     // storage to evaluate its expression  
   int numthreads=1;  
 #ifdef _OPENMP  
   numthreads=getNumberOfThreads();  
   int threadnum=0;  
 #endif  
   ValueType v(numthreads*threadbuffersize);  
 cout << "Buffer created with size=" << v.size() << endl;  
1608    DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(),  ValueType(getNoValues()));    DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(),  ValueType(getNoValues()));
1609    ValueType& resvec=result->getVector();    ValueType& resvec=result->getVectorRW();
1610    DataReady_ptr resptr=DataReady_ptr(result);    DataReady_ptr resptr=DataReady_ptr(result);
1611    
1612    int sample;    int sample;
   size_t outoffset;     // offset in the output data  
1613    int totalsamples=getNumSamples();    int totalsamples=getNumSamples();
1614    const ValueType* res=0;   // Vector storing the answer    const ValueType* res=0;   // Storage for answer
1615    size_t resoffset=0;       // where in the vector to find the answer  LAZYDEBUG(cout << "Total number of samples=" <<totalsamples << endl;)
1616    #pragma omp parallel for private(sample,resoffset,outoffset,threadnum,res) schedule(static)    #pragma omp parallel private(sample,res)
   for (sample=0;sample<totalsamples;++sample)  
1617    {    {
1618  cout << "################################# " << sample << endl;      size_t roffset=0;
1619    #ifdef LAZY_STACK_PROF
1620        stackstart[omp_get_thread_num()]=&roffset;
1621        stackend[omp_get_thread_num()]=&roffset;
1622    #endif
1623        #pragma omp for schedule(static)
1624        for (sample=0;sample<totalsamples;++sample)
1625        {
1626            roffset=0;
1627  #ifdef _OPENMP  #ifdef _OPENMP
1628      res=resolveSample(v,threadbuffersize*omp_get_thread_num(),sample,resoffset);              res=resolveNodeSample(omp_get_thread_num(),sample,roffset);
1629  #else  #else
1630      res=resolveSample(v,0,sample,resoffset);   // res would normally be v, but not if its a single IDENTITY op.              res=resolveNodeSample(0,sample,roffset);
1631  #endif  #endif
1632  cerr << "-------------------------------- " << endl;  LAZYDEBUG(cout << "Sample #" << sample << endl;)
1633      outoffset=result->getPointOffset(sample,0);  LAZYDEBUG(cout << "Final res[" << roffset<< "]=" << (*res)[roffset] << (*res)[roffset]<< endl; )
1634  cerr << "offset=" << outoffset << endl;              DataVector::size_type outoffset=result->getPointOffset(sample,0);
1635      for (unsigned int i=0;i<m_samplesize;++i,++outoffset,++resoffset)   // copy values into the output vector              memcpy(&(resvec[outoffset]),&((*res)[roffset]),m_samplesize*sizeof(DataVector::ElementType));
1636      {      }
1637      resvec[outoffset]=(*res)[resoffset];    }
1638      }  #ifdef LAZY_STACK_PROF
1639  cerr << "*********************************" << endl;    for (int i=0;i<getNumberOfThreads();++i)
1640      {
1641        size_t r=((size_t)stackstart[i] - (size_t)stackend[i]);
1642    //  cout << i << " " << stackstart[i] << " .. " << stackend[i] << " = " <<  r << endl;
1643        if (r>maxstackuse)
1644        {
1645            maxstackuse=r;
1646        }
1647    }    }
1648      cout << "Max resolve Stack use=" << maxstackuse << endl;
1649    #endif
1650    return resptr;    return resptr;
1651  }  }
1652    
# Line 780  std::string Line 1654  std::string
1654  DataLazy::toString() const  DataLazy::toString() const
1655  {  {
1656    ostringstream oss;    ostringstream oss;
1657    oss << "Lazy Data:";    oss << "Lazy Data: [depth=" << m_height<< "] ";
1658    intoString(oss);    if (escriptParams.getPRINT_LAZY_TREE()==0)
1659      {
1660          intoString(oss);
1661      }
1662      else
1663      {
1664        oss << endl;
1665        intoTreeString(oss,"");
1666      }
1667    return oss.str();    return oss.str();
1668  }  }
1669    
# Line 789  DataLazy::toString() const Line 1671  DataLazy::toString() const
1671  void  void
1672  DataLazy::intoString(ostringstream& oss) const  DataLazy::intoString(ostringstream& oss) const
1673  {  {
1674    //    oss << "[" << m_children <<";"<<m_height <<"]";
1675    switch (getOpgroup(m_op))    switch (getOpgroup(m_op))
1676    {    {
1677    case G_IDENTITY:    case G_IDENTITY:
# Line 818  DataLazy::intoString(ostringstream& oss) Line 1701  DataLazy::intoString(ostringstream& oss)
1701      oss << ')';      oss << ')';
1702      break;      break;
1703    case G_UNARY:    case G_UNARY:
1704      case G_UNARY_P:
1705      case G_NP1OUT:
1706      case G_NP1OUT_P:
1707      case G_REDUCTION:
1708        oss << opToString(m_op) << '(';
1709        m_left->intoString(oss);
1710        oss << ')';
1711        break;
1712      case G_TENSORPROD:
1713        oss << opToString(m_op) << '(';
1714        m_left->intoString(oss);
1715        oss << ", ";
1716        m_right->intoString(oss);
1717        oss << ')';
1718        break;
1719      case G_NP1OUT_2P:
1720      oss << opToString(m_op) << '(';      oss << opToString(m_op) << '(';
1721      m_left->intoString(oss);      m_left->intoString(oss);
1722        oss << ", " << m_axis_offset << ", " << m_transpose;
1723      oss << ')';      oss << ')';
1724      break;      break;
1725    default:    default:
# Line 827  DataLazy::intoString(ostringstream& oss) Line 1727  DataLazy::intoString(ostringstream& oss)
1727    }    }
1728  }  }
1729    
1730  // Note that in this case, deepCopy does not make copies of the leaves.  
1731  // Hopefully copy on write (or whatever we end up using) will take care of this.  void
1732    DataLazy::intoTreeString(ostringstream& oss, string indent) const
1733    {
1734      oss << '[' << m_rank << ':' << setw(3) << m_samplesize << "] " << indent;
1735      switch (getOpgroup(m_op))
1736      {
1737      case G_IDENTITY:
1738        if (m_id->isExpanded())
1739        {
1740           oss << "E";
1741        }
1742        else if (m_id->isTagged())
1743        {
1744          oss << "T";
1745        }
1746        else if (m_id->isConstant())
1747        {
1748          oss << "C";
1749        }
1750        else
1751        {
1752          oss << "?";
1753        }
1754        oss << '@' << m_id.get() << endl;
1755        break;
1756      case G_BINARY:
1757        oss << opToString(m_op) << endl;
1758        indent+='.';
1759        m_left->intoTreeString(oss, indent);
1760        m_right->intoTreeString(oss, indent);
1761        break;
1762      case G_UNARY:
1763      case G_UNARY_P:
1764      case G_NP1OUT:
1765      case G_NP1OUT_P:
1766      case G_REDUCTION:
1767        oss << opToString(m_op) << endl;
1768        indent+='.';
1769        m_left->intoTreeString(oss, indent);
1770        break;
1771      case G_TENSORPROD:
1772        oss << opToString(m_op) << endl;
1773        indent+='.';
1774        m_left->intoTreeString(oss, indent);
1775        m_right->intoTreeString(oss, indent);
1776        break;
1777      case G_NP1OUT_2P:
1778        oss << opToString(m_op) << ", " << m_axis_offset << ", " << m_transpose<< endl;
1779        indent+='.';
1780        m_left->intoTreeString(oss, indent);
1781        break;
1782      default:
1783        oss << "UNKNOWN";
1784      }
1785    }
1786    
1787    
1788  DataAbstract*  DataAbstract*
1789  DataLazy::deepCopy()  DataLazy::deepCopy()
1790  {  {
1791    if (m_op==IDENTITY)    switch (getOpgroup(m_op))
1792    {    {
1793      return new DataLazy(m_left);    // we don't need to copy the child here    case G_IDENTITY:  return new DataLazy(m_id->deepCopy()->getPtr());
1794      case G_UNARY:
1795      case G_REDUCTION:      return new DataLazy(m_left->deepCopy()->getPtr(),m_op);
1796      case G_UNARY_P:   return new DataLazy(m_left->deepCopy()->getPtr(), m_op, m_tol);
1797      case G_BINARY:    return new DataLazy(m_left->deepCopy()->getPtr(),m_right->deepCopy()->getPtr(),m_op);
1798      case G_NP1OUT: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(),m_op);
1799      case G_TENSORPROD: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
1800      case G_NP1OUT_P:   return new DataLazy(m_left->deepCopy()->getPtr(),m_op,  m_axis_offset);
1801      case G_NP1OUT_2P:  return new DataLazy(m_left->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
1802      default:
1803        throw DataException("Programmer error - do not know how to deepcopy operator "+opToString(m_op)+".");
1804    }    }
   return new DataLazy(m_left->deepCopy()->getPtr(),m_right->deepCopy()->getPtr(),m_op);  
1805  }  }
1806    
1807    
1808    
1809    // There is no single, natural interpretation of getLength on DataLazy.
1810    // Instances of DataReady can look at the size of their vectors.
1811    // For lazy though, it could be the size the data would be if it were resolved;
1812    // or it could be some function of the lengths of the DataReady instances which
1813    // form part of the expression.
1814    // Rather than have people making assumptions, I have disabled the method.
1815  DataTypes::ValueType::size_type  DataTypes::ValueType::size_type
1816  DataLazy::getLength() const  DataLazy::getLength() const
1817  {  {
1818    return m_length;    throw DataException("getLength() does not make sense for lazy data.");
1819  }  }
1820    
1821    
# Line 853  DataLazy::getSlice(const DataTypes::Regi Line 1825  DataLazy::getSlice(const DataTypes::Regi
1825    throw DataException("getSlice - not implemented for Lazy objects.");    throw DataException("getSlice - not implemented for Lazy objects.");
1826  }  }
1827    
1828    
1829    // To do this we need to rely on our child nodes
1830    DataTypes::ValueType::size_type
1831    DataLazy::getPointOffset(int sampleNo,
1832                     int dataPointNo)
1833    {
1834      if (m_op==IDENTITY)
1835      {
1836        return m_id->getPointOffset(sampleNo,dataPointNo);
1837      }
1838      if (m_readytype!='E')
1839      {
1840        collapse();
1841        return m_id->getPointOffset(sampleNo,dataPointNo);
1842      }
1843      // at this point we do not have an identity node and the expression will be Expanded
1844      // so we only need to know which child to ask
1845      if (m_left->m_readytype=='E')
1846      {
1847        return m_left->getPointOffset(sampleNo,dataPointNo);
1848      }
1849      else
1850      {
1851        return m_right->getPointOffset(sampleNo,dataPointNo);
1852      }
1853    }
1854    
1855    // To do this we need to rely on our child nodes
1856  DataTypes::ValueType::size_type  DataTypes::ValueType::size_type
1857  DataLazy::getPointOffset(int sampleNo,  DataLazy::getPointOffset(int sampleNo,
1858                   int dataPointNo) const                   int dataPointNo) const
1859  {  {
1860    throw DataException("getPointOffset - not implemented for Lazy objects - yet.");    if (m_op==IDENTITY)
1861      {
1862        return m_id->getPointOffset(sampleNo,dataPointNo);
1863      }
1864      if (m_readytype=='E')
1865      {
1866        // at this point we do not have an identity node and the expression will be Expanded
1867        // so we only need to know which child to ask
1868        if (m_left->m_readytype=='E')
1869        {
1870        return m_left->getPointOffset(sampleNo,dataPointNo);
1871        }
1872        else
1873        {
1874        return m_right->getPointOffset(sampleNo,dataPointNo);
1875        }
1876      }
1877      if (m_readytype=='C')
1878      {
1879        return m_left->getPointOffset(sampleNo,dataPointNo); // which child doesn't matter
1880      }
1881      throw DataException("Programmer error - getPointOffset on lazy data may require collapsing (but this object is marked const).");
1882  }  }
1883    
1884  // It would seem that DataTagged will need to be treated differently since even after setting all tags  
1885  // to zero, all the tags from all the DataTags would be in the result.  // I have decided to let Data:: handle this issue.
 // However since they all have the same value (0) whether they are there or not should not matter.  
 // So I have decided that for all types this method will create a constant 0.  
 // It can be promoted up as required.  
 // A possible efficiency concern might be expanded->constant->expanded which has an extra memory management  
 // but we can deal with that if it arrises.  
1886  void  void
1887  DataLazy::setToZero()  DataLazy::setToZero()
1888  {  {
1889    DataTypes::ValueType v(getNoValues(),0);  //   DataTypes::ValueType v(getNoValues(),0);
1890    m_id=DataReady_ptr(new DataConstant(getFunctionSpace(),getShape(),v));  //   m_id=DataReady_ptr(new DataConstant(getFunctionSpace(),getShape(),v));
1891    m_op=IDENTITY;  //   m_op=IDENTITY;
1892    m_right.reset();    //   m_right.reset();  
1893    m_left.reset();  //   m_left.reset();
1894    m_readytype='C';  //   m_readytype='C';
1895    m_buffsRequired=1;  //   m_buffsRequired=1;
1896    
1897      privdebug=privdebug;  // to stop the compiler complaining about unused privdebug
1898      throw DataException("Programmer error - setToZero not supported for DataLazy (DataLazy objects should be read only).");
1899    }
1900    
1901    bool
1902    DataLazy::actsExpanded() const
1903    {
1904        return (m_readytype=='E');
1905  }  }
1906    
1907  }   // end namespace  }   // end namespace

Legend:
Removed from v.1910  
changed lines
  Added in v.2791

  ViewVC Help
Powered by ViewVC 1.1.26