/[escript]/branches/clazy/escriptcore/src/DataLazy.cpp
ViewVC logotype

Diff of /branches/clazy/escriptcore/src/DataLazy.cpp

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

branches/schroedinger/escript/src/DataLazy.cpp revision 1910 by jfenwick, Thu Oct 23 03:05:28 2008 UTC trunk/escript/src/DataLazy.cpp revision 2779 by caltinay, Thu Nov 26 03:51:15 2009 UTC
# Line 1  Line 1 
1    
2  /*******************************************************  /*******************************************************
3  *  *
4  * Copyright (c) 2003-2008 by University of Queensland  * Copyright (c) 2003-2009 by University of Queensland
5  * Earth Systems Science Computational Center (ESSCC)  * Earth Systems Science Computational Center (ESSCC)
6  * http://www.uq.edu.au/esscc  * http://www.uq.edu.au/esscc
7  *  *
# Line 26  Line 26 
26  #include "DataTypes.h"  #include "DataTypes.h"
27  #include "Data.h"  #include "Data.h"
28  #include "UnaryFuncs.h"     // for escript::fsign  #include "UnaryFuncs.h"     // for escript::fsign
29    #include "Utils.h"
30    
31    #include "EscriptParams.h"
32    
33    #include <iomanip>      // for some fancy formatting in debug
34    
35    // #define LAZYDEBUG(X) if (privdebug){X;}
36    #define LAZYDEBUG(X)
37    namespace
38    {
39    bool privdebug=false;
40    
41    #define ENABLEDEBUG privdebug=true;
42    #define DISABLEDEBUG privdebug=false;
43    }
44    
45    // #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {cerr << "\n!!!!!!! SIZE LIMIT EXCEEDED " << m_children << ";" << m_height << endl << toString() << endl;resolveToIdentity();}
46    
47    #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {resolveToIdentity();}
48    
49    
50  /*  /*
51  How does DataLazy work?  How does DataLazy work?
# Line 47  I will refer to individual DataLazy obje Line 67  I will refer to individual DataLazy obje
67  Each node also stores:  Each node also stores:
68  - m_readytype \in {'E','T','C','?'} ~ indicates what sort of DataReady would be produced if the expression was  - m_readytype \in {'E','T','C','?'} ~ indicates what sort of DataReady would be produced if the expression was
69      evaluated.      evaluated.
 - m_length ~ how many values would be stored in the answer if the expression was evaluated.  
70  - m_buffsrequired ~ the larged number of samples which would need to be kept simultaneously in order to  - m_buffsrequired ~ the larged number of samples which would need to be kept simultaneously in order to
71      evaluate the expression.      evaluate the expression.
72  - m_samplesize ~ the number of doubles stored in a sample.  - m_samplesize ~ the number of doubles stored in a sample.
# Line 69  The convention that I use, is that the r Line 88  The convention that I use, is that the r
88    
89  For expressions which evaluate to Constant or Tagged, there is a different evaluation method.  For expressions which evaluate to Constant or Tagged, there is a different evaluation method.
90  The collapse method invokes the (non-lazy) operations on the Data class to evaluate the expression.  The collapse method invokes the (non-lazy) operations on the Data class to evaluate the expression.
91    
92    To add a new operator you need to do the following (plus anything I might have forgotten - adding a new group for example):
93    1) Add to the ES_optype.
94    2) determine what opgroup your operation belongs to (X)
95    3) add a string for the op to the end of ES_opstrings
96    4) increase ES_opcount
97    5) add an entry (X) to opgroups
98    6) add an entry to the switch in collapseToReady
99    7) add an entry to resolveX
100  */  */
101    
102    
# Line 78  using namespace boost; Line 106  using namespace boost;
106  namespace escript  namespace escript
107  {  {
108    
 const std::string&  
 opToString(ES_optype op);  
   
109  namespace  namespace
110  {  {
111    
112    
113    // enabling this will print out when ever the maximum stacksize used by resolve increases
114    // it assumes _OPENMP is also in use
115    //#define LAZY_STACK_PROF
116    
117    
118    
119    #ifndef _OPENMP
120      #ifdef LAZY_STACK_PROF
121      #undef LAZY_STACK_PROF
122      #endif
123    #endif
124    
125    
126    #ifdef LAZY_STACK_PROF
127    std::vector<void*> stackstart(getNumberOfThreads());
128    std::vector<void*> stackend(getNumberOfThreads());
129    size_t maxstackuse=0;
130    #endif
131    
132  enum ES_opgroup  enum ES_opgroup
133  {  {
134     G_UNKNOWN,     G_UNKNOWN,
135     G_IDENTITY,     G_IDENTITY,
136     G_BINARY,        // pointwise operations with two arguments     G_BINARY,        // pointwise operations with two arguments
137     G_UNARY      // pointwise operations with one argument     G_UNARY,     // pointwise operations with one argument
138       G_UNARY_P,       // pointwise operations with one argument, requiring a parameter
139       G_NP1OUT,        // non-pointwise op with one output
140       G_NP1OUT_P,      // non-pointwise op with one output requiring a parameter
141       G_TENSORPROD,    // general tensor product
142       G_NP1OUT_2P,     // non-pointwise op with one output requiring two params
143       G_REDUCTION      // non-pointwise unary op with a scalar output
144  };  };
145    
146    
# Line 100  string ES_opstrings[]={"UNKNOWN","IDENTI Line 151  string ES_opstrings[]={"UNKNOWN","IDENTI
151              "asin","acos","atan","sinh","cosh","tanh","erf",              "asin","acos","atan","sinh","cosh","tanh","erf",
152              "asinh","acosh","atanh",              "asinh","acosh","atanh",
153              "log10","log","sign","abs","neg","pos","exp","sqrt",              "log10","log","sign","abs","neg","pos","exp","sqrt",
154              "1/","where>0","where<0","where>=0","where<=0"};              "1/","where>0","where<0","where>=0","where<=0", "where<>0","where=0",
155  int ES_opcount=32;              "symmetric","nonsymmetric",
156                "prod",
157                "transpose", "trace",
158                "swapaxes",
159                "minval", "maxval"};
160    int ES_opcount=43;
161  ES_opgroup opgroups[]={G_UNKNOWN,G_IDENTITY,G_BINARY,G_BINARY,G_BINARY,G_BINARY, G_BINARY,  ES_opgroup opgroups[]={G_UNKNOWN,G_IDENTITY,G_BINARY,G_BINARY,G_BINARY,G_BINARY, G_BINARY,
162              G_UNARY,G_UNARY,G_UNARY, //10              G_UNARY,G_UNARY,G_UNARY, //10
163              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,    // 17              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,    // 17
164              G_UNARY,G_UNARY,G_UNARY,                    // 20              G_UNARY,G_UNARY,G_UNARY,                    // 20
165              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,        // 28              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,    // 28
166              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY};              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY, G_UNARY_P, G_UNARY_P,      // 35
167                G_NP1OUT,G_NP1OUT,
168                G_TENSORPROD,
169                G_NP1OUT_P, G_NP1OUT_P,
170                G_NP1OUT_2P,
171                G_REDUCTION, G_REDUCTION};
172  inline  inline
173  ES_opgroup  ES_opgroup
174  getOpgroup(ES_optype op)  getOpgroup(ES_optype op)
# Line 124  resultFS(DataAbstract_ptr left, DataAbst Line 185  resultFS(DataAbstract_ptr left, DataAbst
185      // that way, if interpolate is required in any other op we can just throw a      // that way, if interpolate is required in any other op we can just throw a
186      // programming error exception.      // programming error exception.
187    
188      FunctionSpace l=left->getFunctionSpace();
189      if (left->getFunctionSpace()!=right->getFunctionSpace())    FunctionSpace r=right->getFunctionSpace();
190      {    if (l!=r)
191          throw DataException("FunctionSpaces not equal - interpolation not supported on lazy data.");    {
192      }      if (r.probeInterpolation(l))
193      return left->getFunctionSpace();      {
194        return l;
195        }
196        if (l.probeInterpolation(r))
197        {
198        return r;
199        }
200        throw DataException("Cannot interpolate between the FunctionSpaces given for operation "+opToString(op)+".");
201      }
202      return l;
203  }  }
204    
205  // return the shape of the result of "left op right"  // return the shape of the result of "left op right"
206    // the shapes resulting from tensor product are more complex to compute so are worked out elsewhere
207  DataTypes::ShapeType  DataTypes::ShapeType
208  resultShape(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)  resultShape(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
209  {  {
210      if (left->getShape()!=right->getShape())      if (left->getShape()!=right->getShape())
211      {      {
212        if (getOpgroup(op)!=G_BINARY)        if ((getOpgroup(op)!=G_BINARY) && (getOpgroup(op)!=G_NP1OUT))
213        {        {
214          throw DataException("Shapes not the name - shapes must match for (point)binary operations.");          throw DataException("Shapes not the name - shapes must match for (point)binary operations.");
215        }        }
216    
217        if (left->getRank()==0)   // we need to allow scalar * anything        if (left->getRank()==0)   // we need to allow scalar * anything
218        {        {
219          return right->getShape();          return right->getShape();
# Line 155  resultShape(DataAbstract_ptr left, DataA Line 227  resultShape(DataAbstract_ptr left, DataA
227      return left->getShape();      return left->getShape();
228  }  }
229    
230  // determine the number of points in the result of "left op right"  // return the shape for "op left"
231  size_t  
232  resultLength(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)  DataTypes::ShapeType
233    resultShape(DataAbstract_ptr left, ES_optype op, int axis_offset)
234  {  {
235     switch (getOpgroup(op))      switch(op)
236     {      {
237     case G_BINARY: return left->getLength();          case TRANS:
238     case G_UNARY: return left->getLength();         {            // for the scoping of variables
239     default:          const DataTypes::ShapeType& s=left->getShape();
240      throw DataException("Programmer Error - attempt to getLength() for operator "+opToString(op)+".");          DataTypes::ShapeType sh;
241     }          int rank=left->getRank();
242            if (axis_offset<0 || axis_offset>rank)
243            {
244                stringstream e;
245                e << "Error - Data::transpose must have 0 <= axis_offset <= rank=" << rank;
246                throw DataException(e.str());
247            }
248            for (int i=0; i<rank; i++)
249            {
250               int index = (axis_offset+i)%rank;
251               sh.push_back(s[index]); // Append to new shape
252            }
253            return sh;
254           }
255        break;
256        case TRACE:
257           {
258            int rank=left->getRank();
259            if (rank<2)
260            {
261               throw DataException("Trace can only be computed for objects with rank 2 or greater.");
262            }
263            if ((axis_offset>rank-2) || (axis_offset<0))
264            {
265               throw DataException("Trace: axis offset must lie between 0 and rank-2 inclusive.");
266            }
267            if (rank==2)
268            {
269               return DataTypes::scalarShape;
270            }
271            else if (rank==3)
272            {
273               DataTypes::ShapeType sh;
274                   if (axis_offset==0)
275               {
276                    sh.push_back(left->getShape()[2]);
277                   }
278                   else     // offset==1
279               {
280                sh.push_back(left->getShape()[0]);
281                   }
282               return sh;
283            }
284            else if (rank==4)
285            {
286               DataTypes::ShapeType sh;
287               const DataTypes::ShapeType& s=left->getShape();
288                   if (axis_offset==0)
289               {
290                    sh.push_back(s[2]);
291                    sh.push_back(s[3]);
292                   }
293                   else if (axis_offset==1)
294               {
295                    sh.push_back(s[0]);
296                    sh.push_back(s[3]);
297                   }
298               else     // offset==2
299               {
300                sh.push_back(s[0]);
301                sh.push_back(s[1]);
302               }
303               return sh;
304            }
305            else        // unknown rank
306            {
307               throw DataException("Error - Data::trace can only be calculated for rank 2, 3 or 4 object.");
308            }
309           }
310        break;
311            default:
312        throw DataException("Programmer error - resultShape(left,op) can't compute shapes for operator "+opToString(op)+".");
313        }
314  }  }
315    
316  // determine the number of samples requires to evaluate an expression combining left and right  DataTypes::ShapeType
317  int  SwapShape(DataAbstract_ptr left, const int axis0, const int axis1)
 calcBuffs(const DataLazy_ptr& left, const DataLazy_ptr& right, ES_optype op)  
318  {  {
319     switch(getOpgroup(op))       // This code taken from the Data.cpp swapaxes() method
320     {       // Some of the checks are probably redundant here
321     case G_IDENTITY: return 1;       int axis0_tmp,axis1_tmp;
322     case G_BINARY: return max(left->getBuffsRequired(),right->getBuffsRequired()+1);       const DataTypes::ShapeType& s=left->getShape();
323     case G_UNARY: return max(left->getBuffsRequired(),1);       DataTypes::ShapeType out_shape;
324     default:       // Here's the equivalent of python s_out=s[axis_offset:]+s[:axis_offset]
325      throw DataException("Programmer Error - attempt to calcBuffs() for operator "+opToString(op)+".");       // which goes thru all shape vector elements starting with axis_offset (at index=rank wrap around to 0)
326     }       int rank=left->getRank();
327         if (rank<2) {
328            throw DataException("Error - Data::swapaxes argument must have at least rank 2.");
329         }
330         if (axis0<0 || axis0>rank-1) {
331            stringstream e;
332            e << "Error - Data::swapaxes: axis0 must be between 0 and rank-1=" << (rank-1);
333            throw DataException(e.str());
334         }
335         if (axis1<0 || axis1>rank-1) {
336            e << "Error - Data::swapaxes: axis1 must be between 0 and rank-1=" << (rank-1);
337            throw DataException(e.str());
338         }
339         if (axis0 == axis1) {
340             throw DataException("Error - Data::swapaxes: axis indices must be different.");
341         }
342         if (axis0 > axis1) {
343             axis0_tmp=axis1;
344             axis1_tmp=axis0;
345         } else {
346             axis0_tmp=axis0;
347             axis1_tmp=axis1;
348         }
349         for (int i=0; i<rank; i++) {
350           if (i == axis0_tmp) {
351              out_shape.push_back(s[axis1_tmp]);
352           } else if (i == axis1_tmp) {
353              out_shape.push_back(s[axis0_tmp]);
354           } else {
355              out_shape.push_back(s[i]);
356           }
357         }
358        return out_shape;
359  }  }
360    
361    
362    // determine the output shape for the general tensor product operation
363    // the additional parameters return information required later for the product
364    // the majority of this code is copy pasted from C_General_Tensor_Product
365    DataTypes::ShapeType
366    GTPShape(DataAbstract_ptr left, DataAbstract_ptr right, int axis_offset, int transpose, int& SL, int& SM, int& SR)
367    {
368        
369      // Get rank and shape of inputs
370      int rank0 = left->getRank();
371      int rank1 = right->getRank();
372      const DataTypes::ShapeType& shape0 = left->getShape();
373      const DataTypes::ShapeType& shape1 = right->getShape();
374    
375      // Prepare for the loops of the product and verify compatibility of shapes
376      int start0=0, start1=0;
377      if (transpose == 0)       {}
378      else if (transpose == 1)  { start0 = axis_offset; }
379      else if (transpose == 2)  { start1 = rank1-axis_offset; }
380      else              { throw DataException("DataLazy GeneralTensorProduct Constructor: Error - transpose should be 0, 1 or 2"); }
381    
382      if (rank0<axis_offset)
383      {
384        throw DataException("DataLazy GeneralTensorProduct Constructor: Error - rank of left < axisoffset");
385      }
386    
387      // Adjust the shapes for transpose
388      DataTypes::ShapeType tmpShape0(rank0);    // pre-sizing the vectors rather
389      DataTypes::ShapeType tmpShape1(rank1);    // than using push_back
390      for (int i=0; i<rank0; i++)   { tmpShape0[i]=shape0[(i+start0)%rank0]; }
391      for (int i=0; i<rank1; i++)   { tmpShape1[i]=shape1[(i+start1)%rank1]; }
392    
393      // Prepare for the loops of the product
394      SL=1, SM=1, SR=1;
395      for (int i=0; i<rank0-axis_offset; i++)   {
396        SL *= tmpShape0[i];
397      }
398      for (int i=rank0-axis_offset; i<rank0; i++)   {
399        if (tmpShape0[i] != tmpShape1[i-(rank0-axis_offset)]) {
400          throw DataException("C_GeneralTensorProduct: Error - incompatible shapes");
401        }
402        SM *= tmpShape0[i];
403      }
404      for (int i=axis_offset; i<rank1; i++)     {
405        SR *= tmpShape1[i];
406      }
407    
408      // Define the shape of the output (rank of shape is the sum of the loop ranges below)
409      DataTypes::ShapeType shape2(rank0+rank1-2*axis_offset);  
410      {         // block to limit the scope of out_index
411         int out_index=0;
412         for (int i=0; i<rank0-axis_offset; i++, ++out_index) { shape2[out_index]=tmpShape0[i]; } // First part of arg_0_Z
413         for (int i=axis_offset; i<rank1; i++, ++out_index)   { shape2[out_index]=tmpShape1[i]; } // Last part of arg_1_Z
414      }
415    
416      if (shape2.size()>ESCRIPT_MAX_DATA_RANK)
417      {
418         ostringstream os;
419         os << "C_GeneralTensorProduct: Error - Attempt to create a rank " << shape2.size() << " object. The maximum rank is " << ESCRIPT_MAX_DATA_RANK << ".";
420         throw DataException(os.str());
421      }
422    
423      return shape2;
424    }
425    
426  }   // end anonymous namespace  }   // end anonymous namespace
427    
428    
# Line 198  opToString(ES_optype op) Line 438  opToString(ES_optype op)
438    return ES_opstrings[op];    return ES_opstrings[op];
439  }  }
440    
441    void DataLazy::LazyNodeSetup()
442    {
443    #ifdef _OPENMP
444        int numthreads=omp_get_max_threads();
445        m_samples.resize(numthreads*m_samplesize);
446        m_sampleids=new int[numthreads];
447        for (int i=0;i<numthreads;++i)
448        {
449            m_sampleids[i]=-1;  
450        }
451    #else
452        m_samples.resize(m_samplesize);
453        m_sampleids=new int[1];
454        m_sampleids[0]=-1;
455    #endif  // _OPENMP
456    }
457    
458    
459    // Creates an identity node
460  DataLazy::DataLazy(DataAbstract_ptr p)  DataLazy::DataLazy(DataAbstract_ptr p)
461      : parent(p->getFunctionSpace(),p->getShape()),      : parent(p->getFunctionSpace(),p->getShape())
462      m_op(IDENTITY)      ,m_sampleids(0),
463        m_samples(1)
464  {  {
465     if (p->isLazy())     if (p->isLazy())
466     {     {
# Line 212  DataLazy::DataLazy(DataAbstract_ptr p) Line 471  DataLazy::DataLazy(DataAbstract_ptr p)
471     }     }
472     else     else
473     {     {
474      m_id=dynamic_pointer_cast<DataReady>(p);      p->makeLazyShared();
475      if(p->isConstant()) {m_readytype='C';}      DataReady_ptr dr=dynamic_pointer_cast<DataReady>(p);
476      else if(p->isExpanded()) {m_readytype='E';}      makeIdentity(dr);
477      else if (p->isTagged()) {m_readytype='T';}  LAZYDEBUG(cout << "Wrapping " << dr.get() << " id=" << m_id.get() << endl;)
     else {throw DataException("Unknown DataReady instance in DataLazy constructor.");}  
478     }     }
479     m_length=p->getLength();  LAZYDEBUG(cout << "(1)Lazy created with " << m_samplesize << endl;)
    m_buffsRequired=1;  
    m_samplesize=getNumDPPSample()*getNoValues();  
 cout << "(1)Lazy created with " << m_samplesize << endl;  
480  }  }
481    
   
   
   
482  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op)  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op)
483      : parent(left->getFunctionSpace(),left->getShape()),      : parent(left->getFunctionSpace(),(getOpgroup(op)!=G_REDUCTION)?left->getShape():DataTypes::scalarShape),
484      m_op(op)      m_op(op),
485        m_axis_offset(0),
486        m_transpose(0),
487        m_SL(0), m_SM(0), m_SR(0)
488  {  {
489     if (getOpgroup(op)!=G_UNARY)     if ((getOpgroup(op)!=G_UNARY) && (getOpgroup(op)!=G_NP1OUT) && (getOpgroup(op)!=G_REDUCTION))
490     {     {
491      throw DataException("Programmer error - constructor DataLazy(left, op) will only process UNARY operations.");      throw DataException("Programmer error - constructor DataLazy(left, op) will only process UNARY operations.");
492     }     }
493    
494     DataLazy_ptr lleft;     DataLazy_ptr lleft;
495     if (!left->isLazy())     if (!left->isLazy())
496     {     {
# Line 245  DataLazy::DataLazy(DataAbstract_ptr left Line 501  DataLazy::DataLazy(DataAbstract_ptr left
501      lleft=dynamic_pointer_cast<DataLazy>(left);      lleft=dynamic_pointer_cast<DataLazy>(left);
502     }     }
503     m_readytype=lleft->m_readytype;     m_readytype=lleft->m_readytype;
    m_length=left->getLength();  
504     m_left=lleft;     m_left=lleft;
    m_buffsRequired=1;  
505     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
506       m_children=m_left->m_children+1;
507       m_height=m_left->m_height+1;
508       LazyNodeSetup();
509       SIZELIMIT
510  }  }
511    
512    
513    // In this constructor we need to consider interpolation
514  DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)  DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
515      : parent(resultFS(left,right,op), resultShape(left,right,op)),      : parent(resultFS(left,right,op), resultShape(left,right,op)),
516      m_op(op)      m_op(op),
517        m_SL(0), m_SM(0), m_SR(0)
518  {  {
519     if (getOpgroup(op)!=G_BINARY)  LAZYDEBUG(cout << "Forming operator with " << left.get() << " " << right.get() << endl;)
520       if ((getOpgroup(op)!=G_BINARY))
521     {     {
522      throw DataException("Programmer error - constructor DataLazy(left, right, op) will only process BINARY operations.");      throw DataException("Programmer error - constructor DataLazy(left, right, op) will only process BINARY operations.");
523     }     }
524    
525       if (getFunctionSpace()!=left->getFunctionSpace())    // left needs to be interpolated
526       {
527        FunctionSpace fs=getFunctionSpace();
528        Data ltemp(left);
529        Data tmp(ltemp,fs);
530        left=tmp.borrowDataPtr();
531       }
532       if (getFunctionSpace()!=right->getFunctionSpace())   // right needs to be interpolated
533       {
534        Data tmp(Data(right),getFunctionSpace());
535        right=tmp.borrowDataPtr();
536    LAZYDEBUG(cout << "Right interpolation required " << right.get() << endl;)
537       }
538       left->operandCheck(*right);
539    
540     if (left->isLazy())          // the children need to be DataLazy. Wrap them in IDENTITY if required     if (left->isLazy())          // the children need to be DataLazy. Wrap them in IDENTITY if required
541     {     {
542      m_left=dynamic_pointer_cast<DataLazy>(left);      m_left=dynamic_pointer_cast<DataLazy>(left);
543    LAZYDEBUG(cout << "Left is " << m_left->toString() << endl;)
544     }     }
545     else     else
546     {     {
547      m_left=DataLazy_ptr(new DataLazy(left));      m_left=DataLazy_ptr(new DataLazy(left));
548    LAZYDEBUG(cout << "Left " << left.get() << " wrapped " << m_left->m_id.get() << endl;)
549     }     }
550     if (right->isLazy())     if (right->isLazy())
551     {     {
552      m_right=dynamic_pointer_cast<DataLazy>(right);      m_right=dynamic_pointer_cast<DataLazy>(right);
553    LAZYDEBUG(cout << "Right is " << m_right->toString() << endl;)
554     }     }
555     else     else
556     {     {
557      m_right=DataLazy_ptr(new DataLazy(right));      m_right=DataLazy_ptr(new DataLazy(right));
558    LAZYDEBUG(cout << "Right " << right.get() << " wrapped " << m_right->m_id.get() << endl;)
559     }     }
560     char lt=m_left->m_readytype;     char lt=m_left->m_readytype;
561     char rt=m_right->m_readytype;     char rt=m_right->m_readytype;
# Line 290  DataLazy::DataLazy(DataAbstract_ptr left Line 571  DataLazy::DataLazy(DataAbstract_ptr left
571     {     {
572      m_readytype='C';      m_readytype='C';
573     }     }
574     m_length=resultLength(m_left,m_right,m_op);     m_samplesize=getNumDPPSample()*getNoValues();
575     m_samplesize=getNumDPPSample()*getNoValues();         m_children=m_left->m_children+m_right->m_children+2;
576     m_buffsRequired=calcBuffs(m_left, m_right,m_op);     m_height=max(m_left->m_height,m_right->m_height)+1;
577  cout << "(3)Lazy created with " << m_samplesize << endl;     LazyNodeSetup();
578       SIZELIMIT
579    LAZYDEBUG(cout << "(3)Lazy created with " << m_samplesize << endl;)
580  }  }
581    
582    DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op, int axis_offset, int transpose)
583        : parent(resultFS(left,right,op), GTPShape(left,right, axis_offset, transpose, m_SL,m_SM, m_SR)),
584        m_op(op),
585        m_axis_offset(axis_offset),
586        m_transpose(transpose)
587    {
588       if ((getOpgroup(op)!=G_TENSORPROD))
589       {
590        throw DataException("Programmer error - constructor DataLazy(left, right, op, ax, tr) will only process BINARY operations which require parameters.");
591       }
592       if ((transpose>2) || (transpose<0))
593       {
594        throw DataException("DataLazy GeneralTensorProduct constructor: Error - transpose should be 0, 1 or 2");
595       }
596       if (getFunctionSpace()!=left->getFunctionSpace())    // left needs to be interpolated
597       {
598        FunctionSpace fs=getFunctionSpace();
599        Data ltemp(left);
600        Data tmp(ltemp,fs);
601        left=tmp.borrowDataPtr();
602       }
603       if (getFunctionSpace()!=right->getFunctionSpace())   // right needs to be interpolated
604       {
605        Data tmp(Data(right),getFunctionSpace());
606        right=tmp.borrowDataPtr();
607       }
608    //    left->operandCheck(*right);
609    
610  DataLazy::~DataLazy()     if (left->isLazy())          // the children need to be DataLazy. Wrap them in IDENTITY if required
611       {
612        m_left=dynamic_pointer_cast<DataLazy>(left);
613       }
614       else
615       {
616        m_left=DataLazy_ptr(new DataLazy(left));
617       }
618       if (right->isLazy())
619       {
620        m_right=dynamic_pointer_cast<DataLazy>(right);
621       }
622       else
623       {
624        m_right=DataLazy_ptr(new DataLazy(right));
625       }
626       char lt=m_left->m_readytype;
627       char rt=m_right->m_readytype;
628       if (lt=='E' || rt=='E')
629       {
630        m_readytype='E';
631       }
632       else if (lt=='T' || rt=='T')
633       {
634        m_readytype='T';
635       }
636       else
637       {
638        m_readytype='C';
639       }
640       m_samplesize=getNumDPPSample()*getNoValues();
641       m_children=m_left->m_children+m_right->m_children+2;
642       m_height=max(m_left->m_height,m_right->m_height)+1;
643       LazyNodeSetup();
644       SIZELIMIT
645    LAZYDEBUG(cout << "(4)Lazy created with " << m_samplesize << endl;)
646    }
647    
648    
649    DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, int axis_offset)
650        : parent(left->getFunctionSpace(), resultShape(left,op, axis_offset)),
651        m_op(op),
652        m_axis_offset(axis_offset),
653        m_transpose(0),
654        m_tol(0)
655    {
656       if ((getOpgroup(op)!=G_NP1OUT_P))
657       {
658        throw DataException("Programmer error - constructor DataLazy(left, op, ax) will only process UNARY operations which require parameters.");
659       }
660       DataLazy_ptr lleft;
661       if (!left->isLazy())
662       {
663        lleft=DataLazy_ptr(new DataLazy(left));
664       }
665       else
666       {
667        lleft=dynamic_pointer_cast<DataLazy>(left);
668       }
669       m_readytype=lleft->m_readytype;
670       m_left=lleft;
671       m_samplesize=getNumDPPSample()*getNoValues();
672       m_children=m_left->m_children+1;
673       m_height=m_left->m_height+1;
674       LazyNodeSetup();
675       SIZELIMIT
676    LAZYDEBUG(cout << "(5)Lazy created with " << m_samplesize << endl;)
677    }
678    
679    DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, double tol)
680        : parent(left->getFunctionSpace(), left->getShape()),
681        m_op(op),
682        m_axis_offset(0),
683        m_transpose(0),
684        m_tol(tol)
685  {  {
686       if ((getOpgroup(op)!=G_UNARY_P))
687       {
688        throw DataException("Programmer error - constructor DataLazy(left, op, tol) will only process UNARY operations which require parameters.");
689       }
690       DataLazy_ptr lleft;
691       if (!left->isLazy())
692       {
693        lleft=DataLazy_ptr(new DataLazy(left));
694       }
695       else
696       {
697        lleft=dynamic_pointer_cast<DataLazy>(left);
698       }
699       m_readytype=lleft->m_readytype;
700       m_left=lleft;
701       m_samplesize=getNumDPPSample()*getNoValues();
702       m_children=m_left->m_children+1;
703       m_height=m_left->m_height+1;
704       LazyNodeSetup();
705       SIZELIMIT
706    LAZYDEBUG(cout << "(6)Lazy created with " << m_samplesize << endl;)
707  }  }
708    
709    
710  int  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, const int axis0, const int axis1)
711  DataLazy::getBuffsRequired() const      : parent(left->getFunctionSpace(), SwapShape(left,axis0,axis1)),
712        m_op(op),
713        m_axis_offset(axis0),
714        m_transpose(axis1),
715        m_tol(0)
716    {
717       if ((getOpgroup(op)!=G_NP1OUT_2P))
718       {
719        throw DataException("Programmer error - constructor DataLazy(left, op, tol) will only process UNARY operations which require two integer parameters.");
720       }
721       DataLazy_ptr lleft;
722       if (!left->isLazy())
723       {
724        lleft=DataLazy_ptr(new DataLazy(left));
725       }
726       else
727       {
728        lleft=dynamic_pointer_cast<DataLazy>(left);
729       }
730       m_readytype=lleft->m_readytype;
731       m_left=lleft;
732       m_samplesize=getNumDPPSample()*getNoValues();
733       m_children=m_left->m_children+1;
734       m_height=m_left->m_height+1;
735       LazyNodeSetup();
736       SIZELIMIT
737    LAZYDEBUG(cout << "(7)Lazy created with " << m_samplesize << endl;)
738    }
739    
740    DataLazy::~DataLazy()
741  {  {
742      return m_buffsRequired;     delete[] m_sampleids;
743  }  }
744    
745    
# Line 328  DataLazy::collapseToReady() Line 762  DataLazy::collapseToReady()
762    DataReady_ptr pleft=m_left->collapseToReady();    DataReady_ptr pleft=m_left->collapseToReady();
763    Data left(pleft);    Data left(pleft);
764    Data right;    Data right;
765    if (getOpgroup(m_op)==G_BINARY)    if ((getOpgroup(m_op)==G_BINARY) || (getOpgroup(m_op)==G_TENSORPROD))
766    {    {
767      right=Data(m_right->collapseToReady());      right=Data(m_right->collapseToReady());
768    }    }
# Line 427  DataLazy::collapseToReady() Line 861  DataLazy::collapseToReady()
861      case LEZ:      case LEZ:
862      result=left.whereNonPositive();      result=left.whereNonPositive();
863      break;      break;
864        case NEZ:
865        result=left.whereNonZero(m_tol);
866        break;
867        case EZ:
868        result=left.whereZero(m_tol);
869        break;
870        case SYM:
871        result=left.symmetric();
872        break;
873        case NSYM:
874        result=left.nonsymmetric();
875        break;
876        case PROD:
877        result=C_GeneralTensorProduct(left,right,m_axis_offset, m_transpose);
878        break;
879        case TRANS:
880        result=left.transpose(m_axis_offset);
881        break;
882        case TRACE:
883        result=left.trace(m_axis_offset);
884        break;
885        case SWAP:
886        result=left.swapaxes(m_axis_offset, m_transpose);
887        break;
888        case MINVAL:
889        result=left.minval();
890        break;
891        case MAXVAL:
892        result=left.minval();
893        break;
894      default:      default:
895      throw DataException("Programmer error - do not know how to resolve operator "+opToString(m_op)+".");      throw DataException("Programmer error - collapseToReady does not know how to resolve operator "+opToString(m_op)+".");
896    }    }
897    return result.borrowReadyPtr();    return result.borrowReadyPtr();
898  }  }
# Line 454  DataLazy::collapse() Line 918  DataLazy::collapse()
918    m_op=IDENTITY;    m_op=IDENTITY;
919  }  }
920    
921  /*  
922    \brief Compute the value of the expression (binary operation) for the given sample.  
923    \return Vector which stores the value of the subexpression for the given sample.  
924    \param v A vector to store intermediate results.  
925    \param offset Index in v to begin storing results.  
926    \param sampleNo Sample number to evaluate.  #define PROC_OP(TYPE,X)                               \
927    \param roffset (output parameter) the offset in the return vector where the result begins.      for (int j=0;j<onumsteps;++j)\
928        {\
929    The return value will be an existing vector so do not deallocate it.        for (int i=0;i<numsteps;++i,resultp+=resultStep) \
930    If the result is stored in v it should be stored at the offset given.        { \
931    Everything from offset to the end of v should be considered available for this method to use.  LAZYDEBUG(cout << "[left,right]=[" << lroffset << "," << rroffset << "]" << endl;)\
932  */  LAZYDEBUG(cout << "{left,right}={" << (*left)[lroffset] << "," << (*right)[rroffset] << "}\n";)\
933  DataTypes::ValueType*           tensor_binary_operation< TYPE >(chunksize, &((*left)[lroffset]), &((*right)[rroffset]), resultp, X); \
934  DataLazy::resolveUnary(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const  LAZYDEBUG(cout << " result=      " << resultp[0] << endl;) \
935             lroffset+=leftstep; \
936             rroffset+=rightstep; \
937          }\
938          lroffset+=oleftstep;\
939          rroffset+=orightstep;\
940        }
941    
942    
943    // The result will be stored in m_samples
944    // The return value is a pointer to the DataVector, offset is the offset within the return value
945    const DataTypes::ValueType*
946    DataLazy::resolveNodeSample(int tid, int sampleNo, size_t& roffset)
947    {
948    LAZYDEBUG(cout << "Resolve sample " << toString() << endl;)
949        // collapse so we have a 'E' node or an IDENTITY for some other type
950      if (m_readytype!='E' && m_op!=IDENTITY)
951      {
952        collapse();
953      }
954      if (m_op==IDENTITY)  
955      {
956        const ValueType& vec=m_id->getVectorRO();
957        roffset=m_id->getPointOffset(sampleNo, 0);
958    #ifdef LAZY_STACK_PROF
959    int x;
960    if (&x<stackend[omp_get_thread_num()])
961    {
962           stackend[omp_get_thread_num()]=&x;
963    }
964    #endif
965        return &(vec);
966      }
967      if (m_readytype!='E')
968      {
969        throw DataException("Programmer Error - Collapse did not produce an expanded node.");
970      }
971      if (m_sampleids[tid]==sampleNo)
972      {
973        roffset=tid*m_samplesize;
974        return &(m_samples);        // sample is already resolved
975      }
976      m_sampleids[tid]=sampleNo;
977      switch (getOpgroup(m_op))
978      {
979      case G_UNARY:
980      case G_UNARY_P: return resolveNodeUnary(tid, sampleNo, roffset);
981      case G_BINARY: return resolveNodeBinary(tid, sampleNo, roffset);
982      case G_NP1OUT: return resolveNodeNP1OUT(tid, sampleNo, roffset);
983      case G_NP1OUT_P: return resolveNodeNP1OUT_P(tid, sampleNo, roffset);
984      case G_TENSORPROD: return resolveNodeTProd(tid, sampleNo, roffset);
985      case G_NP1OUT_2P: return resolveNodeNP1OUT_2P(tid, sampleNo, roffset);
986      case G_REDUCTION: return resolveNodeReduction(tid, sampleNo, roffset);
987      default:
988        throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");
989      }
990    }
991    
992    const DataTypes::ValueType*
993    DataLazy::resolveNodeUnary(int tid, int sampleNo, size_t& roffset)
994  {  {
995      // we assume that any collapsing has been done before we get here      // we assume that any collapsing has been done before we get here
996      // since we only have one argument we don't need to think about only      // since we only have one argument we don't need to think about only
997      // processing single points.      // processing single points.
998        // we will also know we won't get identity nodes
999    if (m_readytype!='E')    if (m_readytype!='E')
1000    {    {
1001      throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");      throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
1002    }    }
1003    const ValueType* vleft=m_left->resolveSample(v,offset,sampleNo,roffset);    if (m_op==IDENTITY)
1004    const double* left=&((*vleft)[roffset]);    {
1005    double* result=&(v[offset]);      throw DataException("Programmer error - resolveNodeUnary should not be called on identity nodes.");
1006    roffset=offset;    }
1007      const DataTypes::ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, roffset);
1008      const double* left=&((*leftres)[roffset]);
1009      roffset=m_samplesize*tid;
1010      double* result=&(m_samples[roffset]);
1011    switch (m_op)    switch (m_op)
1012    {    {
1013      case SIN:        case SIN:  
1014      tensor_unary_operation(m_samplesize, left, result, ::sin);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sin);
1015      break;      break;
1016      case COS:      case COS:
1017      tensor_unary_operation(m_samplesize, left, result, ::cos);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cos);
1018      break;      break;
1019      case TAN:      case TAN:
1020      tensor_unary_operation(m_samplesize, left, result, ::tan);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tan);
1021      break;      break;
1022      case ASIN:      case ASIN:
1023      tensor_unary_operation(m_samplesize, left, result, ::asin);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::asin);
1024      break;      break;
1025      case ACOS:      case ACOS:
1026      tensor_unary_operation(m_samplesize, left, result, ::acos);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::acos);
1027      break;      break;
1028      case ATAN:      case ATAN:
1029      tensor_unary_operation(m_samplesize, left, result, ::atan);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::atan);
1030      break;      break;
1031      case SINH:      case SINH:
1032      tensor_unary_operation(m_samplesize, left, result, ::sinh);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sinh);
1033      break;      break;
1034      case COSH:      case COSH:
1035      tensor_unary_operation(m_samplesize, left, result, ::cosh);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cosh);
1036      break;      break;
1037      case TANH:      case TANH:
1038      tensor_unary_operation(m_samplesize, left, result, ::tanh);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tanh);
1039      break;      break;
1040      case ERF:      case ERF:
1041  #ifdef _WIN32  #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1042      throw DataException("Error - Data:: erf function is not supported on _WIN32 platforms.");      throw DataException("Error - Data:: erf function is not supported on _WIN32 platforms.");
1043  #else  #else
1044      tensor_unary_operation(m_samplesize, left, result, ::erf);      tensor_unary_operation(m_samplesize, left, result, ::erf);
1045      break;      break;
1046  #endif  #endif
1047     case ASINH:     case ASINH:
1048  #ifdef _WIN32  #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1049      tensor_unary_operation(m_samplesize, left, result, escript::asinh_substitute);      tensor_unary_operation(m_samplesize, left, result, escript::asinh_substitute);
1050  #else  #else
1051      tensor_unary_operation(m_samplesize, left, result, ::asinh);      tensor_unary_operation(m_samplesize, left, result, ::asinh);
1052  #endif    #endif  
1053      break;      break;
1054     case ACOSH:     case ACOSH:
1055  #ifdef _WIN32  #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1056      tensor_unary_operation(m_samplesize, left, result, escript::acosh_substitute);      tensor_unary_operation(m_samplesize, left, result, escript::acosh_substitute);
1057  #else  #else
1058      tensor_unary_operation(m_samplesize, left, result, ::acosh);      tensor_unary_operation(m_samplesize, left, result, ::acosh);
1059  #endif    #endif  
1060      break;      break;
1061     case ATANH:     case ATANH:
1062  #ifdef _WIN32  #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1063      tensor_unary_operation(m_samplesize, left, result, escript::atanh_substitute);      tensor_unary_operation(m_samplesize, left, result, escript::atanh_substitute);
1064  #else  #else
1065      tensor_unary_operation(m_samplesize, left, result, ::atanh);      tensor_unary_operation(m_samplesize, left, result, ::atanh);
1066  #endif    #endif  
1067      break;      break;
1068      case LOG10:      case LOG10:
1069      tensor_unary_operation(m_samplesize, left, result, ::log10);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log10);
1070      break;      break;
1071      case LOG:      case LOG:
1072      tensor_unary_operation(m_samplesize, left, result, ::log);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log);
1073      break;      break;
1074      case SIGN:      case SIGN:
1075      tensor_unary_operation(m_samplesize, left, result, escript::fsign);      tensor_unary_operation(m_samplesize, left, result, escript::fsign);
1076      break;      break;
1077      case ABS:      case ABS:
1078      tensor_unary_operation(m_samplesize, left, result, ::fabs);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::fabs);
1079      break;      break;
1080      case NEG:      case NEG:
1081      tensor_unary_operation(m_samplesize, left, result, negate<double>());      tensor_unary_operation(m_samplesize, left, result, negate<double>());
# Line 558  DataLazy::resolveUnary(ValueType& v, siz Line 1086  DataLazy::resolveUnary(ValueType& v, siz
1086      throw DataException("Programmer error - POS not supported for lazy data.");      throw DataException("Programmer error - POS not supported for lazy data.");
1087      break;      break;
1088      case EXP:      case EXP:
1089      tensor_unary_operation(m_samplesize, left, result, ::exp);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::exp);
1090      break;      break;
1091      case SQRT:      case SQRT:
1092      tensor_unary_operation(m_samplesize, left, result, ::sqrt);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sqrt);
1093      break;      break;
1094      case RECIP:      case RECIP:
1095      tensor_unary_operation(m_samplesize, left, result, bind1st(divides<double>(),1.));      tensor_unary_operation(m_samplesize, left, result, bind1st(divides<double>(),1.));
# Line 578  DataLazy::resolveUnary(ValueType& v, siz Line 1106  DataLazy::resolveUnary(ValueType& v, siz
1106      case LEZ:      case LEZ:
1107      tensor_unary_operation(m_samplesize, left, result, bind2nd(less_equal<double>(),0.0));      tensor_unary_operation(m_samplesize, left, result, bind2nd(less_equal<double>(),0.0));
1108      break;      break;
1109    // There are actually G_UNARY_P but I don't see a compelling reason to treat them differently
1110        case NEZ:
1111        tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsGT(),m_tol));
1112        break;
1113        case EZ:
1114        tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsLTE(),m_tol));
1115        break;
1116    
1117      default:      default:
1118      throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");      throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");
1119    }    }
1120    return &v;    return &(m_samples);
1121  }  }
1122    
1123    
1124    const DataTypes::ValueType*
1125    DataLazy::resolveNodeReduction(int tid, int sampleNo, size_t& roffset)
1126    {
1127        // we assume that any collapsing has been done before we get here
1128        // since we only have one argument we don't need to think about only
1129        // processing single points.
1130        // we will also know we won't get identity nodes
1131      if (m_readytype!='E')
1132      {
1133        throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
1134      }
1135      if (m_op==IDENTITY)
1136      {
1137        throw DataException("Programmer error - resolveNodeUnary should not be called on identity nodes.");
1138      }
1139      size_t loffset=0;
1140      const DataTypes::ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, loffset);
1141    
1142      roffset=m_samplesize*tid;
1143      unsigned int ndpps=getNumDPPSample();
1144      unsigned int psize=DataTypes::noValues(getShape());
1145      double* result=&(m_samples[roffset]);
1146      switch (m_op)
1147      {
1148        case MINVAL:
1149        {
1150          for (unsigned int z=0;z<ndpps;++z)
1151          {
1152            FMin op;
1153            *result=DataMaths::reductionOp(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max());
1154            loffset+=psize;
1155            result++;
1156          }
1157        }
1158        break;
1159        case MAXVAL:
1160        {
1161          for (unsigned int z=0;z<ndpps;++z)
1162          {
1163          FMax op;
1164          *result=DataMaths::reductionOp(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max()*-1);
1165          loffset+=psize;
1166          result++;
1167          }
1168        }
1169        break;
1170        default:
1171        throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");
1172      }
1173      return &(m_samples);
1174    }
1175    
1176    const DataTypes::ValueType*
1177    DataLazy::resolveNodeNP1OUT(int tid, int sampleNo, size_t& roffset)
1178    {
1179        // we assume that any collapsing has been done before we get here
1180        // since we only have one argument we don't need to think about only
1181        // processing single points.
1182      if (m_readytype!='E')
1183      {
1184        throw DataException("Programmer error - resolveNodeNP1OUT should only be called on expanded Data.");
1185      }
1186      if (m_op==IDENTITY)
1187      {
1188        throw DataException("Programmer error - resolveNodeNP1OUT should not be called on identity nodes.");
1189      }
1190      size_t subroffset;
1191      const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1192      roffset=m_samplesize*tid;
1193      size_t loop=0;
1194      size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1195      size_t step=getNoValues();
1196      size_t offset=roffset;
1197      switch (m_op)
1198      {
1199        case SYM:
1200        for (loop=0;loop<numsteps;++loop)
1201        {
1202            DataMaths::symmetric(*leftres,m_left->getShape(),subroffset, m_samples, getShape(), offset);
1203            subroffset+=step;
1204            offset+=step;
1205        }
1206        break;
1207        case NSYM:
1208        for (loop=0;loop<numsteps;++loop)
1209        {
1210            DataMaths::nonsymmetric(*leftres,m_left->getShape(),subroffset, m_samples, getShape(), offset);
1211            subroffset+=step;
1212            offset+=step;
1213        }
1214        break;
1215        default:
1216        throw DataException("Programmer error - resolveNP1OUT can not resolve operator "+opToString(m_op)+".");
1217      }
1218      return &m_samples;
1219    }
1220    
1221    const DataTypes::ValueType*
1222    DataLazy::resolveNodeNP1OUT_P(int tid, int sampleNo, size_t& roffset)
1223    {
1224        // we assume that any collapsing has been done before we get here
1225        // since we only have one argument we don't need to think about only
1226        // processing single points.
1227      if (m_readytype!='E')
1228      {
1229        throw DataException("Programmer error - resolveNodeNP1OUT_P should only be called on expanded Data.");
1230      }
1231      if (m_op==IDENTITY)
1232      {
1233        throw DataException("Programmer error - resolveNodeNP1OUT_P should not be called on identity nodes.");
1234      }
1235      size_t subroffset;
1236      size_t offset;
1237      const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1238      roffset=m_samplesize*tid;
1239      offset=roffset;
1240      size_t loop=0;
1241      size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1242      size_t outstep=getNoValues();
1243      size_t instep=m_left->getNoValues();
1244      switch (m_op)
1245      {
1246        case TRACE:
1247        for (loop=0;loop<numsteps;++loop)
1248        {
1249                DataMaths::trace(*leftres,m_left->getShape(),subroffset, m_samples ,getShape(),offset,m_axis_offset);
1250            subroffset+=instep;
1251            offset+=outstep;
1252        }
1253        break;
1254        case TRANS:
1255        for (loop=0;loop<numsteps;++loop)
1256        {
1257                DataMaths::transpose(*leftres,m_left->getShape(),subroffset, m_samples, getShape(),offset,m_axis_offset);
1258            subroffset+=instep;
1259            offset+=outstep;
1260        }
1261        break;
1262        default:
1263        throw DataException("Programmer error - resolveNP1OUTP can not resolve operator "+opToString(m_op)+".");
1264      }
1265      return &m_samples;
1266    }
1267    
1268  #define PROC_OP(X) \  
1269      for (int i=0;i<steps;++i,resultp+=resultStep) \  const DataTypes::ValueType*
1270      { \  DataLazy::resolveNodeNP1OUT_2P(int tid, int sampleNo, size_t& roffset)
1271         tensor_binary_operation(chunksize, &((*left)[lroffset]), &((*right)[rroffset]), resultp, X); \  {
1272         lroffset+=leftStep; \    if (m_readytype!='E')
1273         rroffset+=rightStep; \    {
1274        throw DataException("Programmer error - resolveNodeNP1OUT_2P should only be called on expanded Data.");
1275      }
1276      if (m_op==IDENTITY)
1277      {
1278        throw DataException("Programmer error - resolveNodeNP1OUT_2P should not be called on identity nodes.");
1279      }
1280      size_t subroffset;
1281      size_t offset;
1282      const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1283      roffset=m_samplesize*tid;
1284      offset=roffset;
1285      size_t loop=0;
1286      size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1287      size_t outstep=getNoValues();
1288      size_t instep=m_left->getNoValues();
1289      switch (m_op)
1290      {
1291        case SWAP:
1292        for (loop=0;loop<numsteps;++loop)
1293        {
1294                DataMaths::swapaxes(*leftres,m_left->getShape(),subroffset, m_samples, getShape(),offset, m_axis_offset, m_transpose);
1295            subroffset+=instep;
1296            offset+=outstep;
1297      }      }
1298        break;
1299        default:
1300        throw DataException("Programmer error - resolveNodeNP1OUT2P can not resolve operator "+opToString(m_op)+".");
1301      }
1302      return &m_samples;
1303    }
1304    
1305    
1306    
 /*  
   \brief Compute the value of the expression (binary operation) for the given sample.  
   \return Vector which stores the value of the subexpression for the given sample.  
   \param v A vector to store intermediate results.  
   \param offset Index in v to begin storing results.  
   \param sampleNo Sample number to evaluate.  
   \param roffset (output parameter) the offset in the return vector where the result begins.  
   
   The return value will be an existing vector so do not deallocate it.  
   If the result is stored in v it should be stored at the offset given.  
   Everything from offset to the end of v should be considered available for this method to use.  
 */  
1307  // This method assumes that any subexpressions which evaluate to Constant or Tagged Data  // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1308  // have already been collapsed to IDENTITY. So we must have at least one expanded child.  // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1309  // If both children are expanded, then we can process them in a single operation (we treat  // If both children are expanded, then we can process them in a single operation (we treat
# Line 618  DataLazy::resolveUnary(ValueType& v, siz Line 1313  DataLazy::resolveUnary(ValueType& v, siz
1313  // There is an additional complication when scalar operations are considered.  // There is an additional complication when scalar operations are considered.
1314  // For example, 2+Vector.  // For example, 2+Vector.
1315  // In this case each double within the point is treated individually  // In this case each double within the point is treated individually
1316  DataTypes::ValueType*  const DataTypes::ValueType*
1317  DataLazy::resolveBinary(ValueType& v,  size_t offset, int sampleNo, size_t& roffset) const  DataLazy::resolveNodeBinary(int tid, int sampleNo, size_t& roffset)
1318  {  {
1319  cout << "Resolve binary: " << toString() << endl;  LAZYDEBUG(cout << "Resolve binary: " << toString() << endl;)
1320    
1321    size_t lroffset=0, rroffset=0;    // offsets in the left and right result vectors    size_t lroffset=0, rroffset=0;    // offsets in the left and right result vectors
1322      // first work out which of the children are expanded      // first work out which of the children are expanded
1323    bool leftExp=(m_left->m_readytype=='E');    bool leftExp=(m_left->m_readytype=='E');
1324    bool rightExp=(m_right->m_readytype=='E');    bool rightExp=(m_right->m_readytype=='E');
1325    bool bigloops=((leftExp && rightExp) || (!leftExp && !rightExp)); // is processing in single step?    if (!leftExp && !rightExp)
1326    int steps=(bigloops?1:getNumDPPSample());    {
1327    size_t chunksize=(bigloops? m_samplesize : getNoValues());    // if bigloops, pretend the whole sample is a datapoint      throw DataException("Programmer Error - please use collapse if neither argument has type 'E'.");
1328    if (m_left->getRank()!=m_right->getRank())    // need to deal with scalar * ? ops    }
1329    {    bool leftScalar=(m_left->getRank()==0);
1330      EsysAssert((m_left->getRank()==0) || (m_right->getRank()==0), "Error - Ranks must match unless one is 0.");    bool rightScalar=(m_right->getRank()==0);
1331      steps=getNumDPPSample()*max(m_left->getNoValues(),m_right->getNoValues());    if ((m_left->getRank()!=m_right->getRank()) && (!leftScalar && !rightScalar))
1332      chunksize=1;    // for scalar    {
1333    }          throw DataException("resolveBinary - ranks of arguments must match unless one of them is scalar.");
1334    int leftStep=((leftExp && !rightExp)? m_right->getNoValues() : 0);    }
1335    int rightStep=((rightExp && !leftExp)? m_left->getNoValues() : 0);    size_t leftsize=m_left->getNoValues();
1336    int resultStep=max(leftStep,rightStep);   // only one (at most) should be !=0    size_t rightsize=m_right->getNoValues();
1337      size_t chunksize=1;           // how many doubles will be processed in one go
1338      int leftstep=0;       // how far should the left offset advance after each step
1339      int rightstep=0;
1340      int numsteps=0;       // total number of steps for the inner loop
1341      int oleftstep=0;  // the o variables refer to the outer loop
1342      int orightstep=0; // The outer loop is only required in cases where there is an extended scalar
1343      int onumsteps=1;
1344      
1345      bool LES=(leftExp && leftScalar); // Left is an expanded scalar
1346      bool RES=(rightExp && rightScalar);
1347      bool LS=(!leftExp && leftScalar); // left is a single scalar
1348      bool RS=(!rightExp && rightScalar);
1349      bool LN=(!leftExp && !leftScalar);    // left is a single non-scalar
1350      bool RN=(!rightExp && !rightScalar);
1351      bool LEN=(leftExp && !leftScalar);    // left is an expanded non-scalar
1352      bool REN=(rightExp && !rightScalar);
1353    
1354      if ((LES && RES) || (LEN && REN)) // both are Expanded scalars or both are expanded non-scalars
1355      {
1356        chunksize=m_left->getNumDPPSample()*leftsize;
1357        leftstep=0;
1358        rightstep=0;
1359        numsteps=1;
1360      }
1361      else if (LES || RES)
1362      {
1363        chunksize=1;
1364        if (LES)        // left is an expanded scalar
1365        {
1366            if (RS)
1367            {
1368               leftstep=1;
1369               rightstep=0;
1370               numsteps=m_left->getNumDPPSample();
1371            }
1372            else        // RN or REN
1373            {
1374               leftstep=0;
1375               oleftstep=1;
1376               rightstep=1;
1377               orightstep=(RN ? -(int)rightsize : 0);
1378               numsteps=rightsize;
1379               onumsteps=m_left->getNumDPPSample();
1380            }
1381        }
1382        else        // right is an expanded scalar
1383        {
1384            if (LS)
1385            {
1386               rightstep=1;
1387               leftstep=0;
1388               numsteps=m_right->getNumDPPSample();
1389            }
1390            else
1391            {
1392               rightstep=0;
1393               orightstep=1;
1394               leftstep=1;
1395               oleftstep=(LN ? -(int)leftsize : 0);
1396               numsteps=leftsize;
1397               onumsteps=m_right->getNumDPPSample();
1398            }
1399        }
1400      }
1401      else  // this leaves (LEN, RS), (LEN, RN) and their transposes
1402      {
1403        if (LEN)    // and Right will be a single value
1404        {
1405            chunksize=rightsize;
1406            leftstep=rightsize;
1407            rightstep=0;
1408            numsteps=m_left->getNumDPPSample();
1409            if (RS)
1410            {
1411               numsteps*=leftsize;
1412            }
1413        }
1414        else    // REN
1415        {
1416            chunksize=leftsize;
1417            rightstep=leftsize;
1418            leftstep=0;
1419            numsteps=m_right->getNumDPPSample();
1420            if (LS)
1421            {
1422               numsteps*=rightsize;
1423            }
1424        }
1425      }
1426    
1427      int resultStep=max(leftstep,rightstep);   // only one (at most) should be !=0
1428      // Get the values of sub-expressions      // Get the values of sub-expressions
1429    const ValueType* left=m_left->resolveSample(v,offset,sampleNo,lroffset);    const ValueType* left=m_left->resolveNodeSample(tid,sampleNo,lroffset);  
1430    const ValueType* right=m_right->resolveSample(v,offset+m_samplesize,sampleNo,rroffset); // Note    const ValueType* right=m_right->resolveNodeSample(tid,sampleNo,rroffset);
1431      // the right child starts further along.  LAZYDEBUG(cout << "Post sub calls in " << toString() << endl;)
1432    double* resultp=&(v[offset]);     // results are stored at the vector offset we recieved  LAZYDEBUG(cout << "shapes=" << DataTypes::shapeToString(m_left->getShape()) << "," << DataTypes::shapeToString(m_right->getShape()) << endl;)
1433    LAZYDEBUG(cout << "chunksize=" << chunksize << endl << "leftstep=" << leftstep << " rightstep=" << rightstep;)
1434    LAZYDEBUG(cout << " numsteps=" << numsteps << endl << "oleftstep=" << oleftstep << " orightstep=" << orightstep;)
1435    LAZYDEBUG(cout << "onumsteps=" << onumsteps << endl;)
1436    LAZYDEBUG(cout << " DPPS=" << m_left->getNumDPPSample() << "," <<m_right->getNumDPPSample() << endl;)
1437    LAZYDEBUG(cout << "" << LS << RS << LN << RN << LES << RES <<LEN << REN <<   endl;)
1438    
1439    LAZYDEBUG(cout << "Left res["<< lroffset<< "]=" << (*left)[lroffset] << endl;)
1440    LAZYDEBUG(cout << "Right res["<< rroffset<< "]=" << (*right)[rroffset] << endl;)
1441    
1442    
1443      roffset=m_samplesize*tid;
1444      double* resultp=&(m_samples[roffset]);        // results are stored at the vector offset we recieved
1445    switch(m_op)    switch(m_op)
1446    {    {
1447      case ADD:      case ADD:
1448      PROC_OP(plus<double>());          PROC_OP(NO_ARG,plus<double>());
1449      break;      break;
1450      case SUB:      case SUB:
1451      PROC_OP(minus<double>());      PROC_OP(NO_ARG,minus<double>());
1452      break;      break;
1453      case MUL:      case MUL:
1454      PROC_OP(multiplies<double>());      PROC_OP(NO_ARG,multiplies<double>());
1455      break;      break;
1456      case DIV:      case DIV:
1457      PROC_OP(divides<double>());      PROC_OP(NO_ARG,divides<double>());
1458      break;      break;
1459      case POW:      case POW:
1460      PROC_OP(::pow);         PROC_OP(double (double,double),::pow);
1461      break;      break;
1462      default:      default:
1463      throw DataException("Programmer error - resolveBinary can not resolve operator "+opToString(m_op)+".");      throw DataException("Programmer error - resolveBinary can not resolve operator "+opToString(m_op)+".");
1464    }    }
1465    roffset=offset;    LAZYDEBUG(cout << "Result res[" << roffset<< "]" << m_samples[roffset] << endl;)
1466    return &v;    return &m_samples;
1467  }  }
1468    
1469    
1470    // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1471    // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1472    // unlike the other resolve helpers, we must treat these datapoints separately.
1473    const DataTypes::ValueType*
1474    DataLazy::resolveNodeTProd(int tid, int sampleNo, size_t& roffset)
1475    {
1476    LAZYDEBUG(cout << "Resolve TensorProduct: " << toString() << endl;)
1477    
1478  /*    size_t lroffset=0, rroffset=0;    // offsets in the left and right result vectors
1479    \brief Compute the value of the expression for the given sample.      // first work out which of the children are expanded
1480    \return Vector which stores the value of the subexpression for the given sample.    bool leftExp=(m_left->m_readytype=='E');
1481    \param v A vector to store intermediate results.    bool rightExp=(m_right->m_readytype=='E');
1482    \param offset Index in v to begin storing results.    int steps=getNumDPPSample();
1483    \param sampleNo Sample number to evaluate.    int leftStep=(leftExp? m_left->getNoValues() : 0);        // do not have scalars as input to this method
1484    \param roffset (output parameter) the offset in the return vector where the result begins.    int rightStep=(rightExp?m_right->getNoValues() : 0);
1485    
1486    The return value will be an existing vector so do not deallocate it.    int resultStep=getNoValues();
1487  */    roffset=m_samplesize*tid;
1488  // the vector and the offset are a place where the method could write its data if it wishes    size_t offset=roffset;
 // it is not obligated to do so. For example, if it has its own storage already, it can use that.  
 // Hence the return value to indicate where the data is actually stored.  
 // Regardless, the storage should be assumed to be used, even if it isn't.  
1489    
1490  // the roffset is the offset within the returned vector where the data begins    const ValueType* left=m_left->resolveNodeSample(tid, sampleNo, lroffset);
1491  const DataTypes::ValueType*  
1492  DataLazy::resolveSample(ValueType& v, size_t offset, int sampleNo, size_t& roffset)    const ValueType* right=m_right->resolveNodeSample(tid, sampleNo, rroffset);
1493  {  
1494  cout << "Resolve sample " << toString() << endl;  LAZYDEBUG(cerr << "[Left shape]=" << DataTypes::shapeToString(m_left->getShape()) << "\n[Right shape]=" << DataTypes::shapeToString(m_right->getShape()) << " result=" <<DataTypes::shapeToString(getShape()) <<  endl;
1495      // collapse so we have a 'E' node or an IDENTITY for some other type  cout << getNoValues() << endl;)
1496    if (m_readytype!='E' && m_op!=IDENTITY)  
1497    {  
1498      collapse();  LAZYDEBUG(cerr << "Post sub calls: " << toString() << endl;)
1499    }  LAZYDEBUG(cout << "LeftExp=" << leftExp << " rightExp=" << rightExp << endl;)
1500    if (m_op==IDENTITY)    LAZYDEBUG(cout << "LeftR=" << m_left->getRank() << " rightExp=" << m_right->getRank() << endl;)
1501    {  LAZYDEBUG(cout << "LeftSize=" << m_left->getNoValues() << " RightSize=" << m_right->getNoValues() << endl;)
1502      const ValueType& vec=m_id->getVector();  LAZYDEBUG(cout << "m_samplesize=" << m_samplesize << endl;)
1503      if (m_readytype=='C')  LAZYDEBUG(cout << "outputshape=" << DataTypes::shapeToString(getShape()) << endl;)
1504      {  LAZYDEBUG(cout << "DPPS=" << m_right->getNumDPPSample() <<"."<<endl;)
1505      roffset=0;  
1506      return &(vec);    double* resultp=&(m_samples[offset]);     // results are stored at the vector offset we recieved
1507      }    switch(m_op)
     roffset=m_id->getPointOffset(sampleNo, 0);  
     return &(vec);  
   }  
   if (m_readytype!='E')  
   {  
     throw DataException("Programmer Error - Collapse did not produce an expanded node.");  
   }  
   switch (getOpgroup(m_op))  
1508    {    {
1509    case G_UNARY: return resolveUnary(v, offset,sampleNo,roffset);      case PROD:
1510    case G_BINARY: return resolveBinary(v, offset,sampleNo,roffset);      for (int i=0;i<steps;++i,resultp+=resultStep)
1511    default:      {
1512      throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");            const double *ptr_0 = &((*left)[lroffset]);
1513              const double *ptr_1 = &((*right)[rroffset]);
1514    
1515    LAZYDEBUG(cout << DataTypes::pointToString(*left, m_left->getShape(),lroffset,"LEFT") << endl;)
1516    LAZYDEBUG(cout << DataTypes::pointToString(*right,m_right->getShape(),rroffset, "RIGHT") << endl;)
1517    
1518              matrix_matrix_product(m_SL, m_SM, m_SR, ptr_0, ptr_1, resultp, m_transpose);
1519    
1520          lroffset+=leftStep;
1521          rroffset+=rightStep;
1522        }
1523        break;
1524        default:
1525        throw DataException("Programmer error - resolveTProduct can not resolve operator "+opToString(m_op)+".");
1526    }    }
1527      roffset=offset;
1528      return &m_samples;
1529    }
1530    
1531    
1532    const DataTypes::ValueType*
1533    DataLazy::resolveSample(int sampleNo, size_t& roffset)
1534    {
1535    #ifdef _OPENMP
1536        int tid=omp_get_thread_num();
1537    #else
1538        int tid=0;
1539    #endif
1540    
1541    #ifdef LAZY_STACK_PROF
1542        stackstart[tid]=&tid;
1543        stackend[tid]=&tid;
1544        const DataTypes::ValueType* r=resolveNodeSample(tid, sampleNo, roffset);
1545        size_t d=(size_t)stackstart[tid]-(size_t)stackend[tid];
1546        #pragma omp critical
1547        if (d>maxstackuse)
1548        {
1549    cout << "Max resolve Stack use " << d << endl;
1550            maxstackuse=d;
1551        }
1552        return r;
1553    #else
1554        return resolveNodeSample(tid, sampleNo, roffset);
1555    #endif
1556    }
1557    
1558    
1559    // This needs to do the work of the identity constructor
1560    void
1561    DataLazy::resolveToIdentity()
1562    {
1563       if (m_op==IDENTITY)
1564        return;
1565       DataReady_ptr p=resolveNodeWorker();
1566       makeIdentity(p);
1567    }
1568    
1569    void DataLazy::makeIdentity(const DataReady_ptr& p)
1570    {
1571       m_op=IDENTITY;
1572       m_axis_offset=0;
1573       m_transpose=0;
1574       m_SL=m_SM=m_SR=0;
1575       m_children=m_height=0;
1576       m_id=p;
1577       if(p->isConstant()) {m_readytype='C';}
1578       else if(p->isExpanded()) {m_readytype='E';}
1579       else if (p->isTagged()) {m_readytype='T';}
1580       else {throw DataException("Unknown DataReady instance in convertToIdentity constructor.");}
1581       m_samplesize=p->getNumDPPSample()*p->getNoValues();
1582       m_left.reset();
1583       m_right.reset();
1584  }  }
1585    
1586    
 // To simplify the memory management, all threads operate on one large vector, rather than one each.  
 // Each sample is evaluated independently and copied into the result DataExpanded.  
1587  DataReady_ptr  DataReady_ptr
1588  DataLazy::resolve()  DataLazy::resolve()
1589  {  {
1590        resolveToIdentity();
1591        return m_id;
1592    }
1593    
1594  cout << "Sample size=" << m_samplesize << endl;  // This version of resolve uses storage in each node to hold results
1595  cout << "Buffers=" << m_buffsRequired << endl;  DataReady_ptr
1596    DataLazy::resolveNodeWorker()
1597    {
1598    if (m_readytype!='E')     // if the whole sub-expression is Constant or Tagged, then evaluate it normally    if (m_readytype!='E')     // if the whole sub-expression is Constant or Tagged, then evaluate it normally
1599    {    {
1600      collapse();      collapse();
# Line 738  cout << "Buffers=" << m_buffsRequired << Line 1604  cout << "Buffers=" << m_buffsRequired <<
1604      return m_id;      return m_id;
1605    }    }
1606      // from this point on we must have m_op!=IDENTITY and m_readytype=='E'      // from this point on we must have m_op!=IDENTITY and m_readytype=='E'
   size_t threadbuffersize=m_samplesize*(max(1,m_buffsRequired));    // Each thread needs to have enough  
     // storage to evaluate its expression  
   int numthreads=1;  
 #ifdef _OPENMP  
   numthreads=getNumberOfThreads();  
   int threadnum=0;  
 #endif  
   ValueType v(numthreads*threadbuffersize);  
 cout << "Buffer created with size=" << v.size() << endl;  
1607    DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(),  ValueType(getNoValues()));    DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(),  ValueType(getNoValues()));
1608    ValueType& resvec=result->getVector();    ValueType& resvec=result->getVectorRW();
1609    DataReady_ptr resptr=DataReady_ptr(result);    DataReady_ptr resptr=DataReady_ptr(result);
1610    
1611    int sample;    int sample;
   size_t outoffset;     // offset in the output data  
1612    int totalsamples=getNumSamples();    int totalsamples=getNumSamples();
1613    const ValueType* res=0;   // Vector storing the answer    const ValueType* res=0;   // Storage for answer
1614    size_t resoffset=0;       // where in the vector to find the answer  LAZYDEBUG(cout << "Total number of samples=" <<totalsamples << endl;)
1615    #pragma omp parallel for private(sample,resoffset,outoffset,threadnum,res) schedule(static)    #pragma omp parallel private(sample,res)
   for (sample=0;sample<totalsamples;++sample)  
1616    {    {
1617  cout << "################################# " << sample << endl;      size_t roffset=0;
1618    #ifdef LAZY_STACK_PROF
1619        stackstart[omp_get_thread_num()]=&roffset;
1620        stackend[omp_get_thread_num()]=&roffset;
1621    #endif
1622        #pragma omp for schedule(static)
1623        for (sample=0;sample<totalsamples;++sample)
1624        {
1625            roffset=0;
1626  #ifdef _OPENMP  #ifdef _OPENMP
1627      res=resolveSample(v,threadbuffersize*omp_get_thread_num(),sample,resoffset);              res=resolveNodeSample(omp_get_thread_num(),sample,roffset);
1628  #else  #else
1629      res=resolveSample(v,0,sample,resoffset);   // res would normally be v, but not if its a single IDENTITY op.              res=resolveNodeSample(0,sample,roffset);
1630  #endif  #endif
1631  cerr << "-------------------------------- " << endl;  LAZYDEBUG(cout << "Sample #" << sample << endl;)
1632      outoffset=result->getPointOffset(sample,0);  LAZYDEBUG(cout << "Final res[" << roffset<< "]=" << (*res)[roffset] << (*res)[roffset]<< endl; )
1633  cerr << "offset=" << outoffset << endl;              DataVector::size_type outoffset=result->getPointOffset(sample,0);
1634      for (unsigned int i=0;i<m_samplesize;++i,++outoffset,++resoffset)   // copy values into the output vector              memcpy(&(resvec[outoffset]),&((*res)[roffset]),m_samplesize*sizeof(DataVector::ElementType));
1635      {      }
1636      resvec[outoffset]=(*res)[resoffset];    }
1637      }  #ifdef LAZY_STACK_PROF
1638  cerr << "*********************************" << endl;    for (int i=0;i<getNumberOfThreads();++i)
1639      {
1640        size_t r=((size_t)stackstart[i] - (size_t)stackend[i]);
1641    //  cout << i << " " << stackstart[i] << " .. " << stackend[i] << " = " <<  r << endl;
1642        if (r>maxstackuse)
1643        {
1644            maxstackuse=r;
1645        }
1646    }    }
1647      cout << "Max resolve Stack use=" << maxstackuse << endl;
1648    #endif
1649    return resptr;    return resptr;
1650  }  }
1651    
# Line 781  DataLazy::toString() const Line 1654  DataLazy::toString() const
1654  {  {
1655    ostringstream oss;    ostringstream oss;
1656    oss << "Lazy Data:";    oss << "Lazy Data:";
1657    intoString(oss);    if (escriptParams.getPRINT_LAZY_TREE()==0)
1658      {
1659          intoString(oss);
1660      }
1661      else
1662      {
1663        oss << endl;
1664        intoTreeString(oss,"");
1665      }
1666    return oss.str();    return oss.str();
1667  }  }
1668    
# Line 789  DataLazy::toString() const Line 1670  DataLazy::toString() const
1670  void  void
1671  DataLazy::intoString(ostringstream& oss) const  DataLazy::intoString(ostringstream& oss) const
1672  {  {
1673    //    oss << "[" << m_children <<";"<<m_height <<"]";
1674    switch (getOpgroup(m_op))    switch (getOpgroup(m_op))
1675    {    {
1676    case G_IDENTITY:    case G_IDENTITY:
# Line 818  DataLazy::intoString(ostringstream& oss) Line 1700  DataLazy::intoString(ostringstream& oss)
1700      oss << ')';      oss << ')';
1701      break;      break;
1702    case G_UNARY:    case G_UNARY:
1703      case G_UNARY_P:
1704      case G_NP1OUT:
1705      case G_NP1OUT_P:
1706      case G_REDUCTION:
1707        oss << opToString(m_op) << '(';
1708        m_left->intoString(oss);
1709        oss << ')';
1710        break;
1711      case G_TENSORPROD:
1712        oss << opToString(m_op) << '(';
1713        m_left->intoString(oss);
1714        oss << ", ";
1715        m_right->intoString(oss);
1716        oss << ')';
1717        break;
1718      case G_NP1OUT_2P:
1719      oss << opToString(m_op) << '(';      oss << opToString(m_op) << '(';
1720      m_left->intoString(oss);      m_left->intoString(oss);
1721        oss << ", " << m_axis_offset << ", " << m_transpose;
1722      oss << ')';      oss << ')';
1723      break;      break;
1724    default:    default:
# Line 827  DataLazy::intoString(ostringstream& oss) Line 1726  DataLazy::intoString(ostringstream& oss)
1726    }    }
1727  }  }
1728    
1729  // Note that in this case, deepCopy does not make copies of the leaves.  
1730  // Hopefully copy on write (or whatever we end up using) will take care of this.  void
1731    DataLazy::intoTreeString(ostringstream& oss, string indent) const
1732    {
1733      oss << '[' << m_rank << ':' << setw(3) << m_samplesize << "] " << indent;
1734      switch (getOpgroup(m_op))
1735      {
1736      case G_IDENTITY:
1737        if (m_id->isExpanded())
1738        {
1739           oss << "E";
1740        }
1741        else if (m_id->isTagged())
1742        {
1743          oss << "T";
1744        }
1745        else if (m_id->isConstant())
1746        {
1747          oss << "C";
1748        }
1749        else
1750        {
1751          oss << "?";
1752        }
1753        oss << '@' << m_id.get() << endl;
1754        break;
1755      case G_BINARY:
1756        oss << opToString(m_op) << endl;
1757        indent+='.';
1758        m_left->intoTreeString(oss, indent);
1759        m_right->intoTreeString(oss, indent);
1760        break;
1761      case G_UNARY:
1762      case G_UNARY_P:
1763      case G_NP1OUT:
1764      case G_NP1OUT_P:
1765      case G_REDUCTION:
1766        oss << opToString(m_op) << endl;
1767        indent+='.';
1768        m_left->intoTreeString(oss, indent);
1769        break;
1770      case G_TENSORPROD:
1771        oss << opToString(m_op) << endl;
1772        indent+='.';
1773        m_left->intoTreeString(oss, indent);
1774        m_right->intoTreeString(oss, indent);
1775        break;
1776      case G_NP1OUT_2P:
1777        oss << opToString(m_op) << ", " << m_axis_offset << ", " << m_transpose<< endl;
1778        indent+='.';
1779        m_left->intoTreeString(oss, indent);
1780        break;
1781      default:
1782        oss << "UNKNOWN";
1783      }
1784    }
1785    
1786    
1787  DataAbstract*  DataAbstract*
1788  DataLazy::deepCopy()  DataLazy::deepCopy()
1789  {  {
1790    if (m_op==IDENTITY)    switch (getOpgroup(m_op))
1791    {    {
1792      return new DataLazy(m_left);    // we don't need to copy the child here    case G_IDENTITY:  return new DataLazy(m_id->deepCopy()->getPtr());
1793      case G_UNARY:
1794      case G_REDUCTION:      return new DataLazy(m_left->deepCopy()->getPtr(),m_op);
1795      case G_UNARY_P:   return new DataLazy(m_left->deepCopy()->getPtr(), m_op, m_tol);
1796      case G_BINARY:    return new DataLazy(m_left->deepCopy()->getPtr(),m_right->deepCopy()->getPtr(),m_op);
1797      case G_NP1OUT: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(),m_op);
1798      case G_TENSORPROD: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
1799      case G_NP1OUT_P:   return new DataLazy(m_left->deepCopy()->getPtr(),m_op,  m_axis_offset);
1800      case G_NP1OUT_2P:  return new DataLazy(m_left->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
1801      default:
1802        throw DataException("Programmer error - do not know how to deepcopy operator "+opToString(m_op)+".");
1803    }    }
   return new DataLazy(m_left->deepCopy()->getPtr(),m_right->deepCopy()->getPtr(),m_op);  
1804  }  }
1805    
1806    
1807    
1808    // There is no single, natural interpretation of getLength on DataLazy.
1809    // Instances of DataReady can look at the size of their vectors.
1810    // For lazy though, it could be the size the data would be if it were resolved;
1811    // or it could be some function of the lengths of the DataReady instances which
1812    // form part of the expression.
1813    // Rather than have people making assumptions, I have disabled the method.
1814  DataTypes::ValueType::size_type  DataTypes::ValueType::size_type
1815  DataLazy::getLength() const  DataLazy::getLength() const
1816  {  {
1817    return m_length;    throw DataException("getLength() does not make sense for lazy data.");
1818  }  }
1819    
1820    
# Line 853  DataLazy::getSlice(const DataTypes::Regi Line 1824  DataLazy::getSlice(const DataTypes::Regi
1824    throw DataException("getSlice - not implemented for Lazy objects.");    throw DataException("getSlice - not implemented for Lazy objects.");
1825  }  }
1826    
1827    
1828    // To do this we need to rely on our child nodes
1829    DataTypes::ValueType::size_type
1830    DataLazy::getPointOffset(int sampleNo,
1831                     int dataPointNo)
1832    {
1833      if (m_op==IDENTITY)
1834      {
1835        return m_id->getPointOffset(sampleNo,dataPointNo);
1836      }
1837      if (m_readytype!='E')
1838      {
1839        collapse();
1840        return m_id->getPointOffset(sampleNo,dataPointNo);
1841      }
1842      // at this point we do not have an identity node and the expression will be Expanded
1843      // so we only need to know which child to ask
1844      if (m_left->m_readytype=='E')
1845      {
1846        return m_left->getPointOffset(sampleNo,dataPointNo);
1847      }
1848      else
1849      {
1850        return m_right->getPointOffset(sampleNo,dataPointNo);
1851      }
1852    }
1853    
1854    // To do this we need to rely on our child nodes
1855  DataTypes::ValueType::size_type  DataTypes::ValueType::size_type
1856  DataLazy::getPointOffset(int sampleNo,  DataLazy::getPointOffset(int sampleNo,
1857                   int dataPointNo) const                   int dataPointNo) const
1858  {  {
1859    throw DataException("getPointOffset - not implemented for Lazy objects - yet.");    if (m_op==IDENTITY)
1860      {
1861        return m_id->getPointOffset(sampleNo,dataPointNo);
1862      }
1863      if (m_readytype=='E')
1864      {
1865        // at this point we do not have an identity node and the expression will be Expanded
1866        // so we only need to know which child to ask
1867        if (m_left->m_readytype=='E')
1868        {
1869        return m_left->getPointOffset(sampleNo,dataPointNo);
1870        }
1871        else
1872        {
1873        return m_right->getPointOffset(sampleNo,dataPointNo);
1874        }
1875      }
1876      if (m_readytype=='C')
1877      {
1878        return m_left->getPointOffset(sampleNo,dataPointNo); // which child doesn't matter
1879      }
1880      throw DataException("Programmer error - getPointOffset on lazy data may require collapsing (but this object is marked const).");
1881  }  }
1882    
1883  // It would seem that DataTagged will need to be treated differently since even after setting all tags  
1884  // to zero, all the tags from all the DataTags would be in the result.  // I have decided to let Data:: handle this issue.
 // However since they all have the same value (0) whether they are there or not should not matter.  
 // So I have decided that for all types this method will create a constant 0.  
 // It can be promoted up as required.  
 // A possible efficiency concern might be expanded->constant->expanded which has an extra memory management  
 // but we can deal with that if it arrises.  
1885  void  void
1886  DataLazy::setToZero()  DataLazy::setToZero()
1887  {  {
1888    DataTypes::ValueType v(getNoValues(),0);  //   DataTypes::ValueType v(getNoValues(),0);
1889    m_id=DataReady_ptr(new DataConstant(getFunctionSpace(),getShape(),v));  //   m_id=DataReady_ptr(new DataConstant(getFunctionSpace(),getShape(),v));
1890    m_op=IDENTITY;  //   m_op=IDENTITY;
1891    m_right.reset();    //   m_right.reset();  
1892    m_left.reset();  //   m_left.reset();
1893    m_readytype='C';  //   m_readytype='C';
1894    m_buffsRequired=1;  //   m_buffsRequired=1;
1895    
1896      privdebug=privdebug;  // to stop the compiler complaining about unused privdebug
1897      throw DataException("Programmer error - setToZero not supported for DataLazy (DataLazy objects should be read only).");
1898    }
1899    
1900    bool
1901    DataLazy::actsExpanded() const
1902    {
1903        return (m_readytype=='E');
1904  }  }
1905    
1906  }   // end namespace  }   // end namespace

Legend:
Removed from v.1910  
changed lines
  Added in v.2779

  ViewVC Help
Powered by ViewVC 1.1.26