/[escript]/branches/clazy/escriptcore/src/DataLazy.cpp
ViewVC logotype

Diff of /branches/clazy/escriptcore/src/DataLazy.cpp

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

branches/schroedinger/escript/src/DataLazy.cpp revision 1910 by jfenwick, Thu Oct 23 03:05:28 2008 UTC trunk/escript/src/DataLazy.cpp revision 2777 by jfenwick, Thu Nov 26 01:06:00 2009 UTC
# Line 1  Line 1 
1    
2  /*******************************************************  /*******************************************************
3  *  *
4  * Copyright (c) 2003-2008 by University of Queensland  * Copyright (c) 2003-2009 by University of Queensland
5  * Earth Systems Science Computational Center (ESSCC)  * Earth Systems Science Computational Center (ESSCC)
6  * http://www.uq.edu.au/esscc  * http://www.uq.edu.au/esscc
7  *  *
# Line 26  Line 26 
26  #include "DataTypes.h"  #include "DataTypes.h"
27  #include "Data.h"  #include "Data.h"
28  #include "UnaryFuncs.h"     // for escript::fsign  #include "UnaryFuncs.h"     // for escript::fsign
29    #include "Utils.h"
30    
31    #include "EscriptParams.h"
32    
33    #include <iomanip>      // for some fancy formatting in debug
34    
35    // #define LAZYDEBUG(X) if (privdebug){X;}
36    #define LAZYDEBUG(X)
37    namespace
38    {
39    bool privdebug=false;
40    
41    #define ENABLEDEBUG privdebug=true;
42    #define DISABLEDEBUG privdebug=false;
43    }
44    
45    // #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {cerr << "\n!!!!!!! SIZE LIMIT EXCEEDED " << m_children << ";" << m_height << endl << toString() << endl;resolveToIdentity();}
46    
47    #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {resolveToIdentity();}
48    
49    
50  /*  /*
51  How does DataLazy work?  How does DataLazy work?
# Line 47  I will refer to individual DataLazy obje Line 67  I will refer to individual DataLazy obje
67  Each node also stores:  Each node also stores:
68  - m_readytype \in {'E','T','C','?'} ~ indicates what sort of DataReady would be produced if the expression was  - m_readytype \in {'E','T','C','?'} ~ indicates what sort of DataReady would be produced if the expression was
69      evaluated.      evaluated.
 - m_length ~ how many values would be stored in the answer if the expression was evaluated.  
70  - m_buffsrequired ~ the larged number of samples which would need to be kept simultaneously in order to  - m_buffsrequired ~ the larged number of samples which would need to be kept simultaneously in order to
71      evaluate the expression.      evaluate the expression.
72  - m_samplesize ~ the number of doubles stored in a sample.  - m_samplesize ~ the number of doubles stored in a sample.
# Line 69  The convention that I use, is that the r Line 88  The convention that I use, is that the r
88    
89  For expressions which evaluate to Constant or Tagged, there is a different evaluation method.  For expressions which evaluate to Constant or Tagged, there is a different evaluation method.
90  The collapse method invokes the (non-lazy) operations on the Data class to evaluate the expression.  The collapse method invokes the (non-lazy) operations on the Data class to evaluate the expression.
91    
92    To add a new operator you need to do the following (plus anything I might have forgotten - adding a new group for example):
93    1) Add to the ES_optype.
94    2) determine what opgroup your operation belongs to (X)
95    3) add a string for the op to the end of ES_opstrings
96    4) increase ES_opcount
97    5) add an entry (X) to opgroups
98    6) add an entry to the switch in collapseToReady
99    7) add an entry to resolveX
100  */  */
101    
102    
# Line 78  using namespace boost; Line 106  using namespace boost;
106  namespace escript  namespace escript
107  {  {
108    
 const std::string&  
 opToString(ES_optype op);  
   
109  namespace  namespace
110  {  {
111    
112    
113    // enabling this will print out when ever the maximum stacksize used by resolve increases
114    // it assumes _OPENMP is also in use
115    //#define LAZY_STACK_PROF
116    
117    
118    
119    #ifndef _OPENMP
120      #ifdef LAZY_STACK_PROF
121      #undef LAZY_STACK_PROF
122      #endif
123    #endif
124    
125    
126    #ifdef LAZY_STACK_PROF
127    std::vector<void*> stackstart(getNumberOfThreads());
128    std::vector<void*> stackend(getNumberOfThreads());
129    size_t maxstackuse=0;
130    #endif
131    
132  enum ES_opgroup  enum ES_opgroup
133  {  {
134     G_UNKNOWN,     G_UNKNOWN,
135     G_IDENTITY,     G_IDENTITY,
136     G_BINARY,        // pointwise operations with two arguments     G_BINARY,        // pointwise operations with two arguments
137     G_UNARY      // pointwise operations with one argument     G_UNARY,     // pointwise operations with one argument
138       G_UNARY_P,       // pointwise operations with one argument, requiring a parameter
139       G_NP1OUT,        // non-pointwise op with one output
140       G_NP1OUT_P,      // non-pointwise op with one output requiring a parameter
141       G_TENSORPROD,    // general tensor product
142       G_NP1OUT_2P,     // non-pointwise op with one output requiring two params
143       G_REDUCTION      // non-pointwise unary op with a scalar output
144  };  };
145    
146    
# Line 100  string ES_opstrings[]={"UNKNOWN","IDENTI Line 151  string ES_opstrings[]={"UNKNOWN","IDENTI
151              "asin","acos","atan","sinh","cosh","tanh","erf",              "asin","acos","atan","sinh","cosh","tanh","erf",
152              "asinh","acosh","atanh",              "asinh","acosh","atanh",
153              "log10","log","sign","abs","neg","pos","exp","sqrt",              "log10","log","sign","abs","neg","pos","exp","sqrt",
154              "1/","where>0","where<0","where>=0","where<=0"};              "1/","where>0","where<0","where>=0","where<=0", "where<>0","where=0",
155  int ES_opcount=32;              "symmetric","nonsymmetric",
156                "prod",
157                "transpose", "trace",
158                "swapaxes",
159                "minval", "maxval"};
160    int ES_opcount=43;
161  ES_opgroup opgroups[]={G_UNKNOWN,G_IDENTITY,G_BINARY,G_BINARY,G_BINARY,G_BINARY, G_BINARY,  ES_opgroup opgroups[]={G_UNKNOWN,G_IDENTITY,G_BINARY,G_BINARY,G_BINARY,G_BINARY, G_BINARY,
162              G_UNARY,G_UNARY,G_UNARY, //10              G_UNARY,G_UNARY,G_UNARY, //10
163              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,    // 17              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,    // 17
164              G_UNARY,G_UNARY,G_UNARY,                    // 20              G_UNARY,G_UNARY,G_UNARY,                    // 20
165              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,        // 28              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,    // 28
166              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY};              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY, G_UNARY_P, G_UNARY_P,      // 35
167                G_NP1OUT,G_NP1OUT,
168                G_TENSORPROD,
169                G_NP1OUT_P, G_NP1OUT_P,
170                G_NP1OUT_2P,
171                G_REDUCTION, G_REDUCTION};
172  inline  inline
173  ES_opgroup  ES_opgroup
174  getOpgroup(ES_optype op)  getOpgroup(ES_optype op)
# Line 124  resultFS(DataAbstract_ptr left, DataAbst Line 185  resultFS(DataAbstract_ptr left, DataAbst
185      // that way, if interpolate is required in any other op we can just throw a      // that way, if interpolate is required in any other op we can just throw a
186      // programming error exception.      // programming error exception.
187    
188      FunctionSpace l=left->getFunctionSpace();
189      if (left->getFunctionSpace()!=right->getFunctionSpace())    FunctionSpace r=right->getFunctionSpace();
190      {    if (l!=r)
191          throw DataException("FunctionSpaces not equal - interpolation not supported on lazy data.");    {
192      }      if (r.probeInterpolation(l))
193      return left->getFunctionSpace();      {
194        return l;
195        }
196        if (l.probeInterpolation(r))
197        {
198        return r;
199        }
200        throw DataException("Cannot interpolate between the FunctionSpaces given for operation "+opToString(op)+".");
201      }
202      return l;
203  }  }
204    
205  // return the shape of the result of "left op right"  // return the shape of the result of "left op right"
206    // the shapes resulting from tensor product are more complex to compute so are worked out elsewhere
207  DataTypes::ShapeType  DataTypes::ShapeType
208  resultShape(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)  resultShape(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
209  {  {
210      if (left->getShape()!=right->getShape())      if (left->getShape()!=right->getShape())
211      {      {
212        if (getOpgroup(op)!=G_BINARY)        if ((getOpgroup(op)!=G_BINARY) && (getOpgroup(op)!=G_NP1OUT))
213        {        {
214          throw DataException("Shapes not the name - shapes must match for (point)binary operations.");          throw DataException("Shapes not the name - shapes must match for (point)binary operations.");
215        }        }
216    
217        if (left->getRank()==0)   // we need to allow scalar * anything        if (left->getRank()==0)   // we need to allow scalar * anything
218        {        {
219          return right->getShape();          return right->getShape();
# Line 155  resultShape(DataAbstract_ptr left, DataA Line 227  resultShape(DataAbstract_ptr left, DataA
227      return left->getShape();      return left->getShape();
228  }  }
229    
230  // determine the number of points in the result of "left op right"  // return the shape for "op left"
231  size_t  
232  resultLength(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)  DataTypes::ShapeType
233    resultShape(DataAbstract_ptr left, ES_optype op, int axis_offset)
234  {  {
235     switch (getOpgroup(op))      switch(op)
236     {      {
237     case G_BINARY: return left->getLength();          case TRANS:
238     case G_UNARY: return left->getLength();         {            // for the scoping of variables
239     default:          const DataTypes::ShapeType& s=left->getShape();
240      throw DataException("Programmer Error - attempt to getLength() for operator "+opToString(op)+".");          DataTypes::ShapeType sh;
241     }          int rank=left->getRank();
242            if (axis_offset<0 || axis_offset>rank)
243            {
244                   throw DataException("Error - Data::transpose must have 0 <= axis_offset <= rank=" + rank);
245                }
246                for (int i=0; i<rank; i++)
247            {
248               int index = (axis_offset+i)%rank;
249                   sh.push_back(s[index]); // Append to new shape
250                }
251            return sh;
252           }
253        break;
254        case TRACE:
255           {
256            int rank=left->getRank();
257            if (rank<2)
258            {
259               throw DataException("Trace can only be computed for objects with rank 2 or greater.");
260            }
261            if ((axis_offset>rank-2) || (axis_offset<0))
262            {
263               throw DataException("Trace: axis offset must lie between 0 and rank-2 inclusive.");
264            }
265            if (rank==2)
266            {
267               return DataTypes::scalarShape;
268            }
269            else if (rank==3)
270            {
271               DataTypes::ShapeType sh;
272                   if (axis_offset==0)
273               {
274                    sh.push_back(left->getShape()[2]);
275                   }
276                   else     // offset==1
277               {
278                sh.push_back(left->getShape()[0]);
279                   }
280               return sh;
281            }
282            else if (rank==4)
283            {
284               DataTypes::ShapeType sh;
285               const DataTypes::ShapeType& s=left->getShape();
286                   if (axis_offset==0)
287               {
288                    sh.push_back(s[2]);
289                    sh.push_back(s[3]);
290                   }
291                   else if (axis_offset==1)
292               {
293                    sh.push_back(s[0]);
294                    sh.push_back(s[3]);
295                   }
296               else     // offset==2
297               {
298                sh.push_back(s[0]);
299                sh.push_back(s[1]);
300               }
301               return sh;
302            }
303            else        // unknown rank
304            {
305               throw DataException("Error - Data::trace can only be calculated for rank 2, 3 or 4 object.");
306            }
307           }
308        break;
309            default:
310        throw DataException("Programmer error - resultShape(left,op) can't compute shapes for operator "+opToString(op)+".");
311        }
312  }  }
313    
314  // determine the number of samples requires to evaluate an expression combining left and right  DataTypes::ShapeType
315  int  SwapShape(DataAbstract_ptr left, const int axis0, const int axis1)
 calcBuffs(const DataLazy_ptr& left, const DataLazy_ptr& right, ES_optype op)  
316  {  {
317     switch(getOpgroup(op))       // This code taken from the Data.cpp swapaxes() method
318     {       // Some of the checks are probably redundant here
319     case G_IDENTITY: return 1;       int axis0_tmp,axis1_tmp;
320     case G_BINARY: return max(left->getBuffsRequired(),right->getBuffsRequired()+1);       const DataTypes::ShapeType& s=left->getShape();
321     case G_UNARY: return max(left->getBuffsRequired(),1);       DataTypes::ShapeType out_shape;
322     default:       // Here's the equivalent of python s_out=s[axis_offset:]+s[:axis_offset]
323      throw DataException("Programmer Error - attempt to calcBuffs() for operator "+opToString(op)+".");       // which goes thru all shape vector elements starting with axis_offset (at index=rank wrap around to 0)
324     }       int rank=left->getRank();
325         if (rank<2) {
326            throw DataException("Error - Data::swapaxes argument must have at least rank 2.");
327         }
328         if (axis0<0 || axis0>rank-1) {
329            throw DataException("Error - Data::swapaxes: axis0 must be between 0 and rank-1=" + rank-1);
330         }
331         if (axis1<0 || axis1>rank-1) {
332             throw DataException("Error - Data::swapaxes: axis1 must be between 0 and rank-1=" + rank-1);
333         }
334         if (axis0 == axis1) {
335             throw DataException("Error - Data::swapaxes: axis indices must be different.");
336         }
337         if (axis0 > axis1) {
338             axis0_tmp=axis1;
339             axis1_tmp=axis0;
340         } else {
341             axis0_tmp=axis0;
342             axis1_tmp=axis1;
343         }
344         for (int i=0; i<rank; i++) {
345           if (i == axis0_tmp) {
346              out_shape.push_back(s[axis1_tmp]);
347           } else if (i == axis1_tmp) {
348              out_shape.push_back(s[axis0_tmp]);
349           } else {
350              out_shape.push_back(s[i]);
351           }
352         }
353        return out_shape;
354  }  }
355    
356    
357    // determine the output shape for the general tensor product operation
358    // the additional parameters return information required later for the product
359    // the majority of this code is copy pasted from C_General_Tensor_Product
360    DataTypes::ShapeType
361    GTPShape(DataAbstract_ptr left, DataAbstract_ptr right, int axis_offset, int transpose, int& SL, int& SM, int& SR)
362    {
363        
364      // Get rank and shape of inputs
365      int rank0 = left->getRank();
366      int rank1 = right->getRank();
367      const DataTypes::ShapeType& shape0 = left->getShape();
368      const DataTypes::ShapeType& shape1 = right->getShape();
369    
370      // Prepare for the loops of the product and verify compatibility of shapes
371      int start0=0, start1=0;
372      if (transpose == 0)       {}
373      else if (transpose == 1)  { start0 = axis_offset; }
374      else if (transpose == 2)  { start1 = rank1-axis_offset; }
375      else              { throw DataException("DataLazy GeneralTensorProduct Constructor: Error - transpose should be 0, 1 or 2"); }
376    
377      if (rank0<axis_offset)
378      {
379        throw DataException("DataLazy GeneralTensorProduct Constructor: Error - rank of left < axisoffset");
380      }
381    
382      // Adjust the shapes for transpose
383      DataTypes::ShapeType tmpShape0(rank0);    // pre-sizing the vectors rather
384      DataTypes::ShapeType tmpShape1(rank1);    // than using push_back
385      for (int i=0; i<rank0; i++)   { tmpShape0[i]=shape0[(i+start0)%rank0]; }
386      for (int i=0; i<rank1; i++)   { tmpShape1[i]=shape1[(i+start1)%rank1]; }
387    
388      // Prepare for the loops of the product
389      SL=1, SM=1, SR=1;
390      for (int i=0; i<rank0-axis_offset; i++)   {
391        SL *= tmpShape0[i];
392      }
393      for (int i=rank0-axis_offset; i<rank0; i++)   {
394        if (tmpShape0[i] != tmpShape1[i-(rank0-axis_offset)]) {
395          throw DataException("C_GeneralTensorProduct: Error - incompatible shapes");
396        }
397        SM *= tmpShape0[i];
398      }
399      for (int i=axis_offset; i<rank1; i++)     {
400        SR *= tmpShape1[i];
401      }
402    
403      // Define the shape of the output (rank of shape is the sum of the loop ranges below)
404      DataTypes::ShapeType shape2(rank0+rank1-2*axis_offset);  
405      {         // block to limit the scope of out_index
406         int out_index=0;
407         for (int i=0; i<rank0-axis_offset; i++, ++out_index) { shape2[out_index]=tmpShape0[i]; } // First part of arg_0_Z
408         for (int i=axis_offset; i<rank1; i++, ++out_index)   { shape2[out_index]=tmpShape1[i]; } // Last part of arg_1_Z
409      }
410    
411      if (shape2.size()>ESCRIPT_MAX_DATA_RANK)
412      {
413         ostringstream os;
414         os << "C_GeneralTensorProduct: Error - Attempt to create a rank " << shape2.size() << " object. The maximum rank is " << ESCRIPT_MAX_DATA_RANK << ".";
415         throw DataException(os.str());
416      }
417    
418      return shape2;
419    }
420    
421  }   // end anonymous namespace  }   // end anonymous namespace
422    
423    
# Line 198  opToString(ES_optype op) Line 433  opToString(ES_optype op)
433    return ES_opstrings[op];    return ES_opstrings[op];
434  }  }
435    
436    void DataLazy::LazyNodeSetup()
437    {
438    #ifdef _OPENMP
439        int numthreads=omp_get_max_threads();
440        m_samples.resize(numthreads*m_samplesize);
441        m_sampleids=new int[numthreads];
442        for (int i=0;i<numthreads;++i)
443        {
444            m_sampleids[i]=-1;  
445        }
446    #else
447        m_samples.resize(m_samplesize);
448        m_sampleids=new int[1];
449        m_sampleids[0]=-1;
450    #endif  // _OPENMP
451    }
452    
453    
454    // Creates an identity node
455  DataLazy::DataLazy(DataAbstract_ptr p)  DataLazy::DataLazy(DataAbstract_ptr p)
456      : parent(p->getFunctionSpace(),p->getShape()),      : parent(p->getFunctionSpace(),p->getShape())
457      m_op(IDENTITY)      ,m_sampleids(0),
458        m_samples(1)
459  {  {
460     if (p->isLazy())     if (p->isLazy())
461     {     {
# Line 212  DataLazy::DataLazy(DataAbstract_ptr p) Line 466  DataLazy::DataLazy(DataAbstract_ptr p)
466     }     }
467     else     else
468     {     {
469      m_id=dynamic_pointer_cast<DataReady>(p);      p->makeLazyShared();
470      if(p->isConstant()) {m_readytype='C';}      DataReady_ptr dr=dynamic_pointer_cast<DataReady>(p);
471      else if(p->isExpanded()) {m_readytype='E';}      makeIdentity(dr);
472      else if (p->isTagged()) {m_readytype='T';}  LAZYDEBUG(cout << "Wrapping " << dr.get() << " id=" << m_id.get() << endl;)
     else {throw DataException("Unknown DataReady instance in DataLazy constructor.");}  
473     }     }
474     m_length=p->getLength();  LAZYDEBUG(cout << "(1)Lazy created with " << m_samplesize << endl;)
    m_buffsRequired=1;  
    m_samplesize=getNumDPPSample()*getNoValues();  
 cout << "(1)Lazy created with " << m_samplesize << endl;  
475  }  }
476    
   
   
   
477  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op)  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op)
478      : parent(left->getFunctionSpace(),left->getShape()),      : parent(left->getFunctionSpace(),(getOpgroup(op)!=G_REDUCTION)?left->getShape():DataTypes::scalarShape),
479      m_op(op)      m_op(op),
480        m_axis_offset(0),
481        m_transpose(0),
482        m_SL(0), m_SM(0), m_SR(0)
483  {  {
484     if (getOpgroup(op)!=G_UNARY)     if ((getOpgroup(op)!=G_UNARY) && (getOpgroup(op)!=G_NP1OUT) && (getOpgroup(op)!=G_REDUCTION))
485     {     {
486      throw DataException("Programmer error - constructor DataLazy(left, op) will only process UNARY operations.");      throw DataException("Programmer error - constructor DataLazy(left, op) will only process UNARY operations.");
487     }     }
488    
489     DataLazy_ptr lleft;     DataLazy_ptr lleft;
490     if (!left->isLazy())     if (!left->isLazy())
491     {     {
# Line 245  DataLazy::DataLazy(DataAbstract_ptr left Line 496  DataLazy::DataLazy(DataAbstract_ptr left
496      lleft=dynamic_pointer_cast<DataLazy>(left);      lleft=dynamic_pointer_cast<DataLazy>(left);
497     }     }
498     m_readytype=lleft->m_readytype;     m_readytype=lleft->m_readytype;
    m_length=left->getLength();  
499     m_left=lleft;     m_left=lleft;
    m_buffsRequired=1;  
500     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
501       m_children=m_left->m_children+1;
502       m_height=m_left->m_height+1;
503       LazyNodeSetup();
504       SIZELIMIT
505  }  }
506    
507    
508    // In this constructor we need to consider interpolation
509  DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)  DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
510      : parent(resultFS(left,right,op), resultShape(left,right,op)),      : parent(resultFS(left,right,op), resultShape(left,right,op)),
511      m_op(op)      m_op(op),
512        m_SL(0), m_SM(0), m_SR(0)
513  {  {
514     if (getOpgroup(op)!=G_BINARY)  LAZYDEBUG(cout << "Forming operator with " << left.get() << " " << right.get() << endl;)
515       if ((getOpgroup(op)!=G_BINARY))
516     {     {
517      throw DataException("Programmer error - constructor DataLazy(left, right, op) will only process BINARY operations.");      throw DataException("Programmer error - constructor DataLazy(left, right, op) will only process BINARY operations.");
518     }     }
519    
520       if (getFunctionSpace()!=left->getFunctionSpace())    // left needs to be interpolated
521       {
522        FunctionSpace fs=getFunctionSpace();
523        Data ltemp(left);
524        Data tmp(ltemp,fs);
525        left=tmp.borrowDataPtr();
526       }
527       if (getFunctionSpace()!=right->getFunctionSpace())   // right needs to be interpolated
528       {
529        Data tmp(Data(right),getFunctionSpace());
530        right=tmp.borrowDataPtr();
531    LAZYDEBUG(cout << "Right interpolation required " << right.get() << endl;)
532       }
533       left->operandCheck(*right);
534    
535     if (left->isLazy())          // the children need to be DataLazy. Wrap them in IDENTITY if required     if (left->isLazy())          // the children need to be DataLazy. Wrap them in IDENTITY if required
536     {     {
537      m_left=dynamic_pointer_cast<DataLazy>(left);      m_left=dynamic_pointer_cast<DataLazy>(left);
538    LAZYDEBUG(cout << "Left is " << m_left->toString() << endl;)
539     }     }
540     else     else
541     {     {
542      m_left=DataLazy_ptr(new DataLazy(left));      m_left=DataLazy_ptr(new DataLazy(left));
543    LAZYDEBUG(cout << "Left " << left.get() << " wrapped " << m_left->m_id.get() << endl;)
544     }     }
545     if (right->isLazy())     if (right->isLazy())
546     {     {
547      m_right=dynamic_pointer_cast<DataLazy>(right);      m_right=dynamic_pointer_cast<DataLazy>(right);
548    LAZYDEBUG(cout << "Right is " << m_right->toString() << endl;)
549     }     }
550     else     else
551     {     {
552      m_right=DataLazy_ptr(new DataLazy(right));      m_right=DataLazy_ptr(new DataLazy(right));
553    LAZYDEBUG(cout << "Right " << right.get() << " wrapped " << m_right->m_id.get() << endl;)
554     }     }
555     char lt=m_left->m_readytype;     char lt=m_left->m_readytype;
556     char rt=m_right->m_readytype;     char rt=m_right->m_readytype;
# Line 290  DataLazy::DataLazy(DataAbstract_ptr left Line 566  DataLazy::DataLazy(DataAbstract_ptr left
566     {     {
567      m_readytype='C';      m_readytype='C';
568     }     }
569     m_length=resultLength(m_left,m_right,m_op);     m_samplesize=getNumDPPSample()*getNoValues();
570     m_samplesize=getNumDPPSample()*getNoValues();         m_children=m_left->m_children+m_right->m_children+2;
571     m_buffsRequired=calcBuffs(m_left, m_right,m_op);     m_height=max(m_left->m_height,m_right->m_height)+1;
572  cout << "(3)Lazy created with " << m_samplesize << endl;     LazyNodeSetup();
573       SIZELIMIT
574    LAZYDEBUG(cout << "(3)Lazy created with " << m_samplesize << endl;)
575  }  }
576    
577    DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op, int axis_offset, int transpose)
578        : parent(resultFS(left,right,op), GTPShape(left,right, axis_offset, transpose, m_SL,m_SM, m_SR)),
579        m_op(op),
580        m_axis_offset(axis_offset),
581        m_transpose(transpose)
582    {
583       if ((getOpgroup(op)!=G_TENSORPROD))
584       {
585        throw DataException("Programmer error - constructor DataLazy(left, right, op, ax, tr) will only process BINARY operations which require parameters.");
586       }
587       if ((transpose>2) || (transpose<0))
588       {
589        throw DataException("DataLazy GeneralTensorProduct constructor: Error - transpose should be 0, 1 or 2");
590       }
591       if (getFunctionSpace()!=left->getFunctionSpace())    // left needs to be interpolated
592       {
593        FunctionSpace fs=getFunctionSpace();
594        Data ltemp(left);
595        Data tmp(ltemp,fs);
596        left=tmp.borrowDataPtr();
597       }
598       if (getFunctionSpace()!=right->getFunctionSpace())   // right needs to be interpolated
599       {
600        Data tmp(Data(right),getFunctionSpace());
601        right=tmp.borrowDataPtr();
602       }
603    //    left->operandCheck(*right);
604    
605  DataLazy::~DataLazy()     if (left->isLazy())          // the children need to be DataLazy. Wrap them in IDENTITY if required
606       {
607        m_left=dynamic_pointer_cast<DataLazy>(left);
608       }
609       else
610       {
611        m_left=DataLazy_ptr(new DataLazy(left));
612       }
613       if (right->isLazy())
614       {
615        m_right=dynamic_pointer_cast<DataLazy>(right);
616       }
617       else
618       {
619        m_right=DataLazy_ptr(new DataLazy(right));
620       }
621       char lt=m_left->m_readytype;
622       char rt=m_right->m_readytype;
623       if (lt=='E' || rt=='E')
624       {
625        m_readytype='E';
626       }
627       else if (lt=='T' || rt=='T')
628       {
629        m_readytype='T';
630       }
631       else
632       {
633        m_readytype='C';
634       }
635       m_samplesize=getNumDPPSample()*getNoValues();
636       m_children=m_left->m_children+m_right->m_children+2;
637       m_height=max(m_left->m_height,m_right->m_height)+1;
638       LazyNodeSetup();
639       SIZELIMIT
640    LAZYDEBUG(cout << "(4)Lazy created with " << m_samplesize << endl;)
641    }
642    
643    
644    DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, int axis_offset)
645        : parent(left->getFunctionSpace(), resultShape(left,op, axis_offset)),
646        m_op(op),
647        m_axis_offset(axis_offset),
648        m_transpose(0),
649        m_tol(0)
650    {
651       if ((getOpgroup(op)!=G_NP1OUT_P))
652       {
653        throw DataException("Programmer error - constructor DataLazy(left, op, ax) will only process UNARY operations which require parameters.");
654       }
655       DataLazy_ptr lleft;
656       if (!left->isLazy())
657       {
658        lleft=DataLazy_ptr(new DataLazy(left));
659       }
660       else
661       {
662        lleft=dynamic_pointer_cast<DataLazy>(left);
663       }
664       m_readytype=lleft->m_readytype;
665       m_left=lleft;
666       m_samplesize=getNumDPPSample()*getNoValues();
667       m_children=m_left->m_children+1;
668       m_height=m_left->m_height+1;
669       LazyNodeSetup();
670       SIZELIMIT
671    LAZYDEBUG(cout << "(5)Lazy created with " << m_samplesize << endl;)
672    }
673    
674    DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, double tol)
675        : parent(left->getFunctionSpace(), left->getShape()),
676        m_op(op),
677        m_axis_offset(0),
678        m_transpose(0),
679        m_tol(tol)
680  {  {
681       if ((getOpgroup(op)!=G_UNARY_P))
682       {
683        throw DataException("Programmer error - constructor DataLazy(left, op, tol) will only process UNARY operations which require parameters.");
684       }
685       DataLazy_ptr lleft;
686       if (!left->isLazy())
687       {
688        lleft=DataLazy_ptr(new DataLazy(left));
689       }
690       else
691       {
692        lleft=dynamic_pointer_cast<DataLazy>(left);
693       }
694       m_readytype=lleft->m_readytype;
695       m_left=lleft;
696       m_samplesize=getNumDPPSample()*getNoValues();
697       m_children=m_left->m_children+1;
698       m_height=m_left->m_height+1;
699       LazyNodeSetup();
700       SIZELIMIT
701    LAZYDEBUG(cout << "(6)Lazy created with " << m_samplesize << endl;)
702  }  }
703    
704    
705  int  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, const int axis0, const int axis1)
706  DataLazy::getBuffsRequired() const      : parent(left->getFunctionSpace(), SwapShape(left,axis0,axis1)),
707        m_op(op),
708        m_axis_offset(axis0),
709        m_transpose(axis1),
710        m_tol(0)
711    {
712       if ((getOpgroup(op)!=G_NP1OUT_2P))
713       {
714        throw DataException("Programmer error - constructor DataLazy(left, op, tol) will only process UNARY operations which require two integer parameters.");
715       }
716       DataLazy_ptr lleft;
717       if (!left->isLazy())
718       {
719        lleft=DataLazy_ptr(new DataLazy(left));
720       }
721       else
722       {
723        lleft=dynamic_pointer_cast<DataLazy>(left);
724       }
725       m_readytype=lleft->m_readytype;
726       m_left=lleft;
727       m_samplesize=getNumDPPSample()*getNoValues();
728       m_children=m_left->m_children+1;
729       m_height=m_left->m_height+1;
730       LazyNodeSetup();
731       SIZELIMIT
732    LAZYDEBUG(cout << "(7)Lazy created with " << m_samplesize << endl;)
733    }
734    
735    DataLazy::~DataLazy()
736  {  {
737      return m_buffsRequired;     delete[] m_sampleids;
738  }  }
739    
740    
# Line 328  DataLazy::collapseToReady() Line 757  DataLazy::collapseToReady()
757    DataReady_ptr pleft=m_left->collapseToReady();    DataReady_ptr pleft=m_left->collapseToReady();
758    Data left(pleft);    Data left(pleft);
759    Data right;    Data right;
760    if (getOpgroup(m_op)==G_BINARY)    if ((getOpgroup(m_op)==G_BINARY) || (getOpgroup(m_op)==G_TENSORPROD))
761    {    {
762      right=Data(m_right->collapseToReady());      right=Data(m_right->collapseToReady());
763    }    }
# Line 427  DataLazy::collapseToReady() Line 856  DataLazy::collapseToReady()
856      case LEZ:      case LEZ:
857      result=left.whereNonPositive();      result=left.whereNonPositive();
858      break;      break;
859        case NEZ:
860        result=left.whereNonZero(m_tol);
861        break;
862        case EZ:
863        result=left.whereZero(m_tol);
864        break;
865        case SYM:
866        result=left.symmetric();
867        break;
868        case NSYM:
869        result=left.nonsymmetric();
870        break;
871        case PROD:
872        result=C_GeneralTensorProduct(left,right,m_axis_offset, m_transpose);
873        break;
874        case TRANS:
875        result=left.transpose(m_axis_offset);
876        break;
877        case TRACE:
878        result=left.trace(m_axis_offset);
879        break;
880        case SWAP:
881        result=left.swapaxes(m_axis_offset, m_transpose);
882        break;
883        case MINVAL:
884        result=left.minval();
885        break;
886        case MAXVAL:
887        result=left.minval();
888        break;
889      default:      default:
890      throw DataException("Programmer error - do not know how to resolve operator "+opToString(m_op)+".");      throw DataException("Programmer error - collapseToReady does not know how to resolve operator "+opToString(m_op)+".");
891    }    }
892    return result.borrowReadyPtr();    return result.borrowReadyPtr();
893  }  }
# Line 454  DataLazy::collapse() Line 913  DataLazy::collapse()
913    m_op=IDENTITY;    m_op=IDENTITY;
914  }  }
915    
916  /*  
917    \brief Compute the value of the expression (binary operation) for the given sample.  
918    \return Vector which stores the value of the subexpression for the given sample.  
919    \param v A vector to store intermediate results.  
920    \param offset Index in v to begin storing results.  
921    \param sampleNo Sample number to evaluate.  #define PROC_OP(TYPE,X)                               \
922    \param roffset (output parameter) the offset in the return vector where the result begins.      for (int j=0;j<onumsteps;++j)\
923        {\
924    The return value will be an existing vector so do not deallocate it.        for (int i=0;i<numsteps;++i,resultp+=resultStep) \
925    If the result is stored in v it should be stored at the offset given.        { \
926    Everything from offset to the end of v should be considered available for this method to use.  LAZYDEBUG(cout << "[left,right]=[" << lroffset << "," << rroffset << "]" << endl;)\
927  */  LAZYDEBUG(cout << "{left,right}={" << (*left)[lroffset] << "," << (*right)[rroffset] << "}\n";)\
928  DataTypes::ValueType*           tensor_binary_operation< TYPE >(chunksize, &((*left)[lroffset]), &((*right)[rroffset]), resultp, X); \
929  DataLazy::resolveUnary(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const  LAZYDEBUG(cout << " result=      " << resultp[0] << endl;) \
930             lroffset+=leftstep; \
931             rroffset+=rightstep; \
932          }\
933          lroffset+=oleftstep;\
934          rroffset+=orightstep;\
935        }
936    
937    
938    // The result will be stored in m_samples
939    // The return value is a pointer to the DataVector, offset is the offset within the return value
940    const DataTypes::ValueType*
941    DataLazy::resolveNodeSample(int tid, int sampleNo, size_t& roffset)
942    {
943    LAZYDEBUG(cout << "Resolve sample " << toString() << endl;)
944        // collapse so we have a 'E' node or an IDENTITY for some other type
945      if (m_readytype!='E' && m_op!=IDENTITY)
946      {
947        collapse();
948      }
949      if (m_op==IDENTITY)  
950      {
951        const ValueType& vec=m_id->getVectorRO();
952        roffset=m_id->getPointOffset(sampleNo, 0);
953    #ifdef LAZY_STACK_PROF
954    int x;
955    if (&x<stackend[omp_get_thread_num()])
956    {
957           stackend[omp_get_thread_num()]=&x;
958    }
959    #endif
960        return &(vec);
961      }
962      if (m_readytype!='E')
963      {
964        throw DataException("Programmer Error - Collapse did not produce an expanded node.");
965      }
966      if (m_sampleids[tid]==sampleNo)
967      {
968        roffset=tid*m_samplesize;
969        return &(m_samples);        // sample is already resolved
970      }
971      m_sampleids[tid]=sampleNo;
972      switch (getOpgroup(m_op))
973      {
974      case G_UNARY:
975      case G_UNARY_P: return resolveNodeUnary(tid, sampleNo, roffset);
976      case G_BINARY: return resolveNodeBinary(tid, sampleNo, roffset);
977      case G_NP1OUT: return resolveNodeNP1OUT(tid, sampleNo, roffset);
978      case G_NP1OUT_P: return resolveNodeNP1OUT_P(tid, sampleNo, roffset);
979      case G_TENSORPROD: return resolveNodeTProd(tid, sampleNo, roffset);
980      case G_NP1OUT_2P: return resolveNodeNP1OUT_2P(tid, sampleNo, roffset);
981      case G_REDUCTION: return resolveNodeReduction(tid, sampleNo, roffset);
982      default:
983        throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");
984      }
985    }
986    
987    const DataTypes::ValueType*
988    DataLazy::resolveNodeUnary(int tid, int sampleNo, size_t& roffset)
989  {  {
990      // we assume that any collapsing has been done before we get here      // we assume that any collapsing has been done before we get here
991      // since we only have one argument we don't need to think about only      // since we only have one argument we don't need to think about only
992      // processing single points.      // processing single points.
993        // we will also know we won't get identity nodes
994    if (m_readytype!='E')    if (m_readytype!='E')
995    {    {
996      throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");      throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
997    }    }
998    const ValueType* vleft=m_left->resolveSample(v,offset,sampleNo,roffset);    if (m_op==IDENTITY)
999    const double* left=&((*vleft)[roffset]);    {
1000    double* result=&(v[offset]);      throw DataException("Programmer error - resolveNodeUnary should not be called on identity nodes.");
1001    roffset=offset;    }
1002      const DataTypes::ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, roffset);
1003      const double* left=&((*leftres)[roffset]);
1004      roffset=m_samplesize*tid;
1005      double* result=&(m_samples[roffset]);
1006    switch (m_op)    switch (m_op)
1007    {    {
1008      case SIN:        case SIN:  
1009      tensor_unary_operation(m_samplesize, left, result, ::sin);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sin);
1010      break;      break;
1011      case COS:      case COS:
1012      tensor_unary_operation(m_samplesize, left, result, ::cos);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cos);
1013      break;      break;
1014      case TAN:      case TAN:
1015      tensor_unary_operation(m_samplesize, left, result, ::tan);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tan);
1016      break;      break;
1017      case ASIN:      case ASIN:
1018      tensor_unary_operation(m_samplesize, left, result, ::asin);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::asin);
1019      break;      break;
1020      case ACOS:      case ACOS:
1021      tensor_unary_operation(m_samplesize, left, result, ::acos);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::acos);
1022      break;      break;
1023      case ATAN:      case ATAN:
1024      tensor_unary_operation(m_samplesize, left, result, ::atan);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::atan);
1025      break;      break;
1026      case SINH:      case SINH:
1027      tensor_unary_operation(m_samplesize, left, result, ::sinh);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sinh);
1028      break;      break;
1029      case COSH:      case COSH:
1030      tensor_unary_operation(m_samplesize, left, result, ::cosh);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cosh);
1031      break;      break;
1032      case TANH:      case TANH:
1033      tensor_unary_operation(m_samplesize, left, result, ::tanh);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tanh);
1034      break;      break;
1035      case ERF:      case ERF:
1036  #ifdef _WIN32  #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1037      throw DataException("Error - Data:: erf function is not supported on _WIN32 platforms.");      throw DataException("Error - Data:: erf function is not supported on _WIN32 platforms.");
1038  #else  #else
1039      tensor_unary_operation(m_samplesize, left, result, ::erf);      tensor_unary_operation(m_samplesize, left, result, ::erf);
1040      break;      break;
1041  #endif  #endif
1042     case ASINH:     case ASINH:
1043  #ifdef _WIN32  #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1044      tensor_unary_operation(m_samplesize, left, result, escript::asinh_substitute);      tensor_unary_operation(m_samplesize, left, result, escript::asinh_substitute);
1045  #else  #else
1046      tensor_unary_operation(m_samplesize, left, result, ::asinh);      tensor_unary_operation(m_samplesize, left, result, ::asinh);
1047  #endif    #endif  
1048      break;      break;
1049     case ACOSH:     case ACOSH:
1050  #ifdef _WIN32  #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1051      tensor_unary_operation(m_samplesize, left, result, escript::acosh_substitute);      tensor_unary_operation(m_samplesize, left, result, escript::acosh_substitute);
1052  #else  #else
1053      tensor_unary_operation(m_samplesize, left, result, ::acosh);      tensor_unary_operation(m_samplesize, left, result, ::acosh);
1054  #endif    #endif  
1055      break;      break;
1056     case ATANH:     case ATANH:
1057  #ifdef _WIN32  #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1058      tensor_unary_operation(m_samplesize, left, result, escript::atanh_substitute);      tensor_unary_operation(m_samplesize, left, result, escript::atanh_substitute);
1059  #else  #else
1060      tensor_unary_operation(m_samplesize, left, result, ::atanh);      tensor_unary_operation(m_samplesize, left, result, ::atanh);
1061  #endif    #endif  
1062      break;      break;
1063      case LOG10:      case LOG10:
1064      tensor_unary_operation(m_samplesize, left, result, ::log10);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log10);
1065      break;      break;
1066      case LOG:      case LOG:
1067      tensor_unary_operation(m_samplesize, left, result, ::log);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log);
1068      break;      break;
1069      case SIGN:      case SIGN:
1070      tensor_unary_operation(m_samplesize, left, result, escript::fsign);      tensor_unary_operation(m_samplesize, left, result, escript::fsign);
1071      break;      break;
1072      case ABS:      case ABS:
1073      tensor_unary_operation(m_samplesize, left, result, ::fabs);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::fabs);
1074      break;      break;
1075      case NEG:      case NEG:
1076      tensor_unary_operation(m_samplesize, left, result, negate<double>());      tensor_unary_operation(m_samplesize, left, result, negate<double>());
# Line 558  DataLazy::resolveUnary(ValueType& v, siz Line 1081  DataLazy::resolveUnary(ValueType& v, siz
1081      throw DataException("Programmer error - POS not supported for lazy data.");      throw DataException("Programmer error - POS not supported for lazy data.");
1082      break;      break;
1083      case EXP:      case EXP:
1084      tensor_unary_operation(m_samplesize, left, result, ::exp);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::exp);
1085      break;      break;
1086      case SQRT:      case SQRT:
1087      tensor_unary_operation(m_samplesize, left, result, ::sqrt);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sqrt);
1088      break;      break;
1089      case RECIP:      case RECIP:
1090      tensor_unary_operation(m_samplesize, left, result, bind1st(divides<double>(),1.));      tensor_unary_operation(m_samplesize, left, result, bind1st(divides<double>(),1.));
# Line 578  DataLazy::resolveUnary(ValueType& v, siz Line 1101  DataLazy::resolveUnary(ValueType& v, siz
1101      case LEZ:      case LEZ:
1102      tensor_unary_operation(m_samplesize, left, result, bind2nd(less_equal<double>(),0.0));      tensor_unary_operation(m_samplesize, left, result, bind2nd(less_equal<double>(),0.0));
1103      break;      break;
1104    // There are actually G_UNARY_P but I don't see a compelling reason to treat them differently
1105        case NEZ:
1106        tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsGT(),m_tol));
1107        break;
1108        case EZ:
1109        tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsLTE(),m_tol));
1110        break;
1111    
1112      default:      default:
1113      throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");      throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");
1114    }    }
1115    return &v;    return &(m_samples);
1116  }  }
1117    
1118    
1119    const DataTypes::ValueType*
1120    DataLazy::resolveNodeReduction(int tid, int sampleNo, size_t& roffset)
1121    {
1122        // we assume that any collapsing has been done before we get here
1123        // since we only have one argument we don't need to think about only
1124        // processing single points.
1125        // we will also know we won't get identity nodes
1126      if (m_readytype!='E')
1127      {
1128        throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
1129      }
1130      if (m_op==IDENTITY)
1131      {
1132        throw DataException("Programmer error - resolveNodeUnary should not be called on identity nodes.");
1133      }
1134      size_t loffset=0;
1135      const DataTypes::ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, loffset);
1136    
1137      roffset=m_samplesize*tid;
1138      unsigned int ndpps=getNumDPPSample();
1139      unsigned int psize=DataTypes::noValues(getShape());
1140      double* result=&(m_samples[roffset]);
1141      switch (m_op)
1142      {
1143        case MINVAL:
1144        {
1145          for (unsigned int z=0;z<ndpps;++z)
1146          {
1147            FMin op;
1148            *result=DataMaths::reductionOp(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max());
1149            loffset+=psize;
1150            result++;
1151          }
1152        }
1153        break;
1154        case MAXVAL:
1155        {
1156          for (unsigned int z=0;z<ndpps;++z)
1157          {
1158          FMax op;
1159          *result=DataMaths::reductionOp(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max()*-1);
1160          loffset+=psize;
1161          result++;
1162          }
1163        }
1164        break;
1165        default:
1166        throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");
1167      }
1168      return &(m_samples);
1169    }
1170    
1171    const DataTypes::ValueType*
1172    DataLazy::resolveNodeNP1OUT(int tid, int sampleNo, size_t& roffset)
1173    {
1174        // we assume that any collapsing has been done before we get here
1175        // since we only have one argument we don't need to think about only
1176        // processing single points.
1177      if (m_readytype!='E')
1178      {
1179        throw DataException("Programmer error - resolveNodeNP1OUT should only be called on expanded Data.");
1180      }
1181      if (m_op==IDENTITY)
1182      {
1183        throw DataException("Programmer error - resolveNodeNP1OUT should not be called on identity nodes.");
1184      }
1185      size_t subroffset;
1186      const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1187      roffset=m_samplesize*tid;
1188      size_t loop=0;
1189      size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1190      size_t step=getNoValues();
1191      size_t offset=roffset;
1192      switch (m_op)
1193      {
1194        case SYM:
1195        for (loop=0;loop<numsteps;++loop)
1196        {
1197            DataMaths::symmetric(*leftres,m_left->getShape(),subroffset, m_samples, getShape(), offset);
1198            subroffset+=step;
1199            offset+=step;
1200        }
1201        break;
1202        case NSYM:
1203        for (loop=0;loop<numsteps;++loop)
1204        {
1205            DataMaths::nonsymmetric(*leftres,m_left->getShape(),subroffset, m_samples, getShape(), offset);
1206            subroffset+=step;
1207            offset+=step;
1208        }
1209        break;
1210        default:
1211        throw DataException("Programmer error - resolveNP1OUT can not resolve operator "+opToString(m_op)+".");
1212      }
1213      return &m_samples;
1214    }
1215    
1216    const DataTypes::ValueType*
1217    DataLazy::resolveNodeNP1OUT_P(int tid, int sampleNo, size_t& roffset)
1218    {
1219        // we assume that any collapsing has been done before we get here
1220        // since we only have one argument we don't need to think about only
1221        // processing single points.
1222      if (m_readytype!='E')
1223      {
1224        throw DataException("Programmer error - resolveNodeNP1OUT_P should only be called on expanded Data.");
1225      }
1226      if (m_op==IDENTITY)
1227      {
1228        throw DataException("Programmer error - resolveNodeNP1OUT_P should not be called on identity nodes.");
1229      }
1230      size_t subroffset;
1231      size_t offset;
1232      const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1233      roffset=m_samplesize*tid;
1234      offset=roffset;
1235      size_t loop=0;
1236      size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1237      size_t outstep=getNoValues();
1238      size_t instep=m_left->getNoValues();
1239      switch (m_op)
1240      {
1241        case TRACE:
1242        for (loop=0;loop<numsteps;++loop)
1243        {
1244                DataMaths::trace(*leftres,m_left->getShape(),subroffset, m_samples ,getShape(),offset,m_axis_offset);
1245            subroffset+=instep;
1246            offset+=outstep;
1247        }
1248        break;
1249        case TRANS:
1250        for (loop=0;loop<numsteps;++loop)
1251        {
1252                DataMaths::transpose(*leftres,m_left->getShape(),subroffset, m_samples, getShape(),offset,m_axis_offset);
1253            subroffset+=instep;
1254            offset+=outstep;
1255        }
1256        break;
1257        default:
1258        throw DataException("Programmer error - resolveNP1OUTP can not resolve operator "+opToString(m_op)+".");
1259      }
1260      return &m_samples;
1261    }
1262    
1263  #define PROC_OP(X) \  
1264      for (int i=0;i<steps;++i,resultp+=resultStep) \  const DataTypes::ValueType*
1265      { \  DataLazy::resolveNodeNP1OUT_2P(int tid, int sampleNo, size_t& roffset)
1266         tensor_binary_operation(chunksize, &((*left)[lroffset]), &((*right)[rroffset]), resultp, X); \  {
1267         lroffset+=leftStep; \    if (m_readytype!='E')
1268         rroffset+=rightStep; \    {
1269        throw DataException("Programmer error - resolveNodeNP1OUT_2P should only be called on expanded Data.");
1270      }
1271      if (m_op==IDENTITY)
1272      {
1273        throw DataException("Programmer error - resolveNodeNP1OUT_2P should not be called on identity nodes.");
1274      }
1275      size_t subroffset;
1276      size_t offset;
1277      const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1278      roffset=m_samplesize*tid;
1279      offset=roffset;
1280      size_t loop=0;
1281      size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1282      size_t outstep=getNoValues();
1283      size_t instep=m_left->getNoValues();
1284      switch (m_op)
1285      {
1286        case SWAP:
1287        for (loop=0;loop<numsteps;++loop)
1288        {
1289                DataMaths::swapaxes(*leftres,m_left->getShape(),subroffset, m_samples, getShape(),offset, m_axis_offset, m_transpose);
1290            subroffset+=instep;
1291            offset+=outstep;
1292      }      }
1293        break;
1294        default:
1295        throw DataException("Programmer error - resolveNodeNP1OUT2P can not resolve operator "+opToString(m_op)+".");
1296      }
1297      return &m_samples;
1298    }
1299    
1300    
1301    
 /*  
   \brief Compute the value of the expression (binary operation) for the given sample.  
   \return Vector which stores the value of the subexpression for the given sample.  
   \param v A vector to store intermediate results.  
   \param offset Index in v to begin storing results.  
   \param sampleNo Sample number to evaluate.  
   \param roffset (output parameter) the offset in the return vector where the result begins.  
   
   The return value will be an existing vector so do not deallocate it.  
   If the result is stored in v it should be stored at the offset given.  
   Everything from offset to the end of v should be considered available for this method to use.  
 */  
1302  // This method assumes that any subexpressions which evaluate to Constant or Tagged Data  // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1303  // have already been collapsed to IDENTITY. So we must have at least one expanded child.  // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1304  // If both children are expanded, then we can process them in a single operation (we treat  // If both children are expanded, then we can process them in a single operation (we treat
# Line 618  DataLazy::resolveUnary(ValueType& v, siz Line 1308  DataLazy::resolveUnary(ValueType& v, siz
1308  // There is an additional complication when scalar operations are considered.  // There is an additional complication when scalar operations are considered.
1309  // For example, 2+Vector.  // For example, 2+Vector.
1310  // In this case each double within the point is treated individually  // In this case each double within the point is treated individually
1311  DataTypes::ValueType*  const DataTypes::ValueType*
1312  DataLazy::resolveBinary(ValueType& v,  size_t offset, int sampleNo, size_t& roffset) const  DataLazy::resolveNodeBinary(int tid, int sampleNo, size_t& roffset)
1313  {  {
1314  cout << "Resolve binary: " << toString() << endl;  LAZYDEBUG(cout << "Resolve binary: " << toString() << endl;)
1315    
1316    size_t lroffset=0, rroffset=0;    // offsets in the left and right result vectors    size_t lroffset=0, rroffset=0;    // offsets in the left and right result vectors
1317      // first work out which of the children are expanded      // first work out which of the children are expanded
1318    bool leftExp=(m_left->m_readytype=='E');    bool leftExp=(m_left->m_readytype=='E');
1319    bool rightExp=(m_right->m_readytype=='E');    bool rightExp=(m_right->m_readytype=='E');
1320    bool bigloops=((leftExp && rightExp) || (!leftExp && !rightExp)); // is processing in single step?    if (!leftExp && !rightExp)
1321    int steps=(bigloops?1:getNumDPPSample());    {
1322    size_t chunksize=(bigloops? m_samplesize : getNoValues());    // if bigloops, pretend the whole sample is a datapoint      throw DataException("Programmer Error - please use collapse if neither argument has type 'E'.");
1323    if (m_left->getRank()!=m_right->getRank())    // need to deal with scalar * ? ops    }
1324    {    bool leftScalar=(m_left->getRank()==0);
1325      EsysAssert((m_left->getRank()==0) || (m_right->getRank()==0), "Error - Ranks must match unless one is 0.");    bool rightScalar=(m_right->getRank()==0);
1326      steps=getNumDPPSample()*max(m_left->getNoValues(),m_right->getNoValues());    if ((m_left->getRank()!=m_right->getRank()) && (!leftScalar && !rightScalar))
1327      chunksize=1;    // for scalar    {
1328    }          throw DataException("resolveBinary - ranks of arguments must match unless one of them is scalar.");
1329    int leftStep=((leftExp && !rightExp)? m_right->getNoValues() : 0);    }
1330    int rightStep=((rightExp && !leftExp)? m_left->getNoValues() : 0);    size_t leftsize=m_left->getNoValues();
1331    int resultStep=max(leftStep,rightStep);   // only one (at most) should be !=0    size_t rightsize=m_right->getNoValues();
1332      size_t chunksize=1;           // how many doubles will be processed in one go
1333      int leftstep=0;       // how far should the left offset advance after each step
1334      int rightstep=0;
1335      int numsteps=0;       // total number of steps for the inner loop
1336      int oleftstep=0;  // the o variables refer to the outer loop
1337      int orightstep=0; // The outer loop is only required in cases where there is an extended scalar
1338      int onumsteps=1;
1339      
1340      bool LES=(leftExp && leftScalar); // Left is an expanded scalar
1341      bool RES=(rightExp && rightScalar);
1342      bool LS=(!leftExp && leftScalar); // left is a single scalar
1343      bool RS=(!rightExp && rightScalar);
1344      bool LN=(!leftExp && !leftScalar);    // left is a single non-scalar
1345      bool RN=(!rightExp && !rightScalar);
1346      bool LEN=(leftExp && !leftScalar);    // left is an expanded non-scalar
1347      bool REN=(rightExp && !rightScalar);
1348    
1349      if ((LES && RES) || (LEN && REN)) // both are Expanded scalars or both are expanded non-scalars
1350      {
1351        chunksize=m_left->getNumDPPSample()*leftsize;
1352        leftstep=0;
1353        rightstep=0;
1354        numsteps=1;
1355      }
1356      else if (LES || RES)
1357      {
1358        chunksize=1;
1359        if (LES)        // left is an expanded scalar
1360        {
1361            if (RS)
1362            {
1363               leftstep=1;
1364               rightstep=0;
1365               numsteps=m_left->getNumDPPSample();
1366            }
1367            else        // RN or REN
1368            {
1369               leftstep=0;
1370               oleftstep=1;
1371               rightstep=1;
1372               orightstep=(RN ? -(int)rightsize : 0);
1373               numsteps=rightsize;
1374               onumsteps=m_left->getNumDPPSample();
1375            }
1376        }
1377        else        // right is an expanded scalar
1378        {
1379            if (LS)
1380            {
1381               rightstep=1;
1382               leftstep=0;
1383               numsteps=m_right->getNumDPPSample();
1384            }
1385            else
1386            {
1387               rightstep=0;
1388               orightstep=1;
1389               leftstep=1;
1390               oleftstep=(LN ? -(int)leftsize : 0);
1391               numsteps=leftsize;
1392               onumsteps=m_right->getNumDPPSample();
1393            }
1394        }
1395      }
1396      else  // this leaves (LEN, RS), (LEN, RN) and their transposes
1397      {
1398        if (LEN)    // and Right will be a single value
1399        {
1400            chunksize=rightsize;
1401            leftstep=rightsize;
1402            rightstep=0;
1403            numsteps=m_left->getNumDPPSample();
1404            if (RS)
1405            {
1406               numsteps*=leftsize;
1407            }
1408        }
1409        else    // REN
1410        {
1411            chunksize=leftsize;
1412            rightstep=leftsize;
1413            leftstep=0;
1414            numsteps=m_right->getNumDPPSample();
1415            if (LS)
1416            {
1417               numsteps*=rightsize;
1418            }
1419        }
1420      }
1421    
1422      int resultStep=max(leftstep,rightstep);   // only one (at most) should be !=0
1423      // Get the values of sub-expressions      // Get the values of sub-expressions
1424    const ValueType* left=m_left->resolveSample(v,offset,sampleNo,lroffset);    const ValueType* left=m_left->resolveNodeSample(tid,sampleNo,lroffset);  
1425    const ValueType* right=m_right->resolveSample(v,offset+m_samplesize,sampleNo,rroffset); // Note    const ValueType* right=m_right->resolveNodeSample(tid,sampleNo,rroffset);
1426      // the right child starts further along.  LAZYDEBUG(cout << "Post sub calls in " << toString() << endl;)
1427    double* resultp=&(v[offset]);     // results are stored at the vector offset we recieved  LAZYDEBUG(cout << "shapes=" << DataTypes::shapeToString(m_left->getShape()) << "," << DataTypes::shapeToString(m_right->getShape()) << endl;)
1428    LAZYDEBUG(cout << "chunksize=" << chunksize << endl << "leftstep=" << leftstep << " rightstep=" << rightstep;)
1429    LAZYDEBUG(cout << " numsteps=" << numsteps << endl << "oleftstep=" << oleftstep << " orightstep=" << orightstep;)
1430    LAZYDEBUG(cout << "onumsteps=" << onumsteps << endl;)
1431    LAZYDEBUG(cout << " DPPS=" << m_left->getNumDPPSample() << "," <<m_right->getNumDPPSample() << endl;)
1432    LAZYDEBUG(cout << "" << LS << RS << LN << RN << LES << RES <<LEN << REN <<   endl;)
1433    
1434    LAZYDEBUG(cout << "Left res["<< lroffset<< "]=" << (*left)[lroffset] << endl;)
1435    LAZYDEBUG(cout << "Right res["<< rroffset<< "]=" << (*right)[rroffset] << endl;)
1436    
1437    
1438      roffset=m_samplesize*tid;
1439      double* resultp=&(m_samples[roffset]);        // results are stored at the vector offset we recieved
1440    switch(m_op)    switch(m_op)
1441    {    {
1442      case ADD:      case ADD:
1443      PROC_OP(plus<double>());          PROC_OP(NO_ARG,plus<double>());
1444      break;      break;
1445      case SUB:      case SUB:
1446      PROC_OP(minus<double>());      PROC_OP(NO_ARG,minus<double>());
1447      break;      break;
1448      case MUL:      case MUL:
1449      PROC_OP(multiplies<double>());      PROC_OP(NO_ARG,multiplies<double>());
1450      break;      break;
1451      case DIV:      case DIV:
1452      PROC_OP(divides<double>());      PROC_OP(NO_ARG,divides<double>());
1453      break;      break;
1454      case POW:      case POW:
1455      PROC_OP(::pow);         PROC_OP(double (double,double),::pow);
1456      break;      break;
1457      default:      default:
1458      throw DataException("Programmer error - resolveBinary can not resolve operator "+opToString(m_op)+".");      throw DataException("Programmer error - resolveBinary can not resolve operator "+opToString(m_op)+".");
1459    }    }
1460    roffset=offset;    LAZYDEBUG(cout << "Result res[" << roffset<< "]" << m_samples[roffset] << endl;)
1461    return &v;    return &m_samples;
1462  }  }
1463    
1464    
1465    // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1466    // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1467    // unlike the other resolve helpers, we must treat these datapoints separately.
1468    const DataTypes::ValueType*
1469    DataLazy::resolveNodeTProd(int tid, int sampleNo, size_t& roffset)
1470    {
1471    LAZYDEBUG(cout << "Resolve TensorProduct: " << toString() << endl;)
1472    
1473  /*    size_t lroffset=0, rroffset=0;    // offsets in the left and right result vectors
1474    \brief Compute the value of the expression for the given sample.      // first work out which of the children are expanded
1475    \return Vector which stores the value of the subexpression for the given sample.    bool leftExp=(m_left->m_readytype=='E');
1476    \param v A vector to store intermediate results.    bool rightExp=(m_right->m_readytype=='E');
1477    \param offset Index in v to begin storing results.    int steps=getNumDPPSample();
1478    \param sampleNo Sample number to evaluate.    int leftStep=(leftExp? m_left->getNoValues() : 0);        // do not have scalars as input to this method
1479    \param roffset (output parameter) the offset in the return vector where the result begins.    int rightStep=(rightExp?m_right->getNoValues() : 0);
1480    
1481    The return value will be an existing vector so do not deallocate it.    int resultStep=getNoValues();
1482  */    roffset=m_samplesize*tid;
1483  // the vector and the offset are a place where the method could write its data if it wishes    size_t offset=roffset;
 // it is not obligated to do so. For example, if it has its own storage already, it can use that.  
 // Hence the return value to indicate where the data is actually stored.  
 // Regardless, the storage should be assumed to be used, even if it isn't.  
1484    
1485  // the roffset is the offset within the returned vector where the data begins    const ValueType* left=m_left->resolveNodeSample(tid, sampleNo, lroffset);
1486  const DataTypes::ValueType*  
1487  DataLazy::resolveSample(ValueType& v, size_t offset, int sampleNo, size_t& roffset)    const ValueType* right=m_right->resolveNodeSample(tid, sampleNo, rroffset);
1488  {  
1489  cout << "Resolve sample " << toString() << endl;  LAZYDEBUG(cerr << "[Left shape]=" << DataTypes::shapeToString(m_left->getShape()) << "\n[Right shape]=" << DataTypes::shapeToString(m_right->getShape()) << " result=" <<DataTypes::shapeToString(getShape()) <<  endl;
1490      // collapse so we have a 'E' node or an IDENTITY for some other type  cout << getNoValues() << endl;)
1491    if (m_readytype!='E' && m_op!=IDENTITY)  
1492    {  
1493      collapse();  LAZYDEBUG(cerr << "Post sub calls: " << toString() << endl;)
1494    }  LAZYDEBUG(cout << "LeftExp=" << leftExp << " rightExp=" << rightExp << endl;)
1495    if (m_op==IDENTITY)    LAZYDEBUG(cout << "LeftR=" << m_left->getRank() << " rightExp=" << m_right->getRank() << endl;)
1496    {  LAZYDEBUG(cout << "LeftSize=" << m_left->getNoValues() << " RightSize=" << m_right->getNoValues() << endl;)
1497      const ValueType& vec=m_id->getVector();  LAZYDEBUG(cout << "m_samplesize=" << m_samplesize << endl;)
1498      if (m_readytype=='C')  LAZYDEBUG(cout << "outputshape=" << DataTypes::shapeToString(getShape()) << endl;)
1499      {  LAZYDEBUG(cout << "DPPS=" << m_right->getNumDPPSample() <<"."<<endl;)
1500      roffset=0;  
1501      return &(vec);    double* resultp=&(m_samples[offset]);     // results are stored at the vector offset we recieved
1502      }    switch(m_op)
     roffset=m_id->getPointOffset(sampleNo, 0);  
     return &(vec);  
   }  
   if (m_readytype!='E')  
   {  
     throw DataException("Programmer Error - Collapse did not produce an expanded node.");  
   }  
   switch (getOpgroup(m_op))  
1503    {    {
1504    case G_UNARY: return resolveUnary(v, offset,sampleNo,roffset);      case PROD:
1505    case G_BINARY: return resolveBinary(v, offset,sampleNo,roffset);      for (int i=0;i<steps;++i,resultp+=resultStep)
1506    default:      {
1507      throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");            const double *ptr_0 = &((*left)[lroffset]);
1508              const double *ptr_1 = &((*right)[rroffset]);
1509    
1510    LAZYDEBUG(cout << DataTypes::pointToString(*left, m_left->getShape(),lroffset,"LEFT") << endl;)
1511    LAZYDEBUG(cout << DataTypes::pointToString(*right,m_right->getShape(),rroffset, "RIGHT") << endl;)
1512    
1513              matrix_matrix_product(m_SL, m_SM, m_SR, ptr_0, ptr_1, resultp, m_transpose);
1514    
1515          lroffset+=leftStep;
1516          rroffset+=rightStep;
1517        }
1518        break;
1519        default:
1520        throw DataException("Programmer error - resolveTProduct can not resolve operator "+opToString(m_op)+".");
1521    }    }
1522      roffset=offset;
1523      return &m_samples;
1524    }
1525    
1526    
// Public entry point for resolving one sample of the lazy expression.
// Determines the calling thread's id (0 when OpenMP is unavailable) and
// delegates to resolveNodeSample.  When LAZY_STACK_PROF is defined, the
// address of a local is recorded before/after the recursive resolve so that
// the maximum stack depth used can be estimated and reported.
// \param sampleNo the sample to evaluate
// \param roffset (output) offset within the returned vector where the result begins
const DataTypes::ValueType*
DataLazy::resolveSample(int sampleNo, size_t& roffset)
{
#ifdef _OPENMP
    int tid=omp_get_thread_num();
#else
    int tid=0;
#endif

#ifdef LAZY_STACK_PROF
    // both markers start at this frame; presumably deeper recursive calls
    // advance stackend so the difference approximates stack usage - TODO confirm
    stackstart[tid]=&tid;
    stackend[tid]=&tid;
    const DataTypes::ValueType* r=resolveNodeSample(tid, sampleNo, roffset);
    size_t d=(size_t)stackstart[tid]-(size_t)stackend[tid];
    #pragma omp critical
    if (d>maxstackuse)      // keep a global high-water mark across all threads
    {
cout << "Max resolve Stack use " << d << endl;
        maxstackuse=d;
    }
    return r;
#else
    return resolveNodeSample(tid, sampleNo, roffset);
#endif
}
1552    
1553    
1554    // This needs to do the work of the identity constructor
1555    void
1556    DataLazy::resolveToIdentity()
1557    {
1558       if (m_op==IDENTITY)
1559        return;
1560       DataReady_ptr p=resolveNodeWorker();
1561       makeIdentity(p);
1562    }
1563    
// Turn this node into an IDENTITY node wrapping the DataReady instance p.
// All operation-specific state is cleared and the child links are released
// so that the (now unused) subtree can be freed.
void DataLazy::makeIdentity(const DataReady_ptr& p)
{
   m_op=IDENTITY;
   m_axis_offset=0;
   m_transpose=0;
   m_SL=m_SM=m_SR=0;
   m_children=m_height=0;       // an identity node is a leaf of the expression tree
   m_id=p;
   // record the storage class of the wrapped data: C=constant, E=expanded, T=tagged
   if(p->isConstant()) {m_readytype='C';}
   else if(p->isExpanded()) {m_readytype='E';}
   else if (p->isTagged()) {m_readytype='T';}
   else {throw DataException("Unknown DataReady instance in convertToIdentity constructor.");}
   m_samplesize=p->getNumDPPSample()*p->getNoValues();
   m_left.reset();              // drop child references
   m_right.reset();
}
1580    
1581    
 // To simplify the memory management, all threads operate on one large vector, rather than one each.  
 // Each sample is evaluated independently and copied into the result DataExpanded.  
1582  DataReady_ptr  DataReady_ptr
1583  DataLazy::resolve()  DataLazy::resolve()
1584  {  {
1585        resolveToIdentity();
1586        return m_id;
1587    }
1588    
1589  cout << "Sample size=" << m_samplesize << endl;  // This version of resolve uses storage in each node to hold results
1590  cout << "Buffers=" << m_buffsRequired << endl;  DataReady_ptr
1591    DataLazy::resolveNodeWorker()
1592    {
1593    if (m_readytype!='E')     // if the whole sub-expression is Constant or Tagged, then evaluate it normally    if (m_readytype!='E')     // if the whole sub-expression is Constant or Tagged, then evaluate it normally
1594    {    {
1595      collapse();      collapse();
# Line 738  cout << "Buffers=" << m_buffsRequired << Line 1599  cout << "Buffers=" << m_buffsRequired <<
1599      return m_id;      return m_id;
1600    }    }
1601      // from this point on we must have m_op!=IDENTITY and m_readytype=='E'      // from this point on we must have m_op!=IDENTITY and m_readytype=='E'
   size_t threadbuffersize=m_samplesize*(max(1,m_buffsRequired));    // Each thread needs to have enough  
     // storage to evaluate its expression  
   int numthreads=1;  
 #ifdef _OPENMP  
   numthreads=getNumberOfThreads();  
   int threadnum=0;  
 #endif  
   ValueType v(numthreads*threadbuffersize);  
 cout << "Buffer created with size=" << v.size() << endl;  
1602    DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(),  ValueType(getNoValues()));    DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(),  ValueType(getNoValues()));
1603    ValueType& resvec=result->getVector();    ValueType& resvec=result->getVectorRW();
1604    DataReady_ptr resptr=DataReady_ptr(result);    DataReady_ptr resptr=DataReady_ptr(result);
1605    
1606    int sample;    int sample;
   size_t outoffset;     // offset in the output data  
1607    int totalsamples=getNumSamples();    int totalsamples=getNumSamples();
1608    const ValueType* res=0;   // Vector storing the answer    const ValueType* res=0;   // Storage for answer
1609    size_t resoffset=0;       // where in the vector to find the answer  LAZYDEBUG(cout << "Total number of samples=" <<totalsamples << endl;)
1610    #pragma omp parallel for private(sample,resoffset,outoffset,threadnum,res) schedule(static)    #pragma omp parallel private(sample,res)
   for (sample=0;sample<totalsamples;++sample)  
1611    {    {
1612  cout << "################################# " << sample << endl;      size_t roffset=0;
1613    #ifdef LAZY_STACK_PROF
1614        stackstart[omp_get_thread_num()]=&roffset;
1615        stackend[omp_get_thread_num()]=&roffset;
1616    #endif
1617        #pragma omp for schedule(static)
1618        for (sample=0;sample<totalsamples;++sample)
1619        {
1620            roffset=0;
1621  #ifdef _OPENMP  #ifdef _OPENMP
1622      res=resolveSample(v,threadbuffersize*omp_get_thread_num(),sample,resoffset);              res=resolveNodeSample(omp_get_thread_num(),sample,roffset);
1623  #else  #else
1624      res=resolveSample(v,0,sample,resoffset);   // res would normally be v, but not if its a single IDENTITY op.              res=resolveNodeSample(0,sample,roffset);
1625  #endif  #endif
1626  cerr << "-------------------------------- " << endl;  LAZYDEBUG(cout << "Sample #" << sample << endl;)
1627      outoffset=result->getPointOffset(sample,0);  LAZYDEBUG(cout << "Final res[" << roffset<< "]=" << (*res)[roffset] << (*res)[roffset]<< endl; )
1628  cerr << "offset=" << outoffset << endl;              DataVector::size_type outoffset=result->getPointOffset(sample,0);
1629      for (unsigned int i=0;i<m_samplesize;++i,++outoffset,++resoffset)   // copy values into the output vector              memcpy(&(resvec[outoffset]),&((*res)[roffset]),m_samplesize*sizeof(DataVector::ElementType));
1630      {      }
1631      resvec[outoffset]=(*res)[resoffset];    }
1632      }  #ifdef LAZY_STACK_PROF
1633  cerr << "*********************************" << endl;    for (int i=0;i<getNumberOfThreads();++i)
1634      {
1635        size_t r=((size_t)stackstart[i] - (size_t)stackend[i]);
1636    //  cout << i << " " << stackstart[i] << " .. " << stackend[i] << " = " <<  r << endl;
1637        if (r>maxstackuse)
1638        {
1639            maxstackuse=r;
1640        }
1641    }    }
1642      cout << "Max resolve Stack use=" << maxstackuse << endl;
1643    #endif
1644    return resptr;    return resptr;
1645  }  }
1646    
# Line 781  DataLazy::toString() const Line 1649  DataLazy::toString() const
1649  {  {
1650    ostringstream oss;    ostringstream oss;
1651    oss << "Lazy Data:";    oss << "Lazy Data:";
1652    intoString(oss);    if (escriptParams.getPRINT_LAZY_TREE()==0)
1653      {
1654          intoString(oss);
1655      }
1656      else
1657      {
1658        oss << endl;
1659        intoTreeString(oss,"");
1660      }
1661    return oss.str();    return oss.str();
1662  }  }
1663    
# Line 789  DataLazy::toString() const Line 1665  DataLazy::toString() const
1665  void  void
1666  DataLazy::intoString(ostringstream& oss) const  DataLazy::intoString(ostringstream& oss) const
1667  {  {
1668    //    oss << "[" << m_children <<";"<<m_height <<"]";
1669    switch (getOpgroup(m_op))    switch (getOpgroup(m_op))
1670    {    {
1671    case G_IDENTITY:    case G_IDENTITY:
# Line 818  DataLazy::intoString(ostringstream& oss) Line 1695  DataLazy::intoString(ostringstream& oss)
1695      oss << ')';      oss << ')';
1696      break;      break;
1697    case G_UNARY:    case G_UNARY:
1698      case G_UNARY_P:
1699      case G_NP1OUT:
1700      case G_NP1OUT_P:
1701      case G_REDUCTION:
1702        oss << opToString(m_op) << '(';
1703        m_left->intoString(oss);
1704        oss << ')';
1705        break;
1706      case G_TENSORPROD:
1707        oss << opToString(m_op) << '(';
1708        m_left->intoString(oss);
1709        oss << ", ";
1710        m_right->intoString(oss);
1711        oss << ')';
1712        break;
1713      case G_NP1OUT_2P:
1714      oss << opToString(m_op) << '(';      oss << opToString(m_op) << '(';
1715      m_left->intoString(oss);      m_left->intoString(oss);
1716        oss << ", " << m_axis_offset << ", " << m_transpose;
1717      oss << ')';      oss << ')';
1718      break;      break;
1719    default:    default:
# Line 827  DataLazy::intoString(ostringstream& oss) Line 1721  DataLazy::intoString(ostringstream& oss)
1721    }    }
1722  }  }
1723    
1724  // Note that in this case, deepCopy does not make copies of the leaves.  
1725  // Hopefully copy on write (or whatever we end up using) will take care of this.  void
1726    DataLazy::intoTreeString(ostringstream& oss, string indent) const
1727    {
1728      oss << '[' << m_rank << ':' << setw(3) << m_samplesize << "] " << indent;
1729      switch (getOpgroup(m_op))
1730      {
1731      case G_IDENTITY:
1732        if (m_id->isExpanded())
1733        {
1734           oss << "E";
1735        }
1736        else if (m_id->isTagged())
1737        {
1738          oss << "T";
1739        }
1740        else if (m_id->isConstant())
1741        {
1742          oss << "C";
1743        }
1744        else
1745        {
1746          oss << "?";
1747        }
1748        oss << '@' << m_id.get() << endl;
1749        break;
1750      case G_BINARY:
1751        oss << opToString(m_op) << endl;
1752        indent+='.';
1753        m_left->intoTreeString(oss, indent);
1754        m_right->intoTreeString(oss, indent);
1755        break;
1756      case G_UNARY:
1757      case G_UNARY_P:
1758      case G_NP1OUT:
1759      case G_NP1OUT_P:
1760      case G_REDUCTION:
1761        oss << opToString(m_op) << endl;
1762        indent+='.';
1763        m_left->intoTreeString(oss, indent);
1764        break;
1765      case G_TENSORPROD:
1766        oss << opToString(m_op) << endl;
1767        indent+='.';
1768        m_left->intoTreeString(oss, indent);
1769        m_right->intoTreeString(oss, indent);
1770        break;
1771      case G_NP1OUT_2P:
1772        oss << opToString(m_op) << ", " << m_axis_offset << ", " << m_transpose<< endl;
1773        indent+='.';
1774        m_left->intoTreeString(oss, indent);
1775        break;
1776      default:
1777        oss << "UNKNOWN";
1778      }
1779    }
1780    
1781    
1782  DataAbstract*  DataAbstract*
1783  DataLazy::deepCopy()  DataLazy::deepCopy()
1784  {  {
1785    if (m_op==IDENTITY)    switch (getOpgroup(m_op))
1786    {    {
1787      return new DataLazy(m_left);    // we don't need to copy the child here    case G_IDENTITY:  return new DataLazy(m_id->deepCopy()->getPtr());
1788      case G_UNARY:
1789      case G_REDUCTION:      return new DataLazy(m_left->deepCopy()->getPtr(),m_op);
1790      case G_UNARY_P:   return new DataLazy(m_left->deepCopy()->getPtr(), m_op, m_tol);
1791      case G_BINARY:    return new DataLazy(m_left->deepCopy()->getPtr(),m_right->deepCopy()->getPtr(),m_op);
1792      case G_NP1OUT: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(),m_op);
1793      case G_TENSORPROD: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
1794      case G_NP1OUT_P:   return new DataLazy(m_left->deepCopy()->getPtr(),m_op,  m_axis_offset);
1795      case G_NP1OUT_2P:  return new DataLazy(m_left->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
1796      default:
1797        throw DataException("Programmer error - do not know how to deepcopy operator "+opToString(m_op)+".");
1798    }    }
   return new DataLazy(m_left->deepCopy()->getPtr(),m_right->deepCopy()->getPtr(),m_op);  
1799  }  }
1800    
1801    
1802    
1803    // There is no single, natural interpretation of getLength on DataLazy.
1804    // Instances of DataReady can look at the size of their vectors.
1805    // For lazy though, it could be the size the data would be if it were resolved;
1806    // or it could be some function of the lengths of the DataReady instances which
1807    // form part of the expression.
1808    // Rather than have people making assumptions, I have disabled the method.
1809  DataTypes::ValueType::size_type  DataTypes::ValueType::size_type
1810  DataLazy::getLength() const  DataLazy::getLength() const
1811  {  {
1812    return m_length;    throw DataException("getLength() does not make sense for lazy data.");
1813  }  }
1814    
1815    
# Line 853  DataLazy::getSlice(const DataTypes::Regi Line 1819  DataLazy::getSlice(const DataTypes::Regi
1819    throw DataException("getSlice - not implemented for Lazy objects.");    throw DataException("getSlice - not implemented for Lazy objects.");
1820  }  }
1821    
1822    
// Compute the offset of a datapoint within the underlying storage.
// To do this we need to rely on our child nodes.
DataTypes::ValueType::size_type
DataLazy::getPointOffset(int sampleNo,
                 int dataPointNo)
{
  if (m_op==IDENTITY)
  {
    return m_id->getPointOffset(sampleNo,dataPointNo);
  }
  if (m_readytype!='E')
  {
    // non-expanded expressions are evaluated first; collapse() turns this
    // node into an IDENTITY wrapper around the result
    collapse();
    return m_id->getPointOffset(sampleNo,dataPointNo);
  }
  // at this point we do not have an identity node and the expression will be Expanded
  // so we only need to know which child to ask
  if (m_left->m_readytype=='E')
  {
    return m_left->getPointOffset(sampleNo,dataPointNo);
  }
  else
  {
    return m_right->getPointOffset(sampleNo,dataPointNo);
  }
}
1848    
1849    // To do this we need to rely on our child nodes
1850  DataTypes::ValueType::size_type  DataTypes::ValueType::size_type
1851  DataLazy::getPointOffset(int sampleNo,  DataLazy::getPointOffset(int sampleNo,
1852                   int dataPointNo) const                   int dataPointNo) const
1853  {  {
1854    throw DataException("getPointOffset - not implemented for Lazy objects - yet.");    if (m_op==IDENTITY)
1855      {
1856        return m_id->getPointOffset(sampleNo,dataPointNo);
1857      }
1858      if (m_readytype=='E')
1859      {
1860        // at this point we do not have an identity node and the expression will be Expanded
1861        // so we only need to know which child to ask
1862        if (m_left->m_readytype=='E')
1863        {
1864        return m_left->getPointOffset(sampleNo,dataPointNo);
1865        }
1866        else
1867        {
1868        return m_right->getPointOffset(sampleNo,dataPointNo);
1869        }
1870      }
1871      if (m_readytype=='C')
1872      {
1873        return m_left->getPointOffset(sampleNo,dataPointNo); // which child doesn't matter
1874      }
1875      throw DataException("Programmer error - getPointOffset on lazy data may require collapsing (but this object is marked const).");
1876  }  }
1877    
1878  // It would seem that DataTagged will need to be treated differently since even after setting all tags  
1879  // to zero, all the tags from all the DataTags would be in the result.  // I have decided to let Data:: handle this issue.
 // However since they all have the same value (0) whether they are there or not should not matter.  
 // So I have decided that for all types this method will create a constant 0.  
 // It can be promoted up as required.  
 // A possible efficiency concern might be expanded->constant->expanded which has an extra memory management  
 // but we can deal with that if it arrises.  
1880  void  void
1881  DataLazy::setToZero()  DataLazy::setToZero()
1882  {  {
1883    DataTypes::ValueType v(getNoValues(),0);  //   DataTypes::ValueType v(getNoValues(),0);
1884    m_id=DataReady_ptr(new DataConstant(getFunctionSpace(),getShape(),v));  //   m_id=DataReady_ptr(new DataConstant(getFunctionSpace(),getShape(),v));
1885    m_op=IDENTITY;  //   m_op=IDENTITY;
1886    m_right.reset();    //   m_right.reset();  
1887    m_left.reset();  //   m_left.reset();
1888    m_readytype='C';  //   m_readytype='C';
1889    m_buffsRequired=1;  //   m_buffsRequired=1;
1890    
1891      privdebug=privdebug;  // to stop the compiler complaining about unused privdebug
1892      throw DataException("Programmer error - setToZero not supported for DataLazy (DataLazy objects should be read only).");
1893    }
1894    
// Report whether the result of this expression will behave like expanded data
// (m_readytype is set to 'E' when any part of the expression is expanded).
bool
DataLazy::actsExpanded() const
{
    return (m_readytype=='E');
}
1900    
1901  }   // end namespace  }   // end namespace

Legend:
Removed from v.1910  
changed lines
  Added in v.2777

  ViewVC Help
Powered by ViewVC 1.1.26