/[escript]/branches/clazy/escriptcore/src/DataLazy.cpp
ViewVC logotype

Diff of /branches/clazy/escriptcore/src/DataLazy.cpp

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

branches/schroedinger/escript/src/DataLazy.cpp revision 1910 by jfenwick, Thu Oct 23 03:05:28 2008 UTC trunk/escript/src/DataLazy.cpp revision 3317 by caltinay, Thu Oct 28 00:50:41 2010 UTC
# Line 1  Line 1 
1    
2  /*******************************************************  /*******************************************************
3  *  *
4  * Copyright (c) 2003-2008 by University of Queensland  * Copyright (c) 2003-2010 by University of Queensland
5  * Earth Systems Science Computational Center (ESSCC)  * Earth Systems Science Computational Center (ESSCC)
6  * http://www.uq.edu.au/esscc  * http://www.uq.edu.au/esscc
7  *  *
# Line 13  Line 13 
13    
14    
15  #include "DataLazy.h"  #include "DataLazy.h"
16  #ifdef USE_NETCDF  #include "esysUtils/Esys_MPI.h"
 #include <netcdfcpp.h>  
 #endif  
 #ifdef PASO_MPI  
 #include <mpi.h>  
 #endif  
17  #ifdef _OPENMP  #ifdef _OPENMP
18  #include <omp.h>  #include <omp.h>
19  #endif  #endif
# Line 26  Line 21 
21  #include "DataTypes.h"  #include "DataTypes.h"
22  #include "Data.h"  #include "Data.h"
23  #include "UnaryFuncs.h"     // for escript::fsign  #include "UnaryFuncs.h"     // for escript::fsign
24    #include "Utils.h"
25    
26    #include "EscriptParams.h"
27    
28    #ifdef USE_NETCDF
29    #include <netcdfcpp.h>
30    #endif
31    
32    #include <iomanip>      // for some fancy formatting in debug
33    
34    // #define LAZYDEBUG(X) if (privdebug){X;}
35    #define LAZYDEBUG(X)
36    namespace
37    {
38    bool privdebug=false;
39    
40    #define ENABLEDEBUG privdebug=true;
41    #define DISABLEDEBUG privdebug=false;
42    }
43    
44    // #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {cerr << "\n!!!!!!! SIZE LIMIT EXCEEDED " << m_children << ";" << m_height << endl << toString() << endl;resolveToIdentity();}
45    
46    // #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {cerr << "SIZE LIMIT EXCEEDED " << m_height << endl;resolveToIdentity();}
47    
48    
49    #define SIZELIMIT if (m_height>escript::escriptParams.getTOO_MANY_LEVELS())  {if (escript::escriptParams.getLAZY_VERBOSE()){cerr << "SIZE LIMIT EXCEEDED height=" << m_height << endl;}resolveToIdentity();}
50    
51  /*  /*
52  How does DataLazy work?  How does DataLazy work?
# Line 47  I will refer to individual DataLazy obje Line 68  I will refer to individual DataLazy obje
68  Each node also stores:  Each node also stores:
69  - m_readytype \in {'E','T','C','?'} ~ indicates what sort of DataReady would be produced if the expression was  - m_readytype \in {'E','T','C','?'} ~ indicates what sort of DataReady would be produced if the expression was
70      evaluated.      evaluated.
 - m_length ~ how many values would be stored in the answer if the expression was evaluated.  
71  - m_buffsrequired ~ the larged number of samples which would need to be kept simultaneously in order to  - m_buffsrequired ~ the larged number of samples which would need to be kept simultaneously in order to
72      evaluate the expression.      evaluate the expression.
73  - m_samplesize ~ the number of doubles stored in a sample.  - m_samplesize ~ the number of doubles stored in a sample.
# Line 69  The convention that I use, is that the r Line 89  The convention that I use, is that the r
89    
90  For expressions which evaluate to Constant or Tagged, there is a different evaluation method.  For expressions which evaluate to Constant or Tagged, there is a different evaluation method.
91  The collapse method invokes the (non-lazy) operations on the Data class to evaluate the expression.  The collapse method invokes the (non-lazy) operations on the Data class to evaluate the expression.
92    
93    To add a new operator you need to do the following (plus anything I might have forgotten - adding a new group for example):
94    1) Add to the ES_optype.
95    2) determine what opgroup your operation belongs to (X)
96    3) add a string for the op to the end of ES_opstrings
97    4) increase ES_opcount
98    5) add an entry (X) to opgroups
99    6) add an entry to the switch in collapseToReady
100    7) add an entry to resolveX
101  */  */
102    
103    
# Line 78  using namespace boost; Line 107  using namespace boost;
107  namespace escript  namespace escript
108  {  {
109    
 const std::string&  
 opToString(ES_optype op);  
   
110  namespace  namespace
111  {  {
112    
113    
114    // enabling this will print out when ever the maximum stacksize used by resolve increases
115    // it assumes _OPENMP is also in use
116    //#define LAZY_STACK_PROF
117    
118    
119    
120    #ifndef _OPENMP
121      #ifdef LAZY_STACK_PROF
122      #undef LAZY_STACK_PROF
123      #endif
124    #endif
125    
126    
127    #ifdef LAZY_STACK_PROF
128    std::vector<void*> stackstart(getNumberOfThreads());
129    std::vector<void*> stackend(getNumberOfThreads());
130    size_t maxstackuse=0;
131    #endif
132    
133  enum ES_opgroup  enum ES_opgroup
134  {  {
135     G_UNKNOWN,     G_UNKNOWN,
136     G_IDENTITY,     G_IDENTITY,
137     G_BINARY,        // pointwise operations with two arguments     G_BINARY,        // pointwise operations with two arguments
138     G_UNARY      // pointwise operations with one argument     G_UNARY,     // pointwise operations with one argument
139       G_UNARY_P,       // pointwise operations with one argument, requiring a parameter
140       G_NP1OUT,        // non-pointwise op with one output
141       G_NP1OUT_P,      // non-pointwise op with one output requiring a parameter
142       G_TENSORPROD,    // general tensor product
143       G_NP1OUT_2P,     // non-pointwise op with one output requiring two params
144       G_REDUCTION,     // non-pointwise unary op with a scalar output
145       G_CONDEVAL
146  };  };
147    
148    
# Line 100  string ES_opstrings[]={"UNKNOWN","IDENTI Line 153  string ES_opstrings[]={"UNKNOWN","IDENTI
153              "asin","acos","atan","sinh","cosh","tanh","erf",              "asin","acos","atan","sinh","cosh","tanh","erf",
154              "asinh","acosh","atanh",              "asinh","acosh","atanh",
155              "log10","log","sign","abs","neg","pos","exp","sqrt",              "log10","log","sign","abs","neg","pos","exp","sqrt",
156              "1/","where>0","where<0","where>=0","where<=0"};              "1/","where>0","where<0","where>=0","where<=0", "where<>0","where=0",
157  int ES_opcount=32;              "symmetric","nonsymmetric",
158                "prod",
159                "transpose", "trace",
160                "swapaxes",
161                "minval", "maxval",
162                "condEval"};
163    int ES_opcount=44;
164  ES_opgroup opgroups[]={G_UNKNOWN,G_IDENTITY,G_BINARY,G_BINARY,G_BINARY,G_BINARY, G_BINARY,  ES_opgroup opgroups[]={G_UNKNOWN,G_IDENTITY,G_BINARY,G_BINARY,G_BINARY,G_BINARY, G_BINARY,
165              G_UNARY,G_UNARY,G_UNARY, //10              G_UNARY,G_UNARY,G_UNARY, //10
166              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,    // 17              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,    // 17
167              G_UNARY,G_UNARY,G_UNARY,                    // 20              G_UNARY,G_UNARY,G_UNARY,                    // 20
168              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,        // 28              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,    // 28
169              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY};              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY, G_UNARY_P, G_UNARY_P,      // 35
170                G_NP1OUT,G_NP1OUT,
171                G_TENSORPROD,
172                G_NP1OUT_P, G_NP1OUT_P,
173                G_NP1OUT_2P,
174                G_REDUCTION, G_REDUCTION,
175                G_CONDEVAL};
176  inline  inline
177  ES_opgroup  ES_opgroup
178  getOpgroup(ES_optype op)  getOpgroup(ES_optype op)
# Line 124  resultFS(DataAbstract_ptr left, DataAbst Line 189  resultFS(DataAbstract_ptr left, DataAbst
189      // that way, if interpolate is required in any other op we can just throw a      // that way, if interpolate is required in any other op we can just throw a
190      // programming error exception.      // programming error exception.
191    
192      FunctionSpace l=left->getFunctionSpace();
193      if (left->getFunctionSpace()!=right->getFunctionSpace())    FunctionSpace r=right->getFunctionSpace();
194      {    if (l!=r)
195          throw DataException("FunctionSpaces not equal - interpolation not supported on lazy data.");    {
196      }      if (r.probeInterpolation(l))
197      return left->getFunctionSpace();      {
198        return l;
199        }
200        if (l.probeInterpolation(r))
201        {
202        return r;
203        }
204        throw DataException("Cannot interpolate between the FunctionSpaces given for operation "+opToString(op)+".");
205      }
206      return l;
207  }  }
208    
209  // return the shape of the result of "left op right"  // return the shape of the result of "left op right"
210    // the shapes resulting from tensor product are more complex to compute so are worked out elsewhere
211  DataTypes::ShapeType  DataTypes::ShapeType
212  resultShape(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)  resultShape(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
213  {  {
214      if (left->getShape()!=right->getShape())      if (left->getShape()!=right->getShape())
215      {      {
216        if (getOpgroup(op)!=G_BINARY)        if ((getOpgroup(op)!=G_BINARY) && (getOpgroup(op)!=G_NP1OUT))
217        {        {
218          throw DataException("Shapes not the name - shapes must match for (point)binary operations.");          throw DataException("Shapes not the name - shapes must match for (point)binary operations.");
219        }        }
220    
221        if (left->getRank()==0)   // we need to allow scalar * anything        if (left->getRank()==0)   // we need to allow scalar * anything
222        {        {
223          return right->getShape();          return right->getShape();
# Line 155  resultShape(DataAbstract_ptr left, DataA Line 231  resultShape(DataAbstract_ptr left, DataA
231      return left->getShape();      return left->getShape();
232  }  }
233    
234  // determine the number of points in the result of "left op right"  // return the shape for "op left"
235  size_t  
236  resultLength(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)  DataTypes::ShapeType
237    resultShape(DataAbstract_ptr left, ES_optype op, int axis_offset)
238  {  {
239     switch (getOpgroup(op))      switch(op)
240     {      {
241     case G_BINARY: return left->getLength();          case TRANS:
242     case G_UNARY: return left->getLength();         {            // for the scoping of variables
243     default:          const DataTypes::ShapeType& s=left->getShape();
244      throw DataException("Programmer Error - attempt to getLength() for operator "+opToString(op)+".");          DataTypes::ShapeType sh;
245     }          int rank=left->getRank();
246            if (axis_offset<0 || axis_offset>rank)
247            {
248                stringstream e;
249                e << "Error - Data::transpose must have 0 <= axis_offset <= rank=" << rank;
250                throw DataException(e.str());
251            }
252            for (int i=0; i<rank; i++)
253            {
254               int index = (axis_offset+i)%rank;
255               sh.push_back(s[index]); // Append to new shape
256            }
257            return sh;
258           }
259        break;
260        case TRACE:
261           {
262            int rank=left->getRank();
263            if (rank<2)
264            {
265               throw DataException("Trace can only be computed for objects with rank 2 or greater.");
266            }
267            if ((axis_offset>rank-2) || (axis_offset<0))
268            {
269               throw DataException("Trace: axis offset must lie between 0 and rank-2 inclusive.");
270            }
271            if (rank==2)
272            {
273               return DataTypes::scalarShape;
274            }
275            else if (rank==3)
276            {
277               DataTypes::ShapeType sh;
278                   if (axis_offset==0)
279               {
280                    sh.push_back(left->getShape()[2]);
281                   }
282                   else     // offset==1
283               {
284                sh.push_back(left->getShape()[0]);
285                   }
286               return sh;
287            }
288            else if (rank==4)
289            {
290               DataTypes::ShapeType sh;
291               const DataTypes::ShapeType& s=left->getShape();
292                   if (axis_offset==0)
293               {
294                    sh.push_back(s[2]);
295                    sh.push_back(s[3]);
296                   }
297                   else if (axis_offset==1)
298               {
299                    sh.push_back(s[0]);
300                    sh.push_back(s[3]);
301                   }
302               else     // offset==2
303               {
304                sh.push_back(s[0]);
305                sh.push_back(s[1]);
306               }
307               return sh;
308            }
309            else        // unknown rank
310            {
311               throw DataException("Error - Data::trace can only be calculated for rank 2, 3 or 4 object.");
312            }
313           }
314        break;
315            default:
316        throw DataException("Programmer error - resultShape(left,op) can't compute shapes for operator "+opToString(op)+".");
317        }
318  }  }
319    
320  // determine the number of samples requires to evaluate an expression combining left and right  DataTypes::ShapeType
321  int  SwapShape(DataAbstract_ptr left, const int axis0, const int axis1)
 calcBuffs(const DataLazy_ptr& left, const DataLazy_ptr& right, ES_optype op)  
322  {  {
323     switch(getOpgroup(op))       // This code taken from the Data.cpp swapaxes() method
324     {       // Some of the checks are probably redundant here
325     case G_IDENTITY: return 1;       int axis0_tmp,axis1_tmp;
326     case G_BINARY: return max(left->getBuffsRequired(),right->getBuffsRequired()+1);       const DataTypes::ShapeType& s=left->getShape();
327     case G_UNARY: return max(left->getBuffsRequired(),1);       DataTypes::ShapeType out_shape;
328     default:       // Here's the equivalent of python s_out=s[axis_offset:]+s[:axis_offset]
329      throw DataException("Programmer Error - attempt to calcBuffs() for operator "+opToString(op)+".");       // which goes thru all shape vector elements starting with axis_offset (at index=rank wrap around to 0)
330     }       int rank=left->getRank();
331         if (rank<2) {
332            throw DataException("Error - Data::swapaxes argument must have at least rank 2.");
333         }
334         if (axis0<0 || axis0>rank-1) {
335            stringstream e;
336            e << "Error - Data::swapaxes: axis0 must be between 0 and rank-1=" << (rank-1);
337            throw DataException(e.str());
338         }
339         if (axis1<0 || axis1>rank-1) {
340            stringstream e;
341            e << "Error - Data::swapaxes: axis1 must be between 0 and rank-1=" << (rank-1);
342            throw DataException(e.str());
343         }
344         if (axis0 == axis1) {
345             throw DataException("Error - Data::swapaxes: axis indices must be different.");
346         }
347         if (axis0 > axis1) {
348             axis0_tmp=axis1;
349             axis1_tmp=axis0;
350         } else {
351             axis0_tmp=axis0;
352             axis1_tmp=axis1;
353         }
354         for (int i=0; i<rank; i++) {
355           if (i == axis0_tmp) {
356              out_shape.push_back(s[axis1_tmp]);
357           } else if (i == axis1_tmp) {
358              out_shape.push_back(s[axis0_tmp]);
359           } else {
360              out_shape.push_back(s[i]);
361           }
362         }
363        return out_shape;
364  }  }
365    
366    
367    // determine the output shape for the general tensor product operation
368    // the additional parameters return information required later for the product
369    // the majority of this code is copy pasted from C_General_Tensor_Product
370    DataTypes::ShapeType
371    GTPShape(DataAbstract_ptr left, DataAbstract_ptr right, int axis_offset, int transpose, int& SL, int& SM, int& SR)
372    {
373        
374      // Get rank and shape of inputs
375      int rank0 = left->getRank();
376      int rank1 = right->getRank();
377      const DataTypes::ShapeType& shape0 = left->getShape();
378      const DataTypes::ShapeType& shape1 = right->getShape();
379    
380      // Prepare for the loops of the product and verify compatibility of shapes
381      int start0=0, start1=0;
382      if (transpose == 0)       {}
383      else if (transpose == 1)  { start0 = axis_offset; }
384      else if (transpose == 2)  { start1 = rank1-axis_offset; }
385      else              { throw DataException("DataLazy GeneralTensorProduct Constructor: Error - transpose should be 0, 1 or 2"); }
386    
387      if (rank0<axis_offset)
388      {
389        throw DataException("DataLazy GeneralTensorProduct Constructor: Error - rank of left < axisoffset");
390      }
391    
392      // Adjust the shapes for transpose
393      DataTypes::ShapeType tmpShape0(rank0);    // pre-sizing the vectors rather
394      DataTypes::ShapeType tmpShape1(rank1);    // than using push_back
395      for (int i=0; i<rank0; i++)   { tmpShape0[i]=shape0[(i+start0)%rank0]; }
396      for (int i=0; i<rank1; i++)   { tmpShape1[i]=shape1[(i+start1)%rank1]; }
397    
398      // Prepare for the loops of the product
399      SL=1, SM=1, SR=1;
400      for (int i=0; i<rank0-axis_offset; i++)   {
401        SL *= tmpShape0[i];
402      }
403      for (int i=rank0-axis_offset; i<rank0; i++)   {
404        if (tmpShape0[i] != tmpShape1[i-(rank0-axis_offset)]) {
405          throw DataException("C_GeneralTensorProduct: Error - incompatible shapes");
406        }
407        SM *= tmpShape0[i];
408      }
409      for (int i=axis_offset; i<rank1; i++)     {
410        SR *= tmpShape1[i];
411      }
412    
413      // Define the shape of the output (rank of shape is the sum of the loop ranges below)
414      DataTypes::ShapeType shape2(rank0+rank1-2*axis_offset);  
415      {         // block to limit the scope of out_index
416         int out_index=0;
417         for (int i=0; i<rank0-axis_offset; i++, ++out_index) { shape2[out_index]=tmpShape0[i]; } // First part of arg_0_Z
418         for (int i=axis_offset; i<rank1; i++, ++out_index)   { shape2[out_index]=tmpShape1[i]; } // Last part of arg_1_Z
419      }
420    
421      if (shape2.size()>ESCRIPT_MAX_DATA_RANK)
422      {
423         ostringstream os;
424         os << "C_GeneralTensorProduct: Error - Attempt to create a rank " << shape2.size() << " object. The maximum rank is " << ESCRIPT_MAX_DATA_RANK << ".";
425         throw DataException(os.str());
426      }
427    
428      return shape2;
429    }
430    
431  }   // end anonymous namespace  }   // end anonymous namespace
432    
433    
# Line 198  opToString(ES_optype op) Line 443  opToString(ES_optype op)
443    return ES_opstrings[op];    return ES_opstrings[op];
444  }  }
445    
446    void DataLazy::LazyNodeSetup()
447    {
448    #ifdef _OPENMP
449        int numthreads=omp_get_max_threads();
450        m_samples.resize(numthreads*m_samplesize);
451        m_sampleids=new int[numthreads];
452        for (int i=0;i<numthreads;++i)
453        {
454            m_sampleids[i]=-1;  
455        }
456    #else
457        m_samples.resize(m_samplesize);
458        m_sampleids=new int[1];
459        m_sampleids[0]=-1;
460    #endif  // _OPENMP
461    }
462    
463    
464    // Creates an identity node
465  DataLazy::DataLazy(DataAbstract_ptr p)  DataLazy::DataLazy(DataAbstract_ptr p)
466      : parent(p->getFunctionSpace(),p->getShape()),      : parent(p->getFunctionSpace(),p->getShape())
467      m_op(IDENTITY)      ,m_sampleids(0),
468        m_samples(1)
469  {  {
470     if (p->isLazy())     if (p->isLazy())
471     {     {
# Line 212  DataLazy::DataLazy(DataAbstract_ptr p) Line 476  DataLazy::DataLazy(DataAbstract_ptr p)
476     }     }
477     else     else
478     {     {
479      m_id=dynamic_pointer_cast<DataReady>(p);      p->makeLazyShared();
480      if(p->isConstant()) {m_readytype='C';}      DataReady_ptr dr=dynamic_pointer_cast<DataReady>(p);
481      else if(p->isExpanded()) {m_readytype='E';}      makeIdentity(dr);
482      else if (p->isTagged()) {m_readytype='T';}  LAZYDEBUG(cout << "Wrapping " << dr.get() << " id=" << m_id.get() << endl;)
     else {throw DataException("Unknown DataReady instance in DataLazy constructor.");}  
483     }     }
484     m_length=p->getLength();  LAZYDEBUG(cout << "(1)Lazy created with " << m_samplesize << endl;)
    m_buffsRequired=1;  
    m_samplesize=getNumDPPSample()*getNoValues();  
 cout << "(1)Lazy created with " << m_samplesize << endl;  
485  }  }
486    
   
   
   
487  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op)  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op)
488      : parent(left->getFunctionSpace(),left->getShape()),      : parent(left->getFunctionSpace(),(getOpgroup(op)!=G_REDUCTION)?left->getShape():DataTypes::scalarShape),
489      m_op(op)      m_op(op),
490        m_axis_offset(0),
491        m_transpose(0),
492        m_SL(0), m_SM(0), m_SR(0)
493  {  {
494     if (getOpgroup(op)!=G_UNARY)     if ((getOpgroup(op)!=G_UNARY) && (getOpgroup(op)!=G_NP1OUT) && (getOpgroup(op)!=G_REDUCTION))
495     {     {
496      throw DataException("Programmer error - constructor DataLazy(left, op) will only process UNARY operations.");      throw DataException("Programmer error - constructor DataLazy(left, op) will only process UNARY operations.");
497     }     }
498    
499     DataLazy_ptr lleft;     DataLazy_ptr lleft;
500     if (!left->isLazy())     if (!left->isLazy())
501     {     {
# Line 245  DataLazy::DataLazy(DataAbstract_ptr left Line 506  DataLazy::DataLazy(DataAbstract_ptr left
506      lleft=dynamic_pointer_cast<DataLazy>(left);      lleft=dynamic_pointer_cast<DataLazy>(left);
507     }     }
508     m_readytype=lleft->m_readytype;     m_readytype=lleft->m_readytype;
    m_length=left->getLength();  
509     m_left=lleft;     m_left=lleft;
    m_buffsRequired=1;  
510     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
511       m_children=m_left->m_children+1;
512       m_height=m_left->m_height+1;
513       LazyNodeSetup();
514       SIZELIMIT
515  }  }
516    
517    
518    // In this constructor we need to consider interpolation
519  DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)  DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
520      : parent(resultFS(left,right,op), resultShape(left,right,op)),      : parent(resultFS(left,right,op), resultShape(left,right,op)),
521      m_op(op)      m_op(op),
522        m_SL(0), m_SM(0), m_SR(0)
523  {  {
524     if (getOpgroup(op)!=G_BINARY)  LAZYDEBUG(cout << "Forming operator with " << left.get() << " " << right.get() << endl;)
525       if ((getOpgroup(op)!=G_BINARY))
526     {     {
527      throw DataException("Programmer error - constructor DataLazy(left, right, op) will only process BINARY operations.");      throw DataException("Programmer error - constructor DataLazy(left, right, op) will only process BINARY operations.");
528     }     }
529    
530       if (getFunctionSpace()!=left->getFunctionSpace())    // left needs to be interpolated
531       {
532        FunctionSpace fs=getFunctionSpace();
533        Data ltemp(left);
534        Data tmp(ltemp,fs);
535        left=tmp.borrowDataPtr();
536       }
537       if (getFunctionSpace()!=right->getFunctionSpace())   // right needs to be interpolated
538       {
539        Data tmp(Data(right),getFunctionSpace());
540        right=tmp.borrowDataPtr();
541    LAZYDEBUG(cout << "Right interpolation required " << right.get() << endl;)
542       }
543       left->operandCheck(*right);
544    
545     if (left->isLazy())          // the children need to be DataLazy. Wrap them in IDENTITY if required     if (left->isLazy())          // the children need to be DataLazy. Wrap them in IDENTITY if required
546     {     {
547      m_left=dynamic_pointer_cast<DataLazy>(left);      m_left=dynamic_pointer_cast<DataLazy>(left);
548    LAZYDEBUG(cout << "Left is " << m_left->toString() << endl;)
549     }     }
550     else     else
551     {     {
552      m_left=DataLazy_ptr(new DataLazy(left));      m_left=DataLazy_ptr(new DataLazy(left));
553    LAZYDEBUG(cout << "Left " << left.get() << " wrapped " << m_left->m_id.get() << endl;)
554     }     }
555     if (right->isLazy())     if (right->isLazy())
556     {     {
557      m_right=dynamic_pointer_cast<DataLazy>(right);      m_right=dynamic_pointer_cast<DataLazy>(right);
558    LAZYDEBUG(cout << "Right is " << m_right->toString() << endl;)
559     }     }
560     else     else
561     {     {
562      m_right=DataLazy_ptr(new DataLazy(right));      m_right=DataLazy_ptr(new DataLazy(right));
563    LAZYDEBUG(cout << "Right " << right.get() << " wrapped " << m_right->m_id.get() << endl;)
564     }     }
565     char lt=m_left->m_readytype;     char lt=m_left->m_readytype;
566     char rt=m_right->m_readytype;     char rt=m_right->m_readytype;
# Line 290  DataLazy::DataLazy(DataAbstract_ptr left Line 576  DataLazy::DataLazy(DataAbstract_ptr left
576     {     {
577      m_readytype='C';      m_readytype='C';
578     }     }
579     m_length=resultLength(m_left,m_right,m_op);     m_samplesize=getNumDPPSample()*getNoValues();
580     m_samplesize=getNumDPPSample()*getNoValues();         m_children=m_left->m_children+m_right->m_children+2;
581     m_buffsRequired=calcBuffs(m_left, m_right,m_op);     m_height=max(m_left->m_height,m_right->m_height)+1;
582  cout << "(3)Lazy created with " << m_samplesize << endl;     LazyNodeSetup();
583       SIZELIMIT
584    LAZYDEBUG(cout << "(3)Lazy created with " << m_samplesize << endl;)
585  }  }
586    
587    DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op, int axis_offset, int transpose)
588        : parent(resultFS(left,right,op), GTPShape(left,right, axis_offset, transpose, m_SL,m_SM, m_SR)),
589        m_op(op),
590        m_axis_offset(axis_offset),
591        m_transpose(transpose)
592    {
593       if ((getOpgroup(op)!=G_TENSORPROD))
594       {
595        throw DataException("Programmer error - constructor DataLazy(left, right, op, ax, tr) will only process BINARY operations which require parameters.");
596       }
597       if ((transpose>2) || (transpose<0))
598       {
599        throw DataException("DataLazy GeneralTensorProduct constructor: Error - transpose should be 0, 1 or 2");
600       }
601       if (getFunctionSpace()!=left->getFunctionSpace())    // left needs to be interpolated
602       {
603        FunctionSpace fs=getFunctionSpace();
604        Data ltemp(left);
605        Data tmp(ltemp,fs);
606        left=tmp.borrowDataPtr();
607       }
608       if (getFunctionSpace()!=right->getFunctionSpace())   // right needs to be interpolated
609       {
610        Data tmp(Data(right),getFunctionSpace());
611        right=tmp.borrowDataPtr();
612       }
613    //    left->operandCheck(*right);
614    
615  DataLazy::~DataLazy()     if (left->isLazy())          // the children need to be DataLazy. Wrap them in IDENTITY if required
616       {
617        m_left=dynamic_pointer_cast<DataLazy>(left);
618       }
619       else
620       {
621        m_left=DataLazy_ptr(new DataLazy(left));
622       }
623       if (right->isLazy())
624       {
625        m_right=dynamic_pointer_cast<DataLazy>(right);
626       }
627       else
628       {
629        m_right=DataLazy_ptr(new DataLazy(right));
630       }
631       char lt=m_left->m_readytype;
632       char rt=m_right->m_readytype;
633       if (lt=='E' || rt=='E')
634       {
635        m_readytype='E';
636       }
637       else if (lt=='T' || rt=='T')
638       {
639        m_readytype='T';
640       }
641       else
642       {
643        m_readytype='C';
644       }
645       m_samplesize=getNumDPPSample()*getNoValues();
646       m_children=m_left->m_children+m_right->m_children+2;
647       m_height=max(m_left->m_height,m_right->m_height)+1;
648       LazyNodeSetup();
649       SIZELIMIT
650    LAZYDEBUG(cout << "(4)Lazy created with " << m_samplesize << endl;)
651    }
652    
653    
654    DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, int axis_offset)
655        : parent(left->getFunctionSpace(), resultShape(left,op, axis_offset)),
656        m_op(op),
657        m_axis_offset(axis_offset),
658        m_transpose(0),
659        m_tol(0)
660  {  {
661       if ((getOpgroup(op)!=G_NP1OUT_P))
662       {
663        throw DataException("Programmer error - constructor DataLazy(left, op, ax) will only process UNARY operations which require parameters.");
664       }
665       DataLazy_ptr lleft;
666       if (!left->isLazy())
667       {
668        lleft=DataLazy_ptr(new DataLazy(left));
669       }
670       else
671       {
672        lleft=dynamic_pointer_cast<DataLazy>(left);
673       }
674       m_readytype=lleft->m_readytype;
675       m_left=lleft;
676       m_samplesize=getNumDPPSample()*getNoValues();
677       m_children=m_left->m_children+1;
678       m_height=m_left->m_height+1;
679       LazyNodeSetup();
680       SIZELIMIT
681    LAZYDEBUG(cout << "(5)Lazy created with " << m_samplesize << endl;)
682    }
683    
684    DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, double tol)
685        : parent(left->getFunctionSpace(), left->getShape()),
686        m_op(op),
687        m_axis_offset(0),
688        m_transpose(0),
689        m_tol(tol)
690    {
691       if ((getOpgroup(op)!=G_UNARY_P))
692       {
693        throw DataException("Programmer error - constructor DataLazy(left, op, tol) will only process UNARY operations which require parameters.");
694       }
695       DataLazy_ptr lleft;
696       if (!left->isLazy())
697       {
698        lleft=DataLazy_ptr(new DataLazy(left));
699       }
700       else
701       {
702        lleft=dynamic_pointer_cast<DataLazy>(left);
703       }
704       m_readytype=lleft->m_readytype;
705       m_left=lleft;
706       m_samplesize=getNumDPPSample()*getNoValues();
707       m_children=m_left->m_children+1;
708       m_height=m_left->m_height+1;
709       LazyNodeSetup();
710       SIZELIMIT
711    LAZYDEBUG(cout << "(6)Lazy created with " << m_samplesize << endl;)
712    }
713    
714    
715    DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, const int axis0, const int axis1)
716        : parent(left->getFunctionSpace(), SwapShape(left,axis0,axis1)),
717        m_op(op),
718        m_axis_offset(axis0),
719        m_transpose(axis1),
720        m_tol(0)
721    {
722       if ((getOpgroup(op)!=G_NP1OUT_2P))
723       {
724        throw DataException("Programmer error - constructor DataLazy(left, op, tol) will only process UNARY operations which require two integer parameters.");
725       }
726       DataLazy_ptr lleft;
727       if (!left->isLazy())
728       {
729        lleft=DataLazy_ptr(new DataLazy(left));
730       }
731       else
732       {
733        lleft=dynamic_pointer_cast<DataLazy>(left);
734       }
735       m_readytype=lleft->m_readytype;
736       m_left=lleft;
737       m_samplesize=getNumDPPSample()*getNoValues();
738       m_children=m_left->m_children+1;
739       m_height=m_left->m_height+1;
740       LazyNodeSetup();
741       SIZELIMIT
742    LAZYDEBUG(cout << "(7)Lazy created with " << m_samplesize << endl;)
743  }  }
744    
745    
746  int  namespace
 DataLazy::getBuffsRequired() const  
747  {  {
748      return m_buffsRequired;  
749        inline int max3(int a, int b, int c)
750        {
751        int t=(a>b?a:b);
752        return (t>c?t:c);
753    
754        }
755    }
756    
757    DataLazy::DataLazy(DataAbstract_ptr mask, DataAbstract_ptr left, DataAbstract_ptr right/*, double tol*/)
758        : parent(left->getFunctionSpace(), left->getShape()),
759        m_op(CONDEVAL),
760        m_axis_offset(0),
761        m_transpose(0),
762        m_tol(0)
763    {
764    
765       DataLazy_ptr lmask;
766       DataLazy_ptr lleft;
767       DataLazy_ptr lright;
768       if (!mask->isLazy())
769       {
770        lmask=DataLazy_ptr(new DataLazy(mask));
771       }
772       else
773       {
774        lmask=dynamic_pointer_cast<DataLazy>(mask);
775       }
776       if (!left->isLazy())
777       {
778        lleft=DataLazy_ptr(new DataLazy(left));
779       }
780       else
781       {
782        lleft=dynamic_pointer_cast<DataLazy>(left);
783       }
784       if (!right->isLazy())
785       {
786        lright=DataLazy_ptr(new DataLazy(right));
787       }
788       else
789       {
790        lright=dynamic_pointer_cast<DataLazy>(right);
791       }
792       m_readytype=lmask->m_readytype;
793       if ((lleft->m_readytype!=lright->m_readytype) || (lmask->m_readytype!=lleft->m_readytype))
794       {
795        throw DataException("Programmer Error - condEval arguments must have the same readytype");
796       }
797       m_left=lleft;
798       m_right=lright;
799       m_mask=lmask;
800       m_samplesize=getNumDPPSample()*getNoValues();
801       m_children=m_left->m_children+m_right->m_children+m_mask->m_children+1;
802       m_height=max3(m_left->m_height,m_right->m_height,m_mask->m_height)+1;
803       LazyNodeSetup();
804       SIZELIMIT
805    LAZYDEBUG(cout << "(8)Lazy created with " << m_samplesize << endl;)
806    }
807    
808    
809    
810    DataLazy::~DataLazy()
811    {
812       delete[] m_sampleids;
813  }  }
814    
815    
# Line 328  DataLazy::collapseToReady() Line 832  DataLazy::collapseToReady()
832    DataReady_ptr pleft=m_left->collapseToReady();    DataReady_ptr pleft=m_left->collapseToReady();
833    Data left(pleft);    Data left(pleft);
834    Data right;    Data right;
835    if (getOpgroup(m_op)==G_BINARY)    if ((getOpgroup(m_op)==G_BINARY) || (getOpgroup(m_op)==G_TENSORPROD))
836    {    {
837      right=Data(m_right->collapseToReady());      right=Data(m_right->collapseToReady());
838    }    }
# Line 427  DataLazy::collapseToReady() Line 931  DataLazy::collapseToReady()
931      case LEZ:      case LEZ:
932      result=left.whereNonPositive();      result=left.whereNonPositive();
933      break;      break;
934        case NEZ:
935        result=left.whereNonZero(m_tol);
936        break;
937        case EZ:
938        result=left.whereZero(m_tol);
939        break;
940        case SYM:
941        result=left.symmetric();
942        break;
943        case NSYM:
944        result=left.nonsymmetric();
945        break;
946        case PROD:
947        result=C_GeneralTensorProduct(left,right,m_axis_offset, m_transpose);
948        break;
949        case TRANS:
950        result=left.transpose(m_axis_offset);
951        break;
952        case TRACE:
953        result=left.trace(m_axis_offset);
954        break;
955        case SWAP:
956        result=left.swapaxes(m_axis_offset, m_transpose);
957        break;
958        case MINVAL:
959        result=left.minval();
960        break;
961        case MAXVAL:
962        result=left.minval();
963        break;
964      default:      default:
965      throw DataException("Programmer error - do not know how to resolve operator "+opToString(m_op)+".");      throw DataException("Programmer error - collapseToReady does not know how to resolve operator "+opToString(m_op)+".");
966    }    }
967    return result.borrowReadyPtr();    return result.borrowReadyPtr();
968  }  }
# Line 454  DataLazy::collapse() Line 988  DataLazy::collapse()
988    m_op=IDENTITY;    m_op=IDENTITY;
989  }  }
990    
991  /*  
992    \brief Compute the value of the expression (binary operation) for the given sample.  
993    \return Vector which stores the value of the subexpression for the given sample.  
994    \param v A vector to store intermediate results.  
995    \param offset Index in v to begin storing results.  
996    \param sampleNo Sample number to evaluate.  #define PROC_OP(TYPE,X)                               \
997    \param roffset (output parameter) the offset in the return vector where the result begins.      for (int j=0;j<onumsteps;++j)\
998        {\
999    The return value will be an existing vector so do not deallocate it.        for (int i=0;i<numsteps;++i,resultp+=resultStep) \
1000    If the result is stored in v it should be stored at the offset given.        { \
1001    Everything from offset to the end of v should be considered available for this method to use.  LAZYDEBUG(cout << "[left,right]=[" << lroffset << "," << rroffset << "]" << endl;)\
1002  */  LAZYDEBUG(cout << "{left,right}={" << (*left)[lroffset] << "," << (*right)[rroffset] << "}\n";)\
1003  DataTypes::ValueType*           tensor_binary_operation< TYPE >(chunksize, &((*left)[lroffset]), &((*right)[rroffset]), resultp, X); \
1004  DataLazy::resolveUnary(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const  LAZYDEBUG(cout << " result=      " << resultp[0] << endl;) \
1005             lroffset+=leftstep; \
1006             rroffset+=rightstep; \
1007          }\
1008          lroffset+=oleftstep;\
1009          rroffset+=orightstep;\
1010        }
1011    
1012    
1013    // The result will be stored in m_samples
1014    // The return value is a pointer to the DataVector, offset is the offset within the return value
1015    const DataTypes::ValueType*
1016    DataLazy::resolveNodeSample(int tid, int sampleNo, size_t& roffset)
1017    {
1018    LAZYDEBUG(cout << "Resolve sample " << toString() << endl;)
1019        // collapse so we have a 'E' node or an IDENTITY for some other type
1020      if (m_readytype!='E' && m_op!=IDENTITY)
1021      {
1022        collapse();
1023      }
1024      if (m_op==IDENTITY)  
1025      {
1026        const ValueType& vec=m_id->getVectorRO();
1027        roffset=m_id->getPointOffset(sampleNo, 0);
1028    #ifdef LAZY_STACK_PROF
1029    int x;
1030    if (&x<stackend[omp_get_thread_num()])
1031    {
1032           stackend[omp_get_thread_num()]=&x;
1033    }
1034    #endif
1035        return &(vec);
1036      }
1037      if (m_readytype!='E')
1038      {
1039        throw DataException("Programmer Error - Collapse did not produce an expanded node.");
1040      }
1041      if (m_sampleids[tid]==sampleNo)
1042      {
1043        roffset=tid*m_samplesize;
1044        return &(m_samples);        // sample is already resolved
1045      }
1046      m_sampleids[tid]=sampleNo;
1047    
1048      switch (getOpgroup(m_op))
1049      {
1050      case G_UNARY:
1051      case G_UNARY_P: return resolveNodeUnary(tid, sampleNo, roffset);
1052      case G_BINARY: return resolveNodeBinary(tid, sampleNo, roffset);
1053      case G_NP1OUT: return resolveNodeNP1OUT(tid, sampleNo, roffset);
1054      case G_NP1OUT_P: return resolveNodeNP1OUT_P(tid, sampleNo, roffset);
1055      case G_TENSORPROD: return resolveNodeTProd(tid, sampleNo, roffset);
1056      case G_NP1OUT_2P: return resolveNodeNP1OUT_2P(tid, sampleNo, roffset);
1057      case G_REDUCTION: return resolveNodeReduction(tid, sampleNo, roffset);
1058      case G_CONDEVAL: return resolveNodeCondEval(tid, sampleNo, roffset);
1059      default:
1060        throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");
1061      }
1062    }
1063    
1064    const DataTypes::ValueType*
1065    DataLazy::resolveNodeUnary(int tid, int sampleNo, size_t& roffset)
1066  {  {
1067      // we assume that any collapsing has been done before we get here      // we assume that any collapsing has been done before we get here
1068      // since we only have one argument we don't need to think about only      // since we only have one argument we don't need to think about only
1069      // processing single points.      // processing single points.
1070        // we will also know we won't get identity nodes
1071    if (m_readytype!='E')    if (m_readytype!='E')
1072    {    {
1073      throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");      throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
1074    }    }
1075    const ValueType* vleft=m_left->resolveSample(v,offset,sampleNo,roffset);    if (m_op==IDENTITY)
1076    const double* left=&((*vleft)[roffset]);    {
1077    double* result=&(v[offset]);      throw DataException("Programmer error - resolveNodeUnary should not be called on identity nodes.");
1078    roffset=offset;    }
1079      const DataTypes::ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, roffset);
1080      const double* left=&((*leftres)[roffset]);
1081      roffset=m_samplesize*tid;
1082      double* result=&(m_samples[roffset]);
1083    switch (m_op)    switch (m_op)
1084    {    {
1085      case SIN:        case SIN:  
1086      tensor_unary_operation(m_samplesize, left, result, ::sin);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sin);
1087      break;      break;
1088      case COS:      case COS:
1089      tensor_unary_operation(m_samplesize, left, result, ::cos);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cos);
1090      break;      break;
1091      case TAN:      case TAN:
1092      tensor_unary_operation(m_samplesize, left, result, ::tan);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tan);
1093      break;      break;
1094      case ASIN:      case ASIN:
1095      tensor_unary_operation(m_samplesize, left, result, ::asin);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::asin);
1096      break;      break;
1097      case ACOS:      case ACOS:
1098      tensor_unary_operation(m_samplesize, left, result, ::acos);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::acos);
1099      break;      break;
1100      case ATAN:      case ATAN:
1101      tensor_unary_operation(m_samplesize, left, result, ::atan);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::atan);
1102      break;      break;
1103      case SINH:      case SINH:
1104      tensor_unary_operation(m_samplesize, left, result, ::sinh);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sinh);
1105      break;      break;
1106      case COSH:      case COSH:
1107      tensor_unary_operation(m_samplesize, left, result, ::cosh);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cosh);
1108      break;      break;
1109      case TANH:      case TANH:
1110      tensor_unary_operation(m_samplesize, left, result, ::tanh);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tanh);
1111      break;      break;
1112      case ERF:      case ERF:
1113  #ifdef _WIN32  #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1114      throw DataException("Error - Data:: erf function is not supported on _WIN32 platforms.");      throw DataException("Error - Data:: erf function is not supported on _WIN32 platforms.");
1115  #else  #else
1116      tensor_unary_operation(m_samplesize, left, result, ::erf);      tensor_unary_operation(m_samplesize, left, result, ::erf);
1117      break;      break;
1118  #endif  #endif
1119     case ASINH:     case ASINH:
1120  #ifdef _WIN32  #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1121      tensor_unary_operation(m_samplesize, left, result, escript::asinh_substitute);      tensor_unary_operation(m_samplesize, left, result, escript::asinh_substitute);
1122  #else  #else
1123      tensor_unary_operation(m_samplesize, left, result, ::asinh);      tensor_unary_operation(m_samplesize, left, result, ::asinh);
1124  #endif    #endif  
1125      break;      break;
1126     case ACOSH:     case ACOSH:
1127  #ifdef _WIN32  #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1128      tensor_unary_operation(m_samplesize, left, result, escript::acosh_substitute);      tensor_unary_operation(m_samplesize, left, result, escript::acosh_substitute);
1129  #else  #else
1130      tensor_unary_operation(m_samplesize, left, result, ::acosh);      tensor_unary_operation(m_samplesize, left, result, ::acosh);
1131  #endif    #endif  
1132      break;      break;
1133     case ATANH:     case ATANH:
1134  #ifdef _WIN32  #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1135      tensor_unary_operation(m_samplesize, left, result, escript::atanh_substitute);      tensor_unary_operation(m_samplesize, left, result, escript::atanh_substitute);
1136  #else  #else
1137      tensor_unary_operation(m_samplesize, left, result, ::atanh);      tensor_unary_operation(m_samplesize, left, result, ::atanh);
1138  #endif    #endif  
1139      break;      break;
1140      case LOG10:      case LOG10:
1141      tensor_unary_operation(m_samplesize, left, result, ::log10);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log10);
1142      break;      break;
1143      case LOG:      case LOG:
1144      tensor_unary_operation(m_samplesize, left, result, ::log);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log);
1145      break;      break;
1146      case SIGN:      case SIGN:
1147      tensor_unary_operation(m_samplesize, left, result, escript::fsign);      tensor_unary_operation(m_samplesize, left, result, escript::fsign);
1148      break;      break;
1149      case ABS:      case ABS:
1150      tensor_unary_operation(m_samplesize, left, result, ::fabs);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::fabs);
1151      break;      break;
1152      case NEG:      case NEG:
1153      tensor_unary_operation(m_samplesize, left, result, negate<double>());      tensor_unary_operation(m_samplesize, left, result, negate<double>());
# Line 558  DataLazy::resolveUnary(ValueType& v, siz Line 1158  DataLazy::resolveUnary(ValueType& v, siz
1158      throw DataException("Programmer error - POS not supported for lazy data.");      throw DataException("Programmer error - POS not supported for lazy data.");
1159      break;      break;
1160      case EXP:      case EXP:
1161      tensor_unary_operation(m_samplesize, left, result, ::exp);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::exp);
1162      break;      break;
1163      case SQRT:      case SQRT:
1164      tensor_unary_operation(m_samplesize, left, result, ::sqrt);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sqrt);
1165      break;      break;
1166      case RECIP:      case RECIP:
1167      tensor_unary_operation(m_samplesize, left, result, bind1st(divides<double>(),1.));      tensor_unary_operation(m_samplesize, left, result, bind1st(divides<double>(),1.));
# Line 578  DataLazy::resolveUnary(ValueType& v, siz Line 1178  DataLazy::resolveUnary(ValueType& v, siz
1178      case LEZ:      case LEZ:
1179      tensor_unary_operation(m_samplesize, left, result, bind2nd(less_equal<double>(),0.0));      tensor_unary_operation(m_samplesize, left, result, bind2nd(less_equal<double>(),0.0));
1180      break;      break;
1181    // There are actually G_UNARY_P but I don't see a compelling reason to treat them differently
1182        case NEZ:
1183        tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsGT(),m_tol));
1184        break;
1185        case EZ:
1186        tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsLTE(),m_tol));
1187        break;
1188    
1189      default:      default:
1190      throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");      throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");
1191    }    }
1192    return &v;    return &(m_samples);
1193  }  }
1194    
1195    
1196    const DataTypes::ValueType*
1197    DataLazy::resolveNodeReduction(int tid, int sampleNo, size_t& roffset)
1198    {
1199        // we assume that any collapsing has been done before we get here
1200        // since we only have one argument we don't need to think about only
1201        // processing single points.
1202        // we will also know we won't get identity nodes
1203      if (m_readytype!='E')
1204      {
1205        throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
1206      }
1207      if (m_op==IDENTITY)
1208      {
1209        throw DataException("Programmer error - resolveNodeUnary should not be called on identity nodes.");
1210      }
1211      size_t loffset=0;
1212      const DataTypes::ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, loffset);
1213    
1214      roffset=m_samplesize*tid;
1215      unsigned int ndpps=getNumDPPSample();
1216      unsigned int psize=DataTypes::noValues(getShape());
1217      double* result=&(m_samples[roffset]);
1218      switch (m_op)
1219      {
1220        case MINVAL:
1221        {
1222          for (unsigned int z=0;z<ndpps;++z)
1223          {
1224            FMin op;
1225            *result=DataMaths::reductionOp(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max());
1226            loffset+=psize;
1227            result++;
1228          }
1229        }
1230        break;
1231        case MAXVAL:
1232        {
1233          for (unsigned int z=0;z<ndpps;++z)
1234          {
1235          FMax op;
1236          *result=DataMaths::reductionOp(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max()*-1);
1237          loffset+=psize;
1238          result++;
1239          }
1240        }
1241        break;
1242        default:
1243        throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");
1244      }
1245      return &(m_samples);
1246    }
1247    
1248    const DataTypes::ValueType*
1249    DataLazy::resolveNodeNP1OUT(int tid, int sampleNo, size_t& roffset)
1250    {
1251        // we assume that any collapsing has been done before we get here
1252        // since we only have one argument we don't need to think about only
1253        // processing single points.
1254      if (m_readytype!='E')
1255      {
1256        throw DataException("Programmer error - resolveNodeNP1OUT should only be called on expanded Data.");
1257      }
1258      if (m_op==IDENTITY)
1259      {
1260        throw DataException("Programmer error - resolveNodeNP1OUT should not be called on identity nodes.");
1261      }
1262      size_t subroffset;
1263      const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1264      roffset=m_samplesize*tid;
1265      size_t loop=0;
1266      size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1267      size_t step=getNoValues();
1268      size_t offset=roffset;
1269      switch (m_op)
1270      {
1271        case SYM:
1272        for (loop=0;loop<numsteps;++loop)
1273        {
1274            DataMaths::symmetric(*leftres,m_left->getShape(),subroffset, m_samples, getShape(), offset);
1275            subroffset+=step;
1276            offset+=step;
1277        }
1278        break;
1279        case NSYM:
1280        for (loop=0;loop<numsteps;++loop)
1281        {
1282            DataMaths::nonsymmetric(*leftres,m_left->getShape(),subroffset, m_samples, getShape(), offset);
1283            subroffset+=step;
1284            offset+=step;
1285        }
1286        break;
1287        default:
1288        throw DataException("Programmer error - resolveNP1OUT can not resolve operator "+opToString(m_op)+".");
1289      }
1290      return &m_samples;
1291    }
1292    
1293    const DataTypes::ValueType*
1294    DataLazy::resolveNodeNP1OUT_P(int tid, int sampleNo, size_t& roffset)
1295    {
1296        // we assume that any collapsing has been done before we get here
1297        // since we only have one argument we don't need to think about only
1298        // processing single points.
1299      if (m_readytype!='E')
1300      {
1301        throw DataException("Programmer error - resolveNodeNP1OUT_P should only be called on expanded Data.");
1302      }
1303      if (m_op==IDENTITY)
1304      {
1305        throw DataException("Programmer error - resolveNodeNP1OUT_P should not be called on identity nodes.");
1306      }
1307      size_t subroffset;
1308      size_t offset;
1309      const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1310      roffset=m_samplesize*tid;
1311      offset=roffset;
1312      size_t loop=0;
1313      size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1314      size_t outstep=getNoValues();
1315      size_t instep=m_left->getNoValues();
1316      switch (m_op)
1317      {
1318        case TRACE:
1319        for (loop=0;loop<numsteps;++loop)
1320        {
1321                DataMaths::trace(*leftres,m_left->getShape(),subroffset, m_samples ,getShape(),offset,m_axis_offset);
1322            subroffset+=instep;
1323            offset+=outstep;
1324        }
1325        break;
1326        case TRANS:
1327        for (loop=0;loop<numsteps;++loop)
1328        {
1329                DataMaths::transpose(*leftres,m_left->getShape(),subroffset, m_samples, getShape(),offset,m_axis_offset);
1330            subroffset+=instep;
1331            offset+=outstep;
1332        }
1333        break;
1334        default:
1335        throw DataException("Programmer error - resolveNP1OUTP can not resolve operator "+opToString(m_op)+".");
1336      }
1337      return &m_samples;
1338    }
1339    
1340    
1341  #define PROC_OP(X) \  const DataTypes::ValueType*
1342      for (int i=0;i<steps;++i,resultp+=resultStep) \  DataLazy::resolveNodeNP1OUT_2P(int tid, int sampleNo, size_t& roffset)
1343      { \  {
1344         tensor_binary_operation(chunksize, &((*left)[lroffset]), &((*right)[rroffset]), resultp, X); \    if (m_readytype!='E')
1345         lroffset+=leftStep; \    {
1346         rroffset+=rightStep; \      throw DataException("Programmer error - resolveNodeNP1OUT_2P should only be called on expanded Data.");
1347      }
1348      if (m_op==IDENTITY)
1349      {
1350        throw DataException("Programmer error - resolveNodeNP1OUT_2P should not be called on identity nodes.");
1351      }
1352      size_t subroffset;
1353      size_t offset;
1354      const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1355      roffset=m_samplesize*tid;
1356      offset=roffset;
1357      size_t loop=0;
1358      size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1359      size_t outstep=getNoValues();
1360      size_t instep=m_left->getNoValues();
1361      switch (m_op)
1362      {
1363        case SWAP:
1364        for (loop=0;loop<numsteps;++loop)
1365        {
1366                DataMaths::swapaxes(*leftres,m_left->getShape(),subroffset, m_samples, getShape(),offset, m_axis_offset, m_transpose);
1367            subroffset+=instep;
1368            offset+=outstep;
1369      }      }
1370        break;
1371        default:
1372        throw DataException("Programmer error - resolveNodeNP1OUT2P can not resolve operator "+opToString(m_op)+".");
1373      }
1374      return &m_samples;
1375    }
1376    
1377    const DataTypes::ValueType*
1378    DataLazy::resolveNodeCondEval(int tid, int sampleNo, size_t& roffset)
1379    {
1380      if (m_readytype!='E')
1381      {
1382        throw DataException("Programmer error - resolveNodeCondEval should only be called on expanded Data.");
1383      }
1384      if (m_op!=CONDEVAL)
1385      {
1386        throw DataException("Programmer error - resolveNodeCondEval should only be called on CONDEVAL nodes.");
1387      }
1388      size_t subroffset;
1389      size_t offset;
1390    
1391    
1392      const ValueType* maskres=m_mask->resolveNodeSample(tid, sampleNo, subroffset);
1393      const ValueType* srcres=0;
1394      if ((*maskres)[subroffset]>0)
1395      {
1396        srcres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1397      }
1398      else
1399      {
1400        srcres=m_right->resolveNodeSample(tid, sampleNo, subroffset);
1401      }
1402    
1403      // Now we need to copy the result
1404    
1405      roffset=m_samplesize*tid;
1406      offset=roffset;
1407      for (int i=0;i<m_samplesize;++i)
1408      {
1409        m_samples[roffset+i]=(*srcres)[subroffset+i];  
1410      }
1411    
1412      return &m_samples;
1413    }
1414    
 /*  
   \brief Compute the value of the expression (binary operation) for the given sample.  
   \return Vector which stores the value of the subexpression for the given sample.  
   \param v A vector to store intermediate results.  
   \param offset Index in v to begin storing results.  
   \param sampleNo Sample number to evaluate.  
   \param roffset (output parameter) the offset in the return vector where the result begins.  
   
   The return value will be an existing vector so do not deallocate it.  
   If the result is stored in v it should be stored at the offset given.  
   Everything from offset to the end of v should be considered available for this method to use.  
 */  
1415  // This method assumes that any subexpressions which evaluate to Constant or Tagged Data  // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1416  // have already been collapsed to IDENTITY. So we must have at least one expanded child.  // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1417  // If both children are expanded, then we can process them in a single operation (we treat  // If both children are expanded, then we can process them in a single operation (we treat
# Line 618  DataLazy::resolveUnary(ValueType& v, siz Line 1421  DataLazy::resolveUnary(ValueType& v, siz
1421  // There is an additional complication when scalar operations are considered.  // There is an additional complication when scalar operations are considered.
1422  // For example, 2+Vector.  // For example, 2+Vector.
1423  // In this case each double within the point is treated individually  // In this case each double within the point is treated individually
1424  DataTypes::ValueType*  const DataTypes::ValueType*
1425  DataLazy::resolveBinary(ValueType& v,  size_t offset, int sampleNo, size_t& roffset) const  DataLazy::resolveNodeBinary(int tid, int sampleNo, size_t& roffset)
1426  {  {
1427  cout << "Resolve binary: " << toString() << endl;  LAZYDEBUG(cout << "Resolve binary: " << toString() << endl;)
1428    
1429    size_t lroffset=0, rroffset=0;    // offsets in the left and right result vectors    size_t lroffset=0, rroffset=0;    // offsets in the left and right result vectors
1430      // first work out which of the children are expanded      // first work out which of the children are expanded
1431    bool leftExp=(m_left->m_readytype=='E');    bool leftExp=(m_left->m_readytype=='E');
1432    bool rightExp=(m_right->m_readytype=='E');    bool rightExp=(m_right->m_readytype=='E');
1433    bool bigloops=((leftExp && rightExp) || (!leftExp && !rightExp)); // is processing in single step?    if (!leftExp && !rightExp)
1434    int steps=(bigloops?1:getNumDPPSample());    {
1435    size_t chunksize=(bigloops? m_samplesize : getNoValues());    // if bigloops, pretend the whole sample is a datapoint      throw DataException("Programmer Error - please use collapse if neither argument has type 'E'.");
1436    if (m_left->getRank()!=m_right->getRank())    // need to deal with scalar * ? ops    }
1437    {    bool leftScalar=(m_left->getRank()==0);
1438      EsysAssert((m_left->getRank()==0) || (m_right->getRank()==0), "Error - Ranks must match unless one is 0.");    bool rightScalar=(m_right->getRank()==0);
1439      steps=getNumDPPSample()*max(m_left->getNoValues(),m_right->getNoValues());    if ((m_left->getRank()!=m_right->getRank()) && (!leftScalar && !rightScalar))
1440      chunksize=1;    // for scalar    {
1441    }          throw DataException("resolveBinary - ranks of arguments must match unless one of them is scalar.");
1442    int leftStep=((leftExp && !rightExp)? m_right->getNoValues() : 0);    }
1443    int rightStep=((rightExp && !leftExp)? m_left->getNoValues() : 0);    size_t leftsize=m_left->getNoValues();
1444    int resultStep=max(leftStep,rightStep);   // only one (at most) should be !=0    size_t rightsize=m_right->getNoValues();
1445      size_t chunksize=1;           // how many doubles will be processed in one go
1446      int leftstep=0;       // how far should the left offset advance after each step
1447      int rightstep=0;
1448      int numsteps=0;       // total number of steps for the inner loop
1449      int oleftstep=0;  // the o variables refer to the outer loop
1450      int orightstep=0; // The outer loop is only required in cases where there is an extended scalar
1451      int onumsteps=1;
1452      
1453      bool LES=(leftExp && leftScalar); // Left is an expanded scalar
1454      bool RES=(rightExp && rightScalar);
1455      bool LS=(!leftExp && leftScalar); // left is a single scalar
1456      bool RS=(!rightExp && rightScalar);
1457      bool LN=(!leftExp && !leftScalar);    // left is a single non-scalar
1458      bool RN=(!rightExp && !rightScalar);
1459      bool LEN=(leftExp && !leftScalar);    // left is an expanded non-scalar
1460      bool REN=(rightExp && !rightScalar);
1461    
1462      if ((LES && RES) || (LEN && REN)) // both are Expanded scalars or both are expanded non-scalars
1463      {
1464        chunksize=m_left->getNumDPPSample()*leftsize;
1465        leftstep=0;
1466        rightstep=0;
1467        numsteps=1;
1468      }
1469      else if (LES || RES)
1470      {
1471        chunksize=1;
1472        if (LES)        // left is an expanded scalar
1473        {
1474            if (RS)
1475            {
1476               leftstep=1;
1477               rightstep=0;
1478               numsteps=m_left->getNumDPPSample();
1479            }
1480            else        // RN or REN
1481            {
1482               leftstep=0;
1483               oleftstep=1;
1484               rightstep=1;
1485               orightstep=(RN ? -(int)rightsize : 0);
1486               numsteps=rightsize;
1487               onumsteps=m_left->getNumDPPSample();
1488            }
1489        }
1490        else        // right is an expanded scalar
1491        {
1492            if (LS)
1493            {
1494               rightstep=1;
1495               leftstep=0;
1496               numsteps=m_right->getNumDPPSample();
1497            }
1498            else
1499            {
1500               rightstep=0;
1501               orightstep=1;
1502               leftstep=1;
1503               oleftstep=(LN ? -(int)leftsize : 0);
1504               numsteps=leftsize;
1505               onumsteps=m_right->getNumDPPSample();
1506            }
1507        }
1508      }
1509      else  // this leaves (LEN, RS), (LEN, RN) and their transposes
1510      {
1511        if (LEN)    // and Right will be a single value
1512        {
1513            chunksize=rightsize;
1514            leftstep=rightsize;
1515            rightstep=0;
1516            numsteps=m_left->getNumDPPSample();
1517            if (RS)
1518            {
1519               numsteps*=leftsize;
1520            }
1521        }
1522        else    // REN
1523        {
1524            chunksize=leftsize;
1525            rightstep=leftsize;
1526            leftstep=0;
1527            numsteps=m_right->getNumDPPSample();
1528            if (LS)
1529            {
1530               numsteps*=rightsize;
1531            }
1532        }
1533      }
1534    
1535      int resultStep=max(leftstep,rightstep);   // only one (at most) should be !=0
1536      // Get the values of sub-expressions      // Get the values of sub-expressions
1537    const ValueType* left=m_left->resolveSample(v,offset,sampleNo,lroffset);    const ValueType* left=m_left->resolveNodeSample(tid,sampleNo,lroffset);  
1538    const ValueType* right=m_right->resolveSample(v,offset+m_samplesize,sampleNo,rroffset); // Note    const ValueType* right=m_right->resolveNodeSample(tid,sampleNo,rroffset);
1539      // the right child starts further along.  LAZYDEBUG(cout << "Post sub calls in " << toString() << endl;)
1540    double* resultp=&(v[offset]);     // results are stored at the vector offset we recieved  LAZYDEBUG(cout << "shapes=" << DataTypes::shapeToString(m_left->getShape()) << "," << DataTypes::shapeToString(m_right->getShape()) << endl;)
1541    LAZYDEBUG(cout << "chunksize=" << chunksize << endl << "leftstep=" << leftstep << " rightstep=" << rightstep;)
1542    LAZYDEBUG(cout << " numsteps=" << numsteps << endl << "oleftstep=" << oleftstep << " orightstep=" << orightstep;)
1543    LAZYDEBUG(cout << "onumsteps=" << onumsteps << endl;)
1544    LAZYDEBUG(cout << " DPPS=" << m_left->getNumDPPSample() << "," <<m_right->getNumDPPSample() << endl;)
1545    LAZYDEBUG(cout << "" << LS << RS << LN << RN << LES << RES <<LEN << REN <<   endl;)
1546    
1547    LAZYDEBUG(cout << "Left res["<< lroffset<< "]=" << (*left)[lroffset] << endl;)
1548    LAZYDEBUG(cout << "Right res["<< rroffset<< "]=" << (*right)[rroffset] << endl;)
1549    
1550    
1551      roffset=m_samplesize*tid;
1552      double* resultp=&(m_samples[roffset]);        // results are stored at the vector offset we recieved
1553    switch(m_op)    switch(m_op)
1554    {    {
1555      case ADD:      case ADD:
1556      PROC_OP(plus<double>());          PROC_OP(NO_ARG,plus<double>());
1557      break;      break;
1558      case SUB:      case SUB:
1559      PROC_OP(minus<double>());      PROC_OP(NO_ARG,minus<double>());
1560      break;      break;
1561      case MUL:      case MUL:
1562      PROC_OP(multiplies<double>());      PROC_OP(NO_ARG,multiplies<double>());
1563      break;      break;
1564      case DIV:      case DIV:
1565      PROC_OP(divides<double>());      PROC_OP(NO_ARG,divides<double>());
1566      break;      break;
1567      case POW:      case POW:
1568      PROC_OP(::pow);         PROC_OP(double (double,double),::pow);
1569      break;      break;
1570      default:      default:
1571      throw DataException("Programmer error - resolveBinary can not resolve operator "+opToString(m_op)+".");      throw DataException("Programmer error - resolveBinary can not resolve operator "+opToString(m_op)+".");
1572    }    }
1573    roffset=offset;    LAZYDEBUG(cout << "Result res[" << roffset<< "]" << m_samples[roffset] << endl;)
1574    return &v;    return &m_samples;
1575  }  }
1576    
1577    
1578    // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1579    // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1580    // unlike the other resolve helpers, we must treat these datapoints separately.
1581    const DataTypes::ValueType*
1582    DataLazy::resolveNodeTProd(int tid, int sampleNo, size_t& roffset)
1583    {
1584    LAZYDEBUG(cout << "Resolve TensorProduct: " << toString() << endl;)
1585    
1586  /*    size_t lroffset=0, rroffset=0;    // offsets in the left and right result vectors
1587    \brief Compute the value of the expression for the given sample.      // first work out which of the children are expanded
1588    \return Vector which stores the value of the subexpression for the given sample.    bool leftExp=(m_left->m_readytype=='E');
1589    \param v A vector to store intermediate results.    bool rightExp=(m_right->m_readytype=='E');
1590    \param offset Index in v to begin storing results.    int steps=getNumDPPSample();
1591    \param sampleNo Sample number to evaluate.    int leftStep=(leftExp? m_left->getNoValues() : 0);        // do not have scalars as input to this method
1592    \param roffset (output parameter) the offset in the return vector where the result begins.    int rightStep=(rightExp?m_right->getNoValues() : 0);
1593    
1594      int resultStep=getNoValues();
1595      roffset=m_samplesize*tid;
1596      size_t offset=roffset;
1597    
1598      const ValueType* left=m_left->resolveNodeSample(tid, sampleNo, lroffset);
1599    
1600      const ValueType* right=m_right->resolveNodeSample(tid, sampleNo, rroffset);
1601    
1602    LAZYDEBUG(cerr << "[Left shape]=" << DataTypes::shapeToString(m_left->getShape()) << "\n[Right shape]=" << DataTypes::shapeToString(m_right->getShape()) << " result=" <<DataTypes::shapeToString(getShape()) <<  endl;
1603    cout << getNoValues() << endl;)
1604    
1605    
1606    LAZYDEBUG(cerr << "Post sub calls: " << toString() << endl;)
1607    LAZYDEBUG(cout << "LeftExp=" << leftExp << " rightExp=" << rightExp << endl;)
1608    LAZYDEBUG(cout << "LeftR=" << m_left->getRank() << " rightExp=" << m_right->getRank() << endl;)
1609    LAZYDEBUG(cout << "LeftSize=" << m_left->getNoValues() << " RightSize=" << m_right->getNoValues() << endl;)
1610    LAZYDEBUG(cout << "m_samplesize=" << m_samplesize << endl;)
1611    LAZYDEBUG(cout << "outputshape=" << DataTypes::shapeToString(getShape()) << endl;)
1612    LAZYDEBUG(cout << "DPPS=" << m_right->getNumDPPSample() <<"."<<endl;)
1613    
1614      double* resultp=&(m_samples[offset]);     // results are stored at the vector offset we recieved
1615      switch(m_op)
1616      {
1617        case PROD:
1618        for (int i=0;i<steps;++i,resultp+=resultStep)
1619        {
1620              const double *ptr_0 = &((*left)[lroffset]);
1621              const double *ptr_1 = &((*right)[rroffset]);
1622    
1623    LAZYDEBUG(cout << DataTypes::pointToString(*left, m_left->getShape(),lroffset,"LEFT") << endl;)
1624    LAZYDEBUG(cout << DataTypes::pointToString(*right,m_right->getShape(),rroffset, "RIGHT") << endl;)
1625    
1626              matrix_matrix_product(m_SL, m_SM, m_SR, ptr_0, ptr_1, resultp, m_transpose);
1627    
1628          lroffset+=leftStep;
1629          rroffset+=rightStep;
1630        }
1631        break;
1632        default:
1633        throw DataException("Programmer error - resolveTProduct can not resolve operator "+opToString(m_op)+".");
1634      }
1635      roffset=offset;
1636      return &m_samples;
1637    }
1638    
   The return value will be an existing vector so do not deallocate it.  
 */  
 // the vector and the offset are a place where the method could write its data if it wishes  
 // it is not obligated to do so. For example, if it has its own storage already, it can use that.  
 // Hence the return value to indicate where the data is actually stored.  
 // Regardless, the storage should be assumed to be used, even if it isn't.  
1639    
 // the roffset is the offset within the returned vector where the data begins  
1640  const DataTypes::ValueType*  const DataTypes::ValueType*
1641  DataLazy::resolveSample(ValueType& v, size_t offset, int sampleNo, size_t& roffset)  DataLazy::resolveSample(int sampleNo, size_t& roffset)
1642  {  {
1643  cout << "Resolve sample " << toString() << endl;  #ifdef _OPENMP
1644      // collapse so we have a 'E' node or an IDENTITY for some other type      int tid=omp_get_thread_num();
1645    if (m_readytype!='E' && m_op!=IDENTITY)  #else
1646        int tid=0;
1647    #endif
1648    
1649    #ifdef LAZY_STACK_PROF
1650        stackstart[tid]=&tid;
1651        stackend[tid]=&tid;
1652        const DataTypes::ValueType* r=resolveNodeSample(tid, sampleNo, roffset);
1653        size_t d=(size_t)stackstart[tid]-(size_t)stackend[tid];
1654        #pragma omp critical
1655        if (d>maxstackuse)
1656        {
1657    cout << "Max resolve Stack use " << d << endl;
1658            maxstackuse=d;
1659        }
1660        return r;
1661    #else
1662        return resolveNodeSample(tid, sampleNo, roffset);
1663    #endif
1664    }
1665    
1666    
1667    // This needs to do the work of the identity constructor
1668    void
1669    DataLazy::resolveToIdentity()
1670    {
1671       if (m_op==IDENTITY)
1672        return;
1673       DataReady_ptr p=resolveNodeWorker();
1674       makeIdentity(p);
1675    }
1676    
1677    void DataLazy::makeIdentity(const DataReady_ptr& p)
1678    {
1679       m_op=IDENTITY;
1680       m_axis_offset=0;
1681       m_transpose=0;
1682       m_SL=m_SM=m_SR=0;
1683       m_children=m_height=0;
1684       m_id=p;
1685       if(p->isConstant()) {m_readytype='C';}
1686       else if(p->isExpanded()) {m_readytype='E';}
1687       else if (p->isTagged()) {m_readytype='T';}
1688       else {throw DataException("Unknown DataReady instance in convertToIdentity constructor.");}
1689       m_samplesize=p->getNumDPPSample()*p->getNoValues();
1690       m_left.reset();
1691       m_right.reset();
1692    }
1693    
1694    
1695    DataReady_ptr
1696    DataLazy::resolve()
1697    {
1698        resolveToIdentity();
1699        return m_id;
1700    }
1701    
1702    
1703    /* This is really a static method but I think that caused problems in windows */
1704    void
1705    DataLazy::resolveGroupWorker(std::vector<DataLazy*>& dats)
1706    {
1707      if (dats.empty())
1708    {    {
1709      collapse();      return;
1710    }    }
1711    if (m_op==IDENTITY)      vector<DataLazy*> work;
1712      FunctionSpace fs=dats[0]->getFunctionSpace();
1713      bool match=true;
1714      for (int i=dats.size()-1;i>=0;--i)
1715    {    {
1716      const ValueType& vec=m_id->getVector();      if (dats[i]->m_readytype!='E')
1717      if (m_readytype=='C')      {
1718      {          dats[i]->collapse();
1719      roffset=0;      }
1720      return &(vec);      if (dats[i]->m_op!=IDENTITY)
1721      }      {
1722      roffset=m_id->getPointOffset(sampleNo, 0);          work.push_back(dats[i]);
1723      return &(vec);          if (fs!=dats[i]->getFunctionSpace())
1724            {
1725                match=false;
1726            }
1727        }
1728    }    }
1729    if (m_readytype!='E')    if (work.empty())
1730    {    {
1731      throw DataException("Programmer Error - Collapse did not produce an expanded node.");      return;     // no work to do
1732    }    }
1733    switch (getOpgroup(m_op))    if (match)    // all functionspaces match.  Yes I realise this is overly strict
1734      {     // it is possible that dats[0] is one of the objects which we discarded and
1735            // all the other functionspaces match.
1736        vector<DataExpanded*> dep;
1737        vector<ValueType*> vecs;
1738        for (int i=0;i<work.size();++i)
1739        {
1740            dep.push_back(new DataExpanded(fs,work[i]->getShape(), ValueType(work[i]->getNoValues())));
1741            vecs.push_back(&(dep[i]->getVectorRW()));
1742        }
1743        int totalsamples=work[0]->getNumSamples();
1744        const ValueType* res=0; // Storage for answer
1745        int sample;
1746        #pragma omp parallel private(sample, res)
1747        {
1748            size_t roffset=0;
1749            #pragma omp for schedule(static)
1750            for (sample=0;sample<totalsamples;++sample)
1751            {
1752            roffset=0;
1753            int j;
1754            for (j=work.size()-1;j>=0;--j)
1755            {
1756    #ifdef _OPENMP
1757                    res=work[j]->resolveNodeSample(omp_get_thread_num(),sample,roffset);
1758    #else
1759                    res=work[j]->resolveNodeSample(0,sample,roffset);
1760    #endif
1761                    DataVector::size_type outoffset=dep[j]->getPointOffset(sample,0);
1762                    memcpy(&((*vecs[j])[outoffset]),&((*res)[roffset]),work[j]->m_samplesize*sizeof(DataVector::ElementType));
1763            }
1764            }
1765        }
1766        // Now we need to load the new results as identity ops into the lazy nodes
1767        for (int i=work.size()-1;i>=0;--i)
1768        {
1769            work[i]->makeIdentity(boost::dynamic_pointer_cast<DataReady>(dep[i]->getPtr()));
1770        }
1771      }
1772      else  // functionspaces do not match
1773    {    {
1774    case G_UNARY: return resolveUnary(v, offset,sampleNo,roffset);      for (int i=0;i<work.size();++i)
1775    case G_BINARY: return resolveBinary(v, offset,sampleNo,roffset);      {
1776    default:          work[i]->resolveToIdentity();
1777      throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");      }
1778    }    }
1779  }  }
1780    
1781    
1782  // To simplify the memory management, all threads operate on one large vector, rather than one each.  
1783  // Each sample is evaluated independently and copied into the result DataExpanded.  // This version of resolve uses storage in each node to hold results
1784  DataReady_ptr  DataReady_ptr
1785  DataLazy::resolve()  DataLazy::resolveNodeWorker()
1786  {  {
   
 cout << "Sample size=" << m_samplesize << endl;  
 cout << "Buffers=" << m_buffsRequired << endl;  
   
1787    if (m_readytype!='E')     // if the whole sub-expression is Constant or Tagged, then evaluate it normally    if (m_readytype!='E')     // if the whole sub-expression is Constant or Tagged, then evaluate it normally
1788    {    {
1789      collapse();      collapse();
# Line 738  cout << "Buffers=" << m_buffsRequired << Line 1793  cout << "Buffers=" << m_buffsRequired <<
1793      return m_id;      return m_id;
1794    }    }
1795      // from this point on we must have m_op!=IDENTITY and m_readytype=='E'      // from this point on we must have m_op!=IDENTITY and m_readytype=='E'
   size_t threadbuffersize=m_samplesize*(max(1,m_buffsRequired));    // Each thread needs to have enough  
     // storage to evaluate its expression  
   int numthreads=1;  
 #ifdef _OPENMP  
   numthreads=getNumberOfThreads();  
   int threadnum=0;  
 #endif  
   ValueType v(numthreads*threadbuffersize);  
 cout << "Buffer created with size=" << v.size() << endl;  
1796    DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(),  ValueType(getNoValues()));    DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(),  ValueType(getNoValues()));
1797    ValueType& resvec=result->getVector();    ValueType& resvec=result->getVectorRW();
1798    DataReady_ptr resptr=DataReady_ptr(result);    DataReady_ptr resptr=DataReady_ptr(result);
1799    
1800    int sample;    int sample;
   size_t outoffset;     // offset in the output data  
1801    int totalsamples=getNumSamples();    int totalsamples=getNumSamples();
1802    const ValueType* res=0;   // Vector storing the answer    const ValueType* res=0;   // Storage for answer
1803    size_t resoffset=0;       // where in the vector to find the answer  LAZYDEBUG(cout << "Total number of samples=" <<totalsamples << endl;)
1804    #pragma omp parallel for private(sample,resoffset,outoffset,threadnum,res) schedule(static)    #pragma omp parallel private(sample,res)
   for (sample=0;sample<totalsamples;++sample)  
1805    {    {
1806  cout << "################################# " << sample << endl;      size_t roffset=0;
1807    #ifdef LAZY_STACK_PROF
1808        stackstart[omp_get_thread_num()]=&roffset;
1809        stackend[omp_get_thread_num()]=&roffset;
1810    #endif
1811        #pragma omp for schedule(static)
1812        for (sample=0;sample<totalsamples;++sample)
1813        {
1814            roffset=0;
1815  #ifdef _OPENMP  #ifdef _OPENMP
1816      res=resolveSample(v,threadbuffersize*omp_get_thread_num(),sample,resoffset);              res=resolveNodeSample(omp_get_thread_num(),sample,roffset);
1817  #else  #else
1818      res=resolveSample(v,0,sample,resoffset);   // res would normally be v, but not if its a single IDENTITY op.              res=resolveNodeSample(0,sample,roffset);
1819  #endif  #endif
1820  cerr << "-------------------------------- " << endl;  LAZYDEBUG(cout << "Sample #" << sample << endl;)
1821      outoffset=result->getPointOffset(sample,0);  LAZYDEBUG(cout << "Final res[" << roffset<< "]=" << (*res)[roffset] << (*res)[roffset]<< endl; )
1822  cerr << "offset=" << outoffset << endl;              DataVector::size_type outoffset=result->getPointOffset(sample,0);
1823      for (unsigned int i=0;i<m_samplesize;++i,++outoffset,++resoffset)   // copy values into the output vector              memcpy(&(resvec[outoffset]),&((*res)[roffset]),m_samplesize*sizeof(DataVector::ElementType));
1824      {      }
     resvec[outoffset]=(*res)[resoffset];  
     }  
 cerr << "*********************************" << endl;  
1825    }    }
1826    #ifdef LAZY_STACK_PROF
1827      for (int i=0;i<getNumberOfThreads();++i)
1828      {
1829        size_t r=((size_t)stackstart[i] - (size_t)stackend[i]);
1830    //  cout << i << " " << stackstart[i] << " .. " << stackend[i] << " = " <<  r << endl;
1831        if (r>maxstackuse)
1832        {
1833            maxstackuse=r;
1834        }
1835      }
1836      cout << "Max resolve Stack use=" << maxstackuse << endl;
1837    #endif
1838    return resptr;    return resptr;
1839  }  }
1840    
# Line 780  std::string Line 1842  std::string
1842  DataLazy::toString() const  DataLazy::toString() const
1843  {  {
1844    ostringstream oss;    ostringstream oss;
1845    oss << "Lazy Data:";    oss << "Lazy Data: [depth=" << m_height<< "] ";
1846    intoString(oss);    switch (escriptParams.getLAZY_STR_FMT())
1847      {
1848      case 1:   // tree format
1849        oss << endl;
1850        intoTreeString(oss,"");
1851        break;
1852      case 2:   // just the depth
1853        break;
1854      default:
1855        intoString(oss);
1856        break;
1857      }
1858    return oss.str();    return oss.str();
1859  }  }
1860    
# Line 789  DataLazy::toString() const Line 1862  DataLazy::toString() const
1862  void  void
1863  DataLazy::intoString(ostringstream& oss) const  DataLazy::intoString(ostringstream& oss) const
1864  {  {
1865    //    oss << "[" << m_children <<";"<<m_height <<"]";
1866    switch (getOpgroup(m_op))    switch (getOpgroup(m_op))
1867    {    {
1868    case G_IDENTITY:    case G_IDENTITY:
# Line 818  DataLazy::intoString(ostringstream& oss) Line 1892  DataLazy::intoString(ostringstream& oss)
1892      oss << ')';      oss << ')';
1893      break;      break;
1894    case G_UNARY:    case G_UNARY:
1895      case G_UNARY_P:
1896      case G_NP1OUT:
1897      case G_NP1OUT_P:
1898      case G_REDUCTION:
1899        oss << opToString(m_op) << '(';
1900        m_left->intoString(oss);
1901        oss << ')';
1902        break;
1903      case G_TENSORPROD:
1904        oss << opToString(m_op) << '(';
1905        m_left->intoString(oss);
1906        oss << ", ";
1907        m_right->intoString(oss);
1908        oss << ')';
1909        break;
1910      case G_NP1OUT_2P:
1911      oss << opToString(m_op) << '(';      oss << opToString(m_op) << '(';
1912      m_left->intoString(oss);      m_left->intoString(oss);
1913        oss << ", " << m_axis_offset << ", " << m_transpose;
1914      oss << ')';      oss << ')';
1915      break;      break;
1916      case G_CONDEVAL:
1917        oss << opToString(m_op)<< '(' ;
1918        m_mask->intoString(oss);
1919        oss << " ? ";
1920        m_left->intoString(oss);
1921        oss << " : ";
1922        m_right->intoString(oss);
1923        oss << ')';
1924        break;
1925      default:
1926        oss << "UNKNOWN";
1927      }
1928    }
1929    
1930    
1931    void
1932    DataLazy::intoTreeString(ostringstream& oss, string indent) const
1933    {
1934      oss << '[' << m_rank << ':' << setw(3) << m_samplesize << "] " << indent;
1935      switch (getOpgroup(m_op))
1936      {
1937      case G_IDENTITY:
1938        if (m_id->isExpanded())
1939        {
1940           oss << "E";
1941        }
1942        else if (m_id->isTagged())
1943        {
1944          oss << "T";
1945        }
1946        else if (m_id->isConstant())
1947        {
1948          oss << "C";
1949        }
1950        else
1951        {
1952          oss << "?";
1953        }
1954        oss << '@' << m_id.get() << endl;
1955        break;
1956      case G_BINARY:
1957        oss << opToString(m_op) << endl;
1958        indent+='.';
1959        m_left->intoTreeString(oss, indent);
1960        m_right->intoTreeString(oss, indent);
1961        break;
1962      case G_UNARY:
1963      case G_UNARY_P:
1964      case G_NP1OUT:
1965      case G_NP1OUT_P:
1966      case G_REDUCTION:
1967        oss << opToString(m_op) << endl;
1968        indent+='.';
1969        m_left->intoTreeString(oss, indent);
1970        break;
1971      case G_TENSORPROD:
1972        oss << opToString(m_op) << endl;
1973        indent+='.';
1974        m_left->intoTreeString(oss, indent);
1975        m_right->intoTreeString(oss, indent);
1976        break;
1977      case G_NP1OUT_2P:
1978        oss << opToString(m_op) << ", " << m_axis_offset << ", " << m_transpose<< endl;
1979        indent+='.';
1980        m_left->intoTreeString(oss, indent);
1981        break;
1982    default:    default:
1983      oss << "UNKNOWN";      oss << "UNKNOWN";
1984    }    }
1985  }  }
1986    
1987  // Note that in this case, deepCopy does not make copies of the leaves.  
 // Hopefully copy on write (or whatever we end up using) will take care of this.  
1988  DataAbstract*  DataAbstract*
1989  DataLazy::deepCopy()  DataLazy::deepCopy()
1990  {  {
1991    if (m_op==IDENTITY)    switch (getOpgroup(m_op))
1992    {    {
1993      return new DataLazy(m_left);    // we don't need to copy the child here    case G_IDENTITY:  return new DataLazy(m_id->deepCopy()->getPtr());
1994      case G_UNARY:
1995      case G_REDUCTION:      return new DataLazy(m_left->deepCopy()->getPtr(),m_op);
1996      case G_UNARY_P:   return new DataLazy(m_left->deepCopy()->getPtr(), m_op, m_tol);
1997      case G_BINARY:    return new DataLazy(m_left->deepCopy()->getPtr(),m_right->deepCopy()->getPtr(),m_op);
1998      case G_NP1OUT: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(),m_op);
1999      case G_TENSORPROD: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
2000      case G_NP1OUT_P:   return new DataLazy(m_left->deepCopy()->getPtr(),m_op,  m_axis_offset);
2001      case G_NP1OUT_2P:  return new DataLazy(m_left->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
2002      default:
2003        throw DataException("Programmer error - do not know how to deepcopy operator "+opToString(m_op)+".");
2004    }    }
   return new DataLazy(m_left->deepCopy()->getPtr(),m_right->deepCopy()->getPtr(),m_op);  
2005  }  }
2006    
2007    
2008    
2009    // There is no single, natural interpretation of getLength on DataLazy.
2010    // Instances of DataReady can look at the size of their vectors.
2011    // For lazy though, it could be the size the data would be if it were resolved;
2012    // or it could be some function of the lengths of the DataReady instances which
2013    // form part of the expression.
2014    // Rather than have people making assumptions, I have disabled the method.
2015  DataTypes::ValueType::size_type  DataTypes::ValueType::size_type
2016  DataLazy::getLength() const  DataLazy::getLength() const
2017  {  {
2018    return m_length;    throw DataException("getLength() does not make sense for lazy data.");
2019  }  }
2020    
2021    
# Line 853  DataLazy::getSlice(const DataTypes::Regi Line 2025  DataLazy::getSlice(const DataTypes::Regi
2025    throw DataException("getSlice - not implemented for Lazy objects.");    throw DataException("getSlice - not implemented for Lazy objects.");
2026  }  }
2027    
2028    
2029    // To do this we need to rely on our child nodes
2030    DataTypes::ValueType::size_type
2031    DataLazy::getPointOffset(int sampleNo,
2032                     int dataPointNo)
2033    {
2034      if (m_op==IDENTITY)
2035      {
2036        return m_id->getPointOffset(sampleNo,dataPointNo);
2037      }
2038      if (m_readytype!='E')
2039      {
2040        collapse();
2041        return m_id->getPointOffset(sampleNo,dataPointNo);
2042      }
2043      // at this point we do not have an identity node and the expression will be Expanded
2044      // so we only need to know which child to ask
2045      if (m_left->m_readytype=='E')
2046      {
2047        return m_left->getPointOffset(sampleNo,dataPointNo);
2048      }
2049      else
2050      {
2051        return m_right->getPointOffset(sampleNo,dataPointNo);
2052      }
2053    }
2054    
2055    // To do this we need to rely on our child nodes
2056  DataTypes::ValueType::size_type  DataTypes::ValueType::size_type
2057  DataLazy::getPointOffset(int sampleNo,  DataLazy::getPointOffset(int sampleNo,
2058                   int dataPointNo) const                   int dataPointNo) const
2059  {  {
2060    throw DataException("getPointOffset - not implemented for Lazy objects - yet.");    if (m_op==IDENTITY)
2061      {
2062        return m_id->getPointOffset(sampleNo,dataPointNo);
2063      }
2064      if (m_readytype=='E')
2065      {
2066        // at this point we do not have an identity node and the expression will be Expanded
2067        // so we only need to know which child to ask
2068        if (m_left->m_readytype=='E')
2069        {
2070        return m_left->getPointOffset(sampleNo,dataPointNo);
2071        }
2072        else
2073        {
2074        return m_right->getPointOffset(sampleNo,dataPointNo);
2075        }
2076      }
2077      if (m_readytype=='C')
2078      {
2079        return m_left->getPointOffset(sampleNo,dataPointNo); // which child doesn't matter
2080      }
2081      throw DataException("Programmer error - getPointOffset on lazy data may require collapsing (but this object is marked const).");
2082  }  }
2083    
2084  // It would seem that DataTagged will need to be treated differently since even after setting all tags  
2085  // to zero, all the tags from all the DataTags would be in the result.  // I have decided to let Data:: handle this issue.
 // However since they all have the same value (0) whether they are there or not should not matter.  
 // So I have decided that for all types this method will create a constant 0.  
 // It can be promoted up as required.  
 // A possible efficiency concern might be expanded->constant->expanded which has an extra memory management  
 // but we can deal with that if it arrises.  
2086  void  void
2087  DataLazy::setToZero()  DataLazy::setToZero()
2088  {  {
2089    DataTypes::ValueType v(getNoValues(),0);  //   DataTypes::ValueType v(getNoValues(),0);
2090    m_id=DataReady_ptr(new DataConstant(getFunctionSpace(),getShape(),v));  //   m_id=DataReady_ptr(new DataConstant(getFunctionSpace(),getShape(),v));
2091    m_op=IDENTITY;  //   m_op=IDENTITY;
2092    m_right.reset();    //   m_right.reset();  
2093    m_left.reset();  //   m_left.reset();
2094    m_readytype='C';  //   m_readytype='C';
2095    m_buffsRequired=1;  //   m_buffsRequired=1;
2096    
2097      privdebug=privdebug;  // to stop the compiler complaining about unused privdebug
2098      throw DataException("Programmer error - setToZero not supported for DataLazy (DataLazy objects should be read only).");
2099    }
2100    
2101    bool
2102    DataLazy::actsExpanded() const
2103    {
2104        return (m_readytype=='E');
2105  }  }
2106    
2107  }   // end namespace  }   // end namespace

Legend:
Removed from v.1910  
changed lines
  Added in v.3317

  ViewVC Help
Powered by ViewVC 1.1.26