/[escript]/branches/clazy/escriptcore/src/DataLazy.cpp
ViewVC logotype

Diff of /branches/clazy/escriptcore/src/DataLazy.cpp

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

branches/schroedinger/escript/src/DataLazy.cpp revision 1910 by jfenwick, Thu Oct 23 03:05:28 2008 UTC trunk/escript/src/DataLazy.cpp revision 2500 by jfenwick, Tue Jun 30 00:42:38 2009 UTC
# Line 26  Line 26 
26  #include "DataTypes.h"  #include "DataTypes.h"
27  #include "Data.h"  #include "Data.h"
28  #include "UnaryFuncs.h"     // for escript::fsign  #include "UnaryFuncs.h"     // for escript::fsign
29    #include "Utils.h"
30    
31    #include "EscriptParams.h"
32    
33    #include <iomanip>      // for some fancy formatting in debug
34    
35    // #define LAZYDEBUG(X) if (privdebug){X;}
36    #define LAZYDEBUG(X)
37    namespace
38    {
39    bool privdebug=false;
40    
41    #define ENABLEDEBUG privdebug=true;
42    #define DISABLEDEBUG privdebug=false;
43    }
44    
45    // #define SIZELIMIT
46    // #define SIZELIMIT if ((m_height>7) || (m_children>127)) {cerr << "\n!!!!!!! SIZE LIMIT EXCEEDED " << m_children << ";" << m_height << endl << toString() << endl; resolveToIdentity();}
47    //#define SIZELIMIT if ((m_height>7) || (m_children>127)) {resolveToIdentity();}
48    
49    #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {resolveToIdentity();}
50    
51  /*  /*
52  How does DataLazy work?  How does DataLazy work?
# Line 47  I will refer to individual DataLazy obje Line 68  I will refer to individual DataLazy obje
68  Each node also stores:  Each node also stores:
69  - m_readytype \in {'E','T','C','?'} ~ indicates what sort of DataReady would be produced if the expression was  - m_readytype \in {'E','T','C','?'} ~ indicates what sort of DataReady would be produced if the expression was
70      evaluated.      evaluated.
 - m_length ~ how many values would be stored in the answer if the expression was evaluated.  
71  - m_buffsrequired ~ the larged number of samples which would need to be kept simultaneously in order to  - m_buffsrequired ~ the larged number of samples which would need to be kept simultaneously in order to
72      evaluate the expression.      evaluate the expression.
73  - m_samplesize ~ the number of doubles stored in a sample.  - m_samplesize ~ the number of doubles stored in a sample.
# Line 69  The convention that I use, is that the r Line 89  The convention that I use, is that the r
89    
90  For expressions which evaluate to Constant or Tagged, there is a different evaluation method.  For expressions which evaluate to Constant or Tagged, there is a different evaluation method.
91  The collapse method invokes the (non-lazy) operations on the Data class to evaluate the expression.  The collapse method invokes the (non-lazy) operations on the Data class to evaluate the expression.
92    
93    To add a new operator you need to do the following (plus anything I might have forgotten - adding a new group for example):
94    1) Add to the ES_optype.
95    2) determine what opgroup your operation belongs to (X)
96    3) add a string for the op to the end of ES_opstrings
97    4) increase ES_opcount
98    5) add an entry (X) to opgroups
99    6) add an entry to the switch in collapseToReady
100    7) add an entry to resolveX
101  */  */
102    
103    
# Line 78  using namespace boost; Line 107  using namespace boost;
107  namespace escript  namespace escript
108  {  {
109    
 const std::string&  
 opToString(ES_optype op);  
   
110  namespace  namespace
111  {  {
112    
# Line 89  enum ES_opgroup Line 115  enum ES_opgroup
115     G_UNKNOWN,     G_UNKNOWN,
116     G_IDENTITY,     G_IDENTITY,
117     G_BINARY,        // pointwise operations with two arguments     G_BINARY,        // pointwise operations with two arguments
118     G_UNARY      // pointwise operations with one argument     G_UNARY,     // pointwise operations with one argument
119       G_UNARY_P,       // pointwise operations with one argument, requiring a parameter
120       G_NP1OUT,        // non-pointwise op with one output
121       G_NP1OUT_P,      // non-pointwise op with one output requiring a parameter
122       G_TENSORPROD,    // general tensor product
123       G_NP1OUT_2P      // non-pointwise op with one output requiring two params
124  };  };
125    
126    
# Line 100  string ES_opstrings[]={"UNKNOWN","IDENTI Line 131  string ES_opstrings[]={"UNKNOWN","IDENTI
131              "asin","acos","atan","sinh","cosh","tanh","erf",              "asin","acos","atan","sinh","cosh","tanh","erf",
132              "asinh","acosh","atanh",              "asinh","acosh","atanh",
133              "log10","log","sign","abs","neg","pos","exp","sqrt",              "log10","log","sign","abs","neg","pos","exp","sqrt",
134              "1/","where>0","where<0","where>=0","where<=0"};              "1/","where>0","where<0","where>=0","where<=0", "where<>0","where=0",
135  int ES_opcount=32;              "symmetric","nonsymmetric",
136                "prod",
137                "transpose", "trace",
138                "swapaxes"};
139    int ES_opcount=41;
140  ES_opgroup opgroups[]={G_UNKNOWN,G_IDENTITY,G_BINARY,G_BINARY,G_BINARY,G_BINARY, G_BINARY,  ES_opgroup opgroups[]={G_UNKNOWN,G_IDENTITY,G_BINARY,G_BINARY,G_BINARY,G_BINARY, G_BINARY,
141              G_UNARY,G_UNARY,G_UNARY, //10              G_UNARY,G_UNARY,G_UNARY, //10
142              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,    // 17              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,    // 17
143              G_UNARY,G_UNARY,G_UNARY,                    // 20              G_UNARY,G_UNARY,G_UNARY,                    // 20
144              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,        // 28              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,    // 28
145              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY};              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY, G_UNARY_P, G_UNARY_P,      // 35
146                G_NP1OUT,G_NP1OUT,
147                G_TENSORPROD,
148                G_NP1OUT_P, G_NP1OUT_P,
149                G_NP1OUT_2P};
150  inline  inline
151  ES_opgroup  ES_opgroup
152  getOpgroup(ES_optype op)  getOpgroup(ES_optype op)
# Line 124  resultFS(DataAbstract_ptr left, DataAbst Line 163  resultFS(DataAbstract_ptr left, DataAbst
163      // that way, if interpolate is required in any other op we can just throw a      // that way, if interpolate is required in any other op we can just throw a
164      // programming error exception.      // programming error exception.
165    
166      FunctionSpace l=left->getFunctionSpace();
167      if (left->getFunctionSpace()!=right->getFunctionSpace())    FunctionSpace r=right->getFunctionSpace();
168      {    if (l!=r)
169          throw DataException("FunctionSpaces not equal - interpolation not supported on lazy data.");    {
170      }      if (r.probeInterpolation(l))
171      return left->getFunctionSpace();      {
172        return l;
173        }
174        if (l.probeInterpolation(r))
175        {
176        return r;
177        }
178        throw DataException("Cannot interpolate between the FunctionSpaces given for operation "+opToString(op)+".");
179      }
180      return l;
181  }  }
182    
183  // return the shape of the result of "left op right"  // return the shape of the result of "left op right"
184    // the shapes resulting from tensor product are more complex to compute so are worked out elsewhere
185  DataTypes::ShapeType  DataTypes::ShapeType
186  resultShape(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)  resultShape(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
187  {  {
188      if (left->getShape()!=right->getShape())      if (left->getShape()!=right->getShape())
189      {      {
190        if (getOpgroup(op)!=G_BINARY)        if ((getOpgroup(op)!=G_BINARY) && (getOpgroup(op)!=G_NP1OUT))
191        {        {
192          throw DataException("Shapes not the name - shapes must match for (point)binary operations.");          throw DataException("Shapes not the name - shapes must match for (point)binary operations.");
193        }        }
# Line 155  resultShape(DataAbstract_ptr left, DataA Line 204  resultShape(DataAbstract_ptr left, DataA
204      return left->getShape();      return left->getShape();
205  }  }
206    
207  // determine the number of points in the result of "left op right"  // return the shape for "op left"
208  size_t  
209  resultLength(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)  DataTypes::ShapeType
210    resultShape(DataAbstract_ptr left, ES_optype op, int axis_offset)
211  {  {
212     switch (getOpgroup(op))      switch(op)
213     {      {
214     case G_BINARY: return left->getLength();          case TRANS:
215     case G_UNARY: return left->getLength();         {            // for the scoping of variables
216     default:          const DataTypes::ShapeType& s=left->getShape();
217      throw DataException("Programmer Error - attempt to getLength() for operator "+opToString(op)+".");          DataTypes::ShapeType sh;
218     }          int rank=left->getRank();
219            if (axis_offset<0 || axis_offset>rank)
220            {
221                   throw DataException("Error - Data::transpose must have 0 <= axis_offset <= rank=" + rank);
222                }
223                for (int i=0; i<rank; i++)
224            {
225               int index = (axis_offset+i)%rank;
226                   sh.push_back(s[index]); // Append to new shape
227                }
228            return sh;
229           }
230        break;
231        case TRACE:
232           {
233            int rank=left->getRank();
234            if (rank<2)
235            {
236               throw DataException("Trace can only be computed for objects with rank 2 or greater.");
237            }
238            if ((axis_offset>rank-2) || (axis_offset<0))
239            {
240               throw DataException("Trace: axis offset must lie between 0 and rank-2 inclusive.");
241            }
242            if (rank==2)
243            {
244               return DataTypes::scalarShape;
245            }
246            else if (rank==3)
247            {
248               DataTypes::ShapeType sh;
249                   if (axis_offset==0)
250               {
251                    sh.push_back(left->getShape()[2]);
252                   }
253                   else     // offset==1
254               {
255                sh.push_back(left->getShape()[0]);
256                   }
257               return sh;
258            }
259            else if (rank==4)
260            {
261               DataTypes::ShapeType sh;
262               const DataTypes::ShapeType& s=left->getShape();
263                   if (axis_offset==0)
264               {
265                    sh.push_back(s[2]);
266                    sh.push_back(s[3]);
267                   }
268                   else if (axis_offset==1)
269               {
270                    sh.push_back(s[0]);
271                    sh.push_back(s[3]);
272                   }
273               else     // offset==2
274               {
275                sh.push_back(s[0]);
276                sh.push_back(s[1]);
277               }
278               return sh;
279            }
280            else        // unknown rank
281            {
282               throw DataException("Error - Data::trace can only be calculated for rank 2, 3 or 4 object.");
283            }
284           }
285        break;
286            default:
287        throw DataException("Programmer error - resultShape(left,op) can't compute shapes for operator "+opToString(op)+".");
288        }
289    }
290    
291    DataTypes::ShapeType
292    SwapShape(DataAbstract_ptr left, const int axis0, const int axis1)
293    {
294         // This code taken from the Data.cpp swapaxes() method
295         // Some of the checks are probably redundant here
296         int axis0_tmp,axis1_tmp;
297         const DataTypes::ShapeType& s=left->getShape();
298         DataTypes::ShapeType out_shape;
299         // Here's the equivalent of python s_out=s[axis_offset:]+s[:axis_offset]
300         // which goes thru all shape vector elements starting with axis_offset (at index=rank wrap around to 0)
301         int rank=left->getRank();
302         if (rank<2) {
303            throw DataException("Error - Data::swapaxes argument must have at least rank 2.");
304         }
305         if (axis0<0 || axis0>rank-1) {
306            throw DataException("Error - Data::swapaxes: axis0 must be between 0 and rank-1=" + rank-1);
307         }
308         if (axis1<0 || axis1>rank-1) {
309             throw DataException("Error - Data::swapaxes: axis1 must be between 0 and rank-1=" + rank-1);
310         }
311         if (axis0 == axis1) {
312             throw DataException("Error - Data::swapaxes: axis indices must be different.");
313         }
314         if (axis0 > axis1) {
315             axis0_tmp=axis1;
316             axis1_tmp=axis0;
317         } else {
318             axis0_tmp=axis0;
319             axis1_tmp=axis1;
320         }
321         for (int i=0; i<rank; i++) {
322           if (i == axis0_tmp) {
323              out_shape.push_back(s[axis1_tmp]);
324           } else if (i == axis1_tmp) {
325              out_shape.push_back(s[axis0_tmp]);
326           } else {
327              out_shape.push_back(s[i]);
328           }
329         }
330        return out_shape;
331    }
332    
333    
334    // determine the output shape for the general tensor product operation
335    // the additional parameters return information required later for the product
336    // the majority of this code is copy pasted from C_General_Tensor_Product
337    DataTypes::ShapeType
338    GTPShape(DataAbstract_ptr left, DataAbstract_ptr right, int axis_offset, int transpose, int& SL, int& SM, int& SR)
339    {
340        
341      // Get rank and shape of inputs
342      int rank0 = left->getRank();
343      int rank1 = right->getRank();
344      const DataTypes::ShapeType& shape0 = left->getShape();
345      const DataTypes::ShapeType& shape1 = right->getShape();
346    
347      // Prepare for the loops of the product and verify compatibility of shapes
348      int start0=0, start1=0;
349      if (transpose == 0)       {}
350      else if (transpose == 1)  { start0 = axis_offset; }
351      else if (transpose == 2)  { start1 = rank1-axis_offset; }
352      else              { throw DataException("DataLazy GeneralTensorProduct Constructor: Error - transpose should be 0, 1 or 2"); }
353    
354      if (rank0<axis_offset)
355      {
356        throw DataException("DataLazy GeneralTensorProduct Constructor: Error - rank of left < axisoffset");
357      }
358    
359      // Adjust the shapes for transpose
360      DataTypes::ShapeType tmpShape0(rank0);    // pre-sizing the vectors rather
361      DataTypes::ShapeType tmpShape1(rank1);    // than using push_back
362      for (int i=0; i<rank0; i++)   { tmpShape0[i]=shape0[(i+start0)%rank0]; }
363      for (int i=0; i<rank1; i++)   { tmpShape1[i]=shape1[(i+start1)%rank1]; }
364    
365      // Prepare for the loops of the product
366      SL=1, SM=1, SR=1;
367      for (int i=0; i<rank0-axis_offset; i++)   {
368        SL *= tmpShape0[i];
369      }
370      for (int i=rank0-axis_offset; i<rank0; i++)   {
371        if (tmpShape0[i] != tmpShape1[i-(rank0-axis_offset)]) {
372          throw DataException("C_GeneralTensorProduct: Error - incompatible shapes");
373        }
374        SM *= tmpShape0[i];
375      }
376      for (int i=axis_offset; i<rank1; i++)     {
377        SR *= tmpShape1[i];
378      }
379    
380      // Define the shape of the output (rank of shape is the sum of the loop ranges below)
381      DataTypes::ShapeType shape2(rank0+rank1-2*axis_offset);  
382      {         // block to limit the scope of out_index
383         int out_index=0;
384         for (int i=0; i<rank0-axis_offset; i++, ++out_index) { shape2[out_index]=tmpShape0[i]; } // First part of arg_0_Z
385         for (int i=axis_offset; i<rank1; i++, ++out_index)   { shape2[out_index]=tmpShape1[i]; } // Last part of arg_1_Z
386      }
387    
388      if (shape2.size()>ESCRIPT_MAX_DATA_RANK)
389      {
390         ostringstream os;
391         os << "C_GeneralTensorProduct: Error - Attempt to create a rank " << shape2.size() << " object. The maximum rank is " << ESCRIPT_MAX_DATA_RANK << ".";
392         throw DataException(os.str());
393      }
394    
395      return shape2;
396  }  }
397    
398  // determine the number of samples requires to evaluate an expression combining left and right  // determine the number of samples requires to evaluate an expression combining left and right
399    // NP1OUT needs an extra buffer because we can't write the answers over the top of the input.
400    // The same goes for G_TENSORPROD
401    // It might seem that pointwise binary ops (G_BINARY) could be written over the top of the lefts.
402    // This would be true were it not for the possibility that the LHS could be a scalar which needs to be examined
403    // multiple times
404  int  int
405  calcBuffs(const DataLazy_ptr& left, const DataLazy_ptr& right, ES_optype op)  calcBuffs(const DataLazy_ptr& left, const DataLazy_ptr& right, ES_optype op)
406  {  {
407     switch(getOpgroup(op))     switch(getOpgroup(op))
408     {     {
409     case G_IDENTITY: return 1;     case G_IDENTITY: return 1;
410     case G_BINARY: return max(left->getBuffsRequired(),right->getBuffsRequired()+1);     case G_BINARY: return 1+max(left->getBuffsRequired(),right->getBuffsRequired()+1);
411     case G_UNARY: return max(left->getBuffsRequired(),1);     case G_UNARY:
412       case G_UNARY_P: return max(left->getBuffsRequired(),1);
413       case G_NP1OUT: return 1+max(left->getBuffsRequired(),1);
414       case G_NP1OUT_P: return 1+max(left->getBuffsRequired(),1);
415       case G_TENSORPROD: return 1+max(left->getBuffsRequired(),right->getBuffsRequired()+1);
416       case G_NP1OUT_2P: return 1+max(left->getBuffsRequired(),1);
417     default:     default:
418      throw DataException("Programmer Error - attempt to calcBuffs() for operator "+opToString(op)+".");      throw DataException("Programmer Error - attempt to calcBuffs() for operator "+opToString(op)+".");
419     }     }
# Line 198  opToString(ES_optype op) Line 435  opToString(ES_optype op)
435    return ES_opstrings[op];    return ES_opstrings[op];
436  }  }
437    
438    #ifdef LAZY_NODE_STORAGE
439    void DataLazy::LazyNodeSetup()
440    {
441    #ifdef _OPENMP
442        int numthreads=omp_get_max_threads();
443        m_samples.resize(numthreads*m_samplesize);
444        m_sampleids=new int[numthreads];
445        for (int i=0;i<numthreads;++i)
446        {
447            m_sampleids[i]=-1;  
448        }
449    #else
450        m_samples.resize(m_samplesize);
451        m_sampleids=new int[1];
452        m_sampleids[0]=-1;
453    #endif  // _OPENMP
454    }
455    #endif   // LAZY_NODE_STORAGE
456    
457    
458    // Creates an identity node
459  DataLazy::DataLazy(DataAbstract_ptr p)  DataLazy::DataLazy(DataAbstract_ptr p)
460      : parent(p->getFunctionSpace(),p->getShape()),      : parent(p->getFunctionSpace(),p->getShape())
461      m_op(IDENTITY)  #ifdef LAZY_NODE_STORAGE
462        ,m_sampleids(0),
463        m_samples(1)
464    #endif
465  {  {
466     if (p->isLazy())     if (p->isLazy())
467     {     {
# Line 212  DataLazy::DataLazy(DataAbstract_ptr p) Line 472  DataLazy::DataLazy(DataAbstract_ptr p)
472     }     }
473     else     else
474     {     {
475      m_id=dynamic_pointer_cast<DataReady>(p);      p->makeLazyShared();
476      if(p->isConstant()) {m_readytype='C';}      DataReady_ptr dr=dynamic_pointer_cast<DataReady>(p);
477      else if(p->isExpanded()) {m_readytype='E';}      makeIdentity(dr);
478      else if (p->isTagged()) {m_readytype='T';}  LAZYDEBUG(cout << "Wrapping " << dr.get() << " id=" << m_id.get() << endl;)
     else {throw DataException("Unknown DataReady instance in DataLazy constructor.");}  
479     }     }
480     m_length=p->getLength();  LAZYDEBUG(cout << "(1)Lazy created with " << m_samplesize << endl;)
    m_buffsRequired=1;  
    m_samplesize=getNumDPPSample()*getNoValues();  
 cout << "(1)Lazy created with " << m_samplesize << endl;  
481  }  }
482    
   
   
   
483  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op)  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op)
484      : parent(left->getFunctionSpace(),left->getShape()),      : parent(left->getFunctionSpace(),left->getShape()),
485      m_op(op)      m_op(op),
486        m_axis_offset(0),
487        m_transpose(0),
488        m_SL(0), m_SM(0), m_SR(0)
489  {  {
490     if (getOpgroup(op)!=G_UNARY)     if ((getOpgroup(op)!=G_UNARY) && (getOpgroup(op)!=G_NP1OUT))
491     {     {
492      throw DataException("Programmer error - constructor DataLazy(left, op) will only process UNARY operations.");      throw DataException("Programmer error - constructor DataLazy(left, op) will only process UNARY operations.");
493     }     }
494    
495     DataLazy_ptr lleft;     DataLazy_ptr lleft;
496     if (!left->isLazy())     if (!left->isLazy())
497     {     {
# Line 245  DataLazy::DataLazy(DataAbstract_ptr left Line 502  DataLazy::DataLazy(DataAbstract_ptr left
502      lleft=dynamic_pointer_cast<DataLazy>(left);      lleft=dynamic_pointer_cast<DataLazy>(left);
503     }     }
504     m_readytype=lleft->m_readytype;     m_readytype=lleft->m_readytype;
    m_length=left->getLength();  
505     m_left=lleft;     m_left=lleft;
506     m_buffsRequired=1;     m_buffsRequired=calcBuffs(m_left, m_right,m_op); // yeah m_right will be null at this point
507     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
508       m_maxsamplesize=max(m_samplesize,m_left->getMaxSampleSize());
509       m_children=m_left->m_children+1;
510       m_height=m_left->m_height+1;
511    #ifdef LAZY_NODE_STORAGE
512       LazyNodeSetup();
513    #endif
514       SIZELIMIT
515  }  }
516    
517    
518    // In this constructor we need to consider interpolation
519  DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)  DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
520      : parent(resultFS(left,right,op), resultShape(left,right,op)),      : parent(resultFS(left,right,op), resultShape(left,right,op)),
521      m_op(op)      m_op(op),
522        m_SL(0), m_SM(0), m_SR(0)
523  {  {
524     if (getOpgroup(op)!=G_BINARY)  LAZYDEBUG(cout << "Forming operator with " << left.get() << " " << right.get() << endl;)
525       if ((getOpgroup(op)!=G_BINARY))
526     {     {
527      throw DataException("Programmer error - constructor DataLazy(left, right, op) will only process BINARY operations.");      throw DataException("Programmer error - constructor DataLazy(left, right, op) will only process BINARY operations.");
528     }     }
529    
530       if (getFunctionSpace()!=left->getFunctionSpace())    // left needs to be interpolated
531       {
532        FunctionSpace fs=getFunctionSpace();
533        Data ltemp(left);
534        Data tmp(ltemp,fs);
535        left=tmp.borrowDataPtr();
536       }
537       if (getFunctionSpace()!=right->getFunctionSpace())   // right needs to be interpolated
538       {
539        Data tmp(Data(right),getFunctionSpace());
540        right=tmp.borrowDataPtr();
541    LAZYDEBUG(cout << "Right interpolation required " << right.get() << endl;)
542       }
543       left->operandCheck(*right);
544    
545     if (left->isLazy())          // the children need to be DataLazy. Wrap them in IDENTITY if required     if (left->isLazy())          // the children need to be DataLazy. Wrap them in IDENTITY if required
546     {     {
547      m_left=dynamic_pointer_cast<DataLazy>(left);      m_left=dynamic_pointer_cast<DataLazy>(left);
548    LAZYDEBUG(cout << "Left is " << m_left->toString() << endl;)
549     }     }
550     else     else
551     {     {
552      m_left=DataLazy_ptr(new DataLazy(left));      m_left=DataLazy_ptr(new DataLazy(left));
553    LAZYDEBUG(cout << "Left " << left.get() << " wrapped " << m_left->m_id.get() << endl;)
554     }     }
555     if (right->isLazy())     if (right->isLazy())
556     {     {
557      m_right=dynamic_pointer_cast<DataLazy>(right);      m_right=dynamic_pointer_cast<DataLazy>(right);
558    LAZYDEBUG(cout << "Right is " << m_right->toString() << endl;)
559     }     }
560     else     else
561     {     {
562      m_right=DataLazy_ptr(new DataLazy(right));      m_right=DataLazy_ptr(new DataLazy(right));
563    LAZYDEBUG(cout << "Right " << right.get() << " wrapped " << m_right->m_id.get() << endl;)
564     }     }
565     char lt=m_left->m_readytype;     char lt=m_left->m_readytype;
566     char rt=m_right->m_readytype;     char rt=m_right->m_readytype;
# Line 290  DataLazy::DataLazy(DataAbstract_ptr left Line 576  DataLazy::DataLazy(DataAbstract_ptr left
576     {     {
577      m_readytype='C';      m_readytype='C';
578     }     }
579     m_length=resultLength(m_left,m_right,m_op);     m_samplesize=getNumDPPSample()*getNoValues();
580     m_samplesize=getNumDPPSample()*getNoValues();         m_maxsamplesize=max(max(m_samplesize,m_right->getMaxSampleSize()),m_left->getMaxSampleSize());  
581     m_buffsRequired=calcBuffs(m_left, m_right,m_op);     m_buffsRequired=calcBuffs(m_left, m_right,m_op);
582  cout << "(3)Lazy created with " << m_samplesize << endl;     m_children=m_left->m_children+m_right->m_children+2;
583       m_height=max(m_left->m_height,m_right->m_height)+1;
584    #ifdef LAZY_NODE_STORAGE
585       LazyNodeSetup();
586    #endif
587       SIZELIMIT
588    LAZYDEBUG(cout << "(3)Lazy created with " << m_samplesize << endl;)
589  }  }
590    
591    DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op, int axis_offset, int transpose)
592        : parent(resultFS(left,right,op), GTPShape(left,right, axis_offset, transpose, m_SL,m_SM, m_SR)),
593        m_op(op),
594        m_axis_offset(axis_offset),
595        m_transpose(transpose)
596    {
597       if ((getOpgroup(op)!=G_TENSORPROD))
598       {
599        throw DataException("Programmer error - constructor DataLazy(left, right, op, ax, tr) will only process BINARY operations which require parameters.");
600       }
601       if ((transpose>2) || (transpose<0))
602       {
603        throw DataException("DataLazy GeneralTensorProduct constructor: Error - transpose should be 0, 1 or 2");
604       }
605       if (getFunctionSpace()!=left->getFunctionSpace())    // left needs to be interpolated
606       {
607        FunctionSpace fs=getFunctionSpace();
608        Data ltemp(left);
609        Data tmp(ltemp,fs);
610        left=tmp.borrowDataPtr();
611       }
612       if (getFunctionSpace()!=right->getFunctionSpace())   // right needs to be interpolated
613       {
614        Data tmp(Data(right),getFunctionSpace());
615        right=tmp.borrowDataPtr();
616       }
617    //    left->operandCheck(*right);
618    
619       if (left->isLazy())          // the children need to be DataLazy. Wrap them in IDENTITY if required
620       {
621        m_left=dynamic_pointer_cast<DataLazy>(left);
622       }
623       else
624       {
625        m_left=DataLazy_ptr(new DataLazy(left));
626       }
627       if (right->isLazy())
628       {
629        m_right=dynamic_pointer_cast<DataLazy>(right);
630       }
631       else
632       {
633        m_right=DataLazy_ptr(new DataLazy(right));
634       }
635       char lt=m_left->m_readytype;
636       char rt=m_right->m_readytype;
637       if (lt=='E' || rt=='E')
638       {
639        m_readytype='E';
640       }
641       else if (lt=='T' || rt=='T')
642       {
643        m_readytype='T';
644       }
645       else
646       {
647        m_readytype='C';
648       }
649       m_samplesize=getNumDPPSample()*getNoValues();
650       m_maxsamplesize=max(max(m_samplesize,m_right->getMaxSampleSize()),m_left->getMaxSampleSize());  
651       m_buffsRequired=calcBuffs(m_left, m_right,m_op);
652       m_children=m_left->m_children+m_right->m_children+2;
653       m_height=max(m_left->m_height,m_right->m_height)+1;
654    #ifdef LAZY_NODE_STORAGE
655       LazyNodeSetup();
656    #endif
657       SIZELIMIT
658    LAZYDEBUG(cout << "(4)Lazy created with " << m_samplesize << endl;)
659    }
660    
661    
662    DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, int axis_offset)
663        : parent(left->getFunctionSpace(), resultShape(left,op, axis_offset)),
664        m_op(op),
665        m_axis_offset(axis_offset),
666        m_transpose(0),
667        m_tol(0)
668    {
669       if ((getOpgroup(op)!=G_NP1OUT_P))
670       {
671        throw DataException("Programmer error - constructor DataLazy(left, op, ax) will only process UNARY operations which require parameters.");
672       }
673       DataLazy_ptr lleft;
674       if (!left->isLazy())
675       {
676        lleft=DataLazy_ptr(new DataLazy(left));
677       }
678       else
679       {
680        lleft=dynamic_pointer_cast<DataLazy>(left);
681       }
682       m_readytype=lleft->m_readytype;
683       m_left=lleft;
684       m_buffsRequired=calcBuffs(m_left, m_right,m_op); // yeah m_right will be null at this point
685       m_samplesize=getNumDPPSample()*getNoValues();
686       m_maxsamplesize=max(m_samplesize,m_left->getMaxSampleSize());
687       m_children=m_left->m_children+1;
688       m_height=m_left->m_height+1;
689    #ifdef LAZY_NODE_STORAGE
690       LazyNodeSetup();
691    #endif
692       SIZELIMIT
693    LAZYDEBUG(cout << "(5)Lazy created with " << m_samplesize << endl;)
694    }
695    
696    DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, double tol)
697        : parent(left->getFunctionSpace(), left->getShape()),
698        m_op(op),
699        m_axis_offset(0),
700        m_transpose(0),
701        m_tol(tol)
702    {
703       if ((getOpgroup(op)!=G_UNARY_P))
704       {
705        throw DataException("Programmer error - constructor DataLazy(left, op, tol) will only process UNARY operations which require parameters.");
706       }
707       DataLazy_ptr lleft;
708       if (!left->isLazy())
709       {
710        lleft=DataLazy_ptr(new DataLazy(left));
711       }
712       else
713       {
714        lleft=dynamic_pointer_cast<DataLazy>(left);
715       }
716       m_readytype=lleft->m_readytype;
717       m_left=lleft;
718       m_buffsRequired=calcBuffs(m_left, m_right,m_op); // yeah m_right will be null at this point
719       m_samplesize=getNumDPPSample()*getNoValues();
720       m_maxsamplesize=max(m_samplesize,m_left->getMaxSampleSize());
721       m_children=m_left->m_children+1;
722       m_height=m_left->m_height+1;
723    #ifdef LAZY_NODE_STORAGE
724       LazyNodeSetup();
725    #endif
726       SIZELIMIT
727    LAZYDEBUG(cout << "(6)Lazy created with " << m_samplesize << endl;)
728    }
729    
730    
731    DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, const int axis0, const int axis1)
732        : parent(left->getFunctionSpace(), SwapShape(left,axis0,axis1)),
733        m_op(op),
734        m_axis_offset(axis0),
735        m_transpose(axis1),
736        m_tol(0)
737    {
738       if ((getOpgroup(op)!=G_NP1OUT_2P))
739       {
740        throw DataException("Programmer error - constructor DataLazy(left, op, tol) will only process UNARY operations which require two integer parameters.");
741       }
742       DataLazy_ptr lleft;
743       if (!left->isLazy())
744       {
745        lleft=DataLazy_ptr(new DataLazy(left));
746       }
747       else
748       {
749        lleft=dynamic_pointer_cast<DataLazy>(left);
750       }
751       m_readytype=lleft->m_readytype;
752       m_left=lleft;
753       m_buffsRequired=calcBuffs(m_left, m_right,m_op); // yeah m_right will be null at this point
754       m_samplesize=getNumDPPSample()*getNoValues();
755       m_maxsamplesize=max(m_samplesize,m_left->getMaxSampleSize());
756       m_children=m_left->m_children+1;
757       m_height=m_left->m_height+1;
758    #ifdef LAZY_NODE_STORAGE
759       LazyNodeSetup();
760    #endif
761       SIZELIMIT
762    LAZYDEBUG(cout << "(7)Lazy created with " << m_samplesize << endl;)
763    }
764    
765  DataLazy::~DataLazy()  DataLazy::~DataLazy()
766  {  {
767    #ifdef LAZY_NODE_SETUP
768       delete[] m_sampleids;
769       delete[] m_samples;
770    #endif
771  }  }
772    
773    
# Line 309  DataLazy::getBuffsRequired() const Line 778  DataLazy::getBuffsRequired() const
778  }  }
779    
780    
// Return the largest sample size (in doubles) of any node in this
// expression tree; cached in m_maxsamplesize by the constructors.
size_t
DataLazy::getMaxSampleSize() const
{
    return m_maxsamplesize;
}
786    
787    
788    
789    size_t
790    DataLazy::getSampleBufferSize() const
791    {
792        return m_maxsamplesize*(max(1,m_buffsRequired));
793    }
794    
795  /*  /*
796    \brief Evaluates the expression using methods on Data.    \brief Evaluates the expression using methods on Data.
797    This does the work for the collapse method.    This does the work for the collapse method.
# Line 328  DataLazy::collapseToReady() Line 811  DataLazy::collapseToReady()
811    DataReady_ptr pleft=m_left->collapseToReady();    DataReady_ptr pleft=m_left->collapseToReady();
812    Data left(pleft);    Data left(pleft);
813    Data right;    Data right;
814    if (getOpgroup(m_op)==G_BINARY)    if ((getOpgroup(m_op)==G_BINARY) || (getOpgroup(m_op)==G_TENSORPROD))
815    {    {
816      right=Data(m_right->collapseToReady());      right=Data(m_right->collapseToReady());
817    }    }
# Line 427  DataLazy::collapseToReady() Line 910  DataLazy::collapseToReady()
910      case LEZ:      case LEZ:
911      result=left.whereNonPositive();      result=left.whereNonPositive();
912      break;      break;
913        case NEZ:
914        result=left.whereNonZero(m_tol);
915        break;
916        case EZ:
917        result=left.whereZero(m_tol);
918        break;
919        case SYM:
920        result=left.symmetric();
921        break;
922        case NSYM:
923        result=left.nonsymmetric();
924        break;
925        case PROD:
926        result=C_GeneralTensorProduct(left,right,m_axis_offset, m_transpose);
927        break;
928        case TRANS:
929        result=left.transpose(m_axis_offset);
930        break;
931        case TRACE:
932        result=left.trace(m_axis_offset);
933        break;
934        case SWAP:
935        result=left.swapaxes(m_axis_offset, m_transpose);
936        break;
937      default:      default:
938      throw DataException("Programmer error - do not know how to resolve operator "+opToString(m_op)+".");      throw DataException("Programmer error - collapseToReady does not know how to resolve operator "+opToString(m_op)+".");
939    }    }
940    return result.borrowReadyPtr();    return result.borrowReadyPtr();
941  }  }
# Line 455  DataLazy::collapse() Line 962  DataLazy::collapse()
962  }  }
963    
964  /*  /*
965    \brief Compute the value of the expression (binary operation) for the given sample.    \brief Compute the value of the expression (unary operation) for the given sample.
966    \return Vector which stores the value of the subexpression for the given sample.    \return Vector which stores the value of the subexpression for the given sample.
967    \param v A vector to store intermediate results.    \param v A vector to store intermediate results.
968    \param offset Index in v to begin storing results.    \param offset Index in v to begin storing results.
# Line 483  DataLazy::resolveUnary(ValueType& v, siz Line 990  DataLazy::resolveUnary(ValueType& v, siz
990    switch (m_op)    switch (m_op)
991    {    {
992      case SIN:        case SIN:  
993      tensor_unary_operation(m_samplesize, left, result, ::sin);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sin);
994      break;      break;
995      case COS:      case COS:
996      tensor_unary_operation(m_samplesize, left, result, ::cos);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cos);
997      break;      break;
998      case TAN:      case TAN:
999      tensor_unary_operation(m_samplesize, left, result, ::tan);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tan);
1000      break;      break;
1001      case ASIN:      case ASIN:
1002      tensor_unary_operation(m_samplesize, left, result, ::asin);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::asin);
1003      break;      break;
1004      case ACOS:      case ACOS:
1005      tensor_unary_operation(m_samplesize, left, result, ::acos);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::acos);
1006      break;      break;
1007      case ATAN:      case ATAN:
1008      tensor_unary_operation(m_samplesize, left, result, ::atan);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::atan);
1009      break;      break;
1010      case SINH:      case SINH:
1011      tensor_unary_operation(m_samplesize, left, result, ::sinh);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sinh);
1012      break;      break;
1013      case COSH:      case COSH:
1014      tensor_unary_operation(m_samplesize, left, result, ::cosh);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cosh);
1015      break;      break;
1016      case TANH:      case TANH:
1017      tensor_unary_operation(m_samplesize, left, result, ::tanh);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tanh);
1018      break;      break;
1019      case ERF:      case ERF:
1020  #ifdef _WIN32  #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1021      throw DataException("Error - Data:: erf function is not supported on _WIN32 platforms.");      throw DataException("Error - Data:: erf function is not supported on _WIN32 platforms.");
1022  #else  #else
1023      tensor_unary_operation(m_samplesize, left, result, ::erf);      tensor_unary_operation(m_samplesize, left, result, ::erf);
1024      break;      break;
1025  #endif  #endif
1026     case ASINH:     case ASINH:
1027  #ifdef _WIN32  #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1028      tensor_unary_operation(m_samplesize, left, result, escript::asinh_substitute);      tensor_unary_operation(m_samplesize, left, result, escript::asinh_substitute);
1029  #else  #else
1030      tensor_unary_operation(m_samplesize, left, result, ::asinh);      tensor_unary_operation(m_samplesize, left, result, ::asinh);
1031  #endif    #endif  
1032      break;      break;
1033     case ACOSH:     case ACOSH:
1034  #ifdef _WIN32  #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1035      tensor_unary_operation(m_samplesize, left, result, escript::acosh_substitute);      tensor_unary_operation(m_samplesize, left, result, escript::acosh_substitute);
1036  #else  #else
1037      tensor_unary_operation(m_samplesize, left, result, ::acosh);      tensor_unary_operation(m_samplesize, left, result, ::acosh);
1038  #endif    #endif  
1039      break;      break;
1040     case ATANH:     case ATANH:
1041  #ifdef _WIN32  #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1042      tensor_unary_operation(m_samplesize, left, result, escript::atanh_substitute);      tensor_unary_operation(m_samplesize, left, result, escript::atanh_substitute);
1043  #else  #else
1044      tensor_unary_operation(m_samplesize, left, result, ::atanh);      tensor_unary_operation(m_samplesize, left, result, ::atanh);
1045  #endif    #endif  
1046      break;      break;
1047      case LOG10:      case LOG10:
1048      tensor_unary_operation(m_samplesize, left, result, ::log10);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log10);
1049      break;      break;
1050      case LOG:      case LOG:
1051      tensor_unary_operation(m_samplesize, left, result, ::log);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log);
1052      break;      break;
1053      case SIGN:      case SIGN:
1054      tensor_unary_operation(m_samplesize, left, result, escript::fsign);      tensor_unary_operation(m_samplesize, left, result, escript::fsign);
1055      break;      break;
1056      case ABS:      case ABS:
1057      tensor_unary_operation(m_samplesize, left, result, ::fabs);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::fabs);
1058      break;      break;
1059      case NEG:      case NEG:
1060      tensor_unary_operation(m_samplesize, left, result, negate<double>());      tensor_unary_operation(m_samplesize, left, result, negate<double>());
# Line 558  DataLazy::resolveUnary(ValueType& v, siz Line 1065  DataLazy::resolveUnary(ValueType& v, siz
1065      throw DataException("Programmer error - POS not supported for lazy data.");      throw DataException("Programmer error - POS not supported for lazy data.");
1066      break;      break;
1067      case EXP:      case EXP:
1068      tensor_unary_operation(m_samplesize, left, result, ::exp);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::exp);
1069      break;      break;
1070      case SQRT:      case SQRT:
1071      tensor_unary_operation(m_samplesize, left, result, ::sqrt);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sqrt);
1072      break;      break;
1073      case RECIP:      case RECIP:
1074      tensor_unary_operation(m_samplesize, left, result, bind1st(divides<double>(),1.));      tensor_unary_operation(m_samplesize, left, result, bind1st(divides<double>(),1.));
# Line 578  DataLazy::resolveUnary(ValueType& v, siz Line 1085  DataLazy::resolveUnary(ValueType& v, siz
1085      case LEZ:      case LEZ:
1086      tensor_unary_operation(m_samplesize, left, result, bind2nd(less_equal<double>(),0.0));      tensor_unary_operation(m_samplesize, left, result, bind2nd(less_equal<double>(),0.0));
1087      break;      break;
1088    // There are actually G_UNARY_P but I don't see a compelling reason to treat them differently
1089        case NEZ:
1090        tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsGT(),m_tol));
1091        break;
1092        case EZ:
1093        tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsLTE(),m_tol));
1094        break;
1095    
1096      default:      default:
1097      throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");      throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");
# Line 589  DataLazy::resolveUnary(ValueType& v, siz Line 1103  DataLazy::resolveUnary(ValueType& v, siz
1103    
1104    
1105    
1106  #define PROC_OP(X) \  
1107      for (int i=0;i<steps;++i,resultp+=resultStep) \  /*
1108      { \    \brief Compute the value of the expression (unary operation) for the given sample.
1109         tensor_binary_operation(chunksize, &((*left)[lroffset]), &((*right)[rroffset]), resultp, X); \    \return Vector which stores the value of the subexpression for the given sample.
1110         lroffset+=leftStep; \    \param v A vector to store intermediate results.
1111         rroffset+=rightStep; \    \param offset Index in v to begin storing results.
1112      \param sampleNo Sample number to evaluate.
1113      \param roffset (output parameter) the offset in the return vector where the result begins.
1114    
1115      The return value will be an existing vector so do not deallocate it.
1116      If the result is stored in v it should be stored at the offset given.
1117      Everything from offset to the end of v should be considered available for this method to use.
1118    */
// Resolve a shape-changing unary operation (SYM/NSYM) for one sample.
// v is the scratch/result vector; offset is where this node's result goes;
// roffset reports where the result actually starts (here: offset itself).
// The returned pointer is an existing vector - do not deallocate it.
DataTypes::ValueType*
DataLazy::resolveNP1OUT(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const
{
	// we assume that any collapsing has been done before we get here
	// since we only have one argument we don't need to think about only
	// processing single points.
  if (m_readytype!='E')
  {
    throw DataException("Programmer error - resolveNP1OUT should only be called on expanded Data.");
  }
	// since we can't write the result over the input, we need a result offset further along
  size_t subroffset=roffset+m_samplesize;
LAZYDEBUG(cerr << "subroffset=" << subroffset << endl;)
  // subroffset is an output parameter: resolveSample overwrites it with the
  // real start of the child's data.
  const ValueType* vleft=m_left->resolveSample(v,offset+m_left->m_samplesize,sampleNo,subroffset);
  roffset=offset;
  size_t loop=0;
  // m_readytype=='E' is guaranteed by the check above, so numsteps is the
  // number of datapoints per sample.
  size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
  size_t step=getNoValues();
  switch (m_op)
  {
    case SYM:
    // one symmetric() call per datapoint; both cursors advance by one
    // datapoint (input and output datapoints have the same size here)
    for (loop=0;loop<numsteps;++loop)
    {
        DataMaths::symmetric(*vleft,m_left->getShape(),subroffset, v, getShape(), offset);
        subroffset+=step;
        offset+=step;
    }
    break;
    case NSYM:
    for (loop=0;loop<numsteps;++loop)
    {
        DataMaths::nonsymmetric(*vleft,m_left->getShape(),subroffset, v, getShape(), offset);
        subroffset+=step;
        offset+=step;
    }
    break;
    default:
    throw DataException("Programmer error - resolveNP1OUT can not resolve operator "+opToString(m_op)+".");
  }
  return &v;
}
1160    
1161    /*
1162      \brief Compute the value of the expression (unary operation) for the given sample.
1163      \return Vector which stores the value of the subexpression for the given sample.
1164      \param v A vector to store intermediate results.
1165      \param offset Index in v to begin storing results.
1166      \param sampleNo Sample number to evaluate.
1167      \param roffset (output parameter) the offset in the return vector where the result begins.
1168    
1169      The return value will be an existing vector so do not deallocate it.
1170      If the result is stored in v it should be stored at the offset given.
1171      Everything from offset to the end of v should be considered available for this method to use.
1172    */
// Resolve a unary operation with one integer parameter (TRACE/TRANS,
// parameter m_axis_offset) for one sample. Input and output datapoints
// differ in size, so separate instep/outstep cursors are used.
// The returned pointer is an existing vector - do not deallocate it.
DataTypes::ValueType*
DataLazy::resolveNP1OUT_P(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const
{
	// we assume that any collapsing has been done before we get here
	// since we only have one argument we don't need to think about only
	// processing single points.
  if (m_readytype!='E')
  {
    throw DataException("Programmer error - resolveNP1OUT_P should only be called on expanded Data.");
  }
	// since we can't write the result over the input, we need a result offset further along
  // subroffset is an output parameter filled in by resolveSample below.
  size_t subroffset;
  const ValueType* vleft=m_left->resolveSample(v,offset+m_left->m_samplesize,sampleNo,subroffset);
LAZYDEBUG(cerr << "srcsamplesize=" << offset+m_left->m_samplesize << " beg=" << subroffset << endl;)
LAZYDEBUG(cerr << "Offset for 5800=" << getPointOffset(5800/getNumDPPSample(),5800%getNumDPPSample()) << endl;)
  roffset=offset;
  size_t loop=0;
  // m_readytype=='E' is guaranteed by the check above.
  size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
  size_t outstep=getNoValues();
  size_t instep=m_left->getNoValues();
LAZYDEBUG(cerr << "instep=" << instep << " outstep=" << outstep<< " numsteps=" << numsteps << endl;)
  switch (m_op)
  {
    case TRACE:
    for (loop=0;loop<numsteps;++loop)
    {
// zz and the zz==5800 blocks are leftover debug scaffolding for inspecting a
// single global datapoint; with LAZYDEBUG disabled the bodies are empty.
size_t zz=sampleNo*getNumDPPSample()+loop;
if (zz==5800)
{
LAZYDEBUG(cerr << "point=" <<  zz<< endl;)
LAZYDEBUG(cerr << "Input to  trace=" << DataTypes::pointToString(*vleft,m_left->getShape(),subroffset,"") << endl;)
LAZYDEBUG(cerr << "Offset for point=" << getPointOffset(5800/getNumDPPSample(),5800%getNumDPPSample()) << " vs ";)
LAZYDEBUG(cerr << subroffset << endl;)
LAZYDEBUG(cerr << "output=" << offset << endl;)
}
            DataMaths::trace(*vleft,m_left->getShape(),subroffset, v ,getShape(),offset,m_axis_offset);
if (zz==5800)
{
LAZYDEBUG(cerr << "Result of trace=" << DataTypes::pointToString(v,getShape(),offset,"") << endl;)
}
        subroffset+=instep;
        offset+=outstep;
    }
    break;
    case TRANS:
    for (loop=0;loop<numsteps;++loop)
    {
            DataMaths::transpose(*vleft,m_left->getShape(),subroffset, v,getShape(),offset,m_axis_offset);
        subroffset+=instep;
        offset+=outstep;
    }
    break;
    default:
    throw DataException("Programmer error - resolveNP1OUTP can not resolve operator "+opToString(m_op)+".");
  }
  return &v;
}
1230    
1231    
1232    /*
1233      \brief Compute the value of the expression (unary operation with int params) for the given sample.
1234      \return Vector which stores the value of the subexpression for the given sample.
1235      \param v A vector to store intermediate results.
1236      \param offset Index in v to begin storing results.
1237      \param sampleNo Sample number to evaluate.
1238      \param roffset (output parameter) the offset in the return vector where the result begins.
1239    
1240      The return value will be an existing vector so do not deallocate it.
1241      If the result is stored in v it should be stored at the offset given.
1242      Everything from offset to the end of v should be considered available for this method to use.
1243    */
1244    DataTypes::ValueType*
1245    DataLazy::resolveNP1OUT_2P(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const
1246    {
1247        // we assume that any collapsing has been done before we get here
1248        // since we only have one argument we don't need to think about only
1249        // processing single points.
1250      if (m_readytype!='E')
1251      {
1252        throw DataException("Programmer error - resolveNP1OUT_2P should only be called on expanded Data.");
1253      }
1254        // since we can't write the result over the input, we need a result offset further along
1255      size_t subroffset;
1256      const ValueType* vleft=m_left->resolveSample(v,offset+m_left->m_samplesize,sampleNo,subroffset);
1257    LAZYDEBUG(cerr << "srcsamplesize=" << offset+m_left->m_samplesize << " beg=" << subroffset << endl;)
1258    LAZYDEBUG(cerr << "Offset for 5800=" << getPointOffset(5800/getNumDPPSample(),5800%getNumDPPSample()) << endl;)
1259      roffset=offset;
1260      size_t loop=0;
1261      size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1262      size_t outstep=getNoValues();
1263      size_t instep=m_left->getNoValues();
1264    LAZYDEBUG(cerr << "instep=" << instep << " outstep=" << outstep<< " numsteps=" << numsteps << endl;)
1265      switch (m_op)
1266      {
1267        case SWAP:
1268        for (loop=0;loop<numsteps;++loop)
1269        {
1270                DataMaths::swapaxes(*vleft,m_left->getShape(),subroffset, v,getShape(),offset,m_axis_offset, m_transpose);
1271            subroffset+=instep;
1272            offset+=outstep;
1273        }
1274        break;
1275        default:
1276        throw DataException("Programmer error - resolveNP1OUT2P can not resolve operator "+opToString(m_op)+".");
1277      }
1278      return &v;
1279    }
1280    
1281    
1282    
// Elementwise binary-operation driver used by resolveBinary.
// Expects the following names in the caller's scope:
//   onumsteps/numsteps         outer/inner loop trip counts
//   chunksize                  doubles processed per tensor_binary_operation call
//   left/right, lroffset/rroffset   operand vectors and their read cursors
//   leftstep/rightstep, oleftstep/orightstep   cursor advances per inner/outer step
//   resultp/resultStep         output pointer and its advance per inner step
// TYPE is the template argument for tensor_binary_operation; X is the functor.
#define PROC_OP(TYPE,X)                               \
    for (int j=0;j<onumsteps;++j)\
    {\
      for (int i=0;i<numsteps;++i,resultp+=resultStep) \
      { \
LAZYDEBUG(cout << "[left,right]=[" << lroffset << "," << rroffset << "]" << endl;)\
LAZYDEBUG(cout << "{left,right}={" << (*left)[lroffset] << "," << (*right)[rroffset] << "}\n";)\
         tensor_binary_operation< TYPE >(chunksize, &((*left)[lroffset]), &((*right)[rroffset]), resultp, X); \
LAZYDEBUG(cout << " result=      " << resultp[0] << endl;) \
         lroffset+=leftstep; \
         rroffset+=rightstep; \
      }\
      lroffset+=oleftstep;\
      rroffset+=orightstep;\
    }
1298    
1299  /*  /*
# Line 621  DataLazy::resolveUnary(ValueType& v, siz Line 1320  DataLazy::resolveUnary(ValueType& v, siz
1320  DataTypes::ValueType*  DataTypes::ValueType*
1321  DataLazy::resolveBinary(ValueType& v,  size_t offset, int sampleNo, size_t& roffset) const  DataLazy::resolveBinary(ValueType& v,  size_t offset, int sampleNo, size_t& roffset) const
1322  {  {
1323  cout << "Resolve binary: " << toString() << endl;  LAZYDEBUG(cout << "Resolve binary: " << toString() << endl;)
1324    
1325    size_t lroffset=0, rroffset=0;    // offsets in the left and right result vectors    size_t lroffset=0, rroffset=0;    // offsets in the left and right result vectors
1326      // first work out which of the children are expanded      // first work out which of the children are expanded
1327    bool leftExp=(m_left->m_readytype=='E');    bool leftExp=(m_left->m_readytype=='E');
1328    bool rightExp=(m_right->m_readytype=='E');    bool rightExp=(m_right->m_readytype=='E');
1329    bool bigloops=((leftExp && rightExp) || (!leftExp && !rightExp)); // is processing in single step?    if (!leftExp && !rightExp)
1330    int steps=(bigloops?1:getNumDPPSample());    {
1331    size_t chunksize=(bigloops? m_samplesize : getNoValues());    // if bigloops, pretend the whole sample is a datapoint      throw DataException("Programmer Error - please use collapse if neither argument has type 'E'.");
1332    if (m_left->getRank()!=m_right->getRank())    // need to deal with scalar * ? ops    }
1333    {    bool leftScalar=(m_left->getRank()==0);
1334      EsysAssert((m_left->getRank()==0) || (m_right->getRank()==0), "Error - Ranks must match unless one is 0.");    bool rightScalar=(m_right->getRank()==0);
1335      steps=getNumDPPSample()*max(m_left->getNoValues(),m_right->getNoValues());    if ((m_left->getRank()!=m_right->getRank()) && (!leftScalar && !rightScalar))
1336      chunksize=1;    // for scalar    {
1337    }          throw DataException("resolveBinary - ranks of arguments must match unless one of them is scalar.");
1338    int leftStep=((leftExp && !rightExp)? m_right->getNoValues() : 0);    }
1339    int rightStep=((rightExp && !leftExp)? m_left->getNoValues() : 0);    size_t leftsize=m_left->getNoValues();
1340    int resultStep=max(leftStep,rightStep);   // only one (at most) should be !=0    size_t rightsize=m_right->getNoValues();
1341      size_t chunksize=1;           // how many doubles will be processed in one go
1342      int leftstep=0;       // how far should the left offset advance after each step
1343      int rightstep=0;
1344      int numsteps=0;       // total number of steps for the inner loop
1345      int oleftstep=0;  // the o variables refer to the outer loop
1346      int orightstep=0; // The outer loop is only required in cases where there is an extended scalar
1347      int onumsteps=1;
1348      
1349      bool LES=(leftExp && leftScalar); // Left is an expanded scalar
1350      bool RES=(rightExp && rightScalar);
1351      bool LS=(!leftExp && leftScalar); // left is a single scalar
1352      bool RS=(!rightExp && rightScalar);
1353      bool LN=(!leftExp && !leftScalar);    // left is a single non-scalar
1354      bool RN=(!rightExp && !rightScalar);
1355      bool LEN=(leftExp && !leftScalar);    // left is an expanded non-scalar
1356      bool REN=(rightExp && !rightScalar);
1357    
1358      if ((LES && RES) || (LEN && REN)) // both are Expanded scalars or both are expanded non-scalars
1359      {
1360        chunksize=m_left->getNumDPPSample()*leftsize;
1361        leftstep=0;
1362        rightstep=0;
1363        numsteps=1;
1364      }
1365      else if (LES || RES)
1366      {
1367        chunksize=1;
1368        if (LES)        // left is an expanded scalar
1369        {
1370            if (RS)
1371            {
1372               leftstep=1;
1373               rightstep=0;
1374               numsteps=m_left->getNumDPPSample();
1375            }
1376            else        // RN or REN
1377            {
1378               leftstep=0;
1379               oleftstep=1;
1380               rightstep=1;
1381               orightstep=(RN ? -(int)rightsize : 0);
1382               numsteps=rightsize;
1383               onumsteps=m_left->getNumDPPSample();
1384            }
1385        }
1386        else        // right is an expanded scalar
1387        {
1388            if (LS)
1389            {
1390               rightstep=1;
1391               leftstep=0;
1392               numsteps=m_right->getNumDPPSample();
1393            }
1394            else
1395            {
1396               rightstep=0;
1397               orightstep=1;
1398               leftstep=1;
1399               oleftstep=(LN ? -(int)leftsize : 0);
1400               numsteps=leftsize;
1401               onumsteps=m_right->getNumDPPSample();
1402            }
1403        }
1404      }
1405      else  // this leaves (LEN, RS), (LEN, RN) and their transposes
1406      {
1407        if (LEN)    // and Right will be a single value
1408        {
1409            chunksize=rightsize;
1410            leftstep=rightsize;
1411            rightstep=0;
1412            numsteps=m_left->getNumDPPSample();
1413            if (RS)
1414            {
1415               numsteps*=leftsize;
1416            }
1417        }
1418        else    // REN
1419        {
1420            chunksize=leftsize;
1421            rightstep=leftsize;
1422            leftstep=0;
1423            numsteps=m_right->getNumDPPSample();
1424            if (LS)
1425            {
1426               numsteps*=rightsize;
1427            }
1428        }
1429      }
1430    
1431      int resultStep=max(leftstep,rightstep);   // only one (at most) should be !=0
1432      // Get the values of sub-expressions      // Get the values of sub-expressions
1433    const ValueType* left=m_left->resolveSample(v,offset,sampleNo,lroffset);    const ValueType* left=m_left->resolveSample(v,offset+getMaxSampleSize(),sampleNo,lroffset);   // see note on
1434    const ValueType* right=m_right->resolveSample(v,offset+m_samplesize,sampleNo,rroffset); // Note      // calcBufss for why we can't put offset as the 2nd param above
1435      const ValueType* right=m_right->resolveSample(v,offset+2*getMaxSampleSize(),sampleNo,rroffset); // Note
1436      // the right child starts further along.      // the right child starts further along.
1437    LAZYDEBUG(cout << "Post sub calls in " << toString() << endl;)
1438    LAZYDEBUG(cout << "shapes=" << DataTypes::shapeToString(m_left->getShape()) << "," << DataTypes::shapeToString(m_right->getShape()) << endl;)
1439    LAZYDEBUG(cout << "chunksize=" << chunksize << endl << "leftstep=" << leftstep << " rightstep=" << rightstep;)
1440    LAZYDEBUG(cout << " numsteps=" << numsteps << endl << "oleftstep=" << oleftstep << " orightstep=" << orightstep;)
1441    LAZYDEBUG(cout << "onumsteps=" << onumsteps << endl;)
1442    LAZYDEBUG(cout << " DPPS=" << m_left->getNumDPPSample() << "," <<m_right->getNumDPPSample() << endl;)
1443    LAZYDEBUG(cout << "" << LS << RS << LN << RN << LES << RES <<LEN << REN <<   endl;)
1444    
1445    
1446    double* resultp=&(v[offset]);     // results are stored at the vector offset we recieved    double* resultp=&(v[offset]);     // results are stored at the vector offset we recieved
1447    switch(m_op)    switch(m_op)
1448    {    {
1449      case ADD:      case ADD:
1450      PROC_OP(plus<double>());          PROC_OP(NO_ARG,plus<double>());
1451      break;      break;
1452      case SUB:      case SUB:
1453      PROC_OP(minus<double>());      PROC_OP(NO_ARG,minus<double>());
1454      break;      break;
1455      case MUL:      case MUL:
1456      PROC_OP(multiplies<double>());      PROC_OP(NO_ARG,multiplies<double>());
1457      break;      break;
1458      case DIV:      case DIV:
1459      PROC_OP(divides<double>());      PROC_OP(NO_ARG,divides<double>());
1460      break;      break;
1461      case POW:      case POW:
1462      PROC_OP(::pow);         PROC_OP(double (double,double),::pow);
1463      break;      break;
1464      default:      default:
1465      throw DataException("Programmer error - resolveBinary can not resolve operator "+opToString(m_op)+".");      throw DataException("Programmer error - resolveBinary can not resolve operator "+opToString(m_op)+".");
1466    }    }
1467    roffset=offset;      roffset=offset;
1468    return &v;    return &v;
1469  }  }
1470    
1471    
1472    
1473  /*  /*
1474      \brief Compute the value of the expression (tensor product) for the given sample.
1475      \return Vector which stores the value of the subexpression for the given sample.
1476      \param v A vector to store intermediate results.
1477      \param offset Index in v to begin storing results.
1478      \param sampleNo Sample number to evaluate.
1479      \param roffset (output parameter) the offset in the return vector where the result begins.
1480    
1481      The return value will be an existing vector so do not deallocate it.
1482      If the result is stored in v it should be stored at the offset given.
1483      Everything from offset to the end of v should be considered available for this method to use.
1484    */
// This method assumes that any subexpressions which evaluate to Constant or Tagged Data
// have already been collapsed to IDENTITY. So we must have at least one expanded child.
// Unlike the other resolve helpers, we must treat these datapoints separately.
//
// v        : scratch vector shared by all threads; sub-expressions are evaluated
//            into it beyond `offset + m_samplesize` and the answer is written at `offset`.
// offset   : where in v this node may write its result.
// sampleNo : the sample to evaluate.
// roffset  : (output) offset of the result within the returned vector.
DataTypes::ValueType*
DataLazy::resolveTProd(ValueType& v,  size_t offset, int sampleNo, size_t& roffset) const
{
LAZYDEBUG(cout << "Resolve TensorProduct: " << toString()  << " to offset " << offset<< endl;)

  size_t lroffset=0, rroffset=0;	// offsets in the left and right result vectors
	// first work out which of the children are expanded
  bool leftExp=(m_left->m_readytype=='E');
  bool rightExp=(m_right->m_readytype=='E');
  int steps=getNumDPPSample();
/*  int leftStep=((leftExp && !rightExp)? m_right->getNoValues() : 0);
  int rightStep=((rightExp && !leftExp)? m_left->getNoValues() : 0);*/
	// a non-expanded child re-uses the same point for every step (step of 0)
  int leftStep=(leftExp? m_left->getNoValues() : 0);		// do not have scalars as input to this method
  int rightStep=(rightExp?m_right->getNoValues() : 0);

  int resultStep=getNoValues();
	// Get the values of sub-expressions (leave a gap of one sample for the result).
  int gap=offset+m_samplesize;	

LAZYDEBUG(cout << "Query left with offset=" << gap << endl;)

  const ValueType* left=m_left->resolveSample(v,gap,sampleNo,lroffset);
	// advance past the largest sample the left subtree may have written
  gap+=m_left->getMaxSampleSize();


LAZYDEBUG(cout << "Query right with offset=" << gap << endl;)


  const ValueType* right=m_right->resolveSample(v,gap,sampleNo,rroffset);

LAZYDEBUG(cerr << "[Left shape]=" << DataTypes::shapeToString(m_left->getShape()) << "\n[Right shape]=" << DataTypes::shapeToString(m_right->getShape()) << " result=" <<DataTypes::shapeToString(getShape()) <<  endl;
cout << getNoValues() << endl;)
LAZYDEBUG(cerr << "Result of left=";)
LAZYDEBUG(cerr << "[" << lroffset << " .. " << lroffset+m_left->getNoValues() << "]" << endl;

for (int i=lroffset, limit=lroffset+(leftExp?m_left->getNoValues()*m_left->getNumDPPSample():m_left->getNoValues());i<limit;++i)
{
cout << "[" << setw(2) << i-lroffset << "] " << setw(10) << (*left)[i] << " ";
if (i%4==0) cout << endl;
})
LAZYDEBUG(cerr << "\nResult of right=" << endl;)
LAZYDEBUG(
for (int i=rroffset, limit=rroffset+(rightExp?m_right->getNoValues()*m_right->getNumDPPSample():m_right->getNoValues());i<limit;++i)
{
cerr << "[" <<  setw(2)<< i-rroffset << "] " << setw(10) << (*right)[i] << " ";
if (i%4==0) cout << endl;
}
cerr << endl;
)
LAZYDEBUG(cerr << "Post sub calls: " << toString() << endl;)
LAZYDEBUG(cout << "LeftExp=" << leftExp << " rightExp=" << rightExp << endl;)
LAZYDEBUG(cout << "LeftR=" << m_left->getRank() << " rightExp=" << m_right->getRank() << endl;)
LAZYDEBUG(cout << "LeftSize=" << m_left->getNoValues() << " RightSize=" << m_right->getNoValues() << endl;)
LAZYDEBUG(cout << "m_samplesize=" << m_samplesize << endl;)
LAZYDEBUG(cout << "outputshape=" << DataTypes::shapeToString(getShape()) << endl;)
LAZYDEBUG(cout << "DPPS=" << m_right->getNumDPPSample() <<"."<<endl;)

  double* resultp=&(v[offset]);		// results are stored at the vector offset we received
  switch(m_op)
  {
    case PROD:
	// one matrix product per datapoint in the sample
	for (int i=0;i<steps;++i,resultp+=resultStep)
	{

LAZYDEBUG(cout << "lroffset=" << lroffset << "rroffset=" << rroffset << endl;)
LAZYDEBUG(cout << "l*=" << left << " r*=" << right << endl;)
LAZYDEBUG(cout << "m_SL=" << m_SL << " m_SM=" << m_SM << " m_SR=" << m_SR << endl;)

	  const double *ptr_0 = &((*left)[lroffset]);
	  const double *ptr_1 = &((*right)[rroffset]);

LAZYDEBUG(cout << DataTypes::pointToString(*left, m_left->getShape(),lroffset,"LEFT") << endl;)
LAZYDEBUG(cout << DataTypes::pointToString(*right,m_right->getShape(),rroffset, "RIGHT") << endl;)

	  matrix_matrix_product(m_SL, m_SM, m_SR, ptr_0, ptr_1, resultp, m_transpose);

LAZYDEBUG(cout << "Results--\n";
{
  DataVector dv(getNoValues());
for (int z=0;z<getNoValues();++z)
{
  cout << "[" << setw(2) << z<< "] " << setw(10) << resultp[z] << " ";
  if (z%4==0) cout << endl;
  dv[z]=resultp[z];
}
cout << endl << DataTypes::pointToString(dv,getShape(),0,"RESLT");
cout << "\nWritten to: " << resultp << " resultStep=" << resultStep << endl;
}
)
	  lroffset+=leftStep;
	  rroffset+=rightStep;
	}
	break;
    default:
	throw DataException("Programmer error - resolveTProduct can not resolve operator "+opToString(m_op)+".");
  }
  roffset=offset;	// the answer starts at the offset we were handed
  return &v;
}
1587    
1588    
1589    #ifdef LAZY_NODE_STORAGE
1590    
1591    // The result will be stored in m_samples
1592    // The return value is a pointer to the DataVector, offset is the offset within the return value
1593    const DataTypes::ValueType*
1594    DataLazy::resolveNodeSample(int tid, int sampleNo, size_t& roffset)
1595    {
1596    ENABLEDEBUG
1597    LAZYDEBUG(cout << "Resolve sample " << toString() << endl;)
1598        // collapse so we have a 'E' node or an IDENTITY for some other type
1599      if (m_readytype!='E' && m_op!=IDENTITY)
1600      {
1601        collapse();
1602      }
1603      if (m_op==IDENTITY)  
1604      {
1605        const ValueType& vec=m_id->getVectorRO();
1606    //     if (m_readytype=='C')
1607    //     {
1608    //  roffset=0;      // all samples read from the same position
1609    //  return &(m_samples);
1610    //     }
1611        roffset=m_id->getPointOffset(sampleNo, 0);
1612        return &(vec);
1613      }
1614      if (m_readytype!='E')
1615      {
1616        throw DataException("Programmer Error - Collapse did not produce an expanded node.");
1617      }
1618      switch (getOpgroup(m_op))
1619      {
1620      case G_UNARY:
1621      case G_UNARY_P: return resolveNodeUnary(tid, sampleNo, roffset);
1622      case G_BINARY: return resolveNodeBinary(tid, sampleNo, roffset);
1623      case G_NP1OUT: return resolveNodeNP1OUT(tid, sampleNo, roffset);
1624      case G_NP1OUT_P: return resolveNodeNP1OUT_P(tid, sampleNo, roffset);
1625      case G_TENSORPROD: return resolveNodeTProd(tid, sampleNo, roffset);
1626      case G_NP1OUT_2P: return resolveNodeNP1OUT_2P(tid, sampleNo, roffset);
1627      default:
1628        throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");
1629      }
1630    }
1631    
// Apply this node's unary operation to one sample of the left child,
// writing the answer into this thread's slot of m_samples.
// tid      : calling thread's id (selects the slot within m_samples)
// sampleNo : sample to evaluate
// roffset  : (output) offset of the result within m_samples
const DataTypes::ValueType*
DataLazy::resolveNodeUnary(int tid, int sampleNo, size_t& roffset)
{
	// we assume that any collapsing has been done before we get here
	// since we only have one argument we don't need to think about only
	// processing single points.
	// we will also know we won't get identity nodes
  if (m_readytype!='E')
  {
    throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
  }
  if (m_op==IDENTITY)
  {
    throw DataException("Programmer error - resolveNodeUnary should not be called on identity nodes.");
  }
  // If this thread already holds this sample, reuse it rather than recompute.
  if (m_sampleids[tid]==sampleNo)
  {
	roffset=tid*m_samplesize;
	return &(m_samples);		// sample is already resolved
  }
  const DataTypes::ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, roffset);
  const double* left=&((*leftres)[roffset]);
  roffset=m_samplesize*tid;	// from here on, roffset refers to OUR result slot
  double* result=&(m_samples[roffset]);
  switch (m_op)
  {
    case SIN:	
	tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sin);
	break;
    case COS:
	tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cos);
	break;
    case TAN:
	tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tan);
	break;
    case ASIN:
	tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::asin);
	break;
    case ACOS:
	tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::acos);
	break;
    case ATAN:
	tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::atan);
	break;
    case SINH:
	tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sinh);
	break;
    case COSH:
	tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cosh);
	break;
    case TANH:
	tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tanh);
	break;
    case ERF:
	// erf is not part of the MSVC maths library (Intel on Windows has it)
#if defined (_WIN32) && !defined(__INTEL_COMPILER)
	throw DataException("Error - Data:: erf function is not supported on _WIN32 platforms.");
#else
	tensor_unary_operation(m_samplesize, left, result, ::erf);
	break;
#endif
   case ASINH:
	// the inverse hyperbolic functions also need substitutes on MSVC
#if defined (_WIN32) && !defined(__INTEL_COMPILER)
	tensor_unary_operation(m_samplesize, left, result, escript::asinh_substitute);
#else
	tensor_unary_operation(m_samplesize, left, result, ::asinh);
#endif  
	break;
   case ACOSH:
#if defined (_WIN32) && !defined(__INTEL_COMPILER)
	tensor_unary_operation(m_samplesize, left, result, escript::acosh_substitute);
#else
	tensor_unary_operation(m_samplesize, left, result, ::acosh);
#endif  
	break;
   case ATANH:
#if defined (_WIN32) && !defined(__INTEL_COMPILER)
	tensor_unary_operation(m_samplesize, left, result, escript::atanh_substitute);
#else
	tensor_unary_operation(m_samplesize, left, result, ::atanh);
#endif  
	break;
    case LOG10:
	tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log10);
	break;
    case LOG:
	tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log);
	break;
    case SIGN:
	tensor_unary_operation(m_samplesize, left, result, escript::fsign);
	break;
    case ABS:
	tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::fabs);
	break;
    case NEG:
	tensor_unary_operation(m_samplesize, left, result, negate<double>());
	break;
    case POS:
	// it doesn't mean anything for delayed.
	// it will just trigger a deep copy of the lazy object
	throw DataException("Programmer error - POS not supported for lazy data.");
	break;
    case EXP:
	tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::exp);
	break;
    case SQRT:
	tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sqrt);
	break;
    case RECIP:
	tensor_unary_operation(m_samplesize, left, result, bind1st(divides<double>(),1.));
	break;
	// the comparison ops below produce 1.0/0.0 rather than booleans
    case GZ:
	tensor_unary_operation(m_samplesize, left, result, bind2nd(greater<double>(),0.0));
	break;
    case LZ:
	tensor_unary_operation(m_samplesize, left, result, bind2nd(less<double>(),0.0));
	break;
    case GEZ:
	tensor_unary_operation(m_samplesize, left, result, bind2nd(greater_equal<double>(),0.0));
	break;
    case LEZ:
	tensor_unary_operation(m_samplesize, left, result, bind2nd(less_equal<double>(),0.0));
	break;
// There are actually G_UNARY_P but I don't see a compelling reason to treat them differently
// (they take the tolerance parameter m_tol in addition to the operand)
    case NEZ:
	tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsGT(),m_tol));
	break;
    case EZ:
	tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsLTE(),m_tol));
	break;

    default:
	throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");
  }
  return &(m_samples);
}
1767    
1768    
1769    const DataTypes::ValueType*
1770    DataLazy::resolveNodeNP1OUT(int tid, int sampleNo, size_t& roffset)
1771    {
1772        // we assume that any collapsing has been done before we get here
1773        // since we only have one argument we don't need to think about only
1774        // processing single points.
1775      if (m_readytype!='E')
1776      {
1777        throw DataException("Programmer error - resolveNodeNP1OUT should only be called on expanded Data.");
1778      }
1779      if (m_op==IDENTITY)
1780      {
1781        throw DataException("Programmer error - resolveNodeNP1OUT should not be called on identity nodes.");
1782      }
1783      size_t subroffset;
1784      const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1785      roffset=m_samplesize*tid;
1786      size_t loop=0;
1787      size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1788      size_t step=getNoValues();
1789      size_t offset=roffset;
1790      switch (m_op)
1791      {
1792        case SYM:
1793        for (loop=0;loop<numsteps;++loop)
1794        {
1795            DataMaths::symmetric(*leftres,m_left->getShape(),subroffset, m_samples, getShape(), offset);
1796            subroffset+=step;
1797            offset+=step;
1798        }
1799        break;
1800        case NSYM:
1801        for (loop=0;loop<numsteps;++loop)
1802        {
1803            DataMaths::nonsymmetric(*leftres,m_left->getShape(),subroffset, m_samples, getShape(), offset);
1804            subroffset+=step;
1805            offset+=step;
1806        }
1807        break;
1808        default:
1809        throw DataException("Programmer error - resolveNP1OUT can not resolve operator "+opToString(m_op)+".");
1810      }
1811      return &m_samples;
1812    }
1813    
1814    const DataTypes::ValueType*
1815    DataLazy::resolveNodeNP1OUT_P(int tid, int sampleNo, size_t& roffset)
1816    {
1817        // we assume that any collapsing has been done before we get here
1818        // since we only have one argument we don't need to think about only
1819        // processing single points.
1820      if (m_readytype!='E')
1821      {
1822        throw DataException("Programmer error - resolveNodeNP1OUT_P should only be called on expanded Data.");
1823      }
1824      if (m_op==IDENTITY)
1825      {
1826        throw DataException("Programmer error - resolveNodeNP1OUT_P should not be called on identity nodes.");
1827      }
1828      size_t subroffset;
1829      size_t offset;
1830      const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1831      roffset=m_samplesize*tid;
1832      offset=roffset;
1833      size_t loop=0;
1834      size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1835      size_t outstep=getNoValues();
1836      size_t instep=m_left->getNoValues();
1837      switch (m_op)
1838      {
1839        case TRACE:
1840        for (loop=0;loop<numsteps;++loop)
1841        {
1842                DataMaths::trace(*leftres,m_left->getShape(),subroffset, m_samples ,getShape(),offset,m_axis_offset);
1843            subroffset+=instep;
1844            offset+=outstep;
1845        }
1846        break;
1847        case TRANS:
1848        for (loop=0;loop<numsteps;++loop)
1849        {
1850                DataMaths::transpose(*leftres,m_left->getShape(),subroffset, m_samples, getShape(),offset,m_axis_offset);
1851            subroffset+=instep;
1852            offset+=outstep;
1853        }
1854        break;
1855        default:
1856        throw DataException("Programmer error - resolveNP1OUTP can not resolve operator "+opToString(m_op)+".");
1857      }
1858      return &m_samples;
1859    }
1860    
1861    
1862    const DataTypes::ValueType*
1863    DataLazy::resolveNodeNP1OUT_2P(int tid, int sampleNo, size_t& roffset)
1864    {
1865      if (m_readytype!='E')
1866      {
1867        throw DataException("Programmer error - resolveNodeNP1OUT_2P should only be called on expanded Data.");
1868      }
1869      if (m_op==IDENTITY)
1870      {
1871        throw DataException("Programmer error - resolveNodeNP1OUT_2P should not be called on identity nodes.");
1872      }
1873      size_t subroffset;
1874      size_t offset;
1875      const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1876      roffset=m_samplesize*tid;
1877      offset=roffset;
1878      size_t loop=0;
1879      size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1880      size_t outstep=getNoValues();
1881      size_t instep=m_left->getNoValues();
1882      switch (m_op)
1883      {
1884        case SWAP:
1885        for (loop=0;loop<numsteps;++loop)
1886        {
1887                DataMaths::swapaxes(*leftres,m_left->getShape(),subroffset, m_samples, getShape(),offset, m_axis_offset, m_transpose);
1888            subroffset+=instep;
1889            offset+=outstep;
1890        }
1891        break;
1892        default:
1893        throw DataException("Programmer error - resolveNodeNP1OUT2P can not resolve operator "+opToString(m_op)+".");
1894      }
1895      return &m_samples;
1896    }
1897    
1898    
1899    
// This method assumes that any subexpressions which evaluate to Constant or Tagged Data
// have already been collapsed to IDENTITY. So we must have at least one expanded child.
// If both children are expanded, then we can process them in a single operation (we treat
// the whole sample as one big datapoint).
// If one of the children is not expanded, then we need to treat each point in the sample
// individually.
// There is an additional complication when scalar operations are considered.
// For example, 2+Vector.
// In this case each double within the point is treated individually.
//
// The loop structure (set up below, executed by the PROC_OP macro) is:
//   repeat onumsteps times:                      (outer loop)
//     repeat numsteps times:                     (inner loop)
//       apply the operator to `chunksize` consecutive doubles
//       advance the operand offsets by leftstep / rightstep
//     advance the operand offsets by oleftstep / orightstep
// The outer loop is only needed when an expanded scalar is combined with a
// non-scalar (the non-scalar offset must be rewound for each datapoint).
const DataTypes::ValueType*
DataLazy::resolveNodeBinary(int tid, int sampleNo, size_t& roffset)
{
LAZYDEBUG(cout << "Resolve binary: " << toString() << endl;)

  size_t lroffset=0, rroffset=0;	// offsets in the left and right result vectors
	// first work out which of the children are expanded
  bool leftExp=(m_left->m_readytype=='E');
  bool rightExp=(m_right->m_readytype=='E');
  if (!leftExp && !rightExp)
  {
	throw DataException("Programmer Error - please use collapse if neither argument has type 'E'.");
  }
  bool leftScalar=(m_left->getRank()==0);
  bool rightScalar=(m_right->getRank()==0);
  if ((m_left->getRank()!=m_right->getRank()) && (!leftScalar && !rightScalar))
  {
	throw DataException("resolveBinary - ranks of arguments must match unless one of them is scalar.");
  }
  size_t leftsize=m_left->getNoValues();
  size_t rightsize=m_right->getNoValues();
  size_t chunksize=1;			// how many doubles will be processed in one go
  int leftstep=0;		// how far should the left offset advance after each step
  int rightstep=0;
  int numsteps=0;		// total number of steps for the inner loop
  int oleftstep=0;	// the o variables refer to the outer loop
  int orightstep=0;	// The outer loop is only required in cases where there is an extended scalar
  int onumsteps=1;
  
	// classify each child: Expanded/Single crossed with Scalar/Non-scalar
  bool LES=(leftExp && leftScalar);	// Left is an expanded scalar
  bool RES=(rightExp && rightScalar);
  bool LS=(!leftExp && leftScalar);	// left is a single scalar
  bool RS=(!rightExp && rightScalar);
  bool LN=(!leftExp && !leftScalar);	// left is a single non-scalar
  bool RN=(!rightExp && !rightScalar);
  bool LEN=(leftExp && !leftScalar);	// left is an expanded non-scalar
  bool REN=(rightExp && !rightScalar);

  if ((LES && RES) || (LEN && REN))	// both are Expanded scalars or both are expanded non-scalars
  {
	// same layout on both sides: process the whole sample as one chunk
	chunksize=m_left->getNumDPPSample()*leftsize;
	leftstep=0;
	rightstep=0;
	numsteps=1;
  }
  else if (LES || RES)
  {
	// one side is an expanded scalar: pair each scalar value with a point
	// (or another scalar) on the other side, one double at a time
	chunksize=1;
	if (LES)		// left is an expanded scalar
	{
	    if (RS)
	    {
	       leftstep=1;
	       rightstep=0;
	       numsteps=m_left->getNumDPPSample();
	    }
	    else		// RN or REN
	    {
	       leftstep=0;
	       oleftstep=1;
	       rightstep=1;
	       orightstep=(RN ? -(int)rightsize : 0);	// rewind a single non-scalar for each datapoint
	       numsteps=rightsize;
	       onumsteps=m_left->getNumDPPSample();
	    }
	}
	else		// right is an expanded scalar
	{
	    if (LS)
	    {
	       rightstep=1;
	       leftstep=0;
	       numsteps=m_right->getNumDPPSample();
	    }
	    else
	    {
	       rightstep=0;
	       orightstep=1;
	       leftstep=1;
	       oleftstep=(LN ? -(int)leftsize : 0);	// rewind a single non-scalar for each datapoint
	       numsteps=leftsize;
	       onumsteps=m_right->getNumDPPSample();
	    }
	}
  }
  else	// this leaves (LEN, RS), (LEN, RN) and their transposes
  {
	if (LEN)	// and Right will be a single value
	{
	    chunksize=rightsize;
	    leftstep=rightsize;
	    rightstep=0;
	    numsteps=m_left->getNumDPPSample();
	    if (RS)
	    {
	       numsteps*=leftsize;	// a single scalar combines with every double
	    }
	}
	else	// REN
	{
	    chunksize=leftsize;
	    rightstep=leftsize;
	    leftstep=0;
	    numsteps=m_right->getNumDPPSample();
	    if (LS)
	    {
	       numsteps*=rightsize;
	    }
	}
  }

  int resultStep=max(leftstep,rightstep);	// only one (at most) should be !=0
	// Get the values of sub-expressions
  const ValueType* left=m_left->resolveNodeSample(tid,sampleNo,lroffset);	
  const ValueType* right=m_right->resolveNodeSample(tid,sampleNo,rroffset);
LAZYDEBUG(cout << "Post sub calls in " << toString() << endl;)
LAZYDEBUG(cout << "shapes=" << DataTypes::shapeToString(m_left->getShape()) << "," << DataTypes::shapeToString(m_right->getShape()) << endl;)
LAZYDEBUG(cout << "chunksize=" << chunksize << endl << "leftstep=" << leftstep << " rightstep=" << rightstep;)
LAZYDEBUG(cout << " numsteps=" << numsteps << endl << "oleftstep=" << oleftstep << " orightstep=" << orightstep;)
LAZYDEBUG(cout << "onumsteps=" << onumsteps << endl;)
LAZYDEBUG(cout << " DPPS=" << m_left->getNumDPPSample() << "," <<m_right->getNumDPPSample() << endl;)
LAZYDEBUG(cout << "" << LS << RS << LN << RN << LES << RES <<LEN << REN <<   endl;)

LAZYDEBUG(cout << "Left res["<< lroffset<< "]=" << (*left)[lroffset] << endl;)
LAZYDEBUG(cout << "Right res["<< rroffset<< "]=" << (*right)[rroffset] << endl;)


  roffset=m_samplesize*tid;
  double* resultp=&(m_samples[roffset]);		// results are stored at the vector offset we received
	// PROC_OP runs the chunk/step loops described above for the given functor
  switch(m_op)
  {
    case ADD:
        PROC_OP(NO_ARG,plus<double>());
	break;
    case SUB:
	PROC_OP(NO_ARG,minus<double>());
	break;
    case MUL:
	PROC_OP(NO_ARG,multiplies<double>());
	break;
    case DIV:
	PROC_OP(NO_ARG,divides<double>());
	break;
    case POW:
       PROC_OP(double (double,double),::pow);
	break;
    default:
	throw DataException("Programmer error - resolveBinary can not resolve operator "+opToString(m_op)+".");
  }
LAZYDEBUG(cout << "Result res[" << roffset<< "]" << m_samples[roffset] << endl;)
  return &m_samples;
}
2061    
2062    
2063    // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
2064    // have already been collapsed to IDENTITY. So we must have at least one expanded child.
2065    // unlike the other resolve helpers, we must treat these datapoints separately.
2066    const DataTypes::ValueType*
2067    DataLazy::resolveNodeTProd(int tid, int sampleNo, size_t& roffset)
2068    {
2069    LAZYDEBUG(cout << "Resolve TensorProduct: " << toString() << endl;)
2070    
2071      size_t lroffset=0, rroffset=0;    // offsets in the left and right result vectors
2072        // first work out which of the children are expanded
2073      bool leftExp=(m_left->m_readytype=='E');
2074      bool rightExp=(m_right->m_readytype=='E');
2075      int steps=getNumDPPSample();
2076      int leftStep=(leftExp? m_left->getNoValues() : 0);        // do not have scalars as input to this method
2077      int rightStep=(rightExp?m_right->getNoValues() : 0);
2078    
2079      int resultStep=getNoValues();
2080      roffset=m_samplesize*tid;
2081      size_t offset=roffset;
2082    
2083      const ValueType* left=m_left->resolveNodeSample(tid, sampleNo, lroffset);
2084    
2085      const ValueType* right=m_right->resolveNodeSample(tid, sampleNo, rroffset);
2086    
2087    LAZYDEBUG(cerr << "[Left shape]=" << DataTypes::shapeToString(m_left->getShape()) << "\n[Right shape]=" << DataTypes::shapeToString(m_right->getShape()) << " result=" <<DataTypes::shapeToString(getShape()) <<  endl;
2088    cout << getNoValues() << endl;)
2089    
2090    
2091    LAZYDEBUG(cerr << "Post sub calls: " << toString() << endl;)
2092    LAZYDEBUG(cout << "LeftExp=" << leftExp << " rightExp=" << rightExp << endl;)
2093    LAZYDEBUG(cout << "LeftR=" << m_left->getRank() << " rightExp=" << m_right->getRank() << endl;)
2094    LAZYDEBUG(cout << "LeftSize=" << m_left->getNoValues() << " RightSize=" << m_right->getNoValues() << endl;)
2095    LAZYDEBUG(cout << "m_samplesize=" << m_samplesize << endl;)
2096    LAZYDEBUG(cout << "outputshape=" << DataTypes::shapeToString(getShape()) << endl;)
2097    LAZYDEBUG(cout << "DPPS=" << m_right->getNumDPPSample() <<"."<<endl;)
2098    
2099      double* resultp=&(m_samples[offset]);     // results are stored at the vector offset we recieved
2100      switch(m_op)
2101      {
2102        case PROD:
2103        for (int i=0;i<steps;++i,resultp+=resultStep)
2104        {
2105              const double *ptr_0 = &((*left)[lroffset]);
2106              const double *ptr_1 = &((*right)[rroffset]);
2107    
2108    LAZYDEBUG(cout << DataTypes::pointToString(*left, m_left->getShape(),lroffset,"LEFT") << endl;)
2109    LAZYDEBUG(cout << DataTypes::pointToString(*right,m_right->getShape(),rroffset, "RIGHT") << endl;)
2110    
2111              matrix_matrix_product(m_SL, m_SM, m_SR, ptr_0, ptr_1, resultp, m_transpose);
2112    
2113          lroffset+=leftStep;
2114          rroffset+=rightStep;
2115        }
2116        break;
2117        default:
2118        throw DataException("Programmer error - resolveTProduct can not resolve operator "+opToString(m_op)+".");
2119      }
2120      roffset=offset;
2121      return &m_samples;
2122    }
2123    #endif //LAZY_NODE_STORAGE
2124    
2125    /*
2126    \brief Compute the value of the expression for the given sample.    \brief Compute the value of the expression for the given sample.
2127    \return Vector which stores the value of the subexpression for the given sample.    \return Vector which stores the value of the subexpression for the given sample.
2128    \param v A vector to store intermediate results.    \param v A vector to store intermediate results.
# Line 689  cout << "Resolve binary: " << toString() Line 2141  cout << "Resolve binary: " << toString()
2141  const DataTypes::ValueType*  const DataTypes::ValueType*
2142  DataLazy::resolveSample(ValueType& v, size_t offset, int sampleNo, size_t& roffset)  DataLazy::resolveSample(ValueType& v, size_t offset, int sampleNo, size_t& roffset)
2143  {  {
2144  cout << "Resolve sample " << toString() << endl;  LAZYDEBUG(cout << "Resolve sample " << toString() << endl;)
2145      // collapse so we have a 'E' node or an IDENTITY for some other type      // collapse so we have a 'E' node or an IDENTITY for some other type
2146    if (m_readytype!='E' && m_op!=IDENTITY)    if (m_readytype!='E' && m_op!=IDENTITY)
2147    {    {
# Line 697  cout << "Resolve sample " << toString() Line 2149  cout << "Resolve sample " << toString()
2149    }    }
2150    if (m_op==IDENTITY)      if (m_op==IDENTITY)  
2151    {    {
2152      const ValueType& vec=m_id->getVector();      const ValueType& vec=m_id->getVectorRO();
2153      if (m_readytype=='C')      if (m_readytype=='C')
2154      {      {
2155      roffset=0;      roffset=0;
2156    LAZYDEBUG(cout << "Finish  sample " << toString() << endl;)
2157      return &(vec);      return &(vec);
2158      }      }
2159      roffset=m_id->getPointOffset(sampleNo, 0);      roffset=m_id->getPointOffset(sampleNo, 0);
2160    LAZYDEBUG(cout << "Finish  sample " << toString() << endl;)
2161      return &(vec);      return &(vec);
2162    }    }
2163    if (m_readytype!='E')    if (m_readytype!='E')
# Line 712  cout << "Resolve sample " << toString() Line 2166  cout << "Resolve sample " << toString()
2166    }    }
2167    switch (getOpgroup(m_op))    switch (getOpgroup(m_op))
2168    {    {
2169    case G_UNARY: return resolveUnary(v, offset,sampleNo,roffset);    case G_UNARY:
2170      case G_UNARY_P: return resolveUnary(v, offset,sampleNo,roffset);
2171    case G_BINARY: return resolveBinary(v, offset,sampleNo,roffset);    case G_BINARY: return resolveBinary(v, offset,sampleNo,roffset);
2172      case G_NP1OUT: return resolveNP1OUT(v, offset, sampleNo,roffset);
2173      case G_NP1OUT_P: return resolveNP1OUT_P(v, offset, sampleNo,roffset);
2174      case G_TENSORPROD: return resolveTProd(v,offset, sampleNo,roffset);
2175      case G_NP1OUT_2P: return resolveNP1OUT_2P(v, offset, sampleNo, roffset);
2176    default:    default:
2177      throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");      throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");
2178    }    }
2179    
2180    }
2181    
2182    const DataTypes::ValueType*
2183    DataLazy::resolveSample(BufferGroup& bg, int sampleNo, size_t& roffset)
2184    {
2185    #ifdef _OPENMP
2186        int tid=omp_get_thread_num();
2187    #else
2188        int tid=0;
2189    #endif
2190        return resolveSample(bg.getBuffer(tid),bg.getOffset(tid),sampleNo,roffset);
2191    }
2192    
2193    
2194    // This needs to do the work of the identity constructor
2195    void
2196    DataLazy::resolveToIdentity()
2197    {
2198       if (m_op==IDENTITY)
2199        return;
2200    #ifndef LAZY_NODE_STORAGE
2201       DataReady_ptr p=resolveVectorWorker();
2202    #else
2203       DataReady_ptr p=resolveNodeWorker();
2204    #endif
2205       makeIdentity(p);
2206    }
2207    
// Turn this node into an IDENTITY node wrapping the resolved data p:
// clear all operation-specific state, adopt p's type/size information and
// release the (now evaluated) child subtrees.
void DataLazy::makeIdentity(const DataReady_ptr& p)
{
   m_op=IDENTITY;
   m_axis_offset=0;
   m_transpose=0;
   m_SL=m_SM=m_SR=0;		// tensor-product dimensions no longer apply
   m_children=m_height=0;	// an identity node is a leaf of the lazy tree
   m_id=p;
   // readytype mirrors the kind of DataReady we now wrap
   if(p->isConstant()) {m_readytype='C';}
   else if(p->isExpanded()) {m_readytype='E';}
   else if (p->isTagged()) {m_readytype='T';}
   else {throw DataException("Unknown DataReady instance in convertToIdentity constructor.");}
   m_buffsRequired=1;
   m_samplesize=p->getNumDPPSample()*p->getNoValues();
   m_maxsamplesize=m_samplesize;
   m_left.reset();		// drop references so the subtrees can be freed
   m_right.reset();
}
2226    
2227    
// Force evaluation of the expression tree.
// After resolveToIdentity() this node is an IDENTITY and m_id holds the
// fully evaluated DataReady, which we hand back to the caller.
DataReady_ptr
DataLazy::resolve()
{
    resolveToIdentity();
    return m_id;
}
2234    
2235    #ifdef LAZY_NODE_STORAGE
2236    
// This version of resolve uses storage in each node to hold results.
// Each sample is evaluated independently (in parallel under OpenMP) and the
// per-thread result copied into the output DataExpanded.
DataReady_ptr
DataLazy::resolveNodeWorker()
{
  if (m_readytype!='E')		// if the whole sub-expression is Constant or Tagged, then evaluate it normally
  {
    collapse();
  }
  if (m_op==IDENTITY)		// So a lazy expression of Constant or Tagged data will be returned here.
  {
    return m_id;
  }
  	// from this point on we must have m_op!=IDENTITY and m_readytype=='E'
  DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(),  ValueType(getNoValues()));
  ValueType& resvec=result->getVectorRW();
  DataReady_ptr resptr=DataReady_ptr(result);	// smart pointer now owns result

  int sample;
  int totalsamples=getNumSamples();
  const ValueType* res=0;	// Storage for answer
LAZYDEBUG(cout << "Total number of samples=" <<totalsamples << endl;)
	// sample and res are declared outside the loop but made thread-private here
  #pragma omp parallel for private(sample,res) schedule(static)
  for (sample=0;sample<totalsamples;++sample)
  {
    size_t roffset=0;
#ifdef _OPENMP
    res=resolveNodeSample(omp_get_thread_num(),sample,roffset);
#else
    res=resolveNodeSample(0,sample,roffset);
#endif
LAZYDEBUG(cout << "Sample #" << sample << endl;)
LAZYDEBUG(cout << "Final res[" << roffset<< "]=" << (*res)[roffset] << (*res)[roffset]<< endl; )
	// copy this sample from the node's buffer into the output object
    DataVector::size_type outoffset=result->getPointOffset(sample,0);
    memcpy(&(resvec[outoffset]),&((*res)[roffset]),m_samplesize*sizeof(DataVector::ElementType));
  }
  return resptr;
}
2274    
2275    #endif // LAZY_NODE_STORAGE
2276    
2277  // To simplify the memory management, all threads operate on one large vector, rather than one each.  // To simplify the memory management, all threads operate on one large vector, rather than one each.
2278  // Each sample is evaluated independently and copied into the result DataExpanded.  // Each sample is evaluated independently and copied into the result DataExpanded.
2279  DataReady_ptr  DataReady_ptr
2280  DataLazy::resolve()  DataLazy::resolveVectorWorker()
2281  {  {
2282    
2283  cout << "Sample size=" << m_samplesize << endl;  LAZYDEBUG(cout << "Sample size=" << m_samplesize << endl;)
2284  cout << "Buffers=" << m_buffsRequired << endl;  LAZYDEBUG(cout << "Buffers=" << m_buffsRequired << endl;)
   
2285    if (m_readytype!='E')     // if the whole sub-expression is Constant or Tagged, then evaluate it normally    if (m_readytype!='E')     // if the whole sub-expression is Constant or Tagged, then evaluate it normally
2286    {    {
2287      collapse();      collapse();
# Line 738  cout << "Buffers=" << m_buffsRequired << Line 2291  cout << "Buffers=" << m_buffsRequired <<
2291      return m_id;      return m_id;
2292    }    }
2293      // from this point on we must have m_op!=IDENTITY and m_readytype=='E'      // from this point on we must have m_op!=IDENTITY and m_readytype=='E'
2294    size_t threadbuffersize=m_samplesize*(max(1,m_buffsRequired));    // Each thread needs to have enough    size_t threadbuffersize=m_maxsamplesize*(max(1,m_buffsRequired)); // Each thread needs to have enough
2295      // storage to evaluate its expression      // storage to evaluate its expression
2296    int numthreads=1;    int numthreads=1;
2297  #ifdef _OPENMP  #ifdef _OPENMP
2298    numthreads=getNumberOfThreads();    numthreads=omp_get_max_threads();
   int threadnum=0;  
2299  #endif  #endif
2300    ValueType v(numthreads*threadbuffersize);    ValueType v(numthreads*threadbuffersize);
2301  cout << "Buffer created with size=" << v.size() << endl;  LAZYDEBUG(cout << "Buffer created with size=" << v.size() << endl;)
2302    DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(),  ValueType(getNoValues()));    DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(),  ValueType(getNoValues()));
2303    ValueType& resvec=result->getVector();    ValueType& resvec=result->getVectorRW();
2304    DataReady_ptr resptr=DataReady_ptr(result);    DataReady_ptr resptr=DataReady_ptr(result);
2305    int sample;    int sample;
2306    size_t outoffset;     // offset in the output data    size_t outoffset;     // offset in the output data
2307    int totalsamples=getNumSamples();    int totalsamples=getNumSamples();
2308    const ValueType* res=0;   // Vector storing the answer    const ValueType* res=0;   // Vector storing the answer
2309    size_t resoffset=0;       // where in the vector to find the answer    size_t resoffset=0;       // where in the vector to find the answer
2310    #pragma omp parallel for private(sample,resoffset,outoffset,threadnum,res) schedule(static)  LAZYDEBUG(cout << "Total number of samples=" <<totalsamples << endl;)
2311      #pragma omp parallel for private(sample,resoffset,outoffset,res) schedule(static)
2312    for (sample=0;sample<totalsamples;++sample)    for (sample=0;sample<totalsamples;++sample)
2313    {    {
2314  cout << "################################# " << sample << endl;  LAZYDEBUG(cout << "################################# " << sample << endl;)
2315  #ifdef _OPENMP  #ifdef _OPENMP
2316      res=resolveSample(v,threadbuffersize*omp_get_thread_num(),sample,resoffset);      res=resolveSample(v,threadbuffersize*omp_get_thread_num(),sample,resoffset);
2317  #else  #else
2318      res=resolveSample(v,0,sample,resoffset);   // res would normally be v, but not if its a single IDENTITY op.      res=resolveSample(v,0,sample,resoffset);   // res would normally be v, but not if its a single IDENTITY op.
2319  #endif  #endif
2320  cerr << "-------------------------------- " << endl;  LAZYDEBUG(cerr << "-------------------------------- " << endl;)
2321    LAZYDEBUG(cerr<< "Copying sample#" << sample << endl;)
2322      outoffset=result->getPointOffset(sample,0);      outoffset=result->getPointOffset(sample,0);
2323  cerr << "offset=" << outoffset << endl;  LAZYDEBUG(cerr << "offset=" << outoffset << " from offset=" << resoffset << " " << m_samplesize << " doubles" << endl;)
2324      for (unsigned int i=0;i<m_samplesize;++i,++outoffset,++resoffset)   // copy values into the output vector      for (unsigned int i=0;i<m_samplesize;++i,++outoffset,++resoffset)   // copy values into the output vector
2325      {      {
2326    LAZYDEBUG(cerr << "outoffset=" << outoffset << " resoffset=" << resoffset << " " << (*res)[resoffset]<< endl;)
2327      resvec[outoffset]=(*res)[resoffset];      resvec[outoffset]=(*res)[resoffset];
2328      }      }
2329  cerr << "*********************************" << endl;  LAZYDEBUG(cerr << DataTypes::pointToString(resvec,getShape(),outoffset-m_samplesize+DataTypes::noValues(getShape()),"Final result:") << endl;)
2330    LAZYDEBUG(cerr << "*********************************" << endl;)
2331    }    }
2332    return resptr;    return resptr;
2333  }  }
# Line 789  DataLazy::toString() const Line 2345  DataLazy::toString() const
2345  void  void
2346  DataLazy::intoString(ostringstream& oss) const  DataLazy::intoString(ostringstream& oss) const
2347  {  {
2348    //    oss << "[" << m_children <<";"<<m_height <<"]";
2349    switch (getOpgroup(m_op))    switch (getOpgroup(m_op))
2350    {    {
2351    case G_IDENTITY:    case G_IDENTITY:
# Line 818  DataLazy::intoString(ostringstream& oss) Line 2375  DataLazy::intoString(ostringstream& oss)
2375      oss << ')';      oss << ')';
2376      break;      break;
2377    case G_UNARY:    case G_UNARY:
2378      case G_UNARY_P:
2379      case G_NP1OUT:
2380      case G_NP1OUT_P:
2381        oss << opToString(m_op) << '(';
2382        m_left->intoString(oss);
2383        oss << ')';
2384        break;
2385      case G_TENSORPROD:
2386        oss << opToString(m_op) << '(';
2387        m_left->intoString(oss);
2388        oss << ", ";
2389        m_right->intoString(oss);
2390        oss << ')';
2391        break;
2392      case G_NP1OUT_2P:
2393      oss << opToString(m_op) << '(';      oss << opToString(m_op) << '(';
2394      m_left->intoString(oss);      m_left->intoString(oss);
2395        oss << ", " << m_axis_offset << ", " << m_transpose;
2396      oss << ')';      oss << ')';
2397      break;      break;
2398    default:    default:
# Line 827  DataLazy::intoString(ostringstream& oss) Line 2400  DataLazy::intoString(ostringstream& oss)
2400    }    }
2401  }  }
2402    
 // Note that in this case, deepCopy does not make copies of the leaves.  
 // Hopefully copy on write (or whatever we end up using) will take care of this.  
2403  DataAbstract*  DataAbstract*
2404  DataLazy::deepCopy()  DataLazy::deepCopy()
2405  {  {
2406    if (m_op==IDENTITY)    switch (getOpgroup(m_op))
2407    {    {
2408      return new DataLazy(m_left);    // we don't need to copy the child here    case G_IDENTITY:  return new DataLazy(m_id->deepCopy()->getPtr());
2409      case G_UNARY: return new DataLazy(m_left->deepCopy()->getPtr(),m_op);
2410      case G_BINARY:    return new DataLazy(m_left->deepCopy()->getPtr(),m_right->deepCopy()->getPtr(),m_op);
2411      case G_NP1OUT: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(),m_op);
2412      case G_TENSORPROD: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
2413      default:
2414        throw DataException("Programmer error - do not know how to deepcopy operator "+opToString(m_op)+".");
2415    }    }
   return new DataLazy(m_left->deepCopy()->getPtr(),m_right->deepCopy()->getPtr(),m_op);  
2416  }  }
2417    
2418    
2419    // There is no single, natural interpretation of getLength on DataLazy.
2420    // Instances of DataReady can look at the size of their vectors.
2421    // For lazy though, it could be the size the data would be if it were resolved;
2422    // or it could be some function of the lengths of the DataReady instances which
2423    // form part of the expression.
2424    // Rather than have people making assumptions, I have disabled the method.
2425  DataTypes::ValueType::size_type  DataTypes::ValueType::size_type
2426  DataLazy::getLength() const  DataLazy::getLength() const
2427  {  {
2428    return m_length;    throw DataException("getLength() does not make sense for lazy data.");
2429  }  }
2430    
2431    
# Line 853  DataLazy::getSlice(const DataTypes::Regi Line 2435  DataLazy::getSlice(const DataTypes::Regi
2435    throw DataException("getSlice - not implemented for Lazy objects.");    throw DataException("getSlice - not implemented for Lazy objects.");
2436  }  }
2437    
2438    
2439    // To do this we need to rely on our child nodes
2440    DataTypes::ValueType::size_type
2441    DataLazy::getPointOffset(int sampleNo,
2442                     int dataPointNo)
2443    {
2444      if (m_op==IDENTITY)
2445      {
2446        return m_id->getPointOffset(sampleNo,dataPointNo);
2447      }
2448      if (m_readytype!='E')
2449      {
2450        collapse();
2451        return m_id->getPointOffset(sampleNo,dataPointNo);
2452      }
2453      // at this point we do not have an identity node and the expression will be Expanded
2454      // so we only need to know which child to ask
2455      if (m_left->m_readytype=='E')
2456      {
2457        return m_left->getPointOffset(sampleNo,dataPointNo);
2458      }
2459      else
2460      {
2461        return m_right->getPointOffset(sampleNo,dataPointNo);
2462      }
2463    }
2464    
2465    // To do this we need to rely on our child nodes
2466  DataTypes::ValueType::size_type  DataTypes::ValueType::size_type
2467  DataLazy::getPointOffset(int sampleNo,  DataLazy::getPointOffset(int sampleNo,
2468                   int dataPointNo) const                   int dataPointNo) const
2469  {  {
2470    throw DataException("getPointOffset - not implemented for Lazy objects - yet.");    if (m_op==IDENTITY)
2471      {
2472        return m_id->getPointOffset(sampleNo,dataPointNo);
2473      }
2474      if (m_readytype=='E')
2475      {
2476        // at this point we do not have an identity node and the expression will be Expanded
2477        // so we only need to know which child to ask
2478        if (m_left->m_readytype=='E')
2479        {
2480        return m_left->getPointOffset(sampleNo,dataPointNo);
2481        }
2482        else
2483        {
2484        return m_right->getPointOffset(sampleNo,dataPointNo);
2485        }
2486      }
2487      if (m_readytype=='C')
2488      {
2489        return m_left->getPointOffset(sampleNo,dataPointNo); // which child doesn't matter
2490      }
2491      throw DataException("Programmer error - getPointOffset on lazy data may require collapsing (but this object is marked const).");
2492  }  }
2493    
2494  // It would seem that DataTagged will need to be treated differently since even after setting all tags  
2495  // to zero, all the tags from all the DataTags would be in the result.  // I have decided to let Data:: handle this issue.
 // However since they all have the same value (0) whether they are there or not should not matter.  
 // So I have decided that for all types this method will create a constant 0.  
 // It can be promoted up as required.  
 // A possible efficiency concern might be expanded->constant->expanded which has an extra memory management  
 // but we can deal with that if it arrises.  
2496  void  void
2497  DataLazy::setToZero()  DataLazy::setToZero()
2498  {  {
2499    DataTypes::ValueType v(getNoValues(),0);  //   DataTypes::ValueType v(getNoValues(),0);
2500    m_id=DataReady_ptr(new DataConstant(getFunctionSpace(),getShape(),v));  //   m_id=DataReady_ptr(new DataConstant(getFunctionSpace(),getShape(),v));
2501    m_op=IDENTITY;  //   m_op=IDENTITY;
2502    m_right.reset();    //   m_right.reset();  
2503    m_left.reset();  //   m_left.reset();
2504    m_readytype='C';  //   m_readytype='C';
2505    m_buffsRequired=1;  //   m_buffsRequired=1;
2506    
2507      throw DataException("Programmer error - setToZero not supported for DataLazy (DataLazy objects should be read only).");
2508      (int)privdebug;   // to stop the compiler complaining about unused privdebug
2509    }
2510    
2511    bool
2512    DataLazy::actsExpanded() const
2513    {
2514        return (m_readytype=='E');
2515  }  }
2516    
2517  }   // end namespace  }   // end namespace

Legend:
Removed from v.1910  
changed lines
  Added in v.2500

  ViewVC Help
Powered by ViewVC 1.1.26