/[escript]/branches/clazy/escriptcore/src/DataLazy.cpp
ViewVC logotype

Diff of /branches/clazy/escriptcore/src/DataLazy.cpp

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

trunk/escript/src/DataLazy.cpp revision 2799 by jfenwick, Thu Dec 3 01:35:08 2009 UTC trunk/escriptcore/src/DataLazy.cpp revision 6168 by jfenwick, Wed Apr 13 03:08:12 2016 UTC
# Line 1  Line 1 
1    
2  /*******************************************************  /*****************************************************************************
3  *  *
4  * Copyright (c) 2003-2009 by University of Queensland  * Copyright (c) 2003-2016 by The University of Queensland
5  * Earth Systems Science Computational Center (ESSCC)  * http://www.uq.edu.au
 * http://www.uq.edu.au/esscc  
6  *  *
7  * Primary Business: Queensland, Australia  * Primary Business: Queensland, Australia
8  * Licensed under the Open Software License version 3.0  * Licensed under the Apache License, version 2.0
9  * http://www.opensource.org/licenses/osl-3.0.php  * http://www.apache.org/licenses/LICENSE-2.0
10  *  *
11  *******************************************************/  * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12    * Development 2012-2013 by School of Earth Sciences
13    * Development from 2014 by Centre for Geoscience Computing (GeoComp)
14    *
15    *****************************************************************************/
16    
17  #include "DataLazy.h"  #include "DataLazy.h"
 #ifdef USE_NETCDF  
 #include <netcdfcpp.h>  
 #endif  
 #ifdef PASO_MPI  
 #include <mpi.h>  
 #endif  
 #ifdef _OPENMP  
 #include <omp.h>  
 #endif  
 #include "FunctionSpace.h"  
 #include "DataTypes.h"  
18  #include "Data.h"  #include "Data.h"
19  #include "UnaryFuncs.h"     // for escript::fsign  #include "DataTypes.h"
20    #include "EscriptParams.h"
21    #include "FunctionSpace.h"
22  #include "Utils.h"  #include "Utils.h"
23    #include "DataVectorOps.h"
24    
25  #include "EscriptParams.h"  #include <iomanip> // for some fancy formatting in debug
26    
27  #include <iomanip>      // for some fancy formatting in debug  using namespace escript::DataTypes;
28    
29    #define NO_ARG
30    
31  // #define LAZYDEBUG(X) if (privdebug){X;}  // #define LAZYDEBUG(X) if (privdebug){X;}
32  #define LAZYDEBUG(X)  #define LAZYDEBUG(X)
# Line 60  A special operation, IDENTITY, stores an Line 56  A special operation, IDENTITY, stores an
56  This means that all "internal" nodes in the structure are instances of DataLazy.  This means that all "internal" nodes in the structure are instances of DataLazy.
57    
58  Each operation has a string representation as well as an opgroup - eg G_IDENTITY, G_BINARY, ...  Each operation has a string representation as well as an opgroup - eg G_IDENTITY, G_BINARY, ...
59  Note that IDENITY is not considered a unary operation.  Note that IDENTITY is not considered a unary operation.
60    
61  I am avoiding calling the structure formed a tree because it is not guaranteed to be one (eg c=a+a).  I am avoiding calling the structure formed a tree because it is not guaranteed to be one (eg c=a+a).
62  It must however form a DAG (directed acyclic graph).  It must however form a DAG (directed acyclic graph).
# Line 68  I will refer to individual DataLazy obje Line 64  I will refer to individual DataLazy obje
64    
65  Each node also stores:  Each node also stores:
66  - m_readytype \in {'E','T','C','?'} ~ indicates what sort of DataReady would be produced if the expression was  - m_readytype \in {'E','T','C','?'} ~ indicates what sort of DataReady would be produced if the expression was
67      evaluated.          evaluated.
68  - m_buffsrequired ~ the larged number of samples which would need to be kept simultaneously in order to  - m_buffsrequired ~ the large number of samples which would need to be kept simultaneously in order to
69      evaluate the expression.          evaluate the expression.
70  - m_samplesize ~ the number of doubles stored in a sample.  - m_samplesize ~ the number of doubles stored in a sample.
71    
72  When a new node is created, the above values are computed based on the values in the child nodes.  When a new node is created, the above values are computed based on the values in the child nodes.
# Line 131  std::vector<void*> stackend(getNumberOfT Line 127  std::vector<void*> stackend(getNumberOfT
127  size_t maxstackuse=0;  size_t maxstackuse=0;
128  #endif  #endif
129    
 enum ES_opgroup  
 {  
    G_UNKNOWN,  
    G_IDENTITY,  
    G_BINARY,        // pointwise operations with two arguments  
    G_UNARY,     // pointwise operations with one argument  
    G_UNARY_P,       // pointwise operations with one argument, requiring a parameter  
    G_NP1OUT,        // non-pointwise op with one output  
    G_NP1OUT_P,      // non-pointwise op with one output requiring a parameter  
    G_TENSORPROD,    // general tensor product  
    G_NP1OUT_2P,     // non-pointwise op with one output requiring two params  
    G_REDUCTION      // non-pointwise unary op with a scalar output  
 };  
   
   
   
   
 string ES_opstrings[]={"UNKNOWN","IDENTITY","+","-","*","/","^",  
             "sin","cos","tan",  
             "asin","acos","atan","sinh","cosh","tanh","erf",  
             "asinh","acosh","atanh",  
             "log10","log","sign","abs","neg","pos","exp","sqrt",  
             "1/","where>0","where<0","where>=0","where<=0", "where<>0","where=0",  
             "symmetric","nonsymmetric",  
             "prod",  
             "transpose", "trace",  
             "swapaxes",  
             "minval", "maxval"};  
 int ES_opcount=43;  
 ES_opgroup opgroups[]={G_UNKNOWN,G_IDENTITY,G_BINARY,G_BINARY,G_BINARY,G_BINARY, G_BINARY,  
             G_UNARY,G_UNARY,G_UNARY, //10  
             G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,    // 17  
             G_UNARY,G_UNARY,G_UNARY,                    // 20  
             G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,    // 28  
             G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY, G_UNARY_P, G_UNARY_P,      // 35  
             G_NP1OUT,G_NP1OUT,  
             G_TENSORPROD,  
             G_NP1OUT_P, G_NP1OUT_P,  
             G_NP1OUT_2P,  
             G_REDUCTION, G_REDUCTION};  
 inline  
 ES_opgroup  
 getOpgroup(ES_optype op)  
 {  
   return opgroups[op];  
 }  
130    
131  // return the FunctionSpace of the result of "left op right"  // return the FunctionSpace of the result of "left op right"
132  FunctionSpace  FunctionSpace
133  resultFS(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)  resultFS(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
134  {  {
135      // perhaps this should call interpolate and throw or something?          // perhaps this should call interpolate and throw or something?
136      // maybe we need an interpolate node -          // maybe we need an interpolate node -
137      // that way, if interpolate is required in any other op we can just throw a          // that way, if interpolate is required in any other op we can just throw a
138      // programming error exception.          // programming error exception.
139    
140    FunctionSpace l=left->getFunctionSpace();    FunctionSpace l=left->getFunctionSpace();
141    FunctionSpace r=right->getFunctionSpace();    FunctionSpace r=right->getFunctionSpace();
142    if (l!=r)    if (l!=r)
143    {    {
144      if (r.probeInterpolation(l))      signed char res=r.getDomain()->preferredInterpolationOnDomain(r.getTypeCode(), l.getTypeCode());
145        if (res==1)
146      {      {
147      return l;          return l;
148      }      }
149      if (l.probeInterpolation(r))      if (res==-1)
150      {      {
151      return r;          return r;
152      }      }
153      throw DataException("Cannot interpolate between the FunctionSpaces given for operation "+opToString(op)+".");      throw DataException("Cannot interpolate between the FunctionSpaces given for operation "+opToString(op)+".");
154    }    }
# Line 209  resultFS(DataAbstract_ptr left, DataAbst Line 160  resultFS(DataAbstract_ptr left, DataAbst
160  DataTypes::ShapeType  DataTypes::ShapeType
161  resultShape(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)  resultShape(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
162  {  {
163      if (left->getShape()!=right->getShape())          if (left->getShape()!=right->getShape())
164      {          {
165        if ((getOpgroup(op)!=G_BINARY) && (getOpgroup(op)!=G_NP1OUT))            if ((getOpgroup(op)!=G_BINARY) && (getOpgroup(op)!=G_NP1OUT))
166        {            {
167          throw DataException("Shapes not the name - shapes must match for (point)binary operations.");                  throw DataException("Shapes not the name - shapes must match for (point)binary operations.");
168        }            }
169    
170        if (left->getRank()==0)   // we need to allow scalar * anything            if (left->getRank()==0)       // we need to allow scalar * anything
171        {            {
172          return right->getShape();                  return right->getShape();
173        }            }
174        if (right->getRank()==0)            if (right->getRank()==0)
175        {            {
176          return left->getShape();                  return left->getShape();
177        }            }
178        throw DataException("Shapes not the same - arguments must have matching shapes (or be scalars) for (point)binary operations on lazy data.");            throw DataException("Shapes not the same - arguments must have matching shapes (or be scalars) for (point)binary operations on lazy data.");
179      }          }
180      return left->getShape();          return left->getShape();
181  }  }
182    
183  // return the shape for "op left"  // return the shape for "op left"
# Line 234  resultShape(DataAbstract_ptr left, DataA Line 185  resultShape(DataAbstract_ptr left, DataA
185  DataTypes::ShapeType  DataTypes::ShapeType
186  resultShape(DataAbstract_ptr left, ES_optype op, int axis_offset)  resultShape(DataAbstract_ptr left, ES_optype op, int axis_offset)
187  {  {
188      switch(op)          switch(op)
189      {          {
190          case TRANS:          case TRANS:
191         {            // for the scoping of variables             {                    // for the scoping of variables
192          const DataTypes::ShapeType& s=left->getShape();                  const DataTypes::ShapeType& s=left->getShape();
193          DataTypes::ShapeType sh;                  DataTypes::ShapeType sh;
194          int rank=left->getRank();                  int rank=left->getRank();
195          if (axis_offset<0 || axis_offset>rank)                  if (axis_offset<0 || axis_offset>rank)
196          {                  {
197              stringstream e;              stringstream e;
198              e << "Error - Data::transpose must have 0 <= axis_offset <= rank=" << rank;              e << "Error - Data::transpose must have 0 <= axis_offset <= rank=" << rank;
199              throw DataException(e.str());              throw DataException(e.str());
200          }          }
201          for (int i=0; i<rank; i++)          for (int i=0; i<rank; i++)
202          {                  {
203             int index = (axis_offset+i)%rank;                     int index = (axis_offset+i)%rank;
204             sh.push_back(s[index]); // Append to new shape             sh.push_back(s[index]); // Append to new shape
205          }          }
206          return sh;                  return sh;
207         }             }
208      break;          break;
209      case TRACE:          case TRACE:
210         {             {
211          int rank=left->getRank();                  int rank=left->getRank();
212          if (rank<2)                  if (rank<2)
213          {                  {
214             throw DataException("Trace can only be computed for objects with rank 2 or greater.");                     throw DataException("Trace can only be computed for objects with rank 2 or greater.");
215          }                  }
216          if ((axis_offset>rank-2) || (axis_offset<0))                  if ((axis_offset>rank-2) || (axis_offset<0))
217          {                  {
218             throw DataException("Trace: axis offset must lie between 0 and rank-2 inclusive.");                     throw DataException("Trace: axis offset must lie between 0 and rank-2 inclusive.");
219          }                  }
220          if (rank==2)                  if (rank==2)
221          {                  {
222             return DataTypes::scalarShape;                     return DataTypes::scalarShape;
223          }                  }
224          else if (rank==3)                  else if (rank==3)
225          {                  {
226             DataTypes::ShapeType sh;                     DataTypes::ShapeType sh;
227                 if (axis_offset==0)                     if (axis_offset==0)
228             {                     {
229                  sh.push_back(left->getShape()[2]);                          sh.push_back(left->getShape()[2]);
230                 }                     }
231                 else     // offset==1                     else         // offset==1
232             {                     {
233              sh.push_back(left->getShape()[0]);                          sh.push_back(left->getShape()[0]);
234                 }                     }
235             return sh;                     return sh;
236          }                  }
237          else if (rank==4)                  else if (rank==4)
238          {                  {
239             DataTypes::ShapeType sh;                     DataTypes::ShapeType sh;
240             const DataTypes::ShapeType& s=left->getShape();                     const DataTypes::ShapeType& s=left->getShape();
241                 if (axis_offset==0)                     if (axis_offset==0)
242             {                     {
243                  sh.push_back(s[2]);                          sh.push_back(s[2]);
244                  sh.push_back(s[3]);                          sh.push_back(s[3]);
245                 }                     }
246                 else if (axis_offset==1)                     else if (axis_offset==1)
247             {                     {
248                  sh.push_back(s[0]);                          sh.push_back(s[0]);
249                  sh.push_back(s[3]);                          sh.push_back(s[3]);
250                 }                     }
251             else     // offset==2                     else         // offset==2
252             {                     {
253              sh.push_back(s[0]);                          sh.push_back(s[0]);
254              sh.push_back(s[1]);                          sh.push_back(s[1]);
255             }                     }
256             return sh;                     return sh;
257          }                  }
258          else        // unknown rank                  else            // unknown rank
259          {                  {
260             throw DataException("Error - Data::trace can only be calculated for rank 2, 3 or 4 object.");                     throw DataException("Error - Data::trace can only be calculated for rank 2, 3 or 4 object.");
261          }                  }
262         }             }
263      break;          break;
264          default:          default:
265      throw DataException("Programmer error - resultShape(left,op) can't compute shapes for operator "+opToString(op)+".");          throw DataException("Programmer error - resultShape(left,op) can't compute shapes for operator "+opToString(op)+".");
266      }          }
267  }  }
268    
269  DataTypes::ShapeType  DataTypes::ShapeType
# Line 368  SwapShape(DataAbstract_ptr left, const i Line 319  SwapShape(DataAbstract_ptr left, const i
319  DataTypes::ShapeType  DataTypes::ShapeType
320  GTPShape(DataAbstract_ptr left, DataAbstract_ptr right, int axis_offset, int transpose, int& SL, int& SM, int& SR)  GTPShape(DataAbstract_ptr left, DataAbstract_ptr right, int axis_offset, int transpose, int& SL, int& SM, int& SR)
321  {  {
322                
323    // Get rank and shape of inputs    // Get rank and shape of inputs
324    int rank0 = left->getRank();    int rank0 = left->getRank();
325    int rank1 = right->getRank();    int rank1 = right->getRank();
# Line 377  GTPShape(DataAbstract_ptr left, DataAbst Line 328  GTPShape(DataAbstract_ptr left, DataAbst
328    
329    // Prepare for the loops of the product and verify compatibility of shapes    // Prepare for the loops of the product and verify compatibility of shapes
330    int start0=0, start1=0;    int start0=0, start1=0;
331    if (transpose == 0)       {}    if (transpose == 0)           {}
332    else if (transpose == 1)  { start0 = axis_offset; }    else if (transpose == 1)      { start0 = axis_offset; }
333    else if (transpose == 2)  { start1 = rank1-axis_offset; }    else if (transpose == 2)      { start1 = rank1-axis_offset; }
334    else              { throw DataException("DataLazy GeneralTensorProduct Constructor: Error - transpose should be 0, 1 or 2"); }    else                          { throw DataException("DataLazy GeneralTensorProduct Constructor: Error - transpose should be 0, 1 or 2"); }
335    
336    if (rank0<axis_offset)    if (rank0<axis_offset)
337    {    {
338      throw DataException("DataLazy GeneralTensorProduct Constructor: Error - rank of left < axisoffset");          throw DataException("DataLazy GeneralTensorProduct Constructor: Error - rank of left < axisoffset");
339    }    }
340    
341    // Adjust the shapes for transpose    // Adjust the shapes for transpose
342    DataTypes::ShapeType tmpShape0(rank0);    // pre-sizing the vectors rather    DataTypes::ShapeType tmpShape0(rank0);        // pre-sizing the vectors rather
343    DataTypes::ShapeType tmpShape1(rank1);    // than using push_back    DataTypes::ShapeType tmpShape1(rank1);        // than using push_back
344    for (int i=0; i<rank0; i++)   { tmpShape0[i]=shape0[(i+start0)%rank0]; }    for (int i=0; i<rank0; i++)   { tmpShape0[i]=shape0[(i+start0)%rank0]; }
345    for (int i=0; i<rank1; i++)   { tmpShape1[i]=shape1[(i+start1)%rank1]; }    for (int i=0; i<rank1; i++)   { tmpShape1[i]=shape1[(i+start1)%rank1]; }
346    
347    // Prepare for the loops of the product    // Prepare for the loops of the product
348    SL=1, SM=1, SR=1;    SL=1, SM=1, SR=1;
349    for (int i=0; i<rank0-axis_offset; i++)   {    for (int i=0; i<rank0-axis_offset; i++)       {
350      SL *= tmpShape0[i];      SL *= tmpShape0[i];
351    }    }
352    for (int i=rank0-axis_offset; i<rank0; i++)   {    for (int i=rank0-axis_offset; i<rank0; i++)   {
353      if (tmpShape0[i] != tmpShape1[i-(rank0-axis_offset)]) {      if (tmpShape0[i] != tmpShape1[i-(rank0-axis_offset)]) {
354        throw DataException("C_GeneralTensorProduct: Error - incompatible shapes");        throw DataException("C_GeneralTensorProduct: Error - incompatible shapes");
355      }      }
356      SM *= tmpShape0[i];      SM *= tmpShape0[i];
357    }    }
358    for (int i=axis_offset; i<rank1; i++)     {    for (int i=axis_offset; i<rank1; i++)         {
359      SR *= tmpShape1[i];      SR *= tmpShape1[i];
360    }    }
361    
362    // Define the shape of the output (rank of shape is the sum of the loop ranges below)    // Define the shape of the output (rank of shape is the sum of the loop ranges below)
363    DataTypes::ShapeType shape2(rank0+rank1-2*axis_offset);      DataTypes::ShapeType shape2(rank0+rank1-2*axis_offset);      
364    {         // block to limit the scope of out_index    {                     // block to limit the scope of out_index
365       int out_index=0;       int out_index=0;
366       for (int i=0; i<rank0-axis_offset; i++, ++out_index) { shape2[out_index]=tmpShape0[i]; } // First part of arg_0_Z       for (int i=0; i<rank0-axis_offset; i++, ++out_index) { shape2[out_index]=tmpShape0[i]; } // First part of arg_0_Z
367       for (int i=axis_offset; i<rank1; i++, ++out_index)   { shape2[out_index]=tmpShape1[i]; } // Last part of arg_1_Z       for (int i=axis_offset; i<rank1; i++, ++out_index)   { shape2[out_index]=tmpShape1[i]; } // Last part of arg_1_Z
# Line 426  GTPShape(DataAbstract_ptr left, DataAbst Line 377  GTPShape(DataAbstract_ptr left, DataAbst
377    return shape2;    return shape2;
378  }  }
379    
380  }   // end anonymous namespace  }       // end anonymous namespace
   
   
   
 // Return a string representing the operation  
 const std::string&  
 opToString(ES_optype op)  
 {  
   if (op<0 || op>=ES_opcount)  
   {  
     op=UNKNOWNOP;  
   }  
   return ES_opstrings[op];  
 }  
381    
382  void DataLazy::LazyNodeSetup()  void DataLazy::LazyNodeSetup()
383  {  {
# Line 461  void DataLazy::LazyNodeSetup() Line 399  void DataLazy::LazyNodeSetup()
399    
400  // Creates an identity node  // Creates an identity node
401  DataLazy::DataLazy(DataAbstract_ptr p)  DataLazy::DataLazy(DataAbstract_ptr p)
402      : parent(p->getFunctionSpace(),p->getShape())          : parent(p->getFunctionSpace(),p->getShape())
403      ,m_sampleids(0),          ,m_sampleids(0),
404      m_samples(1)          m_samples(1)
405  {  {
406     if (p->isLazy())     if (p->isLazy())
407     {     {
408      // I don't want identity of Lazy.          // I don't want identity of Lazy.
409      // Question: Why would that be so bad?          // Question: Why would that be so bad?
410      // Answer: We assume that the child of ID is something we can call getVector on          // Answer: We assume that the child of ID is something we can call getVector on
411      throw DataException("Programmer error - attempt to create identity from a DataLazy.");          throw DataException("Programmer error - attempt to create identity from a DataLazy.");
412     }     }
413     else     else
414     {     {
415      p->makeLazyShared();          DataReady_ptr dr=dynamic_pointer_cast<DataReady>(p);
416      DataReady_ptr dr=dynamic_pointer_cast<DataReady>(p);          makeIdentity(dr);
     makeIdentity(dr);  
417  LAZYDEBUG(cout << "Wrapping " << dr.get() << " id=" << m_id.get() << endl;)  LAZYDEBUG(cout << "Wrapping " << dr.get() << " id=" << m_id.get() << endl;)
418     }     }
419  LAZYDEBUG(cout << "(1)Lazy created with " << m_samplesize << endl;)  LAZYDEBUG(cout << "(1)Lazy created with " << m_samplesize << endl;)
420  }  }
421    
422  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op)  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op)
423      : parent(left->getFunctionSpace(),(getOpgroup(op)!=G_REDUCTION)?left->getShape():DataTypes::scalarShape),          : parent(left->getFunctionSpace(),(getOpgroup(op)!=G_REDUCTION)?left->getShape():DataTypes::scalarShape),
424      m_op(op),          m_op(op),
425      m_axis_offset(0),          m_axis_offset(0),
426      m_transpose(0),          m_transpose(0),
427      m_SL(0), m_SM(0), m_SR(0)          m_SL(0), m_SM(0), m_SR(0)
428  {  {
429     if ((getOpgroup(op)!=G_UNARY) && (getOpgroup(op)!=G_NP1OUT) && (getOpgroup(op)!=G_REDUCTION))     if ((getOpgroup(op)!=G_UNARY) && (getOpgroup(op)!=G_NP1OUT) && (getOpgroup(op)!=G_REDUCTION))
430     {     {
431      throw DataException("Programmer error - constructor DataLazy(left, op) will only process UNARY operations.");          throw DataException("Programmer error - constructor DataLazy(left, op) will only process UNARY operations.");
432     }     }
433    
434     DataLazy_ptr lleft;     DataLazy_ptr lleft;
435     if (!left->isLazy())     if (!left->isLazy())
436     {     {
437      lleft=DataLazy_ptr(new DataLazy(left));          lleft=DataLazy_ptr(new DataLazy(left));
438     }     }
439     else     else
440     {     {
441      lleft=dynamic_pointer_cast<DataLazy>(left);          lleft=dynamic_pointer_cast<DataLazy>(left);
442     }     }
443     m_readytype=lleft->m_readytype;     m_readytype=lleft->m_readytype;
444     m_left=lleft;     m_left=lleft;
# Line 515  DataLazy::DataLazy(DataAbstract_ptr left Line 452  DataLazy::DataLazy(DataAbstract_ptr left
452    
453  // In this constructor we need to consider interpolation  // In this constructor we need to consider interpolation
454  DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)  DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
455      : parent(resultFS(left,right,op), resultShape(left,right,op)),          : parent(resultFS(left,right,op), resultShape(left,right,op)),
456      m_op(op),          m_op(op),
457      m_SL(0), m_SM(0), m_SR(0)          m_SL(0), m_SM(0), m_SR(0)
458  {  {
459  LAZYDEBUG(cout << "Forming operator with " << left.get() << " " << right.get() << endl;)  LAZYDEBUG(cout << "Forming operator with " << left.get() << " " << right.get() << endl;)
460     if ((getOpgroup(op)!=G_BINARY))     if ((getOpgroup(op)!=G_BINARY))
461     {     {
462      throw DataException("Programmer error - constructor DataLazy(left, right, op) will only process BINARY operations.");          throw DataException("Programmer error - constructor DataLazy(left, right, op) will only process BINARY operations.");
463     }     }
464    
465     if (getFunctionSpace()!=left->getFunctionSpace())    // left needs to be interpolated     if (getFunctionSpace()!=left->getFunctionSpace())    // left needs to be interpolated
466     {     {
467      FunctionSpace fs=getFunctionSpace();          FunctionSpace fs=getFunctionSpace();
468      Data ltemp(left);          Data ltemp(left);
469      Data tmp(ltemp,fs);          Data tmp(ltemp,fs);
470      left=tmp.borrowDataPtr();          left=tmp.borrowDataPtr();
471     }     }
472     if (getFunctionSpace()!=right->getFunctionSpace())   // right needs to be interpolated     if (getFunctionSpace()!=right->getFunctionSpace())   // right needs to be interpolated
473     {     {
474      Data tmp(Data(right),getFunctionSpace());          Data tmp(Data(right),getFunctionSpace());
475      right=tmp.borrowDataPtr();          right=tmp.borrowDataPtr();
476  LAZYDEBUG(cout << "Right interpolation required " << right.get() << endl;)  LAZYDEBUG(cout << "Right interpolation required " << right.get() << endl;)
477     }     }
478     left->operandCheck(*right);     left->operandCheck(*right);
479    
480     if (left->isLazy())          // the children need to be DataLazy. Wrap them in IDENTITY if required     if (left->isLazy())                  // the children need to be DataLazy. Wrap them in IDENTITY if required
481     {     {
482      m_left=dynamic_pointer_cast<DataLazy>(left);          m_left=dynamic_pointer_cast<DataLazy>(left);
483  LAZYDEBUG(cout << "Left is " << m_left->toString() << endl;)  LAZYDEBUG(cout << "Left is " << m_left->toString() << endl;)
484     }     }
485     else     else
486     {     {
487      m_left=DataLazy_ptr(new DataLazy(left));          m_left=DataLazy_ptr(new DataLazy(left));
488  LAZYDEBUG(cout << "Left " << left.get() << " wrapped " << m_left->m_id.get() << endl;)  LAZYDEBUG(cout << "Left " << left.get() << " wrapped " << m_left->m_id.get() << endl;)
489     }     }
490     if (right->isLazy())     if (right->isLazy())
491     {     {
492      m_right=dynamic_pointer_cast<DataLazy>(right);          m_right=dynamic_pointer_cast<DataLazy>(right);
493  LAZYDEBUG(cout << "Right is " << m_right->toString() << endl;)  LAZYDEBUG(cout << "Right is " << m_right->toString() << endl;)
494     }     }
495     else     else
496     {     {
497      m_right=DataLazy_ptr(new DataLazy(right));          m_right=DataLazy_ptr(new DataLazy(right));
498  LAZYDEBUG(cout << "Right " << right.get() << " wrapped " << m_right->m_id.get() << endl;)  LAZYDEBUG(cout << "Right " << right.get() << " wrapped " << m_right->m_id.get() << endl;)
499     }     }
500     char lt=m_left->m_readytype;     char lt=m_left->m_readytype;
501     char rt=m_right->m_readytype;     char rt=m_right->m_readytype;
502     if (lt=='E' || rt=='E')     if (lt=='E' || rt=='E')
503     {     {
504      m_readytype='E';          m_readytype='E';
505     }     }
506     else if (lt=='T' || rt=='T')     else if (lt=='T' || rt=='T')
507     {     {
508      m_readytype='T';          m_readytype='T';
509     }     }
510     else     else
511     {     {
512      m_readytype='C';          m_readytype='C';
513     }     }
514     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
515     m_children=m_left->m_children+m_right->m_children+2;     m_children=m_left->m_children+m_right->m_children+2;
# Line 583  LAZYDEBUG(cout << "(3)Lazy created with Line 520  LAZYDEBUG(cout << "(3)Lazy created with
520  }  }
521    
522  DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op, int axis_offset, int transpose)  DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op, int axis_offset, int transpose)
523      : parent(resultFS(left,right,op), GTPShape(left,right, axis_offset, transpose, m_SL,m_SM, m_SR)),          : parent(resultFS(left,right,op), GTPShape(left,right, axis_offset, transpose, m_SL,m_SM, m_SR)),
524      m_op(op),          m_op(op),
525      m_axis_offset(axis_offset),          m_axis_offset(axis_offset),
526      m_transpose(transpose)          m_transpose(transpose)
527  {  {
528     if ((getOpgroup(op)!=G_TENSORPROD))     if ((getOpgroup(op)!=G_TENSORPROD))
529     {     {
530      throw DataException("Programmer error - constructor DataLazy(left, right, op, ax, tr) will only process BINARY operations which require parameters.");          throw DataException("Programmer error - constructor DataLazy(left, right, op, ax, tr) will only process BINARY operations which require parameters.");
531     }     }
532     if ((transpose>2) || (transpose<0))     if ((transpose>2) || (transpose<0))
533     {     {
534      throw DataException("DataLazy GeneralTensorProduct constructor: Error - transpose should be 0, 1 or 2");          throw DataException("DataLazy GeneralTensorProduct constructor: Error - transpose should be 0, 1 or 2");
535     }     }
536     if (getFunctionSpace()!=left->getFunctionSpace())    // left needs to be interpolated     if (getFunctionSpace()!=left->getFunctionSpace())    // left needs to be interpolated
537     {     {
538      FunctionSpace fs=getFunctionSpace();          FunctionSpace fs=getFunctionSpace();
539      Data ltemp(left);          Data ltemp(left);
540      Data tmp(ltemp,fs);          Data tmp(ltemp,fs);
541      left=tmp.borrowDataPtr();          left=tmp.borrowDataPtr();
542     }     }
543     if (getFunctionSpace()!=right->getFunctionSpace())   // right needs to be interpolated     if (getFunctionSpace()!=right->getFunctionSpace())   // right needs to be interpolated
544     {     {
545      Data tmp(Data(right),getFunctionSpace());          Data tmp(Data(right),getFunctionSpace());
546      right=tmp.borrowDataPtr();          right=tmp.borrowDataPtr();
547     }     }
548  //    left->operandCheck(*right);  //    left->operandCheck(*right);
549    
550     if (left->isLazy())          // the children need to be DataLazy. Wrap them in IDENTITY if required     if (left->isLazy())                  // the children need to be DataLazy. Wrap them in IDENTITY if required
551     {     {
552      m_left=dynamic_pointer_cast<DataLazy>(left);          m_left=dynamic_pointer_cast<DataLazy>(left);
553     }     }
554     else     else
555     {     {
556      m_left=DataLazy_ptr(new DataLazy(left));          m_left=DataLazy_ptr(new DataLazy(left));
557     }     }
558     if (right->isLazy())     if (right->isLazy())
559     {     {
560      m_right=dynamic_pointer_cast<DataLazy>(right);          m_right=dynamic_pointer_cast<DataLazy>(right);
561     }     }
562     else     else
563     {     {
564      m_right=DataLazy_ptr(new DataLazy(right));          m_right=DataLazy_ptr(new DataLazy(right));
565     }     }
566     char lt=m_left->m_readytype;     char lt=m_left->m_readytype;
567     char rt=m_right->m_readytype;     char rt=m_right->m_readytype;
568     if (lt=='E' || rt=='E')     if (lt=='E' || rt=='E')
569     {     {
570      m_readytype='E';          m_readytype='E';
571     }     }
572     else if (lt=='T' || rt=='T')     else if (lt=='T' || rt=='T')
573     {     {
574      m_readytype='T';          m_readytype='T';
575     }     }
576     else     else
577     {     {
578      m_readytype='C';          m_readytype='C';
579     }     }
580     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
581     m_children=m_left->m_children+m_right->m_children+2;     m_children=m_left->m_children+m_right->m_children+2;
# Line 650  LAZYDEBUG(cout << "(4)Lazy created with Line 587  LAZYDEBUG(cout << "(4)Lazy created with
587    
588    
589  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, int axis_offset)  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, int axis_offset)
590      : parent(left->getFunctionSpace(), resultShape(left,op, axis_offset)),          : parent(left->getFunctionSpace(), resultShape(left,op, axis_offset)),
591      m_op(op),          m_op(op),
592      m_axis_offset(axis_offset),          m_axis_offset(axis_offset),
593      m_transpose(0),          m_transpose(0),
594      m_tol(0)          m_tol(0)
595  {  {
596     if ((getOpgroup(op)!=G_NP1OUT_P))     if ((getOpgroup(op)!=G_NP1OUT_P))
597     {     {
598      throw DataException("Programmer error - constructor DataLazy(left, op, ax) will only process UNARY operations which require parameters.");          throw DataException("Programmer error - constructor DataLazy(left, op, ax) will only process UNARY operations which require parameters.");
599     }     }
600     DataLazy_ptr lleft;     DataLazy_ptr lleft;
601     if (!left->isLazy())     if (!left->isLazy())
602     {     {
603      lleft=DataLazy_ptr(new DataLazy(left));          lleft=DataLazy_ptr(new DataLazy(left));
604     }     }
605     else     else
606     {     {
607      lleft=dynamic_pointer_cast<DataLazy>(left);          lleft=dynamic_pointer_cast<DataLazy>(left);
608     }     }
609     m_readytype=lleft->m_readytype;     m_readytype=lleft->m_readytype;
610     m_left=lleft;     m_left=lleft;
# Line 680  LAZYDEBUG(cout << "(5)Lazy created with Line 617  LAZYDEBUG(cout << "(5)Lazy created with
617  }  }
618    
619  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, double tol)  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, double tol)
620      : parent(left->getFunctionSpace(), left->getShape()),          : parent(left->getFunctionSpace(), left->getShape()),
621      m_op(op),          m_op(op),
622      m_axis_offset(0),          m_axis_offset(0),
623      m_transpose(0),          m_transpose(0),
624      m_tol(tol)          m_tol(tol)
625  {  {
626     if ((getOpgroup(op)!=G_UNARY_P))     if ((getOpgroup(op)!=G_UNARY_P))
627     {     {
628      throw DataException("Programmer error - constructor DataLazy(left, op, tol) will only process UNARY operations which require parameters.");          throw DataException("Programmer error - constructor DataLazy(left, op, tol) will only process UNARY operations which require parameters.");
629     }     }
630     DataLazy_ptr lleft;     DataLazy_ptr lleft;
631     if (!left->isLazy())     if (!left->isLazy())
632     {     {
633      lleft=DataLazy_ptr(new DataLazy(left));          lleft=DataLazy_ptr(new DataLazy(left));
634     }     }
635     else     else
636     {     {
637      lleft=dynamic_pointer_cast<DataLazy>(left);          lleft=dynamic_pointer_cast<DataLazy>(left);
638     }     }
639     m_readytype=lleft->m_readytype;     m_readytype=lleft->m_readytype;
640     m_left=lleft;     m_left=lleft;
# Line 711  LAZYDEBUG(cout << "(6)Lazy created with Line 648  LAZYDEBUG(cout << "(6)Lazy created with
648    
649    
650  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, const int axis0, const int axis1)  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, const int axis0, const int axis1)
651      : parent(left->getFunctionSpace(), SwapShape(left,axis0,axis1)),          : parent(left->getFunctionSpace(), SwapShape(left,axis0,axis1)),
652      m_op(op),          m_op(op),
653      m_axis_offset(axis0),          m_axis_offset(axis0),
654      m_transpose(axis1),          m_transpose(axis1),
655      m_tol(0)          m_tol(0)
656  {  {
657     if ((getOpgroup(op)!=G_NP1OUT_2P))     if ((getOpgroup(op)!=G_NP1OUT_2P))
658     {     {
659      throw DataException("Programmer error - constructor DataLazy(left, op, tol) will only process UNARY operations which require two integer parameters.");          throw DataException("Programmer error - constructor DataLazy(left, op, tol) will only process UNARY operations which require two integer parameters.");
660     }     }
661     DataLazy_ptr lleft;     DataLazy_ptr lleft;
662     if (!left->isLazy())     if (!left->isLazy())
663     {     {
664      lleft=DataLazy_ptr(new DataLazy(left));          lleft=DataLazy_ptr(new DataLazy(left));
665     }     }
666     else     else
667     {     {
668      lleft=dynamic_pointer_cast<DataLazy>(left);          lleft=dynamic_pointer_cast<DataLazy>(left);
669     }     }
670     m_readytype=lleft->m_readytype;     m_readytype=lleft->m_readytype;
671     m_left=lleft;     m_left=lleft;
# Line 740  DataLazy::DataLazy(DataAbstract_ptr left Line 677  DataLazy::DataLazy(DataAbstract_ptr left
677  LAZYDEBUG(cout << "(7)Lazy created with " << m_samplesize << endl;)  LAZYDEBUG(cout << "(7)Lazy created with " << m_samplesize << endl;)
678  }  }
679    
680    
681    namespace
682    {
683    
684        inline int max3(int a, int b, int c)
685        {
686            int t=(a>b?a:b);
687            return (t>c?t:c);
688    
689        }
690    }
691    
692    DataLazy::DataLazy(DataAbstract_ptr mask, DataAbstract_ptr left, DataAbstract_ptr right/*, double tol*/)
693            : parent(left->getFunctionSpace(), left->getShape()),
694            m_op(CONDEVAL),
695            m_axis_offset(0),
696            m_transpose(0),
697            m_tol(0)
698    {
699    
700       DataLazy_ptr lmask;
701       DataLazy_ptr lleft;
702       DataLazy_ptr lright;
703       if (!mask->isLazy())
704       {
705            lmask=DataLazy_ptr(new DataLazy(mask));
706       }
707       else
708       {
709            lmask=dynamic_pointer_cast<DataLazy>(mask);
710       }
711       if (!left->isLazy())
712       {
713            lleft=DataLazy_ptr(new DataLazy(left));
714       }
715       else
716       {
717            lleft=dynamic_pointer_cast<DataLazy>(left);
718       }
719       if (!right->isLazy())
720       {
721            lright=DataLazy_ptr(new DataLazy(right));
722       }
723       else
724       {
725            lright=dynamic_pointer_cast<DataLazy>(right);
726       }
727       m_readytype=lmask->m_readytype;
728       if ((lleft->m_readytype!=lright->m_readytype) || (lmask->m_readytype!=lleft->m_readytype))
729       {
730            throw DataException("Programmer Error - condEval arguments must have the same readytype");
731       }
732       m_left=lleft;
733       m_right=lright;
734       m_mask=lmask;
735       m_samplesize=getNumDPPSample()*getNoValues();
736       m_children=m_left->m_children+m_right->m_children+m_mask->m_children+1;
737       m_height=max3(m_left->m_height,m_right->m_height,m_mask->m_height)+1;
738       LazyNodeSetup();
739       SIZELIMIT
740    LAZYDEBUG(cout << "(8)Lazy created with " << m_samplesize << endl;)
741    }
742    
743    
744    
745  DataLazy::~DataLazy()  DataLazy::~DataLazy()
746  {  {
747     delete[] m_sampleids;     delete[] m_sampleids;
# Line 752  DataLazy::~DataLazy() Line 754  DataLazy::~DataLazy()
754    For reasons of efficiency do not call this method on DataExpanded nodes.    For reasons of efficiency do not call this method on DataExpanded nodes.
755  */  */
756  DataReady_ptr  DataReady_ptr
757  DataLazy::collapseToReady()  DataLazy::collapseToReady() const
758  {  {
759    if (m_readytype=='E')    if (m_readytype=='E')
760    { // this is more an efficiency concern than anything else    {     // this is more an efficiency concern than anything else
761      throw DataException("Programmer Error - do not use collapse on Expanded data.");      throw DataException("Programmer Error - do not use collapse on Expanded data.");
762    }    }
763    if (m_op==IDENTITY)    if (m_op==IDENTITY)
# Line 773  DataLazy::collapseToReady() Line 775  DataLazy::collapseToReady()
775    switch(m_op)    switch(m_op)
776    {    {
777      case ADD:      case ADD:
778      result=left+right;          result=left+right;
779      break;          break;
780      case SUB:            case SUB:          
781      result=left-right;          result=left-right;
782      break;          break;
783      case MUL:            case MUL:          
784      result=left*right;          result=left*right;
785      break;          break;
786      case DIV:            case DIV:          
787      result=left/right;          result=left/right;
788      break;          break;
789      case SIN:      case SIN:
790      result=left.sin();            result=left.sin();      
791      break;          break;
792      case COS:      case COS:
793      result=left.cos();          result=left.cos();
794      break;          break;
795      case TAN:      case TAN:
796      result=left.tan();          result=left.tan();
797      break;          break;
798      case ASIN:      case ASIN:
799      result=left.asin();          result=left.asin();
800      break;          break;
801      case ACOS:      case ACOS:
802      result=left.acos();          result=left.acos();
803      break;          break;
804      case ATAN:      case ATAN:
805      result=left.atan();          result=left.atan();
806      break;          break;
807      case SINH:      case SINH:
808      result=left.sinh();          result=left.sinh();
809      break;          break;
810      case COSH:      case COSH:
811      result=left.cosh();          result=left.cosh();
812      break;          break;
813      case TANH:      case TANH:
814      result=left.tanh();          result=left.tanh();
815      break;          break;
816      case ERF:      case ERF:
817      result=left.erf();          result=left.erf();
818      break;          break;
819     case ASINH:     case ASINH:
820      result=left.asinh();          result=left.asinh();
821      break;          break;
822     case ACOSH:     case ACOSH:
823      result=left.acosh();          result=left.acosh();
824      break;          break;
825     case ATANH:     case ATANH:
826      result=left.atanh();          result=left.atanh();
827      break;          break;
828      case LOG10:      case LOG10:
829      result=left.log10();          result=left.log10();
830      break;          break;
831      case LOG:      case LOG:
832      result=left.log();          result=left.log();
833      break;          break;
834      case SIGN:      case SIGN:
835      result=left.sign();          result=left.sign();
836      break;          break;
837      case ABS:      case ABS:
838      result=left.abs();          result=left.abs();
839      break;          break;
840      case NEG:      case NEG:
841      result=left.neg();          result=left.neg();
842      break;          break;
843      case POS:      case POS:
844      // it doesn't mean anything for delayed.          // it doesn't mean anything for delayed.
845      // it will just trigger a deep copy of the lazy object          // it will just trigger a deep copy of the lazy object
846      throw DataException("Programmer error - POS not supported for lazy data.");          throw DataException("Programmer error - POS not supported for lazy data.");
847      break;          break;
848      case EXP:      case EXP:
849      result=left.exp();          result=left.exp();
850      break;          break;
851      case SQRT:      case SQRT:
852      result=left.sqrt();          result=left.sqrt();
853      break;          break;
854      case RECIP:      case RECIP:
855      result=left.oneOver();          result=left.oneOver();
856      break;          break;
857      case GZ:      case GZ:
858      result=left.wherePositive();          result=left.wherePositive();
859      break;          break;
860      case LZ:      case LZ:
861      result=left.whereNegative();          result=left.whereNegative();
862      break;          break;
863      case GEZ:      case GEZ:
864      result=left.whereNonNegative();          result=left.whereNonNegative();
865      break;          break;
866      case LEZ:      case LEZ:
867      result=left.whereNonPositive();          result=left.whereNonPositive();
868      break;          break;
869      case NEZ:      case NEZ:
870      result=left.whereNonZero(m_tol);          result=left.whereNonZero(m_tol);
871      break;          break;
872      case EZ:      case EZ:
873      result=left.whereZero(m_tol);          result=left.whereZero(m_tol);
874      break;          break;
875      case SYM:      case SYM:
876      result=left.symmetric();          result=left.symmetric();
877      break;          break;
878      case NSYM:      case NSYM:
879      result=left.nonsymmetric();          result=left.antisymmetric();
880      break;          break;
881      case PROD:      case PROD:
882      result=C_GeneralTensorProduct(left,right,m_axis_offset, m_transpose);          result=C_GeneralTensorProduct(left,right,m_axis_offset, m_transpose);
883      break;          break;
884      case TRANS:      case TRANS:
885      result=left.transpose(m_axis_offset);          result=left.transpose(m_axis_offset);
886      break;          break;
887      case TRACE:      case TRACE:
888      result=left.trace(m_axis_offset);          result=left.trace(m_axis_offset);
889      break;          break;
890      case SWAP:      case SWAP:
891      result=left.swapaxes(m_axis_offset, m_transpose);          result=left.swapaxes(m_axis_offset, m_transpose);
892      break;          break;
893      case MINVAL:      case MINVAL:
894      result=left.minval();          result=left.minval();
895      break;          break;
896      case MAXVAL:      case MAXVAL:
897      result=left.minval();          result=left.minval();
898            break;
899        case HER:
900        result=left.hermitian();
901      break;      break;
902      default:      default:
903      throw DataException("Programmer error - collapseToReady does not know how to resolve operator "+opToString(m_op)+".");          throw DataException("Programmer error - collapseToReady does not know how to resolve operator "+opToString(m_op)+".");
904    }    }
905    return result.borrowReadyPtr();    return result.borrowReadyPtr();
906  }  }
# Line 907  DataLazy::collapseToReady() Line 912  DataLazy::collapseToReady()
912     the purpose of using DataLazy in the first place).     the purpose of using DataLazy in the first place).
913  */  */
914  void  void
915  DataLazy::collapse()  DataLazy::collapse() const
916  {  {
917    if (m_op==IDENTITY)    if (m_op==IDENTITY)
918    {    {
919      return;          return;
920    }    }
921    if (m_readytype=='E')    if (m_readytype=='E')
922    { // this is more an efficiency concern than anything else    {     // this is more an efficiency concern than anything else
923      throw DataException("Programmer Error - do not use collapse on Expanded data.");      throw DataException("Programmer Error - do not use collapse on Expanded data.");
924    }    }
925    m_id=collapseToReady();    m_id=collapseToReady();
926    m_op=IDENTITY;    m_op=IDENTITY;
927  }  }
928    
   
   
   
   
   
 #define PROC_OP(TYPE,X)                               \  
     for (int j=0;j<onumsteps;++j)\  
     {\  
       for (int i=0;i<numsteps;++i,resultp+=resultStep) \  
       { \  
 LAZYDEBUG(cout << "[left,right]=[" << lroffset << "," << rroffset << "]" << endl;)\  
 LAZYDEBUG(cout << "{left,right}={" << (*left)[lroffset] << "," << (*right)[rroffset] << "}\n";)\  
          tensor_binary_operation< TYPE >(chunksize, &((*left)[lroffset]), &((*right)[rroffset]), resultp, X); \  
 LAZYDEBUG(cout << " result=      " << resultp[0] << endl;) \  
          lroffset+=leftstep; \  
          rroffset+=rightstep; \  
       }\  
       lroffset+=oleftstep;\  
       rroffset+=orightstep;\  
     }  
   
   
929  // The result will be stored in m_samples  // The result will be stored in m_samples
930  // The return value is a pointer to the DataVector, offset is the offset within the return value  // The return value is a pointer to the DataVector, offset is the offset within the return value
931  const DataTypes::ValueType*  const DataTypes::RealVectorType*
932  DataLazy::resolveNodeSample(int tid, int sampleNo, size_t& roffset)  DataLazy::resolveNodeSample(int tid, int sampleNo, size_t& roffset) const
933  {  {
934  LAZYDEBUG(cout << "Resolve sample " << toString() << endl;)  LAZYDEBUG(cout << "Resolve sample " << toString() << endl;)
935      // collapse so we have a 'E' node or an IDENTITY for some other type          // collapse so we have a 'E' node or an IDENTITY for some other type
936    if (m_readytype!='E' && m_op!=IDENTITY)    if (m_readytype!='E' && m_op!=IDENTITY)
937    {    {
938      collapse();          collapse();
939    }    }
940    if (m_op==IDENTITY)      if (m_op==IDENTITY)  
941    {    {
942      const ValueType& vec=m_id->getVectorRO();      const RealVectorType& vec=m_id->getVectorRO();
943      roffset=m_id->getPointOffset(sampleNo, 0);      roffset=m_id->getPointOffset(sampleNo, 0);
944  #ifdef LAZY_STACK_PROF  #ifdef LAZY_STACK_PROF
945  int x;  int x;
# Line 973  if (&x<stackend[omp_get_thread_num()]) Line 956  if (&x<stackend[omp_get_thread_num()])
956    }    }
957    if (m_sampleids[tid]==sampleNo)    if (m_sampleids[tid]==sampleNo)
958    {    {
959      roffset=tid*m_samplesize;          roffset=tid*m_samplesize;
960      return &(m_samples);        // sample is already resolved          return &(m_samples);            // sample is already resolved
961    }    }
962    m_sampleids[tid]=sampleNo;    m_sampleids[tid]=sampleNo;
963    
964    switch (getOpgroup(m_op))    switch (getOpgroup(m_op))
965    {    {
966    case G_UNARY:    case G_UNARY:
# Line 987  if (&x<stackend[omp_get_thread_num()]) Line 971  if (&x<stackend[omp_get_thread_num()])
971    case G_TENSORPROD: return resolveNodeTProd(tid, sampleNo, roffset);    case G_TENSORPROD: return resolveNodeTProd(tid, sampleNo, roffset);
972    case G_NP1OUT_2P: return resolveNodeNP1OUT_2P(tid, sampleNo, roffset);    case G_NP1OUT_2P: return resolveNodeNP1OUT_2P(tid, sampleNo, roffset);
973    case G_REDUCTION: return resolveNodeReduction(tid, sampleNo, roffset);    case G_REDUCTION: return resolveNodeReduction(tid, sampleNo, roffset);
974      case G_CONDEVAL: return resolveNodeCondEval(tid, sampleNo, roffset);
975    default:    default:
976      throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");      throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");
977    }    }
978  }  }
979    
980  const DataTypes::ValueType*  const DataTypes::RealVectorType*
981  DataLazy::resolveNodeUnary(int tid, int sampleNo, size_t& roffset)  DataLazy::resolveNodeUnary(int tid, int sampleNo, size_t& roffset) const
982  {  {
983      // we assume that any collapsing has been done before we get here          // we assume that any collapsing has been done before we get here
984      // since we only have one argument we don't need to think about only          // since we only have one argument we don't need to think about only
985      // processing single points.          // processing single points.
986      // we will also know we won't get identity nodes          // we will also know we won't get identity nodes
987    if (m_readytype!='E')    if (m_readytype!='E')
988    {    {
989      throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");      throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
# Line 1007  DataLazy::resolveNodeUnary(int tid, int Line 992  DataLazy::resolveNodeUnary(int tid, int
992    {    {
993      throw DataException("Programmer error - resolveNodeUnary should not be called on identity nodes.");      throw DataException("Programmer error - resolveNodeUnary should not be called on identity nodes.");
994    }    }
995    const DataTypes::ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, roffset);    const DataTypes::RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, roffset);
996    const double* left=&((*leftres)[roffset]);    const double* left=&((*leftres)[roffset]);
997    roffset=m_samplesize*tid;    roffset=m_samplesize*tid;
998    double* result=&(m_samples[roffset]);    double* result=&(m_samples[roffset]);
999    switch (m_op)    if (m_op==POS)
1000    {    {
1001      case SIN:        // this should be prevented earlier
1002      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sin);      // operation is meaningless for lazy
1003      break;          throw DataException("Programmer error - POS not supported for lazy data.");    
1004      case COS:    }
1005      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cos);    tensor_unary_array_operation(m_samplesize,
1006      break;                               left,
1007      case TAN:                               result,
1008      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tan);                               m_op,
1009      break;                               m_tol);  
     case ASIN:  
     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::asin);  
     break;  
     case ACOS:  
     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::acos);  
     break;  
     case ATAN:  
     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::atan);  
     break;  
     case SINH:  
     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sinh);  
     break;  
     case COSH:  
     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cosh);  
     break;  
     case TANH:  
     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tanh);  
     break;  
     case ERF:  
 #if defined (_WIN32) && !defined(__INTEL_COMPILER)  
     throw DataException("Error - Data:: erf function is not supported on _WIN32 platforms.");  
 #else  
     tensor_unary_operation(m_samplesize, left, result, ::erf);  
     break;  
 #endif  
    case ASINH:  
 #if defined (_WIN32) && !defined(__INTEL_COMPILER)  
     tensor_unary_operation(m_samplesize, left, result, escript::asinh_substitute);  
 #else  
     tensor_unary_operation(m_samplesize, left, result, ::asinh);  
 #endif    
     break;  
    case ACOSH:  
 #if defined (_WIN32) && !defined(__INTEL_COMPILER)  
     tensor_unary_operation(m_samplesize, left, result, escript::acosh_substitute);  
 #else  
     tensor_unary_operation(m_samplesize, left, result, ::acosh);  
 #endif    
     break;  
    case ATANH:  
 #if defined (_WIN32) && !defined(__INTEL_COMPILER)  
     tensor_unary_operation(m_samplesize, left, result, escript::atanh_substitute);  
 #else  
     tensor_unary_operation(m_samplesize, left, result, ::atanh);  
 #endif    
     break;  
     case LOG10:  
     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log10);  
     break;  
     case LOG:  
     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log);  
     break;  
     case SIGN:  
     tensor_unary_operation(m_samplesize, left, result, escript::fsign);  
     break;  
     case ABS:  
     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::fabs);  
     break;  
     case NEG:  
     tensor_unary_operation(m_samplesize, left, result, negate<double>());  
     break;  
     case POS:  
     // it doesn't mean anything for delayed.  
     // it will just trigger a deep copy of the lazy object  
     throw DataException("Programmer error - POS not supported for lazy data.");  
     break;  
     case EXP:  
     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::exp);  
     break;  
     case SQRT:  
     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sqrt);  
     break;  
     case RECIP:  
     tensor_unary_operation(m_samplesize, left, result, bind1st(divides<double>(),1.));  
     break;  
     case GZ:  
     tensor_unary_operation(m_samplesize, left, result, bind2nd(greater<double>(),0.0));  
     break;  
     case LZ:  
     tensor_unary_operation(m_samplesize, left, result, bind2nd(less<double>(),0.0));  
     break;  
     case GEZ:  
     tensor_unary_operation(m_samplesize, left, result, bind2nd(greater_equal<double>(),0.0));  
     break;  
     case LEZ:  
     tensor_unary_operation(m_samplesize, left, result, bind2nd(less_equal<double>(),0.0));  
     break;  
 // There are actually G_UNARY_P but I don't see a compelling reason to treat them differently  
     case NEZ:  
     tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsGT(),m_tol));  
     break;  
     case EZ:  
     tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsLTE(),m_tol));  
     break;  
   
     default:  
     throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");  
   }  
1010    return &(m_samples);    return &(m_samples);
1011  }  }
1012    
1013    
1014  const DataTypes::ValueType*  const DataTypes::RealVectorType*
1015  DataLazy::resolveNodeReduction(int tid, int sampleNo, size_t& roffset)  DataLazy::resolveNodeReduction(int tid, int sampleNo, size_t& roffset) const
1016  {  {
1017      // we assume that any collapsing has been done before we get here          // we assume that any collapsing has been done before we get here
1018      // since we only have one argument we don't need to think about only          // since we only have one argument we don't need to think about only
1019      // processing single points.          // processing single points.
1020      // we will also know we won't get identity nodes          // we will also know we won't get identity nodes
1021    if (m_readytype!='E')    if (m_readytype!='E')
1022    {    {
1023      throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");      throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
# Line 1140  DataLazy::resolveNodeReduction(int tid, Line 1027  DataLazy::resolveNodeReduction(int tid,
1027      throw DataException("Programmer error - resolveNodeUnary should not be called on identity nodes.");      throw DataException("Programmer error - resolveNodeUnary should not be called on identity nodes.");
1028    }    }
1029    size_t loffset=0;    size_t loffset=0;
1030    const DataTypes::ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, loffset);    const DataTypes::RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, loffset);
1031    
1032    roffset=m_samplesize*tid;    roffset=m_samplesize*tid;
1033    unsigned int ndpps=getNumDPPSample();    unsigned int ndpps=getNumDPPSample();
1034    unsigned int psize=DataTypes::noValues(getShape());    unsigned int psize=DataTypes::noValues(m_left->getShape());
1035    double* result=&(m_samples[roffset]);    double* result=&(m_samples[roffset]);
1036    switch (m_op)    switch (m_op)
1037    {    {
1038      case MINVAL:      case MINVAL:
1039      {          {
1040        for (unsigned int z=0;z<ndpps;++z)            for (unsigned int z=0;z<ndpps;++z)
1041        {            {
1042          FMin op;              FMin op;
1043          *result=DataMaths::reductionOp(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max());              *result=escript::reductionOpVector(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max());
1044          loffset+=psize;              loffset+=psize;
1045          result++;              result++;
1046        }            }
1047      }          }
1048      break;          break;
1049      case MAXVAL:      case MAXVAL:
1050      {          {
1051        for (unsigned int z=0;z<ndpps;++z)            for (unsigned int z=0;z<ndpps;++z)
1052        {            {
1053        FMax op;            FMax op;
1054        *result=DataMaths::reductionOp(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max()*-1);            *result=escript::reductionOpVector(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max()*-1);
1055        loffset+=psize;            loffset+=psize;
1056        result++;            result++;
1057        }            }
1058      }          }
1059      break;          break;
1060      default:      default:
1061      throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");          throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");
1062    }    }
1063    return &(m_samples);    return &(m_samples);
1064  }  }
1065    
1066  const DataTypes::ValueType*  const DataTypes::RealVectorType*
1067  DataLazy::resolveNodeNP1OUT(int tid, int sampleNo, size_t& roffset)  DataLazy::resolveNodeNP1OUT(int tid, int sampleNo, size_t& roffset) const
1068  {  {
1069      // we assume that any collapsing has been done before we get here          // we assume that any collapsing has been done before we get here
1070      // since we only have one argument we don't need to think about only          // since we only have one argument we don't need to think about only
1071      // processing single points.          // processing single points.
1072    if (m_readytype!='E')    if (m_readytype!='E')
1073    {    {
1074      throw DataException("Programmer error - resolveNodeNP1OUT should only be called on expanded Data.");      throw DataException("Programmer error - resolveNodeNP1OUT should only be called on expanded Data.");
# Line 1191  DataLazy::resolveNodeNP1OUT(int tid, int Line 1078  DataLazy::resolveNodeNP1OUT(int tid, int
1078      throw DataException("Programmer error - resolveNodeNP1OUT should not be called on identity nodes.");      throw DataException("Programmer error - resolveNodeNP1OUT should not be called on identity nodes.");
1079    }    }
1080    size_t subroffset;    size_t subroffset;
1081    const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);    const RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1082    roffset=m_samplesize*tid;    roffset=m_samplesize*tid;
1083    size_t loop=0;    size_t loop=0;
1084    size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;    size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
# Line 1200  DataLazy::resolveNodeNP1OUT(int tid, int Line 1087  DataLazy::resolveNodeNP1OUT(int tid, int
1087    switch (m_op)    switch (m_op)
1088    {    {
1089      case SYM:      case SYM:
1090      for (loop=0;loop<numsteps;++loop)          for (loop=0;loop<numsteps;++loop)
1091      {          {
1092          DataMaths::symmetric(*leftres,m_left->getShape(),subroffset, m_samples, getShape(), offset);              escript::symmetric(*leftres,m_left->getShape(),subroffset, m_samples, getShape(), offset);
1093          subroffset+=step;              subroffset+=step;
1094          offset+=step;              offset+=step;
1095      }          }
1096      break;          break;
1097      case NSYM:      case NSYM:
1098      for (loop=0;loop<numsteps;++loop)          for (loop=0;loop<numsteps;++loop)
1099      {          {
1100          DataMaths::nonsymmetric(*leftres,m_left->getShape(),subroffset, m_samples, getShape(), offset);              escript::antisymmetric(*leftres,m_left->getShape(),subroffset, m_samples, getShape(), offset);
1101          subroffset+=step;              subroffset+=step;
1102          offset+=step;              offset+=step;
1103      }          }
1104      break;          break;
1105      default:      default:
1106      throw DataException("Programmer error - resolveNP1OUT can not resolve operator "+opToString(m_op)+".");          throw DataException("Programmer error - resolveNP1OUT can not resolve operator "+opToString(m_op)+".");
1107    }    }
1108    return &m_samples;    return &m_samples;
1109  }  }
1110    
1111  const DataTypes::ValueType*  const DataTypes::RealVectorType*
1112  DataLazy::resolveNodeNP1OUT_P(int tid, int sampleNo, size_t& roffset)  DataLazy::resolveNodeNP1OUT_P(int tid, int sampleNo, size_t& roffset) const
1113  {  {
1114      // we assume that any collapsing has been done before we get here          // we assume that any collapsing has been done before we get here
1115      // since we only have one argument we don't need to think about only          // since we only have one argument we don't need to think about only
1116      // processing single points.          // processing single points.
1117    if (m_readytype!='E')    if (m_readytype!='E')
1118    {    {
1119      throw DataException("Programmer error - resolveNodeNP1OUT_P should only be called on expanded Data.");      throw DataException("Programmer error - resolveNodeNP1OUT_P should only be called on expanded Data.");
# Line 1237  DataLazy::resolveNodeNP1OUT_P(int tid, i Line 1124  DataLazy::resolveNodeNP1OUT_P(int tid, i
1124    }    }
1125    size_t subroffset;    size_t subroffset;
1126    size_t offset;    size_t offset;
1127    const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);    const RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1128    roffset=m_samplesize*tid;    roffset=m_samplesize*tid;
1129    offset=roffset;    offset=roffset;
1130    size_t loop=0;    size_t loop=0;
# Line 1247  DataLazy::resolveNodeNP1OUT_P(int tid, i Line 1134  DataLazy::resolveNodeNP1OUT_P(int tid, i
1134    switch (m_op)    switch (m_op)
1135    {    {
1136      case TRACE:      case TRACE:
1137      for (loop=0;loop<numsteps;++loop)          for (loop=0;loop<numsteps;++loop)
1138      {          {
1139              DataMaths::trace(*leftres,m_left->getShape(),subroffset, m_samples ,getShape(),offset,m_axis_offset);              escript::trace(*leftres,m_left->getShape(),subroffset, m_samples ,getShape(),offset,m_axis_offset);
1140          subroffset+=instep;              subroffset+=instep;
1141          offset+=outstep;              offset+=outstep;
1142      }          }
1143      break;          break;
1144      case TRANS:      case TRANS:
1145      for (loop=0;loop<numsteps;++loop)          for (loop=0;loop<numsteps;++loop)
1146      {          {
1147              DataMaths::transpose(*leftres,m_left->getShape(),subroffset, m_samples, getShape(),offset,m_axis_offset);              escript::transpose(*leftres,m_left->getShape(),subroffset, m_samples, getShape(),offset,m_axis_offset);
1148          subroffset+=instep;              subroffset+=instep;
1149          offset+=outstep;              offset+=outstep;
1150      }          }
1151      break;          break;
1152      default:      default:
1153      throw DataException("Programmer error - resolveNP1OUTP can not resolve operator "+opToString(m_op)+".");          throw DataException("Programmer error - resolveNP1OUTP can not resolve operator "+opToString(m_op)+".");
1154    }    }
1155    return &m_samples;    return &m_samples;
1156  }  }
1157    
1158    
1159  const DataTypes::ValueType*  const DataTypes::RealVectorType*
1160  DataLazy::resolveNodeNP1OUT_2P(int tid, int sampleNo, size_t& roffset)  DataLazy::resolveNodeNP1OUT_2P(int tid, int sampleNo, size_t& roffset) const
1161  {  {
1162    if (m_readytype!='E')    if (m_readytype!='E')
1163    {    {
# Line 1282  DataLazy::resolveNodeNP1OUT_2P(int tid, Line 1169  DataLazy::resolveNodeNP1OUT_2P(int tid,
1169    }    }
1170    size_t subroffset;    size_t subroffset;
1171    size_t offset;    size_t offset;
1172    const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);    const RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1173    roffset=m_samplesize*tid;    roffset=m_samplesize*tid;
1174    offset=roffset;    offset=roffset;
1175    size_t loop=0;    size_t loop=0;
# Line 1292  DataLazy::resolveNodeNP1OUT_2P(int tid, Line 1179  DataLazy::resolveNodeNP1OUT_2P(int tid,
1179    switch (m_op)    switch (m_op)
1180    {    {
1181      case SWAP:      case SWAP:
1182      for (loop=0;loop<numsteps;++loop)          for (loop=0;loop<numsteps;++loop)
1183      {          {
1184              DataMaths::swapaxes(*leftres,m_left->getShape(),subroffset, m_samples, getShape(),offset, m_axis_offset, m_transpose);              escript::swapaxes(*leftres,m_left->getShape(),subroffset, m_samples, getShape(),offset, m_axis_offset, m_transpose);
1185          subroffset+=instep;              subroffset+=instep;
1186          offset+=outstep;              offset+=outstep;
1187      }          }
1188      break;          break;
1189      default:      default:
1190      throw DataException("Programmer error - resolveNodeNP1OUT2P can not resolve operator "+opToString(m_op)+".");          throw DataException("Programmer error - resolveNodeNP1OUT2P can not resolve operator "+opToString(m_op)+".");
1191    }    }
1192    return &m_samples;    return &m_samples;
1193  }  }
1194    
1195    const DataTypes::RealVectorType*
1196    DataLazy::resolveNodeCondEval(int tid, int sampleNo, size_t& roffset) const
1197    {
1198      if (m_readytype!='E')
1199      {
1200        throw DataException("Programmer error - resolveNodeCondEval should only be called on expanded Data.");
1201      }
1202      if (m_op!=CONDEVAL)
1203      {
1204        throw DataException("Programmer error - resolveNodeCondEval should only be called on CONDEVAL nodes.");
1205      }
1206      size_t subroffset;
1207    
1208      const RealVectorType* maskres=m_mask->resolveNodeSample(tid, sampleNo, subroffset);
1209      const RealVectorType* srcres=0;
1210      if ((*maskres)[subroffset]>0)
1211      {
1212            srcres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1213      }
1214      else
1215      {
1216            srcres=m_right->resolveNodeSample(tid, sampleNo, subroffset);
1217      }
1218    
1219      // Now we need to copy the result
1220    
1221      roffset=m_samplesize*tid;
1222      for (int i=0;i<m_samplesize;++i)
1223      {
1224            m_samples[roffset+i]=(*srcres)[subroffset+i];  
1225      }
1226    
1227      return &m_samples;
1228    }
1229    
1230  // This method assumes that any subexpressions which evaluate to Constant or Tagged Data  // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1231  // have already been collapsed to IDENTITY. So we must have at least one expanded child.  // have already been collapsed to IDENTITY. So we must have at least one expanded child.
# Line 1316  DataLazy::resolveNodeNP1OUT_2P(int tid, Line 1236  DataLazy::resolveNodeNP1OUT_2P(int tid,
1236  // There is an additional complication when scalar operations are considered.  // There is an additional complication when scalar operations are considered.
1237  // For example, 2+Vector.  // For example, 2+Vector.
1238  // In this case each double within the point is treated individually  // In this case each double within the point is treated individually
1239  const DataTypes::ValueType*  const DataTypes::RealVectorType*
1240  DataLazy::resolveNodeBinary(int tid, int sampleNo, size_t& roffset)  DataLazy::resolveNodeBinary(int tid, int sampleNo, size_t& roffset) const
1241  {  {
1242  LAZYDEBUG(cout << "Resolve binary: " << toString() << endl;)  LAZYDEBUG(cout << "Resolve binary: " << toString() << endl;)
1243    
1244    size_t lroffset=0, rroffset=0;    // offsets in the left and right result vectors    size_t lroffset=0, rroffset=0;        // offsets in the left and right result vectors
1245      // first work out which of the children are expanded          // first work out which of the children are expanded
1246    bool leftExp=(m_left->m_readytype=='E');    bool leftExp=(m_left->m_readytype=='E');
1247    bool rightExp=(m_right->m_readytype=='E');    bool rightExp=(m_right->m_readytype=='E');
1248    if (!leftExp && !rightExp)    if (!leftExp && !rightExp)
1249    {    {
1250      throw DataException("Programmer Error - please use collapse if neither argument has type 'E'.");          throw DataException("Programmer Error - please use collapse if neither argument has type 'E'.");
1251    }    }
1252    bool leftScalar=(m_left->getRank()==0);    bool leftScalar=(m_left->getRank()==0);
1253    bool rightScalar=(m_right->getRank()==0);    bool rightScalar=(m_right->getRank()==0);
1254    if ((m_left->getRank()!=m_right->getRank()) && (!leftScalar && !rightScalar))    if ((m_left->getRank()!=m_right->getRank()) && (!leftScalar && !rightScalar))
1255    {    {
1256      throw DataException("resolveBinary - ranks of arguments must match unless one of them is scalar.");          throw DataException("resolveBinary - ranks of arguments must match unless one of them is scalar.");
1257    }    }
1258    size_t leftsize=m_left->getNoValues();    size_t leftsize=m_left->getNoValues();
1259    size_t rightsize=m_right->getNoValues();    size_t rightsize=m_right->getNoValues();
1260    size_t chunksize=1;           // how many doubles will be processed in one go    size_t chunksize=1;                   // how many doubles will be processed in one go
1261    int leftstep=0;       // how far should the left offset advance after each step    int leftstep=0;               // how far should the left offset advance after each step
1262    int rightstep=0;    int rightstep=0;
1263    int numsteps=0;       // total number of steps for the inner loop    int numsteps=0;               // total number of steps for the inner loop
1264    int oleftstep=0;  // the o variables refer to the outer loop    int oleftstep=0;      // the o variables refer to the outer loop
1265    int orightstep=0; // The outer loop is only required in cases where there is an extended scalar    int orightstep=0;     // The outer loop is only required in cases where there is an extended scalar
1266    int onumsteps=1;    int onumsteps=1;
1267        
1268    bool LES=(leftExp && leftScalar); // Left is an expanded scalar    bool LES=(leftExp && leftScalar);     // Left is an expanded scalar
1269    bool RES=(rightExp && rightScalar);    bool RES=(rightExp && rightScalar);
1270    bool LS=(!leftExp && leftScalar); // left is a single scalar    bool LS=(!leftExp && leftScalar);     // left is a single scalar
1271    bool RS=(!rightExp && rightScalar);    bool RS=(!rightExp && rightScalar);
1272    bool LN=(!leftExp && !leftScalar);    // left is a single non-scalar    bool LN=(!leftExp && !leftScalar);    // left is a single non-scalar
1273    bool RN=(!rightExp && !rightScalar);    bool RN=(!rightExp && !rightScalar);
1274    bool LEN=(leftExp && !leftScalar);    // left is an expanded non-scalar    bool LEN=(leftExp && !leftScalar);    // left is an expanded non-scalar
1275    bool REN=(rightExp && !rightScalar);    bool REN=(rightExp && !rightScalar);
1276    
1277    if ((LES && RES) || (LEN && REN)) // both are Expanded scalars or both are expanded non-scalars    if ((LES && RES) || (LEN && REN))     // both are Expanded scalars or both are expanded non-scalars
1278    {    {
1279      chunksize=m_left->getNumDPPSample()*leftsize;          chunksize=m_left->getNumDPPSample()*leftsize;
1280      leftstep=0;          leftstep=0;
1281      rightstep=0;          rightstep=0;
1282      numsteps=1;          numsteps=1;
1283    }    }
1284    else if (LES || RES)    else if (LES || RES)
1285    {    {
1286      chunksize=1;          chunksize=1;
1287      if (LES)        // left is an expanded scalar          if (LES)                // left is an expanded scalar
1288      {          {
1289          if (RS)                  if (RS)
1290          {                  {
1291             leftstep=1;                     leftstep=1;
1292             rightstep=0;                     rightstep=0;
1293             numsteps=m_left->getNumDPPSample();                     numsteps=m_left->getNumDPPSample();
1294          }                  }
1295          else        // RN or REN                  else            // RN or REN
1296          {                  {
1297             leftstep=0;                     leftstep=0;
1298             oleftstep=1;                     oleftstep=1;
1299             rightstep=1;                     rightstep=1;
1300             orightstep=(RN ? -(int)rightsize : 0);                     orightstep=(RN ? -(int)rightsize : 0);
1301             numsteps=rightsize;                     numsteps=rightsize;
1302             onumsteps=m_left->getNumDPPSample();                     onumsteps=m_left->getNumDPPSample();
1303          }                  }
1304      }          }
1305      else        // right is an expanded scalar          else            // right is an expanded scalar
1306      {          {
1307          if (LS)                  if (LS)
1308          {                  {
1309             rightstep=1;                     rightstep=1;
1310             leftstep=0;                     leftstep=0;
1311             numsteps=m_right->getNumDPPSample();                     numsteps=m_right->getNumDPPSample();
1312          }                  }
1313          else                  else
1314          {                  {
1315             rightstep=0;                     rightstep=0;
1316             orightstep=1;                     orightstep=1;
1317             leftstep=1;                     leftstep=1;
1318             oleftstep=(LN ? -(int)leftsize : 0);                     oleftstep=(LN ? -(int)leftsize : 0);
1319             numsteps=leftsize;                     numsteps=leftsize;
1320             onumsteps=m_right->getNumDPPSample();                     onumsteps=m_right->getNumDPPSample();
1321          }                  }
1322      }          }
1323    }    }
1324    else  // this leaves (LEN, RS), (LEN, RN) and their transposes    else  // this leaves (LEN, RS), (LEN, RN) and their transposes
1325    {    {
1326      if (LEN)    // and Right will be a single value          if (LEN)        // and Right will be a single value
1327      {          {
1328          chunksize=rightsize;                  chunksize=rightsize;
1329          leftstep=rightsize;                  leftstep=rightsize;
1330          rightstep=0;                  rightstep=0;
1331          numsteps=m_left->getNumDPPSample();                  numsteps=m_left->getNumDPPSample();
1332          if (RS)                  if (RS)
1333          {                  {
1334             numsteps*=leftsize;                     numsteps*=leftsize;
1335          }                  }
1336      }          }
1337      else    // REN          else    // REN
1338      {          {
1339          chunksize=leftsize;                  chunksize=leftsize;
1340          rightstep=leftsize;                  rightstep=leftsize;
1341          leftstep=0;                  leftstep=0;
1342          numsteps=m_right->getNumDPPSample();                  numsteps=m_right->getNumDPPSample();
1343          if (LS)                  if (LS)
1344          {                  {
1345             numsteps*=rightsize;                     numsteps*=rightsize;
1346          }                  }
1347      }          }
1348    }    }
1349    
1350    int resultStep=max(leftstep,rightstep);   // only one (at most) should be !=0    int resultStep=max(leftstep,rightstep);       // only one (at most) should be !=0
1351      // Get the values of sub-expressions          // Get the values of sub-expressions
1352    const ValueType* left=m_left->resolveNodeSample(tid,sampleNo,lroffset);      const RealVectorType* left=m_left->resolveNodeSample(tid,sampleNo,lroffset);      
1353    const ValueType* right=m_right->resolveNodeSample(tid,sampleNo,rroffset);    const RealVectorType* right=m_right->resolveNodeSample(tid,sampleNo,rroffset);
1354  LAZYDEBUG(cout << "Post sub calls in " << toString() << endl;)  LAZYDEBUG(cout << "Post sub calls in " << toString() << endl;)
1355  LAZYDEBUG(cout << "shapes=" << DataTypes::shapeToString(m_left->getShape()) << "," << DataTypes::shapeToString(m_right->getShape()) << endl;)  LAZYDEBUG(cout << "shapes=" << DataTypes::shapeToString(m_left->getShape()) << "," << DataTypes::shapeToString(m_right->getShape()) << endl;)
1356  LAZYDEBUG(cout << "chunksize=" << chunksize << endl << "leftstep=" << leftstep << " rightstep=" << rightstep;)  LAZYDEBUG(cout << "chunksize=" << chunksize << endl << "leftstep=" << leftstep << " rightstep=" << rightstep;)
# Line 1444  LAZYDEBUG(cout << "Right res["<< rroffse Line 1364  LAZYDEBUG(cout << "Right res["<< rroffse
1364    
1365    
1366    roffset=m_samplesize*tid;    roffset=m_samplesize*tid;
1367    double* resultp=&(m_samples[roffset]);        // results are stored at the vector offset we recieved    double* resultp=&(m_samples[roffset]);                // results are stored at the vector offset we received
1368    switch(m_op)    switch(m_op)
1369    {    {
1370      case ADD:      case ADD:
1371          PROC_OP(NO_ARG,plus<double>());          //PROC_OP(NO_ARG,plus<double>());
1372      break;        escript::binaryOpVectorLazyHelper<real_t, real_t, real_t>(resultp,
1373                 &(*left)[0],
1374                 &(*right)[0],
1375                 chunksize,
1376                 onumsteps,
1377                 numsteps,
1378                 resultStep,
1379                 leftstep,
1380                 rightstep,
1381                 oleftstep,
1382                 orightstep,
1383                 lroffset,
1384                 rroffset,
1385                 escript::ES_optype::ADD);  
1386            break;
1387      case SUB:      case SUB:
1388      PROC_OP(NO_ARG,minus<double>());        escript::binaryOpVectorLazyHelper<real_t, real_t, real_t>(resultp,
1389      break;               &(*left)[0],
1390                 &(*right)[0],
1391                 chunksize,
1392                 onumsteps,
1393                 numsteps,
1394                 resultStep,
1395                 leftstep,
1396                 rightstep,
1397                 oleftstep,
1398                 orightstep,
1399                 lroffset,
1400                 rroffset,
1401                 escript::ES_optype::SUB);        
1402            //PROC_OP(NO_ARG,minus<double>());
1403            break;
1404      case MUL:      case MUL:
1405      PROC_OP(NO_ARG,multiplies<double>());          //PROC_OP(NO_ARG,multiplies<double>());
1406      break;        escript::binaryOpVectorLazyHelper<real_t, real_t, real_t>(resultp,
1407                 &(*left)[0],
1408                 &(*right)[0],
1409                 chunksize,
1410                 onumsteps,
1411                 numsteps,
1412                 resultStep,
1413                 leftstep,
1414                 rightstep,
1415                 oleftstep,
1416                 orightstep,
1417                 lroffset,
1418                 rroffset,
1419                 escript::ES_optype::MUL);        
1420            break;
1421      case DIV:      case DIV:
1422      PROC_OP(NO_ARG,divides<double>());          //PROC_OP(NO_ARG,divides<double>());
1423      break;        escript::binaryOpVectorLazyHelper<real_t, real_t, real_t>(resultp,
1424                 &(*left)[0],
1425                 &(*right)[0],
1426                 chunksize,
1427                 onumsteps,
1428                 numsteps,
1429                 resultStep,
1430                 leftstep,
1431                 rightstep,
1432                 oleftstep,
1433                 orightstep,
1434                 lroffset,
1435                 rroffset,
1436                 escript::ES_optype::DIV);        
1437            break;
1438      case POW:      case POW:
1439         PROC_OP(double (double,double),::pow);         //PROC_OP(double (double,double),::pow);
1440      break;        escript::binaryOpVectorLazyHelper<real_t, real_t, real_t>(resultp,
1441                 &(*left)[0],
1442                 &(*right)[0],
1443                 chunksize,
1444                 onumsteps,
1445                 numsteps,
1446                 resultStep,
1447                 leftstep,
1448                 rightstep,
1449                 oleftstep,
1450                 orightstep,
1451                 lroffset,
1452                 rroffset,
1453                 escript::ES_optype::POW);        
1454            break;
1455      default:      default:
1456      throw DataException("Programmer error - resolveBinary can not resolve operator "+opToString(m_op)+".");          throw DataException("Programmer error - resolveBinary can not resolve operator "+opToString(m_op)+".");
1457    }    }
1458  LAZYDEBUG(cout << "Result res[" << roffset<< "]" << m_samples[roffset] << endl;)  LAZYDEBUG(cout << "Result res[" << roffset<< "]" << m_samples[roffset] << endl;)
1459    return &m_samples;    return &m_samples;
# Line 1473  LAZYDEBUG(cout << "Result res[" << roffs Line 1463  LAZYDEBUG(cout << "Result res[" << roffs
1463  // This method assumes that any subexpressions which evaluate to Constant or Tagged Data  // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1464  // have already been collapsed to IDENTITY. So we must have at least one expanded child.  // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1465  // unlike the other resolve helpers, we must treat these datapoints separately.  // unlike the other resolve helpers, we must treat these datapoints separately.
1466  const DataTypes::ValueType*  const DataTypes::RealVectorType*
1467  DataLazy::resolveNodeTProd(int tid, int sampleNo, size_t& roffset)  DataLazy::resolveNodeTProd(int tid, int sampleNo, size_t& roffset) const
1468  {  {
1469  LAZYDEBUG(cout << "Resolve TensorProduct: " << toString() << endl;)  LAZYDEBUG(cout << "Resolve TensorProduct: " << toString() << endl;)
1470    
1471    size_t lroffset=0, rroffset=0;    // offsets in the left and right result vectors    size_t lroffset=0, rroffset=0;        // offsets in the left and right result vectors
1472      // first work out which of the children are expanded          // first work out which of the children are expanded
1473    bool leftExp=(m_left->m_readytype=='E');    bool leftExp=(m_left->m_readytype=='E');
1474    bool rightExp=(m_right->m_readytype=='E');    bool rightExp=(m_right->m_readytype=='E');
1475    int steps=getNumDPPSample();    int steps=getNumDPPSample();
1476    int leftStep=(leftExp? m_left->getNoValues() : 0);        // do not have scalars as input to this method    int leftStep=(leftExp? m_left->getNoValues() : 0);            // do not have scalars as input to this method
1477    int rightStep=(rightExp?m_right->getNoValues() : 0);    int rightStep=(rightExp?m_right->getNoValues() : 0);
1478    
1479    int resultStep=getNoValues();    int resultStep=getNoValues();
1480    roffset=m_samplesize*tid;    roffset=m_samplesize*tid;
1481    size_t offset=roffset;    size_t offset=roffset;
1482    
1483    const ValueType* left=m_left->resolveNodeSample(tid, sampleNo, lroffset);    const RealVectorType* left=m_left->resolveNodeSample(tid, sampleNo, lroffset);
1484    
1485    const ValueType* right=m_right->resolveNodeSample(tid, sampleNo, rroffset);    const RealVectorType* right=m_right->resolveNodeSample(tid, sampleNo, rroffset);
1486    
1487  LAZYDEBUG(cerr << "[Left shape]=" << DataTypes::shapeToString(m_left->getShape()) << "\n[Right shape]=" << DataTypes::shapeToString(m_right->getShape()) << " result=" <<DataTypes::shapeToString(getShape()) <<  endl;  LAZYDEBUG(cerr << "[Left shape]=" << DataTypes::shapeToString(m_left->getShape()) << "\n[Right shape]=" << DataTypes::shapeToString(m_right->getShape()) << " result=" <<DataTypes::shapeToString(getShape()) <<  endl;
1488  cout << getNoValues() << endl;)  cout << getNoValues() << endl;)
# Line 1506  LAZYDEBUG(cout << "m_samplesize=" << m_s Line 1496  LAZYDEBUG(cout << "m_samplesize=" << m_s
1496  LAZYDEBUG(cout << "outputshape=" << DataTypes::shapeToString(getShape()) << endl;)  LAZYDEBUG(cout << "outputshape=" << DataTypes::shapeToString(getShape()) << endl;)
1497  LAZYDEBUG(cout << "DPPS=" << m_right->getNumDPPSample() <<"."<<endl;)  LAZYDEBUG(cout << "DPPS=" << m_right->getNumDPPSample() <<"."<<endl;)
1498    
1499    double* resultp=&(m_samples[offset]);     // results are stored at the vector offset we recieved    double* resultp=&(m_samples[offset]);         // results are stored at the vector offset we received
1500    switch(m_op)    switch(m_op)
1501    {    {
1502      case PROD:      case PROD:
1503      for (int i=0;i<steps;++i,resultp+=resultStep)          for (int i=0;i<steps;++i,resultp+=resultStep)
1504      {          {
1505            const double *ptr_0 = &((*left)[lroffset]);            const double *ptr_0 = &((*left)[lroffset]);
1506            const double *ptr_1 = &((*right)[rroffset]);            const double *ptr_1 = &((*right)[rroffset]);
1507    
1508  LAZYDEBUG(cout << DataTypes::pointToString(*left, m_left->getShape(),lroffset,"LEFT") << endl;)  LAZYDEBUG(cout << DataTypes::pointToString(*left, m_left->getShape(),lroffset,"LEFT") << endl;)
1509  LAZYDEBUG(cout << DataTypes::pointToString(*right,m_right->getShape(),rroffset, "RIGHT") << endl;)  LAZYDEBUG(cout << DataTypes::pointToString(*right,m_right->getShape(),rroffset, "RIGHT") << endl;)
1510    
1511            matrix_matrix_product(m_SL, m_SM, m_SR, ptr_0, ptr_1, resultp, m_transpose);            matrix_matrix_product(m_SL, m_SM, m_SR, ptr_0, ptr_1, resultp, m_transpose);
1512    
1513        lroffset+=leftStep;            lroffset+=leftStep;
1514        rroffset+=rightStep;            rroffset+=rightStep;
1515      }          }
1516      break;          break;
1517      default:      default:
1518      throw DataException("Programmer error - resolveTProduct can not resolve operator "+opToString(m_op)+".");          throw DataException("Programmer error - resolveTProduct can not resolve operator "+opToString(m_op)+".");
1519    }    }
1520    roffset=offset;    roffset=offset;
1521    return &m_samples;    return &m_samples;
1522  }  }
1523    
1524    
1525  const DataTypes::ValueType*  const DataTypes::RealVectorType*
1526  DataLazy::resolveSample(int sampleNo, size_t& roffset)  DataLazy::resolveSample(int sampleNo, size_t& roffset) const
1527  {  {
1528  #ifdef _OPENMP  #ifdef _OPENMP
1529      int tid=omp_get_thread_num();          int tid=omp_get_thread_num();
1530  #else  #else
1531      int tid=0;          int tid=0;
1532  #endif  #endif
1533    
1534  #ifdef LAZY_STACK_PROF  #ifdef LAZY_STACK_PROF
1535      stackstart[tid]=&tid;          stackstart[tid]=&tid;
1536      stackend[tid]=&tid;          stackend[tid]=&tid;
1537      const DataTypes::ValueType* r=resolveNodeSample(tid, sampleNo, roffset);          const DataTypes::RealVectorType* r=resolveNodeSample(tid, sampleNo, roffset);
1538      size_t d=(size_t)stackstart[tid]-(size_t)stackend[tid];          size_t d=(size_t)stackstart[tid]-(size_t)stackend[tid];
1539      #pragma omp critical          #pragma omp critical
1540      if (d>maxstackuse)          if (d>maxstackuse)
1541      {          {
1542  cout << "Max resolve Stack use " << d << endl;  cout << "Max resolve Stack use " << d << endl;
1543          maxstackuse=d;                  maxstackuse=d;
1544      }          }
1545      return r;          return r;
1546  #else  #else
1547      return resolveNodeSample(tid, sampleNo, roffset);          return resolveNodeSample(tid, sampleNo, roffset);
1548  #endif  #endif
1549  }  }
1550    
# Line 1564  void Line 1554  void
1554  DataLazy::resolveToIdentity()  DataLazy::resolveToIdentity()
1555  {  {
1556     if (m_op==IDENTITY)     if (m_op==IDENTITY)
1557      return;          return;
1558     DataReady_ptr p=resolveNodeWorker();     DataReady_ptr p=resolveNodeWorker();
1559     makeIdentity(p);     makeIdentity(p);
1560  }  }
# Line 1601  DataLazy::resolveGroupWorker(std::vector Line 1591  DataLazy::resolveGroupWorker(std::vector
1591  {  {
1592    if (dats.empty())    if (dats.empty())
1593    {    {
1594      return;          return;
1595    }    }
1596    vector<DataLazy*> work;    vector<DataLazy*> work;
1597    FunctionSpace fs=dats[0]->getFunctionSpace();    FunctionSpace fs=dats[0]->getFunctionSpace();
1598    bool match=true;    bool match=true;
1599    for (int i=dats.size()-1;i>0;--i)    for (int i=dats.size()-1;i>=0;--i)
1600    {    {
1601      if (dats[i]->m_readytype!='E')          if (dats[i]->m_readytype!='E')
1602      {          {
1603          dats[i]->collapse();                  dats[i]->collapse();
1604      }          }
1605      if (dats[i]->m_op!=IDENTITY)          if (dats[i]->m_op!=IDENTITY)
1606      {          {
1607          work.push_back(dats[i]);                  work.push_back(dats[i]);
1608          if (fs!=dats[i]->getFunctionSpace())                  if (fs!=dats[i]->getFunctionSpace())
1609          {                  {
1610              match=false;                          match=false;
1611          }                  }
1612      }          }
1613    }    }
1614    if (work.empty())    if (work.empty())
1615    {    {
1616      return;     // no work to do          return;         // no work to do
1617    }    }
1618    if (match)    // all functionspaces match.  Yes I realise this is overly strict    if (match)    // all functionspaces match.  Yes I realise this is overly strict
1619    {     // it is possible that dats[0] is one of the objects which we discarded and    {             // it is possible that dats[0] is one of the objects which we discarded and
1620          // all the other functionspaces match.                  // all the other functionspaces match.
1621      vector<DataExpanded*> dep;          vector<DataExpanded*> dep;
1622      vector<ValueType*> vecs;          vector<RealVectorType*> vecs;
1623      for (int i=0;i<work.size();++i)          for (int i=0;i<work.size();++i)
1624      {          {
1625          dep.push_back(new DataExpanded(fs,work[i]->getShape(), ValueType(work[i]->getNoValues())));                  dep.push_back(new DataExpanded(fs,work[i]->getShape(), RealVectorType(work[i]->getNoValues())));
1626          vecs.push_back(&(dep[i]->getVectorRW()));                  vecs.push_back(&(dep[i]->getVectorRW()));
1627      }          }
1628      int totalsamples=work[0]->getNumSamples();          int totalsamples=work[0]->getNumSamples();
1629      const ValueType* res=0; // Storage for answer          const RealVectorType* res=0; // Storage for answer
1630      int sample;          int sample;
1631      #pragma omp parallel private(sample, res)          #pragma omp parallel private(sample, res)
1632      {          {
1633          size_t roffset=0;              size_t roffset=0;
1634          #pragma omp for schedule(static)              #pragma omp for schedule(static)
1635          for (sample=0;sample<totalsamples;++sample)              for (sample=0;sample<totalsamples;++sample)
1636          {              {
1637          roffset=0;                  roffset=0;
1638          int j;                  int j;
1639          for (j=work.size()-1;j>0;--j)                  for (j=work.size()-1;j>=0;--j)
1640          {                  {
1641  #ifdef _OPENMP  #ifdef _OPENMP
1642                  res=work[j]->resolveNodeSample(omp_get_thread_num(),sample,roffset);                      res=work[j]->resolveNodeSample(omp_get_thread_num(),sample,roffset);
1643  #else  #else
1644                  res=work[j]->resolveNodeSample(0,sample,roffset);                      res=work[j]->resolveNodeSample(0,sample,roffset);
1645  #endif  #endif
1646                  DataVector::size_type outoffset=dep[j]->getPointOffset(sample,0);                      RealVectorType::size_type outoffset=dep[j]->getPointOffset(sample,0);
1647                  memcpy(&((*vecs[j])[outoffset]),&((*res)[roffset]),work[j]->m_samplesize*sizeof(DataVector::ElementType));                      memcpy(&((*vecs[j])[outoffset]),&((*res)[roffset]),work[j]->m_samplesize*sizeof(RealVectorType::ElementType));
1648          }                  }
1649          }              }
1650      }          }
1651      // Now we need to load the new results as identity ops into the lazy nodes          // Now we need to load the new results as identity ops into the lazy nodes
1652      for (int i=work.size()-1;i>0;--i)          for (int i=work.size()-1;i>=0;--i)
1653      {          {
1654          work[i]->makeIdentity(boost::dynamic_pointer_cast<DataReady>(dep[i]->getPtr()));              work[i]->makeIdentity(REFCOUNTNS::dynamic_pointer_cast<DataReady>(dep[i]->getPtr()));
1655      }          }
1656    }    }
1657    else  // functionspaces do not match    else  // functionspaces do not match
1658    {    {
1659      for (int i=0;i<work.size();++i)          for (int i=0;i<work.size();++i)
1660      {          {
1661          work[i]->resolveToIdentity();                  work[i]->resolveToIdentity();
1662      }          }
1663    }    }
1664  }  }
1665    
# Line 1679  DataLazy::resolveGroupWorker(std::vector Line 1669  DataLazy::resolveGroupWorker(std::vector
1669  DataReady_ptr  DataReady_ptr
1670  DataLazy::resolveNodeWorker()  DataLazy::resolveNodeWorker()
1671  {  {
1672    if (m_readytype!='E')     // if the whole sub-expression is Constant or Tagged, then evaluate it normally    if (m_readytype!='E')         // if the whole sub-expression is Constant or Tagged, then evaluate it normally
1673    {    {
1674      collapse();      collapse();
1675    }    }
1676    if (m_op==IDENTITY)       // So a lazy expression of Constant or Tagged data will be returned here.    if (m_op==IDENTITY)           // So a lazy expression of Constant or Tagged data will be returned here.
1677    {    {
1678      return m_id;      return m_id;
1679    }    }
1680      // from this point on we must have m_op!=IDENTITY and m_readytype=='E'          // from this point on we must have m_op!=IDENTITY and m_readytype=='E'
1681    DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(),  ValueType(getNoValues()));    DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(),  RealVectorType(getNoValues()));
1682    ValueType& resvec=result->getVectorRW();    RealVectorType& resvec=result->getVectorRW();
1683    DataReady_ptr resptr=DataReady_ptr(result);    DataReady_ptr resptr=DataReady_ptr(result);
1684    
1685    int sample;    int sample;
1686    int totalsamples=getNumSamples();    int totalsamples=getNumSamples();
1687    const ValueType* res=0;   // Storage for answer    const RealVectorType* res=0;       // Storage for answer
1688  LAZYDEBUG(cout << "Total number of samples=" <<totalsamples << endl;)  LAZYDEBUG(cout << "Total number of samples=" <<totalsamples << endl;)
1689    #pragma omp parallel private(sample,res)    #pragma omp parallel private(sample,res)
1690    {    {
1691      size_t roffset=0;          size_t roffset=0;
1692  #ifdef LAZY_STACK_PROF  #ifdef LAZY_STACK_PROF
1693      stackstart[omp_get_thread_num()]=&roffset;          stackstart[omp_get_thread_num()]=&roffset;
1694      stackend[omp_get_thread_num()]=&roffset;          stackend[omp_get_thread_num()]=&roffset;
1695  #endif  #endif
1696      #pragma omp for schedule(static)          #pragma omp for schedule(static)
1697      for (sample=0;sample<totalsamples;++sample)          for (sample=0;sample<totalsamples;++sample)
1698      {          {
1699          roffset=0;                  roffset=0;
1700  #ifdef _OPENMP  #ifdef _OPENMP
1701              res=resolveNodeSample(omp_get_thread_num(),sample,roffset);                  res=resolveNodeSample(omp_get_thread_num(),sample,roffset);
1702  #else  #else
1703              res=resolveNodeSample(0,sample,roffset);                  res=resolveNodeSample(0,sample,roffset);
1704  #endif  #endif
1705  LAZYDEBUG(cout << "Sample #" << sample << endl;)  LAZYDEBUG(cout << "Sample #" << sample << endl;)
1706  LAZYDEBUG(cout << "Final res[" << roffset<< "]=" << (*res)[roffset] << (*res)[roffset]<< endl; )  LAZYDEBUG(cout << "Final res[" << roffset<< "]=" << (*res)[roffset] << (*res)[roffset]<< endl; )
1707              DataVector::size_type outoffset=result->getPointOffset(sample,0);                  RealVectorType::size_type outoffset=result->getPointOffset(sample,0);
1708              memcpy(&(resvec[outoffset]),&((*res)[roffset]),m_samplesize*sizeof(DataVector::ElementType));                  memcpy(&(resvec[outoffset]),&((*res)[roffset]),m_samplesize*sizeof(RealVectorType::ElementType));
1709      }          }
1710    }    }
1711  #ifdef LAZY_STACK_PROF  #ifdef LAZY_STACK_PROF
1712    for (int i=0;i<getNumberOfThreads();++i)    for (int i=0;i<getNumberOfThreads();++i)
1713    {    {
1714      size_t r=((size_t)stackstart[i] - (size_t)stackend[i]);          size_t r=((size_t)stackstart[i] - (size_t)stackend[i]);
1715  //  cout << i << " " << stackstart[i] << " .. " << stackend[i] << " = " <<  r << endl;  //      cout << i << " " << stackstart[i] << " .. " << stackend[i] << " = " <<  r << endl;
1716      if (r>maxstackuse)          if (r>maxstackuse)
1717      {          {
1718          maxstackuse=r;                  maxstackuse=r;
1719      }          }
1720    }    }
1721    cout << "Max resolve Stack use=" << maxstackuse << endl;    cout << "Max resolve Stack use=" << maxstackuse << endl;
1722  #endif  #endif
# Line 1740  DataLazy::toString() const Line 1730  DataLazy::toString() const
1730    oss << "Lazy Data: [depth=" << m_height<< "] ";    oss << "Lazy Data: [depth=" << m_height<< "] ";
1731    switch (escriptParams.getLAZY_STR_FMT())    switch (escriptParams.getLAZY_STR_FMT())
1732    {    {
1733    case 1:   // tree format    case 1:       // tree format
1734      oss << endl;          oss << endl;
1735      intoTreeString(oss,"");          intoTreeString(oss,"");
1736      break;          break;
1737    case 2:   // just the depth    case 2:       // just the depth
1738      break;          break;
1739    default:    default:
1740      intoString(oss);          intoString(oss);
1741      break;          break;
1742    }    }
1743    return oss.str();    return oss.str();
1744  }  }
# Line 1761  DataLazy::intoString(ostringstream& oss) Line 1751  DataLazy::intoString(ostringstream& oss)
1751    switch (getOpgroup(m_op))    switch (getOpgroup(m_op))
1752    {    {
1753    case G_IDENTITY:    case G_IDENTITY:
1754      if (m_id->isExpanded())          if (m_id->isExpanded())
1755      {          {
1756         oss << "E";             oss << "E";
1757      }          }
1758      else if (m_id->isTagged())          else if (m_id->isTagged())
1759      {          {
1760        oss << "T";            oss << "T";
1761      }          }
1762      else if (m_id->isConstant())          else if (m_id->isConstant())
1763      {          {
1764        oss << "C";            oss << "C";
1765      }          }
1766      else          else
1767      {          {
1768        oss << "?";            oss << "?";
1769      }          }
1770      oss << '@' << m_id.get();          oss << '@' << m_id.get();
1771      break;          break;
1772    case G_BINARY:    case G_BINARY:
1773      oss << '(';          oss << '(';
1774      m_left->intoString(oss);          m_left->intoString(oss);
1775      oss << ' ' << opToString(m_op) << ' ';          oss << ' ' << opToString(m_op) << ' ';
1776      m_right->intoString(oss);          m_right->intoString(oss);
1777      oss << ')';          oss << ')';
1778      break;          break;
1779    case G_UNARY:    case G_UNARY:
1780    case G_UNARY_P:    case G_UNARY_P:
1781    case G_NP1OUT:    case G_NP1OUT:
1782    case G_NP1OUT_P:    case G_NP1OUT_P:
1783    case G_REDUCTION:    case G_REDUCTION:
1784      oss << opToString(m_op) << '(';          oss << opToString(m_op) << '(';
1785      m_left->intoString(oss);          m_left->intoString(oss);
1786      oss << ')';          oss << ')';
1787      break;          break;
1788    case G_TENSORPROD:    case G_TENSORPROD:
1789      oss << opToString(m_op) << '(';          oss << opToString(m_op) << '(';
1790      m_left->intoString(oss);          m_left->intoString(oss);
1791      oss << ", ";          oss << ", ";
1792      m_right->intoString(oss);          m_right->intoString(oss);
1793      oss << ')';          oss << ')';
1794      break;          break;
1795    case G_NP1OUT_2P:    case G_NP1OUT_2P:
1796      oss << opToString(m_op) << '(';          oss << opToString(m_op) << '(';
1797      m_left->intoString(oss);          m_left->intoString(oss);
1798      oss << ", " << m_axis_offset << ", " << m_transpose;          oss << ", " << m_axis_offset << ", " << m_transpose;
1799      oss << ')';          oss << ')';
1800      break;          break;
1801      case G_CONDEVAL:
1802            oss << opToString(m_op)<< '(' ;
1803            m_mask->intoString(oss);
1804            oss << " ? ";
1805            m_left->intoString(oss);
1806            oss << " : ";
1807            m_right->intoString(oss);
1808            oss << ')';
1809            break;
1810    default:    default:
1811      oss << "UNKNOWN";          oss << "UNKNOWN";
1812    }    }
1813  }  }
1814    
# Line 1821  DataLazy::intoTreeString(ostringstream& Line 1820  DataLazy::intoTreeString(ostringstream&
1820    switch (getOpgroup(m_op))    switch (getOpgroup(m_op))
1821    {    {
1822    case G_IDENTITY:    case G_IDENTITY:
1823      if (m_id->isExpanded())          if (m_id->isExpanded())
1824      {          {
1825         oss << "E";             oss << "E";
1826      }          }
1827      else if (m_id->isTagged())          else if (m_id->isTagged())
1828      {          {
1829        oss << "T";            oss << "T";
1830      }          }
1831      else if (m_id->isConstant())          else if (m_id->isConstant())
1832      {          {
1833        oss << "C";            oss << "C";
1834      }          }
1835      else          else
1836      {          {
1837        oss << "?";            oss << "?";
1838      }          }
1839      oss << '@' << m_id.get() << endl;          oss << '@' << m_id.get() << endl;
1840      break;          break;
1841    case G_BINARY:    case G_BINARY:
1842      oss << opToString(m_op) << endl;          oss << opToString(m_op) << endl;
1843      indent+='.';          indent+='.';
1844      m_left->intoTreeString(oss, indent);          m_left->intoTreeString(oss, indent);
1845      m_right->intoTreeString(oss, indent);          m_right->intoTreeString(oss, indent);
1846      break;          break;
1847    case G_UNARY:    case G_UNARY:
1848    case G_UNARY_P:    case G_UNARY_P:
1849    case G_NP1OUT:    case G_NP1OUT:
1850    case G_NP1OUT_P:    case G_NP1OUT_P:
1851    case G_REDUCTION:    case G_REDUCTION:
1852      oss << opToString(m_op) << endl;          oss << opToString(m_op) << endl;
1853      indent+='.';          indent+='.';
1854      m_left->intoTreeString(oss, indent);          m_left->intoTreeString(oss, indent);
1855      break;          break;
1856    case G_TENSORPROD:    case G_TENSORPROD:
1857      oss << opToString(m_op) << endl;          oss << opToString(m_op) << endl;
1858      indent+='.';          indent+='.';
1859      m_left->intoTreeString(oss, indent);          m_left->intoTreeString(oss, indent);
1860      m_right->intoTreeString(oss, indent);          m_right->intoTreeString(oss, indent);
1861      break;          break;
1862    case G_NP1OUT_2P:    case G_NP1OUT_2P:
1863      oss << opToString(m_op) << ", " << m_axis_offset << ", " << m_transpose<< endl;          oss << opToString(m_op) << ", " << m_axis_offset << ", " << m_transpose<< endl;
1864      indent+='.';          indent+='.';
1865      m_left->intoTreeString(oss, indent);          m_left->intoTreeString(oss, indent);
1866      break;          break;
1867    default:    default:
1868      oss << "UNKNOWN";          oss << "UNKNOWN";
1869    }    }
1870  }  }
1871    
1872    
1873  DataAbstract*  DataAbstract*
1874  DataLazy::deepCopy()  DataLazy::deepCopy() const
1875  {  {
1876    switch (getOpgroup(m_op))    switch (getOpgroup(m_op))
1877    {    {
1878    case G_IDENTITY:  return new DataLazy(m_id->deepCopy()->getPtr());    case G_IDENTITY:  return new DataLazy(m_id->deepCopy()->getPtr());
1879    case G_UNARY:    case G_UNARY:
1880    case G_REDUCTION:      return new DataLazy(m_left->deepCopy()->getPtr(),m_op);    case G_REDUCTION:      return new DataLazy(m_left->deepCopy()->getPtr(),m_op);
1881    case G_UNARY_P:   return new DataLazy(m_left->deepCopy()->getPtr(), m_op, m_tol);    case G_UNARY_P:       return new DataLazy(m_left->deepCopy()->getPtr(), m_op, m_tol);
1882    case G_BINARY:    return new DataLazy(m_left->deepCopy()->getPtr(),m_right->deepCopy()->getPtr(),m_op);    case G_BINARY:        return new DataLazy(m_left->deepCopy()->getPtr(),m_right->deepCopy()->getPtr(),m_op);
1883    case G_NP1OUT: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(),m_op);    case G_NP1OUT: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(),m_op);
1884    case G_TENSORPROD: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);    case G_TENSORPROD: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
1885    case G_NP1OUT_P:   return new DataLazy(m_left->deepCopy()->getPtr(),m_op,  m_axis_offset);    case G_NP1OUT_P:   return new DataLazy(m_left->deepCopy()->getPtr(),m_op,  m_axis_offset);
1886    case G_NP1OUT_2P:  return new DataLazy(m_left->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);    case G_NP1OUT_2P:  return new DataLazy(m_left->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
1887    default:    default:
1888      throw DataException("Programmer error - do not know how to deepcopy operator "+opToString(m_op)+".");          throw DataException("Programmer error - do not know how to deepcopy operator "+opToString(m_op)+".");
1889    }    }
1890  }  }
1891    
# Line 1898  DataLazy::deepCopy() Line 1897  DataLazy::deepCopy()
1897  // or it could be some function of the lengths of the DataReady instances which  // or it could be some function of the lengths of the DataReady instances which
1898  // form part of the expression.  // form part of the expression.
1899  // Rather than have people making assumptions, I have disabled the method.  // Rather than have people making assumptions, I have disabled the method.
1900  DataTypes::ValueType::size_type  DataTypes::RealVectorType::size_type
1901  DataLazy::getLength() const  DataLazy::getLength() const
1902  {  {
1903    throw DataException("getLength() does not make sense for lazy data.");    throw DataException("getLength() does not make sense for lazy data.");
# Line 1913  DataLazy::getSlice(const DataTypes::Regi Line 1912  DataLazy::getSlice(const DataTypes::Regi
1912    
1913    
1914  // To do this we need to rely on our child nodes  // To do this we need to rely on our child nodes
1915  DataTypes::ValueType::size_type  DataTypes::RealVectorType::size_type
1916  DataLazy::getPointOffset(int sampleNo,  DataLazy::getPointOffset(int sampleNo,
1917                   int dataPointNo)                   int dataPointNo)
1918  {  {
1919    if (m_op==IDENTITY)    if (m_op==IDENTITY)
1920    {    {
1921      return m_id->getPointOffset(sampleNo,dataPointNo);          return m_id->getPointOffset(sampleNo,dataPointNo);
1922    }    }
1923    if (m_readytype!='E')    if (m_readytype!='E')
1924    {    {
1925      collapse();          collapse();
1926      return m_id->getPointOffset(sampleNo,dataPointNo);          return m_id->getPointOffset(sampleNo,dataPointNo);
1927    }    }
1928    // at this point we do not have an identity node and the expression will be Expanded    // at this point we do not have an identity node and the expression will be Expanded
1929    // so we only need to know which child to ask    // so we only need to know which child to ask
1930    if (m_left->m_readytype=='E')    if (m_left->m_readytype=='E')
1931    {    {
1932      return m_left->getPointOffset(sampleNo,dataPointNo);          return m_left->getPointOffset(sampleNo,dataPointNo);
1933    }    }
1934    else    else
1935    {    {
1936      return m_right->getPointOffset(sampleNo,dataPointNo);          return m_right->getPointOffset(sampleNo,dataPointNo);
1937    }    }
1938  }  }
1939    
1940  // To do this we need to rely on our child nodes  // To do this we need to rely on our child nodes
1941  DataTypes::ValueType::size_type  DataTypes::RealVectorType::size_type
1942  DataLazy::getPointOffset(int sampleNo,  DataLazy::getPointOffset(int sampleNo,
1943                   int dataPointNo) const                   int dataPointNo) const
1944  {  {
1945    if (m_op==IDENTITY)    if (m_op==IDENTITY)
1946    {    {
1947      return m_id->getPointOffset(sampleNo,dataPointNo);          return m_id->getPointOffset(sampleNo,dataPointNo);
1948    }    }
1949    if (m_readytype=='E')    if (m_readytype=='E')
1950    {    {
# Line 1953  DataLazy::getPointOffset(int sampleNo, Line 1952  DataLazy::getPointOffset(int sampleNo,
1952      // so we only need to know which child to ask      // so we only need to know which child to ask
1953      if (m_left->m_readytype=='E')      if (m_left->m_readytype=='E')
1954      {      {
1955      return m_left->getPointOffset(sampleNo,dataPointNo);          return m_left->getPointOffset(sampleNo,dataPointNo);
1956      }      }
1957      else      else
1958      {      {
1959      return m_right->getPointOffset(sampleNo,dataPointNo);          return m_right->getPointOffset(sampleNo,dataPointNo);
1960      }      }
1961    }    }
1962    if (m_readytype=='C')    if (m_readytype=='C')
1963    {    {
1964      return m_left->getPointOffset(sampleNo,dataPointNo); // which child doesn't matter          return m_left->getPointOffset(sampleNo,dataPointNo); // which child doesn't matter
1965    }    }
1966    throw DataException("Programmer error - getPointOffset on lazy data may require collapsing (but this object is marked const).");    throw DataException("Programmer error - getPointOffset on lazy data may require collapsing (but this object is marked const).");
1967  }  }
# Line 1972  DataLazy::getPointOffset(int sampleNo, Line 1971  DataLazy::getPointOffset(int sampleNo,
1971  void  void
1972  DataLazy::setToZero()  DataLazy::setToZero()
1973  {  {
1974  //   DataTypes::ValueType v(getNoValues(),0);  //   DataTypes::RealVectorType v(getNoValues(),0);
1975  //   m_id=DataReady_ptr(new DataConstant(getFunctionSpace(),getShape(),v));  //   m_id=DataReady_ptr(new DataConstant(getFunctionSpace(),getShape(),v));
1976  //   m_op=IDENTITY;  //   m_op=IDENTITY;
1977  //   m_right.reset();    //   m_right.reset();  
# Line 1980  DataLazy::setToZero() Line 1979  DataLazy::setToZero()
1979  //   m_readytype='C';  //   m_readytype='C';
1980  //   m_buffsRequired=1;  //   m_buffsRequired=1;
1981    
1982    privdebug=privdebug;  // to stop the compiler complaining about unused privdebug    (void)privdebug;  // to stop the compiler complaining about unused privdebug
1983    throw DataException("Programmer error - setToZero not supported for DataLazy (DataLazy objects should be read only).");    throw DataException("Programmer error - setToZero not supported for DataLazy (DataLazy objects should be read only).");
1984  }  }
1985    
1986  bool  bool
1987  DataLazy::actsExpanded() const  DataLazy::actsExpanded() const
1988  {  {
1989      return (m_readytype=='E');          return (m_readytype=='E');
1990  }  }
1991    
1992  }   // end namespace  } // end namespace
1993    

Legend:
Removed from v.2799  
changed lines
  Added in v.6168

  ViewVC Help
Powered by ViewVC 1.1.26