/[escript]/branches/clazy/escriptcore/src/DataLazy.cpp
ViewVC logotype

Diff of /branches/clazy/escriptcore/src/DataLazy.cpp

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

branches/schroedinger_upto1946/escript/src/DataLazy.cpp revision 1993 by phornby, Fri Nov 7 04:52:15 2008 UTC trunk/escriptcore/src/DataLazy.cpp revision 5938 by jfenwick, Thu Feb 18 06:30:35 2016 UTC
# Line 1  Line 1 
1    
2  /*******************************************************  /*****************************************************************************
3  *  *
4  * Copyright (c) 2003-2008 by University of Queensland  * Copyright (c) 2003-2016 by The University of Queensland
5  * Earth Systems Science Computational Center (ESSCC)  * http://www.uq.edu.au
 * http://www.uq.edu.au/esscc  
6  *  *
7  * Primary Business: Queensland, Australia  * Primary Business: Queensland, Australia
8  * Licensed under the Open Software License version 3.0  * Licensed under the Open Software License version 3.0
9  * http://www.opensource.org/licenses/osl-3.0.php  * http://www.opensource.org/licenses/osl-3.0.php
10  *  *
11  *******************************************************/  * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12    * Development 2012-2013 by School of Earth Sciences
13    * Development from 2014 by Centre for Geoscience Computing (GeoComp)
14    *
15    *****************************************************************************/
16    
17    #define ESNEEDPYTHON
18    #include "esysUtils/first.h"
19    
20  #include "DataLazy.h"  #include "DataLazy.h"
21  #ifdef USE_NETCDF  #include "esysUtils/Esys_MPI.h"
 #include <netcdfcpp.h>  
 #endif  
 #ifdef PASO_MPI  
 #include <mpi.h>  
 #endif  
22  #ifdef _OPENMP  #ifdef _OPENMP
23  #include <omp.h>  #include <omp.h>
24  #endif  #endif
# Line 28  Line 28 
28  #include "UnaryFuncs.h"     // for escript::fsign  #include "UnaryFuncs.h"     // for escript::fsign
29  #include "Utils.h"  #include "Utils.h"
30    
31    #include "EscriptParams.h"
32    
33    #ifdef USE_NETCDF
34    #include <netcdfcpp.h>
35    #endif
36    
37    #include <iomanip>      // for some fancy formatting in debug
38    
39    using namespace escript::DataTypes;
40    
41    // #define LAZYDEBUG(X) if (privdebug){X;}
42    #define LAZYDEBUG(X)
43    namespace
44    {
45    bool privdebug=false;
46    
47    #define ENABLEDEBUG privdebug=true;
48    #define DISABLEDEBUG privdebug=false;
49    }
50    
51    // #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {cerr << "\n!!!!!!! SIZE LIMIT EXCEEDED " << m_children << ";" << m_height << endl << toString() << endl;resolveToIdentity();}
52    
53    // #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {cerr << "SIZE LIMIT EXCEEDED " << m_height << endl;resolveToIdentity();}
54    
55    
56    #define SIZELIMIT if (m_height>escript::escriptParams.getTOO_MANY_LEVELS())  {if (escript::escriptParams.getLAZY_VERBOSE()){cerr << "SIZE LIMIT EXCEEDED height=" << m_height << endl;}resolveToIdentity();}
57    
58  /*  /*
59  How does DataLazy work?  How does DataLazy work?
60  ~~~~~~~~~~~~~~~~~~~~~~~  ~~~~~~~~~~~~~~~~~~~~~~~
# Line 39  A special operation, IDENTITY, stores an Line 66  A special operation, IDENTITY, stores an
66  This means that all "internal" nodes in the structure are instances of DataLazy.  This means that all "internal" nodes in the structure are instances of DataLazy.
67    
68  Each operation has a string representation as well as an opgroup - eg G_IDENTITY, G_BINARY, ...  Each operation has a string representation as well as an opgroup - eg G_IDENTITY, G_BINARY, ...
69  Note that IDENITY is not considered a unary operation.  Note that IDENTITY is not considered a unary operation.
70    
71  I am avoiding calling the structure formed a tree because it is not guaranteed to be one (eg c=a+a).  I am avoiding calling the structure formed a tree because it is not guaranteed to be one (eg c=a+a).
72  It must however form a DAG (directed acyclic graph).  It must however form a DAG (directed acyclic graph).
# Line 48  I will refer to individual DataLazy obje Line 75  I will refer to individual DataLazy obje
75  Each node also stores:  Each node also stores:
76  - m_readytype \in {'E','T','C','?'} ~ indicates what sort of DataReady would be produced if the expression was  - m_readytype \in {'E','T','C','?'} ~ indicates what sort of DataReady would be produced if the expression was
77      evaluated.      evaluated.
78  - m_length ~ how many values would be stored in the answer if the expression was evaluated.  - m_buffsrequired ~ the large number of samples which would need to be kept simultaneously in order to
 - m_buffsrequired ~ the larged number of samples which would need to be kept simultaneously in order to  
79      evaluate the expression.      evaluate the expression.
80  - m_samplesize ~ the number of doubles stored in a sample.  - m_samplesize ~ the number of doubles stored in a sample.
81    
# Line 70  The convention that I use, is that the r Line 96  The convention that I use, is that the r
96    
97  For expressions which evaluate to Constant or Tagged, there is a different evaluation method.  For expressions which evaluate to Constant or Tagged, there is a different evaluation method.
98  The collapse method invokes the (non-lazy) operations on the Data class to evaluate the expression.  The collapse method invokes the (non-lazy) operations on the Data class to evaluate the expression.
99    
100    To add a new operator you need to do the following (plus anything I might have forgotten - adding a new group for example):
101    1) Add to the ES_optype.
102    2) determine what opgroup your operation belongs to (X)
103    3) add a string for the op to the end of ES_opstrings
104    4) increase ES_opcount
105    5) add an entry (X) to opgroups
106    6) add an entry to the switch in collapseToReady
107    7) add an entry to resolveX
108  */  */
109    
110    
# Line 82  namespace escript Line 117  namespace escript
117  namespace  namespace
118  {  {
119    
120    
121    // enabling this will print out when ever the maximum stacksize used by resolve increases
122    // it assumes _OPENMP is also in use
123    //#define LAZY_STACK_PROF
124    
125    
126    
127    #ifndef _OPENMP
128      #ifdef LAZY_STACK_PROF
129      #undef LAZY_STACK_PROF
130      #endif
131    #endif
132    
133    
134    #ifdef LAZY_STACK_PROF
135    std::vector<void*> stackstart(getNumberOfThreads());
136    std::vector<void*> stackend(getNumberOfThreads());
137    size_t maxstackuse=0;
138    #endif
139    
140  enum ES_opgroup  enum ES_opgroup
141  {  {
142     G_UNKNOWN,     G_UNKNOWN,
143     G_IDENTITY,     G_IDENTITY,
144     G_BINARY,        // pointwise operations with two arguments     G_BINARY,        // pointwise operations with two arguments
145     G_UNARY      // pointwise operations with one argument     G_UNARY,     // pointwise operations with one argument
146       G_UNARY_P,       // pointwise operations with one argument, requiring a parameter
147       G_NP1OUT,        // non-pointwise op with one output
148       G_NP1OUT_P,      // non-pointwise op with one output requiring a parameter
149       G_TENSORPROD,    // general tensor product
150       G_NP1OUT_2P,     // non-pointwise op with one output requiring two params
151       G_REDUCTION,     // non-pointwise unary op with a scalar output
152       G_CONDEVAL
153  };  };
154    
155    
# Line 98  string ES_opstrings[]={"UNKNOWN","IDENTI Line 160  string ES_opstrings[]={"UNKNOWN","IDENTI
160              "asin","acos","atan","sinh","cosh","tanh","erf",              "asin","acos","atan","sinh","cosh","tanh","erf",
161              "asinh","acosh","atanh",              "asinh","acosh","atanh",
162              "log10","log","sign","abs","neg","pos","exp","sqrt",              "log10","log","sign","abs","neg","pos","exp","sqrt",
163              "1/","where>0","where<0","where>=0","where<=0"};              "1/","where>0","where<0","where>=0","where<=0", "where<>0","where=0",
164  int ES_opcount=33;              "symmetric","nonsymmetric",
165                "prod",
166                "transpose", "trace",
167                "swapaxes",
168                "minval", "maxval",
169                "condEval"};
170    int ES_opcount=44;
171  ES_opgroup opgroups[]={G_UNKNOWN,G_IDENTITY,G_BINARY,G_BINARY,G_BINARY,G_BINARY, G_BINARY,  ES_opgroup opgroups[]={G_UNKNOWN,G_IDENTITY,G_BINARY,G_BINARY,G_BINARY,G_BINARY, G_BINARY,
172              G_UNARY,G_UNARY,G_UNARY, //10              G_UNARY,G_UNARY,G_UNARY, //10
173              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,    // 17              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,    // 17
174              G_UNARY,G_UNARY,G_UNARY,                    // 20              G_UNARY,G_UNARY,G_UNARY,                    // 20
175              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,        // 28              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,    // 28
176              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY};              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY, G_UNARY_P, G_UNARY_P,      // 35
177                G_NP1OUT,G_NP1OUT,
178                G_TENSORPROD,
179                G_NP1OUT_P, G_NP1OUT_P,
180                G_NP1OUT_2P,
181                G_REDUCTION, G_REDUCTION,
182                G_CONDEVAL};
183  inline  inline
184  ES_opgroup  ES_opgroup
185  getOpgroup(ES_optype op)  getOpgroup(ES_optype op)
# Line 126  resultFS(DataAbstract_ptr left, DataAbst Line 200  resultFS(DataAbstract_ptr left, DataAbst
200    FunctionSpace r=right->getFunctionSpace();    FunctionSpace r=right->getFunctionSpace();
201    if (l!=r)    if (l!=r)
202    {    {
203      if (r.probeInterpolation(l))      signed char res=r.getDomain()->preferredInterpolationOnDomain(r.getTypeCode(), l.getTypeCode());
204        if (res==1)
205      {      {
206      return l;      return l;
207      }      }
208      if (l.probeInterpolation(r))      if (res==-1)
209      {      {
210      return r;      return r;
211      }      }
# Line 140  resultFS(DataAbstract_ptr left, DataAbst Line 215  resultFS(DataAbstract_ptr left, DataAbst
215  }  }
216    
217  // return the shape of the result of "left op right"  // return the shape of the result of "left op right"
218    // the shapes resulting from tensor product are more complex to compute so are worked out elsewhere
219  DataTypes::ShapeType  DataTypes::ShapeType
220  resultShape(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)  resultShape(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
221  {  {
222      if (left->getShape()!=right->getShape())      if (left->getShape()!=right->getShape())
223      {      {
224        if (getOpgroup(op)!=G_BINARY)        if ((getOpgroup(op)!=G_BINARY) && (getOpgroup(op)!=G_NP1OUT))
225        {        {
226          throw DataException("Shapes not the name - shapes must match for (point)binary operations.");          throw DataException("Shapes not the name - shapes must match for (point)binary operations.");
227        }        }
228    
229        if (left->getRank()==0)   // we need to allow scalar * anything        if (left->getRank()==0)   // we need to allow scalar * anything
230        {        {
231          return right->getShape();          return right->getShape();
# Line 162  resultShape(DataAbstract_ptr left, DataA Line 239  resultShape(DataAbstract_ptr left, DataA
239      return left->getShape();      return left->getShape();
240  }  }
241    
242  // determine the number of points in the result of "left op right"  // return the shape for "op left"
243  size_t  
244  resultLength(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)  DataTypes::ShapeType
245    resultShape(DataAbstract_ptr left, ES_optype op, int axis_offset)
246  {  {
247     switch (getOpgroup(op))      switch(op)
248     {      {
249     case G_BINARY: return left->getLength();          case TRANS:
250     case G_UNARY: return left->getLength();         {            // for the scoping of variables
251     default:          const DataTypes::ShapeType& s=left->getShape();
252      throw DataException("Programmer Error - attempt to getLength() for operator "+opToString(op)+".");          DataTypes::ShapeType sh;
253     }          int rank=left->getRank();
254            if (axis_offset<0 || axis_offset>rank)
255            {
256                stringstream e;
257                e << "Error - Data::transpose must have 0 <= axis_offset <= rank=" << rank;
258                throw DataException(e.str());
259            }
260            for (int i=0; i<rank; i++)
261            {
262               int index = (axis_offset+i)%rank;
263               sh.push_back(s[index]); // Append to new shape
264            }
265            return sh;
266           }
267        break;
268        case TRACE:
269           {
270            int rank=left->getRank();
271            if (rank<2)
272            {
273               throw DataException("Trace can only be computed for objects with rank 2 or greater.");
274            }
275            if ((axis_offset>rank-2) || (axis_offset<0))
276            {
277               throw DataException("Trace: axis offset must lie between 0 and rank-2 inclusive.");
278            }
279            if (rank==2)
280            {
281               return DataTypes::scalarShape;
282            }
283            else if (rank==3)
284            {
285               DataTypes::ShapeType sh;
286                   if (axis_offset==0)
287               {
288                    sh.push_back(left->getShape()[2]);
289                   }
290                   else     // offset==1
291               {
292                sh.push_back(left->getShape()[0]);
293                   }
294               return sh;
295            }
296            else if (rank==4)
297            {
298               DataTypes::ShapeType sh;
299               const DataTypes::ShapeType& s=left->getShape();
300                   if (axis_offset==0)
301               {
302                    sh.push_back(s[2]);
303                    sh.push_back(s[3]);
304                   }
305                   else if (axis_offset==1)
306               {
307                    sh.push_back(s[0]);
308                    sh.push_back(s[3]);
309                   }
310               else     // offset==2
311               {
312                sh.push_back(s[0]);
313                sh.push_back(s[1]);
314               }
315               return sh;
316            }
317            else        // unknown rank
318            {
319               throw DataException("Error - Data::trace can only be calculated for rank 2, 3 or 4 object.");
320            }
321           }
322        break;
323            default:
324        throw DataException("Programmer error - resultShape(left,op) can't compute shapes for operator "+opToString(op)+".");
325        }
326  }  }
327    
328  // determine the number of samples requires to evaluate an expression combining left and right  DataTypes::ShapeType
329  int  SwapShape(DataAbstract_ptr left, const int axis0, const int axis1)
 calcBuffs(const DataLazy_ptr& left, const DataLazy_ptr& right, ES_optype op)  
330  {  {
331     switch(getOpgroup(op))       // This code taken from the Data.cpp swapaxes() method
332     {       // Some of the checks are probably redundant here
333     case G_IDENTITY: return 1;       int axis0_tmp,axis1_tmp;
334     case G_BINARY: return max(left->getBuffsRequired(),right->getBuffsRequired()+1);       const DataTypes::ShapeType& s=left->getShape();
335     case G_UNARY: return max(left->getBuffsRequired(),1);       DataTypes::ShapeType out_shape;
336     default:       // Here's the equivalent of python s_out=s[axis_offset:]+s[:axis_offset]
337      throw DataException("Programmer Error - attempt to calcBuffs() for operator "+opToString(op)+".");       // which goes thru all shape vector elements starting with axis_offset (at index=rank wrap around to 0)
338     }       int rank=left->getRank();
339         if (rank<2) {
340            throw DataException("Error - Data::swapaxes argument must have at least rank 2.");
341         }
342         if (axis0<0 || axis0>rank-1) {
343            stringstream e;
344            e << "Error - Data::swapaxes: axis0 must be between 0 and rank-1=" << (rank-1);
345            throw DataException(e.str());
346         }
347         if (axis1<0 || axis1>rank-1) {
348            stringstream e;
349            e << "Error - Data::swapaxes: axis1 must be between 0 and rank-1=" << (rank-1);
350            throw DataException(e.str());
351         }
352         if (axis0 == axis1) {
353             throw DataException("Error - Data::swapaxes: axis indices must be different.");
354         }
355         if (axis0 > axis1) {
356             axis0_tmp=axis1;
357             axis1_tmp=axis0;
358         } else {
359             axis0_tmp=axis0;
360             axis1_tmp=axis1;
361         }
362         for (int i=0; i<rank; i++) {
363           if (i == axis0_tmp) {
364              out_shape.push_back(s[axis1_tmp]);
365           } else if (i == axis1_tmp) {
366              out_shape.push_back(s[axis0_tmp]);
367           } else {
368              out_shape.push_back(s[i]);
369           }
370         }
371        return out_shape;
372  }  }
373    
374    
375    // determine the output shape for the general tensor product operation
376    // the additional parameters return information required later for the product
377    // the majority of this code is copy pasted from C_General_Tensor_Product
378    DataTypes::ShapeType
379    GTPShape(DataAbstract_ptr left, DataAbstract_ptr right, int axis_offset, int transpose, int& SL, int& SM, int& SR)
380    {
381        
382      // Get rank and shape of inputs
383      int rank0 = left->getRank();
384      int rank1 = right->getRank();
385      const DataTypes::ShapeType& shape0 = left->getShape();
386      const DataTypes::ShapeType& shape1 = right->getShape();
387    
388      // Prepare for the loops of the product and verify compatibility of shapes
389      int start0=0, start1=0;
390      if (transpose == 0)       {}
391      else if (transpose == 1)  { start0 = axis_offset; }
392      else if (transpose == 2)  { start1 = rank1-axis_offset; }
393      else              { throw DataException("DataLazy GeneralTensorProduct Constructor: Error - transpose should be 0, 1 or 2"); }
394    
395      if (rank0<axis_offset)
396      {
397        throw DataException("DataLazy GeneralTensorProduct Constructor: Error - rank of left < axisoffset");
398      }
399    
400      // Adjust the shapes for transpose
401      DataTypes::ShapeType tmpShape0(rank0);    // pre-sizing the vectors rather
402      DataTypes::ShapeType tmpShape1(rank1);    // than using push_back
403      for (int i=0; i<rank0; i++)   { tmpShape0[i]=shape0[(i+start0)%rank0]; }
404      for (int i=0; i<rank1; i++)   { tmpShape1[i]=shape1[(i+start1)%rank1]; }
405    
406      // Prepare for the loops of the product
407      SL=1, SM=1, SR=1;
408      for (int i=0; i<rank0-axis_offset; i++)   {
409        SL *= tmpShape0[i];
410      }
411      for (int i=rank0-axis_offset; i<rank0; i++)   {
412        if (tmpShape0[i] != tmpShape1[i-(rank0-axis_offset)]) {
413          throw DataException("C_GeneralTensorProduct: Error - incompatible shapes");
414        }
415        SM *= tmpShape0[i];
416      }
417      for (int i=axis_offset; i<rank1; i++)     {
418        SR *= tmpShape1[i];
419      }
420    
421      // Define the shape of the output (rank of shape is the sum of the loop ranges below)
422      DataTypes::ShapeType shape2(rank0+rank1-2*axis_offset);  
423      {         // block to limit the scope of out_index
424         int out_index=0;
425         for (int i=0; i<rank0-axis_offset; i++, ++out_index) { shape2[out_index]=tmpShape0[i]; } // First part of arg_0_Z
426         for (int i=axis_offset; i<rank1; i++, ++out_index)   { shape2[out_index]=tmpShape1[i]; } // Last part of arg_1_Z
427      }
428    
429      if (shape2.size()>ESCRIPT_MAX_DATA_RANK)
430      {
431         ostringstream os;
432         os << "C_GeneralTensorProduct: Error - Attempt to create a rank " << shape2.size() << " object. The maximum rank is " << ESCRIPT_MAX_DATA_RANK << ".";
433         throw DataException(os.str());
434      }
435    
436      return shape2;
437    }
438    
439  }   // end anonymous namespace  }   // end anonymous namespace
440    
441    
# Line 205  opToString(ES_optype op) Line 451  opToString(ES_optype op)
451    return ES_opstrings[op];    return ES_opstrings[op];
452  }  }
453    
454    void DataLazy::LazyNodeSetup()
455    {
456    #ifdef _OPENMP
457        int numthreads=omp_get_max_threads();
458        m_samples.resize(numthreads*m_samplesize);
459        m_sampleids=new int[numthreads];
460        for (int i=0;i<numthreads;++i)
461        {
462            m_sampleids[i]=-1;  
463        }
464    #else
465        m_samples.resize(m_samplesize);
466        m_sampleids=new int[1];
467        m_sampleids[0]=-1;
468    #endif  // _OPENMP
469    }
470    
471    
472    // Creates an identity node
473  DataLazy::DataLazy(DataAbstract_ptr p)  DataLazy::DataLazy(DataAbstract_ptr p)
474      : parent(p->getFunctionSpace(),p->getShape()),      : parent(p->getFunctionSpace(),p->getShape())
475      m_op(IDENTITY)      ,m_sampleids(0),
476        m_samples(1)
477  {  {
478     if (p->isLazy())     if (p->isLazy())
479     {     {
# Line 219  DataLazy::DataLazy(DataAbstract_ptr p) Line 484  DataLazy::DataLazy(DataAbstract_ptr p)
484     }     }
485     else     else
486     {     {
487      m_id=dynamic_pointer_cast<DataReady>(p);      p->makeLazyShared();
488      if(p->isConstant()) {m_readytype='C';}      DataReady_ptr dr=dynamic_pointer_cast<DataReady>(p);
489      else if(p->isExpanded()) {m_readytype='E';}      makeIdentity(dr);
490      else if (p->isTagged()) {m_readytype='T';}  LAZYDEBUG(cout << "Wrapping " << dr.get() << " id=" << m_id.get() << endl;)
     else {throw DataException("Unknown DataReady instance in DataLazy constructor.");}  
491     }     }
492     m_length=p->getLength();  LAZYDEBUG(cout << "(1)Lazy created with " << m_samplesize << endl;)
    m_buffsRequired=1;  
    m_samplesize=getNumDPPSample()*getNoValues();  
 cout << "(1)Lazy created with " << m_samplesize << endl;  
493  }  }
494    
   
   
   
495  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op)  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op)
496      : parent(left->getFunctionSpace(),left->getShape()),      : parent(left->getFunctionSpace(),(getOpgroup(op)!=G_REDUCTION)?left->getShape():DataTypes::scalarShape),
497      m_op(op)      m_op(op),
498        m_axis_offset(0),
499        m_transpose(0),
500        m_SL(0), m_SM(0), m_SR(0)
501  {  {
502     if (getOpgroup(op)!=G_UNARY)     if ((getOpgroup(op)!=G_UNARY) && (getOpgroup(op)!=G_NP1OUT) && (getOpgroup(op)!=G_REDUCTION))
503     {     {
504      throw DataException("Programmer error - constructor DataLazy(left, op) will only process UNARY operations.");      throw DataException("Programmer error - constructor DataLazy(left, op) will only process UNARY operations.");
505     }     }
506    
507     DataLazy_ptr lleft;     DataLazy_ptr lleft;
508     if (!left->isLazy())     if (!left->isLazy())
509     {     {
# Line 252  DataLazy::DataLazy(DataAbstract_ptr left Line 514  DataLazy::DataLazy(DataAbstract_ptr left
514      lleft=dynamic_pointer_cast<DataLazy>(left);      lleft=dynamic_pointer_cast<DataLazy>(left);
515     }     }
516     m_readytype=lleft->m_readytype;     m_readytype=lleft->m_readytype;
    m_length=left->getLength();  
517     m_left=lleft;     m_left=lleft;
    m_buffsRequired=1;  
518     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
519       m_children=m_left->m_children+1;
520       m_height=m_left->m_height+1;
521       LazyNodeSetup();
522       SIZELIMIT
523  }  }
524    
525    
526  // In this constructor we need to consider interpolation  // In this constructor we need to consider interpolation
527  DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)  DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
528      : parent(resultFS(left,right,op), resultShape(left,right,op)),      : parent(resultFS(left,right,op), resultShape(left,right,op)),
529      m_op(op)      m_op(op),
530        m_SL(0), m_SM(0), m_SR(0)
531  {  {
532     if (getOpgroup(op)!=G_BINARY)  LAZYDEBUG(cout << "Forming operator with " << left.get() << " " << right.get() << endl;)
533       if ((getOpgroup(op)!=G_BINARY))
534     {     {
535      throw DataException("Programmer error - constructor DataLazy(left, right, op) will only process BINARY operations.");      throw DataException("Programmer error - constructor DataLazy(left, right, op) will only process BINARY operations.");
536     }     }
# Line 276  DataLazy::DataLazy(DataAbstract_ptr left Line 542  DataLazy::DataLazy(DataAbstract_ptr left
542      Data tmp(ltemp,fs);      Data tmp(ltemp,fs);
543      left=tmp.borrowDataPtr();      left=tmp.borrowDataPtr();
544     }     }
545     if (getFunctionSpace()!=right->getFunctionSpace())   // left needs to be interpolated     if (getFunctionSpace()!=right->getFunctionSpace())   // right needs to be interpolated
546     {     {
547      Data tmp(Data(right),getFunctionSpace());      Data tmp(Data(right),getFunctionSpace());
548      right=tmp.borrowDataPtr();      right=tmp.borrowDataPtr();
549    LAZYDEBUG(cout << "Right interpolation required " << right.get() << endl;)
550     }     }
551     left->operandCheck(*right);     left->operandCheck(*right);
552    
553     if (left->isLazy())          // the children need to be DataLazy. Wrap them in IDENTITY if required     if (left->isLazy())          // the children need to be DataLazy. Wrap them in IDENTITY if required
554     {     {
555      m_left=dynamic_pointer_cast<DataLazy>(left);      m_left=dynamic_pointer_cast<DataLazy>(left);
556    LAZYDEBUG(cout << "Left is " << m_left->toString() << endl;)
557     }     }
558     else     else
559     {     {
560      m_left=DataLazy_ptr(new DataLazy(left));      m_left=DataLazy_ptr(new DataLazy(left));
561    LAZYDEBUG(cout << "Left " << left.get() << " wrapped " << m_left->m_id.get() << endl;)
562     }     }
563     if (right->isLazy())     if (right->isLazy())
564     {     {
565      m_right=dynamic_pointer_cast<DataLazy>(right);      m_right=dynamic_pointer_cast<DataLazy>(right);
566    LAZYDEBUG(cout << "Right is " << m_right->toString() << endl;)
567     }     }
568     else     else
569     {     {
570      m_right=DataLazy_ptr(new DataLazy(right));      m_right=DataLazy_ptr(new DataLazy(right));
571    LAZYDEBUG(cout << "Right " << right.get() << " wrapped " << m_right->m_id.get() << endl;)
572     }     }
573     char lt=m_left->m_readytype;     char lt=m_left->m_readytype;
574     char rt=m_right->m_readytype;     char rt=m_right->m_readytype;
# Line 313  DataLazy::DataLazy(DataAbstract_ptr left Line 584  DataLazy::DataLazy(DataAbstract_ptr left
584     {     {
585      m_readytype='C';      m_readytype='C';
586     }     }
587     m_length=resultLength(m_left,m_right,m_op);     m_samplesize=getNumDPPSample()*getNoValues();
588     m_samplesize=getNumDPPSample()*getNoValues();         m_children=m_left->m_children+m_right->m_children+2;
589     m_buffsRequired=calcBuffs(m_left, m_right,m_op);     m_height=max(m_left->m_height,m_right->m_height)+1;
590  cout << "(3)Lazy created with " << m_samplesize << endl;     LazyNodeSetup();
591       SIZELIMIT
592    LAZYDEBUG(cout << "(3)Lazy created with " << m_samplesize << endl;)
593  }  }
594    
595    DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op, int axis_offset, int transpose)
596        : parent(resultFS(left,right,op), GTPShape(left,right, axis_offset, transpose, m_SL,m_SM, m_SR)),
597        m_op(op),
598        m_axis_offset(axis_offset),
599        m_transpose(transpose)
600    {
601       if ((getOpgroup(op)!=G_TENSORPROD))
602       {
603        throw DataException("Programmer error - constructor DataLazy(left, right, op, ax, tr) will only process BINARY operations which require parameters.");
604       }
605       if ((transpose>2) || (transpose<0))
606       {
607        throw DataException("DataLazy GeneralTensorProduct constructor: Error - transpose should be 0, 1 or 2");
608       }
609       if (getFunctionSpace()!=left->getFunctionSpace())    // left needs to be interpolated
610       {
611        FunctionSpace fs=getFunctionSpace();
612        Data ltemp(left);
613        Data tmp(ltemp,fs);
614        left=tmp.borrowDataPtr();
615       }
616       if (getFunctionSpace()!=right->getFunctionSpace())   // right needs to be interpolated
617       {
618        Data tmp(Data(right),getFunctionSpace());
619        right=tmp.borrowDataPtr();
620       }
621    //    left->operandCheck(*right);
622    
623  DataLazy::~DataLazy()     if (left->isLazy())          // the children need to be DataLazy. Wrap them in IDENTITY if required
624       {
625        m_left=dynamic_pointer_cast<DataLazy>(left);
626       }
627       else
628       {
629        m_left=DataLazy_ptr(new DataLazy(left));
630       }
631       if (right->isLazy())
632       {
633        m_right=dynamic_pointer_cast<DataLazy>(right);
634       }
635       else
636       {
637        m_right=DataLazy_ptr(new DataLazy(right));
638       }
639       char lt=m_left->m_readytype;
640       char rt=m_right->m_readytype;
641       if (lt=='E' || rt=='E')
642       {
643        m_readytype='E';
644       }
645       else if (lt=='T' || rt=='T')
646       {
647        m_readytype='T';
648       }
649       else
650       {
651        m_readytype='C';
652       }
653       m_samplesize=getNumDPPSample()*getNoValues();
654       m_children=m_left->m_children+m_right->m_children+2;
655       m_height=max(m_left->m_height,m_right->m_height)+1;
656       LazyNodeSetup();
657       SIZELIMIT
658    LAZYDEBUG(cout << "(4)Lazy created with " << m_samplesize << endl;)
659    }
660    
661    
662    DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, int axis_offset)
663        : parent(left->getFunctionSpace(), resultShape(left,op, axis_offset)),
664        m_op(op),
665        m_axis_offset(axis_offset),
666        m_transpose(0),
667        m_tol(0)
668    {
669       if ((getOpgroup(op)!=G_NP1OUT_P))
670       {
671        throw DataException("Programmer error - constructor DataLazy(left, op, ax) will only process UNARY operations which require parameters.");
672       }
673       DataLazy_ptr lleft;
674       if (!left->isLazy())
675       {
676        lleft=DataLazy_ptr(new DataLazy(left));
677       }
678       else
679       {
680        lleft=dynamic_pointer_cast<DataLazy>(left);
681       }
682       m_readytype=lleft->m_readytype;
683       m_left=lleft;
684       m_samplesize=getNumDPPSample()*getNoValues();
685       m_children=m_left->m_children+1;
686       m_height=m_left->m_height+1;
687       LazyNodeSetup();
688       SIZELIMIT
689    LAZYDEBUG(cout << "(5)Lazy created with " << m_samplesize << endl;)
690    }
691    
692    DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, double tol)
693        : parent(left->getFunctionSpace(), left->getShape()),
694        m_op(op),
695        m_axis_offset(0),
696        m_transpose(0),
697        m_tol(tol)
698    {
699       if ((getOpgroup(op)!=G_UNARY_P))
700       {
701        throw DataException("Programmer error - constructor DataLazy(left, op, tol) will only process UNARY operations which require parameters.");
702       }
703       DataLazy_ptr lleft;
704       if (!left->isLazy())
705       {
706        lleft=DataLazy_ptr(new DataLazy(left));
707       }
708       else
709       {
710        lleft=dynamic_pointer_cast<DataLazy>(left);
711       }
712       m_readytype=lleft->m_readytype;
713       m_left=lleft;
714       m_samplesize=getNumDPPSample()*getNoValues();
715       m_children=m_left->m_children+1;
716       m_height=m_left->m_height+1;
717       LazyNodeSetup();
718       SIZELIMIT
719    LAZYDEBUG(cout << "(6)Lazy created with " << m_samplesize << endl;)
720    }
721    
722    
723    DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, const int axis0, const int axis1)
724        : parent(left->getFunctionSpace(), SwapShape(left,axis0,axis1)),
725        m_op(op),
726        m_axis_offset(axis0),
727        m_transpose(axis1),
728        m_tol(0)
729  {  {
730       if ((getOpgroup(op)!=G_NP1OUT_2P))
731       {
732        throw DataException("Programmer error - constructor DataLazy(left, op, tol) will only process UNARY operations which require two integer parameters.");
733       }
734       DataLazy_ptr lleft;
735       if (!left->isLazy())
736       {
737        lleft=DataLazy_ptr(new DataLazy(left));
738       }
739       else
740       {
741        lleft=dynamic_pointer_cast<DataLazy>(left);
742       }
743       m_readytype=lleft->m_readytype;
744       m_left=lleft;
745       m_samplesize=getNumDPPSample()*getNoValues();
746       m_children=m_left->m_children+1;
747       m_height=m_left->m_height+1;
748       LazyNodeSetup();
749       SIZELIMIT
750    LAZYDEBUG(cout << "(7)Lazy created with " << m_samplesize << endl;)
751  }  }
752    
753    
754  int  namespace
 DataLazy::getBuffsRequired() const  
755  {  {
756      return m_buffsRequired;  
757        inline int max3(int a, int b, int c)
758        {
759        int t=(a>b?a:b);
760        return (t>c?t:c);
761    
762        }
763    }
764    
765    DataLazy::DataLazy(DataAbstract_ptr mask, DataAbstract_ptr left, DataAbstract_ptr right/*, double tol*/)
766        : parent(left->getFunctionSpace(), left->getShape()),
767        m_op(CONDEVAL),
768        m_axis_offset(0),
769        m_transpose(0),
770        m_tol(0)
771    {
772    
773       DataLazy_ptr lmask;
774       DataLazy_ptr lleft;
775       DataLazy_ptr lright;
776       if (!mask->isLazy())
777       {
778        lmask=DataLazy_ptr(new DataLazy(mask));
779       }
780       else
781       {
782        lmask=dynamic_pointer_cast<DataLazy>(mask);
783       }
784       if (!left->isLazy())
785       {
786        lleft=DataLazy_ptr(new DataLazy(left));
787       }
788       else
789       {
790        lleft=dynamic_pointer_cast<DataLazy>(left);
791       }
792       if (!right->isLazy())
793       {
794        lright=DataLazy_ptr(new DataLazy(right));
795       }
796       else
797       {
798        lright=dynamic_pointer_cast<DataLazy>(right);
799       }
800       m_readytype=lmask->m_readytype;
801       if ((lleft->m_readytype!=lright->m_readytype) || (lmask->m_readytype!=lleft->m_readytype))
802       {
803        throw DataException("Programmer Error - condEval arguments must have the same readytype");
804       }
805       m_left=lleft;
806       m_right=lright;
807       m_mask=lmask;
808       m_samplesize=getNumDPPSample()*getNoValues();
809       m_children=m_left->m_children+m_right->m_children+m_mask->m_children+1;
810       m_height=max3(m_left->m_height,m_right->m_height,m_mask->m_height)+1;
811       LazyNodeSetup();
812       SIZELIMIT
813    LAZYDEBUG(cout << "(8)Lazy created with " << m_samplesize << endl;)
814    }
815    
816    
817    
818    DataLazy::~DataLazy()
819    {
820       delete[] m_sampleids;
821  }  }
822    
823    
# Line 338  DataLazy::getBuffsRequired() const Line 827  DataLazy::getBuffsRequired() const
827    For reasons of efficiency do not call this method on DataExpanded nodes.    For reasons of efficiency do not call this method on DataExpanded nodes.
828  */  */
829  DataReady_ptr  DataReady_ptr
830  DataLazy::collapseToReady()  DataLazy::collapseToReady() const
831  {  {
832    if (m_readytype=='E')    if (m_readytype=='E')
833    { // this is more an efficiency concern than anything else    { // this is more an efficiency concern than anything else
# Line 351  DataLazy::collapseToReady() Line 840  DataLazy::collapseToReady()
840    DataReady_ptr pleft=m_left->collapseToReady();    DataReady_ptr pleft=m_left->collapseToReady();
841    Data left(pleft);    Data left(pleft);
842    Data right;    Data right;
843    if (getOpgroup(m_op)==G_BINARY)    if ((getOpgroup(m_op)==G_BINARY) || (getOpgroup(m_op)==G_TENSORPROD))
844    {    {
845      right=Data(m_right->collapseToReady());      right=Data(m_right->collapseToReady());
846    }    }
# Line 450  DataLazy::collapseToReady() Line 939  DataLazy::collapseToReady()
939      case LEZ:      case LEZ:
940      result=left.whereNonPositive();      result=left.whereNonPositive();
941      break;      break;
942        case NEZ:
943        result=left.whereNonZero(m_tol);
944        break;
945        case EZ:
946        result=left.whereZero(m_tol);
947        break;
948        case SYM:
949        result=left.symmetric();
950        break;
951        case NSYM:
952        result=left.nonsymmetric();
953        break;
954        case PROD:
955        result=C_GeneralTensorProduct(left,right,m_axis_offset, m_transpose);
956        break;
957        case TRANS:
958        result=left.transpose(m_axis_offset);
959        break;
960        case TRACE:
961        result=left.trace(m_axis_offset);
962        break;
963        case SWAP:
964        result=left.swapaxes(m_axis_offset, m_transpose);
965        break;
966        case MINVAL:
967        result=left.minval();
968        break;
969        case MAXVAL:
970        result=left.minval();
971        break;
972      default:      default:
973      throw DataException("Programmer error - do not know how to resolve operator "+opToString(m_op)+".");      throw DataException("Programmer error - collapseToReady does not know how to resolve operator "+opToString(m_op)+".");
974    }    }
975    return result.borrowReadyPtr();    return result.borrowReadyPtr();
976  }  }
# Line 463  DataLazy::collapseToReady() Line 982  DataLazy::collapseToReady()
982     the purpose of using DataLazy in the first place).     the purpose of using DataLazy in the first place).
983  */  */
984  void  void
985  DataLazy::collapse()  DataLazy::collapse() const
986  {  {
987    if (m_op==IDENTITY)    if (m_op==IDENTITY)
988    {    {
# Line 477  DataLazy::collapse() Line 996  DataLazy::collapse()
996    m_op=IDENTITY;    m_op=IDENTITY;
997  }  }
998    
999  /*  
1000    \brief Compute the value of the expression (binary operation) for the given sample.  
1001    \return Vector which stores the value of the subexpression for the given sample.  
1002    \param v A vector to store intermediate results.  
1003    \param offset Index in v to begin storing results.  
1004    \param sampleNo Sample number to evaluate.  #define PROC_OP(TYPE,X)                               \
1005    \param roffset (output parameter) the offset in the return vector where the result begins.      for (int j=0;j<onumsteps;++j)\
1006        {\
1007    The return value will be an existing vector so do not deallocate it.        for (int i=0;i<numsteps;++i,resultp+=resultStep) \
1008    If the result is stored in v it should be stored at the offset given.        { \
1009    Everything from offset to the end of v should be considered available for this method to use.  LAZYDEBUG(cout << "[left,right]=[" << lroffset << "," << rroffset << "]" << endl;)\
1010  */  LAZYDEBUG(cout << "{left,right}={" << (*left)[lroffset] << "," << (*right)[rroffset] << "}\n";)\
1011  DataTypes::ValueType*           tensor_binary_operation< TYPE >(chunksize, &((*left)[lroffset]), &((*right)[rroffset]), resultp, X); \
1012  DataLazy::resolveUnary(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const  LAZYDEBUG(cout << " result=      " << resultp[0] << endl;) \
1013             lroffset+=leftstep; \
1014             rroffset+=rightstep; \
1015          }\
1016          lroffset+=oleftstep;\
1017          rroffset+=orightstep;\
1018        }
1019    
1020    
1021    // The result will be stored in m_samples
1022    // The return value is a pointer to the DataVector, offset is the offset within the return value
1023    const DataTypes::RealVectorType*
1024    DataLazy::resolveNodeSample(int tid, int sampleNo, size_t& roffset) const
1025    {
1026    LAZYDEBUG(cout << "Resolve sample " << toString() << endl;)
1027        // collapse so we have a 'E' node or an IDENTITY for some other type
1028      if (m_readytype!='E' && m_op!=IDENTITY)
1029      {
1030        collapse();
1031      }
1032      if (m_op==IDENTITY)  
1033      {
1034        const ValueType& vec=m_id->getVectorRO();
1035        roffset=m_id->getPointOffset(sampleNo, 0);
1036    #ifdef LAZY_STACK_PROF
1037    int x;
1038    if (&x<stackend[omp_get_thread_num()])
1039    {
1040           stackend[omp_get_thread_num()]=&x;
1041    }
1042    #endif
1043        return &(vec);
1044      }
1045      if (m_readytype!='E')
1046      {
1047        throw DataException("Programmer Error - Collapse did not produce an expanded node.");
1048      }
1049      if (m_sampleids[tid]==sampleNo)
1050      {
1051        roffset=tid*m_samplesize;
1052        return &(m_samples);        // sample is already resolved
1053      }
1054      m_sampleids[tid]=sampleNo;
1055    
1056      switch (getOpgroup(m_op))
1057      {
1058      case G_UNARY:
1059      case G_UNARY_P: return resolveNodeUnary(tid, sampleNo, roffset);
1060      case G_BINARY: return resolveNodeBinary(tid, sampleNo, roffset);
1061      case G_NP1OUT: return resolveNodeNP1OUT(tid, sampleNo, roffset);
1062      case G_NP1OUT_P: return resolveNodeNP1OUT_P(tid, sampleNo, roffset);
1063      case G_TENSORPROD: return resolveNodeTProd(tid, sampleNo, roffset);
1064      case G_NP1OUT_2P: return resolveNodeNP1OUT_2P(tid, sampleNo, roffset);
1065      case G_REDUCTION: return resolveNodeReduction(tid, sampleNo, roffset);
1066      case G_CONDEVAL: return resolveNodeCondEval(tid, sampleNo, roffset);
1067      default:
1068        throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");
1069      }
1070    }
1071    
1072    const DataTypes::RealVectorType*
1073    DataLazy::resolveNodeUnary(int tid, int sampleNo, size_t& roffset) const
1074  {  {
1075      // we assume that any collapsing has been done before we get here      // we assume that any collapsing has been done before we get here
1076      // since we only have one argument we don't need to think about only      // since we only have one argument we don't need to think about only
1077      // processing single points.      // processing single points.
1078        // we will also know we won't get identity nodes
1079    if (m_readytype!='E')    if (m_readytype!='E')
1080    {    {
1081      throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");      throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
1082    }    }
1083    const ValueType* vleft=m_left->resolveSample(v,offset,sampleNo,roffset);    if (m_op==IDENTITY)
1084    const double* left=&((*vleft)[roffset]);    {
1085    double* result=&(v[offset]);      throw DataException("Programmer error - resolveNodeUnary should not be called on identity nodes.");
1086    roffset=offset;    }
1087      const DataTypes::RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, roffset);
1088      const double* left=&((*leftres)[roffset]);
1089      roffset=m_samplesize*tid;
1090      double* result=&(m_samples[roffset]);
1091    switch (m_op)    switch (m_op)
1092    {    {
1093      case SIN:        case SIN:  
# Line 533  DataLazy::resolveUnary(ValueType& v, siz Line 1118  DataLazy::resolveUnary(ValueType& v, siz
1118      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tanh);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tanh);
1119      break;      break;
1120      case ERF:      case ERF:
1121  #ifdef _WIN32  #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1122      throw DataException("Error - Data:: erf function is not supported on _WIN32 platforms.");      throw DataException("Error - Data:: erf function is not supported on _WIN32 platforms.");
1123  #else  #else
1124      tensor_unary_operation(m_samplesize, left, result, ::erf);      tensor_unary_operation(m_samplesize, left, result, ::erf);
1125      break;      break;
1126  #endif  #endif
1127     case ASINH:     case ASINH:
1128  #ifdef _WIN32  #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1129      tensor_unary_operation(m_samplesize, left, result, escript::asinh_substitute);      tensor_unary_operation(m_samplesize, left, result, escript::asinh_substitute);
1130  #else  #else
1131      tensor_unary_operation(m_samplesize, left, result, ::asinh);      tensor_unary_operation(m_samplesize, left, result, ::asinh);
1132  #endif    #endif  
1133      break;      break;
1134     case ACOSH:     case ACOSH:
1135  #ifdef _WIN32  #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1136      tensor_unary_operation(m_samplesize, left, result, escript::acosh_substitute);      tensor_unary_operation(m_samplesize, left, result, escript::acosh_substitute);
1137  #else  #else
1138      tensor_unary_operation(m_samplesize, left, result, ::acosh);      tensor_unary_operation(m_samplesize, left, result, ::acosh);
1139  #endif    #endif  
1140      break;      break;
1141     case ATANH:     case ATANH:
1142  #ifdef _WIN32  #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1143      tensor_unary_operation(m_samplesize, left, result, escript::atanh_substitute);      tensor_unary_operation(m_samplesize, left, result, escript::atanh_substitute);
1144  #else  #else
1145      tensor_unary_operation(m_samplesize, left, result, ::atanh);      tensor_unary_operation(m_samplesize, left, result, ::atanh);
# Line 601  DataLazy::resolveUnary(ValueType& v, siz Line 1186  DataLazy::resolveUnary(ValueType& v, siz
1186      case LEZ:      case LEZ:
1187      tensor_unary_operation(m_samplesize, left, result, bind2nd(less_equal<double>(),0.0));      tensor_unary_operation(m_samplesize, left, result, bind2nd(less_equal<double>(),0.0));
1188      break;      break;
1189    // There are actually G_UNARY_P but I don't see a compelling reason to treat them differently
1190        case NEZ:
1191        tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsGT(),m_tol));
1192        break;
1193        case EZ:
1194        tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsLTE(),m_tol));
1195        break;
1196    
1197      default:      default:
1198      throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");      throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");
1199    }    }
1200    return &v;    return &(m_samples);
1201  }  }
1202    
1203    
1204    const DataTypes::RealVectorType*
1205    DataLazy::resolveNodeReduction(int tid, int sampleNo, size_t& roffset) const
1206    {
1207        // we assume that any collapsing has been done before we get here
1208        // since we only have one argument we don't need to think about only
1209        // processing single points.
1210        // we will also know we won't get identity nodes
1211      if (m_readytype!='E')
1212      {
1213        throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
1214      }
1215      if (m_op==IDENTITY)
1216      {
1217        throw DataException("Programmer error - resolveNodeUnary should not be called on identity nodes.");
1218      }
1219      size_t loffset=0;
1220      const DataTypes::RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, loffset);
1221    
1222      roffset=m_samplesize*tid;
1223      unsigned int ndpps=getNumDPPSample();
1224      unsigned int psize=DataTypes::noValues(m_left->getShape());
1225      double* result=&(m_samples[roffset]);
1226      switch (m_op)
1227      {
1228        case MINVAL:
1229        {
1230          for (unsigned int z=0;z<ndpps;++z)
1231          {
1232            FMin op;
1233            *result=DataMaths::reductionOp(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max());
1234            loffset+=psize;
1235            result++;
1236          }
1237        }
1238        break;
1239        case MAXVAL:
1240        {
1241          for (unsigned int z=0;z<ndpps;++z)
1242          {
1243          FMax op;
1244          *result=DataMaths::reductionOp(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max()*-1);
1245          loffset+=psize;
1246          result++;
1247          }
1248        }
1249        break;
1250        default:
1251        throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");
1252      }
1253      return &(m_samples);
1254    }
1255    
1256    const DataTypes::RealVectorType*
1257    DataLazy::resolveNodeNP1OUT(int tid, int sampleNo, size_t& roffset) const
1258    {
1259        // we assume that any collapsing has been done before we get here
1260        // since we only have one argument we don't need to think about only
1261        // processing single points.
1262      if (m_readytype!='E')
1263      {
1264        throw DataException("Programmer error - resolveNodeNP1OUT should only be called on expanded Data.");
1265      }
1266      if (m_op==IDENTITY)
1267      {
1268        throw DataException("Programmer error - resolveNodeNP1OUT should not be called on identity nodes.");
1269      }
1270      size_t subroffset;
1271      const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1272      roffset=m_samplesize*tid;
1273      size_t loop=0;
1274      size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1275      size_t step=getNoValues();
1276      size_t offset=roffset;
1277      switch (m_op)
1278      {
1279        case SYM:
1280        for (loop=0;loop<numsteps;++loop)
1281        {
1282            DataMaths::symmetric(*leftres,m_left->getShape(),subroffset, m_samples, getShape(), offset);
1283            subroffset+=step;
1284            offset+=step;
1285        }
1286        break;
1287        case NSYM:
1288        for (loop=0;loop<numsteps;++loop)
1289        {
1290            DataMaths::nonsymmetric(*leftres,m_left->getShape(),subroffset, m_samples, getShape(), offset);
1291            subroffset+=step;
1292            offset+=step;
1293        }
1294        break;
1295        default:
1296        throw DataException("Programmer error - resolveNP1OUT can not resolve operator "+opToString(m_op)+".");
1297      }
1298      return &m_samples;
1299    }
1300    
1301  #define PROC_OP(TYPE,X)                               \  const DataTypes::RealVectorType*
1302      for (int i=0;i<steps;++i,resultp+=resultStep) \  DataLazy::resolveNodeNP1OUT_P(int tid, int sampleNo, size_t& roffset) const
1303      { \  {
1304         tensor_binary_operation##TYPE(chunksize, &((*left)[lroffset]), &((*right)[rroffset]), resultp, X); \      // we assume that any collapsing has been done before we get here
1305         lroffset+=leftStep; \      // since we only have one argument we don't need to think about only
1306         rroffset+=rightStep; \      // processing single points.
1307      if (m_readytype!='E')
1308      {
1309        throw DataException("Programmer error - resolveNodeNP1OUT_P should only be called on expanded Data.");
1310      }
1311      if (m_op==IDENTITY)
1312      {
1313        throw DataException("Programmer error - resolveNodeNP1OUT_P should not be called on identity nodes.");
1314      }
1315      size_t subroffset;
1316      size_t offset;
1317      const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1318      roffset=m_samplesize*tid;
1319      offset=roffset;
1320      size_t loop=0;
1321      size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1322      size_t outstep=getNoValues();
1323      size_t instep=m_left->getNoValues();
1324      switch (m_op)
1325      {
1326        case TRACE:
1327        for (loop=0;loop<numsteps;++loop)
1328        {
1329                DataMaths::trace(*leftres,m_left->getShape(),subroffset, m_samples ,getShape(),offset,m_axis_offset);
1330            subroffset+=instep;
1331            offset+=outstep;
1332        }
1333        break;
1334        case TRANS:
1335        for (loop=0;loop<numsteps;++loop)
1336        {
1337                DataMaths::transpose(*leftres,m_left->getShape(),subroffset, m_samples, getShape(),offset,m_axis_offset);
1338            subroffset+=instep;
1339            offset+=outstep;
1340      }      }
1341        break;
1342        default:
1343        throw DataException("Programmer error - resolveNP1OUTP can not resolve operator "+opToString(m_op)+".");
1344      }
1345      return &m_samples;
1346    }
1347    
1348    
1349    const DataTypes::RealVectorType*
1350    DataLazy::resolveNodeNP1OUT_2P(int tid, int sampleNo, size_t& roffset) const
1351    {
1352      if (m_readytype!='E')
1353      {
1354        throw DataException("Programmer error - resolveNodeNP1OUT_2P should only be called on expanded Data.");
1355      }
1356      if (m_op==IDENTITY)
1357      {
1358        throw DataException("Programmer error - resolveNodeNP1OUT_2P should not be called on identity nodes.");
1359      }
1360      size_t subroffset;
1361      size_t offset;
1362      const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1363      roffset=m_samplesize*tid;
1364      offset=roffset;
1365      size_t loop=0;
1366      size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1367      size_t outstep=getNoValues();
1368      size_t instep=m_left->getNoValues();
1369      switch (m_op)
1370      {
1371        case SWAP:
1372        for (loop=0;loop<numsteps;++loop)
1373        {
1374                DataMaths::swapaxes(*leftres,m_left->getShape(),subroffset, m_samples, getShape(),offset, m_axis_offset, m_transpose);
1375            subroffset+=instep;
1376            offset+=outstep;
1377        }
1378        break;
1379        default:
1380        throw DataException("Programmer error - resolveNodeNP1OUT2P can not resolve operator "+opToString(m_op)+".");
1381      }
1382      return &m_samples;
1383    }
1384    
1385    const DataTypes::RealVectorType*
1386    DataLazy::resolveNodeCondEval(int tid, int sampleNo, size_t& roffset) const
1387    {
1388      if (m_readytype!='E')
1389      {
1390        throw DataException("Programmer error - resolveNodeCondEval should only be called on expanded Data.");
1391      }
1392      if (m_op!=CONDEVAL)
1393      {
1394        throw DataException("Programmer error - resolveNodeCondEval should only be called on CONDEVAL nodes.");
1395      }
1396      size_t subroffset;
1397    
1398      const ValueType* maskres=m_mask->resolveNodeSample(tid, sampleNo, subroffset);
1399      const ValueType* srcres=0;
1400      if ((*maskres)[subroffset]>0)
1401      {
1402        srcres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1403      }
1404      else
1405      {
1406        srcres=m_right->resolveNodeSample(tid, sampleNo, subroffset);
1407      }
1408    
1409      // Now we need to copy the result
1410    
1411      roffset=m_samplesize*tid;
1412      for (int i=0;i<m_samplesize;++i)
1413      {
1414        m_samples[roffset+i]=(*srcres)[subroffset+i];  
1415      }
1416    
1417      return &m_samples;
1418    }
1419    
 /*  
   \brief Compute the value of the expression (binary operation) for the given sample.  
   \return Vector which stores the value of the subexpression for the given sample.  
   \param v A vector to store intermediate results.  
   \param offset Index in v to begin storing results.  
   \param sampleNo Sample number to evaluate.  
   \param roffset (output parameter) the offset in the return vector where the result begins.  
   
   The return value will be an existing vector so do not deallocate it.  
   If the result is stored in v it should be stored at the offset given.  
   Everything from offset to the end of v should be considered available for this method to use.  
 */  
1420  // This method assumes that any subexpressions which evaluate to Constant or Tagged Data  // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1421  // have already been collapsed to IDENTITY. So we must have at least one expanded child.  // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1422  // If both children are expanded, then we can process them in a single operation (we treat  // If both children are expanded, then we can process them in a single operation (we treat
# Line 641  DataLazy::resolveUnary(ValueType& v, siz Line 1426  DataLazy::resolveUnary(ValueType& v, siz
1426  // There is an additional complication when scalar operations are considered.  // There is an additional complication when scalar operations are considered.
1427  // For example, 2+Vector.  // For example, 2+Vector.
1428  // In this case each double within the point is treated individually  // In this case each double within the point is treated individually
1429  DataTypes::ValueType*  const DataTypes::RealVectorType*
1430  DataLazy::resolveBinary(ValueType& v,  size_t offset, int sampleNo, size_t& roffset) const  DataLazy::resolveNodeBinary(int tid, int sampleNo, size_t& roffset) const
1431  {  {
1432  cout << "Resolve binary: " << toString() << endl;  LAZYDEBUG(cout << "Resolve binary: " << toString() << endl;)
1433    
1434    size_t lroffset=0, rroffset=0;    // offsets in the left and right result vectors    size_t lroffset=0, rroffset=0;    // offsets in the left and right result vectors
1435      // first work out which of the children are expanded      // first work out which of the children are expanded
1436    bool leftExp=(m_left->m_readytype=='E');    bool leftExp=(m_left->m_readytype=='E');
1437    bool rightExp=(m_right->m_readytype=='E');    bool rightExp=(m_right->m_readytype=='E');
1438    bool bigloops=((leftExp && rightExp) || (!leftExp && !rightExp)); // is processing in single step?    if (!leftExp && !rightExp)
1439    int steps=(bigloops?1:getNumDPPSample());    {
1440    size_t chunksize=(bigloops? m_samplesize : getNoValues());    // if bigloops, pretend the whole sample is a datapoint      throw DataException("Programmer Error - please use collapse if neither argument has type 'E'.");
1441    if (m_left->getRank()!=m_right->getRank())    // need to deal with scalar * ? ops    }
1442    {    bool leftScalar=(m_left->getRank()==0);
1443      EsysAssert((m_left->getRank()==0) || (m_right->getRank()==0), "Error - Ranks must match unless one is 0.");    bool rightScalar=(m_right->getRank()==0);
1444      steps=getNumDPPSample()*max(m_left->getNoValues(),m_right->getNoValues());    if ((m_left->getRank()!=m_right->getRank()) && (!leftScalar && !rightScalar))
1445      chunksize=1;    // for scalar    {
1446    }          throw DataException("resolveBinary - ranks of arguments must match unless one of them is scalar.");
1447    int leftStep=((leftExp && !rightExp)? m_right->getNoValues() : 0);    }
1448    int rightStep=((rightExp && !leftExp)? m_left->getNoValues() : 0);    size_t leftsize=m_left->getNoValues();
1449    int resultStep=max(leftStep,rightStep);   // only one (at most) should be !=0    size_t rightsize=m_right->getNoValues();
1450      size_t chunksize=1;           // how many doubles will be processed in one go
1451      int leftstep=0;       // how far should the left offset advance after each step
1452      int rightstep=0;
1453      int numsteps=0;       // total number of steps for the inner loop
1454      int oleftstep=0;  // the o variables refer to the outer loop
1455      int orightstep=0; // The outer loop is only required in cases where there is an extended scalar
1456      int onumsteps=1;
1457      
1458      bool LES=(leftExp && leftScalar); // Left is an expanded scalar
1459      bool RES=(rightExp && rightScalar);
1460      bool LS=(!leftExp && leftScalar); // left is a single scalar
1461      bool RS=(!rightExp && rightScalar);
1462      bool LN=(!leftExp && !leftScalar);    // left is a single non-scalar
1463      bool RN=(!rightExp && !rightScalar);
1464      bool LEN=(leftExp && !leftScalar);    // left is an expanded non-scalar
1465      bool REN=(rightExp && !rightScalar);
1466    
1467      if ((LES && RES) || (LEN && REN)) // both are Expanded scalars or both are expanded non-scalars
1468      {
1469        chunksize=m_left->getNumDPPSample()*leftsize;
1470        leftstep=0;
1471        rightstep=0;
1472        numsteps=1;
1473      }
1474      else if (LES || RES)
1475      {
1476        chunksize=1;
1477        if (LES)        // left is an expanded scalar
1478        {
1479            if (RS)
1480            {
1481               leftstep=1;
1482               rightstep=0;
1483               numsteps=m_left->getNumDPPSample();
1484            }
1485            else        // RN or REN
1486            {
1487               leftstep=0;
1488               oleftstep=1;
1489               rightstep=1;
1490               orightstep=(RN ? -(int)rightsize : 0);
1491               numsteps=rightsize;
1492               onumsteps=m_left->getNumDPPSample();
1493            }
1494        }
1495        else        // right is an expanded scalar
1496        {
1497            if (LS)
1498            {
1499               rightstep=1;
1500               leftstep=0;
1501               numsteps=m_right->getNumDPPSample();
1502            }
1503            else
1504            {
1505               rightstep=0;
1506               orightstep=1;
1507               leftstep=1;
1508               oleftstep=(LN ? -(int)leftsize : 0);
1509               numsteps=leftsize;
1510               onumsteps=m_right->getNumDPPSample();
1511            }
1512        }
1513      }
1514      else  // this leaves (LEN, RS), (LEN, RN) and their transposes
1515      {
1516        if (LEN)    // and Right will be a single value
1517        {
1518            chunksize=rightsize;
1519            leftstep=rightsize;
1520            rightstep=0;
1521            numsteps=m_left->getNumDPPSample();
1522            if (RS)
1523            {
1524               numsteps*=leftsize;
1525            }
1526        }
1527        else    // REN
1528        {
1529            chunksize=leftsize;
1530            rightstep=leftsize;
1531            leftstep=0;
1532            numsteps=m_right->getNumDPPSample();
1533            if (LS)
1534            {
1535               numsteps*=rightsize;
1536            }
1537        }
1538      }
1539    
1540      int resultStep=max(leftstep,rightstep);   // only one (at most) should be !=0
1541      // Get the values of sub-expressions      // Get the values of sub-expressions
1542    const ValueType* left=m_left->resolveSample(v,offset,sampleNo,lroffset);    const ValueType* left=m_left->resolveNodeSample(tid,sampleNo,lroffset);  
1543    const ValueType* right=m_right->resolveSample(v,offset+m_samplesize,sampleNo,rroffset); // Note    const ValueType* right=m_right->resolveNodeSample(tid,sampleNo,rroffset);
1544      // the right child starts further along.  LAZYDEBUG(cout << "Post sub calls in " << toString() << endl;)
1545    double* resultp=&(v[offset]);     // results are stored at the vector offset we recieved  LAZYDEBUG(cout << "shapes=" << DataTypes::shapeToString(m_left->getShape()) << "," << DataTypes::shapeToString(m_right->getShape()) << endl;)
1546    LAZYDEBUG(cout << "chunksize=" << chunksize << endl << "leftstep=" << leftstep << " rightstep=" << rightstep;)
1547    LAZYDEBUG(cout << " numsteps=" << numsteps << endl << "oleftstep=" << oleftstep << " orightstep=" << orightstep;)
1548    LAZYDEBUG(cout << "onumsteps=" << onumsteps << endl;)
1549    LAZYDEBUG(cout << " DPPS=" << m_left->getNumDPPSample() << "," <<m_right->getNumDPPSample() << endl;)
1550    LAZYDEBUG(cout << "" << LS << RS << LN << RN << LES << RES <<LEN << REN <<   endl;)
1551    
1552    LAZYDEBUG(cout << "Left res["<< lroffset<< "]=" << (*left)[lroffset] << endl;)
1553    LAZYDEBUG(cout << "Right res["<< rroffset<< "]=" << (*right)[rroffset] << endl;)
1554    
1555    
1556      roffset=m_samplesize*tid;
1557      double* resultp=&(m_samples[roffset]);        // results are stored at the vector offset we received
1558    switch(m_op)    switch(m_op)
1559    {    {
1560      case ADD:      case ADD:
1561          PROC_OP(/**/,plus<double>());          PROC_OP(NO_ARG,plus<double>());
1562      break;      break;
1563      case SUB:      case SUB:
1564      PROC_OP(/**/,minus<double>());      PROC_OP(NO_ARG,minus<double>());
1565      break;      break;
1566      case MUL:      case MUL:
1567      PROC_OP(/**/,multiplies<double>());      PROC_OP(NO_ARG,multiplies<double>());
1568      break;      break;
1569      case DIV:      case DIV:
1570      PROC_OP(/**/,divides<double>());      PROC_OP(NO_ARG,divides<double>());
1571      break;      break;
1572      case POW:      case POW:
1573         PROC_OP(<double (double,double)>,::pow);         PROC_OP(double (double,double),::pow);
1574      break;      break;
1575      default:      default:
1576      throw DataException("Programmer error - resolveBinary can not resolve operator "+opToString(m_op)+".");      throw DataException("Programmer error - resolveBinary can not resolve operator "+opToString(m_op)+".");
1577    }    }
1578    roffset=offset;    LAZYDEBUG(cout << "Result res[" << roffset<< "]" << m_samples[roffset] << endl;)
1579    return &v;    return &m_samples;
1580  }  }
1581    
1582    
1583    // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1584    // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1585    // unlike the other resolve helpers, we must treat these datapoints separately.
1586    const DataTypes::RealVectorType*
1587    DataLazy::resolveNodeTProd(int tid, int sampleNo, size_t& roffset) const
1588    {
1589    LAZYDEBUG(cout << "Resolve TensorProduct: " << toString() << endl;)
1590    
1591  /*    size_t lroffset=0, rroffset=0;    // offsets in the left and right result vectors
1592    \brief Compute the value of the expression for the given sample.      // first work out which of the children are expanded
1593    \return Vector which stores the value of the subexpression for the given sample.    bool leftExp=(m_left->m_readytype=='E');
1594    \param v A vector to store intermediate results.    bool rightExp=(m_right->m_readytype=='E');
1595    \param offset Index in v to begin storing results.    int steps=getNumDPPSample();
1596    \param sampleNo Sample number to evaluate.    int leftStep=(leftExp? m_left->getNoValues() : 0);        // do not have scalars as input to this method
1597    \param roffset (output parameter) the offset in the return vector where the result begins.    int rightStep=(rightExp?m_right->getNoValues() : 0);
1598    
1599    The return value will be an existing vector so do not deallocate it.    int resultStep=getNoValues();
1600  */    roffset=m_samplesize*tid;
1601  // the vector and the offset are a place where the method could write its data if it wishes    size_t offset=roffset;
1602  // it is not obligated to do so. For example, if it has its own storage already, it can use that.  
1603  // Hence the return value to indicate where the data is actually stored.    const ValueType* left=m_left->resolveNodeSample(tid, sampleNo, lroffset);
1604  // Regardless, the storage should be assumed to be used, even if it isn't.  
1605      const ValueType* right=m_right->resolveNodeSample(tid, sampleNo, rroffset);
1606  // the roffset is the offset within the returned vector where the data begins  
1607  const DataTypes::ValueType*  LAZYDEBUG(cerr << "[Left shape]=" << DataTypes::shapeToString(m_left->getShape()) << "\n[Right shape]=" << DataTypes::shapeToString(m_right->getShape()) << " result=" <<DataTypes::shapeToString(getShape()) <<  endl;
1608  DataLazy::resolveSample(ValueType& v, size_t offset, int sampleNo, size_t& roffset)  cout << getNoValues() << endl;)
1609    
1610    
1611    LAZYDEBUG(cerr << "Post sub calls: " << toString() << endl;)
1612    LAZYDEBUG(cout << "LeftExp=" << leftExp << " rightExp=" << rightExp << endl;)
1613    LAZYDEBUG(cout << "LeftR=" << m_left->getRank() << " rightExp=" << m_right->getRank() << endl;)
1614    LAZYDEBUG(cout << "LeftSize=" << m_left->getNoValues() << " RightSize=" << m_right->getNoValues() << endl;)
1615    LAZYDEBUG(cout << "m_samplesize=" << m_samplesize << endl;)
1616    LAZYDEBUG(cout << "outputshape=" << DataTypes::shapeToString(getShape()) << endl;)
1617    LAZYDEBUG(cout << "DPPS=" << m_right->getNumDPPSample() <<"."<<endl;)
1618    
1619      double* resultp=&(m_samples[offset]);     // results are stored at the vector offset we received
1620      switch(m_op)
1621      {
1622        case PROD:
1623        for (int i=0;i<steps;++i,resultp+=resultStep)
1624        {
1625              const double *ptr_0 = &((*left)[lroffset]);
1626              const double *ptr_1 = &((*right)[rroffset]);
1627    
1628    LAZYDEBUG(cout << DataTypes::pointToString(*left, m_left->getShape(),lroffset,"LEFT") << endl;)
1629    LAZYDEBUG(cout << DataTypes::pointToString(*right,m_right->getShape(),rroffset, "RIGHT") << endl;)
1630    
1631              matrix_matrix_product(m_SL, m_SM, m_SR, ptr_0, ptr_1, resultp, m_transpose);
1632    
1633          lroffset+=leftStep;
1634          rroffset+=rightStep;
1635        }
1636        break;
1637        default:
1638        throw DataException("Programmer error - resolveTProduct can not resolve operator "+opToString(m_op)+".");
1639      }
1640      roffset=offset;
1641      return &m_samples;
1642    }
1643    
1644    
1645    const DataTypes::RealVectorType*
1646    DataLazy::resolveSample(int sampleNo, size_t& roffset) const
1647  {  {
1648  cout << "Resolve sample " << toString() << endl;  #ifdef _OPENMP
1649      // collapse so we have a 'E' node or an IDENTITY for some other type      int tid=omp_get_thread_num();
1650    if (m_readytype!='E' && m_op!=IDENTITY)  #else
1651        int tid=0;
1652    #endif
1653    
1654    #ifdef LAZY_STACK_PROF
1655        stackstart[tid]=&tid;
1656        stackend[tid]=&tid;
1657        const DataTypes::RealVectorType* r=resolveNodeSample(tid, sampleNo, roffset);
1658        size_t d=(size_t)stackstart[tid]-(size_t)stackend[tid];
1659        #pragma omp critical
1660        if (d>maxstackuse)
1661        {
1662    cout << "Max resolve Stack use " << d << endl;
1663            maxstackuse=d;
1664        }
1665        return r;
1666    #else
1667        return resolveNodeSample(tid, sampleNo, roffset);
1668    #endif
1669    }
1670    
1671    
1672    // This needs to do the work of the identity constructor
1673    void
1674    DataLazy::resolveToIdentity()
1675    {
1676       if (m_op==IDENTITY)
1677        return;
1678       DataReady_ptr p=resolveNodeWorker();
1679       makeIdentity(p);
1680    }
1681    
1682    void DataLazy::makeIdentity(const DataReady_ptr& p)
1683    {
1684       m_op=IDENTITY;
1685       m_axis_offset=0;
1686       m_transpose=0;
1687       m_SL=m_SM=m_SR=0;
1688       m_children=m_height=0;
1689       m_id=p;
1690       if(p->isConstant()) {m_readytype='C';}
1691       else if(p->isExpanded()) {m_readytype='E';}
1692       else if (p->isTagged()) {m_readytype='T';}
1693       else {throw DataException("Unknown DataReady instance in convertToIdentity constructor.");}
1694       m_samplesize=p->getNumDPPSample()*p->getNoValues();
1695       m_left.reset();
1696       m_right.reset();
1697    }
1698    
1699    
1700    DataReady_ptr
1701    DataLazy::resolve()
1702    {
1703        resolveToIdentity();
1704        return m_id;
1705    }
1706    
1707    
1708    /* This is really a static method but I think that caused problems in windows */
1709    void
1710    DataLazy::resolveGroupWorker(std::vector<DataLazy*>& dats)
1711    {
1712      if (dats.empty())
1713    {    {
1714      collapse();      return;
1715    }    }
1716    if (m_op==IDENTITY)      vector<DataLazy*> work;
1717      FunctionSpace fs=dats[0]->getFunctionSpace();
1718      bool match=true;
1719      for (int i=dats.size()-1;i>=0;--i)
1720    {    {
1721      const ValueType& vec=m_id->getVector();      if (dats[i]->m_readytype!='E')
1722      if (m_readytype=='C')      {
1723      {          dats[i]->collapse();
1724      roffset=0;      }
1725      return &(vec);      if (dats[i]->m_op!=IDENTITY)
1726      }      {
1727      roffset=m_id->getPointOffset(sampleNo, 0);          work.push_back(dats[i]);
1728      return &(vec);          if (fs!=dats[i]->getFunctionSpace())
1729            {
1730                match=false;
1731            }
1732        }
1733    }    }
1734    if (m_readytype!='E')    if (work.empty())
1735    {    {
1736      throw DataException("Programmer Error - Collapse did not produce an expanded node.");      return;     // no work to do
1737    }    }
1738    switch (getOpgroup(m_op))    if (match)    // all functionspaces match.  Yes I realise this is overly strict
1739      {     // it is possible that dats[0] is one of the objects which we discarded and
1740            // all the other functionspaces match.
1741        vector<DataExpanded*> dep;
1742        vector<ValueType*> vecs;
1743        for (int i=0;i<work.size();++i)
1744        {
1745            dep.push_back(new DataExpanded(fs,work[i]->getShape(), ValueType(work[i]->getNoValues())));
1746            vecs.push_back(&(dep[i]->getVectorRW()));
1747        }
1748        int totalsamples=work[0]->getNumSamples();
1749        const ValueType* res=0; // Storage for answer
1750        int sample;
1751        #pragma omp parallel private(sample, res)
1752        {
1753            size_t roffset=0;
1754            #pragma omp for schedule(static)
1755            for (sample=0;sample<totalsamples;++sample)
1756            {
1757            roffset=0;
1758            int j;
1759            for (j=work.size()-1;j>=0;--j)
1760            {
1761    #ifdef _OPENMP
1762                    res=work[j]->resolveNodeSample(omp_get_thread_num(),sample,roffset);
1763    #else
1764                    res=work[j]->resolveNodeSample(0,sample,roffset);
1765    #endif
1766                    RealVectorType::size_type outoffset=dep[j]->getPointOffset(sample,0);
1767                    memcpy(&((*vecs[j])[outoffset]),&((*res)[roffset]),work[j]->m_samplesize*sizeof(RealVectorType::ElementType));
1768            }
1769            }
1770        }
1771        // Now we need to load the new results as identity ops into the lazy nodes
1772        for (int i=work.size()-1;i>=0;--i)
1773        {
1774            work[i]->makeIdentity(boost::dynamic_pointer_cast<DataReady>(dep[i]->getPtr()));
1775        }
1776      }
1777      else  // functionspaces do not match
1778    {    {
1779    case G_UNARY: return resolveUnary(v, offset,sampleNo,roffset);      for (int i=0;i<work.size();++i)
1780    case G_BINARY: return resolveBinary(v, offset,sampleNo,roffset);      {
1781    default:          work[i]->resolveToIdentity();
1782      throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");      }
1783    }    }
1784  }  }
1785    
1786    
1787  // To simplify the memory management, all threads operate on one large vector, rather than one each.  
1788  // Each sample is evaluated independently and copied into the result DataExpanded.  // This version of resolve uses storage in each node to hold results
1789  DataReady_ptr  DataReady_ptr
1790  DataLazy::resolve()  DataLazy::resolveNodeWorker()
1791  {  {
   
 cout << "Sample size=" << m_samplesize << endl;  
 cout << "Buffers=" << m_buffsRequired << endl;  
   
1792    if (m_readytype!='E')     // if the whole sub-expression is Constant or Tagged, then evaluate it normally    if (m_readytype!='E')     // if the whole sub-expression is Constant or Tagged, then evaluate it normally
1793    {    {
1794      collapse();      collapse();
# Line 761  cout << "Buffers=" << m_buffsRequired << Line 1798  cout << "Buffers=" << m_buffsRequired <<
1798      return m_id;      return m_id;
1799    }    }
1800      // from this point on we must have m_op!=IDENTITY and m_readytype=='E'      // from this point on we must have m_op!=IDENTITY and m_readytype=='E'
   size_t threadbuffersize=m_samplesize*(max(1,m_buffsRequired));    // Each thread needs to have enough  
     // storage to evaluate its expression  
   int numthreads=1;  
 #ifdef _OPENMP  
   numthreads=getNumberOfThreads();  
   int threadnum=0;  
 #endif  
   ValueType v(numthreads*threadbuffersize);  
 cout << "Buffer created with size=" << v.size() << endl;  
1801    DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(),  ValueType(getNoValues()));    DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(),  ValueType(getNoValues()));
1802    ValueType& resvec=result->getVector();    ValueType& resvec=result->getVectorRW();
1803    DataReady_ptr resptr=DataReady_ptr(result);    DataReady_ptr resptr=DataReady_ptr(result);
1804    
1805    int sample;    int sample;
   size_t outoffset;     // offset in the output data  
1806    int totalsamples=getNumSamples();    int totalsamples=getNumSamples();
1807    const ValueType* res=0;   // Vector storing the answer    const ValueType* res=0;   // Storage for answer
1808    size_t resoffset=0;       // where in the vector to find the answer  LAZYDEBUG(cout << "Total number of samples=" <<totalsamples << endl;)
1809    #pragma omp parallel for private(sample,resoffset,outoffset,threadnum,res) schedule(static)    #pragma omp parallel private(sample,res)
1810    for (sample=0;sample<totalsamples;++sample)    {
1811    {      size_t roffset=0;
1812  cout << "################################# " << sample << endl;  #ifdef LAZY_STACK_PROF
1813        stackstart[omp_get_thread_num()]=&roffset;
1814        stackend[omp_get_thread_num()]=&roffset;
1815    #endif
1816        #pragma omp for schedule(static)
1817        for (sample=0;sample<totalsamples;++sample)
1818        {
1819            roffset=0;
1820  #ifdef _OPENMP  #ifdef _OPENMP
1821      res=resolveSample(v,threadbuffersize*omp_get_thread_num(),sample,resoffset);              res=resolveNodeSample(omp_get_thread_num(),sample,roffset);
1822  #else  #else
1823      res=resolveSample(v,0,sample,resoffset);   // res would normally be v, but not if its a single IDENTITY op.              res=resolveNodeSample(0,sample,roffset);
1824  #endif  #endif
1825  cerr << "-------------------------------- " << endl;  LAZYDEBUG(cout << "Sample #" << sample << endl;)
1826      outoffset=result->getPointOffset(sample,0);  LAZYDEBUG(cout << "Final res[" << roffset<< "]=" << (*res)[roffset] << (*res)[roffset]<< endl; )
1827  cerr << "offset=" << outoffset << endl;              RealVectorType::size_type outoffset=result->getPointOffset(sample,0);
1828      for (unsigned int i=0;i<m_samplesize;++i,++outoffset,++resoffset)   // copy values into the output vector              memcpy(&(resvec[outoffset]),&((*res)[roffset]),m_samplesize*sizeof(RealVectorType::ElementType));
1829      {      }
1830      resvec[outoffset]=(*res)[resoffset];    }
1831      }  #ifdef LAZY_STACK_PROF
1832  cerr << "*********************************" << endl;    for (int i=0;i<getNumberOfThreads();++i)
1833      {
1834        size_t r=((size_t)stackstart[i] - (size_t)stackend[i]);
1835    //  cout << i << " " << stackstart[i] << " .. " << stackend[i] << " = " <<  r << endl;
1836        if (r>maxstackuse)
1837        {
1838            maxstackuse=r;
1839        }
1840    }    }
1841      cout << "Max resolve Stack use=" << maxstackuse << endl;
1842    #endif
1843    return resptr;    return resptr;
1844  }  }
1845    
# Line 803  std::string Line 1847  std::string
1847  DataLazy::toString() const  DataLazy::toString() const
1848  {  {
1849    ostringstream oss;    ostringstream oss;
1850    oss << "Lazy Data:";    oss << "Lazy Data: [depth=" << m_height<< "] ";
1851    intoString(oss);    switch (escriptParams.getLAZY_STR_FMT())
1852      {
1853      case 1:   // tree format
1854        oss << endl;
1855        intoTreeString(oss,"");
1856        break;
1857      case 2:   // just the depth
1858        break;
1859      default:
1860        intoString(oss);
1861        break;
1862      }
1863    return oss.str();    return oss.str();
1864  }  }
1865    
# Line 812  DataLazy::toString() const Line 1867  DataLazy::toString() const
1867  void  void
1868  DataLazy::intoString(ostringstream& oss) const  DataLazy::intoString(ostringstream& oss) const
1869  {  {
1870    //    oss << "[" << m_children <<";"<<m_height <<"]";
1871    switch (getOpgroup(m_op))    switch (getOpgroup(m_op))
1872    {    {
1873    case G_IDENTITY:    case G_IDENTITY:
# Line 841  DataLazy::intoString(ostringstream& oss) Line 1897  DataLazy::intoString(ostringstream& oss)
1897      oss << ')';      oss << ')';
1898      break;      break;
1899    case G_UNARY:    case G_UNARY:
1900      case G_UNARY_P:
1901      case G_NP1OUT:
1902      case G_NP1OUT_P:
1903      case G_REDUCTION:
1904      oss << opToString(m_op) << '(';      oss << opToString(m_op) << '(';
1905      m_left->intoString(oss);      m_left->intoString(oss);
1906      oss << ')';      oss << ')';
1907      break;      break;
1908      case G_TENSORPROD:
1909        oss << opToString(m_op) << '(';
1910        m_left->intoString(oss);
1911        oss << ", ";
1912        m_right->intoString(oss);
1913        oss << ')';
1914        break;
1915      case G_NP1OUT_2P:
1916        oss << opToString(m_op) << '(';
1917        m_left->intoString(oss);
1918        oss << ", " << m_axis_offset << ", " << m_transpose;
1919        oss << ')';
1920        break;
1921      case G_CONDEVAL:
1922        oss << opToString(m_op)<< '(' ;
1923        m_mask->intoString(oss);
1924        oss << " ? ";
1925        m_left->intoString(oss);
1926        oss << " : ";
1927        m_right->intoString(oss);
1928        oss << ')';
1929        break;
1930      default:
1931        oss << "UNKNOWN";
1932      }
1933    }
1934    
1935    
1936    void
1937    DataLazy::intoTreeString(ostringstream& oss, string indent) const
1938    {
1939      oss << '[' << m_rank << ':' << setw(3) << m_samplesize << "] " << indent;
1940      switch (getOpgroup(m_op))
1941      {
1942      case G_IDENTITY:
1943        if (m_id->isExpanded())
1944        {
1945           oss << "E";
1946        }
1947        else if (m_id->isTagged())
1948        {
1949          oss << "T";
1950        }
1951        else if (m_id->isConstant())
1952        {
1953          oss << "C";
1954        }
1955        else
1956        {
1957          oss << "?";
1958        }
1959        oss << '@' << m_id.get() << endl;
1960        break;
1961      case G_BINARY:
1962        oss << opToString(m_op) << endl;
1963        indent+='.';
1964        m_left->intoTreeString(oss, indent);
1965        m_right->intoTreeString(oss, indent);
1966        break;
1967      case G_UNARY:
1968      case G_UNARY_P:
1969      case G_NP1OUT:
1970      case G_NP1OUT_P:
1971      case G_REDUCTION:
1972        oss << opToString(m_op) << endl;
1973        indent+='.';
1974        m_left->intoTreeString(oss, indent);
1975        break;
1976      case G_TENSORPROD:
1977        oss << opToString(m_op) << endl;
1978        indent+='.';
1979        m_left->intoTreeString(oss, indent);
1980        m_right->intoTreeString(oss, indent);
1981        break;
1982      case G_NP1OUT_2P:
1983        oss << opToString(m_op) << ", " << m_axis_offset << ", " << m_transpose<< endl;
1984        indent+='.';
1985        m_left->intoTreeString(oss, indent);
1986        break;
1987    default:    default:
1988      oss << "UNKNOWN";      oss << "UNKNOWN";
1989    }    }
1990  }  }
1991    
1992    
1993  DataAbstract*  DataAbstract*
1994  DataLazy::deepCopy()  DataLazy::deepCopy() const
1995  {  {
1996    switch (getOpgroup(m_op))    switch (getOpgroup(m_op))
1997    {    {
1998    case G_IDENTITY:  return new DataLazy(m_id->deepCopy()->getPtr());    case G_IDENTITY:  return new DataLazy(m_id->deepCopy()->getPtr());
1999    case G_UNARY: return new DataLazy(m_left->deepCopy()->getPtr(),m_op);    case G_UNARY:
2000      case G_REDUCTION:      return new DataLazy(m_left->deepCopy()->getPtr(),m_op);
2001      case G_UNARY_P:   return new DataLazy(m_left->deepCopy()->getPtr(), m_op, m_tol);
2002    case G_BINARY:    return new DataLazy(m_left->deepCopy()->getPtr(),m_right->deepCopy()->getPtr(),m_op);    case G_BINARY:    return new DataLazy(m_left->deepCopy()->getPtr(),m_right->deepCopy()->getPtr(),m_op);
2003      case G_NP1OUT: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(),m_op);
2004      case G_TENSORPROD: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
2005      case G_NP1OUT_P:   return new DataLazy(m_left->deepCopy()->getPtr(),m_op,  m_axis_offset);
2006      case G_NP1OUT_2P:  return new DataLazy(m_left->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
2007    default:    default:
2008      throw DataException("Programmer error - do not know how to deepcopy operator "+opToString(m_op)+".");      throw DataException("Programmer error - do not know how to deepcopy operator "+opToString(m_op)+".");
2009    }    }
2010  }  }
2011    
2012    
2013  DataTypes::ValueType::size_type  
2014    // There is no single, natural interpretation of getLength on DataLazy.
2015    // Instances of DataReady can look at the size of their vectors.
2016    // For lazy though, it could be the size the data would be if it were resolved;
2017    // or it could be some function of the lengths of the DataReady instances which
2018    // form part of the expression.
2019    // Rather than have people making assumptions, I have disabled the method.
2020    DataTypes::RealVectorType::size_type
2021  DataLazy::getLength() const  DataLazy::getLength() const
2022  {  {
2023    return m_length;    throw DataException("getLength() does not make sense for lazy data.");
2024  }  }
2025    
2026    
# Line 879  DataLazy::getSlice(const DataTypes::Regi Line 2032  DataLazy::getSlice(const DataTypes::Regi
2032    
2033    
2034  // To do this we need to rely on our child nodes  // To do this we need to rely on our child nodes
2035  DataTypes::ValueType::size_type  DataTypes::RealVectorType::size_type
2036  DataLazy::getPointOffset(int sampleNo,  DataLazy::getPointOffset(int sampleNo,
2037                   int dataPointNo)                   int dataPointNo)
2038  {  {
# Line 905  DataLazy::getPointOffset(int sampleNo, Line 2058  DataLazy::getPointOffset(int sampleNo,
2058  }  }
2059    
2060  // To do this we need to rely on our child nodes  // To do this we need to rely on our child nodes
2061  DataTypes::ValueType::size_type  DataTypes::RealVectorType::size_type
2062  DataLazy::getPointOffset(int sampleNo,  DataLazy::getPointOffset(int sampleNo,
2063                   int dataPointNo) const                   int dataPointNo) const
2064  {  {
# Line 933  DataLazy::getPointOffset(int sampleNo, Line 2086  DataLazy::getPointOffset(int sampleNo,
2086    throw DataException("Programmer error - getPointOffset on lazy data may require collapsing (but this object is marked const).");    throw DataException("Programmer error - getPointOffset on lazy data may require collapsing (but this object is marked const).");
2087  }  }
2088    
2089  // It would seem that DataTagged will need to be treated differently since even after setting all tags  
2090  // to zero, all the tags from all the DataTags would be in the result.  // I have decided to let Data:: handle this issue.
 // However since they all have the same value (0) whether they are there or not should not matter.  
 // So I have decided that for all types this method will create a constant 0.  
 // It can be promoted up as required.  
 // A possible efficiency concern might be expanded->constant->expanded which has an extra memory management  
 // but we can deal with that if it arrises.  
2091  void  void
2092  DataLazy::setToZero()  DataLazy::setToZero()
2093  {  {
2094    DataTypes::ValueType v(getNoValues(),0);  //   DataTypes::RealVectorType v(getNoValues(),0);
2095    m_id=DataReady_ptr(new DataConstant(getFunctionSpace(),getShape(),v));  //   m_id=DataReady_ptr(new DataConstant(getFunctionSpace(),getShape(),v));
2096    m_op=IDENTITY;  //   m_op=IDENTITY;
2097    m_right.reset();    //   m_right.reset();  
2098    m_left.reset();  //   m_left.reset();
2099    m_readytype='C';  //   m_readytype='C';
2100    m_buffsRequired=1;  //   m_buffsRequired=1;
2101    
2102      (void)privdebug;  // to stop the compiler complaining about unused privdebug
2103      throw DataException("Programmer error - setToZero not supported for DataLazy (DataLazy objects should be read only).");
2104    }
2105    
2106    bool
2107    DataLazy::actsExpanded() const
2108    {
2109        return (m_readytype=='E');
2110  }  }
2111    
2112  }   // end namespace  }   // end namespace

Legend:
Removed from v.1993  
changed lines
  Added in v.5938

  ViewVC Help
Powered by ViewVC 1.1.26