/[escript]/branches/clazy/escriptcore/src/DataLazy.cpp
ViewVC logotype

Diff of /branches/clazy/escriptcore/src/DataLazy.cpp

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

branches/schroedinger_upto1946/escript/src/DataLazy.cpp revision 1950 by jfenwick, Thu Oct 30 00:59:34 2008 UTC trunk/escript/src/DataLazy.cpp revision 4154 by jfenwick, Tue Jan 22 09:30:23 2013 UTC
# Line 1  Line 1 
1    
2  /*******************************************************  /*****************************************************************************
3  *  *
4  * Copyright (c) 2003-2008 by University of Queensland  * Copyright (c) 2003-2013 by University of Queensland
5  * Earth Systems Science Computational Center (ESSCC)  * http://www.uq.edu.au
 * http://www.uq.edu.au/esscc  
6  *  *
7  * Primary Business: Queensland, Australia  * Primary Business: Queensland, Australia
8  * Licensed under the Open Software License version 3.0  * Licensed under the Open Software License version 3.0
9  * http://www.opensource.org/licenses/osl-3.0.php  * http://www.opensource.org/licenses/osl-3.0.php
10  *  *
11  *******************************************************/  * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12    * Development since 2012 by School of Earth Sciences
13    *
14    *****************************************************************************/
15    
16    
17  #include "DataLazy.h"  #include "DataLazy.h"
18  #ifdef USE_NETCDF  #include "esysUtils/Esys_MPI.h"
 #include <netcdfcpp.h>  
 #endif  
 #ifdef PASO_MPI  
 #include <mpi.h>  
 #endif  
19  #ifdef _OPENMP  #ifdef _OPENMP
20  #include <omp.h>  #include <omp.h>
21  #endif  #endif
# Line 28  Line 25 
25  #include "UnaryFuncs.h"     // for escript::fsign  #include "UnaryFuncs.h"     // for escript::fsign
26  #include "Utils.h"  #include "Utils.h"
27    
28    #include "EscriptParams.h"
29    
30    #ifdef USE_NETCDF
31    #include <netcdfcpp.h>
32    #endif
33    
34    #include <iomanip>      // for some fancy formatting in debug
35    
36    // #define LAZYDEBUG(X) if (privdebug){X;}
37    #define LAZYDEBUG(X)
38    namespace
39    {
40    bool privdebug=false;
41    
42    #define ENABLEDEBUG privdebug=true;
43    #define DISABLEDEBUG privdebug=false;
44    }
45    
46    // #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {cerr << "\n!!!!!!! SIZE LIMIT EXCEEDED " << m_children << ";" << m_height << endl << toString() << endl;resolveToIdentity();}
47    
48    // #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {cerr << "SIZE LIMIT EXCEEDED " << m_height << endl;resolveToIdentity();}
49    
50    
51    #define SIZELIMIT if (m_height>escript::escriptParams.getTOO_MANY_LEVELS())  {if (escript::escriptParams.getLAZY_VERBOSE()){cerr << "SIZE LIMIT EXCEEDED height=" << m_height << endl;}resolveToIdentity();}
52    
53  /*  /*
54  How does DataLazy work?  How does DataLazy work?
55  ~~~~~~~~~~~~~~~~~~~~~~~  ~~~~~~~~~~~~~~~~~~~~~~~
# Line 48  I will refer to individual DataLazy obje Line 70  I will refer to individual DataLazy obje
70  Each node also stores:  Each node also stores:
71  - m_readytype \in {'E','T','C','?'} ~ indicates what sort of DataReady would be produced if the expression was  - m_readytype \in {'E','T','C','?'} ~ indicates what sort of DataReady would be produced if the expression was
72      evaluated.      evaluated.
 - m_length ~ how many values would be stored in the answer if the expression was evaluated.  
73  - m_buffsrequired ~ the larged number of samples which would need to be kept simultaneously in order to  - m_buffsrequired ~ the larged number of samples which would need to be kept simultaneously in order to
74      evaluate the expression.      evaluate the expression.
75  - m_samplesize ~ the number of doubles stored in a sample.  - m_samplesize ~ the number of doubles stored in a sample.
# Line 70  The convention that I use, is that the r Line 91  The convention that I use, is that the r
91    
92  For expressions which evaluate to Constant or Tagged, there is a different evaluation method.  For expressions which evaluate to Constant or Tagged, there is a different evaluation method.
93  The collapse method invokes the (non-lazy) operations on the Data class to evaluate the expression.  The collapse method invokes the (non-lazy) operations on the Data class to evaluate the expression.
94    
95    To add a new operator you need to do the following (plus anything I might have forgotten - adding a new group for example):
96    1) Add to the ES_optype.
97    2) determine what opgroup your operation belongs to (X)
98    3) add a string for the op to the end of ES_opstrings
99    4) increase ES_opcount
100    5) add an entry (X) to opgroups
101    6) add an entry to the switch in collapseToReady
102    7) add an entry to resolveX
103  */  */
104    
105    
# Line 79  using namespace boost; Line 109  using namespace boost;
109  namespace escript  namespace escript
110  {  {
111    
 const std::string&  
 opToString(ES_optype op);  
   
112  namespace  namespace
113  {  {
114    
115    
116    // enabling this will print out when ever the maximum stacksize used by resolve increases
117    // it assumes _OPENMP is also in use
118    //#define LAZY_STACK_PROF
119    
120    
121    
122    #ifndef _OPENMP
123      #ifdef LAZY_STACK_PROF
124      #undef LAZY_STACK_PROF
125      #endif
126    #endif
127    
128    
129    #ifdef LAZY_STACK_PROF
130    std::vector<void*> stackstart(getNumberOfThreads());
131    std::vector<void*> stackend(getNumberOfThreads());
132    size_t maxstackuse=0;
133    #endif
134    
135  enum ES_opgroup  enum ES_opgroup
136  {  {
137     G_UNKNOWN,     G_UNKNOWN,
138     G_IDENTITY,     G_IDENTITY,
139     G_BINARY,        // pointwise operations with two arguments     G_BINARY,        // pointwise operations with two arguments
140     G_UNARY      // pointwise operations with one argument     G_UNARY,     // pointwise operations with one argument
141       G_UNARY_P,       // pointwise operations with one argument, requiring a parameter
142       G_NP1OUT,        // non-pointwise op with one output
143       G_NP1OUT_P,      // non-pointwise op with one output requiring a parameter
144       G_TENSORPROD,    // general tensor product
145       G_NP1OUT_2P,     // non-pointwise op with one output requiring two params
146       G_REDUCTION,     // non-pointwise unary op with a scalar output
147       G_CONDEVAL
148  };  };
149    
150    
# Line 101  string ES_opstrings[]={"UNKNOWN","IDENTI Line 155  string ES_opstrings[]={"UNKNOWN","IDENTI
155              "asin","acos","atan","sinh","cosh","tanh","erf",              "asin","acos","atan","sinh","cosh","tanh","erf",
156              "asinh","acosh","atanh",              "asinh","acosh","atanh",
157              "log10","log","sign","abs","neg","pos","exp","sqrt",              "log10","log","sign","abs","neg","pos","exp","sqrt",
158              "1/","where>0","where<0","where>=0","where<=0"};              "1/","where>0","where<0","where>=0","where<=0", "where<>0","where=0",
159  int ES_opcount=33;              "symmetric","nonsymmetric",
160                "prod",
161                "transpose", "trace",
162                "swapaxes",
163                "minval", "maxval",
164                "condEval"};
165    int ES_opcount=44;
166  ES_opgroup opgroups[]={G_UNKNOWN,G_IDENTITY,G_BINARY,G_BINARY,G_BINARY,G_BINARY, G_BINARY,  ES_opgroup opgroups[]={G_UNKNOWN,G_IDENTITY,G_BINARY,G_BINARY,G_BINARY,G_BINARY, G_BINARY,
167              G_UNARY,G_UNARY,G_UNARY, //10              G_UNARY,G_UNARY,G_UNARY, //10
168              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,    // 17              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,    // 17
169              G_UNARY,G_UNARY,G_UNARY,                    // 20              G_UNARY,G_UNARY,G_UNARY,                    // 20
170              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,        // 28              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,    // 28
171              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY};              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY, G_UNARY_P, G_UNARY_P,      // 35
172                G_NP1OUT,G_NP1OUT,
173                G_TENSORPROD,
174                G_NP1OUT_P, G_NP1OUT_P,
175                G_NP1OUT_2P,
176                G_REDUCTION, G_REDUCTION,
177                G_CONDEVAL};
178  inline  inline
179  ES_opgroup  ES_opgroup
180  getOpgroup(ES_optype op)  getOpgroup(ES_optype op)
# Line 143  resultFS(DataAbstract_ptr left, DataAbst Line 209  resultFS(DataAbstract_ptr left, DataAbst
209  }  }
210    
211  // return the shape of the result of "left op right"  // return the shape of the result of "left op right"
212    // the shapes resulting from tensor product are more complex to compute so are worked out elsewhere
213  DataTypes::ShapeType  DataTypes::ShapeType
214  resultShape(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)  resultShape(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
215  {  {
216      if (left->getShape()!=right->getShape())      if (left->getShape()!=right->getShape())
217      {      {
218        if (getOpgroup(op)!=G_BINARY)        if ((getOpgroup(op)!=G_BINARY) && (getOpgroup(op)!=G_NP1OUT))
219        {        {
220          throw DataException("Shapes not the name - shapes must match for (point)binary operations.");          throw DataException("Shapes not the name - shapes must match for (point)binary operations.");
221        }        }
222    
223        if (left->getRank()==0)   // we need to allow scalar * anything        if (left->getRank()==0)   // we need to allow scalar * anything
224        {        {
225          return right->getShape();          return right->getShape();
# Line 165  resultShape(DataAbstract_ptr left, DataA Line 233  resultShape(DataAbstract_ptr left, DataA
233      return left->getShape();      return left->getShape();
234  }  }
235    
236  // determine the number of points in the result of "left op right"  // return the shape for "op left"
237  size_t  
238  resultLength(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)  DataTypes::ShapeType
239    resultShape(DataAbstract_ptr left, ES_optype op, int axis_offset)
240  {  {
241     switch (getOpgroup(op))      switch(op)
242     {      {
243     case G_BINARY: return left->getLength();          case TRANS:
244     case G_UNARY: return left->getLength();         {            // for the scoping of variables
245     default:          const DataTypes::ShapeType& s=left->getShape();
246      throw DataException("Programmer Error - attempt to getLength() for operator "+opToString(op)+".");          DataTypes::ShapeType sh;
247     }          int rank=left->getRank();
248            if (axis_offset<0 || axis_offset>rank)
249            {
250                stringstream e;
251                e << "Error - Data::transpose must have 0 <= axis_offset <= rank=" << rank;
252                throw DataException(e.str());
253            }
254            for (int i=0; i<rank; i++)
255            {
256               int index = (axis_offset+i)%rank;
257               sh.push_back(s[index]); // Append to new shape
258            }
259            return sh;
260           }
261        break;
262        case TRACE:
263           {
264            int rank=left->getRank();
265            if (rank<2)
266            {
267               throw DataException("Trace can only be computed for objects with rank 2 or greater.");
268            }
269            if ((axis_offset>rank-2) || (axis_offset<0))
270            {
271               throw DataException("Trace: axis offset must lie between 0 and rank-2 inclusive.");
272            }
273            if (rank==2)
274            {
275               return DataTypes::scalarShape;
276            }
277            else if (rank==3)
278            {
279               DataTypes::ShapeType sh;
280                   if (axis_offset==0)
281               {
282                    sh.push_back(left->getShape()[2]);
283                   }
284                   else     // offset==1
285               {
286                sh.push_back(left->getShape()[0]);
287                   }
288               return sh;
289            }
290            else if (rank==4)
291            {
292               DataTypes::ShapeType sh;
293               const DataTypes::ShapeType& s=left->getShape();
294                   if (axis_offset==0)
295               {
296                    sh.push_back(s[2]);
297                    sh.push_back(s[3]);
298                   }
299                   else if (axis_offset==1)
300               {
301                    sh.push_back(s[0]);
302                    sh.push_back(s[3]);
303                   }
304               else     // offset==2
305               {
306                sh.push_back(s[0]);
307                sh.push_back(s[1]);
308               }
309               return sh;
310            }
311            else        // unknown rank
312            {
313               throw DataException("Error - Data::trace can only be calculated for rank 2, 3 or 4 object.");
314            }
315           }
316        break;
317            default:
318        throw DataException("Programmer error - resultShape(left,op) can't compute shapes for operator "+opToString(op)+".");
319        }
320  }  }
321    
322  // determine the number of samples requires to evaluate an expression combining left and right  DataTypes::ShapeType
323  int  SwapShape(DataAbstract_ptr left, const int axis0, const int axis1)
 calcBuffs(const DataLazy_ptr& left, const DataLazy_ptr& right, ES_optype op)  
324  {  {
325     switch(getOpgroup(op))       // This code taken from the Data.cpp swapaxes() method
326     {       // Some of the checks are probably redundant here
327     case G_IDENTITY: return 1;       int axis0_tmp,axis1_tmp;
328     case G_BINARY: return max(left->getBuffsRequired(),right->getBuffsRequired()+1);       const DataTypes::ShapeType& s=left->getShape();
329     case G_UNARY: return max(left->getBuffsRequired(),1);       DataTypes::ShapeType out_shape;
330     default:       // Here's the equivalent of python s_out=s[axis_offset:]+s[:axis_offset]
331      throw DataException("Programmer Error - attempt to calcBuffs() for operator "+opToString(op)+".");       // which goes thru all shape vector elements starting with axis_offset (at index=rank wrap around to 0)
332     }       int rank=left->getRank();
333         if (rank<2) {
334            throw DataException("Error - Data::swapaxes argument must have at least rank 2.");
335         }
336         if (axis0<0 || axis0>rank-1) {
337            stringstream e;
338            e << "Error - Data::swapaxes: axis0 must be between 0 and rank-1=" << (rank-1);
339            throw DataException(e.str());
340         }
341         if (axis1<0 || axis1>rank-1) {
342            stringstream e;
343            e << "Error - Data::swapaxes: axis1 must be between 0 and rank-1=" << (rank-1);
344            throw DataException(e.str());
345         }
346         if (axis0 == axis1) {
347             throw DataException("Error - Data::swapaxes: axis indices must be different.");
348         }
349         if (axis0 > axis1) {
350             axis0_tmp=axis1;
351             axis1_tmp=axis0;
352         } else {
353             axis0_tmp=axis0;
354             axis1_tmp=axis1;
355         }
356         for (int i=0; i<rank; i++) {
357           if (i == axis0_tmp) {
358              out_shape.push_back(s[axis1_tmp]);
359           } else if (i == axis1_tmp) {
360              out_shape.push_back(s[axis0_tmp]);
361           } else {
362              out_shape.push_back(s[i]);
363           }
364         }
365        return out_shape;
366  }  }
367    
368    
369    // determine the output shape for the general tensor product operation
370    // the additional parameters return information required later for the product
371    // the majority of this code is copy pasted from C_General_Tensor_Product
372    DataTypes::ShapeType
373    GTPShape(DataAbstract_ptr left, DataAbstract_ptr right, int axis_offset, int transpose, int& SL, int& SM, int& SR)
374    {
375        
376      // Get rank and shape of inputs
377      int rank0 = left->getRank();
378      int rank1 = right->getRank();
379      const DataTypes::ShapeType& shape0 = left->getShape();
380      const DataTypes::ShapeType& shape1 = right->getShape();
381    
382      // Prepare for the loops of the product and verify compatibility of shapes
383      int start0=0, start1=0;
384      if (transpose == 0)       {}
385      else if (transpose == 1)  { start0 = axis_offset; }
386      else if (transpose == 2)  { start1 = rank1-axis_offset; }
387      else              { throw DataException("DataLazy GeneralTensorProduct Constructor: Error - transpose should be 0, 1 or 2"); }
388    
389      if (rank0<axis_offset)
390      {
391        throw DataException("DataLazy GeneralTensorProduct Constructor: Error - rank of left < axisoffset");
392      }
393    
394      // Adjust the shapes for transpose
395      DataTypes::ShapeType tmpShape0(rank0);    // pre-sizing the vectors rather
396      DataTypes::ShapeType tmpShape1(rank1);    // than using push_back
397      for (int i=0; i<rank0; i++)   { tmpShape0[i]=shape0[(i+start0)%rank0]; }
398      for (int i=0; i<rank1; i++)   { tmpShape1[i]=shape1[(i+start1)%rank1]; }
399    
400      // Prepare for the loops of the product
401      SL=1, SM=1, SR=1;
402      for (int i=0; i<rank0-axis_offset; i++)   {
403        SL *= tmpShape0[i];
404      }
405      for (int i=rank0-axis_offset; i<rank0; i++)   {
406        if (tmpShape0[i] != tmpShape1[i-(rank0-axis_offset)]) {
407          throw DataException("C_GeneralTensorProduct: Error - incompatible shapes");
408        }
409        SM *= tmpShape0[i];
410      }
411      for (int i=axis_offset; i<rank1; i++)     {
412        SR *= tmpShape1[i];
413      }
414    
415      // Define the shape of the output (rank of shape is the sum of the loop ranges below)
416      DataTypes::ShapeType shape2(rank0+rank1-2*axis_offset);  
417      {         // block to limit the scope of out_index
418         int out_index=0;
419         for (int i=0; i<rank0-axis_offset; i++, ++out_index) { shape2[out_index]=tmpShape0[i]; } // First part of arg_0_Z
420         for (int i=axis_offset; i<rank1; i++, ++out_index)   { shape2[out_index]=tmpShape1[i]; } // Last part of arg_1_Z
421      }
422    
423      if (shape2.size()>ESCRIPT_MAX_DATA_RANK)
424      {
425         ostringstream os;
426         os << "C_GeneralTensorProduct: Error - Attempt to create a rank " << shape2.size() << " object. The maximum rank is " << ESCRIPT_MAX_DATA_RANK << ".";
427         throw DataException(os.str());
428      }
429    
430      return shape2;
431    }
432    
433  }   // end anonymous namespace  }   // end anonymous namespace
434    
435    
# Line 208  opToString(ES_optype op) Line 445  opToString(ES_optype op)
445    return ES_opstrings[op];    return ES_opstrings[op];
446  }  }
447    
448    void DataLazy::LazyNodeSetup()
449    {
450    #ifdef _OPENMP
451        int numthreads=omp_get_max_threads();
452        m_samples.resize(numthreads*m_samplesize);
453        m_sampleids=new int[numthreads];
454        for (int i=0;i<numthreads;++i)
455        {
456            m_sampleids[i]=-1;  
457        }
458    #else
459        m_samples.resize(m_samplesize);
460        m_sampleids=new int[1];
461        m_sampleids[0]=-1;
462    #endif  // _OPENMP
463    }
464    
465    
466    // Creates an identity node
467  DataLazy::DataLazy(DataAbstract_ptr p)  DataLazy::DataLazy(DataAbstract_ptr p)
468      : parent(p->getFunctionSpace(),p->getShape()),      : parent(p->getFunctionSpace(),p->getShape())
469      m_op(IDENTITY)      ,m_sampleids(0),
470        m_samples(1)
471  {  {
472     if (p->isLazy())     if (p->isLazy())
473     {     {
# Line 222  DataLazy::DataLazy(DataAbstract_ptr p) Line 478  DataLazy::DataLazy(DataAbstract_ptr p)
478     }     }
479     else     else
480     {     {
481      m_id=dynamic_pointer_cast<DataReady>(p);      p->makeLazyShared();
482      if(p->isConstant()) {m_readytype='C';}      DataReady_ptr dr=dynamic_pointer_cast<DataReady>(p);
483      else if(p->isExpanded()) {m_readytype='E';}      makeIdentity(dr);
484      else if (p->isTagged()) {m_readytype='T';}  LAZYDEBUG(cout << "Wrapping " << dr.get() << " id=" << m_id.get() << endl;)
     else {throw DataException("Unknown DataReady instance in DataLazy constructor.");}  
485     }     }
486     m_length=p->getLength();  LAZYDEBUG(cout << "(1)Lazy created with " << m_samplesize << endl;)
    m_buffsRequired=1;  
    m_samplesize=getNumDPPSample()*getNoValues();  
 cout << "(1)Lazy created with " << m_samplesize << endl;  
487  }  }
488    
   
   
   
489  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op)  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op)
490      : parent(left->getFunctionSpace(),left->getShape()),      : parent(left->getFunctionSpace(),(getOpgroup(op)!=G_REDUCTION)?left->getShape():DataTypes::scalarShape),
491      m_op(op)      m_op(op),
492        m_axis_offset(0),
493        m_transpose(0),
494        m_SL(0), m_SM(0), m_SR(0)
495  {  {
496     if (getOpgroup(op)!=G_UNARY)     if ((getOpgroup(op)!=G_UNARY) && (getOpgroup(op)!=G_NP1OUT) && (getOpgroup(op)!=G_REDUCTION))
497     {     {
498      throw DataException("Programmer error - constructor DataLazy(left, op) will only process UNARY operations.");      throw DataException("Programmer error - constructor DataLazy(left, op) will only process UNARY operations.");
499     }     }
500    
501     DataLazy_ptr lleft;     DataLazy_ptr lleft;
502     if (!left->isLazy())     if (!left->isLazy())
503     {     {
# Line 255  DataLazy::DataLazy(DataAbstract_ptr left Line 508  DataLazy::DataLazy(DataAbstract_ptr left
508      lleft=dynamic_pointer_cast<DataLazy>(left);      lleft=dynamic_pointer_cast<DataLazy>(left);
509     }     }
510     m_readytype=lleft->m_readytype;     m_readytype=lleft->m_readytype;
    m_length=left->getLength();  
511     m_left=lleft;     m_left=lleft;
    m_buffsRequired=1;  
512     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
513       m_children=m_left->m_children+1;
514       m_height=m_left->m_height+1;
515       LazyNodeSetup();
516       SIZELIMIT
517  }  }
518    
519    
520  // In this constructor we need to consider interpolation  // In this constructor we need to consider interpolation
521  DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)  DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)
522      : parent(resultFS(left,right,op), resultShape(left,right,op)),      : parent(resultFS(left,right,op), resultShape(left,right,op)),
523      m_op(op)      m_op(op),
524        m_SL(0), m_SM(0), m_SR(0)
525  {  {
526     if (getOpgroup(op)!=G_BINARY)  LAZYDEBUG(cout << "Forming operator with " << left.get() << " " << right.get() << endl;)
527       if ((getOpgroup(op)!=G_BINARY))
528     {     {
529      throw DataException("Programmer error - constructor DataLazy(left, right, op) will only process BINARY operations.");      throw DataException("Programmer error - constructor DataLazy(left, right, op) will only process BINARY operations.");
530     }     }
# Line 279  DataLazy::DataLazy(DataAbstract_ptr left Line 536  DataLazy::DataLazy(DataAbstract_ptr left
536      Data tmp(ltemp,fs);      Data tmp(ltemp,fs);
537      left=tmp.borrowDataPtr();      left=tmp.borrowDataPtr();
538     }     }
539     if (getFunctionSpace()!=right->getFunctionSpace())   // left needs to be interpolated     if (getFunctionSpace()!=right->getFunctionSpace())   // right needs to be interpolated
540     {     {
541      Data tmp(Data(right),getFunctionSpace());      Data tmp(Data(right),getFunctionSpace());
542      right=tmp.borrowDataPtr();      right=tmp.borrowDataPtr();
543    LAZYDEBUG(cout << "Right interpolation required " << right.get() << endl;)
544     }     }
545     left->operandCheck(*right);     left->operandCheck(*right);
546    
547     if (left->isLazy())          // the children need to be DataLazy. Wrap them in IDENTITY if required     if (left->isLazy())          // the children need to be DataLazy. Wrap them in IDENTITY if required
548     {     {
549      m_left=dynamic_pointer_cast<DataLazy>(left);      m_left=dynamic_pointer_cast<DataLazy>(left);
550    LAZYDEBUG(cout << "Left is " << m_left->toString() << endl;)
551     }     }
552     else     else
553     {     {
554      m_left=DataLazy_ptr(new DataLazy(left));      m_left=DataLazy_ptr(new DataLazy(left));
555    LAZYDEBUG(cout << "Left " << left.get() << " wrapped " << m_left->m_id.get() << endl;)
556     }     }
557     if (right->isLazy())     if (right->isLazy())
558     {     {
559      m_right=dynamic_pointer_cast<DataLazy>(right);      m_right=dynamic_pointer_cast<DataLazy>(right);
560    LAZYDEBUG(cout << "Right is " << m_right->toString() << endl;)
561     }     }
562     else     else
563     {     {
564      m_right=DataLazy_ptr(new DataLazy(right));      m_right=DataLazy_ptr(new DataLazy(right));
565    LAZYDEBUG(cout << "Right " << right.get() << " wrapped " << m_right->m_id.get() << endl;)
566     }     }
567     char lt=m_left->m_readytype;     char lt=m_left->m_readytype;
568     char rt=m_right->m_readytype;     char rt=m_right->m_readytype;
# Line 316  DataLazy::DataLazy(DataAbstract_ptr left Line 578  DataLazy::DataLazy(DataAbstract_ptr left
578     {     {
579      m_readytype='C';      m_readytype='C';
580     }     }
581     m_length=resultLength(m_left,m_right,m_op);     m_samplesize=getNumDPPSample()*getNoValues();
582     m_samplesize=getNumDPPSample()*getNoValues();         m_children=m_left->m_children+m_right->m_children+2;
583     m_buffsRequired=calcBuffs(m_left, m_right,m_op);     m_height=max(m_left->m_height,m_right->m_height)+1;
584  cout << "(3)Lazy created with " << m_samplesize << endl;     LazyNodeSetup();
585       SIZELIMIT
586    LAZYDEBUG(cout << "(3)Lazy created with " << m_samplesize << endl;)
587  }  }
588    
589    DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op, int axis_offset, int transpose)
590        : parent(resultFS(left,right,op), GTPShape(left,right, axis_offset, transpose, m_SL,m_SM, m_SR)),
591        m_op(op),
592        m_axis_offset(axis_offset),
593        m_transpose(transpose)
594    {
595       if ((getOpgroup(op)!=G_TENSORPROD))
596       {
597        throw DataException("Programmer error - constructor DataLazy(left, right, op, ax, tr) will only process BINARY operations which require parameters.");
598       }
599       if ((transpose>2) || (transpose<0))
600       {
601        throw DataException("DataLazy GeneralTensorProduct constructor: Error - transpose should be 0, 1 or 2");
602       }
603       if (getFunctionSpace()!=left->getFunctionSpace())    // left needs to be interpolated
604       {
605        FunctionSpace fs=getFunctionSpace();
606        Data ltemp(left);
607        Data tmp(ltemp,fs);
608        left=tmp.borrowDataPtr();
609       }
610       if (getFunctionSpace()!=right->getFunctionSpace())   // right needs to be interpolated
611       {
612        Data tmp(Data(right),getFunctionSpace());
613        right=tmp.borrowDataPtr();
614       }
615    //    left->operandCheck(*right);
616    
617  DataLazy::~DataLazy()     if (left->isLazy())          // the children need to be DataLazy. Wrap them in IDENTITY if required
618       {
619        m_left=dynamic_pointer_cast<DataLazy>(left);
620       }
621       else
622       {
623        m_left=DataLazy_ptr(new DataLazy(left));
624       }
625       if (right->isLazy())
626       {
627        m_right=dynamic_pointer_cast<DataLazy>(right);
628       }
629       else
630       {
631        m_right=DataLazy_ptr(new DataLazy(right));
632       }
633       char lt=m_left->m_readytype;
634       char rt=m_right->m_readytype;
635       if (lt=='E' || rt=='E')
636       {
637        m_readytype='E';
638       }
639       else if (lt=='T' || rt=='T')
640       {
641        m_readytype='T';
642       }
643       else
644       {
645        m_readytype='C';
646       }
647       m_samplesize=getNumDPPSample()*getNoValues();
648       m_children=m_left->m_children+m_right->m_children+2;
649       m_height=max(m_left->m_height,m_right->m_height)+1;
650       LazyNodeSetup();
651       SIZELIMIT
652    LAZYDEBUG(cout << "(4)Lazy created with " << m_samplesize << endl;)
653    }
654    
655    
656    DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, int axis_offset)
657        : parent(left->getFunctionSpace(), resultShape(left,op, axis_offset)),
658        m_op(op),
659        m_axis_offset(axis_offset),
660        m_transpose(0),
661        m_tol(0)
662  {  {
663       if ((getOpgroup(op)!=G_NP1OUT_P))
664       {
665        throw DataException("Programmer error - constructor DataLazy(left, op, ax) will only process UNARY operations which require parameters.");
666       }
667       DataLazy_ptr lleft;
668       if (!left->isLazy())
669       {
670        lleft=DataLazy_ptr(new DataLazy(left));
671       }
672       else
673       {
674        lleft=dynamic_pointer_cast<DataLazy>(left);
675       }
676       m_readytype=lleft->m_readytype;
677       m_left=lleft;
678       m_samplesize=getNumDPPSample()*getNoValues();
679       m_children=m_left->m_children+1;
680       m_height=m_left->m_height+1;
681       LazyNodeSetup();
682       SIZELIMIT
683    LAZYDEBUG(cout << "(5)Lazy created with " << m_samplesize << endl;)
684    }
685    
686    DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, double tol)
687        : parent(left->getFunctionSpace(), left->getShape()),
688        m_op(op),
689        m_axis_offset(0),
690        m_transpose(0),
691        m_tol(tol)
692    {
693       if ((getOpgroup(op)!=G_UNARY_P))
694       {
695        throw DataException("Programmer error - constructor DataLazy(left, op, tol) will only process UNARY operations which require parameters.");
696       }
697       DataLazy_ptr lleft;
698       if (!left->isLazy())
699       {
700        lleft=DataLazy_ptr(new DataLazy(left));
701       }
702       else
703       {
704        lleft=dynamic_pointer_cast<DataLazy>(left);
705       }
706       m_readytype=lleft->m_readytype;
707       m_left=lleft;
708       m_samplesize=getNumDPPSample()*getNoValues();
709       m_children=m_left->m_children+1;
710       m_height=m_left->m_height+1;
711       LazyNodeSetup();
712       SIZELIMIT
713    LAZYDEBUG(cout << "(6)Lazy created with " << m_samplesize << endl;)
714  }  }
715    
716    
717  int  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, const int axis0, const int axis1)
718  DataLazy::getBuffsRequired() const      : parent(left->getFunctionSpace(), SwapShape(left,axis0,axis1)),
719        m_op(op),
720        m_axis_offset(axis0),
721        m_transpose(axis1),
722        m_tol(0)
723  {  {
724      return m_buffsRequired;     if ((getOpgroup(op)!=G_NP1OUT_2P))
725       {
726        throw DataException("Programmer error - constructor DataLazy(left, op, tol) will only process UNARY operations which require two integer parameters.");
727       }
728       DataLazy_ptr lleft;
729       if (!left->isLazy())
730       {
731        lleft=DataLazy_ptr(new DataLazy(left));
732       }
733       else
734       {
735        lleft=dynamic_pointer_cast<DataLazy>(left);
736       }
737       m_readytype=lleft->m_readytype;
738       m_left=lleft;
739       m_samplesize=getNumDPPSample()*getNoValues();
740       m_children=m_left->m_children+1;
741       m_height=m_left->m_height+1;
742       LazyNodeSetup();
743       SIZELIMIT
744    LAZYDEBUG(cout << "(7)Lazy created with " << m_samplesize << endl;)
745    }
746    
747    
748    namespace
749    {
750    
751        inline int max3(int a, int b, int c)
752        {
753        int t=(a>b?a:b);
754        return (t>c?t:c);
755    
756        }
757    }
758    
759    DataLazy::DataLazy(DataAbstract_ptr mask, DataAbstract_ptr left, DataAbstract_ptr right/*, double tol*/)
760        : parent(left->getFunctionSpace(), left->getShape()),
761        m_op(CONDEVAL),
762        m_axis_offset(0),
763        m_transpose(0),
764        m_tol(0)
765    {
766    
767       DataLazy_ptr lmask;
768       DataLazy_ptr lleft;
769       DataLazy_ptr lright;
770       if (!mask->isLazy())
771       {
772        lmask=DataLazy_ptr(new DataLazy(mask));
773       }
774       else
775       {
776        lmask=dynamic_pointer_cast<DataLazy>(mask);
777       }
778       if (!left->isLazy())
779       {
780        lleft=DataLazy_ptr(new DataLazy(left));
781       }
782       else
783       {
784        lleft=dynamic_pointer_cast<DataLazy>(left);
785       }
786       if (!right->isLazy())
787       {
788        lright=DataLazy_ptr(new DataLazy(right));
789       }
790       else
791       {
792        lright=dynamic_pointer_cast<DataLazy>(right);
793       }
794       m_readytype=lmask->m_readytype;
795       if ((lleft->m_readytype!=lright->m_readytype) || (lmask->m_readytype!=lleft->m_readytype))
796       {
797        throw DataException("Programmer Error - condEval arguments must have the same readytype");
798       }
799       m_left=lleft;
800       m_right=lright;
801       m_mask=lmask;
802       m_samplesize=getNumDPPSample()*getNoValues();
803       m_children=m_left->m_children+m_right->m_children+m_mask->m_children+1;
804       m_height=max3(m_left->m_height,m_right->m_height,m_mask->m_height)+1;
805       LazyNodeSetup();
806       SIZELIMIT
807    LAZYDEBUG(cout << "(8)Lazy created with " << m_samplesize << endl;)
808    }
809    
810    
811    
812    DataLazy::~DataLazy()
813    {
814       delete[] m_sampleids;
815  }  }
816    
817    
# Line 354  DataLazy::collapseToReady() Line 834  DataLazy::collapseToReady()
834    DataReady_ptr pleft=m_left->collapseToReady();    DataReady_ptr pleft=m_left->collapseToReady();
835    Data left(pleft);    Data left(pleft);
836    Data right;    Data right;
837    if (getOpgroup(m_op)==G_BINARY)    if ((getOpgroup(m_op)==G_BINARY) || (getOpgroup(m_op)==G_TENSORPROD))
838    {    {
839      right=Data(m_right->collapseToReady());      right=Data(m_right->collapseToReady());
840    }    }
# Line 453  DataLazy::collapseToReady() Line 933  DataLazy::collapseToReady()
933      case LEZ:      case LEZ:
934      result=left.whereNonPositive();      result=left.whereNonPositive();
935      break;      break;
936        case NEZ:
937        result=left.whereNonZero(m_tol);
938        break;
939        case EZ:
940        result=left.whereZero(m_tol);
941        break;
942        case SYM:
943        result=left.symmetric();
944        break;
945        case NSYM:
946        result=left.nonsymmetric();
947        break;
948        case PROD:
949        result=C_GeneralTensorProduct(left,right,m_axis_offset, m_transpose);
950        break;
951        case TRANS:
952        result=left.transpose(m_axis_offset);
953        break;
954        case TRACE:
955        result=left.trace(m_axis_offset);
956        break;
957        case SWAP:
958        result=left.swapaxes(m_axis_offset, m_transpose);
959        break;
960        case MINVAL:
961        result=left.minval();
962        break;
963        case MAXVAL:
964        result=left.minval();
965        break;
966      default:      default:
967      throw DataException("Programmer error - do not know how to resolve operator "+opToString(m_op)+".");      throw DataException("Programmer error - collapseToReady does not know how to resolve operator "+opToString(m_op)+".");
968    }    }
969    return result.borrowReadyPtr();    return result.borrowReadyPtr();
970  }  }
# Line 480  DataLazy::collapse() Line 990  DataLazy::collapse()
990    m_op=IDENTITY;    m_op=IDENTITY;
991  }  }
992    
993  /*  
994    \brief Compute the value of the expression (binary operation) for the given sample.  
995    \return Vector which stores the value of the subexpression for the given sample.  
996    \param v A vector to store intermediate results.  
997    \param offset Index in v to begin storing results.  
998    \param sampleNo Sample number to evaluate.  #define PROC_OP(TYPE,X)                               \
999    \param roffset (output parameter) the offset in the return vector where the result begins.      for (int j=0;j<onumsteps;++j)\
1000        {\
1001    The return value will be an existing vector so do not deallocate it.        for (int i=0;i<numsteps;++i,resultp+=resultStep) \
1002    If the result is stored in v it should be stored at the offset given.        { \
1003    Everything from offset to the end of v should be considered available for this method to use.  LAZYDEBUG(cout << "[left,right]=[" << lroffset << "," << rroffset << "]" << endl;)\
1004  */  LAZYDEBUG(cout << "{left,right}={" << (*left)[lroffset] << "," << (*right)[rroffset] << "}\n";)\
1005  DataTypes::ValueType*           tensor_binary_operation< TYPE >(chunksize, &((*left)[lroffset]), &((*right)[rroffset]), resultp, X); \
1006  DataLazy::resolveUnary(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const  LAZYDEBUG(cout << " result=      " << resultp[0] << endl;) \
1007             lroffset+=leftstep; \
1008             rroffset+=rightstep; \
1009          }\
1010          lroffset+=oleftstep;\
1011          rroffset+=orightstep;\
1012        }
1013    
1014    
1015    // The result will be stored in m_samples
1016    // The return value is a pointer to the DataVector, offset is the offset within the return value
1017    const DataTypes::ValueType*
1018    DataLazy::resolveNodeSample(int tid, int sampleNo, size_t& roffset)
1019    {
1020    LAZYDEBUG(cout << "Resolve sample " << toString() << endl;)
1021        // collapse so we have a 'E' node or an IDENTITY for some other type
1022      if (m_readytype!='E' && m_op!=IDENTITY)
1023      {
1024        collapse();
1025      }
1026      if (m_op==IDENTITY)  
1027      {
1028        const ValueType& vec=m_id->getVectorRO();
1029        roffset=m_id->getPointOffset(sampleNo, 0);
1030    #ifdef LAZY_STACK_PROF
1031    int x;
1032    if (&x<stackend[omp_get_thread_num()])
1033    {
1034           stackend[omp_get_thread_num()]=&x;
1035    }
1036    #endif
1037        return &(vec);
1038      }
1039      if (m_readytype!='E')
1040      {
1041        throw DataException("Programmer Error - Collapse did not produce an expanded node.");
1042      }
1043      if (m_sampleids[tid]==sampleNo)
1044      {
1045        roffset=tid*m_samplesize;
1046        return &(m_samples);        // sample is already resolved
1047      }
1048      m_sampleids[tid]=sampleNo;
1049    
1050      switch (getOpgroup(m_op))
1051      {
1052      case G_UNARY:
1053      case G_UNARY_P: return resolveNodeUnary(tid, sampleNo, roffset);
1054      case G_BINARY: return resolveNodeBinary(tid, sampleNo, roffset);
1055      case G_NP1OUT: return resolveNodeNP1OUT(tid, sampleNo, roffset);
1056      case G_NP1OUT_P: return resolveNodeNP1OUT_P(tid, sampleNo, roffset);
1057      case G_TENSORPROD: return resolveNodeTProd(tid, sampleNo, roffset);
1058      case G_NP1OUT_2P: return resolveNodeNP1OUT_2P(tid, sampleNo, roffset);
1059      case G_REDUCTION: return resolveNodeReduction(tid, sampleNo, roffset);
1060      case G_CONDEVAL: return resolveNodeCondEval(tid, sampleNo, roffset);
1061      default:
1062        throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");
1063      }
1064    }
1065    
1066    const DataTypes::ValueType*
1067    DataLazy::resolveNodeUnary(int tid, int sampleNo, size_t& roffset)
1068  {  {
1069      // we assume that any collapsing has been done before we get here      // we assume that any collapsing has been done before we get here
1070      // since we only have one argument we don't need to think about only      // since we only have one argument we don't need to think about only
1071      // processing single points.      // processing single points.
1072        // we will also know we won't get identity nodes
1073    if (m_readytype!='E')    if (m_readytype!='E')
1074    {    {
1075      throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");      throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
1076    }    }
1077    const ValueType* vleft=m_left->resolveSample(v,offset,sampleNo,roffset);    if (m_op==IDENTITY)
1078    const double* left=&((*vleft)[roffset]);    {
1079    double* result=&(v[offset]);      throw DataException("Programmer error - resolveNodeUnary should not be called on identity nodes.");
1080    roffset=offset;    }
1081      const DataTypes::ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, roffset);
1082      const double* left=&((*leftres)[roffset]);
1083      roffset=m_samplesize*tid;
1084      double* result=&(m_samples[roffset]);
1085    switch (m_op)    switch (m_op)
1086    {    {
1087      case SIN:        case SIN:  
1088      tensor_unary_operation(m_samplesize, left, result, ::sin);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sin);
1089      break;      break;
1090      case COS:      case COS:
1091      tensor_unary_operation(m_samplesize, left, result, ::cos);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cos);
1092      break;      break;
1093      case TAN:      case TAN:
1094      tensor_unary_operation(m_samplesize, left, result, ::tan);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tan);
1095      break;      break;
1096      case ASIN:      case ASIN:
1097      tensor_unary_operation(m_samplesize, left, result, ::asin);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::asin);
1098      break;      break;
1099      case ACOS:      case ACOS:
1100      tensor_unary_operation(m_samplesize, left, result, ::acos);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::acos);
1101      break;      break;
1102      case ATAN:      case ATAN:
1103      tensor_unary_operation(m_samplesize, left, result, ::atan);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::atan);
1104      break;      break;
1105      case SINH:      case SINH:
1106      tensor_unary_operation(m_samplesize, left, result, ::sinh);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sinh);
1107      break;      break;
1108      case COSH:      case COSH:
1109      tensor_unary_operation(m_samplesize, left, result, ::cosh);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cosh);
1110      break;      break;
1111      case TANH:      case TANH:
1112      tensor_unary_operation(m_samplesize, left, result, ::tanh);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tanh);
1113      break;      break;
1114      case ERF:      case ERF:
1115  #ifdef _WIN32  #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1116      throw DataException("Error - Data:: erf function is not supported on _WIN32 platforms.");      throw DataException("Error - Data:: erf function is not supported on _WIN32 platforms.");
1117  #else  #else
1118      tensor_unary_operation(m_samplesize, left, result, ::erf);      tensor_unary_operation(m_samplesize, left, result, ::erf);
1119      break;      break;
1120  #endif  #endif
1121     case ASINH:     case ASINH:
1122  #ifdef _WIN32  #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1123      tensor_unary_operation(m_samplesize, left, result, escript::asinh_substitute);      tensor_unary_operation(m_samplesize, left, result, escript::asinh_substitute);
1124  #else  #else
1125      tensor_unary_operation(m_samplesize, left, result, ::asinh);      tensor_unary_operation(m_samplesize, left, result, ::asinh);
1126  #endif    #endif  
1127      break;      break;
1128     case ACOSH:     case ACOSH:
1129  #ifdef _WIN32  #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1130      tensor_unary_operation(m_samplesize, left, result, escript::acosh_substitute);      tensor_unary_operation(m_samplesize, left, result, escript::acosh_substitute);
1131  #else  #else
1132      tensor_unary_operation(m_samplesize, left, result, ::acosh);      tensor_unary_operation(m_samplesize, left, result, ::acosh);
1133  #endif    #endif  
1134      break;      break;
1135     case ATANH:     case ATANH:
1136  #ifdef _WIN32  #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1137      tensor_unary_operation(m_samplesize, left, result, escript::atanh_substitute);      tensor_unary_operation(m_samplesize, left, result, escript::atanh_substitute);
1138  #else  #else
1139      tensor_unary_operation(m_samplesize, left, result, ::atanh);      tensor_unary_operation(m_samplesize, left, result, ::atanh);
1140  #endif    #endif  
1141      break;      break;
1142      case LOG10:      case LOG10:
1143      tensor_unary_operation(m_samplesize, left, result, ::log10);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log10);
1144      break;      break;
1145      case LOG:      case LOG:
1146      tensor_unary_operation(m_samplesize, left, result, ::log);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log);
1147      break;      break;
1148      case SIGN:      case SIGN:
1149      tensor_unary_operation(m_samplesize, left, result, escript::fsign);      tensor_unary_operation(m_samplesize, left, result, escript::fsign);
1150      break;      break;
1151      case ABS:      case ABS:
1152      tensor_unary_operation(m_samplesize, left, result, ::fabs);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::fabs);
1153      break;      break;
1154      case NEG:      case NEG:
1155      tensor_unary_operation(m_samplesize, left, result, negate<double>());      tensor_unary_operation(m_samplesize, left, result, negate<double>());
# Line 584  DataLazy::resolveUnary(ValueType& v, siz Line 1160  DataLazy::resolveUnary(ValueType& v, siz
1160      throw DataException("Programmer error - POS not supported for lazy data.");      throw DataException("Programmer error - POS not supported for lazy data.");
1161      break;      break;
1162      case EXP:      case EXP:
1163      tensor_unary_operation(m_samplesize, left, result, ::exp);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::exp);
1164      break;      break;
1165      case SQRT:      case SQRT:
1166      tensor_unary_operation(m_samplesize, left, result, ::sqrt);      tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sqrt);
1167      break;      break;
1168      case RECIP:      case RECIP:
1169      tensor_unary_operation(m_samplesize, left, result, bind1st(divides<double>(),1.));      tensor_unary_operation(m_samplesize, left, result, bind1st(divides<double>(),1.));
# Line 604  DataLazy::resolveUnary(ValueType& v, siz Line 1180  DataLazy::resolveUnary(ValueType& v, siz
1180      case LEZ:      case LEZ:
1181      tensor_unary_operation(m_samplesize, left, result, bind2nd(less_equal<double>(),0.0));      tensor_unary_operation(m_samplesize, left, result, bind2nd(less_equal<double>(),0.0));
1182      break;      break;
1183    // There are actually G_UNARY_P but I don't see a compelling reason to treat them differently
1184        case NEZ:
1185        tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsGT(),m_tol));
1186        break;
1187        case EZ:
1188        tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsLTE(),m_tol));
1189        break;
1190    
1191      default:      default:
1192      throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");      throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");
1193    }    }
1194    return &v;    return &(m_samples);
1195  }  }
1196    
1197    
1198    const DataTypes::ValueType*
1199    DataLazy::resolveNodeReduction(int tid, int sampleNo, size_t& roffset)
1200    {
1201        // we assume that any collapsing has been done before we get here
1202        // since we only have one argument we don't need to think about only
1203        // processing single points.
1204        // we will also know we won't get identity nodes
1205      if (m_readytype!='E')
1206      {
1207        throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
1208      }
1209      if (m_op==IDENTITY)
1210      {
1211        throw DataException("Programmer error - resolveNodeUnary should not be called on identity nodes.");
1212      }
1213      size_t loffset=0;
1214      const DataTypes::ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, loffset);
1215    
1216      roffset=m_samplesize*tid;
1217      unsigned int ndpps=getNumDPPSample();
1218      unsigned int psize=DataTypes::noValues(m_left->getShape());
1219      double* result=&(m_samples[roffset]);
1220      switch (m_op)
1221      {
1222        case MINVAL:
1223        {
1224          for (unsigned int z=0;z<ndpps;++z)
1225          {
1226            FMin op;
1227            *result=DataMaths::reductionOp(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max());
1228            loffset+=psize;
1229            result++;
1230          }
1231        }
1232        break;
1233        case MAXVAL:
1234        {
1235          for (unsigned int z=0;z<ndpps;++z)
1236          {
1237          FMax op;
1238          *result=DataMaths::reductionOp(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max()*-1);
1239          loffset+=psize;
1240          result++;
1241          }
1242        }
1243        break;
1244        default:
1245        throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");
1246      }
1247      return &(m_samples);
1248    }
1249    
1250    const DataTypes::ValueType*
1251    DataLazy::resolveNodeNP1OUT(int tid, int sampleNo, size_t& roffset)
1252    {
1253        // we assume that any collapsing has been done before we get here
1254        // since we only have one argument we don't need to think about only
1255        // processing single points.
1256      if (m_readytype!='E')
1257      {
1258        throw DataException("Programmer error - resolveNodeNP1OUT should only be called on expanded Data.");
1259      }
1260      if (m_op==IDENTITY)
1261      {
1262        throw DataException("Programmer error - resolveNodeNP1OUT should not be called on identity nodes.");
1263      }
1264      size_t subroffset;
1265      const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1266      roffset=m_samplesize*tid;
1267      size_t loop=0;
1268      size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1269      size_t step=getNoValues();
1270      size_t offset=roffset;
1271      switch (m_op)
1272      {
1273        case SYM:
1274        for (loop=0;loop<numsteps;++loop)
1275        {
1276            DataMaths::symmetric(*leftres,m_left->getShape(),subroffset, m_samples, getShape(), offset);
1277            subroffset+=step;
1278            offset+=step;
1279        }
1280        break;
1281        case NSYM:
1282        for (loop=0;loop<numsteps;++loop)
1283        {
1284            DataMaths::nonsymmetric(*leftres,m_left->getShape(),subroffset, m_samples, getShape(), offset);
1285            subroffset+=step;
1286            offset+=step;
1287        }
1288        break;
1289        default:
1290        throw DataException("Programmer error - resolveNP1OUT can not resolve operator "+opToString(m_op)+".");
1291      }
1292      return &m_samples;
1293    }
1294    
1295    const DataTypes::ValueType*
1296    DataLazy::resolveNodeNP1OUT_P(int tid, int sampleNo, size_t& roffset)
1297    {
1298        // we assume that any collapsing has been done before we get here
1299        // since we only have one argument we don't need to think about only
1300        // processing single points.
1301      if (m_readytype!='E')
1302      {
1303        throw DataException("Programmer error - resolveNodeNP1OUT_P should only be called on expanded Data.");
1304      }
1305      if (m_op==IDENTITY)
1306      {
1307        throw DataException("Programmer error - resolveNodeNP1OUT_P should not be called on identity nodes.");
1308      }
1309      size_t subroffset;
1310      size_t offset;
1311      const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1312      roffset=m_samplesize*tid;
1313      offset=roffset;
1314      size_t loop=0;
1315      size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1316      size_t outstep=getNoValues();
1317      size_t instep=m_left->getNoValues();
1318      switch (m_op)
1319      {
1320        case TRACE:
1321        for (loop=0;loop<numsteps;++loop)
1322        {
1323                DataMaths::trace(*leftres,m_left->getShape(),subroffset, m_samples ,getShape(),offset,m_axis_offset);
1324            subroffset+=instep;
1325            offset+=outstep;
1326        }
1327        break;
1328        case TRANS:
1329        for (loop=0;loop<numsteps;++loop)
1330        {
1331                DataMaths::transpose(*leftres,m_left->getShape(),subroffset, m_samples, getShape(),offset,m_axis_offset);
1332            subroffset+=instep;
1333            offset+=outstep;
1334        }
1335        break;
1336        default:
1337        throw DataException("Programmer error - resolveNP1OUTP can not resolve operator "+opToString(m_op)+".");
1338      }
1339      return &m_samples;
1340    }
1341    
1342    
1343  #define PROC_OP(X) \  const DataTypes::ValueType*
1344      for (int i=0;i<steps;++i,resultp+=resultStep) \  DataLazy::resolveNodeNP1OUT_2P(int tid, int sampleNo, size_t& roffset)
1345      { \  {
1346         tensor_binary_operation(chunksize, &((*left)[lroffset]), &((*right)[rroffset]), resultp, X); \    if (m_readytype!='E')
1347         lroffset+=leftStep; \    {
1348         rroffset+=rightStep; \      throw DataException("Programmer error - resolveNodeNP1OUT_2P should only be called on expanded Data.");
1349      }
1350      if (m_op==IDENTITY)
1351      {
1352        throw DataException("Programmer error - resolveNodeNP1OUT_2P should not be called on identity nodes.");
1353      }
1354      size_t subroffset;
1355      size_t offset;
1356      const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1357      roffset=m_samplesize*tid;
1358      offset=roffset;
1359      size_t loop=0;
1360      size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1361      size_t outstep=getNoValues();
1362      size_t instep=m_left->getNoValues();
1363      switch (m_op)
1364      {
1365        case SWAP:
1366        for (loop=0;loop<numsteps;++loop)
1367        {
1368                DataMaths::swapaxes(*leftres,m_left->getShape(),subroffset, m_samples, getShape(),offset, m_axis_offset, m_transpose);
1369            subroffset+=instep;
1370            offset+=outstep;
1371      }      }
1372        break;
1373        default:
1374        throw DataException("Programmer error - resolveNodeNP1OUT2P can not resolve operator "+opToString(m_op)+".");
1375      }
1376      return &m_samples;
1377    }
1378    
1379    const DataTypes::ValueType*
1380    DataLazy::resolveNodeCondEval(int tid, int sampleNo, size_t& roffset)
1381    {
1382      if (m_readytype!='E')
1383      {
1384        throw DataException("Programmer error - resolveNodeCondEval should only be called on expanded Data.");
1385      }
1386      if (m_op!=CONDEVAL)
1387      {
1388        throw DataException("Programmer error - resolveNodeCondEval should only be called on CONDEVAL nodes.");
1389      }
1390      size_t subroffset;
1391    
1392      const ValueType* maskres=m_mask->resolveNodeSample(tid, sampleNo, subroffset);
1393      const ValueType* srcres=0;
1394      if ((*maskres)[subroffset]>0)
1395      {
1396        srcres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1397      }
1398      else
1399      {
1400        srcres=m_right->resolveNodeSample(tid, sampleNo, subroffset);
1401      }
1402    
1403      // Now we need to copy the result
1404    
1405      roffset=m_samplesize*tid;
1406      for (int i=0;i<m_samplesize;++i)
1407      {
1408        m_samples[roffset+i]=(*srcres)[subroffset+i];  
1409      }
1410    
1411      return &m_samples;
1412    }
1413    
 /*  
   \brief Compute the value of the expression (binary operation) for the given sample.  
   \return Vector which stores the value of the subexpression for the given sample.  
   \param v A vector to store intermediate results.  
   \param offset Index in v to begin storing results.  
   \param sampleNo Sample number to evaluate.  
   \param roffset (output parameter) the offset in the return vector where the result begins.  
   
   The return value will be an existing vector so do not deallocate it.  
   If the result is stored in v it should be stored at the offset given.  
   Everything from offset to the end of v should be considered available for this method to use.  
 */  
1414  // This method assumes that any subexpressions which evaluate to Constant or Tagged Data  // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1415  // have already been collapsed to IDENTITY. So we must have at least one expanded child.  // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1416  // If both children are expanded, then we can process them in a single operation (we treat  // If both children are expanded, then we can process them in a single operation (we treat
# Line 644  DataLazy::resolveUnary(ValueType& v, siz Line 1420  DataLazy::resolveUnary(ValueType& v, siz
1420  // There is an additional complication when scalar operations are considered.  // There is an additional complication when scalar operations are considered.
1421  // For example, 2+Vector.  // For example, 2+Vector.
1422  // In this case each double within the point is treated individually  // In this case each double within the point is treated individually
1423  DataTypes::ValueType*  const DataTypes::ValueType*
1424  DataLazy::resolveBinary(ValueType& v,  size_t offset, int sampleNo, size_t& roffset) const  DataLazy::resolveNodeBinary(int tid, int sampleNo, size_t& roffset)
1425  {  {
1426  cout << "Resolve binary: " << toString() << endl;  LAZYDEBUG(cout << "Resolve binary: " << toString() << endl;)
1427    
1428    size_t lroffset=0, rroffset=0;    // offsets in the left and right result vectors    size_t lroffset=0, rroffset=0;    // offsets in the left and right result vectors
1429      // first work out which of the children are expanded      // first work out which of the children are expanded
1430    bool leftExp=(m_left->m_readytype=='E');    bool leftExp=(m_left->m_readytype=='E');
1431    bool rightExp=(m_right->m_readytype=='E');    bool rightExp=(m_right->m_readytype=='E');
1432    bool bigloops=((leftExp && rightExp) || (!leftExp && !rightExp)); // is processing in single step?    if (!leftExp && !rightExp)
1433    int steps=(bigloops?1:getNumDPPSample());    {
1434    size_t chunksize=(bigloops? m_samplesize : getNoValues());    // if bigloops, pretend the whole sample is a datapoint      throw DataException("Programmer Error - please use collapse if neither argument has type 'E'.");
1435    if (m_left->getRank()!=m_right->getRank())    // need to deal with scalar * ? ops    }
1436    {    bool leftScalar=(m_left->getRank()==0);
1437      EsysAssert((m_left->getRank()==0) || (m_right->getRank()==0), "Error - Ranks must match unless one is 0.");    bool rightScalar=(m_right->getRank()==0);
1438      steps=getNumDPPSample()*max(m_left->getNoValues(),m_right->getNoValues());    if ((m_left->getRank()!=m_right->getRank()) && (!leftScalar && !rightScalar))
1439      chunksize=1;    // for scalar    {
1440    }          throw DataException("resolveBinary - ranks of arguments must match unless one of them is scalar.");
1441    int leftStep=((leftExp && !rightExp)? m_right->getNoValues() : 0);    }
1442    int rightStep=((rightExp && !leftExp)? m_left->getNoValues() : 0);    size_t leftsize=m_left->getNoValues();
1443    int resultStep=max(leftStep,rightStep);   // only one (at most) should be !=0    size_t rightsize=m_right->getNoValues();
1444      size_t chunksize=1;           // how many doubles will be processed in one go
1445      int leftstep=0;       // how far should the left offset advance after each step
1446      int rightstep=0;
1447      int numsteps=0;       // total number of steps for the inner loop
1448      int oleftstep=0;  // the o variables refer to the outer loop
1449      int orightstep=0; // The outer loop is only required in cases where there is an extended scalar
1450      int onumsteps=1;
1451      
1452      bool LES=(leftExp && leftScalar); // Left is an expanded scalar
1453      bool RES=(rightExp && rightScalar);
1454      bool LS=(!leftExp && leftScalar); // left is a single scalar
1455      bool RS=(!rightExp && rightScalar);
1456      bool LN=(!leftExp && !leftScalar);    // left is a single non-scalar
1457      bool RN=(!rightExp && !rightScalar);
1458      bool LEN=(leftExp && !leftScalar);    // left is an expanded non-scalar
1459      bool REN=(rightExp && !rightScalar);
1460    
1461      if ((LES && RES) || (LEN && REN)) // both are Expanded scalars or both are expanded non-scalars
1462      {
1463        chunksize=m_left->getNumDPPSample()*leftsize;
1464        leftstep=0;
1465        rightstep=0;
1466        numsteps=1;
1467      }
1468      else if (LES || RES)
1469      {
1470        chunksize=1;
1471        if (LES)        // left is an expanded scalar
1472        {
1473            if (RS)
1474            {
1475               leftstep=1;
1476               rightstep=0;
1477               numsteps=m_left->getNumDPPSample();
1478            }
1479            else        // RN or REN
1480            {
1481               leftstep=0;
1482               oleftstep=1;
1483               rightstep=1;
1484               orightstep=(RN ? -(int)rightsize : 0);
1485               numsteps=rightsize;
1486               onumsteps=m_left->getNumDPPSample();
1487            }
1488        }
1489        else        // right is an expanded scalar
1490        {
1491            if (LS)
1492            {
1493               rightstep=1;
1494               leftstep=0;
1495               numsteps=m_right->getNumDPPSample();
1496            }
1497            else
1498            {
1499               rightstep=0;
1500               orightstep=1;
1501               leftstep=1;
1502               oleftstep=(LN ? -(int)leftsize : 0);
1503               numsteps=leftsize;
1504               onumsteps=m_right->getNumDPPSample();
1505            }
1506        }
1507      }
1508      else  // this leaves (LEN, RS), (LEN, RN) and their transposes
1509      {
1510        if (LEN)    // and Right will be a single value
1511        {
1512            chunksize=rightsize;
1513            leftstep=rightsize;
1514            rightstep=0;
1515            numsteps=m_left->getNumDPPSample();
1516            if (RS)
1517            {
1518               numsteps*=leftsize;
1519            }
1520        }
1521        else    // REN
1522        {
1523            chunksize=leftsize;
1524            rightstep=leftsize;
1525            leftstep=0;
1526            numsteps=m_right->getNumDPPSample();
1527            if (LS)
1528            {
1529               numsteps*=rightsize;
1530            }
1531        }
1532      }
1533    
1534      int resultStep=max(leftstep,rightstep);   // only one (at most) should be !=0
1535      // Get the values of sub-expressions      // Get the values of sub-expressions
1536    const ValueType* left=m_left->resolveSample(v,offset,sampleNo,lroffset);    const ValueType* left=m_left->resolveNodeSample(tid,sampleNo,lroffset);  
1537    const ValueType* right=m_right->resolveSample(v,offset+m_samplesize,sampleNo,rroffset); // Note    const ValueType* right=m_right->resolveNodeSample(tid,sampleNo,rroffset);
1538      // the right child starts further along.  LAZYDEBUG(cout << "Post sub calls in " << toString() << endl;)
1539    double* resultp=&(v[offset]);     // results are stored at the vector offset we recieved  LAZYDEBUG(cout << "shapes=" << DataTypes::shapeToString(m_left->getShape()) << "," << DataTypes::shapeToString(m_right->getShape()) << endl;)
1540    LAZYDEBUG(cout << "chunksize=" << chunksize << endl << "leftstep=" << leftstep << " rightstep=" << rightstep;)
1541    LAZYDEBUG(cout << " numsteps=" << numsteps << endl << "oleftstep=" << oleftstep << " orightstep=" << orightstep;)
1542    LAZYDEBUG(cout << "onumsteps=" << onumsteps << endl;)
1543    LAZYDEBUG(cout << " DPPS=" << m_left->getNumDPPSample() << "," <<m_right->getNumDPPSample() << endl;)
1544    LAZYDEBUG(cout << "" << LS << RS << LN << RN << LES << RES <<LEN << REN <<   endl;)
1545    
1546    LAZYDEBUG(cout << "Left res["<< lroffset<< "]=" << (*left)[lroffset] << endl;)
1547    LAZYDEBUG(cout << "Right res["<< rroffset<< "]=" << (*right)[rroffset] << endl;)
1548    
1549    
1550      roffset=m_samplesize*tid;
1551      double* resultp=&(m_samples[roffset]);        // results are stored at the vector offset we recieved
1552    switch(m_op)    switch(m_op)
1553    {    {
1554      case ADD:      case ADD:
1555      PROC_OP(plus<double>());          PROC_OP(NO_ARG,plus<double>());
1556      break;      break;
1557      case SUB:      case SUB:
1558      PROC_OP(minus<double>());      PROC_OP(NO_ARG,minus<double>());
1559      break;      break;
1560      case MUL:      case MUL:
1561      PROC_OP(multiplies<double>());      PROC_OP(NO_ARG,multiplies<double>());
1562      break;      break;
1563      case DIV:      case DIV:
1564      PROC_OP(divides<double>());      PROC_OP(NO_ARG,divides<double>());
1565      break;      break;
1566      case POW:      case POW:
1567      PROC_OP(::pow);         PROC_OP(double (double,double),::pow);
1568      break;      break;
1569      default:      default:
1570      throw DataException("Programmer error - resolveBinary can not resolve operator "+opToString(m_op)+".");      throw DataException("Programmer error - resolveBinary can not resolve operator "+opToString(m_op)+".");
1571    }    }
1572    roffset=offset;    LAZYDEBUG(cout << "Result res[" << roffset<< "]" << m_samples[roffset] << endl;)
1573    return &v;    return &m_samples;
1574  }  }
1575    
1576    
1577    // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1578    // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1579    // unlike the other resolve helpers, we must treat these datapoints separately.
1580    const DataTypes::ValueType*
1581    DataLazy::resolveNodeTProd(int tid, int sampleNo, size_t& roffset)
1582    {
1583    LAZYDEBUG(cout << "Resolve TensorProduct: " << toString() << endl;)
1584    
1585  /*    size_t lroffset=0, rroffset=0;    // offsets in the left and right result vectors
1586    \brief Compute the value of the expression for the given sample.      // first work out which of the children are expanded
1587    \return Vector which stores the value of the subexpression for the given sample.    bool leftExp=(m_left->m_readytype=='E');
1588    \param v A vector to store intermediate results.    bool rightExp=(m_right->m_readytype=='E');
1589    \param offset Index in v to begin storing results.    int steps=getNumDPPSample();
1590    \param sampleNo Sample number to evaluate.    int leftStep=(leftExp? m_left->getNoValues() : 0);        // do not have scalars as input to this method
1591    \param roffset (output parameter) the offset in the return vector where the result begins.    int rightStep=(rightExp?m_right->getNoValues() : 0);
1592    
1593      int resultStep=getNoValues();
1594      roffset=m_samplesize*tid;
1595      size_t offset=roffset;
1596    
1597      const ValueType* left=m_left->resolveNodeSample(tid, sampleNo, lroffset);
1598    
1599      const ValueType* right=m_right->resolveNodeSample(tid, sampleNo, rroffset);
1600    
1601    LAZYDEBUG(cerr << "[Left shape]=" << DataTypes::shapeToString(m_left->getShape()) << "\n[Right shape]=" << DataTypes::shapeToString(m_right->getShape()) << " result=" <<DataTypes::shapeToString(getShape()) <<  endl;
1602    cout << getNoValues() << endl;)
1603    
1604    
1605    LAZYDEBUG(cerr << "Post sub calls: " << toString() << endl;)
1606    LAZYDEBUG(cout << "LeftExp=" << leftExp << " rightExp=" << rightExp << endl;)
1607    LAZYDEBUG(cout << "LeftR=" << m_left->getRank() << " rightExp=" << m_right->getRank() << endl;)
1608    LAZYDEBUG(cout << "LeftSize=" << m_left->getNoValues() << " RightSize=" << m_right->getNoValues() << endl;)
1609    LAZYDEBUG(cout << "m_samplesize=" << m_samplesize << endl;)
1610    LAZYDEBUG(cout << "outputshape=" << DataTypes::shapeToString(getShape()) << endl;)
1611    LAZYDEBUG(cout << "DPPS=" << m_right->getNumDPPSample() <<"."<<endl;)
1612    
1613      double* resultp=&(m_samples[offset]);     // results are stored at the vector offset we recieved
1614      switch(m_op)
1615      {
1616        case PROD:
1617        for (int i=0;i<steps;++i,resultp+=resultStep)
1618        {
1619              const double *ptr_0 = &((*left)[lroffset]);
1620              const double *ptr_1 = &((*right)[rroffset]);
1621    
1622    LAZYDEBUG(cout << DataTypes::pointToString(*left, m_left->getShape(),lroffset,"LEFT") << endl;)
1623    LAZYDEBUG(cout << DataTypes::pointToString(*right,m_right->getShape(),rroffset, "RIGHT") << endl;)
1624    
1625              matrix_matrix_product(m_SL, m_SM, m_SR, ptr_0, ptr_1, resultp, m_transpose);
1626    
1627          lroffset+=leftStep;
1628          rroffset+=rightStep;
1629        }
1630        break;
1631        default:
1632        throw DataException("Programmer error - resolveTProduct can not resolve operator "+opToString(m_op)+".");
1633      }
1634      roffset=offset;
1635      return &m_samples;
1636    }
1637    
   The return value will be an existing vector so do not deallocate it.  
 */  
 // the vector and the offset are a place where the method could write its data if it wishes  
 // it is not obligated to do so. For example, if it has its own storage already, it can use that.  
 // Hence the return value to indicate where the data is actually stored.  
 // Regardless, the storage should be assumed to be used, even if it isn't.  
1638    
 // the roffset is the offset within the returned vector where the data begins  
1639  const DataTypes::ValueType*  const DataTypes::ValueType*
1640  DataLazy::resolveSample(ValueType& v, size_t offset, int sampleNo, size_t& roffset)  DataLazy::resolveSample(int sampleNo, size_t& roffset)
1641  {  {
1642  cout << "Resolve sample " << toString() << endl;  #ifdef _OPENMP
1643      // collapse so we have a 'E' node or an IDENTITY for some other type      int tid=omp_get_thread_num();
1644    if (m_readytype!='E' && m_op!=IDENTITY)  #else
1645        int tid=0;
1646    #endif
1647    
1648    #ifdef LAZY_STACK_PROF
1649        stackstart[tid]=&tid;
1650        stackend[tid]=&tid;
1651        const DataTypes::ValueType* r=resolveNodeSample(tid, sampleNo, roffset);
1652        size_t d=(size_t)stackstart[tid]-(size_t)stackend[tid];
1653        #pragma omp critical
1654        if (d>maxstackuse)
1655        {
1656    cout << "Max resolve Stack use " << d << endl;
1657            maxstackuse=d;
1658        }
1659        return r;
1660    #else
1661        return resolveNodeSample(tid, sampleNo, roffset);
1662    #endif
1663    }
1664    
1665    
1666    // This needs to do the work of the identity constructor
1667    void
1668    DataLazy::resolveToIdentity()
1669    {
1670       if (m_op==IDENTITY)
1671        return;
1672       DataReady_ptr p=resolveNodeWorker();
1673       makeIdentity(p);
1674    }
1675    
1676    void DataLazy::makeIdentity(const DataReady_ptr& p)
1677    {
1678       m_op=IDENTITY;
1679       m_axis_offset=0;
1680       m_transpose=0;
1681       m_SL=m_SM=m_SR=0;
1682       m_children=m_height=0;
1683       m_id=p;
1684       if(p->isConstant()) {m_readytype='C';}
1685       else if(p->isExpanded()) {m_readytype='E';}
1686       else if (p->isTagged()) {m_readytype='T';}
1687       else {throw DataException("Unknown DataReady instance in convertToIdentity constructor.");}
1688       m_samplesize=p->getNumDPPSample()*p->getNoValues();
1689       m_left.reset();
1690       m_right.reset();
1691    }
1692    
1693    
1694    DataReady_ptr
1695    DataLazy::resolve()
1696    {
1697        resolveToIdentity();
1698        return m_id;
1699    }
1700    
1701    
1702    /* This is really a static method but I think that caused problems in windows */
1703    void
1704    DataLazy::resolveGroupWorker(std::vector<DataLazy*>& dats)
1705    {
1706      if (dats.empty())
1707    {    {
1708      collapse();      return;
1709    }    }
1710    if (m_op==IDENTITY)      vector<DataLazy*> work;
1711      FunctionSpace fs=dats[0]->getFunctionSpace();
1712      bool match=true;
1713      for (int i=dats.size()-1;i>=0;--i)
1714    {    {
1715      const ValueType& vec=m_id->getVector();      if (dats[i]->m_readytype!='E')
1716      if (m_readytype=='C')      {
1717      {          dats[i]->collapse();
1718      roffset=0;      }
1719      return &(vec);      if (dats[i]->m_op!=IDENTITY)
1720      }      {
1721      roffset=m_id->getPointOffset(sampleNo, 0);          work.push_back(dats[i]);
1722      return &(vec);          if (fs!=dats[i]->getFunctionSpace())
1723            {
1724                match=false;
1725            }
1726        }
1727    }    }
1728    if (m_readytype!='E')    if (work.empty())
1729    {    {
1730      throw DataException("Programmer Error - Collapse did not produce an expanded node.");      return;     // no work to do
1731    }    }
1732    switch (getOpgroup(m_op))    if (match)    // all functionspaces match.  Yes I realise this is overly strict
1733      {     // it is possible that dats[0] is one of the objects which we discarded and
1734            // all the other functionspaces match.
1735        vector<DataExpanded*> dep;
1736        vector<ValueType*> vecs;
1737        for (int i=0;i<work.size();++i)
1738        {
1739            dep.push_back(new DataExpanded(fs,work[i]->getShape(), ValueType(work[i]->getNoValues())));
1740            vecs.push_back(&(dep[i]->getVectorRW()));
1741        }
1742        int totalsamples=work[0]->getNumSamples();
1743        const ValueType* res=0; // Storage for answer
1744        int sample;
1745        #pragma omp parallel private(sample, res)
1746        {
1747            size_t roffset=0;
1748            #pragma omp for schedule(static)
1749            for (sample=0;sample<totalsamples;++sample)
1750            {
1751            roffset=0;
1752            int j;
1753            for (j=work.size()-1;j>=0;--j)
1754            {
1755    #ifdef _OPENMP
1756                    res=work[j]->resolveNodeSample(omp_get_thread_num(),sample,roffset);
1757    #else
1758                    res=work[j]->resolveNodeSample(0,sample,roffset);
1759    #endif
1760                    DataVector::size_type outoffset=dep[j]->getPointOffset(sample,0);
1761                    memcpy(&((*vecs[j])[outoffset]),&((*res)[roffset]),work[j]->m_samplesize*sizeof(DataVector::ElementType));
1762            }
1763            }
1764        }
1765        // Now we need to load the new results as identity ops into the lazy nodes
1766        for (int i=work.size()-1;i>=0;--i)
1767        {
1768            work[i]->makeIdentity(boost::dynamic_pointer_cast<DataReady>(dep[i]->getPtr()));
1769        }
1770      }
1771      else  // functionspaces do not match
1772    {    {
1773    case G_UNARY: return resolveUnary(v, offset,sampleNo,roffset);      for (int i=0;i<work.size();++i)
1774    case G_BINARY: return resolveBinary(v, offset,sampleNo,roffset);      {
1775    default:          work[i]->resolveToIdentity();
1776      throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");      }
1777    }    }
1778  }  }
1779    
1780    
1781  // To simplify the memory management, all threads operate on one large vector, rather than one each.  
1782  // Each sample is evaluated independently and copied into the result DataExpanded.  // This version of resolve uses storage in each node to hold results
1783  DataReady_ptr  DataReady_ptr
1784  DataLazy::resolve()  DataLazy::resolveNodeWorker()
1785  {  {
   
 cout << "Sample size=" << m_samplesize << endl;  
 cout << "Buffers=" << m_buffsRequired << endl;  
   
1786    if (m_readytype!='E')     // if the whole sub-expression is Constant or Tagged, then evaluate it normally    if (m_readytype!='E')     // if the whole sub-expression is Constant or Tagged, then evaluate it normally
1787    {    {
1788      collapse();      collapse();
# Line 764  cout << "Buffers=" << m_buffsRequired << Line 1792  cout << "Buffers=" << m_buffsRequired <<
1792      return m_id;      return m_id;
1793    }    }
1794      // from this point on we must have m_op!=IDENTITY and m_readytype=='E'      // from this point on we must have m_op!=IDENTITY and m_readytype=='E'
   size_t threadbuffersize=m_samplesize*(max(1,m_buffsRequired));    // Each thread needs to have enough  
     // storage to evaluate its expression  
   int numthreads=1;  
 #ifdef _OPENMP  
   numthreads=getNumberOfThreads();  
   int threadnum=0;  
 #endif  
   ValueType v(numthreads*threadbuffersize);  
 cout << "Buffer created with size=" << v.size() << endl;  
1795    DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(),  ValueType(getNoValues()));    DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(),  ValueType(getNoValues()));
1796    ValueType& resvec=result->getVector();    ValueType& resvec=result->getVectorRW();
1797    DataReady_ptr resptr=DataReady_ptr(result);    DataReady_ptr resptr=DataReady_ptr(result);
1798    
1799    int sample;    int sample;
   size_t outoffset;     // offset in the output data  
1800    int totalsamples=getNumSamples();    int totalsamples=getNumSamples();
1801    const ValueType* res=0;   // Vector storing the answer    const ValueType* res=0;   // Storage for answer
1802    size_t resoffset=0;       // where in the vector to find the answer  LAZYDEBUG(cout << "Total number of samples=" <<totalsamples << endl;)
1803    #pragma omp parallel for private(sample,resoffset,outoffset,threadnum,res) schedule(static)    #pragma omp parallel private(sample,res)
1804    for (sample=0;sample<totalsamples;++sample)    {
1805    {      size_t roffset=0;
1806  cout << "################################# " << sample << endl;  #ifdef LAZY_STACK_PROF
1807        stackstart[omp_get_thread_num()]=&roffset;
1808        stackend[omp_get_thread_num()]=&roffset;
1809    #endif
1810        #pragma omp for schedule(static)
1811        for (sample=0;sample<totalsamples;++sample)
1812        {
1813            roffset=0;
1814  #ifdef _OPENMP  #ifdef _OPENMP
1815      res=resolveSample(v,threadbuffersize*omp_get_thread_num(),sample,resoffset);              res=resolveNodeSample(omp_get_thread_num(),sample,roffset);
1816  #else  #else
1817      res=resolveSample(v,0,sample,resoffset);   // res would normally be v, but not if its a single IDENTITY op.              res=resolveNodeSample(0,sample,roffset);
1818  #endif  #endif
1819  cerr << "-------------------------------- " << endl;  LAZYDEBUG(cout << "Sample #" << sample << endl;)
1820      outoffset=result->getPointOffset(sample,0);  LAZYDEBUG(cout << "Final res[" << roffset<< "]=" << (*res)[roffset] << (*res)[roffset]<< endl; )
1821  cerr << "offset=" << outoffset << endl;              DataVector::size_type outoffset=result->getPointOffset(sample,0);
1822      for (unsigned int i=0;i<m_samplesize;++i,++outoffset,++resoffset)   // copy values into the output vector              memcpy(&(resvec[outoffset]),&((*res)[roffset]),m_samplesize*sizeof(DataVector::ElementType));
1823      {      }
1824      resvec[outoffset]=(*res)[resoffset];    }
1825      }  #ifdef LAZY_STACK_PROF
1826  cerr << "*********************************" << endl;    for (int i=0;i<getNumberOfThreads();++i)
1827      {
1828        size_t r=((size_t)stackstart[i] - (size_t)stackend[i]);
1829    //  cout << i << " " << stackstart[i] << " .. " << stackend[i] << " = " <<  r << endl;
1830        if (r>maxstackuse)
1831        {
1832            maxstackuse=r;
1833        }
1834    }    }
1835      cout << "Max resolve Stack use=" << maxstackuse << endl;
1836    #endif
1837    return resptr;    return resptr;
1838  }  }
1839    
# Line 806  std::string Line 1841  std::string
1841  DataLazy::toString() const  DataLazy::toString() const
1842  {  {
1843    ostringstream oss;    ostringstream oss;
1844    oss << "Lazy Data:";    oss << "Lazy Data: [depth=" << m_height<< "] ";
1845    intoString(oss);    switch (escriptParams.getLAZY_STR_FMT())
1846      {
1847      case 1:   // tree format
1848        oss << endl;
1849        intoTreeString(oss,"");
1850        break;
1851      case 2:   // just the depth
1852        break;
1853      default:
1854        intoString(oss);
1855        break;
1856      }
1857    return oss.str();    return oss.str();
1858  }  }
1859    
# Line 815  DataLazy::toString() const Line 1861  DataLazy::toString() const
1861  void  void
1862  DataLazy::intoString(ostringstream& oss) const  DataLazy::intoString(ostringstream& oss) const
1863  {  {
1864    //    oss << "[" << m_children <<";"<<m_height <<"]";
1865    switch (getOpgroup(m_op))    switch (getOpgroup(m_op))
1866    {    {
1867    case G_IDENTITY:    case G_IDENTITY:
# Line 844  DataLazy::intoString(ostringstream& oss) Line 1891  DataLazy::intoString(ostringstream& oss)
1891      oss << ')';      oss << ')';
1892      break;      break;
1893    case G_UNARY:    case G_UNARY:
1894      case G_UNARY_P:
1895      case G_NP1OUT:
1896      case G_NP1OUT_P:
1897      case G_REDUCTION:
1898      oss << opToString(m_op) << '(';      oss << opToString(m_op) << '(';
1899      m_left->intoString(oss);      m_left->intoString(oss);
1900      oss << ')';      oss << ')';
1901      break;      break;
1902      case G_TENSORPROD:
1903        oss << opToString(m_op) << '(';
1904        m_left->intoString(oss);
1905        oss << ", ";
1906        m_right->intoString(oss);
1907        oss << ')';
1908        break;
1909      case G_NP1OUT_2P:
1910        oss << opToString(m_op) << '(';
1911        m_left->intoString(oss);
1912        oss << ", " << m_axis_offset << ", " << m_transpose;
1913        oss << ')';
1914        break;
1915      case G_CONDEVAL:
1916        oss << opToString(m_op)<< '(' ;
1917        m_mask->intoString(oss);
1918        oss << " ? ";
1919        m_left->intoString(oss);
1920        oss << " : ";
1921        m_right->intoString(oss);
1922        oss << ')';
1923        break;
1924    default:    default:
1925      oss << "UNKNOWN";      oss << "UNKNOWN";
1926    }    }
1927  }  }
1928    
1929    
1930    void
1931    DataLazy::intoTreeString(ostringstream& oss, string indent) const
1932    {
1933      oss << '[' << m_rank << ':' << setw(3) << m_samplesize << "] " << indent;
1934      switch (getOpgroup(m_op))
1935      {
1936      case G_IDENTITY:
1937        if (m_id->isExpanded())
1938        {
1939           oss << "E";
1940        }
1941        else if (m_id->isTagged())
1942        {
1943          oss << "T";
1944        }
1945        else if (m_id->isConstant())
1946        {
1947          oss << "C";
1948        }
1949        else
1950        {
1951          oss << "?";
1952        }
1953        oss << '@' << m_id.get() << endl;
1954        break;
1955      case G_BINARY:
1956        oss << opToString(m_op) << endl;
1957        indent+='.';
1958        m_left->intoTreeString(oss, indent);
1959        m_right->intoTreeString(oss, indent);
1960        break;
1961      case G_UNARY:
1962      case G_UNARY_P:
1963      case G_NP1OUT:
1964      case G_NP1OUT_P:
1965      case G_REDUCTION:
1966        oss << opToString(m_op) << endl;
1967        indent+='.';
1968        m_left->intoTreeString(oss, indent);
1969        break;
1970      case G_TENSORPROD:
1971        oss << opToString(m_op) << endl;
1972        indent+='.';
1973        m_left->intoTreeString(oss, indent);
1974        m_right->intoTreeString(oss, indent);
1975        break;
1976      case G_NP1OUT_2P:
1977        oss << opToString(m_op) << ", " << m_axis_offset << ", " << m_transpose<< endl;
1978        indent+='.';
1979        m_left->intoTreeString(oss, indent);
1980        break;
1981      default:
1982        oss << "UNKNOWN";
1983      }
1984    }
1985    
1986    
1987  DataAbstract*  DataAbstract*
1988  DataLazy::deepCopy()  DataLazy::deepCopy()
1989  {  {
1990    switch (getOpgroup(m_op))    switch (getOpgroup(m_op))
1991    {    {
1992    case G_IDENTITY:  return new DataLazy(m_id->deepCopy()->getPtr());    case G_IDENTITY:  return new DataLazy(m_id->deepCopy()->getPtr());
1993    case G_UNARY: return new DataLazy(m_left->deepCopy()->getPtr(),m_op);    case G_UNARY:
1994      case G_REDUCTION:      return new DataLazy(m_left->deepCopy()->getPtr(),m_op);
1995      case G_UNARY_P:   return new DataLazy(m_left->deepCopy()->getPtr(), m_op, m_tol);
1996    case G_BINARY:    return new DataLazy(m_left->deepCopy()->getPtr(),m_right->deepCopy()->getPtr(),m_op);    case G_BINARY:    return new DataLazy(m_left->deepCopy()->getPtr(),m_right->deepCopy()->getPtr(),m_op);
1997      case G_NP1OUT: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(),m_op);
1998      case G_TENSORPROD: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
1999      case G_NP1OUT_P:   return new DataLazy(m_left->deepCopy()->getPtr(),m_op,  m_axis_offset);
2000      case G_NP1OUT_2P:  return new DataLazy(m_left->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
2001    default:    default:
2002      throw DataException("Programmer error - do not know how to deepcopy operator "+opToString(m_op)+".");      throw DataException("Programmer error - do not know how to deepcopy operator "+opToString(m_op)+".");
2003    }    }
2004  }  }
2005    
2006    
2007    
2008    // There is no single, natural interpretation of getLength on DataLazy.
2009    // Instances of DataReady can look at the size of their vectors.
2010    // For lazy though, it could be the size the data would be if it were resolved;
2011    // or it could be some function of the lengths of the DataReady instances which
2012    // form part of the expression.
2013    // Rather than have people making assumptions, I have disabled the method.
2014  DataTypes::ValueType::size_type  DataTypes::ValueType::size_type
2015  DataLazy::getLength() const  DataLazy::getLength() const
2016  {  {
2017    return m_length;    throw DataException("getLength() does not make sense for lazy data.");
2018  }  }
2019    
2020    
# Line 936  DataLazy::getPointOffset(int sampleNo, Line 2080  DataLazy::getPointOffset(int sampleNo,
2080    throw DataException("Programmer error - getPointOffset on lazy data may require collapsing (but this object is marked const).");    throw DataException("Programmer error - getPointOffset on lazy data may require collapsing (but this object is marked const).");
2081  }  }
2082    
2083  // It would seem that DataTagged will need to be treated differently since even after setting all tags  
2084  // to zero, all the tags from all the DataTags would be in the result.  // I have decided to let Data:: handle this issue.
 // However since they all have the same value (0) whether they are there or not should not matter.  
 // So I have decided that for all types this method will create a constant 0.  
 // It can be promoted up as required.  
 // A possible efficiency concern might be expanded->constant->expanded which has an extra memory management  
 // but we can deal with that if it arrises.  
2085  void  void
2086  DataLazy::setToZero()  DataLazy::setToZero()
2087  {  {
2088    DataTypes::ValueType v(getNoValues(),0);  //   DataTypes::ValueType v(getNoValues(),0);
2089    m_id=DataReady_ptr(new DataConstant(getFunctionSpace(),getShape(),v));  //   m_id=DataReady_ptr(new DataConstant(getFunctionSpace(),getShape(),v));
2090    m_op=IDENTITY;  //   m_op=IDENTITY;
2091    m_right.reset();    //   m_right.reset();  
2092    m_left.reset();  //   m_left.reset();
2093    m_readytype='C';  //   m_readytype='C';
2094    m_buffsRequired=1;  //   m_buffsRequired=1;
2095    
2096      privdebug=privdebug;  // to stop the compiler complaining about unused privdebug
2097      throw DataException("Programmer error - setToZero not supported for DataLazy (DataLazy objects should be read only).");
2098    }
2099    
2100    bool
2101    DataLazy::actsExpanded() const
2102    {
2103        return (m_readytype=='E');
2104  }  }
2105    
2106  }   // end namespace  }   // end namespace

Legend:
Removed from v.1950  
changed lines
  Added in v.4154

  ViewVC Help
Powered by ViewVC 1.1.26