/[escript]/branches/clazy/escriptcore/src/DataLazy.cpp

Diff of /branches/clazy/escriptcore/src/DataLazy.cpp


revision 2082 by caltinay, Fri Nov 21 01:46:05 2008 UTC  vs.  revision 3981 by jfenwick, Fri Sep 21 02:47:54 2012 UTC
# Line 1  Line 1 
1    
2  /*******************************************************  /*****************************************************************************
3  *  *
4  * Copyright (c) 2003-2008 by University of Queensland  * Copyright (c) 2003-2012 by University of Queensland
5  * Earth Systems Science Computational Center (ESSCC)  * http://www.uq.edu.au
 * http://www.uq.edu.au/esscc  
6  *  *
7  * Primary Business: Queensland, Australia  * Primary Business: Queensland, Australia
8  * Licensed under the Open Software License version 3.0  * Licensed under the Open Software License version 3.0
9  * http://www.opensource.org/licenses/osl-3.0.php  * http://www.opensource.org/licenses/osl-3.0.php
10  *  *
11  *******************************************************/  * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12    * Development since 2012 by School of Earth Sciences
13    *
14    *****************************************************************************/
15    
16    
17  #include "DataLazy.h"  #include "DataLazy.h"
18  #ifdef USE_NETCDF  #include "esysUtils/Esys_MPI.h"
 #include <netcdfcpp.h>  
 #endif  
 #ifdef PASO_MPI  
 #include <mpi.h>  
 #endif  
19  #ifdef _OPENMP  #ifdef _OPENMP
20  #include <omp.h>  #include <omp.h>
21  #endif  #endif
# Line 28  Line 25 
25  #include "UnaryFuncs.h"     // for escript::fsign  #include "UnaryFuncs.h"     // for escript::fsign
26  #include "Utils.h"  #include "Utils.h"
27    
28    #include "EscriptParams.h"
29    
30    #ifdef USE_NETCDF
31    #include <netcdfcpp.h>
32    #endif
33    
34    #include <iomanip>      // for some fancy formatting in debug
35    
36    // #define LAZYDEBUG(X) if (privdebug){X;}
37    #define LAZYDEBUG(X)
38    namespace
39    {
40    bool privdebug=false;
41    
42    #define ENABLEDEBUG privdebug=true;
43    #define DISABLEDEBUG privdebug=false;
44    }
45    
46    // #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {cerr << "\n!!!!!!! SIZE LIMIT EXCEEDED " << m_children << ";" << m_height << endl << toString() << endl;resolveToIdentity();}
47    
48    // #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {cerr << "SIZE LIMIT EXCEEDED " << m_height << endl;resolveToIdentity();}
49    
50    
51    #define SIZELIMIT if (m_height>escript::escriptParams.getTOO_MANY_LEVELS())  {if (escript::escriptParams.getLAZY_VERBOSE()){cerr << "SIZE LIMIT EXCEEDED height=" << m_height << endl;}resolveToIdentity();}
52    
53  /*  /*
54  How does DataLazy work?  How does DataLazy work?
55  ~~~~~~~~~~~~~~~~~~~~~~~  ~~~~~~~~~~~~~~~~~~~~~~~
# Line 70  The convention that I use, is that the r Line 92  The convention that I use, is that the r
92  For expressions which evaluate to Constant or Tagged, there is a different evaluation method.  For expressions which evaluate to Constant or Tagged, there is a different evaluation method.
93  The collapse method invokes the (non-lazy) operations on the Data class to evaluate the expression.  The collapse method invokes the (non-lazy) operations on the Data class to evaluate the expression.
94    
95  To add a new operator you need to do the following (plus anything I might have forgotten):  To add a new operator you need to do the following (plus anything I might have forgotten - adding a new group for example):
96  1) Add to the ES_optype.  1) Add to the ES_optype.
97  2) determine what opgroup your operation belongs to (X)  2) determine what opgroup your operation belongs to (X)
98  3) add a string for the op to the end of ES_opstrings  3) add a string for the op to the end of ES_opstrings
# Line 90  namespace escript Line 112  namespace escript
112  namespace  namespace
113  {  {
114    
115    
116    // enabling this will print out when ever the maximum stacksize used by resolve increases
117    // it assumes _OPENMP is also in use
118    //#define LAZY_STACK_PROF
119    
120    
121    
122    #ifndef _OPENMP
123      #ifdef LAZY_STACK_PROF
124      #undef LAZY_STACK_PROF
125      #endif
126    #endif
127    
128    
129    #ifdef LAZY_STACK_PROF
130    std::vector<void*> stackstart(getNumberOfThreads());
131    std::vector<void*> stackend(getNumberOfThreads());
132    size_t maxstackuse=0;
133    #endif
134    
135  enum ES_opgroup  enum ES_opgroup
136  {  {
137     G_UNKNOWN,     G_UNKNOWN,
138     G_IDENTITY,     G_IDENTITY,
139     G_BINARY,        // pointwise operations with two arguments     G_BINARY,        // pointwise operations with two arguments
140     G_UNARY,     // pointwise operations with one argument     G_UNARY,     // pointwise operations with one argument
141       G_UNARY_P,       // pointwise operations with one argument, requiring a parameter
142     G_NP1OUT,        // non-pointwise op with one output     G_NP1OUT,        // non-pointwise op with one output
143     G_TENSORPROD     // general tensor product     G_NP1OUT_P,      // non-pointwise op with one output requiring a parameter
144       G_TENSORPROD,    // general tensor product
145       G_NP1OUT_2P,     // non-pointwise op with one output requiring two params
146       G_REDUCTION,     // non-pointwise unary op with a scalar output
147       G_CONDEVAL
148  };  };
149    
150    
# Line 108  string ES_opstrings[]={"UNKNOWN","IDENTI Line 155  string ES_opstrings[]={"UNKNOWN","IDENTI
155              "asin","acos","atan","sinh","cosh","tanh","erf",              "asin","acos","atan","sinh","cosh","tanh","erf",
156              "asinh","acosh","atanh",              "asinh","acosh","atanh",
157              "log10","log","sign","abs","neg","pos","exp","sqrt",              "log10","log","sign","abs","neg","pos","exp","sqrt",
158              "1/","where>0","where<0","where>=0","where<=0",              "1/","where>0","where<0","where>=0","where<=0", "where<>0","where=0",
159              "symmetric","nonsymmetric",              "symmetric","nonsymmetric",
160              "prod"};              "prod",
161  int ES_opcount=36;              "transpose", "trace",
162                "swapaxes",
163                "minval", "maxval",
164                "condEval"};
165    int ES_opcount=44;
166  ES_opgroup opgroups[]={G_UNKNOWN,G_IDENTITY,G_BINARY,G_BINARY,G_BINARY,G_BINARY, G_BINARY,  ES_opgroup opgroups[]={G_UNKNOWN,G_IDENTITY,G_BINARY,G_BINARY,G_BINARY,G_BINARY, G_BINARY,
167              G_UNARY,G_UNARY,G_UNARY, //10              G_UNARY,G_UNARY,G_UNARY, //10
168              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,    // 17              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,    // 17
169              G_UNARY,G_UNARY,G_UNARY,                    // 20              G_UNARY,G_UNARY,G_UNARY,                    // 20
170              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,        // 28              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,    // 28
171              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,            // 33              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY, G_UNARY_P, G_UNARY_P,      // 35
172              G_NP1OUT,G_NP1OUT,              G_NP1OUT,G_NP1OUT,
173              G_TENSORPROD};              G_TENSORPROD,
174                G_NP1OUT_P, G_NP1OUT_P,
175                G_NP1OUT_2P,
176                G_REDUCTION, G_REDUCTION,
177                G_CONDEVAL};
178  inline  inline
179  ES_opgroup  ES_opgroup
180  getOpgroup(ES_optype op)  getOpgroup(ES_optype op)
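The body of getOpgroup is elided from this hunk; presumably it just indexes the opgroups table above, which is also why the "add a new operator" steps earlier insist on keeping ES_optype, ES_opstrings, opgroups and ES_opcount in sync. A minimal, self-contained sketch of that lookup and of the parallel tables a hypothetical operator "foo" would extend (all demo* names are illustrative, not part of DataLazy.cpp):

    // Illustrative stand-ins for ES_optype/ES_opstrings/opgroups/ES_opcount.
    // A hypothetical operator "foo" is appended at the same index in all
    // three tables, and the count is bumped to keep them consistent.
    enum Demo_optype  { D_UNKNOWN, D_SIN, D_FOO };
    enum Demo_opgroup { DG_UNKNOWN, DG_UNARY };
    const char*  demo_opstrings[] = {"UNKNOWN", "sin", "foo"};
    Demo_opgroup demo_opgroups[]  = {DG_UNKNOWN, DG_UNARY, DG_UNARY};
    int demo_opcount = 3;

    // Assumed shape of the lookup: the op value indexes the group table.
    inline Demo_opgroup demoGetOpgroup(Demo_optype op)
    {
        return demo_opgroups[op];
    }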
# Line 164  resultShape(DataAbstract_ptr left, DataA Line 219  resultShape(DataAbstract_ptr left, DataA
219        {        {
220          throw DataException("Shapes not the same - shapes must match for (point)binary operations.");          throw DataException("Shapes not the same - shapes must match for (point)binary operations.");
221        }        }
222    
223        if (left->getRank()==0)   // we need to allow scalar * anything        if (left->getRank()==0)   // we need to allow scalar * anything
224        {        {
225          return right->getShape();          return right->getShape();
# Line 177  resultShape(DataAbstract_ptr left, DataA Line 233  resultShape(DataAbstract_ptr left, DataA
233      return left->getShape();      return left->getShape();
234  }  }
235    
236    // return the shape for "op left"
237    
238    DataTypes::ShapeType
239    resultShape(DataAbstract_ptr left, ES_optype op, int axis_offset)
240    {
241        switch(op)
242        {
243            case TRANS:
244           {            // for the scoping of variables
245            const DataTypes::ShapeType& s=left->getShape();
246            DataTypes::ShapeType sh;
247            int rank=left->getRank();
248            if (axis_offset<0 || axis_offset>rank)
249            {
250                stringstream e;
251                e << "Error - Data::transpose must have 0 <= axis_offset <= rank=" << rank;
252                throw DataException(e.str());
253            }
254            for (int i=0; i<rank; i++)
255            {
256               int index = (axis_offset+i)%rank;
257               sh.push_back(s[index]); // Append to new shape
258            }
259            return sh;
260           }
261        break;
262        case TRACE:
263           {
264            int rank=left->getRank();
265            if (rank<2)
266            {
267               throw DataException("Trace can only be computed for objects with rank 2 or greater.");
268            }
269            if ((axis_offset>rank-2) || (axis_offset<0))
270            {
271               throw DataException("Trace: axis offset must lie between 0 and rank-2 inclusive.");
272            }
273            if (rank==2)
274            {
275               return DataTypes::scalarShape;
276            }
277            else if (rank==3)
278            {
279               DataTypes::ShapeType sh;
280                   if (axis_offset==0)
281               {
282                    sh.push_back(left->getShape()[2]);
283                   }
284                   else     // offset==1
285               {
286                sh.push_back(left->getShape()[0]);
287                   }
288               return sh;
289            }
290            else if (rank==4)
291            {
292               DataTypes::ShapeType sh;
293               const DataTypes::ShapeType& s=left->getShape();
294                   if (axis_offset==0)
295               {
296                    sh.push_back(s[2]);
297                    sh.push_back(s[3]);
298                   }
299                   else if (axis_offset==1)
300               {
301                    sh.push_back(s[0]);
302                    sh.push_back(s[3]);
303                   }
304               else     // offset==2
305               {
306                sh.push_back(s[0]);
307                sh.push_back(s[1]);
308               }
309               return sh;
310            }
311            else        // unknown rank
312            {
313               throw DataException("Error - Data::trace can only be calculated for rank 2, 3 or 4 object.");
314            }
315           }
316        break;
317            default:
318        throw DataException("Programmer error - resultShape(left,op) can't compute shapes for operator "+opToString(op)+".");
319        }
320    }
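A concrete instance of the TRANS rotation rule above (sh[i] = s[(axis_offset+i) % rank]), written against std::vector<int> rather than DataTypes::ShapeType:

    #include <vector>

    // For s=(2,3,4) and axis_offset=1 the loop above picks s[1],s[2],s[0],
    // i.e. the transposed shape is (3,4,2); for TRACE on a rank-2 shape the
    // result collapses to a scalar.
    std::vector<int> demoTransShape(const std::vector<int>& s, int axis_offset)
    {
        std::vector<int> sh;
        int rank = static_cast<int>(s.size());
        for (int i = 0; i < rank; i++)
        {
            sh.push_back(s[(axis_offset + i) % rank]);
        }
        return sh;   // demoTransShape({2,3,4}, 1) == {3,4,2}
    }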
321    
322    DataTypes::ShapeType
323    SwapShape(DataAbstract_ptr left, const int axis0, const int axis1)
324    {
325         // This code taken from the Data.cpp swapaxes() method
326         // Some of the checks are probably redundant here
327         int axis0_tmp,axis1_tmp;
328         const DataTypes::ShapeType& s=left->getShape();
329         DataTypes::ShapeType out_shape;
330         // Here's the equivalent of python s_out=s[axis_offset:]+s[:axis_offset]
331         // which goes thru all shape vector elements starting with axis_offset (at index=rank wrap around to 0)
332         int rank=left->getRank();
333         if (rank<2) {
334            throw DataException("Error - Data::swapaxes argument must have at least rank 2.");
335         }
336         if (axis0<0 || axis0>rank-1) {
337            stringstream e;
338            e << "Error - Data::swapaxes: axis0 must be between 0 and rank-1=" << (rank-1);
339            throw DataException(e.str());
340         }
341         if (axis1<0 || axis1>rank-1) {
342            stringstream e;
343            e << "Error - Data::swapaxes: axis1 must be between 0 and rank-1=" << (rank-1);
344            throw DataException(e.str());
345         }
346         if (axis0 == axis1) {
347             throw DataException("Error - Data::swapaxes: axis indices must be different.");
348         }
349         if (axis0 > axis1) {
350             axis0_tmp=axis1;
351             axis1_tmp=axis0;
352         } else {
353             axis0_tmp=axis0;
354             axis1_tmp=axis1;
355         }
356         for (int i=0; i<rank; i++) {
357           if (i == axis0_tmp) {
358              out_shape.push_back(s[axis1_tmp]);
359           } else if (i == axis1_tmp) {
360              out_shape.push_back(s[axis0_tmp]);
361           } else {
362              out_shape.push_back(s[i]);
363           }
364         }
365        return out_shape;
366    }
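The swap rule above amounts to exchanging two extents; a compact equivalent (minus the range checks), again with std::vector<int> standing in for DataTypes::ShapeType:

    #include <utility>
    #include <vector>

    // demoSwapShape({2,3,4}, 0, 2) == {4,3,2}, matching the loop above.
    std::vector<int> demoSwapShape(const std::vector<int>& s, int axis0, int axis1)
    {
        std::vector<int> out(s);
        std::swap(out[axis0], out[axis1]);
        return out;
    }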
367    
368    
369  // determine the output shape for the general tensor product operation  // determine the output shape for the general tensor product operation
370  // the additional parameters return information required later for the product  // the additional parameters return information required later for the product
371  // the majority of this code is copy pasted from C_General_Tensor_Product  // the majority of this code is copy pasted from C_General_Tensor_Product
# Line 197  GTPShape(DataAbstract_ptr left, DataAbst Line 386  GTPShape(DataAbstract_ptr left, DataAbst
386    else if (transpose == 2)  { start1 = rank1-axis_offset; }    else if (transpose == 2)  { start1 = rank1-axis_offset; }
387    else              { throw DataException("DataLazy GeneralTensorProduct Constructor: Error - transpose should be 0, 1 or 2"); }    else              { throw DataException("DataLazy GeneralTensorProduct Constructor: Error - transpose should be 0, 1 or 2"); }
388    
389      if (rank0<axis_offset)
390      {
391        throw DataException("DataLazy GeneralTensorProduct Constructor: Error - rank of left < axisoffset");
392      }
393    
394    // Adjust the shapes for transpose    // Adjust the shapes for transpose
395    DataTypes::ShapeType tmpShape0(rank0);    // pre-sizing the vectors rather    DataTypes::ShapeType tmpShape0(rank0);    // pre-sizing the vectors rather
# Line 226  GTPShape(DataAbstract_ptr left, DataAbst Line 419  GTPShape(DataAbstract_ptr left, DataAbst
419       for (int i=0; i<rank0-axis_offset; i++, ++out_index) { shape2[out_index]=tmpShape0[i]; } // First part of arg_0_Z       for (int i=0; i<rank0-axis_offset; i++, ++out_index) { shape2[out_index]=tmpShape0[i]; } // First part of arg_0_Z
420       for (int i=axis_offset; i<rank1; i++, ++out_index)   { shape2[out_index]=tmpShape1[i]; } // Last part of arg_1_Z       for (int i=axis_offset; i<rank1; i++, ++out_index)   { shape2[out_index]=tmpShape1[i]; } // Last part of arg_1_Z
421    }    }
   return shape2;  
 }  
422    
423      if (shape2.size()>ESCRIPT_MAX_DATA_RANK)
424      {
425         ostringstream os;
426         os << "C_GeneralTensorProduct: Error - Attempt to create a rank " << shape2.size() << " object. The maximum rank is " << ESCRIPT_MAX_DATA_RANK << ".";
427         throw DataException(os.str());
428      }
429    
430  // determine the number of points in the result of "left op right"    return shape2;
 // note that determining the resultLength for G_TENSORPROD is more complex and will not be processed here  
 // size_t  
 // resultLength(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)  
 // {  
 //    switch (getOpgroup(op))  
 //    {  
 //    case G_BINARY: return left->getLength();  
 //    case G_UNARY: return left->getLength();  
 //    case G_NP1OUT: return left->getLength();  
 //    default:  
 //  throw DataException("Programmer Error - attempt to getLength() for operator "+opToString(op)+".");  
 //    }  
 // }  
   
 // determine the number of samples requires to evaluate an expression combining left and right  
 // NP1OUT needs an extra buffer because we can't write the answers over the top of the input.  
 // The same goes for G_TENSORPROD  
 int  
 calcBuffs(const DataLazy_ptr& left, const DataLazy_ptr& right, ES_optype op)  
 {  
    switch(getOpgroup(op))  
    {  
    case G_IDENTITY: return 1;  
    case G_BINARY: return max(left->getBuffsRequired(),right->getBuffsRequired()+1);  
    case G_UNARY: return max(left->getBuffsRequired(),1);  
    case G_NP1OUT: return 1+max(left->getBuffsRequired(),1);  
    case G_TENSORPROD: return 1+max(left->getBuffsRequired(),right->getBuffsRequired()+1);  
    default:  
     throw DataException("Programmer Error - attempt to calcBuffs() for operator "+opToString(op)+".");  
    }  
431  }  }
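For the transpose==0 case, the product shape is the first rank0-axis_offset extents of the left operand followed by the last rank1-axis_offset extents of the right one, so the result rank is rank0+rank1-2*axis_offset. A small worked example of that rule (illustrative code, not the escript API):

    #include <vector>

    // Matrix-matrix product: left shape (4,3), right shape (3,5), axis_offset=1.
    // One axis is contracted, so shape2 becomes (4,5).
    std::vector<int> demoGTPShape()
    {
        const std::vector<int> left  = {4, 3};
        const std::vector<int> right = {3, 5};
        const int axis_offset = 1;
        std::vector<int> shape2;
        for (int i = 0; i < (int)left.size() - axis_offset; ++i)  shape2.push_back(left[i]);
        for (int i = axis_offset; i < (int)right.size(); ++i)     shape2.push_back(right[i]);
        return shape2;   // {4, 5}
    }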
432    
   
433  }   // end anonymous namespace  }   // end anonymous namespace
434    
435    
# Line 279  opToString(ES_optype op) Line 445  opToString(ES_optype op)
445    return ES_opstrings[op];    return ES_opstrings[op];
446  }  }
447    
448    void DataLazy::LazyNodeSetup()
449    {
450    #ifdef _OPENMP
451        int numthreads=omp_get_max_threads();
452        m_samples.resize(numthreads*m_samplesize);
453        m_sampleids=new int[numthreads];
454        for (int i=0;i<numthreads;++i)
455        {
456            m_sampleids[i]=-1;  
457        }
458    #else
459        m_samples.resize(m_samplesize);
460        m_sampleids=new int[1];
461        m_sampleids[0]=-1;
462    #endif  // _OPENMP
463    }
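LazyNodeSetup gives every OpenMP thread its own slice of m_samples plus a cached sample id (initialised to -1, meaning "nothing resolved yet"); the resolveNode* methods later in this diff write thread tid's result at offset tid*m_samplesize. A rough stand-alone sketch of that layout (illustrative names, not the real members):

    #include <cstddef>
    #include <vector>

    struct DemoLazyBuffers
    {
        std::vector<double> samples;    // numthreads * samplesize doubles
        std::vector<int>    sampleids;  // last sample resolved per thread, -1 = none
        std::size_t         samplesize;

        DemoLazyBuffers(int numthreads, std::size_t samplesize_)
            : samples(numthreads * samplesize_),
              sampleids(numthreads, -1),
              samplesize(samplesize_) {}

        // Mirrors roffset = m_samplesize*tid in the resolve methods.
        double* slot(int tid) { return &samples[tid * samplesize]; }
    };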
464    
465    
466    // Creates an identity node
467  DataLazy::DataLazy(DataAbstract_ptr p)  DataLazy::DataLazy(DataAbstract_ptr p)
468      : parent(p->getFunctionSpace(),p->getShape()),      : parent(p->getFunctionSpace(),p->getShape())
469      m_op(IDENTITY),      ,m_sampleids(0),
470      m_axis_offset(0),      m_samples(1)
     m_transpose(0),  
     m_SL(0), m_SM(0), m_SR(0)  
471  {  {
472     if (p->isLazy())     if (p->isLazy())
473     {     {
# Line 296  DataLazy::DataLazy(DataAbstract_ptr p) Line 478  DataLazy::DataLazy(DataAbstract_ptr p)
478     }     }
479     else     else
480     {     {
481      m_id=dynamic_pointer_cast<DataReady>(p);      p->makeLazyShared();
482      if(p->isConstant()) {m_readytype='C';}      DataReady_ptr dr=dynamic_pointer_cast<DataReady>(p);
483      else if(p->isExpanded()) {m_readytype='E';}      makeIdentity(dr);
484      else if (p->isTagged()) {m_readytype='T';}  LAZYDEBUG(cout << "Wrapping " << dr.get() << " id=" << m_id.get() << endl;)
     else {throw DataException("Unknown DataReady instance in DataLazy constructor.");}  
485     }     }
486     m_buffsRequired=1;  LAZYDEBUG(cout << "(1)Lazy created with " << m_samplesize << endl;)
    m_samplesize=getNumDPPSample()*getNoValues();  
    m_maxsamplesize=m_samplesize;  
 cout << "(1)Lazy created with " << m_samplesize << endl;  
487  }  }
488    
   
   
   
489  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op)  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op)
490      : parent(left->getFunctionSpace(),left->getShape()),      : parent(left->getFunctionSpace(),(getOpgroup(op)!=G_REDUCTION)?left->getShape():DataTypes::scalarShape),
491      m_op(op),      m_op(op),
492      m_axis_offset(0),      m_axis_offset(0),
493      m_transpose(0),      m_transpose(0),
494      m_SL(0), m_SM(0), m_SR(0)      m_SL(0), m_SM(0), m_SR(0)
495  {  {
496     if ((getOpgroup(op)!=G_UNARY) && (getOpgroup(op)!=G_NP1OUT))     if ((getOpgroup(op)!=G_UNARY) && (getOpgroup(op)!=G_NP1OUT) && (getOpgroup(op)!=G_REDUCTION))
497     {     {
498      throw DataException("Programmer error - constructor DataLazy(left, op) will only process UNARY operations.");      throw DataException("Programmer error - constructor DataLazy(left, op) will only process UNARY operations.");
499     }     }
# Line 334  DataLazy::DataLazy(DataAbstract_ptr left Line 509  DataLazy::DataLazy(DataAbstract_ptr left
509     }     }
510     m_readytype=lleft->m_readytype;     m_readytype=lleft->m_readytype;
511     m_left=lleft;     m_left=lleft;
    m_buffsRequired=calcBuffs(m_left, m_right,m_op); // yeah m_right will be null at this point  
512     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
513     m_maxsamplesize=max(m_samplesize,m_left->getMaxSampleSize());     m_children=m_left->m_children+1;
514       m_height=m_left->m_height+1;
515       LazyNodeSetup();
516       SIZELIMIT
517  }  }
518    
519    
# Line 346  DataLazy::DataLazy(DataAbstract_ptr left Line 523  DataLazy::DataLazy(DataAbstract_ptr left
523      m_op(op),      m_op(op),
524      m_SL(0), m_SM(0), m_SR(0)      m_SL(0), m_SM(0), m_SR(0)
525  {  {
526    LAZYDEBUG(cout << "Forming operator with " << left.get() << " " << right.get() << endl;)
527     if ((getOpgroup(op)!=G_BINARY))     if ((getOpgroup(op)!=G_BINARY))
528     {     {
529      throw DataException("Programmer error - constructor DataLazy(left, right, op) will only process BINARY operations.");      throw DataException("Programmer error - constructor DataLazy(left, right, op) will only process BINARY operations.");
# Line 362  DataLazy::DataLazy(DataAbstract_ptr left Line 540  DataLazy::DataLazy(DataAbstract_ptr left
540     {     {
541      Data tmp(Data(right),getFunctionSpace());      Data tmp(Data(right),getFunctionSpace());
542      right=tmp.borrowDataPtr();      right=tmp.borrowDataPtr();
543    LAZYDEBUG(cout << "Right interpolation required " << right.get() << endl;)
544     }     }
545     left->operandCheck(*right);     left->operandCheck(*right);
546    
547     if (left->isLazy())          // the children need to be DataLazy. Wrap them in IDENTITY if required     if (left->isLazy())          // the children need to be DataLazy. Wrap them in IDENTITY if required
548     {     {
549      m_left=dynamic_pointer_cast<DataLazy>(left);      m_left=dynamic_pointer_cast<DataLazy>(left);
550    LAZYDEBUG(cout << "Left is " << m_left->toString() << endl;)
551     }     }
552     else     else
553     {     {
554      m_left=DataLazy_ptr(new DataLazy(left));      m_left=DataLazy_ptr(new DataLazy(left));
555    LAZYDEBUG(cout << "Left " << left.get() << " wrapped " << m_left->m_id.get() << endl;)
556     }     }
557     if (right->isLazy())     if (right->isLazy())
558     {     {
559      m_right=dynamic_pointer_cast<DataLazy>(right);      m_right=dynamic_pointer_cast<DataLazy>(right);
560    LAZYDEBUG(cout << "Right is " << m_right->toString() << endl;)
561     }     }
562     else     else
563     {     {
564      m_right=DataLazy_ptr(new DataLazy(right));      m_right=DataLazy_ptr(new DataLazy(right));
565    LAZYDEBUG(cout << "Right " << right.get() << " wrapped " << m_right->m_id.get() << endl;)
566     }     }
567     char lt=m_left->m_readytype;     char lt=m_left->m_readytype;
568     char rt=m_right->m_readytype;     char rt=m_right->m_readytype;
# Line 396  DataLazy::DataLazy(DataAbstract_ptr left Line 579  DataLazy::DataLazy(DataAbstract_ptr left
579      m_readytype='C';      m_readytype='C';
580     }     }
581     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
582     m_maxsamplesize=max(max(m_samplesize,m_right->getMaxSampleSize()),m_left->getMaxSampleSize());       m_children=m_left->m_children+m_right->m_children+2;
583     m_buffsRequired=calcBuffs(m_left, m_right,m_op);     m_height=max(m_left->m_height,m_right->m_height)+1;
584  cout << "(3)Lazy created with " << m_samplesize << endl;     LazyNodeSetup();
585       SIZELIMIT
586    LAZYDEBUG(cout << "(3)Lazy created with " << m_samplesize << endl;)
587  }  }
588    
589  DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op, int axis_offset, int transpose)  DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op, int axis_offset, int transpose)
# Line 427  DataLazy::DataLazy(DataAbstract_ptr left Line 612  DataLazy::DataLazy(DataAbstract_ptr left
612      Data tmp(Data(right),getFunctionSpace());      Data tmp(Data(right),getFunctionSpace());
613      right=tmp.borrowDataPtr();      right=tmp.borrowDataPtr();
614     }     }
615     left->operandCheck(*right);  //    left->operandCheck(*right);
616    
617     if (left->isLazy())          // the children need to be DataLazy. Wrap them in IDENTITY if required     if (left->isLazy())          // the children need to be DataLazy. Wrap them in IDENTITY if required
618     {     {
# Line 460  DataLazy::DataLazy(DataAbstract_ptr left Line 645  DataLazy::DataLazy(DataAbstract_ptr left
645      m_readytype='C';      m_readytype='C';
646     }     }
647     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
648     m_maxsamplesize=max(max(m_samplesize,m_right->getMaxSampleSize()),m_left->getMaxSampleSize());       m_children=m_left->m_children+m_right->m_children+2;
649     m_buffsRequired=calcBuffs(m_left, m_right,m_op);     m_height=max(m_left->m_height,m_right->m_height)+1;
650  cout << "(4)Lazy created with " << m_samplesize << endl;     LazyNodeSetup();
651       SIZELIMIT
652    LAZYDEBUG(cout << "(4)Lazy created with " << m_samplesize << endl;)
653  }  }
654    
655    
656  DataLazy::~DataLazy()  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, int axis_offset)
657        : parent(left->getFunctionSpace(), resultShape(left,op, axis_offset)),
658        m_op(op),
659        m_axis_offset(axis_offset),
660        m_transpose(0),
661        m_tol(0)
662    {
663       if ((getOpgroup(op)!=G_NP1OUT_P))
664       {
665        throw DataException("Programmer error - constructor DataLazy(left, op, ax) will only process UNARY operations which require parameters.");
666       }
667       DataLazy_ptr lleft;
668       if (!left->isLazy())
669       {
670        lleft=DataLazy_ptr(new DataLazy(left));
671       }
672       else
673       {
674        lleft=dynamic_pointer_cast<DataLazy>(left);
675       }
676       m_readytype=lleft->m_readytype;
677       m_left=lleft;
678       m_samplesize=getNumDPPSample()*getNoValues();
679       m_children=m_left->m_children+1;
680       m_height=m_left->m_height+1;
681       LazyNodeSetup();
682       SIZELIMIT
683    LAZYDEBUG(cout << "(5)Lazy created with " << m_samplesize << endl;)
684    }
685    
686    DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, double tol)
687        : parent(left->getFunctionSpace(), left->getShape()),
688        m_op(op),
689        m_axis_offset(0),
690        m_transpose(0),
691        m_tol(tol)
692  {  {
693       if ((getOpgroup(op)!=G_UNARY_P))
694       {
695        throw DataException("Programmer error - constructor DataLazy(left, op, tol) will only process UNARY operations which require parameters.");
696       }
697       DataLazy_ptr lleft;
698       if (!left->isLazy())
699       {
700        lleft=DataLazy_ptr(new DataLazy(left));
701       }
702       else
703       {
704        lleft=dynamic_pointer_cast<DataLazy>(left);
705       }
706       m_readytype=lleft->m_readytype;
707       m_left=lleft;
708       m_samplesize=getNumDPPSample()*getNoValues();
709       m_children=m_left->m_children+1;
710       m_height=m_left->m_height+1;
711       LazyNodeSetup();
712       SIZELIMIT
713    LAZYDEBUG(cout << "(6)Lazy created with " << m_samplesize << endl;)
714  }  }
715    
716    
717  int  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, const int axis0, const int axis1)
718  DataLazy::getBuffsRequired() const      : parent(left->getFunctionSpace(), SwapShape(left,axis0,axis1)),
719        m_op(op),
720        m_axis_offset(axis0),
721        m_transpose(axis1),
722        m_tol(0)
723  {  {
724      return m_buffsRequired;     if ((getOpgroup(op)!=G_NP1OUT_2P))
725       {
726        throw DataException("Programmer error - constructor DataLazy(left, op, axis0, axis1) will only process UNARY operations which require two integer parameters.");
727       }
728       DataLazy_ptr lleft;
729       if (!left->isLazy())
730       {
731        lleft=DataLazy_ptr(new DataLazy(left));
732       }
733       else
734       {
735        lleft=dynamic_pointer_cast<DataLazy>(left);
736       }
737       m_readytype=lleft->m_readytype;
738       m_left=lleft;
739       m_samplesize=getNumDPPSample()*getNoValues();
740       m_children=m_left->m_children+1;
741       m_height=m_left->m_height+1;
742       LazyNodeSetup();
743       SIZELIMIT
744    LAZYDEBUG(cout << "(7)Lazy created with " << m_samplesize << endl;)
745  }  }
746    
747    
748  size_t  namespace
 DataLazy::getMaxSampleSize() const  
749  {  {
750      return m_maxsamplesize;  
751        inline int max3(int a, int b, int c)
752        {
753        int t=(a>b?a:b);
754        return (t>c?t:c);
755    
756        }
757    }
758    
759    DataLazy::DataLazy(DataAbstract_ptr mask, DataAbstract_ptr left, DataAbstract_ptr right/*, double tol*/)
760        : parent(left->getFunctionSpace(), left->getShape()),
761        m_op(CONDEVAL),
762        m_axis_offset(0),
763        m_transpose(0),
764        m_tol(0)
765    {
766    
767       DataLazy_ptr lmask;
768       DataLazy_ptr lleft;
769       DataLazy_ptr lright;
770       if (!mask->isLazy())
771       {
772        lmask=DataLazy_ptr(new DataLazy(mask));
773       }
774       else
775       {
776        lmask=dynamic_pointer_cast<DataLazy>(mask);
777       }
778       if (!left->isLazy())
779       {
780        lleft=DataLazy_ptr(new DataLazy(left));
781       }
782       else
783       {
784        lleft=dynamic_pointer_cast<DataLazy>(left);
785       }
786       if (!right->isLazy())
787       {
788        lright=DataLazy_ptr(new DataLazy(right));
789       }
790       else
791       {
792        lright=dynamic_pointer_cast<DataLazy>(right);
793       }
794       m_readytype=lmask->m_readytype;
795       if ((lleft->m_readytype!=lright->m_readytype) || (lmask->m_readytype!=lleft->m_readytype))
796       {
797        throw DataException("Programmer Error - condEval arguments must have the same readytype");
798       }
799       m_left=lleft;
800       m_right=lright;
801       m_mask=lmask;
802       m_samplesize=getNumDPPSample()*getNoValues();
803       m_children=m_left->m_children+m_right->m_children+m_mask->m_children+1;
804       m_height=max3(m_left->m_height,m_right->m_height,m_mask->m_height)+1;
805       LazyNodeSetup();
806       SIZELIMIT
807    LAZYDEBUG(cout << "(8)Lazy created with " << m_samplesize << endl;)
808    }
809    
810    
811    
812    DataLazy::~DataLazy()
813    {
814       delete[] m_sampleids;
815  }  }
816    
817    
818  /*  /*
819    \brief Evaluates the expression using methods on Data.    \brief Evaluates the expression using methods on Data.
820    This does the work for the collapse method.    This does the work for the collapse method.
# Line 602  DataLazy::collapseToReady() Line 933  DataLazy::collapseToReady()
933      case LEZ:      case LEZ:
934      result=left.whereNonPositive();      result=left.whereNonPositive();
935      break;      break;
936        case NEZ:
937        result=left.whereNonZero(m_tol);
938        break;
939        case EZ:
940        result=left.whereZero(m_tol);
941        break;
942      case SYM:      case SYM:
943      result=left.symmetric();      result=left.symmetric();
944      break;      break;
# Line 611  DataLazy::collapseToReady() Line 948  DataLazy::collapseToReady()
948      case PROD:      case PROD:
949      result=C_GeneralTensorProduct(left,right,m_axis_offset, m_transpose);      result=C_GeneralTensorProduct(left,right,m_axis_offset, m_transpose);
950      break;      break;
951        case TRANS:
952        result=left.transpose(m_axis_offset);
953        break;
954        case TRACE:
955        result=left.trace(m_axis_offset);
956        break;
957        case SWAP:
958        result=left.swapaxes(m_axis_offset, m_transpose);
959        break;
960        case MINVAL:
961        result=left.minval();
962        break;
963        case MAXVAL:
964      result=left.maxval();
965        break;
966      default:      default:
967      throw DataException("Programmer error - collapseToReady does not know how to resolve operator "+opToString(m_op)+".");      throw DataException("Programmer error - collapseToReady does not know how to resolve operator "+opToString(m_op)+".");
968    }    }
# Line 638  DataLazy::collapse() Line 990  DataLazy::collapse()
990    m_op=IDENTITY;    m_op=IDENTITY;
991  }  }
992    
993  /*  
994    \brief Compute the value of the expression (unary operation) for the given sample.  
995    \return Vector which stores the value of the subexpression for the given sample.  
996    \param v A vector to store intermediate results.  
997    \param offset Index in v to begin storing results.  
998    \param sampleNo Sample number to evaluate.  #define PROC_OP(TYPE,X)                               \
999    \param roffset (output parameter) the offset in the return vector where the result begins.      for (int j=0;j<onumsteps;++j)\
1000        {\
1001    The return value will be an existing vector so do not deallocate it.        for (int i=0;i<numsteps;++i,resultp+=resultStep) \
1002    If the result is stored in v it should be stored at the offset given.        { \
1003    Everything from offset to the end of v should be considered available for this method to use.  LAZYDEBUG(cout << "[left,right]=[" << lroffset << "," << rroffset << "]" << endl;)\
1004  */  LAZYDEBUG(cout << "{left,right}={" << (*left)[lroffset] << "," << (*right)[rroffset] << "}\n";)\
1005  DataTypes::ValueType*           tensor_binary_operation< TYPE >(chunksize, &((*left)[lroffset]), &((*right)[rroffset]), resultp, X); \
1006  DataLazy::resolveUnary(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const  LAZYDEBUG(cout << " result=      " << resultp[0] << endl;) \
1007             lroffset+=leftstep; \
1008             rroffset+=rightstep; \
1009          }\
1010          lroffset+=oleftstep;\
1011          rroffset+=orightstep;\
1012        }
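One expansion of PROC_OP walks the two operand buffers with the strides worked out in resolveNodeBinary and applies a binary functor chunk by chunk (tensor_binary_operation is assumed to apply the functor elementwise over chunksize doubles). A stripped-down, self-contained version of the same traversal, using plain std:: types:

    #include <cstddef>
    #include <functional>
    #include <vector>

    // `op` is applied to `chunksize` doubles at a time; the read positions
    // advance by (leftstep, rightstep) per inner step and by (oleftstep,
    // orightstep) per outer step, just like the macro above.
    template <typename BinOp>
    void demoProcOp(const std::vector<double>& left, const std::vector<double>& right,
                    double* resultp, std::size_t chunksize,
                    std::ptrdiff_t leftstep, std::ptrdiff_t rightstep,
                    std::ptrdiff_t oleftstep, std::ptrdiff_t orightstep,
                    int numsteps, int onumsteps, std::ptrdiff_t resultStep, BinOp op)
    {
        std::ptrdiff_t lroffset = 0, rroffset = 0;
        for (int j = 0; j < onumsteps; ++j)
        {
            for (int i = 0; i < numsteps; ++i, resultp += resultStep)
            {
                for (std::size_t k = 0; k < chunksize; ++k)
                {
                    resultp[k] = op(left[lroffset + k], right[rroffset + k]);
                }
                lroffset += leftstep;
                rroffset += rightstep;
            }
            lroffset += oleftstep;
            rroffset += orightstep;
        }
    }
    // e.g. passing std::plus<double>() reproduces the ADD case handled later.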
1013    
1014    
1015    // The result will be stored in m_samples
1016    // The return value is a pointer to the DataVector, offset is the offset within the return value
1017    const DataTypes::ValueType*
1018    DataLazy::resolveNodeSample(int tid, int sampleNo, size_t& roffset)
1019    {
1020    LAZYDEBUG(cout << "Resolve sample " << toString() << endl;)
1021        // collapse so we have a 'E' node or an IDENTITY for some other type
1022      if (m_readytype!='E' && m_op!=IDENTITY)
1023      {
1024        collapse();
1025      }
1026      if (m_op==IDENTITY)  
1027      {
1028        const ValueType& vec=m_id->getVectorRO();
1029        roffset=m_id->getPointOffset(sampleNo, 0);
1030    #ifdef LAZY_STACK_PROF
1031    int x;
1032    if (&x<stackend[omp_get_thread_num()])
1033    {
1034           stackend[omp_get_thread_num()]=&x;
1035    }
1036    #endif
1037        return &(vec);
1038      }
1039      if (m_readytype!='E')
1040      {
1041        throw DataException("Programmer Error - Collapse did not produce an expanded node.");
1042      }
1043      if (m_sampleids[tid]==sampleNo)
1044      {
1045        roffset=tid*m_samplesize;
1046        return &(m_samples);        // sample is already resolved
1047      }
1048      m_sampleids[tid]=sampleNo;
1049    
1050      switch (getOpgroup(m_op))
1051      {
1052      case G_UNARY:
1053      case G_UNARY_P: return resolveNodeUnary(tid, sampleNo, roffset);
1054      case G_BINARY: return resolveNodeBinary(tid, sampleNo, roffset);
1055      case G_NP1OUT: return resolveNodeNP1OUT(tid, sampleNo, roffset);
1056      case G_NP1OUT_P: return resolveNodeNP1OUT_P(tid, sampleNo, roffset);
1057      case G_TENSORPROD: return resolveNodeTProd(tid, sampleNo, roffset);
1058      case G_NP1OUT_2P: return resolveNodeNP1OUT_2P(tid, sampleNo, roffset);
1059      case G_REDUCTION: return resolveNodeReduction(tid, sampleNo, roffset);
1060      case G_CONDEVAL: return resolveNodeCondEval(tid, sampleNo, roffset);
1061      default:
1062        throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");
1063      }
1064    }
1065    
1066    const DataTypes::ValueType*
1067    DataLazy::resolveNodeUnary(int tid, int sampleNo, size_t& roffset)
1068  {  {
1069      // we assume that any collapsing has been done before we get here      // we assume that any collapsing has been done before we get here
1070      // since we only have one argument we don't need to think about only      // since we only have one argument we don't need to think about only
1071      // processing single points.      // processing single points.
1072        // we will also know we won't get identity nodes
1073    if (m_readytype!='E')    if (m_readytype!='E')
1074    {    {
1075      throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");      throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
1076    }    }
1077    const ValueType* vleft=m_left->resolveSample(v,offset,sampleNo,roffset);    if (m_op==IDENTITY)
1078    const double* left=&((*vleft)[roffset]);    {
1079    double* result=&(v[offset]);      throw DataException("Programmer error - resolveNodeUnary should not be called on identity nodes.");
1080    roffset=offset;    }
1081      const DataTypes::ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, roffset);
1082      const double* left=&((*leftres)[roffset]);
1083      roffset=m_samplesize*tid;
1084      double* result=&(m_samples[roffset]);
1085    switch (m_op)    switch (m_op)
1086    {    {
1087      case SIN:        case SIN:  
# Line 762  DataLazy::resolveUnary(ValueType& v, siz Line 1180  DataLazy::resolveUnary(ValueType& v, siz
1180      case LEZ:      case LEZ:
1181      tensor_unary_operation(m_samplesize, left, result, bind2nd(less_equal<double>(),0.0));      tensor_unary_operation(m_samplesize, left, result, bind2nd(less_equal<double>(),0.0));
1182      break;      break;
1183    // There are actually G_UNARY_P but I don't see a compelling reason to treat them differently
1184        case NEZ:
1185        tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsGT(),m_tol));
1186        break;
1187        case EZ:
1188        tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsLTE(),m_tol));
1189        break;
1190    
1191      default:      default:
1192      throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");      throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");
1193    }    }
1194    return &v;    return &(m_samples);
1195  }  }
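Each branch above hands the whole sample to tensor_unary_operation, which is assumed to apply its functor to every one of the m_samplesize doubles. Roughly the following, in plain std:: terms:

    #include <algorithm>
    #include <cstddef>

    // Assumed behaviour of a call such as
    //     tensor_unary_operation(m_samplesize, left, result, ::sin);
    // namely an elementwise transform of n doubles.
    template <typename UnaryOp>
    void demoTensorUnary(std::size_t n, const double* in, double* out, UnaryOp op)
    {
        std::transform(in, in + n, out, op);
    }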
1196    
1197    
1198  /*  const DataTypes::ValueType*
1199    \brief Compute the value of the expression (unary operation) for the given sample.  DataLazy::resolveNodeReduction(int tid, int sampleNo, size_t& roffset)
   \return Vector which stores the value of the subexpression for the given sample.  
   \param v A vector to store intermediate results.  
   \param offset Index in v to begin storing results.  
   \param sampleNo Sample number to evaluate.  
   \param roffset (output parameter) the offset in the return vector where the result begins.  
   
   The return value will be an existing vector so do not deallocate it.  
   If the result is stored in v it should be stored at the offset given.  
   Everything from offset to the end of v should be considered available for this method to use.  
 */  
 DataTypes::ValueType*  
 DataLazy::resolveNP1OUT(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const  
1200  {  {
1201      // we assume that any collapsing has been done before we get here      // we assume that any collapsing has been done before we get here
1202      // since we only have one argument we don't need to think about only      // since we only have one argument we don't need to think about only
1203      // processing single points.      // processing single points.
1204        // we will also know we won't get identity nodes
1205    if (m_readytype!='E')    if (m_readytype!='E')
1206    {    {
1207      throw DataException("Programmer error - resolveNP1OUT should only be called on expanded Data.");      throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
1208    }    }
1209      // since we can't write the result over the input, we need a result offset further along    if (m_op==IDENTITY)
1210    size_t subroffset=roffset+m_samplesize;    {
1211    const ValueType* vleft=m_left->resolveSample(v,offset,sampleNo,subroffset);      throw DataException("Programmer error - resolveNodeUnary should not be called on identity nodes.");
1212    roffset=offset;    }
1213      size_t loffset=0;
1214      const DataTypes::ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, loffset);
1215    
1216      roffset=m_samplesize*tid;
1217      unsigned int ndpps=getNumDPPSample();
1218      unsigned int psize=DataTypes::noValues(m_left->getShape());
1219      double* result=&(m_samples[roffset]);
1220      switch (m_op)
1221      {
1222        case MINVAL:
1223        {
1224          for (unsigned int z=0;z<ndpps;++z)
1225          {
1226            FMin op;
1227            *result=DataMaths::reductionOp(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max());
1228            loffset+=psize;
1229            result++;
1230          }
1231        }
1232        break;
1233        case MAXVAL:
1234        {
1235          for (unsigned int z=0;z<ndpps;++z)
1236          {
1237          FMax op;
1238          *result=DataMaths::reductionOp(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max()*-1);
1239          loffset+=psize;
1240          result++;
1241          }
1242        }
1243        break;
1244        default:
1245        throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");
1246      }
1247      return &(m_samples);
1248    }
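The MINVAL/MAXVAL branches reduce each data point (psize doubles) to one value, seeding the accumulator with +max for a minimum and -max for a maximum. A plain-C++ sketch of the MINVAL case (demoMinval is illustrative, not the escript reductionOp):

    #include <cstddef>
    #include <limits>

    // Same idea as reductionOp(..., FMin(), numeric_limits<double>::max()) above.
    double demoMinval(const double* point, std::size_t psize)
    {
        double acc = std::numeric_limits<double>::max();
        for (std::size_t i = 0; i < psize; ++i)
        {
            if (point[i] < acc) acc = point[i];
        }
        return acc;
    }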
1249    
1250    const DataTypes::ValueType*
1251    DataLazy::resolveNodeNP1OUT(int tid, int sampleNo, size_t& roffset)
1252    {
1253        // we assume that any collapsing has been done before we get here
1254        // since we only have one argument we don't need to think about only
1255        // processing single points.
1256      if (m_readytype!='E')
1257      {
1258        throw DataException("Programmer error - resolveNodeNP1OUT should only be called on expanded Data.");
1259      }
1260      if (m_op==IDENTITY)
1261      {
1262        throw DataException("Programmer error - resolveNodeNP1OUT should not be called on identity nodes.");
1263      }
1264      size_t subroffset;
1265      const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1266      roffset=m_samplesize*tid;
1267      size_t loop=0;
1268      size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1269      size_t step=getNoValues();
1270      size_t offset=roffset;
1271    switch (m_op)    switch (m_op)
1272    {    {
1273      case SYM:      case SYM:
1274      DataMaths::symmetric(*vleft,m_left->getShape(),subroffset, v, getShape(), offset);      for (loop=0;loop<numsteps;++loop)
1275        {
1276            DataMaths::symmetric(*leftres,m_left->getShape(),subroffset, m_samples, getShape(), offset);
1277            subroffset+=step;
1278            offset+=step;
1279        }
1280      break;      break;
1281      case NSYM:      case NSYM:
1282      DataMaths::nonsymmetric(*vleft,m_left->getShape(),subroffset, v, getShape(), offset);      for (loop=0;loop<numsteps;++loop)
1283        {
1284            DataMaths::nonsymmetric(*leftres,m_left->getShape(),subroffset, m_samples, getShape(), offset);
1285            subroffset+=step;
1286            offset+=step;
1287        }
1288      break;      break;
1289      default:      default:
1290      throw DataException("Programmer error - resolveNP1OUT can not resolve operator "+opToString(m_op)+".");      throw DataException("Programmer error - resolveNP1OUT can not resolve operator "+opToString(m_op)+".");
1291    }    }
1292    return &v;    return &m_samples;
1293  }  }
1294    
1295    const DataTypes::ValueType*
1296    DataLazy::resolveNodeNP1OUT_P(int tid, int sampleNo, size_t& roffset)
1297    {
1298        // we assume that any collapsing has been done before we get here
1299        // since we only have one argument we don't need to think about only
1300        // processing single points.
1301      if (m_readytype!='E')
1302      {
1303        throw DataException("Programmer error - resolveNodeNP1OUT_P should only be called on expanded Data.");
1304      }
1305      if (m_op==IDENTITY)
1306      {
1307        throw DataException("Programmer error - resolveNodeNP1OUT_P should not be called on identity nodes.");
1308      }
1309      size_t subroffset;
1310      size_t offset;
1311      const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1312      roffset=m_samplesize*tid;
1313      offset=roffset;
1314      size_t loop=0;
1315      size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1316      size_t outstep=getNoValues();
1317      size_t instep=m_left->getNoValues();
1318      switch (m_op)
1319      {
1320        case TRACE:
1321        for (loop=0;loop<numsteps;++loop)
1322        {
1323                DataMaths::trace(*leftres,m_left->getShape(),subroffset, m_samples ,getShape(),offset,m_axis_offset);
1324            subroffset+=instep;
1325            offset+=outstep;
1326        }
1327        break;
1328        case TRANS:
1329        for (loop=0;loop<numsteps;++loop)
1330        {
1331                DataMaths::transpose(*leftres,m_left->getShape(),subroffset, m_samples, getShape(),offset,m_axis_offset);
1332            subroffset+=instep;
1333            offset+=outstep;
1334        }
1335        break;
1336        default:
1337        throw DataException("Programmer error - resolveNP1OUTP can not resolve operator "+opToString(m_op)+".");
1338      }
1339      return &m_samples;
1340    }
1341    
1342    
1343    const DataTypes::ValueType*
1344  #define PROC_OP(TYPE,X)                               \  DataLazy::resolveNodeNP1OUT_2P(int tid, int sampleNo, size_t& roffset)
1345      for (int i=0;i<steps;++i,resultp+=resultStep) \  {
1346      { \    if (m_readytype!='E')
1347         tensor_binary_operation< TYPE >(chunksize, &((*left)[lroffset]), &((*right)[rroffset]), resultp, X); \    {
1348         lroffset+=leftStep; \      throw DataException("Programmer error - resolveNodeNP1OUT_2P should only be called on expanded Data.");
1349         rroffset+=rightStep; \    }
1350      if (m_op==IDENTITY)
1351      {
1352        throw DataException("Programmer error - resolveNodeNP1OUT_2P should not be called on identity nodes.");
1353      }
1354      size_t subroffset;
1355      size_t offset;
1356      const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1357      roffset=m_samplesize*tid;
1358      offset=roffset;
1359      size_t loop=0;
1360      size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1361      size_t outstep=getNoValues();
1362      size_t instep=m_left->getNoValues();
1363      switch (m_op)
1364      {
1365        case SWAP:
1366        for (loop=0;loop<numsteps;++loop)
1367        {
1368                DataMaths::swapaxes(*leftres,m_left->getShape(),subroffset, m_samples, getShape(),offset, m_axis_offset, m_transpose);
1369            subroffset+=instep;
1370            offset+=outstep;
1371      }      }
1372        break;
1373        default:
1374        throw DataException("Programmer error - resolveNodeNP1OUT2P can not resolve operator "+opToString(m_op)+".");
1375      }
1376      return &m_samples;
1377    }
1378    
1379    const DataTypes::ValueType*
1380    DataLazy::resolveNodeCondEval(int tid, int sampleNo, size_t& roffset)
1381    {
1382      if (m_readytype!='E')
1383      {
1384        throw DataException("Programmer error - resolveNodeCondEval should only be called on expanded Data.");
1385      }
1386      if (m_op!=CONDEVAL)
1387      {
1388        throw DataException("Programmer error - resolveNodeCondEval should only be called on CONDEVAL nodes.");
1389      }
1390      size_t subroffset;
1391    
1392      const ValueType* maskres=m_mask->resolveNodeSample(tid, sampleNo, subroffset);
1393      const ValueType* srcres=0;
1394      if ((*maskres)[subroffset]>0)
1395      {
1396        srcres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1397      }
1398      else
1399      {
1400        srcres=m_right->resolveNodeSample(tid, sampleNo, subroffset);
1401      }
1402    
1403      // Now we need to copy the result
1404    
1405      roffset=m_samplesize*tid;
1406      for (int i=0;i<m_samplesize;++i)
1407      {
1408        m_samples[roffset+i]=(*srcres)[subroffset+i];  
1409      }
1410    
1411      return &m_samples;
1412    }
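Per sample, CONDEVAL is a straight copy: the first value of the resolved mask sample chooses the left or the right operand, and samplesize doubles are copied into this node's slot. The same selection and copy loop in isolation (illustrative helper, not part of the class):

    #include <cstddef>

    void demoCondEvalSample(const double* mask, const double* left, const double* right,
                            double* result, std::size_t samplesize)
    {
        const double* src = (mask[0] > 0) ? left : right;
        for (std::size_t i = 0; i < samplesize; ++i)
        {
            result[i] = src[i];
        }
    }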
1413    
 /*  
   \brief Compute the value of the expression (binary operation) for the given sample.  
   \return Vector which stores the value of the subexpression for the given sample.  
   \param v A vector to store intermediate results.  
   \param offset Index in v to begin storing results.  
   \param sampleNo Sample number to evaluate.  
   \param roffset (output parameter) the offset in the return vector where the result begins.  
   
   The return value will be an existing vector so do not deallocate it.  
   If the result is stored in v it should be stored at the offset given.  
   Everything from offset to the end of v should be considered available for this method to use.  
 */  
1414  // This method assumes that any subexpressions which evaluate to Constant or Tagged Data  // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1415  // have already been collapsed to IDENTITY. So we must have at least one expanded child.  // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1416  // If both children are expanded, then we can process them in a single operation (we treat  // If both children are expanded, then we can process them in a single operation (we treat
# Line 842  DataLazy::resolveNP1OUT(ValueType& v, si Line 1420  DataLazy::resolveNP1OUT(ValueType& v, si
1420  // There is an additional complication when scalar operations are considered.  // There is an additional complication when scalar operations are considered.
1421  // For example, 2+Vector.  // For example, 2+Vector.
1422  // In this case each double within the point is treated individually  // In this case each double within the point is treated individually
1423  DataTypes::ValueType*  const DataTypes::ValueType*
1424  DataLazy::resolveBinary(ValueType& v,  size_t offset, int sampleNo, size_t& roffset) const  DataLazy::resolveNodeBinary(int tid, int sampleNo, size_t& roffset)
1425  {  {
1426  cout << "Resolve binary: " << toString() << endl;  LAZYDEBUG(cout << "Resolve binary: " << toString() << endl;)
1427    
1428    size_t lroffset=0, rroffset=0;    // offsets in the left and right result vectors    size_t lroffset=0, rroffset=0;    // offsets in the left and right result vectors
1429      // first work out which of the children are expanded      // first work out which of the children are expanded
1430    bool leftExp=(m_left->m_readytype=='E');    bool leftExp=(m_left->m_readytype=='E');
1431    bool rightExp=(m_right->m_readytype=='E');    bool rightExp=(m_right->m_readytype=='E');
1432    bool bigloops=((leftExp && rightExp) || (!leftExp && !rightExp)); // is processing in single step?    if (!leftExp && !rightExp)
1433    int steps=(bigloops?1:getNumDPPSample());    {
1434    size_t chunksize=(bigloops? m_samplesize : getNoValues());    // if bigloops, pretend the whole sample is a datapoint      throw DataException("Programmer Error - please use collapse if neither argument has type 'E'.");
1435    if (m_left->getRank()!=m_right->getRank())    // need to deal with scalar * ? ops    }
1436    {    bool leftScalar=(m_left->getRank()==0);
1437      EsysAssert((m_left->getRank()==0) || (m_right->getRank()==0), "Error - Ranks must match unless one is 0.");    bool rightScalar=(m_right->getRank()==0);
1438      steps=getNumDPPSample()*max(m_left->getNoValues(),m_right->getNoValues());    if ((m_left->getRank()!=m_right->getRank()) && (!leftScalar && !rightScalar))
1439      chunksize=1;    // for scalar    {
1440    }          throw DataException("resolveBinary - ranks of arguments must match unless one of them is scalar.");
1441    int leftStep=((leftExp && !rightExp)? m_right->getNoValues() : 0);    }
1442    int rightStep=((rightExp && !leftExp)? m_left->getNoValues() : 0);    size_t leftsize=m_left->getNoValues();
1443    int resultStep=max(leftStep,rightStep);   // only one (at most) should be !=0    size_t rightsize=m_right->getNoValues();
1444      size_t chunksize=1;           // how many doubles will be processed in one go
1445      int leftstep=0;       // how far should the left offset advance after each step
1446      int rightstep=0;
1447      int numsteps=0;       // total number of steps for the inner loop
1448      int oleftstep=0;  // the o variables refer to the outer loop
1449      int orightstep=0; // The outer loop is only required in cases where there is an extended scalar
1450      int onumsteps=1;
1451      
1452      bool LES=(leftExp && leftScalar); // Left is an expanded scalar
1453      bool RES=(rightExp && rightScalar);
1454      bool LS=(!leftExp && leftScalar); // left is a single scalar
1455      bool RS=(!rightExp && rightScalar);
1456      bool LN=(!leftExp && !leftScalar);    // left is a single non-scalar
1457      bool RN=(!rightExp && !rightScalar);
1458      bool LEN=(leftExp && !leftScalar);    // left is an expanded non-scalar
1459      bool REN=(rightExp && !rightScalar);
1460    
1461      if ((LES && RES) || (LEN && REN)) // both are Expanded scalars or both are expanded non-scalars
1462      {
1463        chunksize=m_left->getNumDPPSample()*leftsize;
1464        leftstep=0;
1465        rightstep=0;
1466        numsteps=1;
1467      }
1468      else if (LES || RES)
1469      {
1470        chunksize=1;
1471        if (LES)        // left is an expanded scalar
1472        {
1473            if (RS)
1474            {
1475               leftstep=1;
1476               rightstep=0;
1477               numsteps=m_left->getNumDPPSample();
1478            }
1479            else        // RN or REN
1480            {
1481               leftstep=0;
1482               oleftstep=1;
1483               rightstep=1;
1484               orightstep=(RN ? -(int)rightsize : 0);
1485               numsteps=rightsize;
1486               onumsteps=m_left->getNumDPPSample();
1487            }
1488        }
1489        else        // right is an expanded scalar
1490        {
1491            if (LS)
1492            {
1493               rightstep=1;
1494               leftstep=0;
1495               numsteps=m_right->getNumDPPSample();
1496            }
1497            else
1498            {
1499               rightstep=0;
1500               orightstep=1;
1501               leftstep=1;
1502               oleftstep=(LN ? -(int)leftsize : 0);
1503               numsteps=leftsize;
1504               onumsteps=m_right->getNumDPPSample();
1505            }
1506        }
1507      }
1508      else  // this leaves (LEN, RS), (LEN, RN) and their transposes
1509      {
1510        if (LEN)    // and Right will be a single value
1511        {
1512            chunksize=rightsize;
1513            leftstep=rightsize;
1514            rightstep=0;
1515            numsteps=m_left->getNumDPPSample();
1516            if (RS)
1517            {
1518               numsteps*=leftsize;
1519            }
1520        }
1521        else    // REN
1522        {
1523            chunksize=leftsize;
1524            rightstep=leftsize;
1525            leftstep=0;
1526            numsteps=m_right->getNumDPPSample();
1527            if (LS)
1528            {
1529               numsteps*=rightsize;
1530            }
1531        }
1532      }
1533    
1534      int resultStep=max(leftstep,rightstep);   // only one (at most) should be !=0
1535      // Get the values of sub-expressions      // Get the values of sub-expressions
1536    const ValueType* left=m_left->resolveSample(v,offset,sampleNo,lroffset);    const ValueType* left=m_left->resolveNodeSample(tid,sampleNo,lroffset);  
1537    const ValueType* right=m_right->resolveSample(v,offset+m_samplesize,sampleNo,rroffset); // Note    const ValueType* right=m_right->resolveNodeSample(tid,sampleNo,rroffset);
1538      // the right child starts further along.  LAZYDEBUG(cout << "Post sub calls in " << toString() << endl;)
1539    double* resultp=&(v[offset]);     // results are stored at the vector offset we received  LAZYDEBUG(cout << "shapes=" << DataTypes::shapeToString(m_left->getShape()) << "," << DataTypes::shapeToString(m_right->getShape()) << endl;)
1540    LAZYDEBUG(cout << "chunksize=" << chunksize << endl << "leftstep=" << leftstep << " rightstep=" << rightstep;)
1541    LAZYDEBUG(cout << " numsteps=" << numsteps << endl << "oleftstep=" << oleftstep << " orightstep=" << orightstep;)
1542    LAZYDEBUG(cout << "onumsteps=" << onumsteps << endl;)
1543    LAZYDEBUG(cout << " DPPS=" << m_left->getNumDPPSample() << "," <<m_right->getNumDPPSample() << endl;)
1544    LAZYDEBUG(cout << "" << LS << RS << LN << RN << LES << RES <<LEN << REN <<   endl;)
1545    
1546    LAZYDEBUG(cout << "Left res["<< lroffset<< "]=" << (*left)[lroffset] << endl;)
1547    LAZYDEBUG(cout << "Right res["<< rroffset<< "]=" << (*right)[rroffset] << endl;)
1548    
1549    
1550      roffset=m_samplesize*tid;
1551      double* resultp=&(m_samples[roffset]);        // results are stored at the vector offset we received
1552    switch(m_op)    switch(m_op)
1553    {    {
1554      case ADD:      case ADD:
# Line 888  cout << "Resolve binary: " << toString() Line 1569  cout << "Resolve binary: " << toString()
1569      default:      default:
1570      throw DataException("Programmer error - resolveBinary can not resolve operator "+opToString(m_op)+".");      throw DataException("Programmer error - resolveBinary can not resolve operator "+opToString(m_op)+".");
1571    }    }
1572    roffset=offset;    LAZYDEBUG(cout << "Result res[" << roffset<< "]" << m_samples[roffset] << endl;)
1573    return &v;    return &m_samples;
1574  }  }
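The stepping variables set up above (chunksize, leftstep/rightstep, numsteps, plus the outer oleftstep/orightstep/onumsteps) are what the ADD and related cases iterate over. Below is a minimal sketch of how such a chunked loop can consume them; the free-standing helper name chunked_binary and its signature are hypothetical and not part of DataLazy, and 'op' stands for any binary functor such as std::plus<double>() for the ADD case.

#include <cstddef>

// Hypothetical helper (a sketch, not the escript implementation) showing how
// the stepping variables drive a chunked binary operation over two operands
// that have already been offset to the start of the current sample.
template <typename BinOp>
void chunked_binary(double* resultp, const double* left, const double* right,
                    std::size_t chunksize, int leftstep, int rightstep, int numsteps,
                    int oleftstep, int orightstep, int onumsteps, int resultStep,
                    BinOp op)
{
    std::ptrdiff_t lroffset = 0, rroffset = 0;     // signed: the outer steps may be negative
    for (int j = 0; j < onumsteps; ++j)            // outer loop: >1 only when one operand is an expanded scalar
    {
        for (int i = 0; i < numsteps; ++i, resultp += resultStep)
        {
            for (std::size_t k = 0; k < chunksize; ++k)   // one contiguous chunk of doubles
            {
                resultp[k] = op(left[lroffset + k], right[rroffset + k]);
            }
            lroffset += leftstep;                  // advance the operand offsets per inner step
            rroffset += rightstep;
        }
        lroffset += oleftstep;                     // rewind (negative) or advance for the next outer pass
        rroffset += orightstep;
    }
}

The outer loop only does real work when one side is an expanded scalar: the negative oleftstep/orightstep values rewind the non-scalar operand so it is re-read against each datapoint of the scalar side.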
1575    
1576    
 /*  
   \brief Compute the value of the expression (tensor product) for the given sample.  
   \return Vector which stores the value of the subexpression for the given sample.  
   \param v A vector to store intermediate results.  
   \param offset Index in v to begin storing results.  
   \param sampleNo Sample number to evaluate.  
   \param roffset (output parameter) the offset in the return vector where the result begins.  
   
   The return value will be an existing vector so do not deallocate it.  
   If the result is stored in v it should be stored at the offset given.  
   Everything from offset to the end of v should be considered available for this method to use.  
 */  
1577  // This method assumes that any subexpressions which evaluate to Constant or Tagged Data  // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1578  // have already been collapsed to IDENTITY. So we must have at least one expanded child.  // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1579  // unlike the other resolve helpers, we must treat these datapoints separately.  // unlike the other resolve helpers, we must treat these datapoints separately.
1580  DataTypes::ValueType*  const DataTypes::ValueType*
1581  DataLazy::resolveTProd(ValueType& v,  size_t offset, int sampleNo, size_t& roffset) const  DataLazy::resolveNodeTProd(int tid, int sampleNo, size_t& roffset)
1582  {  {
1583  cout << "Resolve TensorProduct: " << toString() << endl;  LAZYDEBUG(cout << "Resolve TensorProduct: " << toString() << endl;)
1584    
1585    size_t lroffset=0, rroffset=0;    // offsets in the left and right result vectors    size_t lroffset=0, rroffset=0;    // offsets in the left and right result vectors
1586      // first work out which of the children are expanded      // first work out which of the children are expanded
1587    bool leftExp=(m_left->m_readytype=='E');    bool leftExp=(m_left->m_readytype=='E');
1588    bool rightExp=(m_right->m_readytype=='E');    bool rightExp=(m_right->m_readytype=='E');
1589    int steps=getNumDPPSample();    int steps=getNumDPPSample();
1590    int leftStep=((leftExp && !rightExp)? m_right->getNoValues() : 0);    int leftStep=(leftExp? m_left->getNoValues() : 0);        // do not have scalars as input to this method
1591    int rightStep=((rightExp && !leftExp)? m_left->getNoValues() : 0);    int rightStep=(rightExp?m_right->getNoValues() : 0);
1592    int resultStep=max(leftStep,rightStep);   // only one (at most) should be !=0  
1593      // Get the values of sub-expressions (leave a gap of one sample for the result).    int resultStep=getNoValues();
1594    const ValueType* left=m_left->resolveSample(v,offset+m_samplesize,sampleNo,lroffset);    roffset=m_samplesize*tid;
1595    const ValueType* right=m_right->resolveSample(v,offset+2*m_samplesize,sampleNo,rroffset);    size_t offset=roffset;
1596    double* resultp=&(v[offset]);     // results are stored at the vector offset we received
1597      const ValueType* left=m_left->resolveNodeSample(tid, sampleNo, lroffset);
1598    
1599      const ValueType* right=m_right->resolveNodeSample(tid, sampleNo, rroffset);
1600    
1601    LAZYDEBUG(cerr << "[Left shape]=" << DataTypes::shapeToString(m_left->getShape()) << "\n[Right shape]=" << DataTypes::shapeToString(m_right->getShape()) << " result=" <<DataTypes::shapeToString(getShape()) <<  endl;
1602    cout << getNoValues() << endl;)
1603    
1604    
1605    LAZYDEBUG(cerr << "Post sub calls: " << toString() << endl;)
1606    LAZYDEBUG(cout << "LeftExp=" << leftExp << " rightExp=" << rightExp << endl;)
1607    LAZYDEBUG(cout << "LeftR=" << m_left->getRank() << " rightExp=" << m_right->getRank() << endl;)
1608    LAZYDEBUG(cout << "LeftSize=" << m_left->getNoValues() << " RightSize=" << m_right->getNoValues() << endl;)
1609    LAZYDEBUG(cout << "m_samplesize=" << m_samplesize << endl;)
1610    LAZYDEBUG(cout << "outputshape=" << DataTypes::shapeToString(getShape()) << endl;)
1611    LAZYDEBUG(cout << "DPPS=" << m_right->getNumDPPSample() <<"."<<endl;)
1612    
1613      double* resultp=&(m_samples[offset]);     // results are stored at the vector offset we received
1614    switch(m_op)    switch(m_op)
1615    {    {
1616      case PROD:      case PROD:
# Line 932  cout << "Resolve TensorProduct: " << toS Line 1618  cout << "Resolve TensorProduct: " << toS
1618      {      {
1619            const double *ptr_0 = &((*left)[lroffset]);            const double *ptr_0 = &((*left)[lroffset]);
1620            const double *ptr_1 = &((*right)[rroffset]);            const double *ptr_1 = &((*right)[rroffset]);
1621    
1622    LAZYDEBUG(cout << DataTypes::pointToString(*left, m_left->getShape(),lroffset,"LEFT") << endl;)
1623    LAZYDEBUG(cout << DataTypes::pointToString(*right,m_right->getShape(),rroffset, "RIGHT") << endl;)
1624    
1625            matrix_matrix_product(m_SL, m_SM, m_SR, ptr_0, ptr_1, resultp, m_transpose);            matrix_matrix_product(m_SL, m_SM, m_SR, ptr_0, ptr_1, resultp, m_transpose);
1626    
1627        lroffset+=leftStep;        lroffset+=leftStep;
1628        rroffset+=rightStep;        rroffset+=rightStep;
1629      }      }
# Line 941  cout << "Resolve TensorProduct: " << toS Line 1632  cout << "Resolve TensorProduct: " << toS
1632      throw DataException("Programmer error - resolveTProduct can not resolve operator "+opToString(m_op)+".");      throw DataException("Programmer error - resolveTProduct can not resolve operator "+opToString(m_op)+".");
1633    }    }
1634    roffset=offset;    roffset=offset;
1635    return &v;    return &m_samples;
1636  }  }
1637    
1638    
1639    const DataTypes::ValueType*
1640    DataLazy::resolveSample(int sampleNo, size_t& roffset)
1641    {
1642    #ifdef _OPENMP
1643        int tid=omp_get_thread_num();
1644    #else
1645        int tid=0;
1646    #endif
1647    
1648  /*  #ifdef LAZY_STACK_PROF
1649    \brief Compute the value of the expression for the given sample.      stackstart[tid]=&tid;
1650    \return Vector which stores the value of the subexpression for the given sample.      stackend[tid]=&tid;
1651    \param v A vector to store intermediate results.      const DataTypes::ValueType* r=resolveNodeSample(tid, sampleNo, roffset);
1652    \param offset Index in v to begin storing results.      size_t d=(size_t)stackstart[tid]-(size_t)stackend[tid];
1653    \param sampleNo Sample number to evaluate.      #pragma omp critical
1654    \param roffset (output parameter) the offset in the return vector where the result begins.      if (d>maxstackuse)
1655        {
1656    cout << "Max resolve Stack use " << d << endl;
1657            maxstackuse=d;
1658        }
1659        return r;
1660    #else
1661        return resolveNodeSample(tid, sampleNo, roffset);
1662    #endif
1663    }
1664    
   The return value will be an existing vector so do not deallocate it.  
 */  
 // the vector and the offset are a place where the method could write its data if it wishes  
 // it is not obligated to do so. For example, if it has its own storage already, it can use that.  
 // Hence the return value to indicate where the data is actually stored.  
 // Regardless, the storage should be assumed to be used, even if it isn't.  
1665    
1666  // the roffset is the offset within the returned vector where the data begins  // This needs to do the work of the identity constructor
1667  const DataTypes::ValueType*  void
1668  DataLazy::resolveSample(ValueType& v, size_t offset, int sampleNo, size_t& roffset)  DataLazy::resolveToIdentity()
1669  {  {
1670  cout << "Resolve sample " << toString() << endl;     if (m_op==IDENTITY)
1671      // collapse so we have a 'E' node or an IDENTITY for some other type      return;
1672    if (m_readytype!='E' && m_op!=IDENTITY)     DataReady_ptr p=resolveNodeWorker();
1673       makeIdentity(p);
1674    }
1675    
1676    void DataLazy::makeIdentity(const DataReady_ptr& p)
1677    {
1678       m_op=IDENTITY;
1679       m_axis_offset=0;
1680       m_transpose=0;
1681       m_SL=m_SM=m_SR=0;
1682       m_children=m_height=0;
1683       m_id=p;
1684       if(p->isConstant()) {m_readytype='C';}
1685       else if(p->isExpanded()) {m_readytype='E';}
1686       else if (p->isTagged()) {m_readytype='T';}
1687       else {throw DataException("Unknown DataReady instance in convertToIdentity constructor.");}
1688       m_samplesize=p->getNumDPPSample()*p->getNoValues();
1689       m_left.reset();
1690       m_right.reset();
1691    }
1692    
1693    
1694    DataReady_ptr
1695    DataLazy::resolve()
1696    {
1697        resolveToIdentity();
1698        return m_id;
1699    }
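resolve() simply funnels through resolveToIdentity(), so after the call this node is an IDENTITY wrapper around the returned DataReady. A hedged usage sketch follows; the operand names are illustrative, and the two-child constructor is the same one deepCopy uses for G_BINARY nodes.

// Hypothetical usage sketch -- 'leftData' and 'rightData' stand for existing
// DataAbstract_ptr operands; error handling omitted.
DataLazy_ptr lazySum(new DataLazy(leftData, rightData, ADD));   // build a lazy ADD node
DataReady_ptr concrete = lazySum->resolve();                    // evaluate the expression tree
// lazySum is now an IDENTITY node holding 'concrete'; further resolve() calls
// return the same DataReady without re-evaluating.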
1700    
1701    
1702    /* This is really a static method but I think that caused problems in windows */
1703    void
1704    DataLazy::resolveGroupWorker(std::vector<DataLazy*>& dats)
1705    {
1706      if (dats.empty())
1707    {    {
1708      collapse();      return;
1709    }    }
1710    if (m_op==IDENTITY)      vector<DataLazy*> work;
1711      FunctionSpace fs=dats[0]->getFunctionSpace();
1712      bool match=true;
1713      for (int i=dats.size()-1;i>=0;--i)
1714    {    {
1715      const ValueType& vec=m_id->getVector();      if (dats[i]->m_readytype!='E')
1716      if (m_readytype=='C')      {
1717      {          dats[i]->collapse();
1718      roffset=0;      }
1719      return &(vec);      if (dats[i]->m_op!=IDENTITY)
1720      }      {
1721      roffset=m_id->getPointOffset(sampleNo, 0);          work.push_back(dats[i]);
1722      return &(vec);          if (fs!=dats[i]->getFunctionSpace())
1723            {
1724                match=false;
1725            }
1726        }
1727    }    }
1728    if (m_readytype!='E')    if (work.empty())
1729    {    {
1730      throw DataException("Programmer Error - Collapse did not produce an expanded node.");      return;     // no work to do
1731    }    }
1732    switch (getOpgroup(m_op))    if (match)    // all functionspaces match.  Yes I realise this is overly strict
1733      {     // it is possible that dats[0] is one of the objects which we discarded and
1734            // all the other functionspaces match.
1735        vector<DataExpanded*> dep;
1736        vector<ValueType*> vecs;
1737        for (int i=0;i<work.size();++i)
1738        {
1739            dep.push_back(new DataExpanded(fs,work[i]->getShape(), ValueType(work[i]->getNoValues())));
1740            vecs.push_back(&(dep[i]->getVectorRW()));
1741        }
1742        int totalsamples=work[0]->getNumSamples();
1743        const ValueType* res=0; // Storage for answer
1744        int sample;
1745        #pragma omp parallel private(sample, res)
1746        {
1747            size_t roffset=0;
1748            #pragma omp for schedule(static)
1749            for (sample=0;sample<totalsamples;++sample)
1750            {
1751            roffset=0;
1752            int j;
1753            for (j=work.size()-1;j>=0;--j)
1754            {
1755    #ifdef _OPENMP
1756                    res=work[j]->resolveNodeSample(omp_get_thread_num(),sample,roffset);
1757    #else
1758                    res=work[j]->resolveNodeSample(0,sample,roffset);
1759    #endif
1760                    DataVector::size_type outoffset=dep[j]->getPointOffset(sample,0);
1761                    memcpy(&((*vecs[j])[outoffset]),&((*res)[roffset]),work[j]->m_samplesize*sizeof(DataVector::ElementType));
1762            }
1763            }
1764        }
1765        // Now we need to load the new results as identity ops into the lazy nodes
1766        for (int i=work.size()-1;i>=0;--i)
1767        {
1768            work[i]->makeIdentity(boost::dynamic_pointer_cast<DataReady>(dep[i]->getPtr()));
1769        }
1770      }
1771      else  // functionspaces do not match
1772    {    {
1773    case G_UNARY: return resolveUnary(v, offset,sampleNo,roffset);      for (int i=0;i<work.size();++i)
1774    case G_BINARY: return resolveBinary(v, offset,sampleNo,roffset);      {
1775    case G_NP1OUT: return resolveNP1OUT(v, offset, sampleNo,roffset);          work[i]->resolveToIdentity();
1776    case G_TENSORPROD: return resolveTProd(v,offset, sampleNo,roffset);      }
   default:  
     throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");  
1777    }    }
1778  }  }
1779    
1780    
1781  // To simplify the memory management, all threads operate on one large vector, rather than one each.  
1782  // Each sample is evaluated independently and copied into the result DataExpanded.  // This version of resolve uses storage in each node to hold results
1783  DataReady_ptr  DataReady_ptr
1784  DataLazy::resolve()  DataLazy::resolveNodeWorker()
1785  {  {
   
 cout << "Sample size=" << m_samplesize << endl;  
 cout << "Buffers=" << m_buffsRequired << endl;  
   
1786    if (m_readytype!='E')     // if the whole sub-expression is Constant or Tagged, then evaluate it normally    if (m_readytype!='E')     // if the whole sub-expression is Constant or Tagged, then evaluate it normally
1787    {    {
1788      collapse();      collapse();
# Line 1016  cout << "Buffers=" << m_buffsRequired << Line 1792  cout << "Buffers=" << m_buffsRequired <<
1792      return m_id;      return m_id;
1793    }    }
1794      // from this point on we must have m_op!=IDENTITY and m_readytype=='E'      // from this point on we must have m_op!=IDENTITY and m_readytype=='E'
   size_t threadbuffersize=m_maxsamplesize*(max(1,m_buffsRequired)); // Each thread needs to have enough  
     // storage to evaluate its expression  
   int numthreads=1;  
 #ifdef _OPENMP  
   numthreads=getNumberOfThreads();  
 #endif  
   ValueType v(numthreads*threadbuffersize);  
 cout << "Buffer created with size=" << v.size() << endl;  
1795    DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(),  ValueType(getNoValues()));    DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(),  ValueType(getNoValues()));
1796    ValueType& resvec=result->getVector();    ValueType& resvec=result->getVectorRW();
1797    DataReady_ptr resptr=DataReady_ptr(result);    DataReady_ptr resptr=DataReady_ptr(result);
1798    
1799    int sample;    int sample;
   size_t outoffset;     // offset in the output data  
1800    int totalsamples=getNumSamples();    int totalsamples=getNumSamples();
1801    const ValueType* res=0;   // Vector storing the answer    const ValueType* res=0;   // Storage for answer
1802    size_t resoffset=0;       // where in the vector to find the answer  LAZYDEBUG(cout << "Total number of samples=" <<totalsamples << endl;)
1803    #pragma omp parallel for private(sample,resoffset,outoffset,res) schedule(static)    #pragma omp parallel private(sample,res)
1804    for (sample=0;sample<totalsamples;++sample)    {
1805    {      size_t roffset=0;
1806  cout << "################################# " << sample << endl;  #ifdef LAZY_STACK_PROF
1807        stackstart[omp_get_thread_num()]=&roffset;
1808        stackend[omp_get_thread_num()]=&roffset;
1809    #endif
1810        #pragma omp for schedule(static)
1811        for (sample=0;sample<totalsamples;++sample)
1812        {
1813            roffset=0;
1814  #ifdef _OPENMP  #ifdef _OPENMP
1815      res=resolveSample(v,threadbuffersize*omp_get_thread_num(),sample,resoffset);              res=resolveNodeSample(omp_get_thread_num(),sample,roffset);
1816  #else  #else
1817      res=resolveSample(v,0,sample,resoffset);   // res would normally be v, but not if its a single IDENTITY op.              res=resolveNodeSample(0,sample,roffset);
1818  #endif  #endif
1819  cerr << "-------------------------------- " << endl;  LAZYDEBUG(cout << "Sample #" << sample << endl;)
1820      outoffset=result->getPointOffset(sample,0);  LAZYDEBUG(cout << "Final res[" << roffset<< "]=" << (*res)[roffset] << (*res)[roffset]<< endl; )
1821  cerr << "offset=" << outoffset << endl;              DataVector::size_type outoffset=result->getPointOffset(sample,0);
1822      for (unsigned int i=0;i<m_samplesize;++i,++outoffset,++resoffset)   // copy values into the output vector              memcpy(&(resvec[outoffset]),&((*res)[roffset]),m_samplesize*sizeof(DataVector::ElementType));
1823      {      }
1824      resvec[outoffset]=(*res)[resoffset];    }
1825      }  #ifdef LAZY_STACK_PROF
1826  cerr << "*********************************" << endl;    for (int i=0;i<getNumberOfThreads();++i)
1827      {
1828        size_t r=((size_t)stackstart[i] - (size_t)stackend[i]);
1829    //  cout << i << " " << stackstart[i] << " .. " << stackend[i] << " = " <<  r << endl;
1830        if (r>maxstackuse)
1831        {
1832            maxstackuse=r;
1833        }
1834    }    }
1835      cout << "Max resolve Stack use=" << maxstackuse << endl;
1836    #endif
1837    return resptr;    return resptr;
1838  }  }
1839    
# Line 1057  std::string Line 1841  std::string
1841  DataLazy::toString() const  DataLazy::toString() const
1842  {  {
1843    ostringstream oss;    ostringstream oss;
1844    oss << "Lazy Data:";    oss << "Lazy Data: [depth=" << m_height<< "] ";
1845    intoString(oss);    switch (escriptParams.getLAZY_STR_FMT())
1846      {
1847      case 1:   // tree format
1848        oss << endl;
1849        intoTreeString(oss,"");
1850        break;
1851      case 2:   // just the depth
1852        break;
1853      default:
1854        intoString(oss);
1855        break;
1856      }
1857    return oss.str();    return oss.str();
1858  }  }
1859    
# Line 1066  DataLazy::toString() const Line 1861  DataLazy::toString() const
1861  void  void
1862  DataLazy::intoString(ostringstream& oss) const  DataLazy::intoString(ostringstream& oss) const
1863  {  {
1864    //    oss << "[" << m_children <<";"<<m_height <<"]";
1865    switch (getOpgroup(m_op))    switch (getOpgroup(m_op))
1866    {    {
1867    case G_IDENTITY:    case G_IDENTITY:
# Line 1095  DataLazy::intoString(ostringstream& oss) Line 1891  DataLazy::intoString(ostringstream& oss)
1891      oss << ')';      oss << ')';
1892      break;      break;
1893    case G_UNARY:    case G_UNARY:
1894      case G_UNARY_P:
1895    case G_NP1OUT:    case G_NP1OUT:
1896      case G_NP1OUT_P:
1897      case G_REDUCTION:
1898      oss << opToString(m_op) << '(';      oss << opToString(m_op) << '(';
1899      m_left->intoString(oss);      m_left->intoString(oss);
1900      oss << ')';      oss << ')';
# Line 1107  DataLazy::intoString(ostringstream& oss) Line 1906  DataLazy::intoString(ostringstream& oss)
1906      m_right->intoString(oss);      m_right->intoString(oss);
1907      oss << ')';      oss << ')';
1908      break;      break;
1909      case G_NP1OUT_2P:
1910        oss << opToString(m_op) << '(';
1911        m_left->intoString(oss);
1912        oss << ", " << m_axis_offset << ", " << m_transpose;
1913        oss << ')';
1914        break;
1915      case G_CONDEVAL:
1916        oss << opToString(m_op)<< '(' ;
1917        m_mask->intoString(oss);
1918        oss << " ? ";
1919        m_left->intoString(oss);
1920        oss << " : ";
1921        m_right->intoString(oss);
1922        oss << ')';
1923        break;
1924      default:
1925        oss << "UNKNOWN";
1926      }
1927    }
1928    
1929    
1930    void
1931    DataLazy::intoTreeString(ostringstream& oss, string indent) const
1932    {
1933      oss << '[' << m_rank << ':' << setw(3) << m_samplesize << "] " << indent;
1934      switch (getOpgroup(m_op))
1935      {
1936      case G_IDENTITY:
1937        if (m_id->isExpanded())
1938        {
1939           oss << "E";
1940        }
1941        else if (m_id->isTagged())
1942        {
1943          oss << "T";
1944        }
1945        else if (m_id->isConstant())
1946        {
1947          oss << "C";
1948        }
1949        else
1950        {
1951          oss << "?";
1952        }
1953        oss << '@' << m_id.get() << endl;
1954        break;
1955      case G_BINARY:
1956        oss << opToString(m_op) << endl;
1957        indent+='.';
1958        m_left->intoTreeString(oss, indent);
1959        m_right->intoTreeString(oss, indent);
1960        break;
1961      case G_UNARY:
1962      case G_UNARY_P:
1963      case G_NP1OUT:
1964      case G_NP1OUT_P:
1965      case G_REDUCTION:
1966        oss << opToString(m_op) << endl;
1967        indent+='.';
1968        m_left->intoTreeString(oss, indent);
1969        break;
1970      case G_TENSORPROD:
1971        oss << opToString(m_op) << endl;
1972        indent+='.';
1973        m_left->intoTreeString(oss, indent);
1974        m_right->intoTreeString(oss, indent);
1975        break;
1976      case G_NP1OUT_2P:
1977        oss << opToString(m_op) << ", " << m_axis_offset << ", " << m_transpose<< endl;
1978        indent+='.';
1979        m_left->intoTreeString(oss, indent);
1980        break;
1981    default:    default:
1982      oss << "UNKNOWN";      oss << "UNKNOWN";
1983    }    }
1984  }  }
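For orientation, intoTreeString prints one node per line as "[rank:samplesize] " plus the current indent, with each level of children prefixed by an extra '.'. The following is an illustrative (not captured) example of the tree format for a lazy sin(A+B) over two expanded scalar operands; the depth value, pointer values and the exact opToString spellings are assumptions.

Lazy Data: [depth=2]
[0:  1] sin
[0:  1] .+
[0:  1] ..E@0x7f50c0
[0:  1] ..E@0x7f5100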
1985    
1986    
1987  DataAbstract*  DataAbstract*
1988  DataLazy::deepCopy()  DataLazy::deepCopy()
1989  {  {
1990    switch (getOpgroup(m_op))    switch (getOpgroup(m_op))
1991    {    {
1992    case G_IDENTITY:  return new DataLazy(m_id->deepCopy()->getPtr());    case G_IDENTITY:  return new DataLazy(m_id->deepCopy()->getPtr());
1993    case G_UNARY: return new DataLazy(m_left->deepCopy()->getPtr(),m_op);    case G_UNARY:
1994      case G_REDUCTION:      return new DataLazy(m_left->deepCopy()->getPtr(),m_op);
1995      case G_UNARY_P:   return new DataLazy(m_left->deepCopy()->getPtr(), m_op, m_tol);
1996    case G_BINARY:    return new DataLazy(m_left->deepCopy()->getPtr(),m_right->deepCopy()->getPtr(),m_op);    case G_BINARY:    return new DataLazy(m_left->deepCopy()->getPtr(),m_right->deepCopy()->getPtr(),m_op);
1997    case G_NP1OUT: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(),m_op);    case G_NP1OUT: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(),m_op);
1998    case G_TENSORPROD: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);    case G_TENSORPROD: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
1999      case G_NP1OUT_P:   return new DataLazy(m_left->deepCopy()->getPtr(),m_op,  m_axis_offset);
2000      case G_NP1OUT_2P:  return new DataLazy(m_left->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
2001    default:    default:
2002      throw DataException("Programmer error - do not know how to deepcopy operator "+opToString(m_op)+".");      throw DataException("Programmer error - do not know how to deepcopy operator "+opToString(m_op)+".");
2003    }    }
2004  }  }
2005    
2006    
2007    
2008  // There is no single, natural interpretation of getLength on DataLazy.  // There is no single, natural interpretation of getLength on DataLazy.
2009  // Instances of DataReady can look at the size of their vectors.  // Instances of DataReady can look at the size of their vectors.
2010  // For lazy though, it could be the size the data would be if it were resolved;  // For lazy though, it could be the size the data would be if it were resolved;
# Line 1203  DataLazy::getPointOffset(int sampleNo, Line 2080  DataLazy::getPointOffset(int sampleNo,
2080    throw DataException("Programmer error - getPointOffset on lazy data may require collapsing (but this object is marked const).");    throw DataException("Programmer error - getPointOffset on lazy data may require collapsing (but this object is marked const).");
2081  }  }
2082    
2083  // It would seem that DataTagged will need to be treated differently since even after setting all tags  
2084  // to zero, all the tags from all the DataTags would be in the result.  // I have decided to let Data:: handle this issue.
 // However since they all have the same value (0) whether they are there or not should not matter.  
 // So I have decided that for all types this method will create a constant 0.  
 // It can be promoted up as required.  
// A possible efficiency concern might be expanded->constant->expanded which involves extra memory management  
// but we can deal with that if it arises.  
2085  void  void
2086  DataLazy::setToZero()  DataLazy::setToZero()
2087  {  {
2088    DataTypes::ValueType v(getNoValues(),0);  //   DataTypes::ValueType v(getNoValues(),0);
2089    m_id=DataReady_ptr(new DataConstant(getFunctionSpace(),getShape(),v));  //   m_id=DataReady_ptr(new DataConstant(getFunctionSpace(),getShape(),v));
2090    m_op=IDENTITY;  //   m_op=IDENTITY;
2091    m_right.reset();    //   m_right.reset();  
2092    m_left.reset();  //   m_left.reset();
2093    m_readytype='C';  //   m_readytype='C';
2094    m_buffsRequired=1;  //   m_buffsRequired=1;
2095    
2096      privdebug=privdebug;  // to stop the compiler complaining about unused privdebug
2097      throw DataException("Programmer error - setToZero not supported for DataLazy (DataLazy objects should be read only).");
2098    }
2099    
2100    bool
2101    DataLazy::actsExpanded() const
2102    {
2103        return (m_readytype=='E');
2104  }  }
2105    
2106  }   // end namespace  }   // end namespace
