/[escript]/branches/clazy/escriptcore/src/DataLazy.cpp

Diff of /branches/clazy/escriptcore/src/DataLazy.cpp


revision 2066 by jfenwick, Thu Nov 20 05:31:33 2008 UTC → revision 2635 by jfenwick, Thu Aug 27 04:54:41 2009 UTC
# Line 1  Line 1 
1    
2  /*******************************************************  /*******************************************************
3  *  *
4  * Copyright (c) 2003-2008 by University of Queensland  * Copyright (c) 2003-2009 by University of Queensland
5  * Earth Systems Science Computational Center (ESSCC)  * Earth Systems Science Computational Center (ESSCC)
6  * http://www.uq.edu.au/esscc  * http://www.uq.edu.au/esscc
7  *  *
# Line 28  Line 28 
28  #include "UnaryFuncs.h"     // for escript::fsign  #include "UnaryFuncs.h"     // for escript::fsign
29  #include "Utils.h"  #include "Utils.h"
30    
31    #include "EscriptParams.h"
32    
33    #include <iomanip>      // for some fancy formatting in debug
34    
35    // #define LAZYDEBUG(X) if (privdebug){X;}
36    #define LAZYDEBUG(X)
37    namespace
38    {
39    bool privdebug=false;
40    
41    #define ENABLEDEBUG privdebug=true;
42    #define DISABLEDEBUG privdebug=false;
43    }
44    
45    // #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {cerr << "\n!!!!!!! SIZE LIMIT EXCEEDED " << m_children << ";" << m_height << endl << toString() << endl;resolveToIdentity();}
46    
47    #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {resolveToIdentity();}
48    
49    
50  /*  /*
51  How does DataLazy work?  How does DataLazy work?
52  ~~~~~~~~~~~~~~~~~~~~~~~  ~~~~~~~~~~~~~~~~~~~~~~~
# Line 70  The convention that I use, is that the r Line 89  The convention that I use, is that the r
89  For expressions which evaluate to Constant or Tagged, there is a different evaluation method.  For expressions which evaluate to Constant or Tagged, there is a different evaluation method.
90  The collapse method invokes the (non-lazy) operations on the Data class to evaluate the expression.  The collapse method invokes the (non-lazy) operations on the Data class to evaluate the expression.
91    
92  To add a new operator you need to do the following (plus anything I might have forgotten):  To add a new operator you need to do the following (plus anything I might have forgotten - adding a new group for example):
93  1) Add to the ES_optype.  1) Add to the ES_optype.
94  2) determine what opgroup your operation belongs to (X)  2) determine what opgroup your operation belongs to (X)
95  3) add a string for the op to the end of ES_opstrings  3) add a string for the op to the end of ES_opstrings
# Line 96  enum ES_opgroup Line 115  enum ES_opgroup
115     G_IDENTITY,     G_IDENTITY,
116     G_BINARY,        // pointwise operations with two arguments     G_BINARY,        // pointwise operations with two arguments
117     G_UNARY,     // pointwise operations with one argument     G_UNARY,     // pointwise operations with one argument
118       G_UNARY_P,       // pointwise operations with one argument, requiring a parameter
119     G_NP1OUT,        // non-pointwise op with one output     G_NP1OUT,        // non-pointwise op with one output
120     G_TENSORPROD     // general tensor product     G_NP1OUT_P,      // non-pointwise op with one output requiring a parameter
121       G_TENSORPROD,    // general tensor product
122       G_NP1OUT_2P      // non-pointwise op with one output requiring two params
123  };  };
124    
125    
# Line 108  string ES_opstrings[]={"UNKNOWN","IDENTI Line 130  string ES_opstrings[]={"UNKNOWN","IDENTI
130              "asin","acos","atan","sinh","cosh","tanh","erf",              "asin","acos","atan","sinh","cosh","tanh","erf",
131              "asinh","acosh","atanh",              "asinh","acosh","atanh",
132              "log10","log","sign","abs","neg","pos","exp","sqrt",              "log10","log","sign","abs","neg","pos","exp","sqrt",
133              "1/","where>0","where<0","where>=0","where<=0",              "1/","where>0","where<0","where>=0","where<=0", "where<>0","where=0",
134              "symmetric","nonsymmetric",              "symmetric","nonsymmetric",
135              "prod"};              "prod",
136  int ES_opcount=36;              "transpose", "trace",
137                "swapaxes"};
138    int ES_opcount=41;
139  ES_opgroup opgroups[]={G_UNKNOWN,G_IDENTITY,G_BINARY,G_BINARY,G_BINARY,G_BINARY, G_BINARY,  ES_opgroup opgroups[]={G_UNKNOWN,G_IDENTITY,G_BINARY,G_BINARY,G_BINARY,G_BINARY, G_BINARY,
140              G_UNARY,G_UNARY,G_UNARY, //10              G_UNARY,G_UNARY,G_UNARY, //10
141              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,    // 17              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,    // 17
142              G_UNARY,G_UNARY,G_UNARY,                    // 20              G_UNARY,G_UNARY,G_UNARY,                    // 20
143              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,        // 28              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,    // 28
144              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,            // 33              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY, G_UNARY_P, G_UNARY_P,      // 35
145              G_NP1OUT,G_NP1OUT,              G_NP1OUT,G_NP1OUT,
146              G_TENSORPROD};              G_TENSORPROD,
147                G_NP1OUT_P, G_NP1OUT_P,
148                G_NP1OUT_2P};
149  inline  inline
150  ES_opgroup  ES_opgroup
151  getOpgroup(ES_optype op)  getOpgroup(ES_optype op)
# Line 177  resultShape(DataAbstract_ptr left, DataA Line 203  resultShape(DataAbstract_ptr left, DataA
203      return left->getShape();      return left->getShape();
204  }  }
205    
206    // return the shape for "op left"
207    
208    DataTypes::ShapeType
209    resultShape(DataAbstract_ptr left, ES_optype op, int axis_offset)
210    {
211        switch(op)
212        {
213            case TRANS:
214           {            // for the scoping of variables
215            const DataTypes::ShapeType& s=left->getShape();
216            DataTypes::ShapeType sh;
217            int rank=left->getRank();
218            if (axis_offset<0 || axis_offset>rank)
219            {
220                   throw DataException("Error - Data::transpose must have 0 <= axis_offset <= rank.");
221                }
222                for (int i=0; i<rank; i++)
223            {
224               int index = (axis_offset+i)%rank;
225                   sh.push_back(s[index]); // Append to new shape
226                }
227            return sh;
228           }
229        break;
230        case TRACE:
231           {
232            int rank=left->getRank();
233            if (rank<2)
234            {
235               throw DataException("Trace can only be computed for objects with rank 2 or greater.");
236            }
237            if ((axis_offset>rank-2) || (axis_offset<0))
238            {
239               throw DataException("Trace: axis offset must lie between 0 and rank-2 inclusive.");
240            }
241            if (rank==2)
242            {
243               return DataTypes::scalarShape;
244            }
245            else if (rank==3)
246            {
247               DataTypes::ShapeType sh;
248                   if (axis_offset==0)
249               {
250                    sh.push_back(left->getShape()[2]);
251                   }
252                   else     // offset==1
253               {
254                sh.push_back(left->getShape()[0]);
255                   }
256               return sh;
257            }
258            else if (rank==4)
259            {
260               DataTypes::ShapeType sh;
261               const DataTypes::ShapeType& s=left->getShape();
262                   if (axis_offset==0)
263               {
264                    sh.push_back(s[2]);
265                    sh.push_back(s[3]);
266                   }
267                   else if (axis_offset==1)
268               {
269                    sh.push_back(s[0]);
270                    sh.push_back(s[3]);
271                   }
272               else     // offset==2
273               {
274                sh.push_back(s[0]);
275                sh.push_back(s[1]);
276               }
277               return sh;
278            }
279            else        // unknown rank
280            {
281               throw DataException("Error - Data::trace can only be calculated for rank 2, 3 or 4 object.");
282            }
283           }
284        break;
285            default:
286        throw DataException("Programmer error - resultShape(left,op) can't compute shapes for operator "+opToString(op)+".");
287        }
288    }
289    
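For illustration only (not part of this diff): a minimal standalone sketch of what the TRANS branch above computes. The output shape is the input shape rotated left by axis_offset, using the same index rule as the loop in the TRANS case; TRACE, by contrast, drops the two summed dimensions, so the trace of a (2,3,4) object with axis_offset=0 has shape (4). The names rotateShape and Shape are hypothetical stand-ins, not repository identifiers.

// Illustrative sketch (not repository code): shape rotation used by TRANS.
#include <vector>
#include <iostream>

typedef std::vector<int> Shape;   // stand-in for DataTypes::ShapeType

Shape rotateShape(const Shape& s, int axis_offset)
{
    Shape out;
    int rank = s.size();
    for (int i = 0; i < rank; ++i)
    {
        out.push_back(s[(axis_offset + i) % rank]);   // same index rule as the TRANS case above
    }
    return out;
}

int main()
{
    Shape s;                        // shape (2,3,4)
    s.push_back(2); s.push_back(3); s.push_back(4);
    Shape r = rotateShape(s, 1);    // expected (3,4,2)
    for (size_t i = 0; i < r.size(); ++i) std::cout << r[i] << " ";
    std::cout << std::endl;
    return 0;
}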
290    DataTypes::ShapeType
291    SwapShape(DataAbstract_ptr left, const int axis0, const int axis1)
292    {
293         // This code taken from the Data.cpp swapaxes() method
294         // Some of the checks are probably redundant here
295         int axis0_tmp,axis1_tmp;
296         const DataTypes::ShapeType& s=left->getShape();
297         DataTypes::ShapeType out_shape;
298         // Here's the equivalent of python s_out=s[axis_offset:]+s[:axis_offset]
299         // which goes thru all shape vector elements starting with axis_offset (at index=rank wrap around to 0)
300         int rank=left->getRank();
301         if (rank<2) {
302            throw DataException("Error - Data::swapaxes argument must have at least rank 2.");
303         }
304         if (axis0<0 || axis0>rank-1) {
305            throw DataException("Error - Data::swapaxes: axis0 must be between 0 and rank-1.");
306         }
307         if (axis1<0 || axis1>rank-1) {
308             throw DataException("Error - Data::swapaxes: axis1 must be between 0 and rank-1.");
309         }
310         if (axis0 == axis1) {
311             throw DataException("Error - Data::swapaxes: axis indices must be different.");
312         }
313         if (axis0 > axis1) {
314             axis0_tmp=axis1;
315             axis1_tmp=axis0;
316         } else {
317             axis0_tmp=axis0;
318             axis1_tmp=axis1;
319         }
320         for (int i=0; i<rank; i++) {
321           if (i == axis0_tmp) {
322              out_shape.push_back(s[axis1_tmp]);
323           } else if (i == axis1_tmp) {
324              out_shape.push_back(s[axis0_tmp]);
325           } else {
326              out_shape.push_back(s[i]);
327           }
328         }
329        return out_shape;
330    }
331    
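For illustration only (not part of this diff): the effect of SwapShape on a concrete shape, assuming a rank-3 input. Swapping axes 0 and 2 of shape (2,3,4) gives (4,3,2); every other axis keeps its extent, which is what the element-wise loop above produces.

// Illustrative sketch (not repository code): the outcome of SwapShape on one example.
#include <vector>
#include <algorithm>
#include <iostream>

int main()
{
    std::vector<int> s;
    s.push_back(2); s.push_back(3); s.push_back(4);
    std::swap(s[0], s[2]);                   // same result as SwapShape(left, 0, 2) for this shape
    for (size_t i = 0; i < s.size(); ++i) std::cout << s[i] << " ";   // prints 4 3 2
    std::cout << std::endl;
    return 0;
}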
332    
333  // determine the output shape for the general tensor product operation  // determine the output shape for the general tensor product operation
334  // the additional parameters return information required later for the product  // the additional parameters return information required later for the product
335  // the majority of this code is copy pasted from C_General_Tensor_Product  // the majority of this code is copy pasted from C_General_Tensor_Product
# Line 197  GTPShape(DataAbstract_ptr left, DataAbst Line 350  GTPShape(DataAbstract_ptr left, DataAbst
350    else if (transpose == 2)  { start1 = rank1-axis_offset; }    else if (transpose == 2)  { start1 = rank1-axis_offset; }
351    else              { throw DataException("DataLazy GeneralTensorProduct Constructor: Error - transpose should be 0, 1 or 2"); }    else              { throw DataException("DataLazy GeneralTensorProduct Constructor: Error - transpose should be 0, 1 or 2"); }
352    
353      if (rank0<axis_offset)
354      {
355        throw DataException("DataLazy GeneralTensorProduct Constructor: Error - rank of left < axisoffset");
356      }
357    
358    // Adjust the shapes for transpose    // Adjust the shapes for transpose
359    DataTypes::ShapeType tmpShape0(rank0);    // pre-sizing the vectors rather    DataTypes::ShapeType tmpShape0(rank0);    // pre-sizing the vectors rather
# Line 226  GTPShape(DataAbstract_ptr left, DataAbst Line 383  GTPShape(DataAbstract_ptr left, DataAbst
383       for (int i=0; i<rank0-axis_offset; i++, ++out_index) { shape2[out_index]=tmpShape0[i]; } // First part of arg_0_Z       for (int i=0; i<rank0-axis_offset; i++, ++out_index) { shape2[out_index]=tmpShape0[i]; } // First part of arg_0_Z
384       for (int i=axis_offset; i<rank1; i++, ++out_index)   { shape2[out_index]=tmpShape1[i]; } // Last part of arg_1_Z       for (int i=axis_offset; i<rank1; i++, ++out_index)   { shape2[out_index]=tmpShape1[i]; } // Last part of arg_1_Z
385    }    }
   return shape2;  
 }  
386    
387      if (shape2.size()>ESCRIPT_MAX_DATA_RANK)
388      {
389         ostringstream os;
390         os << "C_GeneralTensorProduct: Error - Attempt to create a rank " << shape2.size() << " object. The maximum rank is " << ESCRIPT_MAX_DATA_RANK << ".";
391         throw DataException(os.str());
392      }
393    
394  // determine the number of points in the result of "left op right"    return shape2;
395  // note that determining the resultLength for G_TENSORPROD is more complex and will not be processed here  }
 // size_t  
 // resultLength(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op)  
 // {  
 //    switch (getOpgroup(op))  
 //    {  
 //    case G_BINARY: return left->getLength();  
 //    case G_UNARY: return left->getLength();  
 //    case G_NP1OUT: return left->getLength();  
 //    default:  
 //  throw DataException("Programmer Error - attempt to getLength() for operator "+opToString(op)+".");  
 //    }  
 // }  
396    
397  // determine the number of samples required to evaluate an expression combining left and right  // determine the number of samples required to evaluate an expression combining left and right
398  // NP1OUT needs an extra buffer because we can't write the answers over the top of the input.  // NP1OUT needs an extra buffer because we can't write the answers over the top of the input.
399  // The same goes for G_TENSORPROD  // The same goes for G_TENSORPROD
400    // It might seem that pointwise binary ops (G_BINARY) could be written over the top of the left operand.
401    // This would be true were it not for the possibility that the LHS could be a scalar which needs to be examined
402    // multiple times
403  int  int
404  calcBuffs(const DataLazy_ptr& left, const DataLazy_ptr& right, ES_optype op)  calcBuffs(const DataLazy_ptr& left, const DataLazy_ptr& right, ES_optype op)
405  {  {
406     switch(getOpgroup(op))     switch(getOpgroup(op))
407     {     {
408     case G_IDENTITY: return 1;     case G_IDENTITY: return 1;
409     case G_BINARY: return max(left->getBuffsRequired(),right->getBuffsRequired()+1);     case G_BINARY: return 1+max(left->getBuffsRequired(),right->getBuffsRequired()+1);
410     case G_UNARY: return max(left->getBuffsRequired(),1);     case G_UNARY:
411       case G_UNARY_P: return max(left->getBuffsRequired(),1);
412     case G_NP1OUT: return 1+max(left->getBuffsRequired(),1);     case G_NP1OUT: return 1+max(left->getBuffsRequired(),1);
413       case G_NP1OUT_P: return 1+max(left->getBuffsRequired(),1);
414     case G_TENSORPROD: return 1+max(left->getBuffsRequired(),right->getBuffsRequired()+1);     case G_TENSORPROD: return 1+max(left->getBuffsRequired(),right->getBuffsRequired()+1);
415       case G_NP1OUT_2P: return 1+max(left->getBuffsRequired(),1);
416     default:     default:
417      throw DataException("Programmer Error - attempt to calcBuffs() for operator "+opToString(op)+".");      throw DataException("Programmer Error - attempt to calcBuffs() for operator "+opToString(op)+".");
418     }     }
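A hedged, standalone sketch of the buffer-counting recurrence that calcBuffs implements in the new revision: a binary node needs one buffer of its own plus whatever the left child needs, while the right child's result is kept alive (hence the +1); a unary node can reuse its child's space. The Node struct and buffsRequired name below are hypothetical, used only to show the arithmetic.

// Illustrative sketch (not repository code) of the calcBuffs recurrence.
#include <algorithm>
#include <iostream>

struct Node                      // hypothetical stand-in for a DataLazy node
{
    Node* left;
    Node* right;                 // 0 for unary nodes and leaves
    int buffsRequired() const
    {
        if (left == 0) return 1;                                    // identity / leaf
        if (right == 0) return std::max(left->buffsRequired(), 1);  // unary: reuse left's space
        // binary: own result + left's needs, while right's result stays alive (+1)
        return 1 + std::max(left->buffsRequired(), right->buffsRequired() + 1);
    }
};

int main()
{
    Node a = {0, 0}, b = {0, 0};     // two leaves
    Node sum = {&a, &b};             // a+b  ->  1 + max(1, 1+1) = 3 buffers
    std::cout << sum.buffsRequired() << std::endl;
    return 0;
}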
# Line 279  opToString(ES_optype op) Line 434  opToString(ES_optype op)
434    return ES_opstrings[op];    return ES_opstrings[op];
435  }  }
436    
437    #ifdef LAZY_NODE_STORAGE
438    void DataLazy::LazyNodeSetup()
439    {
440    #ifdef _OPENMP
441        int numthreads=omp_get_max_threads();
442        m_samples.resize(numthreads*m_samplesize);
443        m_sampleids=new int[numthreads];
444        for (int i=0;i<numthreads;++i)
445        {
446            m_sampleids[i]=-1;  
447        }
448    #else
449        m_samples.resize(m_samplesize);
450        m_sampleids=new int[1];
451        m_sampleids[0]=-1;
452    #endif  // _OPENMP
453    }
454    #endif   // LAZY_NODE_STORAGE
455    
456    
457    // Creates an identity node
458  DataLazy::DataLazy(DataAbstract_ptr p)  DataLazy::DataLazy(DataAbstract_ptr p)
459      : parent(p->getFunctionSpace(),p->getShape()),      : parent(p->getFunctionSpace(),p->getShape())
460      m_op(IDENTITY),  #ifdef LAZY_NODE_STORAGE
461      m_axis_offset(0),      ,m_sampleids(0),
462      m_transpose(0),      m_samples(1)
463      m_SL(0), m_SM(0), m_SR(0)  #endif
464  {  {
465     if (p->isLazy())     if (p->isLazy())
466     {     {
# Line 296  DataLazy::DataLazy(DataAbstract_ptr p) Line 471  DataLazy::DataLazy(DataAbstract_ptr p)
471     }     }
472     else     else
473     {     {
474      m_id=dynamic_pointer_cast<DataReady>(p);      p->makeLazyShared();
475      if(p->isConstant()) {m_readytype='C';}      DataReady_ptr dr=dynamic_pointer_cast<DataReady>(p);
476      else if(p->isExpanded()) {m_readytype='E';}      makeIdentity(dr);
477      else if (p->isTagged()) {m_readytype='T';}  LAZYDEBUG(cout << "Wrapping " << dr.get() << " id=" << m_id.get() << endl;)
     else {throw DataException("Unknown DataReady instance in DataLazy constructor.");}  
478     }     }
479     m_buffsRequired=1;  LAZYDEBUG(cout << "(1)Lazy created with " << m_samplesize << endl;)
    m_samplesize=getNumDPPSample()*getNoValues();  
    m_maxsamplesize=m_samplesize;  
 cout << "(1)Lazy created with " << m_samplesize << endl;  
480  }  }
481    
   
   
   
482  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op)  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op)
483      : parent(left->getFunctionSpace(),left->getShape()),      : parent(left->getFunctionSpace(),left->getShape()),
484      m_op(op),      m_op(op),
# Line 337  DataLazy::DataLazy(DataAbstract_ptr left Line 505  DataLazy::DataLazy(DataAbstract_ptr left
505     m_buffsRequired=calcBuffs(m_left, m_right,m_op); // yeah m_right will be null at this point     m_buffsRequired=calcBuffs(m_left, m_right,m_op); // yeah m_right will be null at this point
506     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
507     m_maxsamplesize=max(m_samplesize,m_left->getMaxSampleSize());     m_maxsamplesize=max(m_samplesize,m_left->getMaxSampleSize());
508       m_children=m_left->m_children+1;
509       m_height=m_left->m_height+1;
510    #ifdef LAZY_NODE_STORAGE
511       LazyNodeSetup();
512    #endif
513       SIZELIMIT
514  }  }
515    
516    
# Line 346  DataLazy::DataLazy(DataAbstract_ptr left Line 520  DataLazy::DataLazy(DataAbstract_ptr left
520      m_op(op),      m_op(op),
521      m_SL(0), m_SM(0), m_SR(0)      m_SL(0), m_SM(0), m_SR(0)
522  {  {
523    LAZYDEBUG(cout << "Forming operator with " << left.get() << " " << right.get() << endl;)
524     if ((getOpgroup(op)!=G_BINARY))     if ((getOpgroup(op)!=G_BINARY))
525     {     {
526      throw DataException("Programmer error - constructor DataLazy(left, right, op) will only process BINARY operations.");      throw DataException("Programmer error - constructor DataLazy(left, right, op) will only process BINARY operations.");
# Line 362  DataLazy::DataLazy(DataAbstract_ptr left Line 537  DataLazy::DataLazy(DataAbstract_ptr left
537     {     {
538      Data tmp(Data(right),getFunctionSpace());      Data tmp(Data(right),getFunctionSpace());
539      right=tmp.borrowDataPtr();      right=tmp.borrowDataPtr();
540    LAZYDEBUG(cout << "Right interpolation required " << right.get() << endl;)
541     }     }
542     left->operandCheck(*right);     left->operandCheck(*right);
543    
544     if (left->isLazy())          // the children need to be DataLazy. Wrap them in IDENTITY if required     if (left->isLazy())          // the children need to be DataLazy. Wrap them in IDENTITY if required
545     {     {
546      m_left=dynamic_pointer_cast<DataLazy>(left);      m_left=dynamic_pointer_cast<DataLazy>(left);
547    LAZYDEBUG(cout << "Left is " << m_left->toString() << endl;)
548     }     }
549     else     else
550     {     {
551      m_left=DataLazy_ptr(new DataLazy(left));      m_left=DataLazy_ptr(new DataLazy(left));
552    LAZYDEBUG(cout << "Left " << left.get() << " wrapped " << m_left->m_id.get() << endl;)
553     }     }
554     if (right->isLazy())     if (right->isLazy())
555     {     {
556      m_right=dynamic_pointer_cast<DataLazy>(right);      m_right=dynamic_pointer_cast<DataLazy>(right);
557    LAZYDEBUG(cout << "Right is " << m_right->toString() << endl;)
558     }     }
559     else     else
560     {     {
561      m_right=DataLazy_ptr(new DataLazy(right));      m_right=DataLazy_ptr(new DataLazy(right));
562    LAZYDEBUG(cout << "Right " << right.get() << " wrapped " << m_right->m_id.get() << endl;)
563     }     }
564     char lt=m_left->m_readytype;     char lt=m_left->m_readytype;
565     char rt=m_right->m_readytype;     char rt=m_right->m_readytype;
# Line 398  DataLazy::DataLazy(DataAbstract_ptr left Line 578  DataLazy::DataLazy(DataAbstract_ptr left
578     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
579     m_maxsamplesize=max(max(m_samplesize,m_right->getMaxSampleSize()),m_left->getMaxSampleSize());       m_maxsamplesize=max(max(m_samplesize,m_right->getMaxSampleSize()),m_left->getMaxSampleSize());  
580     m_buffsRequired=calcBuffs(m_left, m_right,m_op);     m_buffsRequired=calcBuffs(m_left, m_right,m_op);
581  cout << "(3)Lazy created with " << m_samplesize << endl;     m_children=m_left->m_children+m_right->m_children+2;
582       m_height=max(m_left->m_height,m_right->m_height)+1;
583    #ifdef LAZY_NODE_STORAGE
584       LazyNodeSetup();
585    #endif
586       SIZELIMIT
587    LAZYDEBUG(cout << "(3)Lazy created with " << m_samplesize << endl;)
588  }  }
589    
590  DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op, int axis_offset, int transpose)  DataLazy::DataLazy(DataAbstract_ptr left, DataAbstract_ptr right, ES_optype op, int axis_offset, int transpose)
# Line 427  DataLazy::DataLazy(DataAbstract_ptr left Line 613  DataLazy::DataLazy(DataAbstract_ptr left
613      Data tmp(Data(right),getFunctionSpace());      Data tmp(Data(right),getFunctionSpace());
614      right=tmp.borrowDataPtr();      right=tmp.borrowDataPtr();
615     }     }
616     left->operandCheck(*right);  //    left->operandCheck(*right);
617    
618     if (left->isLazy())          // the children need to be DataLazy. Wrap them in IDENTITY if required     if (left->isLazy())          // the children need to be DataLazy. Wrap them in IDENTITY if required
619     {     {
# Line 462  DataLazy::DataLazy(DataAbstract_ptr left Line 648  DataLazy::DataLazy(DataAbstract_ptr left
648     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
649     m_maxsamplesize=max(max(m_samplesize,m_right->getMaxSampleSize()),m_left->getMaxSampleSize());       m_maxsamplesize=max(max(m_samplesize,m_right->getMaxSampleSize()),m_left->getMaxSampleSize());  
650     m_buffsRequired=calcBuffs(m_left, m_right,m_op);     m_buffsRequired=calcBuffs(m_left, m_right,m_op);
651  cout << "(4)Lazy created with " << m_samplesize << endl;     m_children=m_left->m_children+m_right->m_children+2;
652       m_height=max(m_left->m_height,m_right->m_height)+1;
653    #ifdef LAZY_NODE_STORAGE
654       LazyNodeSetup();
655    #endif
656       SIZELIMIT
657    LAZYDEBUG(cout << "(4)Lazy created with " << m_samplesize << endl;)
658    }
659    
660    
661    DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, int axis_offset)
662        : parent(left->getFunctionSpace(), resultShape(left,op, axis_offset)),
663        m_op(op),
664        m_axis_offset(axis_offset),
665        m_transpose(0),
666        m_tol(0)
667    {
668       if ((getOpgroup(op)!=G_NP1OUT_P))
669       {
670        throw DataException("Programmer error - constructor DataLazy(left, op, ax) will only process UNARY operations which require parameters.");
671       }
672       DataLazy_ptr lleft;
673       if (!left->isLazy())
674       {
675        lleft=DataLazy_ptr(new DataLazy(left));
676       }
677       else
678       {
679        lleft=dynamic_pointer_cast<DataLazy>(left);
680       }
681       m_readytype=lleft->m_readytype;
682       m_left=lleft;
683       m_buffsRequired=calcBuffs(m_left, m_right,m_op); // yeah m_right will be null at this point
684       m_samplesize=getNumDPPSample()*getNoValues();
685       m_maxsamplesize=max(m_samplesize,m_left->getMaxSampleSize());
686       m_children=m_left->m_children+1;
687       m_height=m_left->m_height+1;
688    #ifdef LAZY_NODE_STORAGE
689       LazyNodeSetup();
690    #endif
691       SIZELIMIT
692    LAZYDEBUG(cout << "(5)Lazy created with " << m_samplesize << endl;)
693    }
694    
695    DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, double tol)
696        : parent(left->getFunctionSpace(), left->getShape()),
697        m_op(op),
698        m_axis_offset(0),
699        m_transpose(0),
700        m_tol(tol)
701    {
702       if ((getOpgroup(op)!=G_UNARY_P))
703       {
704        throw DataException("Programmer error - constructor DataLazy(left, op, tol) will only process UNARY operations which require parameters.");
705       }
706       DataLazy_ptr lleft;
707       if (!left->isLazy())
708       {
709        lleft=DataLazy_ptr(new DataLazy(left));
710       }
711       else
712       {
713        lleft=dynamic_pointer_cast<DataLazy>(left);
714       }
715       m_readytype=lleft->m_readytype;
716       m_left=lleft;
717       m_buffsRequired=calcBuffs(m_left, m_right,m_op); // yeah m_right will be null at this point
718       m_samplesize=getNumDPPSample()*getNoValues();
719       m_maxsamplesize=max(m_samplesize,m_left->getMaxSampleSize());
720       m_children=m_left->m_children+1;
721       m_height=m_left->m_height+1;
722    #ifdef LAZY_NODE_STORAGE
723       LazyNodeSetup();
724    #endif
725       SIZELIMIT
726    LAZYDEBUG(cout << "(6)Lazy created with " << m_samplesize << endl;)
727  }  }
728    
729    
730    DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, const int axis0, const int axis1)
731        : parent(left->getFunctionSpace(), SwapShape(left,axis0,axis1)),
732        m_op(op),
733        m_axis_offset(axis0),
734        m_transpose(axis1),
735        m_tol(0)
736    {
737       if ((getOpgroup(op)!=G_NP1OUT_2P))
738       {
739        throw DataException("Programmer error - constructor DataLazy(left, op, axis0, axis1) will only process UNARY operations which require two integer parameters.");
740       }
741       DataLazy_ptr lleft;
742       if (!left->isLazy())
743       {
744        lleft=DataLazy_ptr(new DataLazy(left));
745       }
746       else
747       {
748        lleft=dynamic_pointer_cast<DataLazy>(left);
749       }
750       m_readytype=lleft->m_readytype;
751       m_left=lleft;
752       m_buffsRequired=calcBuffs(m_left, m_right,m_op); // yeah m_right will be null at this point
753       m_samplesize=getNumDPPSample()*getNoValues();
754       m_maxsamplesize=max(m_samplesize,m_left->getMaxSampleSize());
755       m_children=m_left->m_children+1;
756       m_height=m_left->m_height+1;
757    #ifdef LAZY_NODE_STORAGE
758       LazyNodeSetup();
759    #endif
760       SIZELIMIT
761    LAZYDEBUG(cout << "(7)Lazy created with " << m_samplesize << endl;)
762    }
763    
764  DataLazy::~DataLazy()  DataLazy::~DataLazy()
765  {  {
766    #ifdef LAZY_NODE_STORAGE
767       delete[] m_sampleids;
768       // m_samples is a ValueType (vector) member; it releases its own storage
769    #endif
770  }  }
771    
772    
# Line 484  DataLazy::getMaxSampleSize() const Line 783  DataLazy::getMaxSampleSize() const
783      return m_maxsamplesize;      return m_maxsamplesize;
784  }  }
785    
786    
787    
788    size_t
789    DataLazy::getSampleBufferSize() const
790    {
791        return m_maxsamplesize*(max(1,m_buffsRequired));
792    }
793    
794  /*  /*
795    \brief Evaluates the expression using methods on Data.    \brief Evaluates the expression using methods on Data.
796    This does the work for the collapse method.    This does the work for the collapse method.
# Line 602  DataLazy::collapseToReady() Line 909  DataLazy::collapseToReady()
909      case LEZ:      case LEZ:
910      result=left.whereNonPositive();      result=left.whereNonPositive();
911      break;      break;
912        case NEZ:
913        result=left.whereNonZero(m_tol);
914        break;
915        case EZ:
916        result=left.whereZero(m_tol);
917        break;
918      case SYM:      case SYM:
919      result=left.symmetric();      result=left.symmetric();
920      break;      break;
# Line 611  DataLazy::collapseToReady() Line 924  DataLazy::collapseToReady()
924      case PROD:      case PROD:
925      result=C_GeneralTensorProduct(left,right,m_axis_offset, m_transpose);      result=C_GeneralTensorProduct(left,right,m_axis_offset, m_transpose);
926      break;      break;
927        case TRANS:
928        result=left.transpose(m_axis_offset);
929        break;
930        case TRACE:
931        result=left.trace(m_axis_offset);
932        break;
933        case SWAP:
934        result=left.swapaxes(m_axis_offset, m_transpose);
935        break;
936      default:      default:
937      throw DataException("Programmer error - collapseToReady does not know how to resolve operator "+opToString(m_op)+".");      throw DataException("Programmer error - collapseToReady does not know how to resolve operator "+opToString(m_op)+".");
938    }    }
# Line 660  DataLazy::resolveUnary(ValueType& v, siz Line 982  DataLazy::resolveUnary(ValueType& v, siz
982    {    {
983      throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");      throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
984    }    }
985    const ValueType* vleft=m_left->resolveSample(v,offset,sampleNo,roffset);    const ValueType* vleft=m_left->resolveVectorSample(v,offset,sampleNo,roffset);
986    const double* left=&((*vleft)[roffset]);    const double* left=&((*vleft)[roffset]);
987    double* result=&(v[offset]);    double* result=&(v[offset]);
988    roffset=offset;    roffset=offset;
# Line 762  DataLazy::resolveUnary(ValueType& v, siz Line 1084  DataLazy::resolveUnary(ValueType& v, siz
1084      case LEZ:      case LEZ:
1085      tensor_unary_operation(m_samplesize, left, result, bind2nd(less_equal<double>(),0.0));      tensor_unary_operation(m_samplesize, left, result, bind2nd(less_equal<double>(),0.0));
1086      break;      break;
1087    // There are actually G_UNARY_P but I don't see a compelling reason to treat them differently
1088        case NEZ:
1089        tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsGT(),m_tol));
1090        break;
1091        case EZ:
1092        tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsLTE(),m_tol));
1093        break;
1094    
1095      default:      default:
1096      throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");      throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");
# Line 770  DataLazy::resolveUnary(ValueType& v, siz Line 1099  DataLazy::resolveUnary(ValueType& v, siz
1099  }  }
1100    
1101    
1102    
1103    
1104    
1105    
1106  /*  /*
1107    \brief Compute the value of the expression (unary operation) for the given sample.    \brief Compute the value of the expression (unary operation) for the given sample.
1108    \return Vector which stores the value of the subexpression for the given sample.    \return Vector which stores the value of the subexpression for the given sample.
# Line 794  DataLazy::resolveNP1OUT(ValueType& v, si Line 1127  DataLazy::resolveNP1OUT(ValueType& v, si
1127    }    }
1128      // since we can't write the result over the input, we need a result offset further along      // since we can't write the result over the input, we need a result offset further along
1129    size_t subroffset=roffset+m_samplesize;    size_t subroffset=roffset+m_samplesize;
1130    const ValueType* vleft=m_left->resolveSample(v,offset,sampleNo,subroffset);  LAZYDEBUG(cerr << "subroffset=" << subroffset << endl;)
1131      const ValueType* vleft=m_left->resolveVectorSample(v,offset+m_left->m_samplesize,sampleNo,subroffset);
1132    roffset=offset;    roffset=offset;
1133      size_t loop=0;
1134      size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1135      size_t step=getNoValues();
1136    switch (m_op)    switch (m_op)
1137    {    {
1138      case SYM:      case SYM:
1139      DataMaths::symmetric(*vleft,m_left->getShape(),subroffset, v, getShape(), offset);      for (loop=0;loop<numsteps;++loop)
1140        {
1141            DataMaths::symmetric(*vleft,m_left->getShape(),subroffset, v, getShape(), offset);
1142            subroffset+=step;
1143            offset+=step;
1144        }
1145      break;      break;
1146      case NSYM:      case NSYM:
1147      DataMaths::nonsymmetric(*vleft,m_left->getShape(),subroffset, v, getShape(), offset);      for (loop=0;loop<numsteps;++loop)
1148        {
1149            DataMaths::nonsymmetric(*vleft,m_left->getShape(),subroffset, v, getShape(), offset);
1150            subroffset+=step;
1151            offset+=step;
1152        }
1153      break;      break;
1154      default:      default:
1155      throw DataException("Programmer error - resolveNP1OUT can not resolve operator "+opToString(m_op)+".");      throw DataException("Programmer error - resolveNP1OUT can not resolve operator "+opToString(m_op)+".");
# Line 810  DataLazy::resolveNP1OUT(ValueType& v, si Line 1157  DataLazy::resolveNP1OUT(ValueType& v, si
1157    return &v;    return &v;
1158  }  }
1159    
1160    /*
1161      \brief Compute the value of the expression (unary operation) for the given sample.
1162      \return Vector which stores the value of the subexpression for the given sample.
1163      \param v A vector to store intermediate results.
1164      \param offset Index in v to begin storing results.
1165      \param sampleNo Sample number to evaluate.
1166      \param roffset (output parameter) the offset in the return vector where the result begins.
1167    
1168      The return value will be an existing vector so do not deallocate it.
1169      If the result is stored in v it should be stored at the offset given.
1170      Everything from offset to the end of v should be considered available for this method to use.
1171    */
1172    DataTypes::ValueType*
1173    DataLazy::resolveNP1OUT_P(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const
1174    {
1175        // we assume that any collapsing has been done before we get here
1176        // since we only have one argument we don't need to think about only
1177        // processing single points.
1178      if (m_readytype!='E')
1179      {
1180        throw DataException("Programmer error - resolveNP1OUT_P should only be called on expanded Data.");
1181      }
1182        // since we can't write the result over the input, we need a result offset further along
1183      size_t subroffset;
1184      const ValueType* vleft=m_left->resolveVectorSample(v,offset+m_left->m_samplesize,sampleNo,subroffset);
1185    LAZYDEBUG(cerr << "srcsamplesize=" << offset+m_left->m_samplesize << " beg=" << subroffset << endl;)
1186    LAZYDEBUG(cerr << "Offset for 5800=" << getPointOffset(5800/getNumDPPSample(),5800%getNumDPPSample()) << endl;)
1187      roffset=offset;
1188      size_t loop=0;
1189      size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1190      size_t outstep=getNoValues();
1191      size_t instep=m_left->getNoValues();
1192    LAZYDEBUG(cerr << "instep=" << instep << " outstep=" << outstep<< " numsteps=" << numsteps << endl;)
1193      switch (m_op)
1194      {
1195        case TRACE:
1196        for (loop=0;loop<numsteps;++loop)
1197        {
1198    size_t zz=sampleNo*getNumDPPSample()+loop;
1199    if (zz==5800)
1200    {
1201    LAZYDEBUG(cerr << "point=" <<  zz<< endl;)
1202    LAZYDEBUG(cerr << "Input to  trace=" << DataTypes::pointToString(*vleft,m_left->getShape(),subroffset,"") << endl;)
1203    LAZYDEBUG(cerr << "Offset for point=" << getPointOffset(5800/getNumDPPSample(),5800%getNumDPPSample()) << " vs ";)
1204    LAZYDEBUG(cerr << subroffset << endl;)
1205    LAZYDEBUG(cerr << "output=" << offset << endl;)
1206    }
1207                DataMaths::trace(*vleft,m_left->getShape(),subroffset, v ,getShape(),offset,m_axis_offset);
1208    if (zz==5800)
1209    {
1210    LAZYDEBUG(cerr << "Result of trace=" << DataTypes::pointToString(v,getShape(),offset,"") << endl;)
1211    }
1212            subroffset+=instep;
1213            offset+=outstep;
1214        }
1215        break;
1216        case TRANS:
1217        for (loop=0;loop<numsteps;++loop)
1218        {
1219                DataMaths::transpose(*vleft,m_left->getShape(),subroffset, v,getShape(),offset,m_axis_offset);
1220            subroffset+=instep;
1221            offset+=outstep;
1222        }
1223        break;
1224        default:
1225        throw DataException("Programmer error - resolveNP1OUTP can not resolve operator "+opToString(m_op)+".");
1226      }
1227      return &v;
1228    }
1229    
1230    
1231    /*
1232      \brief Compute the value of the expression (unary operation with int params) for the given sample.
1233      \return Vector which stores the value of the subexpression for the given sample.
1234      \param v A vector to store intermediate results.
1235      \param offset Index in v to begin storing results.
1236      \param sampleNo Sample number to evaluate.
1237      \param roffset (output parameter) the offset in the return vector where the result begins.
1238    
1239      The return value will be an existing vector so do not deallocate it.
1240      If the result is stored in v it should be stored at the offset given.
1241      Everything from offset to the end of v should be considered available for this method to use.
1242    */
1243    DataTypes::ValueType*
1244    DataLazy::resolveNP1OUT_2P(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const
1245    {
1246        // we assume that any collapsing has been done before we get here
1247        // since we only have one argument we don't need to think about only
1248        // processing single points.
1249      if (m_readytype!='E')
1250      {
1251        throw DataException("Programmer error - resolveNP1OUT_2P should only be called on expanded Data.");
1252      }
1253        // since we can't write the result over the input, we need a result offset further along
1254      size_t subroffset;
1255      const ValueType* vleft=m_left->resolveVectorSample(v,offset+m_left->m_samplesize,sampleNo,subroffset);
1256    LAZYDEBUG(cerr << "srcsamplesize=" << offset+m_left->m_samplesize << " beg=" << subroffset << endl;)
1257    LAZYDEBUG(cerr << "Offset for 5800=" << getPointOffset(5800/getNumDPPSample(),5800%getNumDPPSample()) << endl;)
1258      roffset=offset;
1259      size_t loop=0;
1260      size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1261      size_t outstep=getNoValues();
1262      size_t instep=m_left->getNoValues();
1263    LAZYDEBUG(cerr << "instep=" << instep << " outstep=" << outstep<< " numsteps=" << numsteps << endl;)
1264      switch (m_op)
1265      {
1266        case SWAP:
1267        for (loop=0;loop<numsteps;++loop)
1268        {
1269                DataMaths::swapaxes(*vleft,m_left->getShape(),subroffset, v,getShape(),offset,m_axis_offset, m_transpose);
1270            subroffset+=instep;
1271            offset+=outstep;
1272        }
1273        break;
1274        default:
1275        throw DataException("Programmer error - resolveNP1OUT2P can not resolve operator "+opToString(m_op)+".");
1276      }
1277      return &v;
1278    }
1279    
1280    
1281    
1282  #define PROC_OP(TYPE,X)                               \  #define PROC_OP(TYPE,X)                               \
1283      for (int i=0;i<steps;++i,resultp+=resultStep) \      for (int j=0;j<onumsteps;++j)\
1284      { \      {\
1285         tensor_binary_operation< TYPE >(chunksize, &((*left)[lroffset]), &((*right)[rroffset]), resultp, X); \        for (int i=0;i<numsteps;++i,resultp+=resultStep) \
1286         lroffset+=leftStep; \        { \
1287         rroffset+=rightStep; \  LAZYDEBUG(cout << "[left,right]=[" << lroffset << "," << rroffset << "]" << endl;)\
1288    LAZYDEBUG(cout << "{left,right}={" << (*left)[lroffset] << "," << (*right)[rroffset] << "}\n";)\
1289             tensor_binary_operation< TYPE >(chunksize, &((*left)[lroffset]), &((*right)[rroffset]), resultp, X); \
1290    LAZYDEBUG(cout << " result=      " << resultp[0] << endl;) \
1291             lroffset+=leftstep; \
1292             rroffset+=rightstep; \
1293          }\
1294          lroffset+=oleftstep;\
1295          rroffset+=orightstep;\
1296      }      }
1297    
1298  /*  /*
# Line 845  DataLazy::resolveNP1OUT(ValueType& v, si Line 1319  DataLazy::resolveNP1OUT(ValueType& v, si
1319  DataTypes::ValueType*  DataTypes::ValueType*
1320  DataLazy::resolveBinary(ValueType& v,  size_t offset, int sampleNo, size_t& roffset) const  DataLazy::resolveBinary(ValueType& v,  size_t offset, int sampleNo, size_t& roffset) const
1321  {  {
1322  cout << "Resolve binary: " << toString() << endl;  LAZYDEBUG(cout << "Resolve binary: " << toString() << endl;)
1323    
1324    size_t lroffset=0, rroffset=0;    // offsets in the left and right result vectors    size_t lroffset=0, rroffset=0;    // offsets in the left and right result vectors
1325      // first work out which of the children are expanded      // first work out which of the children are expanded
1326    bool leftExp=(m_left->m_readytype=='E');    bool leftExp=(m_left->m_readytype=='E');
1327    bool rightExp=(m_right->m_readytype=='E');    bool rightExp=(m_right->m_readytype=='E');
1328    bool bigloops=((leftExp && rightExp) || (!leftExp && !rightExp)); // is processing in single step?    if (!leftExp && !rightExp)
1329    int steps=(bigloops?1:getNumDPPSample());    {
1330    size_t chunksize=(bigloops? m_samplesize : getNoValues());    // if bigloops, pretend the whole sample is a datapoint      throw DataException("Programmer Error - please use collapse if neither argument has type 'E'.");
1331    if (m_left->getRank()!=m_right->getRank())    // need to deal with scalar * ? ops    }
1332    {    bool leftScalar=(m_left->getRank()==0);
1333      EsysAssert((m_left->getRank()==0) || (m_right->getRank()==0), "Error - Ranks must match unless one is 0.");    bool rightScalar=(m_right->getRank()==0);
1334      steps=getNumDPPSample()*max(m_left->getNoValues(),m_right->getNoValues());    if ((m_left->getRank()!=m_right->getRank()) && (!leftScalar && !rightScalar))
1335      chunksize=1;    // for scalar    {
1336    }          throw DataException("resolveBinary - ranks of arguments must match unless one of them is scalar.");
1337    int leftStep=((leftExp && !rightExp)? m_right->getNoValues() : 0);    }
1338    int rightStep=((rightExp && !leftExp)? m_left->getNoValues() : 0);    size_t leftsize=m_left->getNoValues();
1339    int resultStep=max(leftStep,rightStep);   // only one (at most) should be !=0    size_t rightsize=m_right->getNoValues();
1340      size_t chunksize=1;           // how many doubles will be processed in one go
1341      int leftstep=0;       // how far should the left offset advance after each step
1342      int rightstep=0;
1343      int numsteps=0;       // total number of steps for the inner loop
1344      int oleftstep=0;  // the o variables refer to the outer loop
1345      int orightstep=0; // The outer loop is only required in cases where there is an extended scalar
1346      int onumsteps=1;
1347      
1348      bool LES=(leftExp && leftScalar); // Left is an expanded scalar
1349      bool RES=(rightExp && rightScalar);
1350      bool LS=(!leftExp && leftScalar); // left is a single scalar
1351      bool RS=(!rightExp && rightScalar);
1352      bool LN=(!leftExp && !leftScalar);    // left is a single non-scalar
1353      bool RN=(!rightExp && !rightScalar);
1354      bool LEN=(leftExp && !leftScalar);    // left is an expanded non-scalar
1355      bool REN=(rightExp && !rightScalar);
1356    
1357      if ((LES && RES) || (LEN && REN)) // both are Expanded scalars or both are expanded non-scalars
1358      {
1359        chunksize=m_left->getNumDPPSample()*leftsize;
1360        leftstep=0;
1361        rightstep=0;
1362        numsteps=1;
1363      }
1364      else if (LES || RES)
1365      {
1366        chunksize=1;
1367        if (LES)        // left is an expanded scalar
1368        {
1369            if (RS)
1370            {
1371               leftstep=1;
1372               rightstep=0;
1373               numsteps=m_left->getNumDPPSample();
1374            }
1375            else        // RN or REN
1376            {
1377               leftstep=0;
1378               oleftstep=1;
1379               rightstep=1;
1380               orightstep=(RN ? -(int)rightsize : 0);
1381               numsteps=rightsize;
1382               onumsteps=m_left->getNumDPPSample();
1383            }
1384        }
1385        else        // right is an expanded scalar
1386        {
1387            if (LS)
1388            {
1389               rightstep=1;
1390               leftstep=0;
1391               numsteps=m_right->getNumDPPSample();
1392            }
1393            else
1394            {
1395               rightstep=0;
1396               orightstep=1;
1397               leftstep=1;
1398               oleftstep=(LN ? -(int)leftsize : 0);
1399               numsteps=leftsize;
1400               onumsteps=m_right->getNumDPPSample();
1401            }
1402        }
1403      }
1404      else  // this leaves (LEN, RS), (LEN, RN) and their transposes
1405      {
1406        if (LEN)    // and Right will be a single value
1407        {
1408            chunksize=rightsize;
1409            leftstep=rightsize;
1410            rightstep=0;
1411            numsteps=m_left->getNumDPPSample();
1412            if (RS)
1413            {
1414               numsteps*=leftsize;
1415            }
1416        }
1417        else    // REN
1418        {
1419            chunksize=leftsize;
1420            rightstep=leftsize;
1421            leftstep=0;
1422            numsteps=m_right->getNumDPPSample();
1423            if (LS)
1424            {
1425               numsteps*=rightsize;
1426            }
1427        }
1428      }
1429    
1430      int resultStep=max(leftstep,rightstep);   // only one (at most) should be !=0
1431      // Get the values of sub-expressions      // Get the values of sub-expressions
1432    const ValueType* left=m_left->resolveSample(v,offset,sampleNo,lroffset);    const ValueType* left=m_left->resolveVectorSample(v,offset+getMaxSampleSize(),sampleNo,lroffset); // see note on
1433    const ValueType* right=m_right->resolveSample(v,offset+m_samplesize,sampleNo,rroffset); // Note      // calcBuffs for why we can't put offset as the 2nd param above
1434      const ValueType* right=m_right->resolveVectorSample(v,offset+2*getMaxSampleSize(),sampleNo,rroffset); // Note
1435      // the right child starts further along.      // the right child starts further along.
1436    LAZYDEBUG(cout << "Post sub calls in " << toString() << endl;)
1437    LAZYDEBUG(cout << "shapes=" << DataTypes::shapeToString(m_left->getShape()) << "," << DataTypes::shapeToString(m_right->getShape()) << endl;)
1438    LAZYDEBUG(cout << "chunksize=" << chunksize << endl << "leftstep=" << leftstep << " rightstep=" << rightstep;)
1439    LAZYDEBUG(cout << " numsteps=" << numsteps << endl << "oleftstep=" << oleftstep << " orightstep=" << orightstep;)
1440    LAZYDEBUG(cout << "onumsteps=" << onumsteps << endl;)
1441    LAZYDEBUG(cout << " DPPS=" << m_left->getNumDPPSample() << "," <<m_right->getNumDPPSample() << endl;)
1442    LAZYDEBUG(cout << "" << LS << RS << LN << RN << LES << RES <<LEN << REN <<   endl;)
1443    
1444    
1445    double* resultp=&(v[offset]);     // results are stored at the vector offset we received    double* resultp=&(v[offset]);     // results are stored at the vector offset we received
1446    switch(m_op)    switch(m_op)
1447    {    {
# Line 888  cout << "Resolve binary: " << toString() Line 1463  cout << "Resolve binary: " << toString()
1463      default:      default:
1464      throw DataException("Programmer error - resolveBinary can not resolve operator "+opToString(m_op)+".");      throw DataException("Programmer error - resolveBinary can not resolve operator "+opToString(m_op)+".");
1465    }    }
1466    roffset=offset;      roffset=offset;
1467    return &v;    return &v;
1468  }  }
1469    
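A worked instance (illustrative, not from the source) of the stepping parameters chosen in resolveBinary above, under assumed sizes: the left operand is an expanded scalar (LES), the right is a single non-scalar constant with 6 values (RN), and there are 4 data points per sample. The branch then sets chunksize=1, leftstep=0, oleftstep=1, rightstep=1, orightstep=-6, numsteps=6 and onumsteps=4, so each scalar value is combined with all 6 right values and the right offset rewinds between data points. The small standalone program below just replays that arithmetic.

// Illustrative, standalone trace (not repository code) of the LES/RN branch above.
#include <iostream>

int main()
{
    // left: expanded scalar, 4 data points per sample; right: single non-scalar with 6 values
    const int chunksize = 1, leftstep = 0, oleftstep = 1;
    const int rightstep = 1, orightstep = -6, numsteps = 6, onumsteps = 4;
    int lroffset = 0, rroffset = 0, written = 0;
    for (int j = 0; j < onumsteps; ++j)
    {
        for (int i = 0; i < numsteps; ++i)
        {
            written += chunksize;          // one tensor_binary_operation of size 1
            lroffset += leftstep;
            rroffset += rightstep;
        }
        lroffset += oleftstep;             // next scalar value of the left operand
        rroffset += orightstep;            // rewind the unexpanded right operand
    }
    std::cout << written << " values written; final offsets " << lroffset
              << "," << rroffset << std::endl;   // 24 values; offsets 4,0
    return 0;
}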
1470    
1471    
1472  /*  /*
1473    \brief Compute the value of the expression (tensor product) for the given sample.    \brief Compute the value of the expression (tensor product) for the given sample.
1474    \return Vector which stores the value of the subexpression for the given sample.    \return Vector which stores the value of the subexpression for the given sample.
# Line 911  cout << "Resolve binary: " << toString() Line 1487  cout << "Resolve binary: " << toString()
1487  DataTypes::ValueType*  DataTypes::ValueType*
1488  DataLazy::resolveTProd(ValueType& v,  size_t offset, int sampleNo, size_t& roffset) const  DataLazy::resolveTProd(ValueType& v,  size_t offset, int sampleNo, size_t& roffset) const
1489  {  {
1490  cout << "Resolve TensorProduct: " << toString() << endl;  LAZYDEBUG(cout << "Resolve TensorProduct: " << toString()  << " to offset " << offset<< endl;)
1491    
1492    size_t lroffset=0, rroffset=0;    // offsets in the left and right result vectors    size_t lroffset=0, rroffset=0;    // offsets in the left and right result vectors
1493      // first work out which of the children are expanded      // first work out which of the children are expanded
1494    bool leftExp=(m_left->m_readytype=='E');    bool leftExp=(m_left->m_readytype=='E');
1495    bool rightExp=(m_right->m_readytype=='E');    bool rightExp=(m_right->m_readytype=='E');
1496    int steps=getNumDPPSample();    int steps=getNumDPPSample();
1497    int leftStep=((leftExp && !rightExp)? m_right->getNoValues() : 0);  /*  int leftStep=((leftExp && !rightExp)? m_right->getNoValues() : 0);
1498    int rightStep=((rightExp && !leftExp)? m_left->getNoValues() : 0);    int rightStep=((rightExp && !leftExp)? m_left->getNoValues() : 0);*/
1499    int resultStep=max(leftStep,rightStep);   // only one (at most) should be !=0    int leftStep=(leftExp? m_left->getNoValues() : 0);        // do not have scalars as input to this method
1500      int rightStep=(rightExp?m_right->getNoValues() : 0);
1501    
1502      int resultStep=getNoValues();
1503      // Get the values of sub-expressions (leave a gap of one sample for the result).      // Get the values of sub-expressions (leave a gap of one sample for the result).
1504    const ValueType* left=m_left->resolveSample(v,offset+m_samplesize,sampleNo,lroffset);    int gap=offset+m_samplesize;  
1505    const ValueType* right=m_right->resolveSample(v,offset+2*m_samplesize,sampleNo,rroffset);  
1506    LAZYDEBUG(cout << "Query left with offset=" << gap << endl;)
1507    
1508      const ValueType* left=m_left->resolveVectorSample(v,gap,sampleNo,lroffset);
1509      gap+=m_left->getMaxSampleSize();
1510    
1511    
1512    LAZYDEBUG(cout << "Query right with offset=" << gap << endl;)
1513    
1514    
1515      const ValueType* right=m_right->resolveVectorSample(v,gap,sampleNo,rroffset);
1516    
1517    LAZYDEBUG(cerr << "[Left shape]=" << DataTypes::shapeToString(m_left->getShape()) << "\n[Right shape]=" << DataTypes::shapeToString(m_right->getShape()) << " result=" <<DataTypes::shapeToString(getShape()) <<  endl;
1518    cout << getNoValues() << endl;)
1519    LAZYDEBUG(cerr << "Result of left=";)
1520    LAZYDEBUG(cerr << "[" << lroffset << " .. " << lroffset+m_left->getNoValues() << "]" << endl;
1521    
1522    for (int i=lroffset, limit=lroffset+(leftExp?m_left->getNoValues()*m_left->getNumDPPSample():m_left->getNoValues());i<limit;++i)
1523    {
1524    cout << "[" << setw(2) << i-lroffset << "] " << setw(10) << (*left)[i] << " ";
1525    if (i%4==0) cout << endl;
1526    })
1527    LAZYDEBUG(cerr << "\nResult of right=" << endl;)
1528    LAZYDEBUG(
1529    for (int i=rroffset, limit=rroffset+(rightExp?m_right->getNoValues()*m_right->getNumDPPSample():m_right->getNoValues());i<limit;++i)
1530    {
1531    cerr << "[" <<  setw(2)<< i-rroffset << "] " << setw(10) << (*right)[i] << " ";
1532    if (i%4==0) cout << endl;
1533    }
1534    cerr << endl;
1535    )
1536    LAZYDEBUG(cerr << "Post sub calls: " << toString() << endl;)
1537    LAZYDEBUG(cout << "LeftExp=" << leftExp << " rightExp=" << rightExp << endl;)
1538    LAZYDEBUG(cout << "LeftR=" << m_left->getRank() << " rightExp=" << m_right->getRank() << endl;)
1539    LAZYDEBUG(cout << "LeftSize=" << m_left->getNoValues() << " RightSize=" << m_right->getNoValues() << endl;)
1540    LAZYDEBUG(cout << "m_samplesize=" << m_samplesize << endl;)
1541    LAZYDEBUG(cout << "outputshape=" << DataTypes::shapeToString(getShape()) << endl;)
1542    LAZYDEBUG(cout << "DPPS=" << m_right->getNumDPPSample() <<"."<<endl;)
1543    
1544    double* resultp=&(v[offset]);     // results are stored at the vector offset we received    double* resultp=&(v[offset]);     // results are stored at the vector offset we received
1545    switch(m_op)    switch(m_op)
1546    {    {
1547      case PROD:      case PROD:
1548      for (int i=0;i<steps;++i,resultp+=resultStep)      for (int i=0;i<steps;++i,resultp+=resultStep)
1549      {      {
1550    
1551    LAZYDEBUG(cout << "lroffset=" << lroffset << "rroffset=" << rroffset << endl;)
1552    LAZYDEBUG(cout << "l*=" << left << " r*=" << right << endl;)
1553    LAZYDEBUG(cout << "m_SL=" << m_SL << " m_SM=" << m_SM << " m_SR=" << m_SR << endl;)
1554    
1555            const double *ptr_0 = &((*left)[lroffset]);            const double *ptr_0 = &((*left)[lroffset]);
1556            const double *ptr_1 = &((*right)[rroffset]);            const double *ptr_1 = &((*right)[rroffset]);
1557    
1558    LAZYDEBUG(cout << DataTypes::pointToString(*left, m_left->getShape(),lroffset,"LEFT") << endl;)
1559    LAZYDEBUG(cout << DataTypes::pointToString(*right,m_right->getShape(),rroffset, "RIGHT") << endl;)
1560    
1561            matrix_matrix_product(m_SL, m_SM, m_SR, ptr_0, ptr_1, resultp, m_transpose);            matrix_matrix_product(m_SL, m_SM, m_SR, ptr_0, ptr_1, resultp, m_transpose);
1562    
1563    LAZYDEBUG(cout << "Results--\n";
1564    {
1565      DataVector dv(getNoValues());
1566    for (int z=0;z<getNoValues();++z)
1567    {
1568      cout << "[" << setw(2) << z<< "] " << setw(10) << resultp[z] << " ";
1569      if (z%4==0) cout << endl;
1570      dv[z]=resultp[z];
1571    }
1572    cout << endl << DataTypes::pointToString(dv,getShape(),0,"RESLT");
1573    cout << "\nWritten to: " << resultp << " resultStep=" << resultStep << endl;
1574    }
1575    )
1576        lroffset+=leftStep;        lroffset+=leftStep;
1577        rroffset+=rightStep;        rroffset+=rightStep;
1578      }      }
# Line 945  cout << "Resolve TensorProduct: " << toS Line 1585  cout << "Resolve TensorProduct: " << toS
1585  }  }
1586    
1587    
1588    #ifdef LAZY_NODE_STORAGE
1589    
1590    // The result will be stored in m_samples
1591    // The return value is a pointer to the DataVector, offset is the offset within the return value
1592    const DataTypes::ValueType*
1593    DataLazy::resolveNodeSample(int tid, int sampleNo, size_t& roffset)
1594    {
1595    LAZYDEBUG(cout << "Resolve sample " << toString() << endl;)
1596        // collapse so we have a 'E' node or an IDENTITY for some other type
1597      if (m_readytype!='E' && m_op!=IDENTITY)
1598      {
1599        collapse();
1600      }
1601      if (m_op==IDENTITY)  
1602      {
1603        const ValueType& vec=m_id->getVectorRO();
1604    //     if (m_readytype=='C')
1605    //     {
1606    //  roffset=0;      // all samples read from the same position
1607    //  return &(m_samples);
1608    //     }
1609        roffset=m_id->getPointOffset(sampleNo, 0);
1610        return &(vec);
1611      }
1612      if (m_readytype!='E')
1613      {
1614        throw DataException("Programmer Error - Collapse did not produce an expanded node.");
1615      }
1616      if (m_sampleids[tid]==sampleNo)
1617      {
1618        roffset=tid*m_samplesize;
1619        return &(m_samples);        // sample is already resolved
1620      }
1621      m_sampleids[tid]=sampleNo;
1622      switch (getOpgroup(m_op))
1623      {
1624      case G_UNARY:
1625      case G_UNARY_P: return resolveNodeUnary(tid, sampleNo, roffset);
1626      case G_BINARY: return resolveNodeBinary(tid, sampleNo, roffset);
1627      case G_NP1OUT: return resolveNodeNP1OUT(tid, sampleNo, roffset);
1628      case G_NP1OUT_P: return resolveNodeNP1OUT_P(tid, sampleNo, roffset);
1629      case G_TENSORPROD: return resolveNodeTProd(tid, sampleNo, roffset);
1630      case G_NP1OUT_2P: return resolveNodeNP1OUT_2P(tid, sampleNo, roffset);
1631      default:
1632        throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");
1633      }
1634    }
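// A usage sketch of the per-thread cache above. The constructor call is hypothetical and is
// shown only to illustrate a DAG that shares a node; see the actual constructors earlier in
// this file.
//
//   DataAbstract_ptr sq = ...;                   // some expensive lazy subexpression
//   DataLazy* expr = new DataLazy(sq, sq, MUL);  // 'sq' is both the left and the right child
//
// While one sample of expr is resolved on thread tid, the first child visit computes sq's
// sample into sq's m_samples and records m_sampleids[tid]=sampleNo; the second visit takes
// the m_sampleids[tid]==sampleNo early return and reuses that result instead of recomputing.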
1635    
1636    const DataTypes::ValueType*
1637    DataLazy::resolveNodeUnary(int tid, int sampleNo, size_t& roffset)
1638    {
1639        // we assume that any collapsing has been done before we get here
1640        // since we only have one argument we don't need to think about only
1641        // processing single points.
1642        // we will also know we won't get identity nodes
1643      if (m_readytype!='E')
1644      {
1645        throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
1646      }
1647      if (m_op==IDENTITY)
1648      {
1649        throw DataException("Programmer error - resolveNodeUnary should not be called on identity nodes.");
1650      }
1651      const DataTypes::ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, roffset);
1652      const double* left=&((*leftres)[roffset]);
1653      roffset=m_samplesize*tid;
1654      double* result=&(m_samples[roffset]);
1655      switch (m_op)
1656      {
1657        case SIN:  
1658        tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sin);
1659        break;
1660        case COS:
1661        tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cos);
1662        break;
1663        case TAN:
1664        tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tan);
1665        break;
1666        case ASIN:
1667        tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::asin);
1668        break;
1669        case ACOS:
1670        tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::acos);
1671        break;
1672        case ATAN:
1673        tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::atan);
1674        break;
1675        case SINH:
1676        tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sinh);
1677        break;
1678        case COSH:
1679        tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cosh);
1680        break;
1681        case TANH:
1682        tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tanh);
1683        break;
1684        case ERF:
1685    #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1686        throw DataException("Error - Data:: erf function is not supported on _WIN32 platforms.");
1687    #else
1688        tensor_unary_operation(m_samplesize, left, result, ::erf);
1689        break;
1690    #endif
1691       case ASINH:
1692    #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1693        tensor_unary_operation(m_samplesize, left, result, escript::asinh_substitute);
1694    #else
1695        tensor_unary_operation(m_samplesize, left, result, ::asinh);
1696    #endif  
1697        break;
1698       case ACOSH:
1699    #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1700        tensor_unary_operation(m_samplesize, left, result, escript::acosh_substitute);
1701    #else
1702        tensor_unary_operation(m_samplesize, left, result, ::acosh);
1703    #endif  
1704        break;
1705       case ATANH:
1706    #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1707        tensor_unary_operation(m_samplesize, left, result, escript::atanh_substitute);
1708    #else
1709        tensor_unary_operation(m_samplesize, left, result, ::atanh);
1710    #endif  
1711        break;
1712        case LOG10:
1713        tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log10);
1714        break;
1715        case LOG:
1716        tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log);
1717        break;
1718        case SIGN:
1719        tensor_unary_operation(m_samplesize, left, result, escript::fsign);
1720        break;
1721        case ABS:
1722        tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::fabs);
1723        break;
1724        case NEG:
1725        tensor_unary_operation(m_samplesize, left, result, negate<double>());
1726        break;
1727        case POS:
1728        // POS doesn't mean anything for delayed evaluation;
1729        // it would just trigger a deep copy of the lazy object.
1730        throw DataException("Programmer error - POS not supported for lazy data.");
1731        break;
1732        case EXP:
1733        tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::exp);
1734        break;
1735        case SQRT:
1736        tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sqrt);
1737        break;
1738        case RECIP:
1739        tensor_unary_operation(m_samplesize, left, result, bind1st(divides<double>(),1.));
1740        break;
1741        case GZ:
1742        tensor_unary_operation(m_samplesize, left, result, bind2nd(greater<double>(),0.0));
1743        break;
1744        case LZ:
1745        tensor_unary_operation(m_samplesize, left, result, bind2nd(less<double>(),0.0));
1746        break;
1747        case GEZ:
1748        tensor_unary_operation(m_samplesize, left, result, bind2nd(greater_equal<double>(),0.0));
1749        break;
1750        case LEZ:
1751        tensor_unary_operation(m_samplesize, left, result, bind2nd(less_equal<double>(),0.0));
1752        break;
1753    // These are actually G_UNARY_P operations, but I don't see a compelling reason to treat them differently
1754        case NEZ:
1755        tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsGT(),m_tol));
1756        break;
1757        case EZ:
1758        tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsLTE(),m_tol));
1759        break;
1760    
1761        default:
1762        throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");
1763      }
1764      return &(m_samples);
1765    }
1766    
1767    
1768    const DataTypes::ValueType*
1769    DataLazy::resolveNodeNP1OUT(int tid, int sampleNo, size_t& roffset)
1770    {
1771        // we assume that any collapsing has been done before we get here
1772        // since we only have one argument we don't need to think about only
1773        // processing single points.
1774      if (m_readytype!='E')
1775      {
1776        throw DataException("Programmer error - resolveNodeNP1OUT should only be called on expanded Data.");
1777      }
1778      if (m_op==IDENTITY)
1779      {
1780        throw DataException("Programmer error - resolveNodeNP1OUT should not be called on identity nodes.");
1781      }
1782      size_t subroffset;
1783      const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1784      roffset=m_samplesize*tid;
1785      size_t loop=0;
1786      size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1787      size_t step=getNoValues();
1788      size_t offset=roffset;
1789      switch (m_op)
1790      {
1791        case SYM:
1792        for (loop=0;loop<numsteps;++loop)
1793        {
1794            DataMaths::symmetric(*leftres,m_left->getShape(),subroffset, m_samples, getShape(), offset);
1795            subroffset+=step;
1796            offset+=step;
1797        }
1798        break;
1799        case NSYM:
1800        for (loop=0;loop<numsteps;++loop)
1801        {
1802            DataMaths::nonsymmetric(*leftres,m_left->getShape(),subroffset, m_samples, getShape(), offset);
1803            subroffset+=step;
1804            offset+=step;
1805        }
1806        break;
1807        default:
1808        throw DataException("Programmer error - resolveNP1OUT can not resolve operator "+opToString(m_op)+".");
1809      }
1810      return &m_samples;
1811    }
1812    
1813    const DataTypes::ValueType*
1814    DataLazy::resolveNodeNP1OUT_P(int tid, int sampleNo, size_t& roffset)
1815    {
1816        // we assume that any collapsing has been done before we get here
1817        // since we only have one argument we don't need to think about only
1818        // processing single points.
1819      if (m_readytype!='E')
1820      {
1821        throw DataException("Programmer error - resolveNodeNP1OUT_P should only be called on expanded Data.");
1822      }
1823      if (m_op==IDENTITY)
1824      {
1825        throw DataException("Programmer error - resolveNodeNP1OUT_P should not be called on identity nodes.");
1826      }
1827      size_t subroffset;
1828      size_t offset;
1829      const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1830      roffset=m_samplesize*tid;
1831      offset=roffset;
1832      size_t loop=0;
1833      size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1834      size_t outstep=getNoValues();
1835      size_t instep=m_left->getNoValues();
1836      switch (m_op)
1837      {
1838        case TRACE:
1839        for (loop=0;loop<numsteps;++loop)
1840        {
1841                DataMaths::trace(*leftres,m_left->getShape(),subroffset, m_samples ,getShape(),offset,m_axis_offset);
1842            subroffset+=instep;
1843            offset+=outstep;
1844        }
1845        break;
1846        case TRANS:
1847        for (loop=0;loop<numsteps;++loop)
1848        {
1849                DataMaths::transpose(*leftres,m_left->getShape(),subroffset, m_samples, getShape(),offset,m_axis_offset);
1850            subroffset+=instep;
1851            offset+=outstep;
1852        }
1853        break;
1854        default:
1855        throw DataException("Programmer error - resolveNP1OUTP can not resolve operator "+opToString(m_op)+".");
1856      }
1857      return &m_samples;
1858    }
1859    
1860    
1861    const DataTypes::ValueType*
1862    DataLazy::resolveNodeNP1OUT_2P(int tid, int sampleNo, size_t& roffset)
1863    {
1864      if (m_readytype!='E')
1865      {
1866        throw DataException("Programmer error - resolveNodeNP1OUT_2P should only be called on expanded Data.");
1867      }
1868      if (m_op==IDENTITY)
1869      {
1870        throw DataException("Programmer error - resolveNodeNP1OUT_2P should not be called on identity nodes.");
1871      }
1872      size_t subroffset;
1873      size_t offset;
1874      const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1875      roffset=m_samplesize*tid;
1876      offset=roffset;
1877      size_t loop=0;
1878      size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1879      size_t outstep=getNoValues();
1880      size_t instep=m_left->getNoValues();
1881      switch (m_op)
1882      {
1883        case SWAP:
1884        for (loop=0;loop<numsteps;++loop)
1885        {
1886                DataMaths::swapaxes(*leftres,m_left->getShape(),subroffset, m_samples, getShape(),offset, m_axis_offset, m_transpose);
1887            subroffset+=instep;
1888            offset+=outstep;
1889        }
1890        break;
1891        default:
1892        throw DataException("Programmer error - resolveNodeNP1OUT2P can not resolve operator "+opToString(m_op)+".");
1893      }
1894      return &m_samples;
1895    }
1896    
1897    
1898    
1899    // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1900    // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1901    // If both children are expanded, then we can process them in a single operation (we treat
1902    // the whole sample as one big datapoint).
1903    // If one of the children is not expanded, then we need to treat each point in the sample
1904    // individually.
1905    // There is an additional complication when scalar operations are considered.
1906    // For example, 2+Vector.
1907    // In this case, each double within the point is treated individually.
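// A minimal, self-contained sketch of the traversal that the step variables computed in
// resolveNodeBinary below are assumed to drive (the real work is done by the PROC_OP macro
// defined earlier in this file, which may differ in detail). The outer loop only does work
// when one operand is an extended scalar; a negative outer step rewinds over a single
// unexpanded point so it is reused for the next datapoint.
template <typename BinOp>
void binary_traverse_sketch(const double* left, const double* right, double* res,
                            size_t chunksize, int numsteps, int onumsteps,
                            int leftstep, int rightstep,
                            int oleftstep, int orightstep, BinOp op)
{
  long lro=0, rro=0;                    // read offsets into the operand buffers
  for (int o=0; o<onumsteps; ++o)       // outer loop: only used for extended scalars
  {
    for (int s=0; s<numsteps; ++s)      // inner loop over chunks
    {
      for (size_t k=0; k<chunksize; ++k)
      {
        *res++ = op(left[lro+k], right[rro+k]);   // e.g. op = plus<double>()
      }
      lro+=leftstep;
      rro+=rightstep;
    }
    lro+=oleftstep;                     // may be negative: rewind over a single unexpanded point
    rro+=orightstep;
  }
}
// For example, for 2+Vector (a constant scalar plus an expanded non-scalar) the code below
// chooses chunksize=1 and steps through the vector one double at a time while holding the
// scalar offset fixed, which is the "each double treated individually" case described above.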
1908    const DataTypes::ValueType*
1909    DataLazy::resolveNodeBinary(int tid, int sampleNo, size_t& roffset)
1910    {
1911    LAZYDEBUG(cout << "Resolve binary: " << toString() << endl;)
1912    
1913      size_t lroffset=0, rroffset=0;    // offsets in the left and right result vectors
1914        // first work out which of the children are expanded
1915      bool leftExp=(m_left->m_readytype=='E');
1916      bool rightExp=(m_right->m_readytype=='E');
1917      if (!leftExp && !rightExp)
1918      {
1919        throw DataException("Programmer Error - please use collapse if neither argument has type 'E'.");
1920      }
1921      bool leftScalar=(m_left->getRank()==0);
1922      bool rightScalar=(m_right->getRank()==0);
1923      if ((m_left->getRank()!=m_right->getRank()) && (!leftScalar && !rightScalar))
1924      {
1925        throw DataException("resolveBinary - ranks of arguments must match unless one of them is scalar.");
1926      }
1927      size_t leftsize=m_left->getNoValues();
1928      size_t rightsize=m_right->getNoValues();
1929      size_t chunksize=1;           // how many doubles will be processed in one go
1930      int leftstep=0;       // how far should the left offset advance after each step
1931      int rightstep=0;
1932      int numsteps=0;       // total number of steps for the inner loop
1933      int oleftstep=0;  // the o variables refer to the outer loop
1934      int orightstep=0; // The outer loop is only required in cases where there is an extended scalar
1935      int onumsteps=1;
1936      
1937      bool LES=(leftExp && leftScalar); // Left is an expanded scalar
1938      bool RES=(rightExp && rightScalar);
1939      bool LS=(!leftExp && leftScalar); // left is a single scalar
1940      bool RS=(!rightExp && rightScalar);
1941      bool LN=(!leftExp && !leftScalar);    // left is a single non-scalar
1942      bool RN=(!rightExp && !rightScalar);
1943      bool LEN=(leftExp && !leftScalar);    // left is an expanded non-scalar
1944      bool REN=(rightExp && !rightScalar);
1945    
1946      if ((LES && RES) || (LEN && REN)) // both are Expanded scalars or both are expanded non-scalars
1947      {
1948        chunksize=m_left->getNumDPPSample()*leftsize;
1949        leftstep=0;
1950        rightstep=0;
1951        numsteps=1;
1952      }
1953      else if (LES || RES)
1954      {
1955        chunksize=1;
1956        if (LES)        // left is an expanded scalar
1957        {
1958            if (RS)
1959            {
1960               leftstep=1;
1961               rightstep=0;
1962               numsteps=m_left->getNumDPPSample();
1963            }
1964            else        // RN or REN
1965            {
1966               leftstep=0;
1967               oleftstep=1;
1968               rightstep=1;
1969               orightstep=(RN ? -(int)rightsize : 0);
1970               numsteps=rightsize;
1971               onumsteps=m_left->getNumDPPSample();
1972            }
1973        }
1974        else        // right is an expanded scalar
1975        {
1976            if (LS)
1977            {
1978               rightstep=1;
1979               leftstep=0;
1980               numsteps=m_right->getNumDPPSample();
1981            }
1982            else
1983            {
1984               rightstep=0;
1985               orightstep=1;
1986               leftstep=1;
1987               oleftstep=(LN ? -(int)leftsize : 0);
1988               numsteps=leftsize;
1989               onumsteps=m_right->getNumDPPSample();
1990            }
1991        }
1992      }
1993      else  // this leaves (LEN, RS), (LEN, RN) and their transposes
1994      {
1995        if (LEN)    // and Right will be a single value
1996        {
1997            chunksize=rightsize;
1998            leftstep=rightsize;
1999            rightstep=0;
2000            numsteps=m_left->getNumDPPSample();
2001            if (RS)
2002            {
2003               numsteps*=leftsize;
2004            }
2005        }
2006        else    // REN
2007        {
2008            chunksize=leftsize;
2009            rightstep=leftsize;
2010            leftstep=0;
2011            numsteps=m_right->getNumDPPSample();
2012            if (LS)
2013            {
2014               numsteps*=rightsize;
2015            }
2016        }
2017      }
2018    
2019      int resultStep=max(leftstep,rightstep);   // only one (at most) should be !=0
2020        // Get the values of sub-expressions
2021      const ValueType* left=m_left->resolveNodeSample(tid,sampleNo,lroffset);  
2022      const ValueType* right=m_right->resolveNodeSample(tid,sampleNo,rroffset);
2023    LAZYDEBUG(cout << "Post sub calls in " << toString() << endl;)
2024    LAZYDEBUG(cout << "shapes=" << DataTypes::shapeToString(m_left->getShape()) << "," << DataTypes::shapeToString(m_right->getShape()) << endl;)
2025    LAZYDEBUG(cout << "chunksize=" << chunksize << endl << "leftstep=" << leftstep << " rightstep=" << rightstep;)
2026    LAZYDEBUG(cout << " numsteps=" << numsteps << endl << "oleftstep=" << oleftstep << " orightstep=" << orightstep;)
2027    LAZYDEBUG(cout << "onumsteps=" << onumsteps << endl;)
2028    LAZYDEBUG(cout << " DPPS=" << m_left->getNumDPPSample() << "," <<m_right->getNumDPPSample() << endl;)
2029    LAZYDEBUG(cout << "" << LS << RS << LN << RN << LES << RES <<LEN << REN <<   endl;)
2030    
2031    LAZYDEBUG(cout << "Left res["<< lroffset<< "]=" << (*left)[lroffset] << endl;)
2032    LAZYDEBUG(cout << "Right res["<< rroffset<< "]=" << (*right)[rroffset] << endl;)
2033    
2034    
2035      roffset=m_samplesize*tid;
2036      double* resultp=&(m_samples[roffset]);        // results are stored at the vector offset we received
2037      switch(m_op)
2038      {
2039        case ADD:
2040            PROC_OP(NO_ARG,plus<double>());
2041        break;
2042        case SUB:
2043        PROC_OP(NO_ARG,minus<double>());
2044        break;
2045        case MUL:
2046        PROC_OP(NO_ARG,multiplies<double>());
2047        break;
2048        case DIV:
2049        PROC_OP(NO_ARG,divides<double>());
2050        break;
2051        case POW:
2052           PROC_OP(double (double,double),::pow);
2053        break;
2054        default:
2055        throw DataException("Programmer error - resolveBinary can not resolve operator "+opToString(m_op)+".");
2056      }
2057    LAZYDEBUG(cout << "Result res[" << roffset << "]=" << m_samples[roffset] << endl;)
2058      return &m_samples;
2059    }
2060    
2061    
2062    // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
2063    // have already been collapsed to IDENTITY. So we must have at least one expanded child.
2064    // Unlike the other resolve helpers, we must treat each datapoint separately.
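// A worked example, assuming m_SL, m_SM and m_SR play the usual (SL x SM)*(SM x SR) roles in
// matrix_matrix_product (they are set up in the constructor, outside this excerpt):
// contracting a rank-2 left point shaped (2,3) with a rank-2 right point shaped (3,4) gives
// m_SL=2, m_SM=3, m_SR=4 and a (2,4) result, i.e. getNoValues()==8 doubles per datapoint,
// which is why resultp advances by resultStep=getNoValues() once per datapoint in the loop
// below, while lroffset/rroffset advance by the full point sizes of the operands.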
2065    const DataTypes::ValueType*
2066    DataLazy::resolveNodeTProd(int tid, int sampleNo, size_t& roffset)
2067    {
2068    LAZYDEBUG(cout << "Resolve TensorProduct: " << toString() << endl;)
2069    
2070      size_t lroffset=0, rroffset=0;    // offsets in the left and right result vectors
2071        // first work out which of the children are expanded
2072      bool leftExp=(m_left->m_readytype=='E');
2073      bool rightExp=(m_right->m_readytype=='E');
2074      int steps=getNumDPPSample();
2075      int leftStep=(leftExp? m_left->getNoValues() : 0);        // do not have scalars as input to this method
2076      int rightStep=(rightExp?m_right->getNoValues() : 0);
2077    
2078      int resultStep=getNoValues();
2079      roffset=m_samplesize*tid;
2080      size_t offset=roffset;
2081    
2082      const ValueType* left=m_left->resolveNodeSample(tid, sampleNo, lroffset);
2083    
2084      const ValueType* right=m_right->resolveNodeSample(tid, sampleNo, rroffset);
2085    
2086    LAZYDEBUG(cerr << "[Left shape]=" << DataTypes::shapeToString(m_left->getShape()) << "\n[Right shape]=" << DataTypes::shapeToString(m_right->getShape()) << " result=" <<DataTypes::shapeToString(getShape()) <<  endl;
2087    cout << getNoValues() << endl;)
2088    
2089    
2090    LAZYDEBUG(cerr << "Post sub calls: " << toString() << endl;)
2091    LAZYDEBUG(cout << "LeftExp=" << leftExp << " rightExp=" << rightExp << endl;)
2092    LAZYDEBUG(cout << "LeftR=" << m_left->getRank() << " RightR=" << m_right->getRank() << endl;)
2093    LAZYDEBUG(cout << "LeftSize=" << m_left->getNoValues() << " RightSize=" << m_right->getNoValues() << endl;)
2094    LAZYDEBUG(cout << "m_samplesize=" << m_samplesize << endl;)
2095    LAZYDEBUG(cout << "outputshape=" << DataTypes::shapeToString(getShape()) << endl;)
2096    LAZYDEBUG(cout << "DPPS=" << m_right->getNumDPPSample() <<"."<<endl;)
2097    
2098      double* resultp=&(m_samples[offset]);     // results are stored at the vector offset we received
2099      switch(m_op)
2100      {
2101        case PROD:
2102        for (int i=0;i<steps;++i,resultp+=resultStep)
2103        {
2104              const double *ptr_0 = &((*left)[lroffset]);
2105              const double *ptr_1 = &((*right)[rroffset]);
2106    
2107    LAZYDEBUG(cout << DataTypes::pointToString(*left, m_left->getShape(),lroffset,"LEFT") << endl;)
2108    LAZYDEBUG(cout << DataTypes::pointToString(*right,m_right->getShape(),rroffset, "RIGHT") << endl;)
2109    
2110              matrix_matrix_product(m_SL, m_SM, m_SR, ptr_0, ptr_1, resultp, m_transpose);
2111    
2112          lroffset+=leftStep;
2113          rroffset+=rightStep;
2114        }
2115        break;
2116        default:
2117        throw DataException("Programmer error - resolveTProduct can not resolve operator "+opToString(m_op)+".");
2118      }
2119      roffset=offset;
2120      return &m_samples;
2121    }
2122    #endif //LAZY_NODE_STORAGE
2123    
2124  /*  /*
2125    \brief Compute the value of the expression for the given sample.    \brief Compute the value of the expression for the given sample.
# Line 963  cout << "Resolve TensorProduct: " << toS Line 2138  cout << "Resolve TensorProduct: " << toS
2138    
2139  // the roffset is the offset within the returned vector where the data begins  // the roffset is the offset within the returned vector where the data begins
2140  const DataTypes::ValueType*  const DataTypes::ValueType*
2141  DataLazy::resolveSample(ValueType& v, size_t offset, int sampleNo, size_t& roffset)  DataLazy::resolveVectorSample(ValueType& v, size_t offset, int sampleNo, size_t& roffset)
2142  {  {
2143  cout << "Resolve sample " << toString() << endl;  LAZYDEBUG(cout << "Resolve sample " << toString() << endl;)
2144      // collapse so we have a 'E' node or an IDENTITY for some other type      // collapse so we have a 'E' node or an IDENTITY for some other type
2145    if (m_readytype!='E' && m_op!=IDENTITY)    if (m_readytype!='E' && m_op!=IDENTITY)
2146    {    {
# Line 973  cout << "Resolve sample " << toString() Line 2148  cout << "Resolve sample " << toString()
2148    }    }
2149    if (m_op==IDENTITY)      if (m_op==IDENTITY)  
2150    {    {
2151      const ValueType& vec=m_id->getVector();      const ValueType& vec=m_id->getVectorRO();
2152      if (m_readytype=='C')      if (m_readytype=='C')
2153      {      {
2154      roffset=0;      roffset=0;
2155    LAZYDEBUG(cout << "Finish  sample " << toString() << endl;)
2156      return &(vec);      return &(vec);
2157      }      }
2158      roffset=m_id->getPointOffset(sampleNo, 0);      roffset=m_id->getPointOffset(sampleNo, 0);
2159    LAZYDEBUG(cout << "Finish  sample " << toString() << endl;)
2160      return &(vec);      return &(vec);
2161    }    }
2162    if (m_readytype!='E')    if (m_readytype!='E')
# Line 988  cout << "Resolve sample " << toString() Line 2165  cout << "Resolve sample " << toString()
2165    }    }
2166    switch (getOpgroup(m_op))    switch (getOpgroup(m_op))
2167    {    {
2168    case G_UNARY: return resolveUnary(v, offset,sampleNo,roffset);    case G_UNARY:
2169      case G_UNARY_P: return resolveUnary(v, offset,sampleNo,roffset);
2170    case G_BINARY: return resolveBinary(v, offset,sampleNo,roffset);    case G_BINARY: return resolveBinary(v, offset,sampleNo,roffset);
2171    case G_NP1OUT: return resolveNP1OUT(v, offset, sampleNo,roffset);    case G_NP1OUT: return resolveNP1OUT(v, offset, sampleNo,roffset);
2172      case G_NP1OUT_P: return resolveNP1OUT_P(v, offset, sampleNo,roffset);
2173    case G_TENSORPROD: return resolveTProd(v,offset, sampleNo,roffset);    case G_TENSORPROD: return resolveTProd(v,offset, sampleNo,roffset);
2174      case G_NP1OUT_2P: return resolveNP1OUT_2P(v, offset, sampleNo, roffset);
2175    default:    default:
2176      throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");      throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");
2177    }    }
2178    
2179    }
2180    
2181    const DataTypes::ValueType*
2182    DataLazy::resolveSample(BufferGroup& bg, int sampleNo, size_t& roffset)
2183    {
2184    #ifdef _OPENMP
2185        int tid=omp_get_thread_num();
2186    #else
2187        int tid=0;
2188    #endif
2189    #ifdef LAZY_NODE_STORAGE
2190        return resolveNodeSample(tid, sampleNo, roffset);
2191    #else
2192        return resolveVectorSample(bg.getBuffer(tid),bg.getOffset(tid),sampleNo,roffset);
2193    #endif
2194    }
2195    
2196    
2197    // This needs to do the work of the identity constructor
2198    void
2199    DataLazy::resolveToIdentity()
2200    {
2201       if (m_op==IDENTITY)
2202        return;
2203    #ifndef LAZY_NODE_STORAGE
2204       DataReady_ptr p=resolveVectorWorker();
2205    #else
2206       DataReady_ptr p=resolveNodeWorker();
2207    #endif
2208       makeIdentity(p);
2209    }
2210    
2211    void DataLazy::makeIdentity(const DataReady_ptr& p)
2212    {
2213       m_op=IDENTITY;
2214       m_axis_offset=0;
2215       m_transpose=0;
2216       m_SL=m_SM=m_SR=0;
2217       m_children=m_height=0;
2218       m_id=p;
2219       if(p->isConstant()) {m_readytype='C';}
2220       else if(p->isExpanded()) {m_readytype='E';}
2221       else if (p->isTagged()) {m_readytype='T';}
2222       else {throw DataException("Unknown DataReady instance in convertToIdentity constructor.");}
2223       m_buffsRequired=1;
2224       m_samplesize=p->getNumDPPSample()*p->getNoValues();
2225       m_maxsamplesize=m_samplesize;
2226       m_left.reset();
2227       m_right.reset();
2228  }  }
2229    
2230    
 // To simplify the memory management, all threads operate on one large vector, rather than one each.  
 // Each sample is evaluated independently and copied into the result DataExpanded.  
2231  DataReady_ptr  DataReady_ptr
2232  DataLazy::resolve()  DataLazy::resolve()
2233  {  {
2234        resolveToIdentity();
2235        return m_id;
2236    }
2237    
2238  cout << "Sample size=" << m_samplesize << endl;  #ifdef LAZY_NODE_STORAGE
 cout << "Buffers=" << m_buffsRequired << endl;  
2239    
2240    // This version of resolve uses storage in each node to hold results
2241    DataReady_ptr
2242    DataLazy::resolveNodeWorker()
2243    {
2244      if (m_readytype!='E')     // if the whole sub-expression is Constant or Tagged, then evaluate it normally
2245      {
2246        collapse();
2247      }
2248      if (m_op==IDENTITY)       // So a lazy expression of Constant or Tagged data will be returned here.
2249      {
2250        return m_id;
2251      }
2252        // from this point on we must have m_op!=IDENTITY and m_readytype=='E'
2253      DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(),  ValueType(getNoValues()));
2254      ValueType& resvec=result->getVectorRW();
2255      DataReady_ptr resptr=DataReady_ptr(result);
2256    
2257      int sample;
2258      int totalsamples=getNumSamples();
2259      const ValueType* res=0;   // Storage for answer
2260    LAZYDEBUG(cout << "Total number of samples=" <<totalsamples << endl;)
2261      #pragma omp parallel for private(sample,res) schedule(static)
2262      for (sample=0;sample<totalsamples;++sample)
2263      {
2264        size_t roffset=0;
2265    #ifdef _OPENMP
2266        res=resolveNodeSample(omp_get_thread_num(),sample,roffset);
2267    #else
2268        res=resolveNodeSample(0,sample,roffset);
2269    #endif
2270    LAZYDEBUG(cout << "Sample #" << sample << endl;)
2271    LAZYDEBUG(cout << "Final res[" << roffset << "]=" << (*res)[roffset] << endl; )
2272        DataVector::size_type outoffset=result->getPointOffset(sample,0);
2273        memcpy(&(resvec[outoffset]),&((*res)[roffset]),m_samplesize*sizeof(DataVector::ElementType));
2274      }
2275      return resptr;
2276    }
2277    
2278    #endif // LAZY_NODE_STORAGE
2279    
2280    // To simplify the memory management, all threads operate on one large vector, rather than one each.
2281    // Each sample is evaluated independently and copied into the result DataExpanded.
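// Layout sketch (an assumption that simply restates the code below, not extra machinery):
// the scratch vector v is carved into one private region of threadbuffersize doubles per
// OpenMP thread, so the parallel loop needs no locking.
//
//   v: [ thread 0 scratch | thread 1 scratch | ... | thread N-1 scratch ]
//
// Thread t only touches [ threadbuffersize*t, threadbuffersize*(t+1) ) and passes
// threadbuffersize*omp_get_thread_num() as the buffer offset to resolveVectorSample.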
2282    DataReady_ptr
2283    DataLazy::resolveVectorWorker()
2284    {
2285    
2286    LAZYDEBUG(cout << "Sample size=" << m_samplesize << endl;)
2287    LAZYDEBUG(cout << "Buffers=" << m_buffsRequired << endl;)
2288    if (m_readytype!='E')     // if the whole sub-expression is Constant or Tagged, then evaluate it normally    if (m_readytype!='E')     // if the whole sub-expression is Constant or Tagged, then evaluate it normally
2289    {    {
2290      collapse();      collapse();
# Line 1020  cout << "Buffers=" << m_buffsRequired << Line 2298  cout << "Buffers=" << m_buffsRequired <<
2298      // storage to evaluate its expression      // storage to evaluate its expression
2299    int numthreads=1;    int numthreads=1;
2300  #ifdef _OPENMP  #ifdef _OPENMP
2301    numthreads=getNumberOfThreads();    numthreads=omp_get_max_threads();
   int threadnum=0;  
2302  #endif  #endif
2303    ValueType v(numthreads*threadbuffersize);    ValueType v(numthreads*threadbuffersize);
2304  cout << "Buffer created with size=" << v.size() << endl;  LAZYDEBUG(cout << "Buffer created with size=" << v.size() << endl;)
2305    DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(),  ValueType(getNoValues()));    DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(),  ValueType(getNoValues()));
2306    ValueType& resvec=result->getVector();    ValueType& resvec=result->getVectorRW();
2307    DataReady_ptr resptr=DataReady_ptr(result);    DataReady_ptr resptr=DataReady_ptr(result);
2308    int sample;    int sample;
2309    size_t outoffset;     // offset in the output data    size_t outoffset;     // offset in the output data
2310    int totalsamples=getNumSamples();    int totalsamples=getNumSamples();
2311    const ValueType* res=0;   // Vector storing the answer    const ValueType* res=0;   // Vector storing the answer
2312    size_t resoffset=0;       // where in the vector to find the answer    size_t resoffset=0;       // where in the vector to find the answer
2313    #pragma omp parallel for private(sample,resoffset,outoffset,threadnum,res) schedule(static)  LAZYDEBUG(cout << "Total number of samples=" <<totalsamples << endl;)
2314      #pragma omp parallel for private(sample,resoffset,outoffset,res) schedule(static)
2315    for (sample=0;sample<totalsamples;++sample)    for (sample=0;sample<totalsamples;++sample)
2316    {    {
2317  cout << "################################# " << sample << endl;  LAZYDEBUG(cout << "################################# " << sample << endl;)
2318  #ifdef _OPENMP  #ifdef _OPENMP
2319      res=resolveSample(v,threadbuffersize*omp_get_thread_num(),sample,resoffset);      res=resolveVectorSample(v,threadbuffersize*omp_get_thread_num(),sample,resoffset);
2320  #else  #else
2321      res=resolveSample(v,0,sample,resoffset);   // res would normally be v, but not if its a single IDENTITY op.      res=resolveVectorSample(v,0,sample,resoffset);   // res would normally be v, but not if its a single IDENTITY op.
2322  #endif  #endif
2323  cerr << "-------------------------------- " << endl;  LAZYDEBUG(cerr << "-------------------------------- " << endl;)
2324    LAZYDEBUG(cerr<< "Copying sample#" << sample << endl;)
2325      outoffset=result->getPointOffset(sample,0);      outoffset=result->getPointOffset(sample,0);
2326  cerr << "offset=" << outoffset << endl;  LAZYDEBUG(cerr << "offset=" << outoffset << " from offset=" << resoffset << " " << m_samplesize << " doubles" << endl;)
2327      for (unsigned int i=0;i<m_samplesize;++i,++outoffset,++resoffset)   // copy values into the output vector      for (unsigned int i=0;i<m_samplesize;++i,++outoffset,++resoffset)   // copy values into the output vector
2328      {      {
2329    LAZYDEBUG(cerr << "outoffset=" << outoffset << " resoffset=" << resoffset << " " << (*res)[resoffset]<< endl;)
2330      resvec[outoffset]=(*res)[resoffset];      resvec[outoffset]=(*res)[resoffset];
2331      }      }
2332  cerr << "*********************************" << endl;  LAZYDEBUG(cerr << DataTypes::pointToString(resvec,getShape(),outoffset-m_samplesize+DataTypes::noValues(getShape()),"Final result:") << endl;)
2333    LAZYDEBUG(cerr << "*********************************" << endl;)
2334    }    }
2335    return resptr;    return resptr;
2336  }  }
# Line 1067  DataLazy::toString() const Line 2348  DataLazy::toString() const
2348  void  void
2349  DataLazy::intoString(ostringstream& oss) const  DataLazy::intoString(ostringstream& oss) const
2350  {  {
2351    //    oss << "[" << m_children <<";"<<m_height <<"]";
2352    switch (getOpgroup(m_op))    switch (getOpgroup(m_op))
2353    {    {
2354    case G_IDENTITY:    case G_IDENTITY:
# Line 1096  DataLazy::intoString(ostringstream& oss) Line 2378  DataLazy::intoString(ostringstream& oss)
2378      oss << ')';      oss << ')';
2379      break;      break;
2380    case G_UNARY:    case G_UNARY:
2381      case G_UNARY_P:
2382    case G_NP1OUT:    case G_NP1OUT:
2383      case G_NP1OUT_P:
2384      oss << opToString(m_op) << '(';      oss << opToString(m_op) << '(';
2385      m_left->intoString(oss);      m_left->intoString(oss);
2386      oss << ')';      oss << ')';
# Line 1108  DataLazy::intoString(ostringstream& oss) Line 2392  DataLazy::intoString(ostringstream& oss)
2392      m_right->intoString(oss);      m_right->intoString(oss);
2393      oss << ')';      oss << ')';
2394      break;      break;
2395      case G_NP1OUT_2P:
2396        oss << opToString(m_op) << '(';
2397        m_left->intoString(oss);
2398        oss << ", " << m_axis_offset << ", " << m_transpose;
2399        oss << ')';
2400        break;
2401    default:    default:
2402      oss << "UNKNOWN";      oss << "UNKNOWN";
2403    }    }
# Line 1204  DataLazy::getPointOffset(int sampleNo, Line 2494  DataLazy::getPointOffset(int sampleNo,
2494    throw DataException("Programmer error - getPointOffset on lazy data may require collapsing (but this object is marked const).");    throw DataException("Programmer error - getPointOffset on lazy data may require collapsing (but this object is marked const).");
2495  }  }
2496    
2497  // It would seem that DataTagged will need to be treated differently since even after setting all tags  
2498  // to zero, all the tags from all the DataTags would be in the result.  // I have decided to let Data:: handle this issue.
 // However since they all have the same value (0) whether they are there or not should not matter.  
 // So I have decided that for all types this method will create a constant 0.  
 // It can be promoted up as required.  
 // A possible efficiency concern might be expanded->constant->expanded which has an extra memory management  
// but we can deal with that if it arises.  
2499  void  void
2500  DataLazy::setToZero()  DataLazy::setToZero()
2501  {  {
2502    DataTypes::ValueType v(getNoValues(),0);  //   DataTypes::ValueType v(getNoValues(),0);
2503    m_id=DataReady_ptr(new DataConstant(getFunctionSpace(),getShape(),v));  //   m_id=DataReady_ptr(new DataConstant(getFunctionSpace(),getShape(),v));
2504    m_op=IDENTITY;  //   m_op=IDENTITY;
2505    m_right.reset();    //   m_right.reset();  
2506    m_left.reset();  //   m_left.reset();
2507    m_readytype='C';  //   m_readytype='C';
2508    m_buffsRequired=1;  //   m_buffsRequired=1;
2509    
2510      privdebug=privdebug;  // to stop the compiler complaining about unused privdebug
2511      throw DataException("Programmer error - setToZero not supported for DataLazy (DataLazy objects should be read only).");
2512    }
2513    
2514    bool
2515    DataLazy::actsExpanded() const
2516    {
2517        return (m_readytype=='E');
2518  }  }
2519    
2520    bool
2521    DataLazy::actsConstant() const
2522    {
2523        return (m_readytype=='C');
2524    }
2525    
2526    
2527  }   // end namespace  }   // end namespace
