/[escript]/branches/clazy/escriptcore/src/DataLazy.cpp
ViewVC logotype

Diff of /branches/clazy/escriptcore/src/DataLazy.cpp

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 2737 by jfenwick, Tue Nov 3 00:44:00 2009 UTC revision 3917 by jfenwick, Thu Jul 5 00:17:50 2012 UTC
# Line 1  Line 1 
1    
2  /*******************************************************  /*******************************************************
3  *  *
4  * Copyright (c) 2003-2009 by University of Queensland  * Copyright (c) 2003-2012 by University of Queensland
5  * Earth Systems Science Computational Center (ESSCC)  * Earth Systems Science Computational Center (ESSCC)
6  * http://www.uq.edu.au/esscc  * http://www.uq.edu.au/esscc
7  *  *
# Line 13  Line 13 
13    
14    
15  #include "DataLazy.h"  #include "DataLazy.h"
16  #ifdef USE_NETCDF  #include "esysUtils/Esys_MPI.h"
 #include <netcdfcpp.h>  
 #endif  
 #ifdef PASO_MPI  
 #include <mpi.h>  
 #endif  
17  #ifdef _OPENMP  #ifdef _OPENMP
18  #include <omp.h>  #include <omp.h>
19  #endif  #endif
# Line 30  Line 25 
25    
26  #include "EscriptParams.h"  #include "EscriptParams.h"
27    
28    #ifdef USE_NETCDF
29    #include <netcdfcpp.h>
30    #endif
31    
32  #include <iomanip>      // for some fancy formatting in debug  #include <iomanip>      // for some fancy formatting in debug
33    
34  // #define LAZYDEBUG(X) if (privdebug){X;}  // #define LAZYDEBUG(X) if (privdebug){X;}
# Line 44  bool privdebug=false; Line 43  bool privdebug=false;
43    
44  // #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {cerr << "\n!!!!!!! SIZE LIMIT EXCEEDED " << m_children << ";" << m_height << endl << toString() << endl;resolveToIdentity();}  // #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {cerr << "\n!!!!!!! SIZE LIMIT EXCEEDED " << m_children << ";" << m_height << endl << toString() << endl;resolveToIdentity();}
45    
46  #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {resolveToIdentity();}  // #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {cerr << "SIZE LIMIT EXCEEDED " << m_height << endl;resolveToIdentity();}
47    
48    
49    #define SIZELIMIT if (m_height>escript::escriptParams.getTOO_MANY_LEVELS())  {if (escript::escriptParams.getLAZY_VERBOSE()){cerr << "SIZE LIMIT EXCEEDED height=" << m_height << endl;}resolveToIdentity();}
50    
51  /*  /*
52  How does DataLazy work?  How does DataLazy work?
# Line 109  namespace escript Line 110  namespace escript
110  namespace  namespace
111  {  {
112    
113    
114    // enabling this will print out when ever the maximum stacksize used by resolve increases
115    // it assumes _OPENMP is also in use
116    //#define LAZY_STACK_PROF
117    
118    
119    
120    #ifndef _OPENMP
121      #ifdef LAZY_STACK_PROF
122      #undef LAZY_STACK_PROF
123      #endif
124    #endif
125    
126    
127    #ifdef LAZY_STACK_PROF
128    std::vector<void*> stackstart(getNumberOfThreads());
129    std::vector<void*> stackend(getNumberOfThreads());
130    size_t maxstackuse=0;
131    #endif
132    
133  enum ES_opgroup  enum ES_opgroup
134  {  {
135     G_UNKNOWN,     G_UNKNOWN,
# Line 120  enum ES_opgroup Line 141  enum ES_opgroup
141     G_NP1OUT_P,      // non-pointwise op with one output requiring a parameter     G_NP1OUT_P,      // non-pointwise op with one output requiring a parameter
142     G_TENSORPROD,    // general tensor product     G_TENSORPROD,    // general tensor product
143     G_NP1OUT_2P,     // non-pointwise op with one output requiring two params     G_NP1OUT_2P,     // non-pointwise op with one output requiring two params
144     G_REDUCTION      // non-pointwise unary op with a scalar output     G_REDUCTION,     // non-pointwise unary op with a scalar output
145       G_CONDEVAL
146  };  };
147    
148    
# Line 136  string ES_opstrings[]={"UNKNOWN","IDENTI Line 158  string ES_opstrings[]={"UNKNOWN","IDENTI
158              "prod",              "prod",
159              "transpose", "trace",              "transpose", "trace",
160              "swapaxes",              "swapaxes",
161              "minval", "maxval"};              "minval", "maxval",
162  int ES_opcount=43;              "condEval"};
163    int ES_opcount=44;
164  ES_opgroup opgroups[]={G_UNKNOWN,G_IDENTITY,G_BINARY,G_BINARY,G_BINARY,G_BINARY, G_BINARY,  ES_opgroup opgroups[]={G_UNKNOWN,G_IDENTITY,G_BINARY,G_BINARY,G_BINARY,G_BINARY, G_BINARY,
165              G_UNARY,G_UNARY,G_UNARY, //10              G_UNARY,G_UNARY,G_UNARY, //10
166              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,    // 17              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,    // 17
# Line 148  ES_opgroup opgroups[]={G_UNKNOWN,G_IDENT Line 171  ES_opgroup opgroups[]={G_UNKNOWN,G_IDENT
171              G_TENSORPROD,              G_TENSORPROD,
172              G_NP1OUT_P, G_NP1OUT_P,              G_NP1OUT_P, G_NP1OUT_P,
173              G_NP1OUT_2P,              G_NP1OUT_2P,
174              G_REDUCTION, G_REDUCTION};              G_REDUCTION, G_REDUCTION,
175                G_CONDEVAL};
176  inline  inline
177  ES_opgroup  ES_opgroup
178  getOpgroup(ES_optype op)  getOpgroup(ES_optype op)
# Line 221  resultShape(DataAbstract_ptr left, ES_op Line 245  resultShape(DataAbstract_ptr left, ES_op
245          int rank=left->getRank();          int rank=left->getRank();
246          if (axis_offset<0 || axis_offset>rank)          if (axis_offset<0 || axis_offset>rank)
247          {          {
248                 throw DataException("Error - Data::transpose must have 0 <= axis_offset <= rank=" + rank);              stringstream e;
249              }              e << "Error - Data::transpose must have 0 <= axis_offset <= rank=" << rank;
250              for (int i=0; i<rank; i++)              throw DataException(e.str());
251            }
252            for (int i=0; i<rank; i++)
253          {          {
254             int index = (axis_offset+i)%rank;             int index = (axis_offset+i)%rank;
255                 sh.push_back(s[index]); // Append to new shape             sh.push_back(s[index]); // Append to new shape
256              }          }
257          return sh;          return sh;
258         }         }
259      break;      break;
# Line 306  SwapShape(DataAbstract_ptr left, const i Line 332  SwapShape(DataAbstract_ptr left, const i
332          throw DataException("Error - Data::swapaxes argument must have at least rank 2.");          throw DataException("Error - Data::swapaxes argument must have at least rank 2.");
333       }       }
334       if (axis0<0 || axis0>rank-1) {       if (axis0<0 || axis0>rank-1) {
335          throw DataException("Error - Data::swapaxes: axis0 must be between 0 and rank-1=" + rank-1);          stringstream e;
336            e << "Error - Data::swapaxes: axis0 must be between 0 and rank-1=" << (rank-1);
337            throw DataException(e.str());
338       }       }
339       if (axis1<0 || axis1>rank-1) {       if (axis1<0 || axis1>rank-1) {
340           throw DataException("Error - Data::swapaxes: axis1 must be between 0 and rank-1=" + rank-1);          stringstream e;
341            e << "Error - Data::swapaxes: axis1 must be between 0 and rank-1=" << (rank-1);
342            throw DataException(e.str());
343       }       }
344       if (axis0 == axis1) {       if (axis0 == axis1) {
345           throw DataException("Error - Data::swapaxes: axis indices must be different.");           throw DataException("Error - Data::swapaxes: axis indices must be different.");
# Line 398  GTPShape(DataAbstract_ptr left, DataAbst Line 428  GTPShape(DataAbstract_ptr left, DataAbst
428    return shape2;    return shape2;
429  }  }
430    
 // determine the number of samples requires to evaluate an expression combining left and right  
 // NP1OUT needs an extra buffer because we can't write the answers over the top of the input.  
 // The same goes for G_TENSORPROD  
 // It might seem that pointwise binary ops (G_BINARY) could be written over the top of the lefts.  
 // This would be true were it not for the possibility that the LHS could be a scalar which needs to be examined  
 // multiple times  
 int  
 calcBuffs(const DataLazy_ptr& left, const DataLazy_ptr& right, ES_optype op)  
 {  
    switch(getOpgroup(op))  
    {  
    case G_IDENTITY: return 1;  
    case G_BINARY: return 1+max(left->getBuffsRequired(),right->getBuffsRequired()+1);  
    case G_REDUCTION:  
    case G_UNARY:  
    case G_UNARY_P: return max(left->getBuffsRequired(),1);  
    case G_NP1OUT: return 1+max(left->getBuffsRequired(),1);  
    case G_NP1OUT_P: return 1+max(left->getBuffsRequired(),1);  
    case G_TENSORPROD: return 1+max(left->getBuffsRequired(),right->getBuffsRequired()+1);  
    case G_NP1OUT_2P: return 1+max(left->getBuffsRequired(),1);  
    default:  
     throw DataException("Programmer Error - attempt to calcBuffs() for operator "+opToString(op)+".");  
    }  
 }  
   
   
431  }   // end anonymous namespace  }   // end anonymous namespace
432    
433    
# Line 439  opToString(ES_optype op) Line 443  opToString(ES_optype op)
443    return ES_opstrings[op];    return ES_opstrings[op];
444  }  }
445    
 #ifdef LAZY_NODE_STORAGE  
446  void DataLazy::LazyNodeSetup()  void DataLazy::LazyNodeSetup()
447  {  {
448  #ifdef _OPENMP  #ifdef _OPENMP
# Line 456  void DataLazy::LazyNodeSetup() Line 459  void DataLazy::LazyNodeSetup()
459      m_sampleids[0]=-1;      m_sampleids[0]=-1;
460  #endif  // _OPENMP  #endif  // _OPENMP
461  }  }
 #endif   // LAZY_NODE_STORAGE  
462    
463    
464  // Creates an identity node  // Creates an identity node
465  DataLazy::DataLazy(DataAbstract_ptr p)  DataLazy::DataLazy(DataAbstract_ptr p)
466      : parent(p->getFunctionSpace(),p->getShape())      : parent(p->getFunctionSpace(),p->getShape())
 #ifdef LAZY_NODE_STORAGE  
467      ,m_sampleids(0),      ,m_sampleids(0),
468      m_samples(1)      m_samples(1)
 #endif  
469  {  {
470     if (p->isLazy())     if (p->isLazy())
471     {     {
# Line 507  DataLazy::DataLazy(DataAbstract_ptr left Line 507  DataLazy::DataLazy(DataAbstract_ptr left
507     }     }
508     m_readytype=lleft->m_readytype;     m_readytype=lleft->m_readytype;
509     m_left=lleft;     m_left=lleft;
    m_buffsRequired=calcBuffs(m_left, m_right,m_op); // yeah m_right will be null at this point  
510     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
    m_maxsamplesize=max(m_samplesize,m_left->getMaxSampleSize());  
511     m_children=m_left->m_children+1;     m_children=m_left->m_children+1;
512     m_height=m_left->m_height+1;     m_height=m_left->m_height+1;
 #ifdef LAZY_NODE_STORAGE  
513     LazyNodeSetup();     LazyNodeSetup();
 #endif  
514     SIZELIMIT     SIZELIMIT
515  }  }
516    
# Line 581  LAZYDEBUG(cout << "Right " << right.get( Line 577  LAZYDEBUG(cout << "Right " << right.get(
577      m_readytype='C';      m_readytype='C';
578     }     }
579     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
    m_maxsamplesize=max(max(m_samplesize,m_right->getMaxSampleSize()),m_left->getMaxSampleSize());    
    m_buffsRequired=calcBuffs(m_left, m_right,m_op);  
580     m_children=m_left->m_children+m_right->m_children+2;     m_children=m_left->m_children+m_right->m_children+2;
581     m_height=max(m_left->m_height,m_right->m_height)+1;     m_height=max(m_left->m_height,m_right->m_height)+1;
 #ifdef LAZY_NODE_STORAGE  
582     LazyNodeSetup();     LazyNodeSetup();
 #endif  
583     SIZELIMIT     SIZELIMIT
584  LAZYDEBUG(cout << "(3)Lazy created with " << m_samplesize << endl;)  LAZYDEBUG(cout << "(3)Lazy created with " << m_samplesize << endl;)
585  }  }
# Line 651  DataLazy::DataLazy(DataAbstract_ptr left Line 643  DataLazy::DataLazy(DataAbstract_ptr left
643      m_readytype='C';      m_readytype='C';
644     }     }
645     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
    m_maxsamplesize=max(max(m_samplesize,m_right->getMaxSampleSize()),m_left->getMaxSampleSize());    
    m_buffsRequired=calcBuffs(m_left, m_right,m_op);  
646     m_children=m_left->m_children+m_right->m_children+2;     m_children=m_left->m_children+m_right->m_children+2;
647     m_height=max(m_left->m_height,m_right->m_height)+1;     m_height=max(m_left->m_height,m_right->m_height)+1;
 #ifdef LAZY_NODE_STORAGE  
648     LazyNodeSetup();     LazyNodeSetup();
 #endif  
649     SIZELIMIT     SIZELIMIT
650  LAZYDEBUG(cout << "(4)Lazy created with " << m_samplesize << endl;)  LAZYDEBUG(cout << "(4)Lazy created with " << m_samplesize << endl;)
651  }  }
# Line 685  DataLazy::DataLazy(DataAbstract_ptr left Line 673  DataLazy::DataLazy(DataAbstract_ptr left
673     }     }
674     m_readytype=lleft->m_readytype;     m_readytype=lleft->m_readytype;
675     m_left=lleft;     m_left=lleft;
    m_buffsRequired=calcBuffs(m_left, m_right,m_op); // yeah m_right will be null at this point  
676     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
    m_maxsamplesize=max(m_samplesize,m_left->getMaxSampleSize());  
677     m_children=m_left->m_children+1;     m_children=m_left->m_children+1;
678     m_height=m_left->m_height+1;     m_height=m_left->m_height+1;
 #ifdef LAZY_NODE_STORAGE  
679     LazyNodeSetup();     LazyNodeSetup();
 #endif  
680     SIZELIMIT     SIZELIMIT
681  LAZYDEBUG(cout << "(5)Lazy created with " << m_samplesize << endl;)  LAZYDEBUG(cout << "(5)Lazy created with " << m_samplesize << endl;)
682  }  }
# Line 719  DataLazy::DataLazy(DataAbstract_ptr left Line 703  DataLazy::DataLazy(DataAbstract_ptr left
703     }     }
704     m_readytype=lleft->m_readytype;     m_readytype=lleft->m_readytype;
705     m_left=lleft;     m_left=lleft;
    m_buffsRequired=calcBuffs(m_left, m_right,m_op); // yeah m_right will be null at this point  
706     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
    m_maxsamplesize=max(m_samplesize,m_left->getMaxSampleSize());  
707     m_children=m_left->m_children+1;     m_children=m_left->m_children+1;
708     m_height=m_left->m_height+1;     m_height=m_left->m_height+1;
 #ifdef LAZY_NODE_STORAGE  
709     LazyNodeSetup();     LazyNodeSetup();
 #endif  
710     SIZELIMIT     SIZELIMIT
711  LAZYDEBUG(cout << "(6)Lazy created with " << m_samplesize << endl;)  LAZYDEBUG(cout << "(6)Lazy created with " << m_samplesize << endl;)
712  }  }
# Line 754  DataLazy::DataLazy(DataAbstract_ptr left Line 734  DataLazy::DataLazy(DataAbstract_ptr left
734     }     }
735     m_readytype=lleft->m_readytype;     m_readytype=lleft->m_readytype;
736     m_left=lleft;     m_left=lleft;
    m_buffsRequired=calcBuffs(m_left, m_right,m_op); // yeah m_right will be null at this point  
737     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
    m_maxsamplesize=max(m_samplesize,m_left->getMaxSampleSize());  
738     m_children=m_left->m_children+1;     m_children=m_left->m_children+1;
739     m_height=m_left->m_height+1;     m_height=m_left->m_height+1;
 #ifdef LAZY_NODE_STORAGE  
740     LazyNodeSetup();     LazyNodeSetup();
 #endif  
741     SIZELIMIT     SIZELIMIT
742  LAZYDEBUG(cout << "(7)Lazy created with " << m_samplesize << endl;)  LAZYDEBUG(cout << "(7)Lazy created with " << m_samplesize << endl;)
743  }  }
744    
745  DataLazy::~DataLazy()  
746    namespace
747  {  {
 #ifdef LAZY_NODE_SETUP  
    delete[] m_sampleids;  
    delete[] m_samples;  
 #endif  
 }  
748    
749        inline int max3(int a, int b, int c)
750        {
751        int t=(a>b?a:b);
752        return (t>c?t:c);
753    
754  int      }
 DataLazy::getBuffsRequired() const  
 {  
     return m_buffsRequired;  
755  }  }
756    
757    DataLazy::DataLazy(DataAbstract_ptr mask, DataAbstract_ptr left, DataAbstract_ptr right/*, double tol*/)
758  size_t      : parent(left->getFunctionSpace(), left->getShape()),
759  DataLazy::getMaxSampleSize() const      m_op(CONDEVAL),
760        m_axis_offset(0),
761        m_transpose(0),
762        m_tol(0)
763  {  {
764      return m_maxsamplesize;  
765       DataLazy_ptr lmask;
766       DataLazy_ptr lleft;
767       DataLazy_ptr lright;
768       if (!mask->isLazy())
769       {
770        lmask=DataLazy_ptr(new DataLazy(mask));
771       }
772       else
773       {
774        lmask=dynamic_pointer_cast<DataLazy>(mask);
775       }
776       if (!left->isLazy())
777       {
778        lleft=DataLazy_ptr(new DataLazy(left));
779       }
780       else
781       {
782        lleft=dynamic_pointer_cast<DataLazy>(left);
783       }
784       if (!right->isLazy())
785       {
786        lright=DataLazy_ptr(new DataLazy(right));
787       }
788       else
789       {
790        lright=dynamic_pointer_cast<DataLazy>(right);
791       }
792       m_readytype=lmask->m_readytype;
793       if ((lleft->m_readytype!=lright->m_readytype) || (lmask->m_readytype!=lleft->m_readytype))
794       {
795        throw DataException("Programmer Error - condEval arguments must have the same readytype");
796       }
797       m_left=lleft;
798       m_right=lright;
799       m_mask=lmask;
800       m_samplesize=getNumDPPSample()*getNoValues();
801       m_children=m_left->m_children+m_right->m_children+m_mask->m_children+1;
802       m_height=max3(m_left->m_height,m_right->m_height,m_mask->m_height)+1;
803       LazyNodeSetup();
804       SIZELIMIT
805    LAZYDEBUG(cout << "(8)Lazy created with " << m_samplesize << endl;)
806  }  }
807    
808    
809    
810  size_t  DataLazy::~DataLazy()
 DataLazy::getSampleBufferSize() const  
811  {  {
812      return m_maxsamplesize*(max(1,m_buffsRequired));     delete[] m_sampleids;
813  }  }
814    
815    
816  /*  /*
817    \brief Evaluates the expression using methods on Data.    \brief Evaluates the expression using methods on Data.
818    This does the work for the collapse method.    This does the work for the collapse method.
# Line 971  DataLazy::collapse() Line 988  DataLazy::collapse()
988    m_op=IDENTITY;    m_op=IDENTITY;
989  }  }
990    
 /*  
   \brief Compute the value of the expression (unary operation) for the given sample.  
   \return Vector which stores the value of the subexpression for the given sample.  
   \param v A vector to store intermediate results.  
   \param offset Index in v to begin storing results.  
   \param sampleNo Sample number to evaluate.  
   \param roffset (output parameter) the offset in the return vector where the result begins.  
   
   The return value will be an existing vector so do not deallocate it.  
   If the result is stored in v it should be stored at the offset given.  
   Everything from offset to the end of v should be considered available for this method to use.  
 */  
 DataTypes::ValueType*  
 DataLazy::resolveUnary(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const  
 {  
     // we assume that any collapsing has been done before we get here  
     // since we only have one argument we don't need to think about only  
     // processing single points.  
   if (m_readytype!='E')  
   {  
     throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");  
   }  
   const ValueType* vleft=m_left->resolveVectorSample(v,offset,sampleNo,roffset);  
   const double* left=&((*vleft)[roffset]);  
   double* result=&(v[offset]);  
   roffset=offset;  
   switch (m_op)  
   {  
     case SIN:    
     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sin);  
     break;  
     case COS:  
     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cos);  
     break;  
     case TAN:  
     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tan);  
     break;  
     case ASIN:  
     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::asin);  
     break;  
     case ACOS:  
     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::acos);  
     break;  
     case ATAN:  
     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::atan);  
     break;  
     case SINH:  
     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sinh);  
     break;  
     case COSH:  
     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cosh);  
     break;  
     case TANH:  
     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tanh);  
     break;  
     case ERF:  
 #if defined (_WIN32) && !defined(__INTEL_COMPILER)  
     throw DataException("Error - Data:: erf function is not supported on _WIN32 platforms.");  
 #else  
     tensor_unary_operation(m_samplesize, left, result, ::erf);  
     break;  
 #endif  
    case ASINH:  
 #if defined (_WIN32) && !defined(__INTEL_COMPILER)  
     tensor_unary_operation(m_samplesize, left, result, escript::asinh_substitute);  
 #else  
     tensor_unary_operation(m_samplesize, left, result, ::asinh);  
 #endif    
     break;  
    case ACOSH:  
 #if defined (_WIN32) && !defined(__INTEL_COMPILER)  
     tensor_unary_operation(m_samplesize, left, result, escript::acosh_substitute);  
 #else  
     tensor_unary_operation(m_samplesize, left, result, ::acosh);  
 #endif    
     break;  
    case ATANH:  
 #if defined (_WIN32) && !defined(__INTEL_COMPILER)  
     tensor_unary_operation(m_samplesize, left, result, escript::atanh_substitute);  
 #else  
     tensor_unary_operation(m_samplesize, left, result, ::atanh);  
 #endif    
     break;  
     case LOG10:  
     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log10);  
     break;  
     case LOG:  
     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log);  
     break;  
     case SIGN:  
     tensor_unary_operation(m_samplesize, left, result, escript::fsign);  
     break;  
     case ABS:  
     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::fabs);  
     break;  
     case NEG:  
     tensor_unary_operation(m_samplesize, left, result, negate<double>());  
     break;  
     case POS:  
     // it doesn't mean anything for delayed.  
     // it will just trigger a deep copy of the lazy object  
     throw DataException("Programmer error - POS not supported for lazy data.");  
     break;  
     case EXP:  
     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::exp);  
     break;  
     case SQRT:  
     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sqrt);  
     break;  
     case RECIP:  
     tensor_unary_operation(m_samplesize, left, result, bind1st(divides<double>(),1.));  
     break;  
     case GZ:  
     tensor_unary_operation(m_samplesize, left, result, bind2nd(greater<double>(),0.0));  
     break;  
     case LZ:  
     tensor_unary_operation(m_samplesize, left, result, bind2nd(less<double>(),0.0));  
     break;  
     case GEZ:  
     tensor_unary_operation(m_samplesize, left, result, bind2nd(greater_equal<double>(),0.0));  
     break;  
     case LEZ:  
     tensor_unary_operation(m_samplesize, left, result, bind2nd(less_equal<double>(),0.0));  
     break;  
 // There are actually G_UNARY_P but I don't see a compelling reason to treat them differently  
     case NEZ:  
     tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsGT(),m_tol));  
     break;  
     case EZ:  
     tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsLTE(),m_tol));  
     break;  
   
     default:  
     throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");  
   }  
   return &v;  
 }  
   
   
 /*  
   \brief Compute the value of the expression (reduction operation) for the given sample.  
   \return Vector which stores the value of the subexpression for the given sample.  
   \param v A vector to store intermediate results.  
   \param offset Index in v to begin storing results.  
   \param sampleNo Sample number to evaluate.  
   \param roffset (output parameter) the offset in the return vector where the result begins.  
   
   The return value will be an existing vector so do not deallocate it.  
   If the result is stored in v it should be stored at the offset given.  
   Everything from offset to the end of v should be considered available for this method to use.  
 */  
 DataTypes::ValueType*  
 DataLazy::resolveReduction(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const  
 {  
     // we assume that any collapsing has been done before we get here  
     // since we only have one argument we don't need to think about only  
     // processing single points.  
   if (m_readytype!='E')  
   {  
     throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");  
   }  
   const ValueType* vleft=m_left->resolveVectorSample(v,offset,sampleNo,roffset);  
   double* result=&(v[offset]);  
   roffset=offset;  
   unsigned int ndpps=getNumDPPSample();  
   unsigned int psize=DataTypes::noValues(getShape());  
   switch (m_op)  
   {  
     case MINVAL:  
     {  
       for (unsigned int z=0;z<ndpps;++z)  
       {  
          FMin op;  
          *result=DataMaths::reductionOp(*vleft, m_left->getShape(), roffset, op, numeric_limits<double>::max());  
          roffset+=psize;  
          result++;  
       }  
     }  
     break;  
     case MAXVAL:  
     {  
       for (unsigned int z=0;z<ndpps;++z)  
       {  
          FMax op;  
          *result=DataMaths::reductionOp(*vleft, m_left->getShape(), roffset, op, numeric_limits<double>::max()*-1);  
          roffset+=psize;  
          result++;  
       }  
     }  
     break;  
     default:  
     throw DataException("Programmer error - resolveReduction can not resolve operator "+opToString(m_op)+".");  
   }  
   return &v;  
 }  
   
   
   
 /*  
   \brief Compute the value of the expression (unary operation) for the given sample.  
   \return Vector which stores the value of the subexpression for the given sample.  
   \param v A vector to store intermediate results.  
   \param offset Index in v to begin storing results.  
   \param sampleNo Sample number to evaluate.  
   \param roffset (output parameter) the offset in the return vector where the result begins.  
   
   The return value will be an existing vector so do not deallocate it.  
   If the result is stored in v it should be stored at the offset given.  
   Everything from offset to the end of v should be considered available for this method to use.  
 */  
 DataTypes::ValueType*  
 DataLazy::resolveNP1OUT(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const  
 {  
     // we assume that any collapsing has been done before we get here  
     // since we only have one argument we don't need to think about only  
     // processing single points.  
   if (m_readytype!='E')  
   {  
     throw DataException("Programmer error - resolveNP1OUT should only be called on expanded Data.");  
   }  
     // since we can't write the result over the input, we need a result offset further along  
   size_t subroffset=roffset+m_samplesize;  
 LAZYDEBUG(cerr << "subroffset=" << subroffset << endl;)  
   const ValueType* vleft=m_left->resolveVectorSample(v,offset+m_left->m_samplesize,sampleNo,subroffset);  
   roffset=offset;  
   size_t loop=0;  
   size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;  
   size_t step=getNoValues();  
   switch (m_op)  
   {  
     case SYM:  
     for (loop=0;loop<numsteps;++loop)  
     {  
         DataMaths::symmetric(*vleft,m_left->getShape(),subroffset, v, getShape(), offset);  
         subroffset+=step;  
         offset+=step;  
     }  
     break;  
     case NSYM:  
     for (loop=0;loop<numsteps;++loop)  
     {  
         DataMaths::nonsymmetric(*vleft,m_left->getShape(),subroffset, v, getShape(), offset);  
         subroffset+=step;  
         offset+=step;  
     }  
     break;  
     default:  
     throw DataException("Programmer error - resolveNP1OUT can not resolve operator "+opToString(m_op)+".");  
   }  
   return &v;  
 }  
   
 /*  
   \brief Compute the value of the expression (unary operation) for the given sample.  
   \return Vector which stores the value of the subexpression for the given sample.  
   \param v A vector to store intermediate results.  
   \param offset Index in v to begin storing results.  
   \param sampleNo Sample number to evaluate.  
   \param roffset (output parameter) the offset in the return vector where the result begins.  
   
   The return value will be an existing vector so do not deallocate it.  
   If the result is stored in v it should be stored at the offset given.  
   Everything from offset to the end of v should be considered available for this method to use.  
 */  
 DataTypes::ValueType*  
 DataLazy::resolveNP1OUT_P(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const  
 {  
     // we assume that any collapsing has been done before we get here  
     // since we only have one argument we don't need to think about only  
     // processing single points.  
   if (m_readytype!='E')  
   {  
     throw DataException("Programmer error - resolveNP1OUT_P should only be called on expanded Data.");  
   }  
     // since we can't write the result over the input, we need a result offset further along  
   size_t subroffset;  
   const ValueType* vleft=m_left->resolveVectorSample(v,offset+m_left->m_samplesize,sampleNo,subroffset);  
 LAZYDEBUG(cerr << "srcsamplesize=" << offset+m_left->m_samplesize << " beg=" << subroffset << endl;)  
 LAZYDEBUG(cerr << "Offset for 5800=" << getPointOffset(5800/getNumDPPSample(),5800%getNumDPPSample()) << endl;)  
   roffset=offset;  
   size_t loop=0;  
   size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;  
   size_t outstep=getNoValues();  
   size_t instep=m_left->getNoValues();  
 LAZYDEBUG(cerr << "instep=" << instep << " outstep=" << outstep<< " numsteps=" << numsteps << endl;)  
   switch (m_op)  
   {  
     case TRACE:  
     for (loop=0;loop<numsteps;++loop)  
     {  
 size_t zz=sampleNo*getNumDPPSample()+loop;  
 if (zz==5800)  
 {  
 LAZYDEBUG(cerr << "point=" <<  zz<< endl;)  
 LAZYDEBUG(cerr << "Input to  trace=" << DataTypes::pointToString(*vleft,m_left->getShape(),subroffset,"") << endl;)  
 LAZYDEBUG(cerr << "Offset for point=" << getPointOffset(5800/getNumDPPSample(),5800%getNumDPPSample()) << " vs ";)  
 LAZYDEBUG(cerr << subroffset << endl;)  
 LAZYDEBUG(cerr << "output=" << offset << endl;)  
 }  
             DataMaths::trace(*vleft,m_left->getShape(),subroffset, v ,getShape(),offset,m_axis_offset);  
 if (zz==5800)  
 {  
 LAZYDEBUG(cerr << "Result of trace=" << DataTypes::pointToString(v,getShape(),offset,"") << endl;)  
 }  
         subroffset+=instep;  
         offset+=outstep;  
     }  
     break;  
     case TRANS:  
     for (loop=0;loop<numsteps;++loop)  
     {  
             DataMaths::transpose(*vleft,m_left->getShape(),subroffset, v,getShape(),offset,m_axis_offset);  
         subroffset+=instep;  
         offset+=outstep;  
     }  
     break;  
     default:  
     throw DataException("Programmer error - resolveNP1OUTP can not resolve operator "+opToString(m_op)+".");  
   }  
   return &v;  
 }  
991    
992    
 /*  
   \brief Compute the value of the expression (unary operation with int params) for the given sample.  
   \return Vector which stores the value of the subexpression for the given sample.  
   \param v A vector to store intermediate results.  
   \param offset Index in v to begin storing results.  
   \param sampleNo Sample number to evaluate.  
   \param roffset (output parameter) the offset in the return vector where the result begins.  
   
   The return value will be an existing vector so do not deallocate it.  
   If the result is stored in v it should be stored at the offset given.  
   Everything from offset to the end of v should be considered available for this method to use.  
 */  
 DataTypes::ValueType*  
 DataLazy::resolveNP1OUT_2P(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const  
 {  
     // we assume that any collapsing has been done before we get here  
     // since we only have one argument we don't need to think about only  
     // processing single points.  
   if (m_readytype!='E')  
   {  
     throw DataException("Programmer error - resolveNP1OUT_2P should only be called on expanded Data.");  
   }  
     // since we can't write the result over the input, we need a result offset further along  
   size_t subroffset;  
   const ValueType* vleft=m_left->resolveVectorSample(v,offset+m_left->m_samplesize,sampleNo,subroffset);  
 LAZYDEBUG(cerr << "srcsamplesize=" << offset+m_left->m_samplesize << " beg=" << subroffset << endl;)  
 LAZYDEBUG(cerr << "Offset for 5800=" << getPointOffset(5800/getNumDPPSample(),5800%getNumDPPSample()) << endl;)  
   roffset=offset;  
   size_t loop=0;  
   size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;  
   size_t outstep=getNoValues();  
   size_t instep=m_left->getNoValues();  
 LAZYDEBUG(cerr << "instep=" << instep << " outstep=" << outstep<< " numsteps=" << numsteps << endl;)  
   switch (m_op)  
   {  
     case SWAP:  
     for (loop=0;loop<numsteps;++loop)  
     {  
             DataMaths::swapaxes(*vleft,m_left->getShape(),subroffset, v,getShape(),offset,m_axis_offset, m_transpose);  
         subroffset+=instep;  
         offset+=outstep;  
     }  
     break;  
     default:  
     throw DataException("Programmer error - resolveNP1OUT2P can not resolve operator "+opToString(m_op)+".");  
   }  
   return &v;  
 }  
993    
994    
995    
# Line 1361  LAZYDEBUG(cout << " result=      " << re Line 1009  LAZYDEBUG(cout << " result=      " << re
1009        rroffset+=orightstep;\        rroffset+=orightstep;\
1010      }      }
1011    
 /*  
   \brief Compute the value of the expression (binary operation) for the given sample.  
   \return Vector which stores the value of the subexpression for the given sample.  
   \param v A vector to store intermediate results.  
   \param offset Index in v to begin storing results.  
   \param sampleNo Sample number to evaluate.  
   \param roffset (output parameter) the offset in the return vector where the result begins.  
   
   The return value will be an existing vector so do not deallocate it.  
   If the result is stored in v it should be stored at the offset given.  
   Everything from offset to the end of v should be considered available for this method to use.  
 */  
 // This method assumes that any subexpressions which evaluate to Constant or Tagged Data  
 // have already been collapsed to IDENTITY. So we must have at least one expanded child.  
 // If both children are expanded, then we can process them in a single operation (we treat  
 // the whole sample as one big datapoint.  
 // If one of the children is not expanded, then we need to treat each point in the sample  
 // individually.  
 // There is an additional complication when scalar operations are considered.  
 // For example, 2+Vector.  
 // In this case each double within the point is treated individually  
 DataTypes::ValueType*  
 DataLazy::resolveBinary(ValueType& v,  size_t offset, int sampleNo, size_t& roffset) const  
 {  
 LAZYDEBUG(cout << "Resolve binary: " << toString() << endl;)  
   
   size_t lroffset=0, rroffset=0;    // offsets in the left and right result vectors  
     // first work out which of the children are expanded  
   bool leftExp=(m_left->m_readytype=='E');  
   bool rightExp=(m_right->m_readytype=='E');  
   if (!leftExp && !rightExp)  
   {  
     throw DataException("Programmer Error - please use collapse if neither argument has type 'E'.");  
   }  
   bool leftScalar=(m_left->getRank()==0);  
   bool rightScalar=(m_right->getRank()==0);  
   if ((m_left->getRank()!=m_right->getRank()) && (!leftScalar && !rightScalar))  
   {  
     throw DataException("resolveBinary - ranks of arguments must match unless one of them is scalar.");  
   }  
   size_t leftsize=m_left->getNoValues();  
   size_t rightsize=m_right->getNoValues();  
   size_t chunksize=1;           // how many doubles will be processed in one go  
   int leftstep=0;       // how far should the left offset advance after each step  
   int rightstep=0;  
   int numsteps=0;       // total number of steps for the inner loop  
   int oleftstep=0;  // the o variables refer to the outer loop  
   int orightstep=0; // The outer loop is only required in cases where there is an extended scalar  
   int onumsteps=1;  
     
   bool LES=(leftExp && leftScalar); // Left is an expanded scalar  
   bool RES=(rightExp && rightScalar);  
   bool LS=(!leftExp && leftScalar); // left is a single scalar  
   bool RS=(!rightExp && rightScalar);  
   bool LN=(!leftExp && !leftScalar);    // left is a single non-scalar  
   bool RN=(!rightExp && !rightScalar);  
   bool LEN=(leftExp && !leftScalar);    // left is an expanded non-scalar  
   bool REN=(rightExp && !rightScalar);  
   
   if ((LES && RES) || (LEN && REN)) // both are Expanded scalars or both are expanded non-scalars  
   {  
     chunksize=m_left->getNumDPPSample()*leftsize;  
     leftstep=0;  
     rightstep=0;  
     numsteps=1;  
   }  
   else if (LES || RES)  
   {  
     chunksize=1;  
     if (LES)        // left is an expanded scalar  
     {  
         if (RS)  
         {  
            leftstep=1;  
            rightstep=0;  
            numsteps=m_left->getNumDPPSample();  
         }  
         else        // RN or REN  
         {  
            leftstep=0;  
            oleftstep=1;  
            rightstep=1;  
            orightstep=(RN ? -(int)rightsize : 0);  
            numsteps=rightsize;  
            onumsteps=m_left->getNumDPPSample();  
         }  
     }  
     else        // right is an expanded scalar  
     {  
         if (LS)  
         {  
            rightstep=1;  
            leftstep=0;  
            numsteps=m_right->getNumDPPSample();  
         }  
         else  
         {  
            rightstep=0;  
            orightstep=1;  
            leftstep=1;  
            oleftstep=(LN ? -(int)leftsize : 0);  
            numsteps=leftsize;  
            onumsteps=m_right->getNumDPPSample();  
         }  
     }  
   }  
   else  // this leaves (LEN, RS), (LEN, RN) and their transposes  
   {  
     if (LEN)    // and Right will be a single value  
     {  
         chunksize=rightsize;  
         leftstep=rightsize;  
         rightstep=0;  
         numsteps=m_left->getNumDPPSample();  
         if (RS)  
         {  
            numsteps*=leftsize;  
         }  
     }  
     else    // REN  
     {  
         chunksize=leftsize;  
         rightstep=leftsize;  
         leftstep=0;  
         numsteps=m_right->getNumDPPSample();  
         if (LS)  
         {  
            numsteps*=rightsize;  
         }  
     }  
   }  
   
   int resultStep=max(leftstep,rightstep);   // only one (at most) should be !=0  
     // Get the values of sub-expressions  
   const ValueType* left=m_left->resolveVectorSample(v,offset+getMaxSampleSize(),sampleNo,lroffset); // see note on  
     // calcBufss for why we can't put offset as the 2nd param above  
   const ValueType* right=m_right->resolveVectorSample(v,offset+2*getMaxSampleSize(),sampleNo,rroffset); // Note  
     // the right child starts further along.  
 LAZYDEBUG(cout << "Post sub calls in " << toString() << endl;)  
 LAZYDEBUG(cout << "shapes=" << DataTypes::shapeToString(m_left->getShape()) << "," << DataTypes::shapeToString(m_right->getShape()) << endl;)  
 LAZYDEBUG(cout << "chunksize=" << chunksize << endl << "leftstep=" << leftstep << " rightstep=" << rightstep;)  
 LAZYDEBUG(cout << " numsteps=" << numsteps << endl << "oleftstep=" << oleftstep << " orightstep=" << orightstep;)  
 LAZYDEBUG(cout << "onumsteps=" << onumsteps << endl;)  
 LAZYDEBUG(cout << " DPPS=" << m_left->getNumDPPSample() << "," <<m_right->getNumDPPSample() << endl;)  
 LAZYDEBUG(cout << "" << LS << RS << LN << RN << LES << RES <<LEN << REN <<   endl;)  
   
   
   double* resultp=&(v[offset]);     // results are stored at the vector offset we recieved  
   switch(m_op)  
   {  
     case ADD:  
         PROC_OP(NO_ARG,plus<double>());  
     break;  
     case SUB:  
     PROC_OP(NO_ARG,minus<double>());  
     break;  
     case MUL:  
     PROC_OP(NO_ARG,multiplies<double>());  
     break;  
     case DIV:  
     PROC_OP(NO_ARG,divides<double>());  
     break;  
     case POW:  
        PROC_OP(double (double,double),::pow);  
     break;  
     default:  
     throw DataException("Programmer error - resolveBinary can not resolve operator "+opToString(m_op)+".");  
   }  
   roffset=offset;  
   return &v;  
 }  
   
   
   
 /*  
   \brief Compute the value of the expression (tensor product) for the given sample.  
   \return Vector which stores the value of the subexpression for the given sample.  
   \param v A vector to store intermediate results.  
   \param offset Index in v to begin storing results.  
   \param sampleNo Sample number to evaluate.  
   \param roffset (output parameter) the offset in the return vector where the result begins.  
   
   The return value will be an existing vector so do not deallocate it.  
   If the result is stored in v it should be stored at the offset given.  
   Everything from offset to the end of v should be considered available for this method to use.  
 */  
 // This method assumes that any subexpressions which evaluate to Constant or Tagged Data  
 // have already been collapsed to IDENTITY. So we must have at least one expanded child.  
 // unlike the other resolve helpers, we must treat these datapoints separately.  
 DataTypes::ValueType*  
 DataLazy::resolveTProd(ValueType& v,  size_t offset, int sampleNo, size_t& roffset) const  
 {  
 LAZYDEBUG(cout << "Resolve TensorProduct: " << toString()  << " to offset " << offset<< endl;)  
   
   size_t lroffset=0, rroffset=0;    // offsets in the left and right result vectors  
     // first work out which of the children are expanded  
   bool leftExp=(m_left->m_readytype=='E');  
   bool rightExp=(m_right->m_readytype=='E');  
   int steps=getNumDPPSample();  
 /*  int leftStep=((leftExp && !rightExp)? m_right->getNoValues() : 0);  
   int rightStep=((rightExp && !leftExp)? m_left->getNoValues() : 0);*/  
   int leftStep=(leftExp? m_left->getNoValues() : 0);        // do not have scalars as input to this method  
   int rightStep=(rightExp?m_right->getNoValues() : 0);  
   
   int resultStep=getNoValues();  
     // Get the values of sub-expressions (leave a gap of one sample for the result).  
   int gap=offset+m_samplesize;    
   
 LAZYDEBUG(cout << "Query left with offset=" << gap << endl;)  
   
   const ValueType* left=m_left->resolveVectorSample(v,gap,sampleNo,lroffset);  
   gap+=m_left->getMaxSampleSize();  
   
   
 LAZYDEBUG(cout << "Query right with offset=" << gap << endl;)  
   
   
   const ValueType* right=m_right->resolveVectorSample(v,gap,sampleNo,rroffset);  
   
 LAZYDEBUG(cerr << "[Left shape]=" << DataTypes::shapeToString(m_left->getShape()) << "\n[Right shape]=" << DataTypes::shapeToString(m_right->getShape()) << " result=" <<DataTypes::shapeToString(getShape()) <<  endl;  
 cout << getNoValues() << endl;)  
 LAZYDEBUG(cerr << "Result of left=";)  
 LAZYDEBUG(cerr << "[" << lroffset << " .. " << lroffset+m_left->getNoValues() << "]" << endl;  
   
 for (int i=lroffset, limit=lroffset+(leftExp?m_left->getNoValues()*m_left->getNumDPPSample():m_left->getNoValues());i<limit;++i)  
 {  
 cout << "[" << setw(2) << i-lroffset << "] " << setw(10) << (*left)[i] << " ";  
 if (i%4==0) cout << endl;  
 })  
 LAZYDEBUG(cerr << "\nResult of right=" << endl;)  
 LAZYDEBUG(  
 for (int i=rroffset, limit=rroffset+(rightExp?m_right->getNoValues()*m_right->getNumDPPSample():m_right->getNoValues());i<limit;++i)  
 {  
 cerr << "[" <<  setw(2)<< i-rroffset << "] " << setw(10) << (*right)[i] << " ";  
 if (i%4==0) cout << endl;  
 }  
 cerr << endl;  
 )  
 LAZYDEBUG(cerr << "Post sub calls: " << toString() << endl;)  
 LAZYDEBUG(cout << "LeftExp=" << leftExp << " rightExp=" << rightExp << endl;)  
 LAZYDEBUG(cout << "LeftR=" << m_left->getRank() << " rightExp=" << m_right->getRank() << endl;)  
 LAZYDEBUG(cout << "LeftSize=" << m_left->getNoValues() << " RightSize=" << m_right->getNoValues() << endl;)  
 LAZYDEBUG(cout << "m_samplesize=" << m_samplesize << endl;)  
 LAZYDEBUG(cout << "outputshape=" << DataTypes::shapeToString(getShape()) << endl;)  
 LAZYDEBUG(cout << "DPPS=" << m_right->getNumDPPSample() <<"."<<endl;)  
   
   double* resultp=&(v[offset]);     // results are stored at the vector offset we recieved  
   switch(m_op)  
   {  
     case PROD:  
     for (int i=0;i<steps;++i,resultp+=resultStep)  
     {  
   
 LAZYDEBUG(cout << "lroffset=" << lroffset << "rroffset=" << rroffset << endl;)  
 LAZYDEBUG(cout << "l*=" << left << " r*=" << right << endl;)  
 LAZYDEBUG(cout << "m_SL=" << m_SL << " m_SM=" << m_SM << " m_SR=" << m_SR << endl;)  
   
           const double *ptr_0 = &((*left)[lroffset]);  
           const double *ptr_1 = &((*right)[rroffset]);  
   
 LAZYDEBUG(cout << DataTypes::pointToString(*left, m_left->getShape(),lroffset,"LEFT") << endl;)  
 LAZYDEBUG(cout << DataTypes::pointToString(*right,m_right->getShape(),rroffset, "RIGHT") << endl;)  
   
           matrix_matrix_product(m_SL, m_SM, m_SR, ptr_0, ptr_1, resultp, m_transpose);  
   
 LAZYDEBUG(cout << "Results--\n";  
 {  
   DataVector dv(getNoValues());  
 for (int z=0;z<getNoValues();++z)  
 {  
   cout << "[" << setw(2) << z<< "] " << setw(10) << resultp[z] << " ";  
   if (z%4==0) cout << endl;  
   dv[z]=resultp[z];  
 }  
 cout << endl << DataTypes::pointToString(dv,getShape(),0,"RESLT");  
 cout << "\nWritten to: " << resultp << " resultStep=" << resultStep << endl;  
 }  
 )  
       lroffset+=leftStep;  
       rroffset+=rightStep;  
     }  
     break;  
     default:  
     throw DataException("Programmer error - resolveTProduct can not resolve operator "+opToString(m_op)+".");  
   }  
   roffset=offset;  
   return &v;  
 }  
   
   
 #ifdef LAZY_NODE_STORAGE  
1012    
1013  // The result will be stored in m_samples  // The result will be stored in m_samples
1014  // The return value is a pointer to the DataVector, offset is the offset within the return value  // The return value is a pointer to the DataVector, offset is the offset within the return value
# Line 1667  LAZYDEBUG(cout << "Resolve sample " << t Line 1024  LAZYDEBUG(cout << "Resolve sample " << t
1024    if (m_op==IDENTITY)      if (m_op==IDENTITY)  
1025    {    {
1026      const ValueType& vec=m_id->getVectorRO();      const ValueType& vec=m_id->getVectorRO();
 //     if (m_readytype=='C')  
 //     {  
 //  roffset=0;      // all samples read from the same position  
 //  return &(m_samples);  
 //     }  
1027      roffset=m_id->getPointOffset(sampleNo, 0);      roffset=m_id->getPointOffset(sampleNo, 0);
1028    #ifdef LAZY_STACK_PROF
1029    int x;
1030    if (&x<stackend[omp_get_thread_num()])
1031    {
1032           stackend[omp_get_thread_num()]=&x;
1033    }
1034    #endif
1035      return &(vec);      return &(vec);
1036    }    }
1037    if (m_readytype!='E')    if (m_readytype!='E')
# Line 1685  LAZYDEBUG(cout << "Resolve sample " << t Line 1044  LAZYDEBUG(cout << "Resolve sample " << t
1044      return &(m_samples);        // sample is already resolved      return &(m_samples);        // sample is already resolved
1045    }    }
1046    m_sampleids[tid]=sampleNo;    m_sampleids[tid]=sampleNo;
1047    
1048    switch (getOpgroup(m_op))    switch (getOpgroup(m_op))
1049    {    {
1050    case G_UNARY:    case G_UNARY:
# Line 1695  LAZYDEBUG(cout << "Resolve sample " << t Line 1055  LAZYDEBUG(cout << "Resolve sample " << t
1055    case G_TENSORPROD: return resolveNodeTProd(tid, sampleNo, roffset);    case G_TENSORPROD: return resolveNodeTProd(tid, sampleNo, roffset);
1056    case G_NP1OUT_2P: return resolveNodeNP1OUT_2P(tid, sampleNo, roffset);    case G_NP1OUT_2P: return resolveNodeNP1OUT_2P(tid, sampleNo, roffset);
1057    case G_REDUCTION: return resolveNodeReduction(tid, sampleNo, roffset);    case G_REDUCTION: return resolveNodeReduction(tid, sampleNo, roffset);
1058      case G_CONDEVAL: return resolveNodeCondEval(tid, sampleNo, roffset);
1059    default:    default:
1060      throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");      throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");
1061    }    }
# Line 1852  DataLazy::resolveNodeReduction(int tid, Line 1213  DataLazy::resolveNodeReduction(int tid,
1213    
1214    roffset=m_samplesize*tid;    roffset=m_samplesize*tid;
1215    unsigned int ndpps=getNumDPPSample();    unsigned int ndpps=getNumDPPSample();
1216    unsigned int psize=DataTypes::noValues(getShape());    unsigned int psize=DataTypes::noValues(m_left->getShape());
1217    double* result=&(m_samples[roffset]);    double* result=&(m_samples[roffset]);
1218    switch (m_op)    switch (m_op)
1219    {    {
# Line 2013  DataLazy::resolveNodeNP1OUT_2P(int tid, Line 1374  DataLazy::resolveNodeNP1OUT_2P(int tid,
1374    return &m_samples;    return &m_samples;
1375  }  }
1376    
1377    const DataTypes::ValueType*
1378    DataLazy::resolveNodeCondEval(int tid, int sampleNo, size_t& roffset)
1379    {
1380      if (m_readytype!='E')
1381      {
1382        throw DataException("Programmer error - resolveNodeCondEval should only be called on expanded Data.");
1383      }
1384      if (m_op!=CONDEVAL)
1385      {
1386        throw DataException("Programmer error - resolveNodeCondEval should only be called on CONDEVAL nodes.");
1387      }
1388      size_t subroffset;
1389    
1390      const ValueType* maskres=m_mask->resolveNodeSample(tid, sampleNo, subroffset);
1391      const ValueType* srcres=0;
1392      if ((*maskres)[subroffset]>0)
1393      {
1394        srcres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1395      }
1396      else
1397      {
1398        srcres=m_right->resolveNodeSample(tid, sampleNo, subroffset);
1399      }
1400    
1401      // Now we need to copy the result
1402    
1403      roffset=m_samplesize*tid;
1404      for (int i=0;i<m_samplesize;++i)
1405      {
1406        m_samples[roffset+i]=(*srcres)[subroffset+i];  
1407      }
1408    
1409      return &m_samples;
1410    }
1411    
1412  // This method assumes that any subexpressions which evaluate to Constant or Tagged Data  // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1413  // have already been collapsed to IDENTITY. So we must have at least one expanded child.  // have already been collapsed to IDENTITY. So we must have at least one expanded child.
# Line 2238  LAZYDEBUG(cout << DataTypes::pointToStri Line 1632  LAZYDEBUG(cout << DataTypes::pointToStri
1632    roffset=offset;    roffset=offset;
1633    return &m_samples;    return &m_samples;
1634  }  }
 #endif //LAZY_NODE_STORAGE  
   
 /*  
   \brief Compute the value of the expression for the given sample.  
   \return Vector which stores the value of the subexpression for the given sample.  
   \param v A vector to store intermediate results.  
   \param offset Index in v to begin storing results.  
   \param sampleNo Sample number to evaluate.  
   \param roffset (output parameter) the offset in the return vector where the result begins.  
   
   The return value will be an existing vector so do not deallocate it.  
 */  
 // the vector and the offset are a place where the method could write its data if it wishes  
 // it is not obligated to do so. For example, if it has its own storage already, it can use that.  
 // Hence the return value to indicate where the data is actually stored.  
 // Regardless, the storage should be assumed to be used, even if it isn't.  
   
 // the roffset is the offset within the returned vector where the data begins  
 const DataTypes::ValueType*  
 DataLazy::resolveVectorSample(ValueType& v, size_t offset, int sampleNo, size_t& roffset)  
 {  
 LAZYDEBUG(cout << "Resolve sample " << toString() << endl;)  
     // collapse so we have a 'E' node or an IDENTITY for some other type  
   if (m_readytype!='E' && m_op!=IDENTITY)  
   {  
     collapse();  
   }  
   if (m_op==IDENTITY)    
   {  
     const ValueType& vec=m_id->getVectorRO();  
     if (m_readytype=='C')  
     {  
     roffset=0;  
 LAZYDEBUG(cout << "Finish  sample " << toString() << endl;)  
     return &(vec);  
     }  
     roffset=m_id->getPointOffset(sampleNo, 0);  
 LAZYDEBUG(cout << "Finish  sample " << toString() << endl;)  
     return &(vec);  
   }  
   if (m_readytype!='E')  
   {  
     throw DataException("Programmer Error - Collapse did not produce an expanded node.");  
   }  
   switch (getOpgroup(m_op))  
   {  
   case G_UNARY:  
   case G_UNARY_P: return resolveUnary(v, offset,sampleNo,roffset);  
   case G_BINARY: return resolveBinary(v, offset,sampleNo,roffset);  
   case G_NP1OUT: return resolveNP1OUT(v, offset, sampleNo,roffset);  
   case G_NP1OUT_P: return resolveNP1OUT_P(v, offset, sampleNo,roffset);  
   case G_TENSORPROD: return resolveTProd(v,offset, sampleNo,roffset);  
   case G_NP1OUT_2P: return resolveNP1OUT_2P(v, offset, sampleNo, roffset);  
   case G_REDUCTION: return resolveReduction(v, offset, sampleNo, roffset);  
   default:  
     throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");  
   }  
1635    
 }  
1636    
1637  const DataTypes::ValueType*  const DataTypes::ValueType*
1638  DataLazy::resolveSample(BufferGroup& bg, int sampleNo, size_t& roffset)  DataLazy::resolveSample(int sampleNo, size_t& roffset)
1639  {  {
1640  #ifdef _OPENMP  #ifdef _OPENMP
1641      int tid=omp_get_thread_num();      int tid=omp_get_thread_num();
1642  #else  #else
1643      int tid=0;      int tid=0;
1644  #endif  #endif
1645  #ifdef LAZY_NODE_STORAGE  
1646      return resolveNodeSample(tid, sampleNo, roffset);  #ifdef LAZY_STACK_PROF
1647        stackstart[tid]=&tid;
1648        stackend[tid]=&tid;
1649        const DataTypes::ValueType* r=resolveNodeSample(tid, sampleNo, roffset);
1650        size_t d=(size_t)stackstart[tid]-(size_t)stackend[tid];
1651        #pragma omp critical
1652        if (d>maxstackuse)
1653        {
1654    cout << "Max resolve Stack use " << d << endl;
1655            maxstackuse=d;
1656        }
1657        return r;
1658  #else  #else
1659      return resolveVectorSample(bg.getBuffer(tid),bg.getOffset(tid),sampleNo,roffset);      return resolveNodeSample(tid, sampleNo, roffset);
1660  #endif  #endif
1661  }  }
1662    
# Line 2320  DataLazy::resolveToIdentity() Line 1667  DataLazy::resolveToIdentity()
1667  {  {
1668     if (m_op==IDENTITY)     if (m_op==IDENTITY)
1669      return;      return;
 #ifndef LAZY_NODE_STORAGE  
    DataReady_ptr p=resolveVectorWorker();  
 #else  
1670     DataReady_ptr p=resolveNodeWorker();     DataReady_ptr p=resolveNodeWorker();
 #endif  
1671     makeIdentity(p);     makeIdentity(p);
1672  }  }
1673    
# Line 2340  void DataLazy::makeIdentity(const DataRe Line 1683  void DataLazy::makeIdentity(const DataRe
1683     else if(p->isExpanded()) {m_readytype='E';}     else if(p->isExpanded()) {m_readytype='E';}
1684     else if (p->isTagged()) {m_readytype='T';}     else if (p->isTagged()) {m_readytype='T';}
1685     else {throw DataException("Unknown DataReady instance in convertToIdentity constructor.");}     else {throw DataException("Unknown DataReady instance in convertToIdentity constructor.");}
    m_buffsRequired=1;  
1686     m_samplesize=p->getNumDPPSample()*p->getNoValues();     m_samplesize=p->getNumDPPSample()*p->getNoValues();
    m_maxsamplesize=m_samplesize;  
1687     m_left.reset();     m_left.reset();
1688     m_right.reset();     m_right.reset();
1689  }  }
# Line 2355  DataLazy::resolve() Line 1696  DataLazy::resolve()
1696      return m_id;      return m_id;
1697  }  }
1698    
 #ifdef LAZY_NODE_STORAGE  
1699    
1700  // This version of resolve uses storage in each node to hold results  /* This is really a static method but I think that caused problems in windows */
1701  DataReady_ptr  void
1702  DataLazy::resolveNodeWorker()  DataLazy::resolveGroupWorker(std::vector<DataLazy*>& dats)
1703  {  {
1704    if (m_readytype!='E')     // if the whole sub-expression is Constant or Tagged, then evaluate it normally    if (dats.empty())
1705    {    {
1706      collapse();      return;
1707    }    }
1708    if (m_op==IDENTITY)       // So a lazy expression of Constant or Tagged data will be returned here.    vector<DataLazy*> work;
1709      FunctionSpace fs=dats[0]->getFunctionSpace();
1710      bool match=true;
1711      for (int i=dats.size()-1;i>=0;--i)
1712    {    {
1713      return m_id;      if (dats[i]->m_readytype!='E')
1714        {
1715            dats[i]->collapse();
1716        }
1717        if (dats[i]->m_op!=IDENTITY)
1718        {
1719            work.push_back(dats[i]);
1720            if (fs!=dats[i]->getFunctionSpace())
1721            {
1722                match=false;
1723            }
1724        }
1725    }    }
1726      // from this point on we must have m_op!=IDENTITY and m_readytype=='E'    if (work.empty())
   DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(),  ValueType(getNoValues()));  
   ValueType& resvec=result->getVectorRW();  
   DataReady_ptr resptr=DataReady_ptr(result);  
   
   int sample;  
   int totalsamples=getNumSamples();  
   const ValueType* res=0;   // Storage for answer  
 LAZYDEBUG(cout << "Total number of samples=" <<totalsamples << endl;)  
   #pragma omp parallel for private(sample,res) schedule(static)  
   for (sample=0;sample<totalsamples;++sample)  
1727    {    {
1728      size_t roffset=0;      return;     // no work to do
1729      }
1730      if (match)    // all functionspaces match.  Yes I realise this is overly strict
1731      {     // it is possible that dats[0] is one of the objects which we discarded and
1732            // all the other functionspaces match.
1733        vector<DataExpanded*> dep;
1734        vector<ValueType*> vecs;
1735        for (int i=0;i<work.size();++i)
1736        {
1737            dep.push_back(new DataExpanded(fs,work[i]->getShape(), ValueType(work[i]->getNoValues())));
1738            vecs.push_back(&(dep[i]->getVectorRW()));
1739        }
1740        int totalsamples=work[0]->getNumSamples();
1741        const ValueType* res=0; // Storage for answer
1742        int sample;
1743        #pragma omp parallel private(sample, res)
1744        {
1745            size_t roffset=0;
1746            #pragma omp for schedule(static)
1747            for (sample=0;sample<totalsamples;++sample)
1748            {
1749            roffset=0;
1750            int j;
1751            for (j=work.size()-1;j>=0;--j)
1752            {
1753  #ifdef _OPENMP  #ifdef _OPENMP
1754      res=resolveNodeSample(omp_get_thread_num(),sample,roffset);                  res=work[j]->resolveNodeSample(omp_get_thread_num(),sample,roffset);
1755  #else  #else
1756      res=resolveNodeSample(0,sample,roffset);                  res=work[j]->resolveNodeSample(0,sample,roffset);
1757  #endif  #endif
1758  LAZYDEBUG(cout << "Sample #" << sample << endl;)                  DataVector::size_type outoffset=dep[j]->getPointOffset(sample,0);
1759  LAZYDEBUG(cout << "Final res[" << roffset<< "]=" << (*res)[roffset] << (*res)[roffset]<< endl; )                  memcpy(&((*vecs[j])[outoffset]),&((*res)[roffset]),work[j]->m_samplesize*sizeof(DataVector::ElementType));
1760      DataVector::size_type outoffset=result->getPointOffset(sample,0);          }
1761      memcpy(&(resvec[outoffset]),&((*res)[roffset]),m_samplesize*sizeof(DataVector::ElementType));          }
1762        }
1763        // Now we need to load the new results as identity ops into the lazy nodes
1764        for (int i=work.size()-1;i>=0;--i)
1765        {
1766            work[i]->makeIdentity(boost::dynamic_pointer_cast<DataReady>(dep[i]->getPtr()));
1767        }
1768      }
1769      else  // functionspaces do not match
1770      {
1771        for (int i=0;i<work.size();++i)
1772        {
1773            work[i]->resolveToIdentity();
1774        }
1775    }    }
   return resptr;  
1776  }  }
1777    
 #endif // LAZY_NODE_STORAGE  
1778    
1779  // To simplify the memory management, all threads operate on one large vector, rather than one each.  
1780  // Each sample is evaluated independently and copied into the result DataExpanded.  // This version of resolve uses storage in each node to hold results
1781  DataReady_ptr  DataReady_ptr
1782  DataLazy::resolveVectorWorker()  DataLazy::resolveNodeWorker()
1783  {  {
   
 LAZYDEBUG(cout << "Sample size=" << m_samplesize << endl;)  
 LAZYDEBUG(cout << "Buffers=" << m_buffsRequired << endl;)  
1784    if (m_readytype!='E')     // if the whole sub-expression is Constant or Tagged, then evaluate it normally    if (m_readytype!='E')     // if the whole sub-expression is Constant or Tagged, then evaluate it normally
1785    {    {
1786      collapse();      collapse();
# Line 2414  LAZYDEBUG(cout << "Buffers=" << m_buffsR Line 1790  LAZYDEBUG(cout << "Buffers=" << m_buffsR
1790      return m_id;      return m_id;
1791    }    }
1792      // from this point on we must have m_op!=IDENTITY and m_readytype=='E'      // from this point on we must have m_op!=IDENTITY and m_readytype=='E'
   size_t threadbuffersize=m_maxsamplesize*(max(1,m_buffsRequired)); // Each thread needs to have enough  
     // storage to evaluate its expression  
   int numthreads=1;  
 #ifdef _OPENMP  
   numthreads=omp_get_max_threads();  
 #endif  
   ValueType v(numthreads*threadbuffersize);  
 LAZYDEBUG(cout << "Buffer created with size=" << v.size() << endl;)  
1793    DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(),  ValueType(getNoValues()));    DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(),  ValueType(getNoValues()));
1794    ValueType& resvec=result->getVectorRW();    ValueType& resvec=result->getVectorRW();
1795    DataReady_ptr resptr=DataReady_ptr(result);    DataReady_ptr resptr=DataReady_ptr(result);
1796    
1797    int sample;    int sample;
   size_t outoffset;     // offset in the output data  
1798    int totalsamples=getNumSamples();    int totalsamples=getNumSamples();
1799    const ValueType* res=0;   // Vector storing the answer    const ValueType* res=0;   // Storage for answer
   size_t resoffset=0;       // where in the vector to find the answer  
1800  LAZYDEBUG(cout << "Total number of samples=" <<totalsamples << endl;)  LAZYDEBUG(cout << "Total number of samples=" <<totalsamples << endl;)
1801    #pragma omp parallel for private(sample,resoffset,outoffset,res) schedule(static)    #pragma omp parallel private(sample,res)
   for (sample=0;sample<totalsamples;++sample)  
1802    {    {
1803  LAZYDEBUG(cout << "################################# " << sample << endl;)      size_t roffset=0;
1804    #ifdef LAZY_STACK_PROF
1805        stackstart[omp_get_thread_num()]=&roffset;
1806        stackend[omp_get_thread_num()]=&roffset;
1807    #endif
1808        #pragma omp for schedule(static)
1809        for (sample=0;sample<totalsamples;++sample)
1810        {
1811            roffset=0;
1812  #ifdef _OPENMP  #ifdef _OPENMP
1813      res=resolveVectorSample(v,threadbuffersize*omp_get_thread_num(),sample,resoffset);              res=resolveNodeSample(omp_get_thread_num(),sample,roffset);
1814  #else  #else
1815      res=resolveVectorSample(v,0,sample,resoffset);   // res would normally be v, but not if its a single IDENTITY op.              res=resolveNodeSample(0,sample,roffset);
1816  #endif  #endif
1817  LAZYDEBUG(cerr << "-------------------------------- " << endl;)  LAZYDEBUG(cout << "Sample #" << sample << endl;)
1818  LAZYDEBUG(cerr<< "Copying sample#" << sample << endl;)  LAZYDEBUG(cout << "Final res[" << roffset<< "]=" << (*res)[roffset] << (*res)[roffset]<< endl; )
1819      outoffset=result->getPointOffset(sample,0);              DataVector::size_type outoffset=result->getPointOffset(sample,0);
1820  LAZYDEBUG(cerr << "offset=" << outoffset << " from offset=" << resoffset << " " << m_samplesize << " doubles" << endl;)              memcpy(&(resvec[outoffset]),&((*res)[roffset]),m_samplesize*sizeof(DataVector::ElementType));
1821      for (unsigned int i=0;i<m_samplesize;++i,++outoffset,++resoffset)   // copy values into the output vector      }
     {  
 LAZYDEBUG(cerr << "outoffset=" << outoffset << " resoffset=" << resoffset << " " << (*res)[resoffset]<< endl;)  
     resvec[outoffset]=(*res)[resoffset];  
     }  
 LAZYDEBUG(cerr << DataTypes::pointToString(resvec,getShape(),outoffset-m_samplesize+DataTypes::noValues(getShape()),"Final result:") << endl;)  
 LAZYDEBUG(cerr << "*********************************" << endl;)  
1822    }    }
1823    #ifdef LAZY_STACK_PROF
1824      for (int i=0;i<getNumberOfThreads();++i)
1825      {
1826        size_t r=((size_t)stackstart[i] - (size_t)stackend[i]);
1827    //  cout << i << " " << stackstart[i] << " .. " << stackend[i] << " = " <<  r << endl;
1828        if (r>maxstackuse)
1829        {
1830            maxstackuse=r;
1831        }
1832      }
1833      cout << "Max resolve Stack use=" << maxstackuse << endl;
1834    #endif
1835    return resptr;    return resptr;
1836  }  }
1837    
# Line 2459  std::string Line 1839  std::string
1839  DataLazy::toString() const  DataLazy::toString() const
1840  {  {
1841    ostringstream oss;    ostringstream oss;
1842    oss << "Lazy Data:";    oss << "Lazy Data: [depth=" << m_height<< "] ";
1843    if (escriptParams.getPRINT_LAZY_TREE()==0)    switch (escriptParams.getLAZY_STR_FMT())
   {  
       intoString(oss);  
   }  
   else  
1844    {    {
1845      case 1:   // tree format
1846      oss << endl;      oss << endl;
1847      intoTreeString(oss,"");      intoTreeString(oss,"");
1848        break;
1849      case 2:   // just the depth
1850        break;
1851      default:
1852        intoString(oss);
1853        break;
1854    }    }
1855    return oss.str();    return oss.str();
1856  }  }
# Line 2527  DataLazy::intoString(ostringstream& oss) Line 1910  DataLazy::intoString(ostringstream& oss)
1910      oss << ", " << m_axis_offset << ", " << m_transpose;      oss << ", " << m_axis_offset << ", " << m_transpose;
1911      oss << ')';      oss << ')';
1912      break;      break;
1913      case G_CONDEVAL:
1914        oss << opToString(m_op)<< '(' ;
1915        m_mask->intoString(oss);
1916        oss << " ? ";
1917        m_left->intoString(oss);
1918        oss << " : ";
1919        m_right->intoString(oss);
1920        oss << ')';
1921        break;
1922    default:    default:
1923      oss << "UNKNOWN";      oss << "UNKNOWN";
1924    }    }

Legend:
Removed from v.2737  
changed lines
  Added in v.3917

  ViewVC Help
Powered by ViewVC 1.1.26