
Diff of /branches/clazy/escriptcore/src/DataLazy.cpp


revision 2734 by jfenwick, Fri Oct 30 05:39:48 2009 UTC  vs.  revision 4286 by caltinay, Thu Mar 7 04:28:11 2013 UTC
# Line 1  Line 1 
1    
2  /*******************************************************  /*****************************************************************************
3  *  *
4  * Copyright (c) 2003-2009 by University of Queensland  * Copyright (c) 2003-2013 by University of Queensland
5  * Earth Systems Science Computational Center (ESSCC)  * http://www.uq.edu.au
 * http://www.uq.edu.au/esscc  
6  *  *
7  * Primary Business: Queensland, Australia  * Primary Business: Queensland, Australia
8  * Licensed under the Open Software License version 3.0  * Licensed under the Open Software License version 3.0
9  * http://www.opensource.org/licenses/osl-3.0.php  * http://www.opensource.org/licenses/osl-3.0.php
10  *  *
11  *******************************************************/  * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12    * Development since 2012 by School of Earth Sciences
13    *
14    *****************************************************************************/
15    
16    
17  #include "DataLazy.h"  #include "DataLazy.h"
18  #ifdef USE_NETCDF  #include "esysUtils/Esys_MPI.h"
 #include <netcdfcpp.h>  
 #endif  
 #ifdef PASO_MPI  
 #include <mpi.h>  
 #endif  
19  #ifdef _OPENMP  #ifdef _OPENMP
20  #include <omp.h>  #include <omp.h>
21  #endif  #endif
# Line 30  Line 27 
27    
28  #include "EscriptParams.h"  #include "EscriptParams.h"
29    
30    #ifdef USE_NETCDF
31    #include <netcdfcpp.h>
32    #endif
33    
34  #include <iomanip>      // for some fancy formatting in debug  #include <iomanip>      // for some fancy formatting in debug
35    
36  // #define LAZYDEBUG(X) if (privdebug){X;}  // #define LAZYDEBUG(X) if (privdebug){X;}
# Line 44  bool privdebug=false; Line 45  bool privdebug=false;
45    
46  // #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {cerr << "\n!!!!!!! SIZE LIMIT EXCEEDED " << m_children << ";" << m_height << endl << toString() << endl;resolveToIdentity();}  // #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {cerr << "\n!!!!!!! SIZE LIMIT EXCEEDED " << m_children << ";" << m_height << endl << toString() << endl;resolveToIdentity();}
47    
48  #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {resolveToIdentity();}  // #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {cerr << "SIZE LIMIT EXCEEDED " << m_height << endl;resolveToIdentity();}
49    
50    
51    #define SIZELIMIT if (m_height>escript::escriptParams.getTOO_MANY_LEVELS())  {if (escript::escriptParams.getLAZY_VERBOSE()){cerr << "SIZE LIMIT EXCEEDED height=" << m_height << endl;}resolveToIdentity();}
52    
53  /*  /*
54  How does DataLazy work?  How does DataLazy work?
55  ~~~~~~~~~~~~~~~~~~~~~~~  ~~~~~~~~~~~~~~~~~~~~~~~
# Line 58  A special operation, IDENTITY, stores an Line 61  A special operation, IDENTITY, stores an
61  This means that all "internal" nodes in the structure are instances of DataLazy.  This means that all "internal" nodes in the structure are instances of DataLazy.
62    
63  Each operation has a string representation as well as an opgroup - eg G_IDENTITY, G_BINARY, ...  Each operation has a string representation as well as an opgroup - eg G_IDENTITY, G_BINARY, ...
64  Note that IDENITY is not considered a unary operation.  Note that IDENTITY is not considered a unary operation.
65    
66  I am avoiding calling the structure formed a tree because it is not guaranteed to be one (eg c=a+a).  I am avoiding calling the structure formed a tree because it is not guaranteed to be one (eg c=a+a).
67  It must however form a DAG (directed acyclic graph).  It must however form a DAG (directed acyclic graph).
# Line 67  I will refer to individual DataLazy obje Line 70  I will refer to individual DataLazy obje
70  Each node also stores:  Each node also stores:
71  - m_readytype \in {'E','T','C','?'} ~ indicates what sort of DataReady would be produced if the expression was  - m_readytype \in {'E','T','C','?'} ~ indicates what sort of DataReady would be produced if the expression was
72      evaluated.      evaluated.
73  - m_buffsrequired ~ the larged number of samples which would need to be kept simultaneously in order to  - m_buffsrequired ~ the large number of samples which would need to be kept simultaneously in order to
74      evaluate the expression.      evaluate the expression.
75  - m_samplesize ~ the number of doubles stored in a sample.  - m_samplesize ~ the number of doubles stored in a sample.
76    
# Line 109  namespace escript Line 112  namespace escript
112  namespace  namespace
113  {  {
114    
115    
116    // enabling this will print out whenever the maximum stacksize used by resolve increases
117    // it assumes _OPENMP is also in use
118    //#define LAZY_STACK_PROF
119    
120    
121    
122    #ifndef _OPENMP
123      #ifdef LAZY_STACK_PROF
124      #undef LAZY_STACK_PROF
125      #endif
126    #endif
127    
128    
129    #ifdef LAZY_STACK_PROF
130    std::vector<void*> stackstart(getNumberOfThreads());
131    std::vector<void*> stackend(getNumberOfThreads());
132    size_t maxstackuse=0;
133    #endif
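The LAZY_STACK_PROF machinery above estimates the stack consumed by the recursive resolve by comparing addresses of stack locals: the outermost call records the address of one of its locals (stackstart), and every deeper frame that observes a lower address updates stackend, so the difference approximates the bytes used (see resolveSample and the IDENTITY branch of resolveNodeSample further down in this diff). A minimal single-threaded sketch of the same trick, with hypothetical names and assuming a downward-growing stack:

#include <cstdint>
#include <iostream>

// Addresses of a local in the outermost frame and of the deepest frame seen.
static const void* stackStart = nullptr;
static const void* stackEnd   = nullptr;

// Stand-in for a recursive resolveNodeSample(): each frame offers the address
// of one of its locals as a marker and pushes stackEnd down if it is deeper.
void recurse(int depth) {
    int marker = depth;                 // any local works as a frame marker
    if (static_cast<const void*>(&marker) < stackEnd)
        stackEnd = &marker;
    if (depth > 0)
        recurse(depth - 1);
}

int main() {
    int top = 0;
    stackStart = &top;                  // analogous to stackstart[tid] in resolveSample()
    stackEnd   = &top;
    recurse(200);
    std::uintptr_t used = reinterpret_cast<std::uintptr_t>(stackStart)
                        - reinterpret_cast<std::uintptr_t>(stackEnd);
    std::cout << "max stack use during recursion ~ " << used << " bytes\n";
    return 0;
}

The real code keeps one stackstart/stackend slot per OpenMP thread, which is why the option is only meaningful when _OPENMP is defined.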
134    
135  enum ES_opgroup  enum ES_opgroup
136  {  {
137     G_UNKNOWN,     G_UNKNOWN,
# Line 120  enum ES_opgroup Line 143  enum ES_opgroup
143     G_NP1OUT_P,      // non-pointwise op with one output requiring a parameter     G_NP1OUT_P,      // non-pointwise op with one output requiring a parameter
144     G_TENSORPROD,    // general tensor product     G_TENSORPROD,    // general tensor product
145     G_NP1OUT_2P,     // non-pointwise op with one output requiring two params     G_NP1OUT_2P,     // non-pointwise op with one output requiring two params
146     G_REDUCTION      // non-pointwise unary op with a scalar output     G_REDUCTION,     // non-pointwise unary op with a scalar output
147       G_CONDEVAL
148  };  };
149    
150    
# Line 136  string ES_opstrings[]={"UNKNOWN","IDENTI Line 160  string ES_opstrings[]={"UNKNOWN","IDENTI
160              "prod",              "prod",
161              "transpose", "trace",              "transpose", "trace",
162              "swapaxes",              "swapaxes",
163              "minval", "maxval"};              "minval", "maxval",
164  int ES_opcount=43;              "condEval"};
165    int ES_opcount=44;
166  ES_opgroup opgroups[]={G_UNKNOWN,G_IDENTITY,G_BINARY,G_BINARY,G_BINARY,G_BINARY, G_BINARY,  ES_opgroup opgroups[]={G_UNKNOWN,G_IDENTITY,G_BINARY,G_BINARY,G_BINARY,G_BINARY, G_BINARY,
167              G_UNARY,G_UNARY,G_UNARY, //10              G_UNARY,G_UNARY,G_UNARY, //10
168              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,    // 17              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,    // 17
# Line 148  ES_opgroup opgroups[]={G_UNKNOWN,G_IDENT Line 173  ES_opgroup opgroups[]={G_UNKNOWN,G_IDENT
173              G_TENSORPROD,              G_TENSORPROD,
174              G_NP1OUT_P, G_NP1OUT_P,              G_NP1OUT_P, G_NP1OUT_P,
175              G_NP1OUT_2P,              G_NP1OUT_2P,
176              G_REDUCTION, G_REDUCTION};              G_REDUCTION, G_REDUCTION,
177                G_CONDEVAL};
178  inline  inline
179  ES_opgroup  ES_opgroup
180  getOpgroup(ES_optype op)  getOpgroup(ES_optype op)
# Line 169  resultFS(DataAbstract_ptr left, DataAbst Line 195  resultFS(DataAbstract_ptr left, DataAbst
195    FunctionSpace r=right->getFunctionSpace();    FunctionSpace r=right->getFunctionSpace();
196    if (l!=r)    if (l!=r)
197    {    {
198      if (r.probeInterpolation(l))      signed char res=r.getDomain()->preferredInterpolationOnDomain(r.getTypeCode(), l.getTypeCode());
199        if (res==1)
200      {      {
201      return l;      return l;
202      }      }
203      if (l.probeInterpolation(r))      if (res==-1)
204      {      {
205      return r;      return r;
206      }      }
# Line 221  resultShape(DataAbstract_ptr left, ES_op Line 248  resultShape(DataAbstract_ptr left, ES_op
248          int rank=left->getRank();          int rank=left->getRank();
249          if (axis_offset<0 || axis_offset>rank)          if (axis_offset<0 || axis_offset>rank)
250          {          {
251                 throw DataException("Error - Data::transpose must have 0 <= axis_offset <= rank=" + rank);              stringstream e;
252              }              e << "Error - Data::transpose must have 0 <= axis_offset <= rank=" << rank;
253              for (int i=0; i<rank; i++)              throw DataException(e.str());
254            }
255            for (int i=0; i<rank; i++)
256          {          {
257             int index = (axis_offset+i)%rank;             int index = (axis_offset+i)%rank;
258                 sh.push_back(s[index]); // Append to new shape             sh.push_back(s[index]); // Append to new shape
259              }          }
260          return sh;          return sh;
261         }         }
262      break;      break;
# Line 306  SwapShape(DataAbstract_ptr left, const i Line 335  SwapShape(DataAbstract_ptr left, const i
335          throw DataException("Error - Data::swapaxes argument must have at least rank 2.");          throw DataException("Error - Data::swapaxes argument must have at least rank 2.");
336       }       }
337       if (axis0<0 || axis0>rank-1) {       if (axis0<0 || axis0>rank-1) {
338          throw DataException("Error - Data::swapaxes: axis0 must be between 0 and rank-1=" + rank-1);          stringstream e;
339            e << "Error - Data::swapaxes: axis0 must be between 0 and rank-1=" << (rank-1);
340            throw DataException(e.str());
341       }       }
342       if (axis1<0 || axis1>rank-1) {       if (axis1<0 || axis1>rank-1) {
343           throw DataException("Error - Data::swapaxes: axis1 must be between 0 and rank-1=" + rank-1);          stringstream e;
344            e << "Error - Data::swapaxes: axis1 must be between 0 and rank-1=" << (rank-1);
345            throw DataException(e.str());
346       }       }
347       if (axis0 == axis1) {       if (axis0 == axis1) {
348           throw DataException("Error - Data::swapaxes: axis indices must be different.");           throw DataException("Error - Data::swapaxes: axis indices must be different.");
# Line 398  GTPShape(DataAbstract_ptr left, DataAbst Line 431  GTPShape(DataAbstract_ptr left, DataAbst
431    return shape2;    return shape2;
432  }  }
433    
 // determine the number of samples requires to evaluate an expression combining left and right  
 // NP1OUT needs an extra buffer because we can't write the answers over the top of the input.  
 // The same goes for G_TENSORPROD  
 // It might seem that pointwise binary ops (G_BINARY) could be written over the top of the lefts.  
 // This would be true were it not for the possibility that the LHS could be a scalar which needs to be examined  
 // multiple times  
 int  
 calcBuffs(const DataLazy_ptr& left, const DataLazy_ptr& right, ES_optype op)  
 {  
    switch(getOpgroup(op))  
    {  
    case G_IDENTITY: return 1;  
    case G_BINARY: return 1+max(left->getBuffsRequired(),right->getBuffsRequired()+1);  
    case G_REDUCTION:  
    case G_UNARY:  
    case G_UNARY_P: return max(left->getBuffsRequired(),1);  
    case G_NP1OUT: return 1+max(left->getBuffsRequired(),1);  
    case G_NP1OUT_P: return 1+max(left->getBuffsRequired(),1);  
    case G_TENSORPROD: return 1+max(left->getBuffsRequired(),right->getBuffsRequired()+1);  
    case G_NP1OUT_2P: return 1+max(left->getBuffsRequired(),1);  
    default:  
     throw DataException("Programmer Error - attempt to calcBuffs() for operator "+opToString(op)+".");  
    }  
 }  
   
   
434  }   // end anonymous namespace  }   // end anonymous namespace
435    
436    
# Line 439  opToString(ES_optype op) Line 446  opToString(ES_optype op)
446    return ES_opstrings[op];    return ES_opstrings[op];
447  }  }
448    
 #ifdef LAZY_NODE_STORAGE  
449  void DataLazy::LazyNodeSetup()  void DataLazy::LazyNodeSetup()
450  {  {
451  #ifdef _OPENMP  #ifdef _OPENMP
# Line 456  void DataLazy::LazyNodeSetup() Line 462  void DataLazy::LazyNodeSetup()
462      m_sampleids[0]=-1;      m_sampleids[0]=-1;
463  #endif  // _OPENMP  #endif  // _OPENMP
464  }  }
 #endif   // LAZY_NODE_STORAGE  
465    
466    
467  // Creates an identity node  // Creates an identity node
468  DataLazy::DataLazy(DataAbstract_ptr p)  DataLazy::DataLazy(DataAbstract_ptr p)
469      : parent(p->getFunctionSpace(),p->getShape())      : parent(p->getFunctionSpace(),p->getShape())
 #ifdef LAZY_NODE_STORAGE  
470      ,m_sampleids(0),      ,m_sampleids(0),
471      m_samples(1)      m_samples(1)
 #endif  
472  {  {
473     if (p->isLazy())     if (p->isLazy())
474     {     {
# Line 507  DataLazy::DataLazy(DataAbstract_ptr left Line 510  DataLazy::DataLazy(DataAbstract_ptr left
510     }     }
511     m_readytype=lleft->m_readytype;     m_readytype=lleft->m_readytype;
512     m_left=lleft;     m_left=lleft;
    m_buffsRequired=calcBuffs(m_left, m_right,m_op); // yeah m_right will be null at this point  
513     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
    m_maxsamplesize=max(m_samplesize,m_left->getMaxSampleSize());  
514     m_children=m_left->m_children+1;     m_children=m_left->m_children+1;
515     m_height=m_left->m_height+1;     m_height=m_left->m_height+1;
 #ifdef LAZY_NODE_STORAGE  
516     LazyNodeSetup();     LazyNodeSetup();
 #endif  
517     SIZELIMIT     SIZELIMIT
518  }  }
519    
# Line 581  LAZYDEBUG(cout << "Right " << right.get( Line 580  LAZYDEBUG(cout << "Right " << right.get(
580      m_readytype='C';      m_readytype='C';
581     }     }
582     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
    m_maxsamplesize=max(max(m_samplesize,m_right->getMaxSampleSize()),m_left->getMaxSampleSize());    
    m_buffsRequired=calcBuffs(m_left, m_right,m_op);  
583     m_children=m_left->m_children+m_right->m_children+2;     m_children=m_left->m_children+m_right->m_children+2;
584     m_height=max(m_left->m_height,m_right->m_height)+1;     m_height=max(m_left->m_height,m_right->m_height)+1;
 #ifdef LAZY_NODE_STORAGE  
585     LazyNodeSetup();     LazyNodeSetup();
 #endif  
586     SIZELIMIT     SIZELIMIT
587  LAZYDEBUG(cout << "(3)Lazy created with " << m_samplesize << endl;)  LAZYDEBUG(cout << "(3)Lazy created with " << m_samplesize << endl;)
588  }  }
# Line 651  DataLazy::DataLazy(DataAbstract_ptr left Line 646  DataLazy::DataLazy(DataAbstract_ptr left
646      m_readytype='C';      m_readytype='C';
647     }     }
648     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
    m_maxsamplesize=max(max(m_samplesize,m_right->getMaxSampleSize()),m_left->getMaxSampleSize());    
    m_buffsRequired=calcBuffs(m_left, m_right,m_op);  
649     m_children=m_left->m_children+m_right->m_children+2;     m_children=m_left->m_children+m_right->m_children+2;
650     m_height=max(m_left->m_height,m_right->m_height)+1;     m_height=max(m_left->m_height,m_right->m_height)+1;
 #ifdef LAZY_NODE_STORAGE  
651     LazyNodeSetup();     LazyNodeSetup();
 #endif  
652     SIZELIMIT     SIZELIMIT
653  LAZYDEBUG(cout << "(4)Lazy created with " << m_samplesize << endl;)  LAZYDEBUG(cout << "(4)Lazy created with " << m_samplesize << endl;)
654  }  }
# Line 685  DataLazy::DataLazy(DataAbstract_ptr left Line 676  DataLazy::DataLazy(DataAbstract_ptr left
676     }     }
677     m_readytype=lleft->m_readytype;     m_readytype=lleft->m_readytype;
678     m_left=lleft;     m_left=lleft;
    m_buffsRequired=calcBuffs(m_left, m_right,m_op); // yeah m_right will be null at this point  
679     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
    m_maxsamplesize=max(m_samplesize,m_left->getMaxSampleSize());  
680     m_children=m_left->m_children+1;     m_children=m_left->m_children+1;
681     m_height=m_left->m_height+1;     m_height=m_left->m_height+1;
 #ifdef LAZY_NODE_STORAGE  
682     LazyNodeSetup();     LazyNodeSetup();
 #endif  
683     SIZELIMIT     SIZELIMIT
684  LAZYDEBUG(cout << "(5)Lazy created with " << m_samplesize << endl;)  LAZYDEBUG(cout << "(5)Lazy created with " << m_samplesize << endl;)
685  }  }
# Line 719  DataLazy::DataLazy(DataAbstract_ptr left Line 706  DataLazy::DataLazy(DataAbstract_ptr left
706     }     }
707     m_readytype=lleft->m_readytype;     m_readytype=lleft->m_readytype;
708     m_left=lleft;     m_left=lleft;
    m_buffsRequired=calcBuffs(m_left, m_right,m_op); // yeah m_right will be null at this point  
709     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
    m_maxsamplesize=max(m_samplesize,m_left->getMaxSampleSize());  
710     m_children=m_left->m_children+1;     m_children=m_left->m_children+1;
711     m_height=m_left->m_height+1;     m_height=m_left->m_height+1;
 #ifdef LAZY_NODE_STORAGE  
712     LazyNodeSetup();     LazyNodeSetup();
 #endif  
713     SIZELIMIT     SIZELIMIT
714  LAZYDEBUG(cout << "(6)Lazy created with " << m_samplesize << endl;)  LAZYDEBUG(cout << "(6)Lazy created with " << m_samplesize << endl;)
715  }  }
# Line 754  DataLazy::DataLazy(DataAbstract_ptr left Line 737  DataLazy::DataLazy(DataAbstract_ptr left
737     }     }
738     m_readytype=lleft->m_readytype;     m_readytype=lleft->m_readytype;
739     m_left=lleft;     m_left=lleft;
    m_buffsRequired=calcBuffs(m_left, m_right,m_op); // yeah m_right will be null at this point  
740     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
    m_maxsamplesize=max(m_samplesize,m_left->getMaxSampleSize());  
741     m_children=m_left->m_children+1;     m_children=m_left->m_children+1;
742     m_height=m_left->m_height+1;     m_height=m_left->m_height+1;
 #ifdef LAZY_NODE_STORAGE  
743     LazyNodeSetup();     LazyNodeSetup();
 #endif  
744     SIZELIMIT     SIZELIMIT
745  LAZYDEBUG(cout << "(7)Lazy created with " << m_samplesize << endl;)  LAZYDEBUG(cout << "(7)Lazy created with " << m_samplesize << endl;)
746  }  }
747    
748  DataLazy::~DataLazy()  
749    namespace
750  {  {
 #ifdef LAZY_NODE_SETUP  
    delete[] m_sampleids;  
    delete[] m_samples;  
 #endif  
 }  
751    
752        inline int max3(int a, int b, int c)
753        {
754        int t=(a>b?a:b);
755        return (t>c?t:c);
756    
757  int      }
 DataLazy::getBuffsRequired() const  
 {  
     return m_buffsRequired;  
758  }  }
759    
760    DataLazy::DataLazy(DataAbstract_ptr mask, DataAbstract_ptr left, DataAbstract_ptr right/*, double tol*/)
761  size_t      : parent(left->getFunctionSpace(), left->getShape()),
762  DataLazy::getMaxSampleSize() const      m_op(CONDEVAL),
763        m_axis_offset(0),
764        m_transpose(0),
765        m_tol(0)
766  {  {
767      return m_maxsamplesize;  
768       DataLazy_ptr lmask;
769       DataLazy_ptr lleft;
770       DataLazy_ptr lright;
771       if (!mask->isLazy())
772       {
773        lmask=DataLazy_ptr(new DataLazy(mask));
774       }
775       else
776       {
777        lmask=dynamic_pointer_cast<DataLazy>(mask);
778       }
779       if (!left->isLazy())
780       {
781        lleft=DataLazy_ptr(new DataLazy(left));
782       }
783       else
784       {
785        lleft=dynamic_pointer_cast<DataLazy>(left);
786       }
787       if (!right->isLazy())
788       {
789        lright=DataLazy_ptr(new DataLazy(right));
790       }
791       else
792       {
793        lright=dynamic_pointer_cast<DataLazy>(right);
794       }
795       m_readytype=lmask->m_readytype;
796       if ((lleft->m_readytype!=lright->m_readytype) || (lmask->m_readytype!=lleft->m_readytype))
797       {
798        throw DataException("Programmer Error - condEval arguments must have the same readytype");
799       }
800       m_left=lleft;
801       m_right=lright;
802       m_mask=lmask;
803       m_samplesize=getNumDPPSample()*getNoValues();
804       m_children=m_left->m_children+m_right->m_children+m_mask->m_children+1;
805       m_height=max3(m_left->m_height,m_right->m_height,m_mask->m_height)+1;
806       LazyNodeSetup();
807       SIZELIMIT
808    LAZYDEBUG(cout << "(8)Lazy created with " << m_samplesize << endl;)
809  }  }
810    
811    
812    
813  size_t  DataLazy::~DataLazy()
 DataLazy::getSampleBufferSize() const  
814  {  {
815      return m_maxsamplesize*(max(1,m_buffsRequired));     delete[] m_sampleids;
816  }  }
817    
818    
819  /*  /*
820    \brief Evaluates the expression using methods on Data.    \brief Evaluates the expression using methods on Data.
821    This does the work for the collapse method.    This does the work for the collapse method.
# Line 971  DataLazy::collapse() Line 991  DataLazy::collapse()
991    m_op=IDENTITY;    m_op=IDENTITY;
992  }  }
993    
 /*  
   \brief Compute the value of the expression (unary operation) for the given sample.  
   \return Vector which stores the value of the subexpression for the given sample.  
   \param v A vector to store intermediate results.  
   \param offset Index in v to begin storing results.  
   \param sampleNo Sample number to evaluate.  
   \param roffset (output parameter) the offset in the return vector where the result begins.  
   
   The return value will be an existing vector so do not deallocate it.  
   If the result is stored in v it should be stored at the offset given.  
   Everything from offset to the end of v should be considered available for this method to use.  
 */  
 DataTypes::ValueType*  
 DataLazy::resolveUnary(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const  
 {  
     // we assume that any collapsing has been done before we get here  
     // since we only have one argument we don't need to think about only  
     // processing single points.  
   if (m_readytype!='E')  
   {  
     throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");  
   }  
   const ValueType* vleft=m_left->resolveVectorSample(v,offset,sampleNo,roffset);  
   const double* left=&((*vleft)[roffset]);  
   double* result=&(v[offset]);  
   roffset=offset;  
   switch (m_op)  
   {  
     case SIN:    
     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sin);  
     break;  
     case COS:  
     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cos);  
     break;  
     case TAN:  
     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tan);  
     break;  
     case ASIN:  
     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::asin);  
     break;  
     case ACOS:  
     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::acos);  
     break;  
     case ATAN:  
     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::atan);  
     break;  
     case SINH:  
     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sinh);  
     break;  
     case COSH:  
     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cosh);  
     break;  
     case TANH:  
     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tanh);  
     break;  
     case ERF:  
 #if defined (_WIN32) && !defined(__INTEL_COMPILER)  
     throw DataException("Error - Data:: erf function is not supported on _WIN32 platforms.");  
 #else  
     tensor_unary_operation(m_samplesize, left, result, ::erf);  
     break;  
 #endif  
    case ASINH:  
 #if defined (_WIN32) && !defined(__INTEL_COMPILER)  
     tensor_unary_operation(m_samplesize, left, result, escript::asinh_substitute);  
 #else  
     tensor_unary_operation(m_samplesize, left, result, ::asinh);  
 #endif    
     break;  
    case ACOSH:  
 #if defined (_WIN32) && !defined(__INTEL_COMPILER)  
     tensor_unary_operation(m_samplesize, left, result, escript::acosh_substitute);  
 #else  
     tensor_unary_operation(m_samplesize, left, result, ::acosh);  
 #endif    
     break;  
    case ATANH:  
 #if defined (_WIN32) && !defined(__INTEL_COMPILER)  
     tensor_unary_operation(m_samplesize, left, result, escript::atanh_substitute);  
 #else  
     tensor_unary_operation(m_samplesize, left, result, ::atanh);  
 #endif    
     break;  
     case LOG10:  
     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log10);  
     break;  
     case LOG:  
     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log);  
     break;  
     case SIGN:  
     tensor_unary_operation(m_samplesize, left, result, escript::fsign);  
     break;  
     case ABS:  
     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::fabs);  
     break;  
     case NEG:  
     tensor_unary_operation(m_samplesize, left, result, negate<double>());  
     break;  
     case POS:  
     // it doesn't mean anything for delayed.  
     // it will just trigger a deep copy of the lazy object  
     throw DataException("Programmer error - POS not supported for lazy data.");  
     break;  
     case EXP:  
     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::exp);  
     break;  
     case SQRT:  
     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sqrt);  
     break;  
     case RECIP:  
     tensor_unary_operation(m_samplesize, left, result, bind1st(divides<double>(),1.));  
     break;  
     case GZ:  
     tensor_unary_operation(m_samplesize, left, result, bind2nd(greater<double>(),0.0));  
     break;  
     case LZ:  
     tensor_unary_operation(m_samplesize, left, result, bind2nd(less<double>(),0.0));  
     break;  
     case GEZ:  
     tensor_unary_operation(m_samplesize, left, result, bind2nd(greater_equal<double>(),0.0));  
     break;  
     case LEZ:  
     tensor_unary_operation(m_samplesize, left, result, bind2nd(less_equal<double>(),0.0));  
     break;  
 // There are actually G_UNARY_P but I don't see a compelling reason to treat them differently  
     case NEZ:  
     tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsGT(),m_tol));  
     break;  
     case EZ:  
     tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsLTE(),m_tol));  
     break;  
   
     default:  
     throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");  
   }  
   return &v;  
 }  
   
   
 /*  
   \brief Compute the value of the expression (reduction operation) for the given sample.  
   \return Vector which stores the value of the subexpression for the given sample.  
   \param v A vector to store intermediate results.  
   \param offset Index in v to begin storing results.  
   \param sampleNo Sample number to evaluate.  
   \param roffset (output parameter) the offset in the return vector where the result begins.  
   
   The return value will be an existing vector so do not deallocate it.  
   If the result is stored in v it should be stored at the offset given.  
   Everything from offset to the end of v should be considered available for this method to use.  
 */  
 DataTypes::ValueType*  
 DataLazy::resolveReduction(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const  
 {  
     // we assume that any collapsing has been done before we get here  
     // since we only have one argument we don't need to think about only  
     // processing single points.  
   if (m_readytype!='E')  
   {  
     throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");  
   }  
   const ValueType* vleft=m_left->resolveVectorSample(v,offset,sampleNo,roffset);  
   double* result=&(v[offset]);  
   roffset=offset;  
   unsigned int ndpps=getNumDPPSample();  
   unsigned int psize=DataTypes::noValues(getShape());  
   switch (m_op)  
   {  
     case MINVAL:  
     {  
       for (unsigned int z=0;z<ndpps;++z)  
       {  
          FMin op;  
          *result=DataMaths::reductionOp(*vleft, m_left->getShape(), roffset, op, numeric_limits<double>::max());  
          roffset+=psize;  
          result++;  
       }  
     }  
     break;  
     case MAXVAL:  
     {  
       for (unsigned int z=0;z<ndpps;++z)  
       {  
          FMax op;  
          *result=DataMaths::reductionOp(*vleft, m_left->getShape(), roffset, op, numeric_limits<double>::max()*-1);  
          roffset+=psize;  
          result++;  
       }  
     }  
     break;  
     default:  
     throw DataException("Programmer error - resolveReduction can not resolve operator "+opToString(m_op)+".");  
   }  
   return &v;  
 }  
   
   
   
 /*  
   \brief Compute the value of the expression (unary operation) for the given sample.  
   \return Vector which stores the value of the subexpression for the given sample.  
   \param v A vector to store intermediate results.  
   \param offset Index in v to begin storing results.  
   \param sampleNo Sample number to evaluate.  
   \param roffset (output parameter) the offset in the return vector where the result begins.  
   
   The return value will be an existing vector so do not deallocate it.  
   If the result is stored in v it should be stored at the offset given.  
   Everything from offset to the end of v should be considered available for this method to use.  
 */  
 DataTypes::ValueType*  
 DataLazy::resolveNP1OUT(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const  
 {  
     // we assume that any collapsing has been done before we get here  
     // since we only have one argument we don't need to think about only  
     // processing single points.  
   if (m_readytype!='E')  
   {  
     throw DataException("Programmer error - resolveNP1OUT should only be called on expanded Data.");  
   }  
     // since we can't write the result over the input, we need a result offset further along  
   size_t subroffset=roffset+m_samplesize;  
 LAZYDEBUG(cerr << "subroffset=" << subroffset << endl;)  
   const ValueType* vleft=m_left->resolveVectorSample(v,offset+m_left->m_samplesize,sampleNo,subroffset);  
   roffset=offset;  
   size_t loop=0;  
   size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;  
   size_t step=getNoValues();  
   switch (m_op)  
   {  
     case SYM:  
     for (loop=0;loop<numsteps;++loop)  
     {  
         DataMaths::symmetric(*vleft,m_left->getShape(),subroffset, v, getShape(), offset);  
         subroffset+=step;  
         offset+=step;  
     }  
     break;  
     case NSYM:  
     for (loop=0;loop<numsteps;++loop)  
     {  
         DataMaths::nonsymmetric(*vleft,m_left->getShape(),subroffset, v, getShape(), offset);  
         subroffset+=step;  
         offset+=step;  
     }  
     break;  
     default:  
     throw DataException("Programmer error - resolveNP1OUT can not resolve operator "+opToString(m_op)+".");  
   }  
   return &v;  
 }  
   
 /*  
   \brief Compute the value of the expression (unary operation) for the given sample.  
   \return Vector which stores the value of the subexpression for the given sample.  
   \param v A vector to store intermediate results.  
   \param offset Index in v to begin storing results.  
   \param sampleNo Sample number to evaluate.  
   \param roffset (output parameter) the offset in the return vector where the result begins.  
   
   The return value will be an existing vector so do not deallocate it.  
   If the result is stored in v it should be stored at the offset given.  
   Everything from offset to the end of v should be considered available for this method to use.  
 */  
 DataTypes::ValueType*  
 DataLazy::resolveNP1OUT_P(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const  
 {  
     // we assume that any collapsing has been done before we get here  
     // since we only have one argument we don't need to think about only  
     // processing single points.  
   if (m_readytype!='E')  
   {  
     throw DataException("Programmer error - resolveNP1OUT_P should only be called on expanded Data.");  
   }  
     // since we can't write the result over the input, we need a result offset further along  
   size_t subroffset;  
   const ValueType* vleft=m_left->resolveVectorSample(v,offset+m_left->m_samplesize,sampleNo,subroffset);  
 LAZYDEBUG(cerr << "srcsamplesize=" << offset+m_left->m_samplesize << " beg=" << subroffset << endl;)  
 LAZYDEBUG(cerr << "Offset for 5800=" << getPointOffset(5800/getNumDPPSample(),5800%getNumDPPSample()) << endl;)  
   roffset=offset;  
   size_t loop=0;  
   size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;  
   size_t outstep=getNoValues();  
   size_t instep=m_left->getNoValues();  
 LAZYDEBUG(cerr << "instep=" << instep << " outstep=" << outstep<< " numsteps=" << numsteps << endl;)  
   switch (m_op)  
   {  
     case TRACE:  
     for (loop=0;loop<numsteps;++loop)  
     {  
 size_t zz=sampleNo*getNumDPPSample()+loop;  
 if (zz==5800)  
 {  
 LAZYDEBUG(cerr << "point=" <<  zz<< endl;)  
 LAZYDEBUG(cerr << "Input to  trace=" << DataTypes::pointToString(*vleft,m_left->getShape(),subroffset,"") << endl;)  
 LAZYDEBUG(cerr << "Offset for point=" << getPointOffset(5800/getNumDPPSample(),5800%getNumDPPSample()) << " vs ";)  
 LAZYDEBUG(cerr << subroffset << endl;)  
 LAZYDEBUG(cerr << "output=" << offset << endl;)  
 }  
             DataMaths::trace(*vleft,m_left->getShape(),subroffset, v ,getShape(),offset,m_axis_offset);  
 if (zz==5800)  
 {  
 LAZYDEBUG(cerr << "Result of trace=" << DataTypes::pointToString(v,getShape(),offset,"") << endl;)  
 }  
         subroffset+=instep;  
         offset+=outstep;  
     }  
     break;  
     case TRANS:  
     for (loop=0;loop<numsteps;++loop)  
     {  
             DataMaths::transpose(*vleft,m_left->getShape(),subroffset, v,getShape(),offset,m_axis_offset);  
         subroffset+=instep;  
         offset+=outstep;  
     }  
     break;  
     default:  
     throw DataException("Programmer error - resolveNP1OUTP can not resolve operator "+opToString(m_op)+".");  
   }  
   return &v;  
 }  
994    
995    
 /*  
   \brief Compute the value of the expression (unary operation with int params) for the given sample.  
   \return Vector which stores the value of the subexpression for the given sample.  
   \param v A vector to store intermediate results.  
   \param offset Index in v to begin storing results.  
   \param sampleNo Sample number to evaluate.  
   \param roffset (output parameter) the offset in the return vector where the result begins.  
   
   The return value will be an existing vector so do not deallocate it.  
   If the result is stored in v it should be stored at the offset given.  
   Everything from offset to the end of v should be considered available for this method to use.  
 */  
 DataTypes::ValueType*  
 DataLazy::resolveNP1OUT_2P(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const  
 {  
     // we assume that any collapsing has been done before we get here  
     // since we only have one argument we don't need to think about only  
     // processing single points.  
   if (m_readytype!='E')  
   {  
     throw DataException("Programmer error - resolveNP1OUT_2P should only be called on expanded Data.");  
   }  
     // since we can't write the result over the input, we need a result offset further along  
   size_t subroffset;  
   const ValueType* vleft=m_left->resolveVectorSample(v,offset+m_left->m_samplesize,sampleNo,subroffset);  
 LAZYDEBUG(cerr << "srcsamplesize=" << offset+m_left->m_samplesize << " beg=" << subroffset << endl;)  
 LAZYDEBUG(cerr << "Offset for 5800=" << getPointOffset(5800/getNumDPPSample(),5800%getNumDPPSample()) << endl;)  
   roffset=offset;  
   size_t loop=0;  
   size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;  
   size_t outstep=getNoValues();  
   size_t instep=m_left->getNoValues();  
 LAZYDEBUG(cerr << "instep=" << instep << " outstep=" << outstep<< " numsteps=" << numsteps << endl;)  
   switch (m_op)  
   {  
     case SWAP:  
     for (loop=0;loop<numsteps;++loop)  
     {  
             DataMaths::swapaxes(*vleft,m_left->getShape(),subroffset, v,getShape(),offset,m_axis_offset, m_transpose);  
         subroffset+=instep;  
         offset+=outstep;  
     }  
     break;  
     default:  
     throw DataException("Programmer error - resolveNP1OUT2P can not resolve operator "+opToString(m_op)+".");  
   }  
   return &v;  
 }  
996    
997    
998    
# Line 1361  LAZYDEBUG(cout << " result=      " << re Line 1012  LAZYDEBUG(cout << " result=      " << re
1012        rroffset+=orightstep;\        rroffset+=orightstep;\
1013      }      }
1014    
 /*  
   \brief Compute the value of the expression (binary operation) for the given sample.  
   \return Vector which stores the value of the subexpression for the given sample.  
   \param v A vector to store intermediate results.  
   \param offset Index in v to begin storing results.  
   \param sampleNo Sample number to evaluate.  
   \param roffset (output parameter) the offset in the return vector where the result begins.  
   
   The return value will be an existing vector so do not deallocate it.  
   If the result is stored in v it should be stored at the offset given.  
   Everything from offset to the end of v should be considered available for this method to use.  
 */  
 // This method assumes that any subexpressions which evaluate to Constant or Tagged Data  
 // have already been collapsed to IDENTITY. So we must have at least one expanded child.  
 // If both children are expanded, then we can process them in a single operation (we treat  
 // the whole sample as one big datapoint.  
 // If one of the children is not expanded, then we need to treat each point in the sample  
 // individually.  
 // There is an additional complication when scalar operations are considered.  
 // For example, 2+Vector.  
 // In this case each double within the point is treated individually  
 DataTypes::ValueType*  
 DataLazy::resolveBinary(ValueType& v,  size_t offset, int sampleNo, size_t& roffset) const  
 {  
 LAZYDEBUG(cout << "Resolve binary: " << toString() << endl;)  
   
   size_t lroffset=0, rroffset=0;    // offsets in the left and right result vectors  
     // first work out which of the children are expanded  
   bool leftExp=(m_left->m_readytype=='E');  
   bool rightExp=(m_right->m_readytype=='E');  
   if (!leftExp && !rightExp)  
   {  
     throw DataException("Programmer Error - please use collapse if neither argument has type 'E'.");  
   }  
   bool leftScalar=(m_left->getRank()==0);  
   bool rightScalar=(m_right->getRank()==0);  
   if ((m_left->getRank()!=m_right->getRank()) && (!leftScalar && !rightScalar))  
   {  
     throw DataException("resolveBinary - ranks of arguments must match unless one of them is scalar.");  
   }  
   size_t leftsize=m_left->getNoValues();  
   size_t rightsize=m_right->getNoValues();  
   size_t chunksize=1;           // how many doubles will be processed in one go  
   int leftstep=0;       // how far should the left offset advance after each step  
   int rightstep=0;  
   int numsteps=0;       // total number of steps for the inner loop  
   int oleftstep=0;  // the o variables refer to the outer loop  
   int orightstep=0; // The outer loop is only required in cases where there is an extended scalar  
   int onumsteps=1;  
     
   bool LES=(leftExp && leftScalar); // Left is an expanded scalar  
   bool RES=(rightExp && rightScalar);  
   bool LS=(!leftExp && leftScalar); // left is a single scalar  
   bool RS=(!rightExp && rightScalar);  
   bool LN=(!leftExp && !leftScalar);    // left is a single non-scalar  
   bool RN=(!rightExp && !rightScalar);  
   bool LEN=(leftExp && !leftScalar);    // left is an expanded non-scalar  
   bool REN=(rightExp && !rightScalar);  
   
   if ((LES && RES) || (LEN && REN)) // both are Expanded scalars or both are expanded non-scalars  
   {  
     chunksize=m_left->getNumDPPSample()*leftsize;  
     leftstep=0;  
     rightstep=0;  
     numsteps=1;  
   }  
   else if (LES || RES)  
   {  
     chunksize=1;  
     if (LES)        // left is an expanded scalar  
     {  
         if (RS)  
         {  
            leftstep=1;  
            rightstep=0;  
            numsteps=m_left->getNumDPPSample();  
         }  
         else        // RN or REN  
         {  
            leftstep=0;  
            oleftstep=1;  
            rightstep=1;  
            orightstep=(RN ? -(int)rightsize : 0);  
            numsteps=rightsize;  
            onumsteps=m_left->getNumDPPSample();  
         }  
     }  
     else        // right is an expanded scalar  
     {  
         if (LS)  
         {  
            rightstep=1;  
            leftstep=0;  
            numsteps=m_right->getNumDPPSample();  
         }  
         else  
         {  
            rightstep=0;  
            orightstep=1;  
            leftstep=1;  
            oleftstep=(LN ? -(int)leftsize : 0);  
            numsteps=leftsize;  
            onumsteps=m_right->getNumDPPSample();  
         }  
     }  
   }  
   else  // this leaves (LEN, RS), (LEN, RN) and their transposes  
   {  
     if (LEN)    // and Right will be a single value  
     {  
         chunksize=rightsize;  
         leftstep=rightsize;  
         rightstep=0;  
         numsteps=m_left->getNumDPPSample();  
         if (RS)  
         {  
            numsteps*=leftsize;  
         }  
     }  
     else    // REN  
     {  
         chunksize=leftsize;  
         rightstep=leftsize;  
         leftstep=0;  
         numsteps=m_right->getNumDPPSample();  
         if (LS)  
         {  
            numsteps*=rightsize;  
         }  
     }  
   }  
   
   int resultStep=max(leftstep,rightstep);   // only one (at most) should be !=0  
     // Get the values of sub-expressions  
   const ValueType* left=m_left->resolveVectorSample(v,offset+getMaxSampleSize(),sampleNo,lroffset); // see note on  
     // calcBufss for why we can't put offset as the 2nd param above  
   const ValueType* right=m_right->resolveVectorSample(v,offset+2*getMaxSampleSize(),sampleNo,rroffset); // Note  
     // the right child starts further along.  
 LAZYDEBUG(cout << "Post sub calls in " << toString() << endl;)  
 LAZYDEBUG(cout << "shapes=" << DataTypes::shapeToString(m_left->getShape()) << "," << DataTypes::shapeToString(m_right->getShape()) << endl;)  
 LAZYDEBUG(cout << "chunksize=" << chunksize << endl << "leftstep=" << leftstep << " rightstep=" << rightstep;)  
 LAZYDEBUG(cout << " numsteps=" << numsteps << endl << "oleftstep=" << oleftstep << " orightstep=" << orightstep;)  
 LAZYDEBUG(cout << "onumsteps=" << onumsteps << endl;)  
 LAZYDEBUG(cout << " DPPS=" << m_left->getNumDPPSample() << "," <<m_right->getNumDPPSample() << endl;)  
 LAZYDEBUG(cout << "" << LS << RS << LN << RN << LES << RES <<LEN << REN <<   endl;)  
   
   
   double* resultp=&(v[offset]);     // results are stored at the vector offset we recieved  
   switch(m_op)  
   {  
     case ADD:  
         PROC_OP(NO_ARG,plus<double>());  
     break;  
     case SUB:  
     PROC_OP(NO_ARG,minus<double>());  
     break;  
     case MUL:  
     PROC_OP(NO_ARG,multiplies<double>());  
     break;  
     case DIV:  
     PROC_OP(NO_ARG,divides<double>());  
     break;  
     case POW:  
        PROC_OP(double (double,double),::pow);  
     break;  
     default:  
     throw DataException("Programmer error - resolveBinary can not resolve operator "+opToString(m_op)+".");  
   }  
   roffset=offset;  
   return &v;  
 }  
   
   
   
 /*  
   \brief Compute the value of the expression (tensor product) for the given sample.  
   \return Vector which stores the value of the subexpression for the given sample.  
   \param v A vector to store intermediate results.  
   \param offset Index in v to begin storing results.  
   \param sampleNo Sample number to evaluate.  
   \param roffset (output parameter) the offset in the return vector where the result begins.  
   
   The return value will be an existing vector so do not deallocate it.  
   If the result is stored in v it should be stored at the offset given.  
   Everything from offset to the end of v should be considered available for this method to use.  
 */  
 // This method assumes that any subexpressions which evaluate to Constant or Tagged Data  
 // have already been collapsed to IDENTITY. So we must have at least one expanded child.  
 // unlike the other resolve helpers, we must treat these datapoints separately.  
 DataTypes::ValueType*  
 DataLazy::resolveTProd(ValueType& v,  size_t offset, int sampleNo, size_t& roffset) const  
 {  
 LAZYDEBUG(cout << "Resolve TensorProduct: " << toString()  << " to offset " << offset<< endl;)  
   
   size_t lroffset=0, rroffset=0;    // offsets in the left and right result vectors  
     // first work out which of the children are expanded  
   bool leftExp=(m_left->m_readytype=='E');  
   bool rightExp=(m_right->m_readytype=='E');  
   int steps=getNumDPPSample();  
 /*  int leftStep=((leftExp && !rightExp)? m_right->getNoValues() : 0);  
   int rightStep=((rightExp && !leftExp)? m_left->getNoValues() : 0);*/  
   int leftStep=(leftExp? m_left->getNoValues() : 0);        // do not have scalars as input to this method  
   int rightStep=(rightExp?m_right->getNoValues() : 0);  
   
   int resultStep=getNoValues();  
     // Get the values of sub-expressions (leave a gap of one sample for the result).  
   int gap=offset+m_samplesize;    
   
 LAZYDEBUG(cout << "Query left with offset=" << gap << endl;)  
   
   const ValueType* left=m_left->resolveVectorSample(v,gap,sampleNo,lroffset);  
   gap+=m_left->getMaxSampleSize();  
   
   
 LAZYDEBUG(cout << "Query right with offset=" << gap << endl;)  
   
   
   const ValueType* right=m_right->resolveVectorSample(v,gap,sampleNo,rroffset);  
   
 LAZYDEBUG(cerr << "[Left shape]=" << DataTypes::shapeToString(m_left->getShape()) << "\n[Right shape]=" << DataTypes::shapeToString(m_right->getShape()) << " result=" <<DataTypes::shapeToString(getShape()) <<  endl;  
 cout << getNoValues() << endl;)  
 LAZYDEBUG(cerr << "Result of left=";)  
 LAZYDEBUG(cerr << "[" << lroffset << " .. " << lroffset+m_left->getNoValues() << "]" << endl;  
   
 for (int i=lroffset, limit=lroffset+(leftExp?m_left->getNoValues()*m_left->getNumDPPSample():m_left->getNoValues());i<limit;++i)  
 {  
 cout << "[" << setw(2) << i-lroffset << "] " << setw(10) << (*left)[i] << " ";  
 if (i%4==0) cout << endl;  
 })  
 LAZYDEBUG(cerr << "\nResult of right=" << endl;)  
 LAZYDEBUG(  
 for (int i=rroffset, limit=rroffset+(rightExp?m_right->getNoValues()*m_right->getNumDPPSample():m_right->getNoValues());i<limit;++i)  
 {  
 cerr << "[" <<  setw(2)<< i-rroffset << "] " << setw(10) << (*right)[i] << " ";  
 if (i%4==0) cout << endl;  
 }  
 cerr << endl;  
 )  
 LAZYDEBUG(cerr << "Post sub calls: " << toString() << endl;)  
 LAZYDEBUG(cout << "LeftExp=" << leftExp << " rightExp=" << rightExp << endl;)  
 LAZYDEBUG(cout << "LeftR=" << m_left->getRank() << " rightExp=" << m_right->getRank() << endl;)  
 LAZYDEBUG(cout << "LeftSize=" << m_left->getNoValues() << " RightSize=" << m_right->getNoValues() << endl;)  
 LAZYDEBUG(cout << "m_samplesize=" << m_samplesize << endl;)  
 LAZYDEBUG(cout << "outputshape=" << DataTypes::shapeToString(getShape()) << endl;)  
 LAZYDEBUG(cout << "DPPS=" << m_right->getNumDPPSample() <<"."<<endl;)  
   
   double* resultp=&(v[offset]);     // results are stored at the vector offset we recieved  
   switch(m_op)  
   {  
     case PROD:  
     for (int i=0;i<steps;++i,resultp+=resultStep)  
     {  
   
 LAZYDEBUG(cout << "lroffset=" << lroffset << "rroffset=" << rroffset << endl;)  
 LAZYDEBUG(cout << "l*=" << left << " r*=" << right << endl;)  
 LAZYDEBUG(cout << "m_SL=" << m_SL << " m_SM=" << m_SM << " m_SR=" << m_SR << endl;)  
   
           const double *ptr_0 = &((*left)[lroffset]);  
           const double *ptr_1 = &((*right)[rroffset]);  
   
 LAZYDEBUG(cout << DataTypes::pointToString(*left, m_left->getShape(),lroffset,"LEFT") << endl;)  
 LAZYDEBUG(cout << DataTypes::pointToString(*right,m_right->getShape(),rroffset, "RIGHT") << endl;)  
   
           matrix_matrix_product(m_SL, m_SM, m_SR, ptr_0, ptr_1, resultp, m_transpose);  
   
 LAZYDEBUG(cout << "Results--\n";  
 {  
   DataVector dv(getNoValues());  
 for (int z=0;z<getNoValues();++z)  
 {  
   cout << "[" << setw(2) << z<< "] " << setw(10) << resultp[z] << " ";  
   if (z%4==0) cout << endl;  
   dv[z]=resultp[z];  
 }  
 cout << endl << DataTypes::pointToString(dv,getShape(),0,"RESLT");  
 cout << "\nWritten to: " << resultp << " resultStep=" << resultStep << endl;  
 }  
 )  
       lroffset+=leftStep;  
       rroffset+=rightStep;  
     }  
     break;  
     default:  
     throw DataException("Programmer error - resolveTProduct can not resolve operator "+opToString(m_op)+".");  
   }  
   roffset=offset;  
   return &v;  
 }  
   
   
 #ifdef LAZY_NODE_STORAGE  
1015    
1016  // The result will be stored in m_samples  // The result will be stored in m_samples
1017  // The return value is a pointer to the DataVector, offset is the offset within the return value  // The return value is a pointer to the DataVector, offset is the offset within the return value
# Line 1667  LAZYDEBUG(cout << "Resolve sample " << t Line 1027  LAZYDEBUG(cout << "Resolve sample " << t
1027    if (m_op==IDENTITY)      if (m_op==IDENTITY)  
1028    {    {
1029      const ValueType& vec=m_id->getVectorRO();      const ValueType& vec=m_id->getVectorRO();
 //     if (m_readytype=='C')  
 //     {  
 //  roffset=0;      // all samples read from the same position  
 //  return &(m_samples);  
 //     }  
1030      roffset=m_id->getPointOffset(sampleNo, 0);      roffset=m_id->getPointOffset(sampleNo, 0);
1031    #ifdef LAZY_STACK_PROF
1032    int x;
1033    if (&x<stackend[omp_get_thread_num()])
1034    {
1035           stackend[omp_get_thread_num()]=&x;
1036    }
1037    #endif
1038      return &(vec);      return &(vec);
1039    }    }
1040    if (m_readytype!='E')    if (m_readytype!='E')
# Line 1685  LAZYDEBUG(cout << "Resolve sample " << t Line 1047  LAZYDEBUG(cout << "Resolve sample " << t
1047      return &(m_samples);        // sample is already resolved      return &(m_samples);        // sample is already resolved
1048    }    }
1049    m_sampleids[tid]=sampleNo;    m_sampleids[tid]=sampleNo;
1050    
1051    switch (getOpgroup(m_op))    switch (getOpgroup(m_op))
1052    {    {
1053    case G_UNARY:    case G_UNARY:
# Line 1695  LAZYDEBUG(cout << "Resolve sample " << t Line 1058  LAZYDEBUG(cout << "Resolve sample " << t
1058    case G_TENSORPROD: return resolveNodeTProd(tid, sampleNo, roffset);    case G_TENSORPROD: return resolveNodeTProd(tid, sampleNo, roffset);
1059    case G_NP1OUT_2P: return resolveNodeNP1OUT_2P(tid, sampleNo, roffset);    case G_NP1OUT_2P: return resolveNodeNP1OUT_2P(tid, sampleNo, roffset);
1060    case G_REDUCTION: return resolveNodeReduction(tid, sampleNo, roffset);    case G_REDUCTION: return resolveNodeReduction(tid, sampleNo, roffset);
1061      case G_CONDEVAL: return resolveNodeCondEval(tid, sampleNo, roffset);
1062    default:    default:
1063      throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");      throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");
1064    }    }
# Line 1852  DataLazy::resolveNodeReduction(int tid, Line 1216  DataLazy::resolveNodeReduction(int tid,
1216    
1217    roffset=m_samplesize*tid;    roffset=m_samplesize*tid;
1218    unsigned int ndpps=getNumDPPSample();    unsigned int ndpps=getNumDPPSample();
1219    unsigned int psize=DataTypes::noValues(getShape());    unsigned int psize=DataTypes::noValues(m_left->getShape());
1220    double* result=&(m_samples[roffset]);    double* result=&(m_samples[roffset]);
1221    switch (m_op)    switch (m_op)
1222    {    {
# Line 2013  DataLazy::resolveNodeNP1OUT_2P(int tid, Line 1377  DataLazy::resolveNodeNP1OUT_2P(int tid,
1377    return &m_samples;    return &m_samples;
1378  }  }
1379    
1380    const DataTypes::ValueType*
1381    DataLazy::resolveNodeCondEval(int tid, int sampleNo, size_t& roffset)
1382    {
1383      if (m_readytype!='E')
1384      {
1385        throw DataException("Programmer error - resolveNodeCondEval should only be called on expanded Data.");
1386      }
1387      if (m_op!=CONDEVAL)
1388      {
1389        throw DataException("Programmer error - resolveNodeCondEval should only be called on CONDEVAL nodes.");
1390      }
1391      size_t subroffset;
1392    
1393      const ValueType* maskres=m_mask->resolveNodeSample(tid, sampleNo, subroffset);
1394      const ValueType* srcres=0;
1395      if ((*maskres)[subroffset]>0)
1396      {
1397        srcres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1398      }
1399      else
1400      {
1401        srcres=m_right->resolveNodeSample(tid, sampleNo, subroffset);
1402      }
1403    
1404      // Now we need to copy the result
1405    
1406      roffset=m_samplesize*tid;
1407      for (int i=0;i<m_samplesize;++i)
1408      {
1409        m_samples[roffset+i]=(*srcres)[subroffset+i];  
1410      }
1411    
1412      return &m_samples;
1413    }
1414    
1415  // This method assumes that any subexpressions which evaluate to Constant or Tagged Data  // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1416  // have already been collapsed to IDENTITY. So we must have at least one expanded child.  // have already been collapsed to IDENTITY. So we must have at least one expanded child.
# Line 2152  LAZYDEBUG(cout << "Right res["<< rroffse Line 1549  LAZYDEBUG(cout << "Right res["<< rroffse
1549    
1550    
1551    roffset=m_samplesize*tid;    roffset=m_samplesize*tid;
1552    double* resultp=&(m_samples[roffset]);        // results are stored at the vector offset we recieved    double* resultp=&(m_samples[roffset]);        // results are stored at the vector offset we received
1553    switch(m_op)    switch(m_op)
1554    {    {
1555      case ADD:      case ADD:
# Line 2214  LAZYDEBUG(cout << "m_samplesize=" << m_s Line 1611  LAZYDEBUG(cout << "m_samplesize=" << m_s
1611  LAZYDEBUG(cout << "outputshape=" << DataTypes::shapeToString(getShape()) << endl;)  LAZYDEBUG(cout << "outputshape=" << DataTypes::shapeToString(getShape()) << endl;)
1612  LAZYDEBUG(cout << "DPPS=" << m_right->getNumDPPSample() <<"."<<endl;)  LAZYDEBUG(cout << "DPPS=" << m_right->getNumDPPSample() <<"."<<endl;)
1613    
1614    double* resultp=&(m_samples[offset]);     // results are stored at the vector offset we recieved    double* resultp=&(m_samples[offset]);     // results are stored at the vector offset we received
1615    switch(m_op)    switch(m_op)
1616    {    {
1617      case PROD:      case PROD:
# Line 2238  LAZYDEBUG(cout << DataTypes::pointToStri Line 1635  LAZYDEBUG(cout << DataTypes::pointToStri
1635    roffset=offset;    roffset=offset;
1636    return &m_samples;    return &m_samples;
1637  }  }
 #endif //LAZY_NODE_STORAGE  
   
 /*  
   \brief Compute the value of the expression for the given sample.  
   \return Vector which stores the value of the subexpression for the given sample.  
   \param v A vector to store intermediate results.  
   \param offset Index in v to begin storing results.  
   \param sampleNo Sample number to evaluate.  
   \param roffset (output parameter) the offset in the return vector where the result begins.  
   
   The return value will be an existing vector so do not deallocate it.  
 */  
 // the vector and the offset are a place where the method could write its data if it wishes  
 // it is not obligated to do so. For example, if it has its own storage already, it can use that.  
 // Hence the return value to indicate where the data is actually stored.  
 // Regardless, the storage should be assumed to be used, even if it isn't.  
   
 // the roffset is the offset within the returned vector where the data begins  
 const DataTypes::ValueType*  
 DataLazy::resolveVectorSample(ValueType& v, size_t offset, int sampleNo, size_t& roffset)  
 {  
 LAZYDEBUG(cout << "Resolve sample " << toString() << endl;)  
     // collapse so we have a 'E' node or an IDENTITY for some other type  
   if (m_readytype!='E' && m_op!=IDENTITY)  
   {  
     collapse();  
   }  
   if (m_op==IDENTITY)    
   {  
     const ValueType& vec=m_id->getVectorRO();  
     if (m_readytype=='C')  
     {  
     roffset=0;  
 LAZYDEBUG(cout << "Finish  sample " << toString() << endl;)  
     return &(vec);  
     }  
     roffset=m_id->getPointOffset(sampleNo, 0);  
 LAZYDEBUG(cout << "Finish  sample " << toString() << endl;)  
     return &(vec);  
   }  
   if (m_readytype!='E')  
   {  
     throw DataException("Programmer Error - Collapse did not produce an expanded node.");  
   }  
   switch (getOpgroup(m_op))  
   {  
   case G_UNARY:  
   case G_UNARY_P: return resolveUnary(v, offset,sampleNo,roffset);  
   case G_BINARY: return resolveBinary(v, offset,sampleNo,roffset);  
   case G_NP1OUT: return resolveNP1OUT(v, offset, sampleNo,roffset);  
   case G_NP1OUT_P: return resolveNP1OUT_P(v, offset, sampleNo,roffset);  
   case G_TENSORPROD: return resolveTProd(v,offset, sampleNo,roffset);  
   case G_NP1OUT_2P: return resolveNP1OUT_2P(v, offset, sampleNo, roffset);  
   case G_REDUCTION: return resolveReduction(v, offset, sampleNo, roffset);  
   default:  
     throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");  
   }  
1638    
 }  
1639    
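The removed comment block above describes the calling convention of the old buffer-based resolvers: the method may write into the vector the caller supplies, or it may return a pointer to storage it already owns, so the caller must always read through the returned pointer plus roffset and treat its own buffer as consumed either way. A hedged sketch of that contract with illustrative names (not the escript API):

#include <vector>
#include <cstring>
#include <cstddef>

typedef std::vector<double> ValueType;   // stand-in for DataTypes::ValueType

// Illustrative resolver: it may fill the buffer the caller supplied, or it may
// return a pointer to storage it already owns (e.g. an IDENTITY node's data).
const ValueType* resolveSampleSketch(ValueType& buffer, size_t offset,
                                     const ValueType& ownStorage,
                                     bool hasOwnStorage, size_t& roffset)
{
    if (hasOwnStorage)
    {
        roffset = 0;            // data starts at the front of the node's own vector
        return &ownStorage;     // caller's buffer goes unused but is still reserved
    }
    buffer[offset] = 42.0;      // stand-in for real evaluation into the caller's buffer
    roffset = offset;
    return &buffer;
}

int main()
{
    ValueType buffer(8), own(1, 7.0), dest(1);
    size_t roffset = 0;
    const ValueType* res = resolveSampleSketch(buffer, 4, own, true, roffset);
    // Correct usage: always copy from the returned vector starting at roffset.
    std::memcpy(&dest[0], &((*res)[roffset]), 1 * sizeof(double));
    return 0;
}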
1640  const DataTypes::ValueType*  const DataTypes::ValueType*
1641  DataLazy::resolveSample(BufferGroup& bg, int sampleNo, size_t& roffset)  DataLazy::resolveSample(int sampleNo, size_t& roffset)
1642  {  {
1643  #ifdef _OPENMP  #ifdef _OPENMP
1644      int tid=omp_get_thread_num();      int tid=omp_get_thread_num();
1645  #else  #else
1646      int tid=0;      int tid=0;
1647  #endif  #endif
1648  #ifdef LAZY_NODE_STORAGE  
1649      return resolveNodeSample(tid, sampleNo, roffset);  #ifdef LAZY_STACK_PROF
1650        stackstart[tid]=&tid;
1651        stackend[tid]=&tid;
1652        const DataTypes::ValueType* r=resolveNodeSample(tid, sampleNo, roffset);
1653        size_t d=(size_t)stackstart[tid]-(size_t)stackend[tid];
1654        #pragma omp critical
1655        if (d>maxstackuse)
1656        {
1657    cout << "Max resolve Stack use " << d << endl;
1658            maxstackuse=d;
1659        }
1660        return r;
1661  #else  #else
1662      return resolveVectorSample(bg.getBuffer(tid),bg.getOffset(tid),sampleNo,roffset);      return resolveNodeSample(tid, sampleNo, roffset);
1663  #endif  #endif
1664  }  }
1665    
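When LAZY_STACK_PROF is defined, resolveSample estimates stack consumption by recording the address of a local variable before the recursive resolve and letting the deepest frame record one of its own; the difference between the two addresses approximates the stack used. A minimal standalone sketch of that idea, assuming a downward-growing stack and hypothetical variable names:

#include <cstddef>
#include <cstdio>

static void* stackTop = 0;       // address of a local near the top of the call chain
static void* stackDeepest = 0;   // overwritten by each frame; the deepest write survives

void recurse(int depth)
{
    int marker;                  // a local whose address tracks this frame
    stackDeepest = &marker;      // on a downward-growing stack the deepest frame is lowest
    if (depth > 0)
    {
        recurse(depth - 1);
    }
}

int main()
{
    int marker;
    stackTop = &marker;
    recurse(100);
    size_t used = (size_t)stackTop - (size_t)stackDeepest;   // rough stack usage in bytes
    std::printf("approx stack use: %zu bytes\n", used);
    return 0;
}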
# Line 2320  DataLazy::resolveToIdentity() Line 1670  DataLazy::resolveToIdentity()
1670  {  {
1671     if (m_op==IDENTITY)     if (m_op==IDENTITY)
1672      return;      return;
 #ifndef LAZY_NODE_STORAGE  
    DataReady_ptr p=resolveVectorWorker();  
 #else  
1673     DataReady_ptr p=resolveNodeWorker();     DataReady_ptr p=resolveNodeWorker();
 #endif  
1674     makeIdentity(p);     makeIdentity(p);
1675  }  }
1676    
# Line 2340  void DataLazy::makeIdentity(const DataRe Line 1686  void DataLazy::makeIdentity(const DataRe
1686     else if(p->isExpanded()) {m_readytype='E';}     else if(p->isExpanded()) {m_readytype='E';}
1687     else if (p->isTagged()) {m_readytype='T';}     else if (p->isTagged()) {m_readytype='T';}
1688     else {throw DataException("Unknown DataReady instance in convertToIdentity constructor.");}     else {throw DataException("Unknown DataReady instance in convertToIdentity constructor.");}
    m_buffsRequired=1;  
1689     m_samplesize=p->getNumDPPSample()*p->getNoValues();     m_samplesize=p->getNumDPPSample()*p->getNoValues();
    m_maxsamplesize=m_samplesize;  
1690     m_left.reset();     m_left.reset();
1691     m_right.reset();     m_right.reset();
1692  }  }
# Line 2355  DataLazy::resolve() Line 1699  DataLazy::resolve()
1699      return m_id;      return m_id;
1700  }  }
1701    
 #ifdef LAZY_NODE_STORAGE  
1702    
1703  // This version of resolve uses storage in each node to hold results  /* This is really a static method but I think that caused problems in windows */
1704  DataReady_ptr  void
1705  DataLazy::resolveNodeWorker()  DataLazy::resolveGroupWorker(std::vector<DataLazy*>& dats)
1706  {  {
1707    if (m_readytype!='E')     // if the whole sub-expression is Constant or Tagged, then evaluate it normally    if (dats.empty())
1708    {    {
1709      collapse();      return;
1710    }    }
1711    if (m_op==IDENTITY)       // So a lazy expression of Constant or Tagged data will be returned here.    vector<DataLazy*> work;
1712      FunctionSpace fs=dats[0]->getFunctionSpace();
1713      bool match=true;
1714      for (int i=dats.size()-1;i>=0;--i)
1715    {    {
1716      return m_id;      if (dats[i]->m_readytype!='E')
1717        {
1718            dats[i]->collapse();
1719        }
1720        if (dats[i]->m_op!=IDENTITY)
1721        {
1722            work.push_back(dats[i]);
1723            if (fs!=dats[i]->getFunctionSpace())
1724            {
1725                match=false;
1726            }
1727        }
1728    }    }
1729      // from this point on we must have m_op!=IDENTITY and m_readytype=='E'    if (work.empty())
   DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(),  ValueType(getNoValues()));  
   ValueType& resvec=result->getVectorRW();  
   DataReady_ptr resptr=DataReady_ptr(result);  
   
   int sample;  
   int totalsamples=getNumSamples();  
   const ValueType* res=0;   // Storage for answer  
 LAZYDEBUG(cout << "Total number of samples=" <<totalsamples << endl;)  
   #pragma omp parallel for private(sample,res) schedule(static)  
   for (sample=0;sample<totalsamples;++sample)  
1730    {    {
1731      size_t roffset=0;      return;     // no work to do
1732      }
1733      if (match)    // all functionspaces match.  Yes I realise this is overly strict
1734      {     // it is possible that dats[0] is one of the objects which we discarded and
1735            // all the other functionspaces match.
1736        vector<DataExpanded*> dep;
1737        vector<ValueType*> vecs;
1738        for (int i=0;i<work.size();++i)
1739        {
1740            dep.push_back(new DataExpanded(fs,work[i]->getShape(), ValueType(work[i]->getNoValues())));
1741            vecs.push_back(&(dep[i]->getVectorRW()));
1742        }
1743        int totalsamples=work[0]->getNumSamples();
1744        const ValueType* res=0; // Storage for answer
1745        int sample;
1746        #pragma omp parallel private(sample, res)
1747        {
1748            size_t roffset=0;
1749            #pragma omp for schedule(static)
1750            for (sample=0;sample<totalsamples;++sample)
1751            {
1752            roffset=0;
1753            int j;
1754            for (j=work.size()-1;j>=0;--j)
1755            {
1756  #ifdef _OPENMP  #ifdef _OPENMP
1757      res=resolveNodeSample(omp_get_thread_num(),sample,roffset);                  res=work[j]->resolveNodeSample(omp_get_thread_num(),sample,roffset);
1758  #else  #else
1759      res=resolveNodeSample(0,sample,roffset);                  res=work[j]->resolveNodeSample(0,sample,roffset);
1760  #endif  #endif
1761  LAZYDEBUG(cout << "Sample #" << sample << endl;)                  DataVector::size_type outoffset=dep[j]->getPointOffset(sample,0);
1762  LAZYDEBUG(cout << "Final res[" << roffset<< "]=" << (*res)[roffset] << (*res)[roffset]<< endl; )                  memcpy(&((*vecs[j])[outoffset]),&((*res)[roffset]),work[j]->m_samplesize*sizeof(DataVector::ElementType));
1763      DataVector::size_type outoffset=result->getPointOffset(sample,0);          }
1764      memcpy(&(resvec[outoffset]),&((*res)[roffset]),m_samplesize*sizeof(DataVector::ElementType));          }
1765        }
1766        // Now we need to load the new results as identity ops into the lazy nodes
1767        for (int i=work.size()-1;i>=0;--i)
1768        {
1769            work[i]->makeIdentity(boost::dynamic_pointer_cast<DataReady>(dep[i]->getPtr()));
1770        }
1771      }
1772      else  // functionspaces do not match
1773      {
1774        for (int i=0;i<work.size();++i)
1775        {
1776            work[i]->resolveToIdentity();
1777        }
1778    }    }
   return resptr;  
1779  }  }
1780    
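resolveGroupWorker collapses Constant/Tagged expressions, skips anything that is already an IDENTITY, and only performs the single shared parallel sweep when every remaining expression lives on the same FunctionSpace; otherwise each one is resolved on its own. A simplified sketch of that control flow with stand-in types (not the real DataLazy/FunctionSpace classes):

#include <string>
#include <vector>

// Simplified stand-ins for DataLazy and FunctionSpace (illustration only).
struct LazyNode
{
    char readytype;                    // 'C', 'T' or 'E'
    bool identity;                     // true once resolved
    std::string fs;                    // stand-in for the FunctionSpace
    void collapse()      { identity = true; }
    void resolveAlone()  { identity = true; }
    void resolveSample(std::vector<double>& out, int sample) { out[sample] = 0.0; }
};

// Resolve a group of lazy expressions; the shared parallel sweep is only
// attempted when every remaining expression uses the same FunctionSpace.
void resolveGroup(std::vector<LazyNode*>& dats)
{
    if (dats.empty()) return;
    std::vector<LazyNode*> work;
    std::string fs = dats[0]->fs;
    bool match = true;
    for (size_t i = 0; i < dats.size(); ++i)
    {
        if (dats[i]->readytype != 'E') dats[i]->collapse();  // Constant/Tagged case
        if (!dats[i]->identity)
        {
            work.push_back(dats[i]);
            if (dats[i]->fs != fs) match = false;
        }
    }
    if (work.empty()) return;
    if (!match)                        // fall back: resolve each expression separately
    {
        for (size_t i = 0; i < work.size(); ++i) work[i]->resolveAlone();
        return;
    }
    const int totalsamples = 8;        // placeholder for work[0]->getNumSamples()
    std::vector<std::vector<double> > results(work.size(),
                                              std::vector<double>(totalsamples));
    #pragma omp parallel for schedule(static)
    for (int sample = 0; sample < totalsamples; ++sample)
    {
        for (size_t j = 0; j < work.size(); ++j)   // one pass covers all expressions
            work[j]->resolveSample(results[j], sample);
    }
    for (size_t i = 0; i < work.size(); ++i)
        work[i]->identity = true;      // the real code installs results via makeIdentity
}

int main()
{
    LazyNode a = { 'E', false, "fs1" };
    LazyNode b = { 'E', false, "fs1" };
    std::vector<LazyNode*> dats;
    dats.push_back(&a);
    dats.push_back(&b);
    resolveGroup(dats);
    return 0;
}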
 #endif // LAZY_NODE_STORAGE  
1781    
1782  // To simplify the memory management, all threads operate on one large vector, rather than one each.  
1783  // Each sample is evaluated independently and copied into the result DataExpanded.  // This version of resolve uses storage in each node to hold results
1784  DataReady_ptr  DataReady_ptr
1785  DataLazy::resolveVectorWorker()  DataLazy::resolveNodeWorker()
1786  {  {
   
 LAZYDEBUG(cout << "Sample size=" << m_samplesize << endl;)  
 LAZYDEBUG(cout << "Buffers=" << m_buffsRequired << endl;)  
1787    if (m_readytype!='E')     // if the whole sub-expression is Constant or Tagged, then evaluate it normally    if (m_readytype!='E')     // if the whole sub-expression is Constant or Tagged, then evaluate it normally
1788    {    {
1789      collapse();      collapse();
# Line 2414  LAZYDEBUG(cout << "Buffers=" << m_buffsR Line 1793  LAZYDEBUG(cout << "Buffers=" << m_buffsR
1793      return m_id;      return m_id;
1794    }    }
1795      // from this point on we must have m_op!=IDENTITY and m_readytype=='E'      // from this point on we must have m_op!=IDENTITY and m_readytype=='E'
   size_t threadbuffersize=m_maxsamplesize*(max(1,m_buffsRequired)); // Each thread needs to have enough  
     // storage to evaluate its expression  
   int numthreads=1;  
 #ifdef _OPENMP  
   numthreads=omp_get_max_threads();  
 #endif  
   ValueType v(numthreads*threadbuffersize);  
 LAZYDEBUG(cout << "Buffer created with size=" << v.size() << endl;)  
1796    DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(),  ValueType(getNoValues()));    DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(),  ValueType(getNoValues()));
1797    ValueType& resvec=result->getVectorRW();    ValueType& resvec=result->getVectorRW();
1798    DataReady_ptr resptr=DataReady_ptr(result);    DataReady_ptr resptr=DataReady_ptr(result);
1799    
1800    int sample;    int sample;
   size_t outoffset;     // offset in the output data  
1801    int totalsamples=getNumSamples();    int totalsamples=getNumSamples();
1802    const ValueType* res=0;   // Vector storing the answer    const ValueType* res=0;   // Storage for answer
   size_t resoffset=0;       // where in the vector to find the answer  
1803  LAZYDEBUG(cout << "Total number of samples=" <<totalsamples << endl;)  LAZYDEBUG(cout << "Total number of samples=" <<totalsamples << endl;)
1804    #pragma omp parallel for private(sample,resoffset,outoffset,res) schedule(static)    #pragma omp parallel private(sample,res)
   for (sample=0;sample<totalsamples;++sample)  
1805    {    {
1806  LAZYDEBUG(cout << "################################# " << sample << endl;)      size_t roffset=0;
1807    #ifdef LAZY_STACK_PROF
1808        stackstart[omp_get_thread_num()]=&roffset;
1809        stackend[omp_get_thread_num()]=&roffset;
1810    #endif
1811        #pragma omp for schedule(static)
1812        for (sample=0;sample<totalsamples;++sample)
1813        {
1814            roffset=0;
1815  #ifdef _OPENMP  #ifdef _OPENMP
1816      res=resolveVectorSample(v,threadbuffersize*omp_get_thread_num(),sample,resoffset);              res=resolveNodeSample(omp_get_thread_num(),sample,roffset);
1817  #else  #else
1818      res=resolveVectorSample(v,0,sample,resoffset);   // res would normally be v, but not if its a single IDENTITY op.              res=resolveNodeSample(0,sample,roffset);
1819  #endif  #endif
1820  LAZYDEBUG(cerr << "-------------------------------- " << endl;)  LAZYDEBUG(cout << "Sample #" << sample << endl;)
1821  LAZYDEBUG(cerr<< "Copying sample#" << sample << endl;)  LAZYDEBUG(cout << "Final res[" << roffset<< "]=" << (*res)[roffset] << (*res)[roffset]<< endl; )
1822      outoffset=result->getPointOffset(sample,0);              DataVector::size_type outoffset=result->getPointOffset(sample,0);
1823  LAZYDEBUG(cerr << "offset=" << outoffset << " from offset=" << resoffset << " " << m_samplesize << " doubles" << endl;)              memcpy(&(resvec[outoffset]),&((*res)[roffset]),m_samplesize*sizeof(DataVector::ElementType));
1824      for (unsigned int i=0;i<m_samplesize;++i,++outoffset,++resoffset)   // copy values into the output vector      }
1825      {    }
1826  LAZYDEBUG(cerr << "outoffset=" << outoffset << " resoffset=" << resoffset << " " << (*res)[resoffset]<< endl;)  #ifdef LAZY_STACK_PROF
1827      resvec[outoffset]=(*res)[resoffset];    for (int i=0;i<getNumberOfThreads();++i)
1828      }    {
1829  LAZYDEBUG(cerr << DataTypes::pointToString(resvec,getShape(),outoffset-m_samplesize+DataTypes::noValues(getShape()),"Final result:") << endl;)      size_t r=((size_t)stackstart[i] - (size_t)stackend[i]);
1830  LAZYDEBUG(cerr << "*********************************" << endl;)  //  cout << i << " " << stackstart[i] << " .. " << stackend[i] << " = " <<  r << endl;
1831        if (r>maxstackuse)
1832        {
1833            maxstackuse=r;
1834        }
1835    }    }
1836      cout << "Max resolve Stack use=" << maxstackuse << endl;
1837    #endif
1838    return resptr;    return resptr;
1839  }  }
1840    
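resolveNodeWorker evaluates every sample in parallel and memcpy's each result from the resolver's per-thread storage into the output vector at that sample's point offset. A self-contained sketch of the same loop structure, with a placeholder resolver and sample*samplesize standing in for getPointOffset():

#include <vector>
#include <cstring>
#include <cstddef>
#ifdef _OPENMP
#include <omp.h>
#endif

typedef std::vector<double> ValueType;   // stand-in for DataTypes::ValueType

static ValueType perThread;              // sized to numthreads*samplesize before use

// Stand-in for resolveNodeSample: fills this thread's slice and sets roffset.
const ValueType* resolveOneSample(int tid, int sample, size_t samplesize, size_t& roffset)
{
    roffset = samplesize * tid;
    for (size_t i = 0; i < samplesize; ++i)
        perThread[roffset + i] = sample;         // placeholder "evaluation"
    return &perThread;
}

int main()
{
    const int totalsamples = 16;
    const size_t samplesize = 4;
#ifdef _OPENMP
    int numthreads = omp_get_max_threads();
#else
    int numthreads = 1;
#endif
    perThread.resize(numthreads * samplesize);
    ValueType resvec(totalsamples * samplesize); // plays the role of the DataExpanded vector

    #pragma omp parallel
    {
        size_t roffset = 0;                      // private to each thread, as above
        #pragma omp for schedule(static)
        for (int sample = 0; sample < totalsamples; ++sample)
        {
#ifdef _OPENMP
            int tid = omp_get_thread_num();
#else
            int tid = 0;
#endif
            const ValueType* res = resolveOneSample(tid, sample, samplesize, roffset);
            size_t outoffset = sample * samplesize;   // real code asks getPointOffset()
            std::memcpy(&resvec[outoffset], &((*res)[roffset]),
                        samplesize * sizeof(double));
        }
    }
    return 0;
}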
# Line 2459  std::string Line 1842  std::string
1842  DataLazy::toString() const  DataLazy::toString() const
1843  {  {
1844    ostringstream oss;    ostringstream oss;
1845    oss << "Lazy Data:";    oss << "Lazy Data: [depth=" << m_height<< "] ";
1846    intoString(oss);    switch (escriptParams.getLAZY_STR_FMT())
1847      {
1848      case 1:   // tree format
1849        oss << endl;
1850        intoTreeString(oss,"");
1851        break;
1852      case 2:   // just the depth
1853        break;
1854      default:
1855        intoString(oss);
1856        break;
1857      }
1858    return oss.str();    return oss.str();
1859  }  }
1860    
# Line 2519  DataLazy::intoString(ostringstream& oss) Line 1913  DataLazy::intoString(ostringstream& oss)
1913      oss << ", " << m_axis_offset << ", " << m_transpose;      oss << ", " << m_axis_offset << ", " << m_transpose;
1914      oss << ')';      oss << ')';
1915      break;      break;
1916      case G_CONDEVAL:
1917        oss << opToString(m_op)<< '(' ;
1918        m_mask->intoString(oss);
1919        oss << " ? ";
1920        m_left->intoString(oss);
1921        oss << " : ";
1922        m_right->intoString(oss);
1923        oss << ')';
1924        break;
1925    default:    default:
1926      oss << "UNKNOWN";      oss << "UNKNOWN";
1927    }    }
1928  }  }
1929    
1930    
1931    void
1932    DataLazy::intoTreeString(ostringstream& oss, string indent) const
1933    {
1934      oss << '[' << m_rank << ':' << setw(3) << m_samplesize << "] " << indent;
1935      switch (getOpgroup(m_op))
1936      {
1937      case G_IDENTITY:
1938        if (m_id->isExpanded())
1939        {
1940           oss << "E";
1941        }
1942        else if (m_id->isTagged())
1943        {
1944          oss << "T";
1945        }
1946        else if (m_id->isConstant())
1947        {
1948          oss << "C";
1949        }
1950        else
1951        {
1952          oss << "?";
1953        }
1954        oss << '@' << m_id.get() << endl;
1955        break;
1956      case G_BINARY:
1957        oss << opToString(m_op) << endl;
1958        indent+='.';
1959        m_left->intoTreeString(oss, indent);
1960        m_right->intoTreeString(oss, indent);
1961        break;
1962      case G_UNARY:
1963      case G_UNARY_P:
1964      case G_NP1OUT:
1965      case G_NP1OUT_P:
1966      case G_REDUCTION:
1967        oss << opToString(m_op) << endl;
1968        indent+='.';
1969        m_left->intoTreeString(oss, indent);
1970        break;
1971      case G_TENSORPROD:
1972        oss << opToString(m_op) << endl;
1973        indent+='.';
1974        m_left->intoTreeString(oss, indent);
1975        m_right->intoTreeString(oss, indent);
1976        break;
1977      case G_NP1OUT_2P:
1978        oss << opToString(m_op) << ", " << m_axis_offset << ", " << m_transpose<< endl;
1979        indent+='.';
1980        m_left->intoTreeString(oss, indent);
1981        break;
1982      default:
1983        oss << "UNKNOWN";
1984      }
1985    }
1986    
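intoTreeString prints one node per line, prefixing the rank and sample size and growing the indent by one '.' per level of the expression tree. A small standalone sketch of that recursive layout (illustrative Node type, not the DataLazy class):

#include <iostream>
#include <sstream>
#include <iomanip>
#include <string>

// Minimal expression node for illustration only.
struct Node
{
    std::string op;          // "IDENTITY", "ADD", "sin", ...
    int rank;
    int samplesize;
    Node* left;
    Node* right;
};

// Print one node per line; children gain one extra '.' of indent per level,
// mirroring the intoTreeString layout above.
void intoTree(std::ostringstream& oss, const Node* n, std::string indent)
{
    oss << '[' << n->rank << ':' << std::setw(3) << n->samplesize << "] "
        << indent << n->op << std::endl;
    indent += '.';
    if (n->left)  intoTree(oss, n->left, indent);
    if (n->right) intoTree(oss, n->right, indent);
}

int main()
{
    Node a = {"IDENTITY", 0, 8, 0, 0};
    Node b = {"IDENTITY", 0, 8, 0, 0};
    Node sum = {"ADD", 0, 8, &a, &b};
    std::ostringstream oss;
    intoTree(oss, &sum, "");
    std::cout << oss.str();  // ADD on the first line, both operands indented by '.'
    return 0;
}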
1987    
1988  DataAbstract*  DataAbstract*
1989  DataLazy::deepCopy()  DataLazy::deepCopy()
1990  {  {

