/[escript]/branches/clazy/escriptcore/src/DataLazy.cpp

Diff of /branches/clazy/escriptcore/src/DataLazy.cpp


trunk/escript/src/DataLazy.cpp revision 2514 by jfenwick, Fri Jul 3 00:57:45 2009 UTC  vs.  trunk/escriptcore/src/DataLazy.cpp revision 5863 by jfenwick, Wed Jan 13 02:25:48 2016 UTC
# Line 1  Line 1 
1    
2  /*******************************************************  /*****************************************************************************
3  *  *
4  * Copyright (c) 2003-2008 by University of Queensland  * Copyright (c) 2003-2016 by The University of Queensland
5  * Earth Systems Science Computational Center (ESSCC)  * http://www.uq.edu.au
 * http://www.uq.edu.au/esscc  
6  *  *
7  * Primary Business: Queensland, Australia  * Primary Business: Queensland, Australia
8  * Licensed under the Open Software License version 3.0  * Licensed under the Open Software License version 3.0
9  * http://www.opensource.org/licenses/osl-3.0.php  * http://www.opensource.org/licenses/osl-3.0.php
10  *  *
11  *******************************************************/  * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12    * Development 2012-2013 by School of Earth Sciences
13    * Development from 2014 by Centre for Geoscience Computing (GeoComp)
14    *
15    *****************************************************************************/
16    
17    #define ESNEEDPYTHON
18    #include "esysUtils/first.h"
19    
20  #include "DataLazy.h"  #include "DataLazy.h"
21  #ifdef USE_NETCDF  #include "esysUtils/Esys_MPI.h"
 #include <netcdfcpp.h>  
 #endif  
 #ifdef PASO_MPI  
 #include <mpi.h>  
 #endif  
22  #ifdef _OPENMP  #ifdef _OPENMP
23  #include <omp.h>  #include <omp.h>
24  #endif  #endif
# Line 30  Line 30 
30    
31  #include "EscriptParams.h"  #include "EscriptParams.h"
32    
33    #ifdef USE_NETCDF
34    #include <netcdfcpp.h>
35    #endif
36    
37  #include <iomanip>      // for some fancy formatting in debug  #include <iomanip>      // for some fancy formatting in debug
38    
39  // #define LAZYDEBUG(X) if (privdebug){X;}  // #define LAZYDEBUG(X) if (privdebug){X;}
# Line 44  bool privdebug=false; Line 48  bool privdebug=false;
48    
49  // #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {cerr << "\n!!!!!!! SIZE LIMIT EXCEEDED " << m_children << ";" << m_height << endl << toString() << endl;resolveToIdentity();}  // #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {cerr << "\n!!!!!!! SIZE LIMIT EXCEEDED " << m_children << ";" << m_height << endl << toString() << endl;resolveToIdentity();}
50    
51  #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {resolveToIdentity();}  // #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {cerr << "SIZE LIMIT EXCEEDED " << m_height << endl;resolveToIdentity();}
52    
53    
54    #define SIZELIMIT if (m_height>escript::escriptParams.getTOO_MANY_LEVELS())  {if (escript::escriptParams.getLAZY_VERBOSE()){cerr << "SIZE LIMIT EXCEEDED height=" << m_height << endl;}resolveToIdentity();}
55    
56  /*  /*
57  How does DataLazy work?  How does DataLazy work?
58  ~~~~~~~~~~~~~~~~~~~~~~~  ~~~~~~~~~~~~~~~~~~~~~~~
# Line 58  A special operation, IDENTITY, stores an Line 64  A special operation, IDENTITY, stores an
64  This means that all "internal" nodes in the structure are instances of DataLazy.  This means that all "internal" nodes in the structure are instances of DataLazy.
65    
66  Each operation has a string representation as well as an opgroup - eg G_IDENTITY, G_BINARY, ...  Each operation has a string representation as well as an opgroup - eg G_IDENTITY, G_BINARY, ...
67  Note that IDENITY is not considered a unary operation.  Note that IDENTITY is not considered a unary operation.
68    
69  I am avoiding calling the structure formed a tree because it is not guaranteed to be one (eg c=a+a).  I am avoiding calling the structure formed a tree because it is not guaranteed to be one (eg c=a+a).
70  It must however form a DAG (directed acyclic graph).  It must however form a DAG (directed acyclic graph).
# Line 67  I will refer to individual DataLazy obje Line 73  I will refer to individual DataLazy obje
73  Each node also stores:  Each node also stores:
74  - m_readytype \in {'E','T','C','?'} ~ indicates what sort of DataReady would be produced if the expression was  - m_readytype \in {'E','T','C','?'} ~ indicates what sort of DataReady would be produced if the expression was
75      evaluated.      evaluated.
76  - m_buffsrequired ~ the larged number of samples which would need to be kept simultaneously in order to  - m_buffsrequired ~ the large number of samples which would need to be kept simultaneously in order to
77      evaluate the expression.      evaluate the expression.
78  - m_samplesize ~ the number of doubles stored in a sample.  - m_samplesize ~ the number of doubles stored in a sample.
79    
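A minimal sketch of the structure described above (a toy C++11 stand-in, not the escript API; every name below is illustrative) shows why c=a+a yields a DAG rather than a tree: the leaf node for a is simply shared by both child pointers.

    #include <algorithm>
    #include <memory>
    #include <string>

    struct ToyLazy;
    typedef std::shared_ptr<ToyLazy> ToyLazyPtr;

    // Every node is lazy; leaves wrap ready data behind an IDENTITY op.
    struct ToyLazy
    {
        std::string op;         // "IDENTITY" for leaves, otherwise the operation name
        ToyLazyPtr  left, right;
        char        readytype;  // 'C', 'T' or 'E', cf. m_readytype
        int         height;     // longest path down to a leaf, cf. m_height

        explicit ToyLazy(char rt)                       // leaf node
          : op("IDENTITY"), readytype(rt), height(0) {}
        ToyLazy(const std::string& o, ToyLazyPtr l, ToyLazyPtr r)
          : op(o), left(l), right(r),
            readytype((l->readytype=='E' || r->readytype=='E') ? 'E' : l->readytype),
            height(std::max(l->height, r->height)+1) {}
    };

    int main()
    {
        ToyLazyPtr a(new ToyLazy('E'));          // expanded leaf
        ToyLazyPtr c(new ToyLazy("add", a, a));  // c = a + a : 'a' is shared, so this is a DAG
        return c->height;                        // 1
    }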
# Line 109  namespace escript Line 115  namespace escript
115  namespace  namespace
116  {  {
117    
118    
119    // enabling this will print out whenever the maximum stacksize used by resolve increases
120    // it assumes _OPENMP is also in use
121    //#define LAZY_STACK_PROF
122    
123    
124    
125    #ifndef _OPENMP
126      #ifdef LAZY_STACK_PROF
127      #undef LAZY_STACK_PROF
128      #endif
129    #endif
130    
131    
132    #ifdef LAZY_STACK_PROF
133    std::vector<void*> stackstart(getNumberOfThreads());
134    std::vector<void*> stackend(getNumberOfThreads());
135    size_t maxstackuse=0;
136    #endif
137    
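The stackstart/stackend vectors above feed the profiling pattern used inside resolveNodeSample further down: take the address of a local variable and remember the lowest address each thread has reached. A small sketch of that idea, assuming the usual downward-growing stack; the function and parameter names here are made up for illustration only.

    #include <cstddef>

    // Record the lowest stack address a thread has reached; the stack used so far is
    // then the distance back to where resolution started (cf. maxstackuse above).
    static void noteStackDepth(void* start, void*& lowestSeen, size_t& maxUse)
    {
        char marker;                                // address of a local ~ current stack pointer
        if ((void*)&marker < lowestSeen)
            lowestSeen = &marker;
        size_t used = (char*)start - (char*)lowestSeen;
        if (used > maxUse)
            maxUse = used;
    }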
138  enum ES_opgroup  enum ES_opgroup
139  {  {
140     G_UNKNOWN,     G_UNKNOWN,
# Line 119  enum ES_opgroup Line 145  enum ES_opgroup
145     G_NP1OUT,        // non-pointwise op with one output     G_NP1OUT,        // non-pointwise op with one output
146     G_NP1OUT_P,      // non-pointwise op with one output requiring a parameter     G_NP1OUT_P,      // non-pointwise op with one output requiring a parameter
147     G_TENSORPROD,    // general tensor product     G_TENSORPROD,    // general tensor product
148     G_NP1OUT_2P      // non-pointwise op with one output requiring two params     G_NP1OUT_2P,     // non-pointwise op with one output requiring two params
149       G_REDUCTION,     // non-pointwise unary op with a scalar output
150       G_CONDEVAL
151  };  };
152    
153    
# Line 134  string ES_opstrings[]={"UNKNOWN","IDENTI Line 162  string ES_opstrings[]={"UNKNOWN","IDENTI
162              "symmetric","nonsymmetric",              "symmetric","nonsymmetric",
163              "prod",              "prod",
164              "transpose", "trace",              "transpose", "trace",
165              "swapaxes"};              "swapaxes",
166  int ES_opcount=41;              "minval", "maxval",
167                "condEval"};
168    int ES_opcount=44;
169  ES_opgroup opgroups[]={G_UNKNOWN,G_IDENTITY,G_BINARY,G_BINARY,G_BINARY,G_BINARY, G_BINARY,  ES_opgroup opgroups[]={G_UNKNOWN,G_IDENTITY,G_BINARY,G_BINARY,G_BINARY,G_BINARY, G_BINARY,
170              G_UNARY,G_UNARY,G_UNARY, //10              G_UNARY,G_UNARY,G_UNARY, //10
171              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,    // 17              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,    // 17
# Line 145  ES_opgroup opgroups[]={G_UNKNOWN,G_IDENT Line 175  ES_opgroup opgroups[]={G_UNKNOWN,G_IDENT
175              G_NP1OUT,G_NP1OUT,              G_NP1OUT,G_NP1OUT,
176              G_TENSORPROD,              G_TENSORPROD,
177              G_NP1OUT_P, G_NP1OUT_P,              G_NP1OUT_P, G_NP1OUT_P,
178              G_NP1OUT_2P};              G_NP1OUT_2P,
179                G_REDUCTION, G_REDUCTION,
180                G_CONDEVAL};
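The lookup tables ES_opstrings and opgroups, together with ES_opcount, must stay in step with the ES_optype enum whenever an operation such as condEval is added. A hypothetical sanity check, not present in the source, could at least assert that the lengths agree (the arrays are defined in this translation unit, so sizeof yields their element counts):

    #include <cassert>
    #include <cstddef>

    // Hypothetical helper: verify the per-operation tables have ES_opcount entries.
    inline void checkOpTables()
    {
        assert(sizeof(ES_opstrings)/sizeof(ES_opstrings[0]) == (size_t)ES_opcount);
        assert(sizeof(opgroups)/sizeof(opgroups[0])         == (size_t)ES_opcount);
    }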
181  inline  inline
182  ES_opgroup  ES_opgroup
183  getOpgroup(ES_optype op)  getOpgroup(ES_optype op)
# Line 166  resultFS(DataAbstract_ptr left, DataAbst Line 198  resultFS(DataAbstract_ptr left, DataAbst
198    FunctionSpace r=right->getFunctionSpace();    FunctionSpace r=right->getFunctionSpace();
199    if (l!=r)    if (l!=r)
200    {    {
201      if (r.probeInterpolation(l))      signed char res=r.getDomain()->preferredInterpolationOnDomain(r.getTypeCode(), l.getTypeCode());
202        if (res==1)
203      {      {
204      return l;      return l;
205      }      }
206      if (l.probeInterpolation(r))      if (res==-1)
207      {      {
208      return r;      return r;
209      }      }
# Line 190  resultShape(DataAbstract_ptr left, DataA Line 223  resultShape(DataAbstract_ptr left, DataA
223        {        {
224          throw DataException("Shapes not the same - shapes must match for (point)binary operations.");          throw DataException("Shapes not the same - shapes must match for (point)binary operations.");
225        }        }
226    
227        if (left->getRank()==0)   // we need to allow scalar * anything        if (left->getRank()==0)   // we need to allow scalar * anything
228        {        {
229          return right->getShape();          return right->getShape();
# Line 217  resultShape(DataAbstract_ptr left, ES_op Line 251  resultShape(DataAbstract_ptr left, ES_op
251          int rank=left->getRank();          int rank=left->getRank();
252          if (axis_offset<0 || axis_offset>rank)          if (axis_offset<0 || axis_offset>rank)
253          {          {
254                 throw DataException("Error - Data::transpose must have 0 <= axis_offset <= rank=" + rank);              stringstream e;
255              }              e << "Error - Data::transpose must have 0 <= axis_offset <= rank=" << rank;
256              for (int i=0; i<rank; i++)              throw DataException(e.str());
257            }
258            for (int i=0; i<rank; i++)
259          {          {
260             int index = (axis_offset+i)%rank;             int index = (axis_offset+i)%rank;
261                 sh.push_back(s[index]); // Append to new shape             sh.push_back(s[index]); // Append to new shape
262              }          }
263          return sh;          return sh;
264         }         }
265      break;      break;
# Line 302  SwapShape(DataAbstract_ptr left, const i Line 338  SwapShape(DataAbstract_ptr left, const i
338          throw DataException("Error - Data::swapaxes argument must have at least rank 2.");          throw DataException("Error - Data::swapaxes argument must have at least rank 2.");
339       }       }
340       if (axis0<0 || axis0>rank-1) {       if (axis0<0 || axis0>rank-1) {
341          throw DataException("Error - Data::swapaxes: axis0 must be between 0 and rank-1=" + rank-1);          stringstream e;
342            e << "Error - Data::swapaxes: axis0 must be between 0 and rank-1=" << (rank-1);
343            throw DataException(e.str());
344       }       }
345       if (axis1<0 || axis1>rank-1) {       if (axis1<0 || axis1>rank-1) {
346           throw DataException("Error - Data::swapaxes: axis1 must be between 0 and rank-1=" + rank-1);          stringstream e;
347            e << "Error - Data::swapaxes: axis1 must be between 0 and rank-1=" << (rank-1);
348            throw DataException(e.str());
349       }       }
350       if (axis0 == axis1) {       if (axis0 == axis1) {
351           throw DataException("Error - Data::swapaxes: axis indices must be different.");           throw DataException("Error - Data::swapaxes: axis indices must be different.");
# Line 394  GTPShape(DataAbstract_ptr left, DataAbst Line 434  GTPShape(DataAbstract_ptr left, DataAbst
434    return shape2;    return shape2;
435  }  }
436    
 // determine the number of samples requires to evaluate an expression combining left and right  
 // NP1OUT needs an extra buffer because we can't write the answers over the top of the input.  
 // The same goes for G_TENSORPROD  
 // It might seem that pointwise binary ops (G_BINARY) could be written over the top of the lefts.  
 // This would be true were it not for the possibility that the LHS could be a scalar which needs to be examined  
 // multiple times  
 int  
 calcBuffs(const DataLazy_ptr& left, const DataLazy_ptr& right, ES_optype op)  
 {  
    switch(getOpgroup(op))  
    {  
    case G_IDENTITY: return 1;  
    case G_BINARY: return 1+max(left->getBuffsRequired(),right->getBuffsRequired()+1);  
    case G_UNARY:  
    case G_UNARY_P: return max(left->getBuffsRequired(),1);  
    case G_NP1OUT: return 1+max(left->getBuffsRequired(),1);  
    case G_NP1OUT_P: return 1+max(left->getBuffsRequired(),1);  
    case G_TENSORPROD: return 1+max(left->getBuffsRequired(),right->getBuffsRequired()+1);  
    case G_NP1OUT_2P: return 1+max(left->getBuffsRequired(),1);  
    default:  
     throw DataException("Programmer Error - attempt to calcBuffs() for operator "+opToString(op)+".");  
    }  
 }  
   
   
437  }   // end anonymous namespace  }   // end anonymous namespace
438    
439    
# Line 434  opToString(ES_optype op) Line 449  opToString(ES_optype op)
449    return ES_opstrings[op];    return ES_opstrings[op];
450  }  }
451    
 #ifdef LAZY_NODE_STORAGE  
452  void DataLazy::LazyNodeSetup()  void DataLazy::LazyNodeSetup()
453  {  {
454  #ifdef _OPENMP  #ifdef _OPENMP
# Line 451  void DataLazy::LazyNodeSetup() Line 465  void DataLazy::LazyNodeSetup()
465      m_sampleids[0]=-1;      m_sampleids[0]=-1;
466  #endif  // _OPENMP  #endif  // _OPENMP
467  }  }
 #endif   // LAZY_NODE_STORAGE  
468    
469    
470  // Creates an identity node  // Creates an identity node
471  DataLazy::DataLazy(DataAbstract_ptr p)  DataLazy::DataLazy(DataAbstract_ptr p)
472      : parent(p->getFunctionSpace(),p->getShape())      : parent(p->getFunctionSpace(),p->getShape())
 #ifdef LAZY_NODE_STORAGE  
473      ,m_sampleids(0),      ,m_sampleids(0),
474      m_samples(1)      m_samples(1)
 #endif  
475  {  {
476     if (p->isLazy())     if (p->isLazy())
477     {     {
# Line 480  LAZYDEBUG(cout << "(1)Lazy created with Line 491  LAZYDEBUG(cout << "(1)Lazy created with
491  }  }
492    
493  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op)  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op)
494      : parent(left->getFunctionSpace(),left->getShape()),      : parent(left->getFunctionSpace(),(getOpgroup(op)!=G_REDUCTION)?left->getShape():DataTypes::scalarShape),
495      m_op(op),      m_op(op),
496      m_axis_offset(0),      m_axis_offset(0),
497      m_transpose(0),      m_transpose(0),
498      m_SL(0), m_SM(0), m_SR(0)      m_SL(0), m_SM(0), m_SR(0)
499  {  {
500     if ((getOpgroup(op)!=G_UNARY) && (getOpgroup(op)!=G_NP1OUT))     if ((getOpgroup(op)!=G_UNARY) && (getOpgroup(op)!=G_NP1OUT) && (getOpgroup(op)!=G_REDUCTION))
501     {     {
502      throw DataException("Programmer error - constructor DataLazy(left, op) will only process UNARY operations.");      throw DataException("Programmer error - constructor DataLazy(left, op) will only process UNARY operations.");
503     }     }
# Line 502  DataLazy::DataLazy(DataAbstract_ptr left Line 513  DataLazy::DataLazy(DataAbstract_ptr left
513     }     }
514     m_readytype=lleft->m_readytype;     m_readytype=lleft->m_readytype;
515     m_left=lleft;     m_left=lleft;
    m_buffsRequired=calcBuffs(m_left, m_right,m_op); // yeah m_right will be null at this point  
516     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
    m_maxsamplesize=max(m_samplesize,m_left->getMaxSampleSize());  
517     m_children=m_left->m_children+1;     m_children=m_left->m_children+1;
518     m_height=m_left->m_height+1;     m_height=m_left->m_height+1;
 #ifdef LAZY_NODE_STORAGE  
519     LazyNodeSetup();     LazyNodeSetup();
 #endif  
520     SIZELIMIT     SIZELIMIT
521  }  }
522    
# Line 576  LAZYDEBUG(cout << "Right " << right.get( Line 583  LAZYDEBUG(cout << "Right " << right.get(
583      m_readytype='C';      m_readytype='C';
584     }     }
585     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
    m_maxsamplesize=max(max(m_samplesize,m_right->getMaxSampleSize()),m_left->getMaxSampleSize());    
    m_buffsRequired=calcBuffs(m_left, m_right,m_op);  
586     m_children=m_left->m_children+m_right->m_children+2;     m_children=m_left->m_children+m_right->m_children+2;
587     m_height=max(m_left->m_height,m_right->m_height)+1;     m_height=max(m_left->m_height,m_right->m_height)+1;
 #ifdef LAZY_NODE_STORAGE  
588     LazyNodeSetup();     LazyNodeSetup();
 #endif  
589     SIZELIMIT     SIZELIMIT
590  LAZYDEBUG(cout << "(3)Lazy created with " << m_samplesize << endl;)  LAZYDEBUG(cout << "(3)Lazy created with " << m_samplesize << endl;)
591  }  }
# Line 646  DataLazy::DataLazy(DataAbstract_ptr left Line 649  DataLazy::DataLazy(DataAbstract_ptr left
649      m_readytype='C';      m_readytype='C';
650     }     }
651     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
    m_maxsamplesize=max(max(m_samplesize,m_right->getMaxSampleSize()),m_left->getMaxSampleSize());    
    m_buffsRequired=calcBuffs(m_left, m_right,m_op);  
652     m_children=m_left->m_children+m_right->m_children+2;     m_children=m_left->m_children+m_right->m_children+2;
653     m_height=max(m_left->m_height,m_right->m_height)+1;     m_height=max(m_left->m_height,m_right->m_height)+1;
 #ifdef LAZY_NODE_STORAGE  
654     LazyNodeSetup();     LazyNodeSetup();
 #endif  
655     SIZELIMIT     SIZELIMIT
656  LAZYDEBUG(cout << "(4)Lazy created with " << m_samplesize << endl;)  LAZYDEBUG(cout << "(4)Lazy created with " << m_samplesize << endl;)
657  }  }
# Line 680  DataLazy::DataLazy(DataAbstract_ptr left Line 679  DataLazy::DataLazy(DataAbstract_ptr left
679     }     }
680     m_readytype=lleft->m_readytype;     m_readytype=lleft->m_readytype;
681     m_left=lleft;     m_left=lleft;
    m_buffsRequired=calcBuffs(m_left, m_right,m_op); // yeah m_right will be null at this point  
682     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
    m_maxsamplesize=max(m_samplesize,m_left->getMaxSampleSize());  
683     m_children=m_left->m_children+1;     m_children=m_left->m_children+1;
684     m_height=m_left->m_height+1;     m_height=m_left->m_height+1;
 #ifdef LAZY_NODE_STORAGE  
685     LazyNodeSetup();     LazyNodeSetup();
 #endif  
686     SIZELIMIT     SIZELIMIT
687  LAZYDEBUG(cout << "(5)Lazy created with " << m_samplesize << endl;)  LAZYDEBUG(cout << "(5)Lazy created with " << m_samplesize << endl;)
688  }  }
# Line 714  DataLazy::DataLazy(DataAbstract_ptr left Line 709  DataLazy::DataLazy(DataAbstract_ptr left
709     }     }
710     m_readytype=lleft->m_readytype;     m_readytype=lleft->m_readytype;
711     m_left=lleft;     m_left=lleft;
    m_buffsRequired=calcBuffs(m_left, m_right,m_op); // yeah m_right will be null at this point  
712     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
    m_maxsamplesize=max(m_samplesize,m_left->getMaxSampleSize());  
713     m_children=m_left->m_children+1;     m_children=m_left->m_children+1;
714     m_height=m_left->m_height+1;     m_height=m_left->m_height+1;
 #ifdef LAZY_NODE_STORAGE  
715     LazyNodeSetup();     LazyNodeSetup();
 #endif  
716     SIZELIMIT     SIZELIMIT
717  LAZYDEBUG(cout << "(6)Lazy created with " << m_samplesize << endl;)  LAZYDEBUG(cout << "(6)Lazy created with " << m_samplesize << endl;)
718  }  }
# Line 749  DataLazy::DataLazy(DataAbstract_ptr left Line 740  DataLazy::DataLazy(DataAbstract_ptr left
740     }     }
741     m_readytype=lleft->m_readytype;     m_readytype=lleft->m_readytype;
742     m_left=lleft;     m_left=lleft;
    m_buffsRequired=calcBuffs(m_left, m_right,m_op); // yeah m_right will be null at this point  
743     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
    m_maxsamplesize=max(m_samplesize,m_left->getMaxSampleSize());  
744     m_children=m_left->m_children+1;     m_children=m_left->m_children+1;
745     m_height=m_left->m_height+1;     m_height=m_left->m_height+1;
 #ifdef LAZY_NODE_STORAGE  
746     LazyNodeSetup();     LazyNodeSetup();
 #endif  
747     SIZELIMIT     SIZELIMIT
748  LAZYDEBUG(cout << "(7)Lazy created with " << m_samplesize << endl;)  LAZYDEBUG(cout << "(7)Lazy created with " << m_samplesize << endl;)
749  }  }
750    
751  DataLazy::~DataLazy()  
752    namespace
753  {  {
 #ifdef LAZY_NODE_SETUP  
    delete[] m_sampleids;  
    delete[] m_samples;  
 #endif  
 }  
754    
755        inline int max3(int a, int b, int c)
756        {
757        int t=(a>b?a:b);
758        return (t>c?t:c);
759    
760  int      }
 DataLazy::getBuffsRequired() const  
 {  
     return m_buffsRequired;  
761  }  }
762    
763    DataLazy::DataLazy(DataAbstract_ptr mask, DataAbstract_ptr left, DataAbstract_ptr right/*, double tol*/)
764  size_t      : parent(left->getFunctionSpace(), left->getShape()),
765  DataLazy::getMaxSampleSize() const      m_op(CONDEVAL),
766        m_axis_offset(0),
767        m_transpose(0),
768        m_tol(0)
769  {  {
770      return m_maxsamplesize;  
771       DataLazy_ptr lmask;
772       DataLazy_ptr lleft;
773       DataLazy_ptr lright;
774       if (!mask->isLazy())
775       {
776        lmask=DataLazy_ptr(new DataLazy(mask));
777       }
778       else
779       {
780        lmask=dynamic_pointer_cast<DataLazy>(mask);
781       }
782       if (!left->isLazy())
783       {
784        lleft=DataLazy_ptr(new DataLazy(left));
785       }
786       else
787       {
788        lleft=dynamic_pointer_cast<DataLazy>(left);
789       }
790       if (!right->isLazy())
791       {
792        lright=DataLazy_ptr(new DataLazy(right));
793       }
794       else
795       {
796        lright=dynamic_pointer_cast<DataLazy>(right);
797       }
798       m_readytype=lmask->m_readytype;
799       if ((lleft->m_readytype!=lright->m_readytype) || (lmask->m_readytype!=lleft->m_readytype))
800       {
801        throw DataException("Programmer Error - condEval arguments must have the same readytype");
802       }
803       m_left=lleft;
804       m_right=lright;
805       m_mask=lmask;
806       m_samplesize=getNumDPPSample()*getNoValues();
807       m_children=m_left->m_children+m_right->m_children+m_mask->m_children+1;
808       m_height=max3(m_left->m_height,m_right->m_height,m_mask->m_height)+1;
809       LazyNodeSetup();
810       SIZELIMIT
811    LAZYDEBUG(cout << "(8)Lazy created with " << m_samplesize << endl;)
812  }  }
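A speculative usage sketch for the three-argument constructor above (the wrapper name makeLazyCondEval is invented here; how escript actually reaches this constructor is not shown in this diff):

    // Sketch only: wrap three operands in a lazy conditional node. Non-lazy arguments are
    // promoted to IDENTITY DataLazy nodes by the constructor itself, as seen above.
    DataAbstract_ptr makeLazyCondEval(DataAbstract_ptr mask,
                                      DataAbstract_ptr whenTrue,
                                      DataAbstract_ptr whenFalse)
    {
        return DataAbstract_ptr(new DataLazy(mask, whenTrue, whenFalse));
    }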
813    
814    
815    
816  size_t  DataLazy::~DataLazy()
 DataLazy::getSampleBufferSize() const  
817  {  {
818      return m_maxsamplesize*(max(1,m_buffsRequired));     delete[] m_sampleids;
819  }  }
820    
821    
822  /*  /*
823    \brief Evaluates the expression using methods on Data.    \brief Evaluates the expression using methods on Data.
824    This does the work for the collapse method.    This does the work for the collapse method.
825    For reasons of efficiency do not call this method on DataExpanded nodes.    For reasons of efficiency do not call this method on DataExpanded nodes.
826  */  */
827  DataReady_ptr  DataReady_ptr
828  DataLazy::collapseToReady()  DataLazy::collapseToReady() const
829  {  {
830    if (m_readytype=='E')    if (m_readytype=='E')
831    { // this is more an efficiency concern than anything else    { // this is more an efficiency concern than anything else
# Line 933  DataLazy::collapseToReady() Line 961  DataLazy::collapseToReady()
961      case SWAP:      case SWAP:
962      result=left.swapaxes(m_axis_offset, m_transpose);      result=left.swapaxes(m_axis_offset, m_transpose);
963      break;      break;
964        case MINVAL:
965        result=left.minval();
966        break;
967        case MAXVAL:
968        result=left.maxval();
969        break;
970      default:      default:
971      throw DataException("Programmer error - collapseToReady does not know how to resolve operator "+opToString(m_op)+".");      throw DataException("Programmer error - collapseToReady does not know how to resolve operator "+opToString(m_op)+".");
972    }    }
# Line 946  DataLazy::collapseToReady() Line 980  DataLazy::collapseToReady()
980     the purpose of using DataLazy in the first place).     the purpose of using DataLazy in the first place).
981  */  */
982  void  void
983  DataLazy::collapse()  DataLazy::collapse() const
984  {  {
985    if (m_op==IDENTITY)    if (m_op==IDENTITY)
986    {    {
# Line 960  DataLazy::collapse() Line 994  DataLazy::collapse()
994    m_op=IDENTITY;    m_op=IDENTITY;
995  }  }
996    
 /*  
   \brief Compute the value of the expression (unary operation) for the given sample.  
   \return Vector which stores the value of the subexpression for the given sample.  
   \param v A vector to store intermediate results.  
   \param offset Index in v to begin storing results.  
   \param sampleNo Sample number to evaluate.  
   \param roffset (output parameter) the offset in the return vector where the result begins.  
   
   The return value will be an existing vector so do not deallocate it.  
   If the result is stored in v it should be stored at the offset given.  
   Everything from offset to the end of v should be considered available for this method to use.  
 */  
 DataTypes::ValueType*  
 DataLazy::resolveUnary(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const  
 {  
     // we assume that any collapsing has been done before we get here  
     // since we only have one argument we don't need to think about only  
     // processing single points.  
   if (m_readytype!='E')  
   {  
     throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");  
   }  
   const ValueType* vleft=m_left->resolveSample(v,offset,sampleNo,roffset);  
   const double* left=&((*vleft)[roffset]);  
   double* result=&(v[offset]);  
   roffset=offset;  
   switch (m_op)  
   {  
     case SIN:    
     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sin);  
     break;  
     case COS:  
     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cos);  
     break;  
     case TAN:  
     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tan);  
     break;  
     case ASIN:  
     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::asin);  
     break;  
     case ACOS:  
     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::acos);  
     break;  
     case ATAN:  
     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::atan);  
     break;  
     case SINH:  
     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sinh);  
     break;  
     case COSH:  
     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cosh);  
     break;  
     case TANH:  
     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tanh);  
     break;  
     case ERF:  
 #if defined (_WIN32) && !defined(__INTEL_COMPILER)  
     throw DataException("Error - Data:: erf function is not supported on _WIN32 platforms.");  
 #else  
     tensor_unary_operation(m_samplesize, left, result, ::erf);  
     break;  
 #endif  
    case ASINH:  
 #if defined (_WIN32) && !defined(__INTEL_COMPILER)  
     tensor_unary_operation(m_samplesize, left, result, escript::asinh_substitute);  
 #else  
     tensor_unary_operation(m_samplesize, left, result, ::asinh);  
 #endif    
     break;  
    case ACOSH:  
 #if defined (_WIN32) && !defined(__INTEL_COMPILER)  
     tensor_unary_operation(m_samplesize, left, result, escript::acosh_substitute);  
 #else  
     tensor_unary_operation(m_samplesize, left, result, ::acosh);  
 #endif    
     break;  
    case ATANH:  
 #if defined (_WIN32) && !defined(__INTEL_COMPILER)  
     tensor_unary_operation(m_samplesize, left, result, escript::atanh_substitute);  
 #else  
     tensor_unary_operation(m_samplesize, left, result, ::atanh);  
 #endif    
     break;  
     case LOG10:  
     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log10);  
     break;  
     case LOG:  
     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log);  
     break;  
     case SIGN:  
     tensor_unary_operation(m_samplesize, left, result, escript::fsign);  
     break;  
     case ABS:  
     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::fabs);  
     break;  
     case NEG:  
     tensor_unary_operation(m_samplesize, left, result, negate<double>());  
     break;  
     case POS:  
     // it doesn't mean anything for delayed.  
     // it will just trigger a deep copy of the lazy object  
     throw DataException("Programmer error - POS not supported for lazy data.");  
     break;  
     case EXP:  
     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::exp);  
     break;  
     case SQRT:  
     tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sqrt);  
     break;  
     case RECIP:  
     tensor_unary_operation(m_samplesize, left, result, bind1st(divides<double>(),1.));  
     break;  
     case GZ:  
     tensor_unary_operation(m_samplesize, left, result, bind2nd(greater<double>(),0.0));  
     break;  
     case LZ:  
     tensor_unary_operation(m_samplesize, left, result, bind2nd(less<double>(),0.0));  
     break;  
     case GEZ:  
     tensor_unary_operation(m_samplesize, left, result, bind2nd(greater_equal<double>(),0.0));  
     break;  
     case LEZ:  
     tensor_unary_operation(m_samplesize, left, result, bind2nd(less_equal<double>(),0.0));  
     break;  
 // There are actually G_UNARY_P but I don't see a compelling reason to treat them differently  
     case NEZ:  
     tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsGT(),m_tol));  
     break;  
     case EZ:  
     tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsLTE(),m_tol));  
     break;  
   
     default:  
     throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");  
   }  
   return &v;  
 }  
   
   
   
   
997    
998    
 /*  
   \brief Compute the value of the expression (unary operation) for the given sample.  
   \return Vector which stores the value of the subexpression for the given sample.  
   \param v A vector to store intermediate results.  
   \param offset Index in v to begin storing results.  
   \param sampleNo Sample number to evaluate.  
   \param roffset (output parameter) the offset in the return vector where the result begins.  
   
   The return value will be an existing vector so do not deallocate it.  
   If the result is stored in v it should be stored at the offset given.  
   Everything from offset to the end of v should be considered available for this method to use.  
 */  
 DataTypes::ValueType*  
 DataLazy::resolveNP1OUT(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const  
 {  
     // we assume that any collapsing has been done before we get here  
     // since we only have one argument we don't need to think about only  
     // processing single points.  
   if (m_readytype!='E')  
   {  
     throw DataException("Programmer error - resolveNP1OUT should only be called on expanded Data.");  
   }  
     // since we can't write the result over the input, we need a result offset further along  
   size_t subroffset=roffset+m_samplesize;  
 LAZYDEBUG(cerr << "subroffset=" << subroffset << endl;)  
   const ValueType* vleft=m_left->resolveSample(v,offset+m_left->m_samplesize,sampleNo,subroffset);  
   roffset=offset;  
   size_t loop=0;  
   size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;  
   size_t step=getNoValues();  
   switch (m_op)  
   {  
     case SYM:  
     for (loop=0;loop<numsteps;++loop)  
     {  
         DataMaths::symmetric(*vleft,m_left->getShape(),subroffset, v, getShape(), offset);  
         subroffset+=step;  
         offset+=step;  
     }  
     break;  
     case NSYM:  
     for (loop=0;loop<numsteps;++loop)  
     {  
         DataMaths::nonsymmetric(*vleft,m_left->getShape(),subroffset, v, getShape(), offset);  
         subroffset+=step;  
         offset+=step;  
     }  
     break;  
     default:  
     throw DataException("Programmer error - resolveNP1OUT can not resolve operator "+opToString(m_op)+".");  
   }  
   return &v;  
 }  
   
 /*  
   \brief Compute the value of the expression (unary operation) for the given sample.  
   \return Vector which stores the value of the subexpression for the given sample.  
   \param v A vector to store intermediate results.  
   \param offset Index in v to begin storing results.  
   \param sampleNo Sample number to evaluate.  
   \param roffset (output parameter) the offset in the return vector where the result begins.  
   
   The return value will be an existing vector so do not deallocate it.  
   If the result is stored in v it should be stored at the offset given.  
   Everything from offset to the end of v should be considered available for this method to use.  
 */  
 DataTypes::ValueType*  
 DataLazy::resolveNP1OUT_P(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const  
 {  
     // we assume that any collapsing has been done before we get here  
     // since we only have one argument we don't need to think about only  
     // processing single points.  
   if (m_readytype!='E')  
   {  
     throw DataException("Programmer error - resolveNP1OUT_P should only be called on expanded Data.");  
   }  
     // since we can't write the result over the input, we need a result offset further along  
   size_t subroffset;  
   const ValueType* vleft=m_left->resolveSample(v,offset+m_left->m_samplesize,sampleNo,subroffset);  
 LAZYDEBUG(cerr << "srcsamplesize=" << offset+m_left->m_samplesize << " beg=" << subroffset << endl;)  
 LAZYDEBUG(cerr << "Offset for 5800=" << getPointOffset(5800/getNumDPPSample(),5800%getNumDPPSample()) << endl;)  
   roffset=offset;  
   size_t loop=0;  
   size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;  
   size_t outstep=getNoValues();  
   size_t instep=m_left->getNoValues();  
 LAZYDEBUG(cerr << "instep=" << instep << " outstep=" << outstep<< " numsteps=" << numsteps << endl;)  
   switch (m_op)  
   {  
     case TRACE:  
     for (loop=0;loop<numsteps;++loop)  
     {  
 size_t zz=sampleNo*getNumDPPSample()+loop;  
 if (zz==5800)  
 {  
 LAZYDEBUG(cerr << "point=" <<  zz<< endl;)  
 LAZYDEBUG(cerr << "Input to  trace=" << DataTypes::pointToString(*vleft,m_left->getShape(),subroffset,"") << endl;)  
 LAZYDEBUG(cerr << "Offset for point=" << getPointOffset(5800/getNumDPPSample(),5800%getNumDPPSample()) << " vs ";)  
 LAZYDEBUG(cerr << subroffset << endl;)  
 LAZYDEBUG(cerr << "output=" << offset << endl;)  
 }  
             DataMaths::trace(*vleft,m_left->getShape(),subroffset, v ,getShape(),offset,m_axis_offset);  
 if (zz==5800)  
 {  
 LAZYDEBUG(cerr << "Result of trace=" << DataTypes::pointToString(v,getShape(),offset,"") << endl;)  
 }  
         subroffset+=instep;  
         offset+=outstep;  
     }  
     break;  
     case TRANS:  
     for (loop=0;loop<numsteps;++loop)  
     {  
             DataMaths::transpose(*vleft,m_left->getShape(),subroffset, v,getShape(),offset,m_axis_offset);  
         subroffset+=instep;  
         offset+=outstep;  
     }  
     break;  
     default:  
     throw DataException("Programmer error - resolveNP1OUTP can not resolve operator "+opToString(m_op)+".");  
   }  
   return &v;  
 }  
   
   
 /*  
   \brief Compute the value of the expression (unary operation with int params) for the given sample.  
   \return Vector which stores the value of the subexpression for the given sample.  
   \param v A vector to store intermediate results.  
   \param offset Index in v to begin storing results.  
   \param sampleNo Sample number to evaluate.  
   \param roffset (output parameter) the offset in the return vector where the result begins.  
   
   The return value will be an existing vector so do not deallocate it.  
   If the result is stored in v it should be stored at the offset given.  
   Everything from offset to the end of v should be considered available for this method to use.  
 */  
 DataTypes::ValueType*  
 DataLazy::resolveNP1OUT_2P(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const  
 {  
     // we assume that any collapsing has been done before we get here  
     // since we only have one argument we don't need to think about only  
     // processing single points.  
   if (m_readytype!='E')  
   {  
     throw DataException("Programmer error - resolveNP1OUT_2P should only be called on expanded Data.");  
   }  
     // since we can't write the result over the input, we need a result offset further along  
   size_t subroffset;  
   const ValueType* vleft=m_left->resolveSample(v,offset+m_left->m_samplesize,sampleNo,subroffset);  
 LAZYDEBUG(cerr << "srcsamplesize=" << offset+m_left->m_samplesize << " beg=" << subroffset << endl;)  
 LAZYDEBUG(cerr << "Offset for 5800=" << getPointOffset(5800/getNumDPPSample(),5800%getNumDPPSample()) << endl;)  
   roffset=offset;  
   size_t loop=0;  
   size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;  
   size_t outstep=getNoValues();  
   size_t instep=m_left->getNoValues();  
 LAZYDEBUG(cerr << "instep=" << instep << " outstep=" << outstep<< " numsteps=" << numsteps << endl;)  
   switch (m_op)  
   {  
     case SWAP:  
     for (loop=0;loop<numsteps;++loop)  
     {  
             DataMaths::swapaxes(*vleft,m_left->getShape(),subroffset, v,getShape(),offset,m_axis_offset, m_transpose);  
         subroffset+=instep;  
         offset+=outstep;  
     }  
     break;  
     default:  
     throw DataException("Programmer error - resolveNP1OUT2P can not resolve operator "+opToString(m_op)+".");  
   }  
   return &v;  
 }  
999    
1000    
1001    
# Line 1295  LAZYDEBUG(cout << " result=      " << re Line 1015  LAZYDEBUG(cout << " result=      " << re
1015        rroffset+=orightstep;\        rroffset+=orightstep;\
1016      }      }
1017    
 /*  
   \brief Compute the value of the expression (binary operation) for the given sample.  
   \return Vector which stores the value of the subexpression for the given sample.  
   \param v A vector to store intermediate results.  
   \param offset Index in v to begin storing results.  
   \param sampleNo Sample number to evaluate.  
   \param roffset (output parameter) the offset in the return vector where the result begins.  
   
   The return value will be an existing vector so do not deallocate it.  
   If the result is stored in v it should be stored at the offset given.  
   Everything from offset to the end of v should be considered available for this method to use.  
 */  
 // This method assumes that any subexpressions which evaluate to Constant or Tagged Data  
 // have already been collapsed to IDENTITY. So we must have at least one expanded child.  
 // If both children are expanded, then we can process them in a single operation (we treat  
 // the whole sample as one big datapoint.  
 // If one of the children is not expanded, then we need to treat each point in the sample  
 // individually.  
 // There is an additional complication when scalar operations are considered.  
 // For example, 2+Vector.  
 // In this case each double within the point is treated individually  
 DataTypes::ValueType*  
 DataLazy::resolveBinary(ValueType& v,  size_t offset, int sampleNo, size_t& roffset) const  
 {  
 LAZYDEBUG(cout << "Resolve binary: " << toString() << endl;)  
   
   size_t lroffset=0, rroffset=0;    // offsets in the left and right result vectors  
     // first work out which of the children are expanded  
   bool leftExp=(m_left->m_readytype=='E');  
   bool rightExp=(m_right->m_readytype=='E');  
   if (!leftExp && !rightExp)  
   {  
     throw DataException("Programmer Error - please use collapse if neither argument has type 'E'.");  
   }  
   bool leftScalar=(m_left->getRank()==0);  
   bool rightScalar=(m_right->getRank()==0);  
   if ((m_left->getRank()!=m_right->getRank()) && (!leftScalar && !rightScalar))  
   {  
     throw DataException("resolveBinary - ranks of arguments must match unless one of them is scalar.");  
   }  
   size_t leftsize=m_left->getNoValues();  
   size_t rightsize=m_right->getNoValues();  
   size_t chunksize=1;           // how many doubles will be processed in one go  
   int leftstep=0;       // how far should the left offset advance after each step  
   int rightstep=0;  
   int numsteps=0;       // total number of steps for the inner loop  
   int oleftstep=0;  // the o variables refer to the outer loop  
   int orightstep=0; // The outer loop is only required in cases where there is an extended scalar  
   int onumsteps=1;  
     
   bool LES=(leftExp && leftScalar); // Left is an expanded scalar  
   bool RES=(rightExp && rightScalar);  
   bool LS=(!leftExp && leftScalar); // left is a single scalar  
   bool RS=(!rightExp && rightScalar);  
   bool LN=(!leftExp && !leftScalar);    // left is a single non-scalar  
   bool RN=(!rightExp && !rightScalar);  
   bool LEN=(leftExp && !leftScalar);    // left is an expanded non-scalar  
   bool REN=(rightExp && !rightScalar);  
   
   if ((LES && RES) || (LEN && REN)) // both are Expanded scalars or both are expanded non-scalars  
   {  
     chunksize=m_left->getNumDPPSample()*leftsize;  
     leftstep=0;  
     rightstep=0;  
     numsteps=1;  
   }  
   else if (LES || RES)  
   {  
     chunksize=1;  
     if (LES)        // left is an expanded scalar  
     {  
         if (RS)  
         {  
            leftstep=1;  
            rightstep=0;  
            numsteps=m_left->getNumDPPSample();  
         }  
         else        // RN or REN  
         {  
            leftstep=0;  
            oleftstep=1;  
            rightstep=1;  
            orightstep=(RN ? -(int)rightsize : 0);  
            numsteps=rightsize;  
            onumsteps=m_left->getNumDPPSample();  
         }  
     }  
     else        // right is an expanded scalar  
     {  
         if (LS)  
         {  
            rightstep=1;  
            leftstep=0;  
            numsteps=m_right->getNumDPPSample();  
         }  
         else  
         {  
            rightstep=0;  
            orightstep=1;  
            leftstep=1;  
            oleftstep=(LN ? -(int)leftsize : 0);  
            numsteps=leftsize;  
            onumsteps=m_right->getNumDPPSample();  
         }  
     }  
   }  
   else  // this leaves (LEN, RS), (LEN, RN) and their transposes  
   {  
     if (LEN)    // and Right will be a single value  
     {  
         chunksize=rightsize;  
         leftstep=rightsize;  
         rightstep=0;  
         numsteps=m_left->getNumDPPSample();  
         if (RS)  
         {  
            numsteps*=leftsize;  
         }  
     }  
     else    // REN  
     {  
         chunksize=leftsize;  
         rightstep=leftsize;  
         leftstep=0;  
         numsteps=m_right->getNumDPPSample();  
         if (LS)  
         {  
            numsteps*=rightsize;  
         }  
     }  
   }  
   
   int resultStep=max(leftstep,rightstep);   // only one (at most) should be !=0  
     // Get the values of sub-expressions  
   const ValueType* left=m_left->resolveSample(v,offset+getMaxSampleSize(),sampleNo,lroffset);   // see note on  
     // calcBufss for why we can't put offset as the 2nd param above  
   const ValueType* right=m_right->resolveSample(v,offset+2*getMaxSampleSize(),sampleNo,rroffset); // Note  
     // the right child starts further along.  
 LAZYDEBUG(cout << "Post sub calls in " << toString() << endl;)  
 LAZYDEBUG(cout << "shapes=" << DataTypes::shapeToString(m_left->getShape()) << "," << DataTypes::shapeToString(m_right->getShape()) << endl;)  
 LAZYDEBUG(cout << "chunksize=" << chunksize << endl << "leftstep=" << leftstep << " rightstep=" << rightstep;)  
 LAZYDEBUG(cout << " numsteps=" << numsteps << endl << "oleftstep=" << oleftstep << " orightstep=" << orightstep;)  
 LAZYDEBUG(cout << "onumsteps=" << onumsteps << endl;)  
 LAZYDEBUG(cout << " DPPS=" << m_left->getNumDPPSample() << "," <<m_right->getNumDPPSample() << endl;)  
 LAZYDEBUG(cout << "" << LS << RS << LN << RN << LES << RES <<LEN << REN <<   endl;)  
   
   
   double* resultp=&(v[offset]);     // results are stored at the vector offset we recieved  
   switch(m_op)  
   {  
     case ADD:  
         PROC_OP(NO_ARG,plus<double>());  
     break;  
     case SUB:  
     PROC_OP(NO_ARG,minus<double>());  
     break;  
     case MUL:  
     PROC_OP(NO_ARG,multiplies<double>());  
     break;  
     case DIV:  
     PROC_OP(NO_ARG,divides<double>());  
     break;  
     case POW:  
        PROC_OP(double (double,double),::pow);  
     break;  
     default:  
     throw DataException("Programmer error - resolveBinary can not resolve operator "+opToString(m_op)+".");  
   }  
   roffset=offset;  
   return &v;  
 }  
   
   
   
 /*  
   \brief Compute the value of the expression (tensor product) for the given sample.  
   \return Vector which stores the value of the subexpression for the given sample.  
   \param v A vector to store intermediate results.  
   \param offset Index in v to begin storing results.  
   \param sampleNo Sample number to evaluate.  
   \param roffset (output parameter) the offset in the return vector where the result begins.  
   
   The return value will be an existing vector so do not deallocate it.  
   If the result is stored in v it should be stored at the offset given.  
   Everything from offset to the end of v should be considered available for this method to use.  
 */  
 // This method assumes that any subexpressions which evaluate to Constant or Tagged Data  
 // have already been collapsed to IDENTITY. So we must have at least one expanded child.  
 // unlike the other resolve helpers, we must treat these datapoints separately.  
 DataTypes::ValueType*  
 DataLazy::resolveTProd(ValueType& v,  size_t offset, int sampleNo, size_t& roffset) const  
 {  
 LAZYDEBUG(cout << "Resolve TensorProduct: " << toString()  << " to offset " << offset<< endl;)  
   
   size_t lroffset=0, rroffset=0;    // offsets in the left and right result vectors  
     // first work out which of the children are expanded  
   bool leftExp=(m_left->m_readytype=='E');  
   bool rightExp=(m_right->m_readytype=='E');  
   int steps=getNumDPPSample();  
 /*  int leftStep=((leftExp && !rightExp)? m_right->getNoValues() : 0);  
   int rightStep=((rightExp && !leftExp)? m_left->getNoValues() : 0);*/  
   int leftStep=(leftExp? m_left->getNoValues() : 0);        // do not have scalars as input to this method  
   int rightStep=(rightExp?m_right->getNoValues() : 0);  
   
   int resultStep=getNoValues();  
     // Get the values of sub-expressions (leave a gap of one sample for the result).  
   int gap=offset+m_samplesize;    
   
 LAZYDEBUG(cout << "Query left with offset=" << gap << endl;)  
   
   const ValueType* left=m_left->resolveSample(v,gap,sampleNo,lroffset);  
   gap+=m_left->getMaxSampleSize();  
   
   
 LAZYDEBUG(cout << "Query right with offset=" << gap << endl;)  
   
   
   const ValueType* right=m_right->resolveSample(v,gap,sampleNo,rroffset);  
   
 LAZYDEBUG(cerr << "[Left shape]=" << DataTypes::shapeToString(m_left->getShape()) << "\n[Right shape]=" << DataTypes::shapeToString(m_right->getShape()) << " result=" <<DataTypes::shapeToString(getShape()) <<  endl;  
 cout << getNoValues() << endl;)  
 LAZYDEBUG(cerr << "Result of left=";)  
 LAZYDEBUG(cerr << "[" << lroffset << " .. " << lroffset+m_left->getNoValues() << "]" << endl;  
   
 for (int i=lroffset, limit=lroffset+(leftExp?m_left->getNoValues()*m_left->getNumDPPSample():m_left->getNoValues());i<limit;++i)  
 {  
 cout << "[" << setw(2) << i-lroffset << "] " << setw(10) << (*left)[i] << " ";  
 if (i%4==0) cout << endl;  
 })  
 LAZYDEBUG(cerr << "\nResult of right=" << endl;)  
 LAZYDEBUG(  
 for (int i=rroffset, limit=rroffset+(rightExp?m_right->getNoValues()*m_right->getNumDPPSample():m_right->getNoValues());i<limit;++i)  
 {  
 cerr << "[" <<  setw(2)<< i-rroffset << "] " << setw(10) << (*right)[i] << " ";  
 if (i%4==0) cout << endl;  
 }  
 cerr << endl;  
 )  
 LAZYDEBUG(cerr << "Post sub calls: " << toString() << endl;)  
 LAZYDEBUG(cout << "LeftExp=" << leftExp << " rightExp=" << rightExp << endl;)  
 LAZYDEBUG(cout << "LeftR=" << m_left->getRank() << " rightExp=" << m_right->getRank() << endl;)  
 LAZYDEBUG(cout << "LeftSize=" << m_left->getNoValues() << " RightSize=" << m_right->getNoValues() << endl;)  
 LAZYDEBUG(cout << "m_samplesize=" << m_samplesize << endl;)  
 LAZYDEBUG(cout << "outputshape=" << DataTypes::shapeToString(getShape()) << endl;)  
 LAZYDEBUG(cout << "DPPS=" << m_right->getNumDPPSample() <<"."<<endl;)  
   
   double* resultp=&(v[offset]);     // results are stored at the vector offset we recieved  
   switch(m_op)  
   {  
     case PROD:  
     for (int i=0;i<steps;++i,resultp+=resultStep)  
     {  
   
 LAZYDEBUG(cout << "lroffset=" << lroffset << "rroffset=" << rroffset << endl;)  
 LAZYDEBUG(cout << "l*=" << left << " r*=" << right << endl;)  
 LAZYDEBUG(cout << "m_SL=" << m_SL << " m_SM=" << m_SM << " m_SR=" << m_SR << endl;)  
   
           const double *ptr_0 = &((*left)[lroffset]);  
           const double *ptr_1 = &((*right)[rroffset]);  
   
 LAZYDEBUG(cout << DataTypes::pointToString(*left, m_left->getShape(),lroffset,"LEFT") << endl;)  
 LAZYDEBUG(cout << DataTypes::pointToString(*right,m_right->getShape(),rroffset, "RIGHT") << endl;)  
   
           matrix_matrix_product(m_SL, m_SM, m_SR, ptr_0, ptr_1, resultp, m_transpose);  
   
 LAZYDEBUG(cout << "Results--\n";  
 {  
   DataVector dv(getNoValues());  
 for (int z=0;z<getNoValues();++z)  
 {  
   cout << "[" << setw(2) << z<< "] " << setw(10) << resultp[z] << " ";  
   if (z%4==0) cout << endl;  
   dv[z]=resultp[z];  
 }  
 cout << endl << DataTypes::pointToString(dv,getShape(),0,"RESLT");  
 cout << "\nWritten to: " << resultp << " resultStep=" << resultStep << endl;  
 }  
 )  
       lroffset+=leftStep;  
       rroffset+=rightStep;  
     }  
     break;  
     default:  
     throw DataException("Programmer error - resolveTProduct can not resolve operator "+opToString(m_op)+".");  
   }  
   roffset=offset;  
   return &v;  
 }  
   
   
 #ifdef LAZY_NODE_STORAGE  
1018    
1019  // The result will be stored in m_samples  // The result will be stored in m_samples
1020  // The return value is a pointer to the DataVector, offset is the offset within the return value  // The return value is a pointer to the DataVector, offset is the offset within the return value
1021  const DataTypes::ValueType*  const DataTypes::ValueType*
1022  DataLazy::resolveNodeSample(int tid, int sampleNo, size_t& roffset)  DataLazy::resolveNodeSample(int tid, int sampleNo, size_t& roffset) const
1023  {  {
1024  LAZYDEBUG(cout << "Resolve sample " << toString() << endl;)  LAZYDEBUG(cout << "Resolve sample " << toString() << endl;)
1025      // collapse so we have a 'E' node or an IDENTITY for some other type      // collapse so we have a 'E' node or an IDENTITY for some other type
# Line 1601  LAZYDEBUG(cout << "Resolve sample " << t Line 1030  LAZYDEBUG(cout << "Resolve sample " << t
1030    if (m_op==IDENTITY)      if (m_op==IDENTITY)  
1031    {    {
1032      const ValueType& vec=m_id->getVectorRO();      const ValueType& vec=m_id->getVectorRO();
 //     if (m_readytype=='C')  
 //     {  
 //  roffset=0;      // all samples read from the same position  
 //  return &(m_samples);  
 //     }  
1033      roffset=m_id->getPointOffset(sampleNo, 0);      roffset=m_id->getPointOffset(sampleNo, 0);
1034    #ifdef LAZY_STACK_PROF
1035    int x;
1036    if (&x<stackend[omp_get_thread_num()])
1037    {
1038           stackend[omp_get_thread_num()]=&x;
1039    }
1040    #endif
1041      return &(vec);      return &(vec);
1042    }    }
1043    if (m_readytype!='E')    if (m_readytype!='E')
# Line 1619  LAZYDEBUG(cout << "Resolve sample " << t Line 1050  LAZYDEBUG(cout << "Resolve sample " << t
1050      return &(m_samples);        // sample is already resolved      return &(m_samples);        // sample is already resolved
1051    }    }
1052    m_sampleids[tid]=sampleNo;    m_sampleids[tid]=sampleNo;
1053    
1054    switch (getOpgroup(m_op))    switch (getOpgroup(m_op))
1055    {    {
1056    case G_UNARY:    case G_UNARY:
# Line 1628  LAZYDEBUG(cout << "Resolve sample " << t Line 1060  LAZYDEBUG(cout << "Resolve sample " << t
1060    case G_NP1OUT_P: return resolveNodeNP1OUT_P(tid, sampleNo, roffset);    case G_NP1OUT_P: return resolveNodeNP1OUT_P(tid, sampleNo, roffset);
1061    case G_TENSORPROD: return resolveNodeTProd(tid, sampleNo, roffset);    case G_TENSORPROD: return resolveNodeTProd(tid, sampleNo, roffset);
1062    case G_NP1OUT_2P: return resolveNodeNP1OUT_2P(tid, sampleNo, roffset);    case G_NP1OUT_2P: return resolveNodeNP1OUT_2P(tid, sampleNo, roffset);
1063      case G_REDUCTION: return resolveNodeReduction(tid, sampleNo, roffset);
1064      case G_CONDEVAL: return resolveNodeCondEval(tid, sampleNo, roffset);
1065    default:    default:
1066      throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");      throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");
1067    }    }
1068  }  }
1069    
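A simplified driver sketch for the per-sample resolution shown above (this is not the actual resolve entry point, which lies outside this hunk; access control, buffer ownership and MPI are glossed over, and OpenMP is assumed):

    #include <omp.h>

    // Evaluate every sample of a lazy node in parallel and copy each per-thread result
    // into a flat output buffer of size numSamples*samplesize.
    void resolveAllSamples(DataLazy& lazy, int numSamples, size_t samplesize,
                           DataTypes::ValueType& out)
    {
        #pragma omp parallel for
        for (int sampleNo=0; sampleNo<numSamples; ++sampleNo)
        {
            size_t roffset=0;
            const DataTypes::ValueType* res =
                lazy.resolveNodeSample(omp_get_thread_num(), sampleNo, roffset);
            for (size_t i=0; i<samplesize; ++i)
                out[sampleNo*samplesize+i] = (*res)[roffset+i];
        }
    }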
1070  const DataTypes::ValueType*  const DataTypes::ValueType*
1071  DataLazy::resolveNodeUnary(int tid, int sampleNo, size_t& roffset)  DataLazy::resolveNodeUnary(int tid, int sampleNo, size_t& roffset) const
1072  {  {
1073      // we assume that any collapsing has been done before we get here      // we assume that any collapsing has been done before we get here
1074      // since we only have one argument we don't need to think about only      // since we only have one argument we don't need to think about only
# Line 1766  DataLazy::resolveNodeUnary(int tid, int Line 1200  DataLazy::resolveNodeUnary(int tid, int
1200    
1201    
1202  const DataTypes::ValueType*  const DataTypes::ValueType*
1203  DataLazy::resolveNodeNP1OUT(int tid, int sampleNo, size_t& roffset)  DataLazy::resolveNodeReduction(int tid, int sampleNo, size_t& roffset) const
1204    {
1205        // we assume that any collapsing has been done before we get here
1206        // since we only have one argument we don't need to think about only
1207        // processing single points.
1208        // we will also know we won't get identity nodes
1209      if (m_readytype!='E')
1210      {
1211        throw DataException("Programmer error - resolveNodeReduction should only be called on expanded Data.");
1212      }
1213      if (m_op==IDENTITY)
1214      {
1215        throw DataException("Programmer error - resolveNodeReduction should not be called on identity nodes.");
1216      }
1217      size_t loffset=0;
1218      const DataTypes::ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, loffset);
1219    
1220      roffset=m_samplesize*tid;
1221      unsigned int ndpps=getNumDPPSample();
1222      unsigned int psize=DataTypes::noValues(m_left->getShape());
1223      double* result=&(m_samples[roffset]);
1224      switch (m_op)
1225      {
1226        case MINVAL:
1227        {
1228          for (unsigned int z=0;z<ndpps;++z)
1229          {
1230            FMin op;
1231            *result=DataMaths::reductionOp(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max());
1232            loffset+=psize;
1233            result++;
1234          }
1235        }
1236        break;
1237        case MAXVAL:
1238        {
1239          for (unsigned int z=0;z<ndpps;++z)
1240          {
1241          FMax op;
1242          *result=DataMaths::reductionOp(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max()*-1);
1243          loffset+=psize;
1244          result++;
1245          }
1246        }
1247        break;
1248        default:
1249        throw DataException("Programmer error - resolveNodeReduction can not resolve operator "+opToString(m_op)+".");
1250      }
1251      return &(m_samples);
1252    }
1253    
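The reduction above collapses each data point of the child into a single scalar. A self-contained illustration of the MINVAL case for one data point, mirroring the reductionOp(..., FMin, numeric_limits<double>::max()) call seen above but using only the standard library:

    #include <algorithm>
    #include <limits>
    #include <vector>

    // Start from +max and fold min() across the noValues components of one data point.
    double minvalOfPoint(const std::vector<double>& vec, size_t offset, size_t noValues)
    {
        double r = std::numeric_limits<double>::max();
        for (size_t i=0; i<noValues; ++i)
            r = std::min(r, vec[offset+i]);
        return r;
    }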
1254    const DataTypes::ValueType*
1255    DataLazy::resolveNodeNP1OUT(int tid, int sampleNo, size_t& roffset) const
1256  {  {
1257      // we assume that any collapsing has been done before we get here      // we assume that any collapsing has been done before we get here
1258      // since we only have one argument we don't need to think about only      // since we only have one argument we don't need to think about only
# Line 1811  DataLazy::resolveNodeNP1OUT(int tid, int Line 1297  DataLazy::resolveNodeNP1OUT(int tid, int
1297  }  }
1298    
1299  const DataTypes::ValueType*  const DataTypes::ValueType*
1300  DataLazy::resolveNodeNP1OUT_P(int tid, int sampleNo, size_t& roffset)  DataLazy::resolveNodeNP1OUT_P(int tid, int sampleNo, size_t& roffset) const
1301  {  {
1302      // we assume that any collapsing has been done before we get here      // we assume that any collapsing has been done before we get here
1303      // since we only have one argument we don't need to think about only      // since we only have one argument we don't need to think about only
# Line 1859  DataLazy::resolveNodeNP1OUT_P(int tid, i Line 1345  DataLazy::resolveNodeNP1OUT_P(int tid, i
1345    
1346    
1347  const DataTypes::ValueType*  const DataTypes::ValueType*
1348  DataLazy::resolveNodeNP1OUT_2P(int tid, int sampleNo, size_t& roffset)  DataLazy::resolveNodeNP1OUT_2P(int tid, int sampleNo, size_t& roffset) const
1349  {  {
1350    if (m_readytype!='E')    if (m_readytype!='E')
1351    {    {
# Line 1894  DataLazy::resolveNodeNP1OUT_2P(int tid, Line 1380  DataLazy::resolveNodeNP1OUT_2P(int tid,
1380    return &m_samples;    return &m_samples;
1381  }  }
1382    
1383    const DataTypes::ValueType*
1384    DataLazy::resolveNodeCondEval(int tid, int sampleNo, size_t& roffset) const
1385    {
1386      if (m_readytype!='E')
1387      {
1388        throw DataException("Programmer error - resolveNodeCondEval should only be called on expanded Data.");
1389      }
1390      if (m_op!=CONDEVAL)
1391      {
1392        throw DataException("Programmer error - resolveNodeCondEval should only be called on CONDEVAL nodes.");
1393      }
1394      size_t subroffset;
1395    
1396      const ValueType* maskres=m_mask->resolveNodeSample(tid, sampleNo, subroffset);
1397      const ValueType* srcres=0;
1398      if ((*maskres)[subroffset]>0)
1399      {
1400        srcres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1401      }
1402      else
1403      {
1404        srcres=m_right->resolveNodeSample(tid, sampleNo, subroffset);
1405      }
1406    
1407      // Now we need to copy the result
1408    
1409      roffset=m_samplesize*tid;
1410      for (int i=0;i<m_samplesize;++i)
1411      {
1412        m_samples[roffset+i]=(*srcres)[subroffset+i];  
1413      }
1414    
1415      return &m_samples;
1416    }
1417    
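Editor's note: resolveNodeCondEval evaluates the mask first and then resolves only the branch the mask selects for that sample, copying the chosen sample into this node's per-thread slice of m_samples. The following stripped-down illustration shows that per-sample branch selection with plain vectors; the buffer layout and names are illustrative only, not escript's ValueType machinery.

#include <cstring>
#include <iostream>
#include <vector>

// Toy stand-in: pick one of two source samples based on a mask value,
// then copy the chosen sample into the output buffer at its offset.
void condEvalSample(const std::vector<double>& mask,
                    const std::vector<double>& left,
                    const std::vector<double>& right,
                    std::vector<double>& out,
                    size_t sampleSize, size_t sampleNo)
{
    size_t off = sampleSize * sampleNo;
    const std::vector<double>& src = (mask[sampleNo] > 0) ? left : right;
    std::memcpy(&out[off], &src[off], sampleSize * sizeof(double));
}

int main()
{
    const size_t sampleSize = 2, numSamples = 2;
    std::vector<double> mask = {1.0, 0.0};          // one mask value per sample
    std::vector<double> left = {1, 1, 1, 1};
    std::vector<double> right = {9, 9, 9, 9};
    std::vector<double> out(sampleSize * numSamples);
    for (size_t s = 0; s < numSamples; ++s)
        condEvalSample(mask, left, right, out, sampleSize, s);
    for (double v : out) std::cout << v << ' ';     // prints: 1 1 9 9
    std::cout << std::endl;
    return 0;
}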
1418  // This method assumes that any subexpressions which evaluate to Constant or Tagged Data  // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1419  // have already been collapsed to IDENTITY. So we must have at least one expanded child.  // have already been collapsed to IDENTITY. So we must have at least one expanded child.
# Line 1906  DataLazy::resolveNodeNP1OUT_2P(int tid, Line 1425  DataLazy::resolveNodeNP1OUT_2P(int tid,
1425  // For example, 2+Vector.  // For example, 2+Vector.
1426  // In this case each double within the point is treated individually  // In this case each double within the point is treated individually
1427  const DataTypes::ValueType*  const DataTypes::ValueType*
1428  DataLazy::resolveNodeBinary(int tid, int sampleNo, size_t& roffset)  DataLazy::resolveNodeBinary(int tid, int sampleNo, size_t& roffset) const
1429  {  {
1430  LAZYDEBUG(cout << "Resolve binary: " << toString() << endl;)  LAZYDEBUG(cout << "Resolve binary: " << toString() << endl;)
1431    
# Line 2033  LAZYDEBUG(cout << "Right res["<< rroffse Line 1552  LAZYDEBUG(cout << "Right res["<< rroffse
1552    
1553    
1554    roffset=m_samplesize*tid;    roffset=m_samplesize*tid;
1555    double* resultp=&(m_samples[roffset]);        // results are stored at the vector offset we recieved    double* resultp=&(m_samples[roffset]);        // results are stored at the vector offset we received
1556    switch(m_op)    switch(m_op)
1557    {    {
1558      case ADD:      case ADD:
# Line 2063  LAZYDEBUG(cout << "Result res[" << roffs Line 1582  LAZYDEBUG(cout << "Result res[" << roffs
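Editor's note: as the comment above resolveNodeBinary says (the 2+Vector case), when the operands differ in shape the scalar side is broadcast and each double within the point is combined individually. Here is a simplified sketch of that per-point broadcasting rule, assuming the smaller operand is a scalar; it illustrates the pattern only and does not reproduce escript's actual offset handling.

#include <algorithm>
#include <cassert>
#include <functional>
#include <iostream>
#include <vector>

// Apply a binary op to one data point where either side may be a scalar.
// leftSize/rightSize are the number of doubles per point on each side.
void binaryPoint(const double* left, size_t leftSize,
                 const double* right, size_t rightSize,
                 double* result,
                 const std::function<double(double, double)>& op)
{
    size_t n = std::max(leftSize, rightSize);
    assert(leftSize == rightSize || leftSize == 1 || rightSize == 1);
    for (size_t i = 0; i < n; ++i)
    {
        double l = (leftSize == 1) ? left[0] : left[i];    // broadcast scalar left
        double r = (rightSize == 1) ? right[0] : right[i]; // broadcast scalar right
        result[i] = op(l, r);
    }
}

int main()
{
    double scalar = 2.0;
    double vec[3] = {1.0, 2.0, 3.0};
    double out[3];
    binaryPoint(&scalar, 1, vec, 3, out, std::plus<double>()); // 2 + Vector
    for (double v : out) std::cout << v << ' ';                // prints: 3 4 5
    std::cout << std::endl;
    return 0;
}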
1582  // have already been collapsed to IDENTITY. So we must have at least one expanded child.  // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1583  // unlike the other resolve helpers, we must treat these datapoints separately.  // unlike the other resolve helpers, we must treat these datapoints separately.
1584  const DataTypes::ValueType*  const DataTypes::ValueType*
1585  DataLazy::resolveNodeTProd(int tid, int sampleNo, size_t& roffset)  DataLazy::resolveNodeTProd(int tid, int sampleNo, size_t& roffset) const
1586  {  {
1587  LAZYDEBUG(cout << "Resolve TensorProduct: " << toString() << endl;)  LAZYDEBUG(cout << "Resolve TensorProduct: " << toString() << endl;)
1588    
# Line 2095  LAZYDEBUG(cout << "m_samplesize=" << m_s Line 1614  LAZYDEBUG(cout << "m_samplesize=" << m_s
1614  LAZYDEBUG(cout << "outputshape=" << DataTypes::shapeToString(getShape()) << endl;)  LAZYDEBUG(cout << "outputshape=" << DataTypes::shapeToString(getShape()) << endl;)
1615  LAZYDEBUG(cout << "DPPS=" << m_right->getNumDPPSample() <<"."<<endl;)  LAZYDEBUG(cout << "DPPS=" << m_right->getNumDPPSample() <<"."<<endl;)
1616    
1617    double* resultp=&(m_samples[offset]);     // results are stored at the vector offset we recieved    double* resultp=&(m_samples[offset]);     // results are stored at the vector offset we received
1618    switch(m_op)    switch(m_op)
1619    {    {
1620      case PROD:      case PROD:
# Line 2119  LAZYDEBUG(cout << DataTypes::pointToStri Line 1638  LAZYDEBUG(cout << DataTypes::pointToStri
1638    roffset=offset;    roffset=offset;
1639    return &m_samples;    return &m_samples;
1640  }  }
 #endif //LAZY_NODE_STORAGE  
   
 /*  
   \brief Compute the value of the expression for the given sample.  
   \return Vector which stores the value of the subexpression for the given sample.  
   \param v A vector to store intermediate results.  
   \param offset Index in v to begin storing results.  
   \param sampleNo Sample number to evaluate.  
   \param roffset (output parameter) the offset in the return vector where the result begins.  
1641    
   The return value will be an existing vector so do not deallocate it.  
 */  
 // the vector and the offset are a place where the method could write its data if it wishes  
 // it is not obligated to do so. For example, if it has its own storage already, it can use that.  
 // Hence the return value to indicate where the data is actually stored.  
 // Regardless, the storage should be assumed to be used, even if it isn't.  
1642    
 // the roffset is the offset within the returned vector where the data begins  
1643  const DataTypes::ValueType*  const DataTypes::ValueType*
1644  DataLazy::resolveSample(ValueType& v, size_t offset, int sampleNo, size_t& roffset)  DataLazy::resolveSample(int sampleNo, size_t& roffset) const
 {  
 LAZYDEBUG(cout << "Resolve sample " << toString() << endl;)  
     // collapse so we have a 'E' node or an IDENTITY for some other type  
   if (m_readytype!='E' && m_op!=IDENTITY)  
   {  
     collapse();  
   }  
   if (m_op==IDENTITY)    
   {  
     const ValueType& vec=m_id->getVectorRO();  
     if (m_readytype=='C')  
     {  
     roffset=0;  
 LAZYDEBUG(cout << "Finish  sample " << toString() << endl;)  
     return &(vec);  
     }  
     roffset=m_id->getPointOffset(sampleNo, 0);  
 LAZYDEBUG(cout << "Finish  sample " << toString() << endl;)  
     return &(vec);  
   }  
   if (m_readytype!='E')  
   {  
     throw DataException("Programmer Error - Collapse did not produce an expanded node.");  
   }  
   switch (getOpgroup(m_op))  
   {  
   case G_UNARY:  
   case G_UNARY_P: return resolveUnary(v, offset,sampleNo,roffset);  
   case G_BINARY: return resolveBinary(v, offset,sampleNo,roffset);  
   case G_NP1OUT: return resolveNP1OUT(v, offset, sampleNo,roffset);  
   case G_NP1OUT_P: return resolveNP1OUT_P(v, offset, sampleNo,roffset);  
   case G_TENSORPROD: return resolveTProd(v,offset, sampleNo,roffset);  
   case G_NP1OUT_2P: return resolveNP1OUT_2P(v, offset, sampleNo, roffset);  
   default:  
     throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");  
   }  
   
 }  
   
 const DataTypes::ValueType*  
 DataLazy::resolveSample(BufferGroup& bg, int sampleNo, size_t& roffset)  
1645  {  {
1646  #ifdef _OPENMP  #ifdef _OPENMP
1647      int tid=omp_get_thread_num();      int tid=omp_get_thread_num();
1648  #else  #else
1649      int tid=0;      int tid=0;
1650  #endif  #endif
1651      return resolveSample(bg.getBuffer(tid),bg.getOffset(tid),sampleNo,roffset);  
1652    #ifdef LAZY_STACK_PROF
1653        stackstart[tid]=&tid;
1654        stackend[tid]=&tid;
1655        const DataTypes::ValueType* r=resolveNodeSample(tid, sampleNo, roffset);
1656        size_t d=(size_t)stackstart[tid]-(size_t)stackend[tid];
1657        #pragma omp critical
1658        if (d>maxstackuse)
1659        {
1660    cout << "Max resolve Stack use " << d << endl;
1661            maxstackuse=d;
1662        }
1663        return r;
1664    #else
1665        return resolveNodeSample(tid, sampleNo, roffset);
1666    #endif
1667  }  }
1668    
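Editor's note: when LAZY_STACK_PROF is defined, resolveSample estimates the machine stack consumed by a resolve pass by recording the address of a local variable in the entry frame (stackstart) and comparing it with the address captured at the deepest frame (stackend); on platforms where the stack grows downwards the difference approximates the bytes used. A minimal sketch of that measurement idea, independent of escript (all names below are illustrative):

#include <cstddef>
#include <cstdint>
#include <iostream>

static std::uintptr_t g_top = 0;   // address of a local in the entry frame
static std::size_t g_maxUse = 0;   // deepest observed distance in bytes

// Recursive walk standing in for the recursive resolveNodeSample calls;
// each frame measures how far its locals are from the entry-point local.
void walk(int depth)
{
    char probe;
    std::uintptr_t here = reinterpret_cast<std::uintptr_t>(&probe);
    std::size_t used = (g_top > here) ? (g_top - here) : (here - g_top);
    if (used > g_maxUse) g_maxUse = used;
    if (depth > 0) walk(depth - 1);
}

int main()
{
    char top;
    g_top = reinterpret_cast<std::uintptr_t>(&top);
    walk(100);
    std::cout << "approx. max stack use: " << g_maxUse << " bytes" << std::endl;
    return 0;
}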
1669    
# Line 2196  DataLazy::resolveToIdentity() Line 1673  DataLazy::resolveToIdentity()
1673  {  {
1674     if (m_op==IDENTITY)     if (m_op==IDENTITY)
1675      return;      return;
 #ifndef LAZY_NODE_STORAGE  
    DataReady_ptr p=resolveVectorWorker();  
 #else  
1676     DataReady_ptr p=resolveNodeWorker();     DataReady_ptr p=resolveNodeWorker();
 #endif  
1677     makeIdentity(p);     makeIdentity(p);
1678  }  }
1679    
# Line 2216  void DataLazy::makeIdentity(const DataRe Line 1689  void DataLazy::makeIdentity(const DataRe
1689     else if(p->isExpanded()) {m_readytype='E';}     else if(p->isExpanded()) {m_readytype='E';}
1690     else if (p->isTagged()) {m_readytype='T';}     else if (p->isTagged()) {m_readytype='T';}
1691     else {throw DataException("Unknown DataReady instance in convertToIdentity constructor.");}     else {throw DataException("Unknown DataReady instance in convertToIdentity constructor.");}
    m_buffsRequired=1;  
1692     m_samplesize=p->getNumDPPSample()*p->getNoValues();     m_samplesize=p->getNumDPPSample()*p->getNoValues();
    m_maxsamplesize=m_samplesize;  
1693     m_left.reset();     m_left.reset();
1694     m_right.reset();     m_right.reset();
1695  }  }
# Line 2231  DataLazy::resolve() Line 1702  DataLazy::resolve()
1702      return m_id;      return m_id;
1703  }  }
1704    
 #ifdef LAZY_NODE_STORAGE  
1705    
1706  // This version of resolve uses storage in each node to hold results  /* This is really a static method but I think that caused problems in windows */
1707  DataReady_ptr  void
1708  DataLazy::resolveNodeWorker()  DataLazy::resolveGroupWorker(std::vector<DataLazy*>& dats)
1709  {  {
1710    if (m_readytype!='E')     // if the whole sub-expression is Constant or Tagged, then evaluate it normally    if (dats.empty())
1711    {    {
1712      collapse();      return;
1713    }    }
1714    if (m_op==IDENTITY)       // So a lazy expression of Constant or Tagged data will be returned here.    vector<DataLazy*> work;
1715      FunctionSpace fs=dats[0]->getFunctionSpace();
1716      bool match=true;
1717      for (int i=dats.size()-1;i>=0;--i)
1718    {    {
1719      return m_id;      if (dats[i]->m_readytype!='E')
1720        {
1721            dats[i]->collapse();
1722        }
1723        if (dats[i]->m_op!=IDENTITY)
1724        {
1725            work.push_back(dats[i]);
1726            if (fs!=dats[i]->getFunctionSpace())
1727            {
1728                match=false;
1729            }
1730        }
1731    }    }
1732      // from this point on we must have m_op!=IDENTITY and m_readytype=='E'    if (work.empty())
   DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(),  ValueType(getNoValues()));  
   ValueType& resvec=result->getVectorRW();  
   DataReady_ptr resptr=DataReady_ptr(result);  
   
   int sample;  
   int totalsamples=getNumSamples();  
   const ValueType* res=0;   // Storage for answer  
 LAZYDEBUG(cout << "Total number of samples=" <<totalsamples << endl;)  
   #pragma omp parallel for private(sample,res) schedule(static)  
   for (sample=0;sample<totalsamples;++sample)  
1733    {    {
1734      size_t roffset=0;      return;     // no work to do
1735      }
1736      if (match)    // all functionspaces match.  Yes I realise this is overly strict
1737      {     // it is possible that dats[0] is one of the objects which we discarded and
1738            // all the other functionspaces match.
1739        vector<DataExpanded*> dep;
1740        vector<ValueType*> vecs;
1741        for (int i=0;i<work.size();++i)
1742        {
1743            dep.push_back(new DataExpanded(fs,work[i]->getShape(), ValueType(work[i]->getNoValues())));
1744            vecs.push_back(&(dep[i]->getVectorRW()));
1745        }
1746        int totalsamples=work[0]->getNumSamples();
1747        const ValueType* res=0; // Storage for answer
1748        int sample;
1749        #pragma omp parallel private(sample, res)
1750        {
1751            size_t roffset=0;
1752            #pragma omp for schedule(static)
1753            for (sample=0;sample<totalsamples;++sample)
1754            {
1755            roffset=0;
1756            int j;
1757            for (j=work.size()-1;j>=0;--j)
1758            {
1759  #ifdef _OPENMP  #ifdef _OPENMP
1760      res=resolveNodeSample(omp_get_thread_num(),sample,roffset);                  res=work[j]->resolveNodeSample(omp_get_thread_num(),sample,roffset);
1761  #else  #else
1762      res=resolveNodeSample(0,sample,roffset);                  res=work[j]->resolveNodeSample(0,sample,roffset);
1763  #endif  #endif
1764  LAZYDEBUG(cout << "Sample #" << sample << endl;)                  DataVector::size_type outoffset=dep[j]->getPointOffset(sample,0);
1765  LAZYDEBUG(cout << "Final res[" << roffset<< "]=" << (*res)[roffset] << (*res)[roffset]<< endl; )                  memcpy(&((*vecs[j])[outoffset]),&((*res)[roffset]),work[j]->m_samplesize*sizeof(DataVector::ElementType));
1766      DataVector::size_type outoffset=result->getPointOffset(sample,0);          }
1767      memcpy(&(resvec[outoffset]),&((*res)[roffset]),m_samplesize*sizeof(DataVector::ElementType));          }
1768        }
1769        // Now we need to load the new results as identity ops into the lazy nodes
1770        for (int i=work.size()-1;i>=0;--i)
1771        {
1772            work[i]->makeIdentity(boost::dynamic_pointer_cast<DataReady>(dep[i]->getPtr()));
1773        }
1774      }
1775      else  // functionspaces do not match
1776      {
1777        for (int i=0;i<work.size();++i)
1778        {
1779            work[i]->resolveToIdentity();
1780        }
1781    }    }
   return resptr;  
1782  }  }
1783    
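Editor's note: resolveGroupWorker batches several lazy expressions that share a FunctionSpace so that a single parallel sweep over the samples resolves all of them, forking the OpenMP thread team once instead of once per expression. Below is a hedged sketch of that batching shape using plain vectors and a trivial per-sample "resolve"; none of the escript types or calls are reproduced.

#include <cstddef>
#include <iostream>
#include <vector>

struct Expr                       // toy stand-in for one lazy expression
{
    std::vector<double> input;    // pretend this is the unresolved data
    std::vector<double> output;   // filled in by the group resolve
};

// Resolve a whole group in one parallel sweep over the samples,
// so the thread team is forked once rather than once per expression.
void resolveGroup(std::vector<Expr>& work, int totalSamples)
{
    for (Expr& e : work)
        e.output.resize(totalSamples);
    #pragma omp parallel for schedule(static)
    for (int sample = 0; sample < totalSamples; ++sample)
    {
        for (std::size_t j = 0; j < work.size(); ++j)
        {
            // "Resolving" one sample of one expression; here just doubling it.
            work[j].output[sample] = 2.0 * work[j].input[sample];
        }
    }
}

int main()
{
    std::vector<Expr> work(2);
    work[0].input = {1, 2, 3, 4};
    work[1].input = {10, 20, 30, 40};
    resolveGroup(work, 4);
    std::cout << work[0].output[3] << " " << work[1].output[3] << std::endl; // 8 80
    return 0;
}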
 #endif // LAZY_NODE_STORAGE  
1784    
1785  // To simplify the memory management, all threads operate on one large vector, rather than one each.  
1786  // Each sample is evaluated independently and copied into the result DataExpanded.  // This version of resolve uses storage in each node to hold results
1787  DataReady_ptr  DataReady_ptr
1788  DataLazy::resolveVectorWorker()  DataLazy::resolveNodeWorker()
1789  {  {
   
 LAZYDEBUG(cout << "Sample size=" << m_samplesize << endl;)  
 LAZYDEBUG(cout << "Buffers=" << m_buffsRequired << endl;)  
1790    if (m_readytype!='E')     // if the whole sub-expression is Constant or Tagged, then evaluate it normally    if (m_readytype!='E')     // if the whole sub-expression is Constant or Tagged, then evaluate it normally
1791    {    {
1792      collapse();      collapse();
# Line 2290  LAZYDEBUG(cout << "Buffers=" << m_buffsR Line 1796  LAZYDEBUG(cout << "Buffers=" << m_buffsR
1796      return m_id;      return m_id;
1797    }    }
1798      // from this point on we must have m_op!=IDENTITY and m_readytype=='E'      // from this point on we must have m_op!=IDENTITY and m_readytype=='E'
   size_t threadbuffersize=m_maxsamplesize*(max(1,m_buffsRequired)); // Each thread needs to have enough  
     // storage to evaluate its expression  
   int numthreads=1;  
 #ifdef _OPENMP  
   numthreads=omp_get_max_threads();  
 #endif  
   ValueType v(numthreads*threadbuffersize);  
 LAZYDEBUG(cout << "Buffer created with size=" << v.size() << endl;)  
1799    DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(),  ValueType(getNoValues()));    DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(),  ValueType(getNoValues()));
1800    ValueType& resvec=result->getVectorRW();    ValueType& resvec=result->getVectorRW();
1801    DataReady_ptr resptr=DataReady_ptr(result);    DataReady_ptr resptr=DataReady_ptr(result);
1802    
1803    int sample;    int sample;
   size_t outoffset;     // offset in the output data  
1804    int totalsamples=getNumSamples();    int totalsamples=getNumSamples();
1805    const ValueType* res=0;   // Vector storing the answer    const ValueType* res=0;   // Storage for answer
   size_t resoffset=0;       // where in the vector to find the answer  
1806  LAZYDEBUG(cout << "Total number of samples=" <<totalsamples << endl;)  LAZYDEBUG(cout << "Total number of samples=" <<totalsamples << endl;)
1807    #pragma omp parallel for private(sample,resoffset,outoffset,res) schedule(static)    #pragma omp parallel private(sample,res)
   for (sample=0;sample<totalsamples;++sample)  
1808    {    {
1809  LAZYDEBUG(cout << "################################# " << sample << endl;)      size_t roffset=0;
1810    #ifdef LAZY_STACK_PROF
1811        stackstart[omp_get_thread_num()]=&roffset;
1812        stackend[omp_get_thread_num()]=&roffset;
1813    #endif
1814        #pragma omp for schedule(static)
1815        for (sample=0;sample<totalsamples;++sample)
1816        {
1817            roffset=0;
1818  #ifdef _OPENMP  #ifdef _OPENMP
1819      res=resolveSample(v,threadbuffersize*omp_get_thread_num(),sample,resoffset);              res=resolveNodeSample(omp_get_thread_num(),sample,roffset);
1820  #else  #else
1821      res=resolveSample(v,0,sample,resoffset);   // res would normally be v, but not if its a single IDENTITY op.              res=resolveNodeSample(0,sample,roffset);
1822  #endif  #endif
1823  LAZYDEBUG(cerr << "-------------------------------- " << endl;)  LAZYDEBUG(cout << "Sample #" << sample << endl;)
1824  LAZYDEBUG(cerr<< "Copying sample#" << sample << endl;)  LAZYDEBUG(cout << "Final res[" << roffset<< "]=" << (*res)[roffset] << (*res)[roffset]<< endl; )
1825      outoffset=result->getPointOffset(sample,0);              DataVector::size_type outoffset=result->getPointOffset(sample,0);
1826  LAZYDEBUG(cerr << "offset=" << outoffset << " from offset=" << resoffset << " " << m_samplesize << " doubles" << endl;)              memcpy(&(resvec[outoffset]),&((*res)[roffset]),m_samplesize*sizeof(DataVector::ElementType));
1827      for (unsigned int i=0;i<m_samplesize;++i,++outoffset,++resoffset)   // copy values into the output vector      }
1828      {    }
1829  LAZYDEBUG(cerr << "outoffset=" << outoffset << " resoffset=" << resoffset << " " << (*res)[resoffset]<< endl;)  #ifdef LAZY_STACK_PROF
1830      resvec[outoffset]=(*res)[resoffset];    for (int i=0;i<getNumberOfThreads();++i)
1831      }    {
1832  LAZYDEBUG(cerr << DataTypes::pointToString(resvec,getShape(),outoffset-m_samplesize+DataTypes::noValues(getShape()),"Final result:") << endl;)      size_t r=((size_t)stackstart[i] - (size_t)stackend[i]);
1833  LAZYDEBUG(cerr << "*********************************" << endl;)  //  cout << i << " " << stackstart[i] << " .. " << stackend[i] << " = " <<  r << endl;
1834        if (r>maxstackuse)
1835        {
1836            maxstackuse=r;
1837        }
1838    }    }
1839      cout << "Max resolve Stack use=" << maxstackuse << endl;
1840    #endif
1841    return resptr;    return resptr;
1842  }  }
1843    
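Editor's note: a detail shared by resolveNodeWorker and the resolveNode* helpers above is that each node's m_samples buffer holds one sample per thread, and a thread always writes at offset m_samplesize*tid, so concurrent threads never overlap. A small sketch of that thread-private slicing of one shared buffer, using a plain std::vector rather than escript's DataVector:

#include <cstddef>
#include <iostream>
#include <vector>
#ifdef _OPENMP
#include <omp.h>
#endif

int main()
{
    const std::size_t sampleSize = 3;          // doubles produced per sample
#ifdef _OPENMP
    const int numThreads = omp_get_max_threads();
#else
    const int numThreads = 1;
#endif
    // One shared buffer; thread t only ever writes samples[sampleSize*t .. +sampleSize).
    std::vector<double> samples(sampleSize * numThreads);

    #pragma omp parallel
    {
#ifdef _OPENMP
        int tid = omp_get_thread_num();
#else
        int tid = 0;
#endif
        std::size_t roffset = sampleSize * tid;   // this thread's private slice
        for (std::size_t i = 0; i < sampleSize; ++i)
            samples[roffset + i] = tid + 0.1 * i; // no two threads share an index
    }
    std::cout << "slice 0 starts with " << samples[0] << std::endl;
    return 0;
}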
# Line 2335  std::string Line 1845  std::string
1845  DataLazy::toString() const  DataLazy::toString() const
1846  {  {
1847    ostringstream oss;    ostringstream oss;
1848    oss << "Lazy Data:";    oss << "Lazy Data: [depth=" << m_height<< "] ";
1849    intoString(oss);    switch (escriptParams.getLAZY_STR_FMT())
1850      {
1851      case 1:   // tree format
1852        oss << endl;
1853        intoTreeString(oss,"");
1854        break;
1855      case 2:   // just the depth
1856        break;
1857      default:
1858        intoString(oss);
1859        break;
1860      }
1861    return oss.str();    return oss.str();
1862  }  }
1863    
# Line 2377  DataLazy::intoString(ostringstream& oss) Line 1898  DataLazy::intoString(ostringstream& oss)
1898    case G_UNARY_P:    case G_UNARY_P:
1899    case G_NP1OUT:    case G_NP1OUT:
1900    case G_NP1OUT_P:    case G_NP1OUT_P:
1901      case G_REDUCTION:
1902      oss << opToString(m_op) << '(';      oss << opToString(m_op) << '(';
1903      m_left->intoString(oss);      m_left->intoString(oss);
1904      oss << ')';      oss << ')';
# Line 2394  DataLazy::intoString(ostringstream& oss) Line 1916  DataLazy::intoString(ostringstream& oss)
1916      oss << ", " << m_axis_offset << ", " << m_transpose;      oss << ", " << m_axis_offset << ", " << m_transpose;
1917      oss << ')';      oss << ')';
1918      break;      break;
1919      case G_CONDEVAL:
1920        oss << opToString(m_op)<< '(' ;
1921        m_mask->intoString(oss);
1922        oss << " ? ";
1923        m_left->intoString(oss);
1924        oss << " : ";
1925        m_right->intoString(oss);
1926        oss << ')';
1927        break;
1928    default:    default:
1929      oss << "UNKNOWN";      oss << "UNKNOWN";
1930    }    }
1931  }  }
1932    
1933    
1934    void
1935    DataLazy::intoTreeString(ostringstream& oss, string indent) const
1936    {
1937      oss << '[' << m_rank << ':' << setw(3) << m_samplesize << "] " << indent;
1938      switch (getOpgroup(m_op))
1939      {
1940      case G_IDENTITY:
1941        if (m_id->isExpanded())
1942        {
1943           oss << "E";
1944        }
1945        else if (m_id->isTagged())
1946        {
1947          oss << "T";
1948        }
1949        else if (m_id->isConstant())
1950        {
1951          oss << "C";
1952        }
1953        else
1954        {
1955          oss << "?";
1956        }
1957        oss << '@' << m_id.get() << endl;
1958        break;
1959      case G_BINARY:
1960        oss << opToString(m_op) << endl;
1961        indent+='.';
1962        m_left->intoTreeString(oss, indent);
1963        m_right->intoTreeString(oss, indent);
1964        break;
1965      case G_UNARY:
1966      case G_UNARY_P:
1967      case G_NP1OUT:
1968      case G_NP1OUT_P:
1969      case G_REDUCTION:
1970        oss << opToString(m_op) << endl;
1971        indent+='.';
1972        m_left->intoTreeString(oss, indent);
1973        break;
1974      case G_TENSORPROD:
1975        oss << opToString(m_op) << endl;
1976        indent+='.';
1977        m_left->intoTreeString(oss, indent);
1978        m_right->intoTreeString(oss, indent);
1979        break;
1980      case G_NP1OUT_2P:
1981        oss << opToString(m_op) << ", " << m_axis_offset << ", " << m_transpose<< endl;
1982        indent+='.';
1983        m_left->intoTreeString(oss, indent);
1984        break;
1985      default:
1986        oss << "UNKNOWN";
1987      }
1988    }
1989    
1990    
1991  DataAbstract*  DataAbstract*
1992  DataLazy::deepCopy()  DataLazy::deepCopy()
1993  {  {
1994    switch (getOpgroup(m_op))    switch (getOpgroup(m_op))
1995    {    {
1996    case G_IDENTITY:  return new DataLazy(m_id->deepCopy()->getPtr());    case G_IDENTITY:  return new DataLazy(m_id->deepCopy()->getPtr());
1997    case G_UNARY: return new DataLazy(m_left->deepCopy()->getPtr(),m_op);    case G_UNARY:
1998      case G_REDUCTION:      return new DataLazy(m_left->deepCopy()->getPtr(),m_op);
1999      case G_UNARY_P:   return new DataLazy(m_left->deepCopy()->getPtr(), m_op, m_tol);
2000    case G_BINARY:    return new DataLazy(m_left->deepCopy()->getPtr(),m_right->deepCopy()->getPtr(),m_op);    case G_BINARY:    return new DataLazy(m_left->deepCopy()->getPtr(),m_right->deepCopy()->getPtr(),m_op);
2001    case G_NP1OUT: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(),m_op);    case G_NP1OUT: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(),m_op);
2002    case G_TENSORPROD: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);    case G_TENSORPROD: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
2003      case G_NP1OUT_P:   return new DataLazy(m_left->deepCopy()->getPtr(),m_op,  m_axis_offset);
2004      case G_NP1OUT_2P:  return new DataLazy(m_left->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
2005    default:    default:
2006      throw DataException("Programmer error - do not know how to deepcopy operator "+opToString(m_op)+".");      throw DataException("Programmer error - do not know how to deepcopy operator "+opToString(m_op)+".");
2007    }    }
2008  }  }
2009    
2010    
2011    
2012  // There is no single, natural interpretation of getLength on DataLazy.  // There is no single, natural interpretation of getLength on DataLazy.
2013  // Instances of DataReady can look at the size of their vectors.  // Instances of DataReady can look at the size of their vectors.
2014  // For lazy though, it could be the size the data would be if it were resolved;  // For lazy though, it could be the size the data would be if it were resolved;
# Line 2503  DataLazy::setToZero() Line 2097  DataLazy::setToZero()
2097  //   m_readytype='C';  //   m_readytype='C';
2098  //   m_buffsRequired=1;  //   m_buffsRequired=1;
2099    
2100    privdebug=privdebug;  // to stop the compiler complaining about unused privdebug    (void)privdebug;  // to stop the compiler complaining about unused privdebug
2101    throw DataException("Programmer error - setToZero not supported for DataLazy (DataLazy objects should be read only).");    throw DataException("Programmer error - setToZero not supported for DataLazy (DataLazy objects should be read only).");
2102  }  }
2103    
