/[escript]/branches/clazy/escriptcore/src/DataLazy.cpp
ViewVC logotype

Diff of /branches/clazy/escriptcore/src/DataLazy.cpp

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

trunk/escript/src/DataLazy.cpp revision 2496 by jfenwick, Fri Jun 26 06:09:47 2009 UTC trunk/escriptcore/src/DataLazy.cpp revision 5938 by jfenwick, Thu Feb 18 06:30:35 2016 UTC
# Line 1  Line 1 
1    
2  /*******************************************************  /*****************************************************************************
3  *  *
4  * Copyright (c) 2003-2008 by University of Queensland  * Copyright (c) 2003-2016 by The University of Queensland
5  * Earth Systems Science Computational Center (ESSCC)  * http://www.uq.edu.au
 * http://www.uq.edu.au/esscc  
6  *  *
7  * Primary Business: Queensland, Australia  * Primary Business: Queensland, Australia
8  * Licensed under the Open Software License version 3.0  * Licensed under the Open Software License version 3.0
9  * http://www.opensource.org/licenses/osl-3.0.php  * http://www.opensource.org/licenses/osl-3.0.php
10  *  *
11  *******************************************************/  * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12    * Development 2012-2013 by School of Earth Sciences
13    * Development from 2014 by Centre for Geoscience Computing (GeoComp)
14    *
15    *****************************************************************************/
16    
17    #define ESNEEDPYTHON
18    #include "esysUtils/first.h"
19    
20  #include "DataLazy.h"  #include "DataLazy.h"
21  #ifdef USE_NETCDF  #include "esysUtils/Esys_MPI.h"
 #include <netcdfcpp.h>  
 #endif  
 #ifdef PASO_MPI  
 #include <mpi.h>  
 #endif  
22  #ifdef _OPENMP  #ifdef _OPENMP
23  #include <omp.h>  #include <omp.h>
24  #endif  #endif
# Line 30  Line 30 
30    
31  #include "EscriptParams.h"  #include "EscriptParams.h"
32    
33    #ifdef USE_NETCDF
34    #include <netcdfcpp.h>
35    #endif
36    
37  #include <iomanip>      // for some fancy formatting in debug  #include <iomanip>      // for some fancy formatting in debug
38    
39    using namespace escript::DataTypes;
40    
41  // #define LAZYDEBUG(X) if (privdebug){X;}  // #define LAZYDEBUG(X) if (privdebug){X;}
42  #define LAZYDEBUG(X)  #define LAZYDEBUG(X)
43  namespace  namespace
# Line 42  bool privdebug=false; Line 48  bool privdebug=false;
48  #define DISABLEDEBUG privdebug=false;  #define DISABLEDEBUG privdebug=false;
49  }  }
50    
51  // #define SIZELIMIT  // #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {cerr << "\n!!!!!!! SIZE LIMIT EXCEEDED " << m_children << ";" << m_height << endl << toString() << endl;resolveToIdentity();}
52  // #define SIZELIMIT if ((m_height>7) || (m_children>127)) {cerr << "\n!!!!!!! SIZE LIMIT EXCEEDED " << m_children << ";" << m_height << endl << toString() << endl; resolveToIdentity();}  
53  //#define SIZELIMIT if ((m_height>7) || (m_children>127)) {resolveToIdentity();}  // #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {cerr << "SIZE LIMIT EXCEEDED " << m_height << endl;resolveToIdentity();}
54    
55    
56  #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {resolveToIdentity();}  #define SIZELIMIT if (m_height>escript::escriptParams.getTOO_MANY_LEVELS())  {if (escript::escriptParams.getLAZY_VERBOSE()){cerr << "SIZE LIMIT EXCEEDED height=" << m_height << endl;}resolveToIdentity();}
57    
58  /*  /*
59  How does DataLazy work?  How does DataLazy work?
# Line 59  A special operation, IDENTITY, stores an Line 66  A special operation, IDENTITY, stores an
66  This means that all "internal" nodes in the structure are instances of DataLazy.  This means that all "internal" nodes in the structure are instances of DataLazy.
67    
68  Each operation has a string representation as well as an opgroup - eg G_IDENTITY, G_BINARY, ...  Each operation has a string representation as well as an opgroup - eg G_IDENTITY, G_BINARY, ...
69  Note that IDENITY is not considered a unary operation.  Note that IDENTITY is not considered a unary operation.
70    
71  I am avoiding calling the structure formed a tree because it is not guaranteed to be one (eg c=a+a).  I am avoiding calling the structure formed a tree because it is not guaranteed to be one (eg c=a+a).
72  It must however form a DAG (directed acyclic graph).  It must however form a DAG (directed acyclic graph).
# Line 68  I will refer to individual DataLazy obje Line 75  I will refer to individual DataLazy obje
75  Each node also stores:  Each node also stores:
76  - m_readytype \in {'E','T','C','?'} ~ indicates what sort of DataReady would be produced if the expression was  - m_readytype \in {'E','T','C','?'} ~ indicates what sort of DataReady would be produced if the expression was
77      evaluated.      evaluated.
78  - m_buffsrequired ~ the larged number of samples which would need to be kept simultaneously in order to  - m_buffsrequired ~ the large number of samples which would need to be kept simultaneously in order to
79      evaluate the expression.      evaluate the expression.
80  - m_samplesize ~ the number of doubles stored in a sample.  - m_samplesize ~ the number of doubles stored in a sample.
81    
# Line 110  namespace escript Line 117  namespace escript
117  namespace  namespace
118  {  {
119    
120    
121    // enabling this will print out when ever the maximum stacksize used by resolve increases
122    // it assumes _OPENMP is also in use
123    //#define LAZY_STACK_PROF
124    
125    
126    
127    #ifndef _OPENMP
128      #ifdef LAZY_STACK_PROF
129      #undef LAZY_STACK_PROF
130      #endif
131    #endif
132    
133    
134    #ifdef LAZY_STACK_PROF
135    std::vector<void*> stackstart(getNumberOfThreads());
136    std::vector<void*> stackend(getNumberOfThreads());
137    size_t maxstackuse=0;
138    #endif
139    
140  enum ES_opgroup  enum ES_opgroup
141  {  {
142     G_UNKNOWN,     G_UNKNOWN,
# Line 120  enum ES_opgroup Line 147  enum ES_opgroup
147     G_NP1OUT,        // non-pointwise op with one output     G_NP1OUT,        // non-pointwise op with one output
148     G_NP1OUT_P,      // non-pointwise op with one output requiring a parameter     G_NP1OUT_P,      // non-pointwise op with one output requiring a parameter
149     G_TENSORPROD,    // general tensor product     G_TENSORPROD,    // general tensor product
150     G_NP1OUT_2P      // non-pointwise op with one output requiring two params     G_NP1OUT_2P,     // non-pointwise op with one output requiring two params
151       G_REDUCTION,     // non-pointwise unary op with a scalar output
152       G_CONDEVAL
153  };  };
154    
155    
# Line 135  string ES_opstrings[]={"UNKNOWN","IDENTI Line 164  string ES_opstrings[]={"UNKNOWN","IDENTI
164              "symmetric","nonsymmetric",              "symmetric","nonsymmetric",
165              "prod",              "prod",
166              "transpose", "trace",              "transpose", "trace",
167              "swapaxes"};              "swapaxes",
168  int ES_opcount=41;              "minval", "maxval",
169                "condEval"};
170    int ES_opcount=44;
171  ES_opgroup opgroups[]={G_UNKNOWN,G_IDENTITY,G_BINARY,G_BINARY,G_BINARY,G_BINARY, G_BINARY,  ES_opgroup opgroups[]={G_UNKNOWN,G_IDENTITY,G_BINARY,G_BINARY,G_BINARY,G_BINARY, G_BINARY,
172              G_UNARY,G_UNARY,G_UNARY, //10              G_UNARY,G_UNARY,G_UNARY, //10
173              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,    // 17              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,    // 17
# Line 146  ES_opgroup opgroups[]={G_UNKNOWN,G_IDENT Line 177  ES_opgroup opgroups[]={G_UNKNOWN,G_IDENT
177              G_NP1OUT,G_NP1OUT,              G_NP1OUT,G_NP1OUT,
178              G_TENSORPROD,              G_TENSORPROD,
179              G_NP1OUT_P, G_NP1OUT_P,              G_NP1OUT_P, G_NP1OUT_P,
180              G_NP1OUT_2P};              G_NP1OUT_2P,
181                G_REDUCTION, G_REDUCTION,
182                G_CONDEVAL};
183  inline  inline
184  ES_opgroup  ES_opgroup
185  getOpgroup(ES_optype op)  getOpgroup(ES_optype op)
# Line 167  resultFS(DataAbstract_ptr left, DataAbst Line 200  resultFS(DataAbstract_ptr left, DataAbst
200    FunctionSpace r=right->getFunctionSpace();    FunctionSpace r=right->getFunctionSpace();
201    if (l!=r)    if (l!=r)
202    {    {
203      if (r.probeInterpolation(l))      signed char res=r.getDomain()->preferredInterpolationOnDomain(r.getTypeCode(), l.getTypeCode());
204        if (res==1)
205      {      {
206      return l;      return l;
207      }      }
208      if (l.probeInterpolation(r))      if (res==-1)
209      {      {
210      return r;      return r;
211      }      }
# Line 191  resultShape(DataAbstract_ptr left, DataA Line 225  resultShape(DataAbstract_ptr left, DataA
225        {        {
226          throw DataException("Shapes not the name - shapes must match for (point)binary operations.");          throw DataException("Shapes not the name - shapes must match for (point)binary operations.");
227        }        }
228    
229        if (left->getRank()==0)   // we need to allow scalar * anything        if (left->getRank()==0)   // we need to allow scalar * anything
230        {        {
231          return right->getShape();          return right->getShape();
# Line 218  resultShape(DataAbstract_ptr left, ES_op Line 253  resultShape(DataAbstract_ptr left, ES_op
253          int rank=left->getRank();          int rank=left->getRank();
254          if (axis_offset<0 || axis_offset>rank)          if (axis_offset<0 || axis_offset>rank)
255          {          {
256                 throw DataException("Error - Data::transpose must have 0 <= axis_offset <= rank=" + rank);              stringstream e;
257              }              e << "Error - Data::transpose must have 0 <= axis_offset <= rank=" << rank;
258              for (int i=0; i<rank; i++)              throw DataException(e.str());
259            }
260            for (int i=0; i<rank; i++)
261          {          {
262             int index = (axis_offset+i)%rank;             int index = (axis_offset+i)%rank;
263                 sh.push_back(s[index]); // Append to new shape             sh.push_back(s[index]); // Append to new shape
264              }          }
265          return sh;          return sh;
266         }         }
267      break;      break;
# Line 303  SwapShape(DataAbstract_ptr left, const i Line 340  SwapShape(DataAbstract_ptr left, const i
340          throw DataException("Error - Data::swapaxes argument must have at least rank 2.");          throw DataException("Error - Data::swapaxes argument must have at least rank 2.");
341       }       }
342       if (axis0<0 || axis0>rank-1) {       if (axis0<0 || axis0>rank-1) {
343          throw DataException("Error - Data::swapaxes: axis0 must be between 0 and rank-1=" + rank-1);          stringstream e;
344            e << "Error - Data::swapaxes: axis0 must be between 0 and rank-1=" << (rank-1);
345            throw DataException(e.str());
346       }       }
347       if (axis1<0 || axis1>rank-1) {       if (axis1<0 || axis1>rank-1) {
348           throw DataException("Error - Data::swapaxes: axis1 must be between 0 and rank-1=" + rank-1);          stringstream e;
349            e << "Error - Data::swapaxes: axis1 must be between 0 and rank-1=" << (rank-1);
350            throw DataException(e.str());
351       }       }
352       if (axis0 == axis1) {       if (axis0 == axis1) {
353           throw DataException("Error - Data::swapaxes: axis indices must be different.");           throw DataException("Error - Data::swapaxes: axis indices must be different.");
# Line 395  GTPShape(DataAbstract_ptr left, DataAbst Line 436  GTPShape(DataAbstract_ptr left, DataAbst
436    return shape2;    return shape2;
437  }  }
438    
 // determine the number of samples requires to evaluate an expression combining left and right  
 // NP1OUT needs an extra buffer because we can't write the answers over the top of the input.  
 // The same goes for G_TENSORPROD  
 // It might seem that pointwise binary ops (G_BINARY) could be written over the top of the lefts.  
 // This would be true were it not for the possibility that the LHS could be a scalar which needs to be examined  
 // multiple times  
 int  
 calcBuffs(const DataLazy_ptr& left, const DataLazy_ptr& right, ES_optype op)  
 {  
    switch(getOpgroup(op))  
    {  
    case G_IDENTITY: return 1;  
    case G_BINARY: return 1+max(left->getBuffsRequired(),right->getBuffsRequired()+1);  
    case G_UNARY:  
    case G_UNARY_P: return max(left->getBuffsRequired(),1);  
    case G_NP1OUT: return 1+max(left->getBuffsRequired(),1);  
    case G_NP1OUT_P: return 1+max(left->getBuffsRequired(),1);  
    case G_TENSORPROD: return 1+max(left->getBuffsRequired(),right->getBuffsRequired()+1);  
    case G_NP1OUT_2P: return 1+max(left->getBuffsRequired(),1);  
    default:  
     throw DataException("Programmer Error - attempt to calcBuffs() for operator "+opToString(op)+".");  
    }  
 }  
   
   
439  }   // end anonymous namespace  }   // end anonymous namespace
440    
441    
# Line 435  opToString(ES_optype op) Line 451  opToString(ES_optype op)
451    return ES_opstrings[op];    return ES_opstrings[op];
452  }  }
453    
454    void DataLazy::LazyNodeSetup()
455    {
456    #ifdef _OPENMP
457        int numthreads=omp_get_max_threads();
458        m_samples.resize(numthreads*m_samplesize);
459        m_sampleids=new int[numthreads];
460        for (int i=0;i<numthreads;++i)
461        {
462            m_sampleids[i]=-1;  
463        }
464    #else
465        m_samples.resize(m_samplesize);
466        m_sampleids=new int[1];
467        m_sampleids[0]=-1;
468    #endif  // _OPENMP
469    }
470    
471    
472    // Creates an identity node
473  DataLazy::DataLazy(DataAbstract_ptr p)  DataLazy::DataLazy(DataAbstract_ptr p)
474      : parent(p->getFunctionSpace(),p->getShape())      : parent(p->getFunctionSpace(),p->getShape())
475        ,m_sampleids(0),
476        m_samples(1)
477  {  {
478     if (p->isLazy())     if (p->isLazy())
479     {     {
# Line 456  LAZYDEBUG(cout << "Wrapping " << dr.get( Line 492  LAZYDEBUG(cout << "Wrapping " << dr.get(
492  LAZYDEBUG(cout << "(1)Lazy created with " << m_samplesize << endl;)  LAZYDEBUG(cout << "(1)Lazy created with " << m_samplesize << endl;)
493  }  }
494    
   
   
   
495  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op)  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op)
496      : parent(left->getFunctionSpace(),left->getShape()),      : parent(left->getFunctionSpace(),(getOpgroup(op)!=G_REDUCTION)?left->getShape():DataTypes::scalarShape),
497      m_op(op),      m_op(op),
498      m_axis_offset(0),      m_axis_offset(0),
499      m_transpose(0),      m_transpose(0),
500      m_SL(0), m_SM(0), m_SR(0)      m_SL(0), m_SM(0), m_SR(0)
501  {  {
502     if ((getOpgroup(op)!=G_UNARY) && (getOpgroup(op)!=G_NP1OUT))     if ((getOpgroup(op)!=G_UNARY) && (getOpgroup(op)!=G_NP1OUT) && (getOpgroup(op)!=G_REDUCTION))
503     {     {
504      throw DataException("Programmer error - constructor DataLazy(left, op) will only process UNARY operations.");      throw DataException("Programmer error - constructor DataLazy(left, op) will only process UNARY operations.");
505     }     }
# Line 482  DataLazy::DataLazy(DataAbstract_ptr left Line 515  DataLazy::DataLazy(DataAbstract_ptr left
515     }     }
516     m_readytype=lleft->m_readytype;     m_readytype=lleft->m_readytype;
517     m_left=lleft;     m_left=lleft;
    m_buffsRequired=calcBuffs(m_left, m_right,m_op); // yeah m_right will be null at this point  
518     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
    m_maxsamplesize=max(m_samplesize,m_left->getMaxSampleSize());  
519     m_children=m_left->m_children+1;     m_children=m_left->m_children+1;
520     m_height=m_left->m_height+1;     m_height=m_left->m_height+1;
521       LazyNodeSetup();
522     SIZELIMIT     SIZELIMIT
523  }  }
524    
# Line 553  LAZYDEBUG(cout << "Right " << right.get( Line 585  LAZYDEBUG(cout << "Right " << right.get(
585      m_readytype='C';      m_readytype='C';
586     }     }
587     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
    m_maxsamplesize=max(max(m_samplesize,m_right->getMaxSampleSize()),m_left->getMaxSampleSize());    
    m_buffsRequired=calcBuffs(m_left, m_right,m_op);  
588     m_children=m_left->m_children+m_right->m_children+2;     m_children=m_left->m_children+m_right->m_children+2;
589     m_height=max(m_left->m_height,m_right->m_height)+1;     m_height=max(m_left->m_height,m_right->m_height)+1;
590       LazyNodeSetup();
591     SIZELIMIT     SIZELIMIT
592  LAZYDEBUG(cout << "(3)Lazy created with " << m_samplesize << endl;)  LAZYDEBUG(cout << "(3)Lazy created with " << m_samplesize << endl;)
593  }  }
# Line 620  DataLazy::DataLazy(DataAbstract_ptr left Line 651  DataLazy::DataLazy(DataAbstract_ptr left
651      m_readytype='C';      m_readytype='C';
652     }     }
653     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
    m_maxsamplesize=max(max(m_samplesize,m_right->getMaxSampleSize()),m_left->getMaxSampleSize());    
    m_buffsRequired=calcBuffs(m_left, m_right,m_op);  
654     m_children=m_left->m_children+m_right->m_children+2;     m_children=m_left->m_children+m_right->m_children+2;
655     m_height=max(m_left->m_height,m_right->m_height)+1;     m_height=max(m_left->m_height,m_right->m_height)+1;
656       LazyNodeSetup();
657     SIZELIMIT     SIZELIMIT
658  LAZYDEBUG(cout << "(4)Lazy created with " << m_samplesize << endl;)  LAZYDEBUG(cout << "(4)Lazy created with " << m_samplesize << endl;)
659  }  }
# Line 651  DataLazy::DataLazy(DataAbstract_ptr left Line 681  DataLazy::DataLazy(DataAbstract_ptr left
681     }     }
682     m_readytype=lleft->m_readytype;     m_readytype=lleft->m_readytype;
683     m_left=lleft;     m_left=lleft;
    m_buffsRequired=calcBuffs(m_left, m_right,m_op); // yeah m_right will be null at this point  
684     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
    m_maxsamplesize=max(m_samplesize,m_left->getMaxSampleSize());  
685     m_children=m_left->m_children+1;     m_children=m_left->m_children+1;
686     m_height=m_left->m_height+1;     m_height=m_left->m_height+1;
687       LazyNodeSetup();
688     SIZELIMIT     SIZELIMIT
689  LAZYDEBUG(cout << "(5)Lazy created with " << m_samplesize << endl;)  LAZYDEBUG(cout << "(5)Lazy created with " << m_samplesize << endl;)
690  }  }
# Line 682  DataLazy::DataLazy(DataAbstract_ptr left Line 711  DataLazy::DataLazy(DataAbstract_ptr left
711     }     }
712     m_readytype=lleft->m_readytype;     m_readytype=lleft->m_readytype;
713     m_left=lleft;     m_left=lleft;
    m_buffsRequired=calcBuffs(m_left, m_right,m_op); // yeah m_right will be null at this point  
714     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
    m_maxsamplesize=max(m_samplesize,m_left->getMaxSampleSize());  
715     m_children=m_left->m_children+1;     m_children=m_left->m_children+1;
716     m_height=m_left->m_height+1;     m_height=m_left->m_height+1;
717       LazyNodeSetup();
718     SIZELIMIT     SIZELIMIT
719  LAZYDEBUG(cout << "(6)Lazy created with " << m_samplesize << endl;)  LAZYDEBUG(cout << "(6)Lazy created with " << m_samplesize << endl;)
720  }  }
# Line 714  DataLazy::DataLazy(DataAbstract_ptr left Line 742  DataLazy::DataLazy(DataAbstract_ptr left
742     }     }
743     m_readytype=lleft->m_readytype;     m_readytype=lleft->m_readytype;
744     m_left=lleft;     m_left=lleft;
    m_buffsRequired=calcBuffs(m_left, m_right,m_op); // yeah m_right will be null at this point  
745     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
    m_maxsamplesize=max(m_samplesize,m_left->getMaxSampleSize());  
746     m_children=m_left->m_children+1;     m_children=m_left->m_children+1;
747     m_height=m_left->m_height+1;     m_height=m_left->m_height+1;
748       LazyNodeSetup();
749     SIZELIMIT     SIZELIMIT
750  LAZYDEBUG(cout << "(7)Lazy created with " << m_samplesize << endl;)  LAZYDEBUG(cout << "(7)Lazy created with " << m_samplesize << endl;)
751  }  }
752    
753  DataLazy::~DataLazy()  
754    namespace
755  {  {
 }  
756    
757        inline int max3(int a, int b, int c)
758        {
759        int t=(a>b?a:b);
760        return (t>c?t:c);
761    
762  int      }
 DataLazy::getBuffsRequired() const  
 {  
     return m_buffsRequired;  
763  }  }
764    
765    DataLazy::DataLazy(DataAbstract_ptr mask, DataAbstract_ptr left, DataAbstract_ptr right/*, double tol*/)
766  size_t      : parent(left->getFunctionSpace(), left->getShape()),
767  DataLazy::getMaxSampleSize() const      m_op(CONDEVAL),
768        m_axis_offset(0),
769        m_transpose(0),
770        m_tol(0)
771  {  {
772      return m_maxsamplesize;  
773       DataLazy_ptr lmask;
774       DataLazy_ptr lleft;
775       DataLazy_ptr lright;
776       if (!mask->isLazy())
777       {
778        lmask=DataLazy_ptr(new DataLazy(mask));
779       }
780       else
781       {
782        lmask=dynamic_pointer_cast<DataLazy>(mask);
783       }
784       if (!left->isLazy())
785       {
786        lleft=DataLazy_ptr(new DataLazy(left));
787       }
788       else
789       {
790        lleft=dynamic_pointer_cast<DataLazy>(left);
791       }
792       if (!right->isLazy())
793       {
794        lright=DataLazy_ptr(new DataLazy(right));
795       }
796       else
797       {
798        lright=dynamic_pointer_cast<DataLazy>(right);
799       }
800       m_readytype=lmask->m_readytype;
801       if ((lleft->m_readytype!=lright->m_readytype) || (lmask->m_readytype!=lleft->m_readytype))
802       {
803        throw DataException("Programmer Error - condEval arguments must have the same readytype");
804       }
805       m_left=lleft;
806       m_right=lright;
807       m_mask=lmask;
808       m_samplesize=getNumDPPSample()*getNoValues();
809       m_children=m_left->m_children+m_right->m_children+m_mask->m_children+1;
810       m_height=max3(m_left->m_height,m_right->m_height,m_mask->m_height)+1;
811       LazyNodeSetup();
812       SIZELIMIT
813    LAZYDEBUG(cout << "(8)Lazy created with " << m_samplesize << endl;)
814  }  }
815    
816    
817    
818  size_t  DataLazy::~DataLazy()
 DataLazy::getSampleBufferSize() const  
819  {  {
820      return m_maxsamplesize*(max(1,m_buffsRequired));     delete[] m_sampleids;
821  }  }
822    
823    
824  /*  /*
825    \brief Evaluates the expression using methods on Data.    \brief Evaluates the expression using methods on Data.
826    This does the work for the collapse method.    This does the work for the collapse method.
827    For reasons of efficiency do not call this method on DataExpanded nodes.    For reasons of efficiency do not call this method on DataExpanded nodes.
828  */  */
829  DataReady_ptr  DataReady_ptr
830  DataLazy::collapseToReady()  DataLazy::collapseToReady() const
831  {  {
832    if (m_readytype=='E')    if (m_readytype=='E')
833    { // this is more an efficiency concern than anything else    { // this is more an efficiency concern than anything else
# Line 891  DataLazy::collapseToReady() Line 963  DataLazy::collapseToReady()
963      case SWAP:      case SWAP:
964      result=left.swapaxes(m_axis_offset, m_transpose);      result=left.swapaxes(m_axis_offset, m_transpose);
965      break;      break;
966        case MINVAL:
967        result=left.minval();
968        break;
969        case MAXVAL:
970        result=left.minval();
971        break;
972      default:      default:
973      throw DataException("Programmer error - collapseToReady does not know how to resolve operator "+opToString(m_op)+".");      throw DataException("Programmer error - collapseToReady does not know how to resolve operator "+opToString(m_op)+".");
974    }    }
# Line 904  DataLazy::collapseToReady() Line 982  DataLazy::collapseToReady()
982     the purpose of using DataLazy in the first place).     the purpose of using DataLazy in the first place).
983  */  */
984  void  void
985  DataLazy::collapse()  DataLazy::collapse() const
986  {  {
987    if (m_op==IDENTITY)    if (m_op==IDENTITY)
988    {    {
# Line 918  DataLazy::collapse() Line 996  DataLazy::collapse()
996    m_op=IDENTITY;    m_op=IDENTITY;
997  }  }
998    
999  /*  
1000    \brief Compute the value of the expression (unary operation) for the given sample.  
1001    \return Vector which stores the value of the subexpression for the given sample.  
1002    \param v A vector to store intermediate results.  
1003    \param offset Index in v to begin storing results.  
1004    \param sampleNo Sample number to evaluate.  #define PROC_OP(TYPE,X)                               \
1005    \param roffset (output parameter) the offset in the return vector where the result begins.      for (int j=0;j<onumsteps;++j)\
1006        {\
1007    The return value will be an existing vector so do not deallocate it.        for (int i=0;i<numsteps;++i,resultp+=resultStep) \
1008    If the result is stored in v it should be stored at the offset given.        { \
1009    Everything from offset to the end of v should be considered available for this method to use.  LAZYDEBUG(cout << "[left,right]=[" << lroffset << "," << rroffset << "]" << endl;)\
1010  */  LAZYDEBUG(cout << "{left,right}={" << (*left)[lroffset] << "," << (*right)[rroffset] << "}\n";)\
1011  DataTypes::ValueType*           tensor_binary_operation< TYPE >(chunksize, &((*left)[lroffset]), &((*right)[rroffset]), resultp, X); \
1012  DataLazy::resolveUnary(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const  LAZYDEBUG(cout << " result=      " << resultp[0] << endl;) \
1013             lroffset+=leftstep; \
1014             rroffset+=rightstep; \
1015          }\
1016          lroffset+=oleftstep;\
1017          rroffset+=orightstep;\
1018        }
1019    
1020    
1021    // The result will be stored in m_samples
1022    // The return value is a pointer to the DataVector, offset is the offset within the return value
1023    const DataTypes::RealVectorType*
1024    DataLazy::resolveNodeSample(int tid, int sampleNo, size_t& roffset) const
1025    {
1026    LAZYDEBUG(cout << "Resolve sample " << toString() << endl;)
1027        // collapse so we have a 'E' node or an IDENTITY for some other type
1028      if (m_readytype!='E' && m_op!=IDENTITY)
1029      {
1030        collapse();
1031      }
1032      if (m_op==IDENTITY)  
1033      {
1034        const ValueType& vec=m_id->getVectorRO();
1035        roffset=m_id->getPointOffset(sampleNo, 0);
1036    #ifdef LAZY_STACK_PROF
1037    int x;
1038    if (&x<stackend[omp_get_thread_num()])
1039    {
1040           stackend[omp_get_thread_num()]=&x;
1041    }
1042    #endif
1043        return &(vec);
1044      }
1045      if (m_readytype!='E')
1046      {
1047        throw DataException("Programmer Error - Collapse did not produce an expanded node.");
1048      }
1049      if (m_sampleids[tid]==sampleNo)
1050      {
1051        roffset=tid*m_samplesize;
1052        return &(m_samples);        // sample is already resolved
1053      }
1054      m_sampleids[tid]=sampleNo;
1055    
1056      switch (getOpgroup(m_op))
1057      {
1058      case G_UNARY:
1059      case G_UNARY_P: return resolveNodeUnary(tid, sampleNo, roffset);
1060      case G_BINARY: return resolveNodeBinary(tid, sampleNo, roffset);
1061      case G_NP1OUT: return resolveNodeNP1OUT(tid, sampleNo, roffset);
1062      case G_NP1OUT_P: return resolveNodeNP1OUT_P(tid, sampleNo, roffset);
1063      case G_TENSORPROD: return resolveNodeTProd(tid, sampleNo, roffset);
1064      case G_NP1OUT_2P: return resolveNodeNP1OUT_2P(tid, sampleNo, roffset);
1065      case G_REDUCTION: return resolveNodeReduction(tid, sampleNo, roffset);
1066      case G_CONDEVAL: return resolveNodeCondEval(tid, sampleNo, roffset);
1067      default:
1068        throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");
1069      }
1070    }
1071    
1072    const DataTypes::RealVectorType*
1073    DataLazy::resolveNodeUnary(int tid, int sampleNo, size_t& roffset) const
1074  {  {
1075      // we assume that any collapsing has been done before we get here      // we assume that any collapsing has been done before we get here
1076      // since we only have one argument we don't need to think about only      // since we only have one argument we don't need to think about only
1077      // processing single points.      // processing single points.
1078        // we will also know we won't get identity nodes
1079    if (m_readytype!='E')    if (m_readytype!='E')
1080    {    {
1081      throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");      throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
1082    }    }
1083    const ValueType* vleft=m_left->resolveSample(v,offset,sampleNo,roffset);    if (m_op==IDENTITY)
1084    const double* left=&((*vleft)[roffset]);    {
1085    double* result=&(v[offset]);      throw DataException("Programmer error - resolveNodeUnary should not be called on identity nodes.");
1086    roffset=offset;    }
1087      const DataTypes::RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, roffset);
1088      const double* left=&((*leftres)[roffset]);
1089      roffset=m_samplesize*tid;
1090      double* result=&(m_samples[roffset]);
1091    switch (m_op)    switch (m_op)
1092    {    {
1093      case SIN:        case SIN:  
# Line 1053  DataLazy::resolveUnary(ValueType& v, siz Line 1197  DataLazy::resolveUnary(ValueType& v, siz
1197      default:      default:
1198      throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");      throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");
1199    }    }
1200    return &v;    return &(m_samples);
1201  }  }
1202    
1203    
1204    const DataTypes::RealVectorType*
1205    DataLazy::resolveNodeReduction(int tid, int sampleNo, size_t& roffset) const
1206    {
1207        // we assume that any collapsing has been done before we get here
1208        // since we only have one argument we don't need to think about only
1209        // processing single points.
1210        // we will also know we won't get identity nodes
1211      if (m_readytype!='E')
1212      {
1213        throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
1214      }
1215      if (m_op==IDENTITY)
1216      {
1217        throw DataException("Programmer error - resolveNodeUnary should not be called on identity nodes.");
1218      }
1219      size_t loffset=0;
1220      const DataTypes::RealVectorType* leftres=m_left->resolveNodeSample(tid, sampleNo, loffset);
1221    
1222      roffset=m_samplesize*tid;
1223      unsigned int ndpps=getNumDPPSample();
1224      unsigned int psize=DataTypes::noValues(m_left->getShape());
1225      double* result=&(m_samples[roffset]);
1226      switch (m_op)
1227      {
1228        case MINVAL:
1229        {
1230          for (unsigned int z=0;z<ndpps;++z)
1231          {
1232            FMin op;
1233            *result=DataMaths::reductionOp(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max());
1234            loffset+=psize;
1235            result++;
1236          }
1237        }
1238        break;
1239        case MAXVAL:
1240        {
1241          for (unsigned int z=0;z<ndpps;++z)
1242          {
1243          FMax op;
1244          *result=DataMaths::reductionOp(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max()*-1);
1245          loffset+=psize;
1246          result++;
1247          }
1248        }
1249        break;
1250        default:
1251        throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");
1252      }
1253      return &(m_samples);
1254    }
1255    
1256    const DataTypes::RealVectorType*
1257    DataLazy::resolveNodeNP1OUT(int tid, int sampleNo, size_t& roffset) const
 /*  
   \brief Compute the value of the expression (unary operation) for the given sample.  
   \return Vector which stores the value of the subexpression for the given sample.  
   \param v A vector to store intermediate results.  
   \param offset Index in v to begin storing results.  
   \param sampleNo Sample number to evaluate.  
   \param roffset (output parameter) the offset in the return vector where the result begins.  
   
   The return value will be an existing vector so do not deallocate it.  
   If the result is stored in v it should be stored at the offset given.  
   Everything from offset to the end of v should be considered available for this method to use.  
 */  
 DataTypes::ValueType*  
 DataLazy::resolveNP1OUT(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const  
1258  {  {
1259      // we assume that any collapsing has been done before we get here      // we assume that any collapsing has been done before we get here
1260      // since we only have one argument we don't need to think about only      // since we only have one argument we don't need to think about only
1261      // processing single points.      // processing single points.
1262    if (m_readytype!='E')    if (m_readytype!='E')
1263    {    {
1264      throw DataException("Programmer error - resolveNP1OUT should only be called on expanded Data.");      throw DataException("Programmer error - resolveNodeNP1OUT should only be called on expanded Data.");
1265    }    }
1266      // since we can't write the result over the input, we need a result offset further along    if (m_op==IDENTITY)
1267    size_t subroffset=roffset+m_samplesize;    {
1268  LAZYDEBUG(cerr << "subroffset=" << subroffset << endl;)      throw DataException("Programmer error - resolveNodeNP1OUT should not be called on identity nodes.");
1269    const ValueType* vleft=m_left->resolveSample(v,offset+m_left->m_samplesize,sampleNo,subroffset);    }
1270    roffset=offset;    size_t subroffset;
1271      const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1272      roffset=m_samplesize*tid;
1273    size_t loop=0;    size_t loop=0;
1274    size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;    size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1275    size_t step=getNoValues();    size_t step=getNoValues();
1276      size_t offset=roffset;
1277    switch (m_op)    switch (m_op)
1278    {    {
1279      case SYM:      case SYM:
1280      for (loop=0;loop<numsteps;++loop)      for (loop=0;loop<numsteps;++loop)
1281      {      {
1282          DataMaths::symmetric(*vleft,m_left->getShape(),subroffset, v, getShape(), offset);          DataMaths::symmetric(*leftres,m_left->getShape(),subroffset, m_samples, getShape(), offset);
1283          subroffset+=step;          subroffset+=step;
1284          offset+=step;          offset+=step;
1285      }      }
# Line 1104  LAZYDEBUG(cerr << "subroffset=" << subro Line 1287  LAZYDEBUG(cerr << "subroffset=" << subro
1287      case NSYM:      case NSYM:
1288      for (loop=0;loop<numsteps;++loop)      for (loop=0;loop<numsteps;++loop)
1289      {      {
1290          DataMaths::nonsymmetric(*vleft,m_left->getShape(),subroffset, v, getShape(), offset);          DataMaths::nonsymmetric(*leftres,m_left->getShape(),subroffset, m_samples, getShape(), offset);
1291          subroffset+=step;          subroffset+=step;
1292          offset+=step;          offset+=step;
1293      }      }
# Line 1112  LAZYDEBUG(cerr << "subroffset=" << subro Line 1295  LAZYDEBUG(cerr << "subroffset=" << subro
1295      default:      default:
1296      throw DataException("Programmer error - resolveNP1OUT can not resolve operator "+opToString(m_op)+".");      throw DataException("Programmer error - resolveNP1OUT can not resolve operator "+opToString(m_op)+".");
1297    }    }
1298    return &v;    return &m_samples;
1299  }  }
1300    
1301  /*  const DataTypes::RealVectorType*
1302    \brief Compute the value of the expression (unary operation) for the given sample.  DataLazy::resolveNodeNP1OUT_P(int tid, int sampleNo, size_t& roffset) const
   \return Vector which stores the value of the subexpression for the given sample.  
   \param v A vector to store intermediate results.  
   \param offset Index in v to begin storing results.  
   \param sampleNo Sample number to evaluate.  
   \param roffset (output parameter) the offset in the return vector where the result begins.  
   
   The return value will be an existing vector so do not deallocate it.  
   If the result is stored in v it should be stored at the offset given.  
   Everything from offset to the end of v should be considered available for this method to use.  
 */  
 DataTypes::ValueType*  
 DataLazy::resolveNP1OUT_P(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const  
1303  {  {
1304      // we assume that any collapsing has been done before we get here      // we assume that any collapsing has been done before we get here
1305      // since we only have one argument we don't need to think about only      // since we only have one argument we don't need to think about only
1306      // processing single points.      // processing single points.
1307    if (m_readytype!='E')    if (m_readytype!='E')
1308    {    {
1309      throw DataException("Programmer error - resolveNP1OUT_P should only be called on expanded Data.");      throw DataException("Programmer error - resolveNodeNP1OUT_P should only be called on expanded Data.");
1310      }
1311      if (m_op==IDENTITY)
1312      {
1313        throw DataException("Programmer error - resolveNodeNP1OUT_P should not be called on identity nodes.");
1314    }    }
     // since we can't write the result over the input, we need a result offset further along  
1315    size_t subroffset;    size_t subroffset;
1316    const ValueType* vleft=m_left->resolveSample(v,offset+m_left->m_samplesize,sampleNo,subroffset);    size_t offset;
1317  LAZYDEBUG(cerr << "srcsamplesize=" << offset+m_left->m_samplesize << " beg=" << subroffset << endl;)    const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1318  LAZYDEBUG(cerr << "Offset for 5800=" << getPointOffset(5800/getNumDPPSample(),5800%getNumDPPSample()) << endl;)    roffset=m_samplesize*tid;
1319    roffset=offset;    offset=roffset;
1320    size_t loop=0;    size_t loop=0;
1321    size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;    size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1322    size_t outstep=getNoValues();    size_t outstep=getNoValues();
1323    size_t instep=m_left->getNoValues();    size_t instep=m_left->getNoValues();
 LAZYDEBUG(cerr << "instep=" << instep << " outstep=" << outstep<< " numsteps=" << numsteps << endl;)  
1324    switch (m_op)    switch (m_op)
1325    {    {
1326      case TRACE:      case TRACE:
1327      for (loop=0;loop<numsteps;++loop)      for (loop=0;loop<numsteps;++loop)
1328      {      {
1329  size_t zz=sampleNo*getNumDPPSample()+loop;              DataMaths::trace(*leftres,m_left->getShape(),subroffset, m_samples ,getShape(),offset,m_axis_offset);
 if (zz==5800)  
 {  
 LAZYDEBUG(cerr << "point=" <<  zz<< endl;)  
 LAZYDEBUG(cerr << "Input to  trace=" << DataTypes::pointToString(*vleft,m_left->getShape(),subroffset,"") << endl;)  
 LAZYDEBUG(cerr << "Offset for point=" << getPointOffset(5800/getNumDPPSample(),5800%getNumDPPSample()) << " vs ";)  
 LAZYDEBUG(cerr << subroffset << endl;)  
 LAZYDEBUG(cerr << "output=" << offset << endl;)  
 }  
             DataMaths::trace(*vleft,m_left->getShape(),subroffset, v ,getShape(),offset,m_axis_offset);  
 if (zz==5800)  
 {  
 LAZYDEBUG(cerr << "Result of trace=" << DataTypes::pointToString(v,getShape(),offset,"") << endl;)  
 }  
1330          subroffset+=instep;          subroffset+=instep;
1331          offset+=outstep;          offset+=outstep;
1332      }      }
# Line 1174  LAZYDEBUG(cerr << "Result of trace=" << Line 1334  LAZYDEBUG(cerr << "Result of trace=" <<
1334      case TRANS:      case TRANS:
1335      for (loop=0;loop<numsteps;++loop)      for (loop=0;loop<numsteps;++loop)
1336      {      {
1337              DataMaths::transpose(*vleft,m_left->getShape(),subroffset, v,getShape(),offset,m_axis_offset);              DataMaths::transpose(*leftres,m_left->getShape(),subroffset, m_samples, getShape(),offset,m_axis_offset);
1338          subroffset+=instep;          subroffset+=instep;
1339          offset+=outstep;          offset+=outstep;
1340      }      }
# Line 1182  LAZYDEBUG(cerr << "Result of trace=" << Line 1342  LAZYDEBUG(cerr << "Result of trace=" <<
1342      default:      default:
1343      throw DataException("Programmer error - resolveNP1OUTP can not resolve operator "+opToString(m_op)+".");      throw DataException("Programmer error - resolveNP1OUTP can not resolve operator "+opToString(m_op)+".");
1344    }    }
1345    return &v;    return &m_samples;
1346  }  }
1347    
1348    
1349  /*  const DataTypes::RealVectorType*
1350    \brief Compute the value of the expression (unary operation with int params) for the given sample.  DataLazy::resolveNodeNP1OUT_2P(int tid, int sampleNo, size_t& roffset) const
   \return Vector which stores the value of the subexpression for the given sample.  
   \param v A vector to store intermediate results.  
   \param offset Index in v to begin storing results.  
   \param sampleNo Sample number to evaluate.  
   \param roffset (output parameter) the offset in the return vector where the result begins.  
   
   The return value will be an existing vector so do not deallocate it.  
   If the result is stored in v it should be stored at the offset given.  
   Everything from offset to the end of v should be considered available for this method to use.  
 */  
 DataTypes::ValueType*  
 DataLazy::resolveNP1OUT_2P(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const  
1351  {  {
     // we assume that any collapsing has been done before we get here  
     // since we only have one argument we don't need to think about only  
     // processing single points.  
1352    if (m_readytype!='E')    if (m_readytype!='E')
1353    {    {
1354      throw DataException("Programmer error - resolveNP1OUT_2P should only be called on expanded Data.");      throw DataException("Programmer error - resolveNodeNP1OUT_2P should only be called on expanded Data.");
1355      }
1356      if (m_op==IDENTITY)
1357      {
1358        throw DataException("Programmer error - resolveNodeNP1OUT_2P should not be called on identity nodes.");
1359    }    }
     // since we can't write the result over the input, we need a result offset further along  
1360    size_t subroffset;    size_t subroffset;
1361    const ValueType* vleft=m_left->resolveSample(v,offset+m_left->m_samplesize,sampleNo,subroffset);    size_t offset;
1362  LAZYDEBUG(cerr << "srcsamplesize=" << offset+m_left->m_samplesize << " beg=" << subroffset << endl;)    const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1363  LAZYDEBUG(cerr << "Offset for 5800=" << getPointOffset(5800/getNumDPPSample(),5800%getNumDPPSample()) << endl;)    roffset=m_samplesize*tid;
1364    roffset=offset;    offset=roffset;
1365    size_t loop=0;    size_t loop=0;
1366    size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;    size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1367    size_t outstep=getNoValues();    size_t outstep=getNoValues();
1368    size_t instep=m_left->getNoValues();    size_t instep=m_left->getNoValues();
 LAZYDEBUG(cerr << "instep=" << instep << " outstep=" << outstep<< " numsteps=" << numsteps << endl;)  
1369    switch (m_op)    switch (m_op)
1370    {    {
1371      case SWAP:      case SWAP:
1372      for (loop=0;loop<numsteps;++loop)      for (loop=0;loop<numsteps;++loop)
1373      {      {
1374              DataMaths::swapaxes(*vleft,m_left->getShape(),subroffset, v,getShape(),offset,m_axis_offset, m_transpose);              DataMaths::swapaxes(*leftres,m_left->getShape(),subroffset, m_samples, getShape(),offset, m_axis_offset, m_transpose);
1375          subroffset+=instep;          subroffset+=instep;
1376          offset+=outstep;          offset+=outstep;
1377      }      }
1378      break;      break;
1379      default:      default:
1380      throw DataException("Programmer error - resolveNP1OUT2P can not resolve operator "+opToString(m_op)+".");      throw DataException("Programmer error - resolveNodeNP1OUT2P can not resolve operator "+opToString(m_op)+".");
1381    }    }
1382    return &v;    return &m_samples;
1383  }  }
1384    
1385    const DataTypes::RealVectorType*
1386    DataLazy::resolveNodeCondEval(int tid, int sampleNo, size_t& roffset) const
1387    {
1388      if (m_readytype!='E')
1389      {
1390        throw DataException("Programmer error - resolveNodeCondEval should only be called on expanded Data.");
1391      }
1392      if (m_op!=CONDEVAL)
1393      {
1394        throw DataException("Programmer error - resolveNodeCondEval should only be called on CONDEVAL nodes.");
1395      }
1396      size_t subroffset;
1397    
1398      const ValueType* maskres=m_mask->resolveNodeSample(tid, sampleNo, subroffset);
1399      const ValueType* srcres=0;
1400      if ((*maskres)[subroffset]>0)
1401      {
1402        srcres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1403      }
1404      else
1405      {
1406        srcres=m_right->resolveNodeSample(tid, sampleNo, subroffset);
1407      }
1408    
1409  #define PROC_OP(TYPE,X)                               \    // Now we need to copy the result
1410      for (int j=0;j<onumsteps;++j)\  
1411      {\    roffset=m_samplesize*tid;
1412        for (int i=0;i<numsteps;++i,resultp+=resultStep) \    for (int i=0;i<m_samplesize;++i)
1413        { \    {
1414  LAZYDEBUG(cout << "[left,right]=[" << lroffset << "," << rroffset << "]" << endl;)\      m_samples[roffset+i]=(*srcres)[subroffset+i];  
1415  LAZYDEBUG(cout << "{left,right}={" << (*left)[lroffset] << "," << (*right)[rroffset] << "}\n";)\    }
1416           tensor_binary_operation< TYPE >(chunksize, &((*left)[lroffset]), &((*right)[rroffset]), resultp, X); \  
1417  LAZYDEBUG(cout << " result=      " << resultp[0] << endl;) \    return &m_samples;
1418           lroffset+=leftstep; \  }
          rroffset+=rightstep; \  
       }\  
       lroffset+=oleftstep;\  
       rroffset+=orightstep;\  
     }  
1419    
 /*  
   \brief Compute the value of the expression (binary operation) for the given sample.  
   \return Vector which stores the value of the subexpression for the given sample.  
   \param v A vector to store intermediate results.  
   \param offset Index in v to begin storing results.  
   \param sampleNo Sample number to evaluate.  
   \param roffset (output parameter) the offset in the return vector where the result begins.  
   
   The return value will be an existing vector so do not deallocate it.  
   If the result is stored in v it should be stored at the offset given.  
   Everything from offset to the end of v should be considered available for this method to use.  
 */  
1420  // This method assumes that any subexpressions which evaluate to Constant or Tagged Data  // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1421  // have already been collapsed to IDENTITY. So we must have at least one expanded child.  // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1422  // If both children are expanded, then we can process them in a single operation (we treat  // If both children are expanded, then we can process them in a single operation (we treat
# Line 1274  LAZYDEBUG(cout << " result=      " << re Line 1426  LAZYDEBUG(cout << " result=      " << re
1426  // There is an additional complication when scalar operations are considered.  // There is an additional complication when scalar operations are considered.
1427  // For example, 2+Vector.  // For example, 2+Vector.
1428  // In this case each double within the point is treated individually  // In this case each double within the point is treated individually
1429  DataTypes::ValueType*  const DataTypes::RealVectorType*
1430  DataLazy::resolveBinary(ValueType& v,  size_t offset, int sampleNo, size_t& roffset) const  DataLazy::resolveNodeBinary(int tid, int sampleNo, size_t& roffset) const
1431  {  {
1432  LAZYDEBUG(cout << "Resolve binary: " << toString() << endl;)  LAZYDEBUG(cout << "Resolve binary: " << toString() << endl;)
1433    
# Line 1387  LAZYDEBUG(cout << "Resolve binary: " << Line 1539  LAZYDEBUG(cout << "Resolve binary: " <<
1539    
1540    int resultStep=max(leftstep,rightstep);   // only one (at most) should be !=0    int resultStep=max(leftstep,rightstep);   // only one (at most) should be !=0
1541      // Get the values of sub-expressions      // Get the values of sub-expressions
1542    const ValueType* left=m_left->resolveSample(v,offset+getMaxSampleSize(),sampleNo,lroffset);   // see note on    const ValueType* left=m_left->resolveNodeSample(tid,sampleNo,lroffset);  
1543      // calcBufss for why we can't put offset as the 2nd param above    const ValueType* right=m_right->resolveNodeSample(tid,sampleNo,rroffset);
   const ValueType* right=m_right->resolveSample(v,offset+2*getMaxSampleSize(),sampleNo,rroffset); // Note  
     // the right child starts further along.  
1544  LAZYDEBUG(cout << "Post sub calls in " << toString() << endl;)  LAZYDEBUG(cout << "Post sub calls in " << toString() << endl;)
1545  LAZYDEBUG(cout << "shapes=" << DataTypes::shapeToString(m_left->getShape()) << "," << DataTypes::shapeToString(m_right->getShape()) << endl;)  LAZYDEBUG(cout << "shapes=" << DataTypes::shapeToString(m_left->getShape()) << "," << DataTypes::shapeToString(m_right->getShape()) << endl;)
1546  LAZYDEBUG(cout << "chunksize=" << chunksize << endl << "leftstep=" << leftstep << " rightstep=" << rightstep;)  LAZYDEBUG(cout << "chunksize=" << chunksize << endl << "leftstep=" << leftstep << " rightstep=" << rightstep;)
# Line 1399  LAZYDEBUG(cout << "onumsteps=" << onumst Line 1549  LAZYDEBUG(cout << "onumsteps=" << onumst
1549  LAZYDEBUG(cout << " DPPS=" << m_left->getNumDPPSample() << "," <<m_right->getNumDPPSample() << endl;)  LAZYDEBUG(cout << " DPPS=" << m_left->getNumDPPSample() << "," <<m_right->getNumDPPSample() << endl;)
1550  LAZYDEBUG(cout << "" << LS << RS << LN << RN << LES << RES <<LEN << REN <<   endl;)  LAZYDEBUG(cout << "" << LS << RS << LN << RN << LES << RES <<LEN << REN <<   endl;)
1551    
1552    LAZYDEBUG(cout << "Left res["<< lroffset<< "]=" << (*left)[lroffset] << endl;)
1553    LAZYDEBUG(cout << "Right res["<< rroffset<< "]=" << (*right)[rroffset] << endl;)
1554    
1555    
1556    double* resultp=&(v[offset]);     // results are stored at the vector offset we recieved    roffset=m_samplesize*tid;
1557      double* resultp=&(m_samples[roffset]);        // results are stored at the vector offset we received
1558    switch(m_op)    switch(m_op)
1559    {    {
1560      case ADD:      case ADD:
# Line 1421  LAZYDEBUG(cout << "" << LS << RS << LN < Line 1575  LAZYDEBUG(cout << "" << LS << RS << LN <
1575      default:      default:
1576      throw DataException("Programmer error - resolveBinary can not resolve operator "+opToString(m_op)+".");      throw DataException("Programmer error - resolveBinary can not resolve operator "+opToString(m_op)+".");
1577    }    }
1578    roffset=offset;  LAZYDEBUG(cout << "Result res[" << roffset<< "]" << m_samples[roffset] << endl;)
1579    return &v;    return &m_samples;
1580  }  }
1581    
1582    
   
 /*  
   \brief Compute the value of the expression (tensor product) for the given sample.  
   \return Vector which stores the value of the subexpression for the given sample.  
   \param v A vector to store intermediate results.  
   \param offset Index in v to begin storing results.  
   \param sampleNo Sample number to evaluate.  
   \param roffset (output parameter) the offset in the return vector where the result begins.  
   
   The return value will be an existing vector so do not deallocate it.  
   If the result is stored in v it should be stored at the offset given.  
   Everything from offset to the end of v should be considered available for this method to use.  
 */  
1583  // This method assumes that any subexpressions which evaluate to Constant or Tagged Data  // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1584  // have already been collapsed to IDENTITY. So we must have at least one expanded child.  // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1585  // unlike the other resolve helpers, we must treat these datapoints separately.  // unlike the other resolve helpers, we must treat these datapoints separately.
1586  DataTypes::ValueType*  const DataTypes::RealVectorType*
1587  DataLazy::resolveTProd(ValueType& v,  size_t offset, int sampleNo, size_t& roffset) const  DataLazy::resolveNodeTProd(int tid, int sampleNo, size_t& roffset) const
1588  {  {
1589  LAZYDEBUG(cout << "Resolve TensorProduct: " << toString()  << " to offset " << offset<< endl;)  LAZYDEBUG(cout << "Resolve TensorProduct: " << toString() << endl;)
1590    
1591    size_t lroffset=0, rroffset=0;    // offsets in the left and right result vectors    size_t lroffset=0, rroffset=0;    // offsets in the left and right result vectors
1592      // first work out which of the children are expanded      // first work out which of the children are expanded
1593    bool leftExp=(m_left->m_readytype=='E');    bool leftExp=(m_left->m_readytype=='E');
1594    bool rightExp=(m_right->m_readytype=='E');    bool rightExp=(m_right->m_readytype=='E');
1595    int steps=getNumDPPSample();    int steps=getNumDPPSample();
 /*  int leftStep=((leftExp && !rightExp)? m_right->getNoValues() : 0);  
   int rightStep=((rightExp && !leftExp)? m_left->getNoValues() : 0);*/  
1596    int leftStep=(leftExp? m_left->getNoValues() : 0);        // do not have scalars as input to this method    int leftStep=(leftExp? m_left->getNoValues() : 0);        // do not have scalars as input to this method
1597    int rightStep=(rightExp?m_right->getNoValues() : 0);    int rightStep=(rightExp?m_right->getNoValues() : 0);
1598    
1599    int resultStep=getNoValues();    int resultStep=getNoValues();
1600      // Get the values of sub-expressions (leave a gap of one sample for the result).    roffset=m_samplesize*tid;
1601    int gap=offset+m_samplesize;      size_t offset=roffset;
   
 LAZYDEBUG(cout << "Query left with offset=" << gap << endl;)  
   
   const ValueType* left=m_left->resolveSample(v,gap,sampleNo,lroffset);  
   gap+=m_left->getMaxSampleSize();  
1602    
1603      const ValueType* left=m_left->resolveNodeSample(tid, sampleNo, lroffset);
1604    
1605  LAZYDEBUG(cout << "Query right with offset=" << gap << endl;)    const ValueType* right=m_right->resolveNodeSample(tid, sampleNo, rroffset);
   
   
   const ValueType* right=m_right->resolveSample(v,gap,sampleNo,rroffset);  
1606    
1607  LAZYDEBUG(cerr << "[Left shape]=" << DataTypes::shapeToString(m_left->getShape()) << "\n[Right shape]=" << DataTypes::shapeToString(m_right->getShape()) << " result=" <<DataTypes::shapeToString(getShape()) <<  endl;  LAZYDEBUG(cerr << "[Left shape]=" << DataTypes::shapeToString(m_left->getShape()) << "\n[Right shape]=" << DataTypes::shapeToString(m_right->getShape()) << " result=" <<DataTypes::shapeToString(getShape()) <<  endl;
1608  cout << getNoValues() << endl;)  cout << getNoValues() << endl;)
 LAZYDEBUG(cerr << "Result of left=";)  
 LAZYDEBUG(cerr << "[" << lroffset << " .. " << lroffset+m_left->getNoValues() << "]" << endl;  
1609    
1610  for (int i=lroffset, limit=lroffset+(leftExp?m_left->getNoValues()*m_left->getNumDPPSample():m_left->getNoValues());i<limit;++i)  
 {  
 cout << "[" << setw(2) << i-lroffset << "] " << setw(10) << (*left)[i] << " ";  
 if (i%4==0) cout << endl;  
 })  
 LAZYDEBUG(cerr << "\nResult of right=" << endl;)  
 LAZYDEBUG(  
 for (int i=rroffset, limit=rroffset+(rightExp?m_right->getNoValues()*m_right->getNumDPPSample():m_right->getNoValues());i<limit;++i)  
 {  
 cerr << "[" <<  setw(2)<< i-rroffset << "] " << setw(10) << (*right)[i] << " ";  
 if (i%4==0) cout << endl;  
 }  
 cerr << endl;  
 )  
1611  LAZYDEBUG(cerr << "Post sub calls: " << toString() << endl;)  LAZYDEBUG(cerr << "Post sub calls: " << toString() << endl;)
1612  LAZYDEBUG(cout << "LeftExp=" << leftExp << " rightExp=" << rightExp << endl;)  LAZYDEBUG(cout << "LeftExp=" << leftExp << " rightExp=" << rightExp << endl;)
1613  LAZYDEBUG(cout << "LeftR=" << m_left->getRank() << " rightExp=" << m_right->getRank() << endl;)  LAZYDEBUG(cout << "LeftR=" << m_left->getRank() << " rightExp=" << m_right->getRank() << endl;)
# Line 1499  LAZYDEBUG(cout << "m_samplesize=" << m_s Line 1616  LAZYDEBUG(cout << "m_samplesize=" << m_s
1616  LAZYDEBUG(cout << "outputshape=" << DataTypes::shapeToString(getShape()) << endl;)  LAZYDEBUG(cout << "outputshape=" << DataTypes::shapeToString(getShape()) << endl;)
1617  LAZYDEBUG(cout << "DPPS=" << m_right->getNumDPPSample() <<"."<<endl;)  LAZYDEBUG(cout << "DPPS=" << m_right->getNumDPPSample() <<"."<<endl;)
1618    
1619    double* resultp=&(v[offset]);     // results are stored at the vector offset we recieved    double* resultp=&(m_samples[offset]);     // results are stored at the vector offset we received
1620    switch(m_op)    switch(m_op)
1621    {    {
1622      case PROD:      case PROD:
1623      for (int i=0;i<steps;++i,resultp+=resultStep)      for (int i=0;i<steps;++i,resultp+=resultStep)
1624      {      {
   
 LAZYDEBUG(cout << "lroffset=" << lroffset << "rroffset=" << rroffset << endl;)  
 LAZYDEBUG(cout << "l*=" << left << " r*=" << right << endl;)  
 LAZYDEBUG(cout << "m_SL=" << m_SL << " m_SM=" << m_SM << " m_SR=" << m_SR << endl;)  
   
1625            const double *ptr_0 = &((*left)[lroffset]);            const double *ptr_0 = &((*left)[lroffset]);
1626            const double *ptr_1 = &((*right)[rroffset]);            const double *ptr_1 = &((*right)[rroffset]);
1627    
# Line 1518  LAZYDEBUG(cout << DataTypes::pointToStri Line 1630  LAZYDEBUG(cout << DataTypes::pointToStri
1630    
1631            matrix_matrix_product(m_SL, m_SM, m_SR, ptr_0, ptr_1, resultp, m_transpose);            matrix_matrix_product(m_SL, m_SM, m_SR, ptr_0, ptr_1, resultp, m_transpose);
1632    
 LAZYDEBUG(cout << "Results--\n";  
 {  
   DataVector dv(getNoValues());  
 for (int z=0;z<getNoValues();++z)  
 {  
   cout << "[" << setw(2) << z<< "] " << setw(10) << resultp[z] << " ";  
   if (z%4==0) cout << endl;  
   dv[z]=resultp[z];  
 }  
 cout << endl << DataTypes::pointToString(dv,getShape(),0,"RESLT");  
 cout << "\nWritten to: " << resultp << " resultStep=" << resultStep << endl;  
 }  
 )  
1633        lroffset+=leftStep;        lroffset+=leftStep;
1634        rroffset+=rightStep;        rroffset+=rightStep;
1635      }      }
# Line 1539  cout << "\nWritten to: " << resultp << " Line 1638  cout << "\nWritten to: " << resultp << "
1638      throw DataException("Programmer error - resolveTProduct can not resolve operator "+opToString(m_op)+".");      throw DataException("Programmer error - resolveTProduct can not resolve operator "+opToString(m_op)+".");
1639    }    }
1640    roffset=offset;    roffset=offset;
1641    return &v;    return &m_samples;
1642  }  }
1643    
1644    
1645    const DataTypes::RealVectorType*
1646  /*  DataLazy::resolveSample(int sampleNo, size_t& roffset) const
   \brief Compute the value of the expression for the given sample.  
   \return Vector which stores the value of the subexpression for the given sample.  
   \param v A vector to store intermediate results.  
   \param offset Index in v to begin storing results.  
   \param sampleNo Sample number to evaluate.  
   \param roffset (output parameter) the offset in the return vector where the result begins.  
   
   The return value will be an existing vector so do not deallocate it.  
 */  
 // the vector and the offset are a place where the method could write its data if it wishes  
 // it is not obligated to do so. For example, if it has its own storage already, it can use that.  
 // Hence the return value to indicate where the data is actually stored.  
 // Regardless, the storage should be assumed to be used, even if it isn't.  
   
 // the roffset is the offset within the returned vector where the data begins  
 const DataTypes::ValueType*  
 DataLazy::resolveSample(ValueType& v, size_t offset, int sampleNo, size_t& roffset)  
 {  
 LAZYDEBUG(cout << "Resolve sample " << toString() << endl;)  
     // collapse so we have a 'E' node or an IDENTITY for some other type  
   if (m_readytype!='E' && m_op!=IDENTITY)  
   {  
     collapse();  
   }  
   if (m_op==IDENTITY)    
   {  
     const ValueType& vec=m_id->getVectorRO();  
     if (m_readytype=='C')  
     {  
     roffset=0;  
 LAZYDEBUG(cout << "Finish  sample " << toString() << endl;)  
     return &(vec);  
     }  
     roffset=m_id->getPointOffset(sampleNo, 0);  
 LAZYDEBUG(cout << "Finish  sample " << toString() << endl;)  
     return &(vec);  
   }  
   if (m_readytype!='E')  
   {  
     throw DataException("Programmer Error - Collapse did not produce an expanded node.");  
   }  
   switch (getOpgroup(m_op))  
   {  
   case G_UNARY:  
   case G_UNARY_P: return resolveUnary(v, offset,sampleNo,roffset);  
   case G_BINARY: return resolveBinary(v, offset,sampleNo,roffset);  
   case G_NP1OUT: return resolveNP1OUT(v, offset, sampleNo,roffset);  
   case G_NP1OUT_P: return resolveNP1OUT_P(v, offset, sampleNo,roffset);  
   case G_TENSORPROD: return resolveTProd(v,offset, sampleNo,roffset);  
   case G_NP1OUT_2P: return resolveNP1OUT_2P(v, offset, sampleNo, roffset);  
   default:  
     throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");  
   }  
   
 }  
   
 const DataTypes::ValueType*  
 DataLazy::resolveSample(BufferGroup& bg, int sampleNo, size_t& roffset)  
1647  {  {
1648  #ifdef _OPENMP  #ifdef _OPENMP
1649      int tid=omp_get_thread_num();      int tid=omp_get_thread_num();
1650  #else  #else
1651      int tid=0;      int tid=0;
1652  #endif  #endif
1653      return resolveSample(bg.getBuffer(tid),bg.getOffset(tid),sampleNo,roffset);  
1654    #ifdef LAZY_STACK_PROF
1655        stackstart[tid]=&tid;
1656        stackend[tid]=&tid;
1657        const DataTypes::RealVectorType* r=resolveNodeSample(tid, sampleNo, roffset);
1658        size_t d=(size_t)stackstart[tid]-(size_t)stackend[tid];
1659        #pragma omp critical
1660        if (d>maxstackuse)
1661        {
1662    cout << "Max resolve Stack use " << d << endl;
1663            maxstackuse=d;
1664        }
1665        return r;
1666    #else
1667        return resolveNodeSample(tid, sampleNo, roffset);
1668    #endif
1669  }  }
1670    
1671    
1672  // This needs to do the work of the idenity constructor  // This needs to do the work of the identity constructor
1673  void  void
1674  DataLazy::resolveToIdentity()  DataLazy::resolveToIdentity()
1675  {  {
1676     if (m_op==IDENTITY)     if (m_op==IDENTITY)
1677      return;      return;
1678     DataReady_ptr p=resolve();     DataReady_ptr p=resolveNodeWorker();
1679     makeIdentity(p);     makeIdentity(p);
1680  }  }
1681    
# Line 1635  void DataLazy::makeIdentity(const DataRe Line 1691  void DataLazy::makeIdentity(const DataRe
1691     else if(p->isExpanded()) {m_readytype='E';}     else if(p->isExpanded()) {m_readytype='E';}
1692     else if (p->isTagged()) {m_readytype='T';}     else if (p->isTagged()) {m_readytype='T';}
1693     else {throw DataException("Unknown DataReady instance in convertToIdentity constructor.");}     else {throw DataException("Unknown DataReady instance in convertToIdentity constructor.");}
    m_buffsRequired=1;  
1694     m_samplesize=p->getNumDPPSample()*p->getNoValues();     m_samplesize=p->getNumDPPSample()*p->getNoValues();
    m_maxsamplesize=m_samplesize;  
1695     m_left.reset();     m_left.reset();
1696     m_right.reset();     m_right.reset();
1697  }  }
1698    
1699  // To simplify the memory management, all threads operate on one large vector, rather than one each.  
 // Each sample is evaluated independently and copied into the result DataExpanded.  
1700  DataReady_ptr  DataReady_ptr
1701  DataLazy::resolve()  DataLazy::resolve()
1702  {  {
1703        resolveToIdentity();
1704        return m_id;
1705    }
1706    
1707    
1708  LAZYDEBUG(cout << "Sample size=" << m_samplesize << endl;)  /* This is really a static method but I think that caused problems in windows */
1709  LAZYDEBUG(cout << "Buffers=" << m_buffsRequired << endl;)  void
1710    DataLazy::resolveGroupWorker(std::vector<DataLazy*>& dats)
1711    {
1712      if (dats.empty())
1713      {
1714        return;
1715      }
1716      vector<DataLazy*> work;
1717      FunctionSpace fs=dats[0]->getFunctionSpace();
1718      bool match=true;
1719      for (int i=dats.size()-1;i>=0;--i)
1720      {
1721        if (dats[i]->m_readytype!='E')
1722        {
1723            dats[i]->collapse();
1724        }
1725        if (dats[i]->m_op!=IDENTITY)
1726        {
1727            work.push_back(dats[i]);
1728            if (fs!=dats[i]->getFunctionSpace())
1729            {
1730                match=false;
1731            }
1732        }
1733      }
1734      if (work.empty())
1735      {
1736        return;     // no work to do
1737      }
1738      if (match)    // all functionspaces match.  Yes I realise this is overly strict
1739      {     // it is possible that dats[0] is one of the objects which we discarded and
1740            // all the other functionspaces match.
1741        vector<DataExpanded*> dep;
1742        vector<ValueType*> vecs;
1743        for (int i=0;i<work.size();++i)
1744        {
1745            dep.push_back(new DataExpanded(fs,work[i]->getShape(), ValueType(work[i]->getNoValues())));
1746            vecs.push_back(&(dep[i]->getVectorRW()));
1747        }
1748        int totalsamples=work[0]->getNumSamples();
1749        const ValueType* res=0; // Storage for answer
1750        int sample;
1751        #pragma omp parallel private(sample, res)
1752        {
1753            size_t roffset=0;
1754            #pragma omp for schedule(static)
1755            for (sample=0;sample<totalsamples;++sample)
1756            {
1757            roffset=0;
1758            int j;
1759            for (j=work.size()-1;j>=0;--j)
1760            {
1761    #ifdef _OPENMP
1762                    res=work[j]->resolveNodeSample(omp_get_thread_num(),sample,roffset);
1763    #else
1764                    res=work[j]->resolveNodeSample(0,sample,roffset);
1765    #endif
1766                    RealVectorType::size_type outoffset=dep[j]->getPointOffset(sample,0);
1767                    memcpy(&((*vecs[j])[outoffset]),&((*res)[roffset]),work[j]->m_samplesize*sizeof(RealVectorType::ElementType));
1768            }
1769            }
1770        }
1771        // Now we need to load the new results as identity ops into the lazy nodes
1772        for (int i=work.size()-1;i>=0;--i)
1773        {
1774            work[i]->makeIdentity(boost::dynamic_pointer_cast<DataReady>(dep[i]->getPtr()));
1775        }
1776      }
1777      else  // functionspaces do not match
1778      {
1779        for (int i=0;i<work.size();++i)
1780        {
1781            work[i]->resolveToIdentity();
1782        }
1783      }
1784    }
1785    
1786    
1787    
1788    // This version of resolve uses storage in each node to hold results
1789    DataReady_ptr
1790    DataLazy::resolveNodeWorker()
1791    {
1792    if (m_readytype!='E')     // if the whole sub-expression is Constant or Tagged, then evaluate it normally    if (m_readytype!='E')     // if the whole sub-expression is Constant or Tagged, then evaluate it normally
1793    {    {
1794      collapse();      collapse();
# Line 1659  LAZYDEBUG(cout << "Buffers=" << m_buffsR Line 1798  LAZYDEBUG(cout << "Buffers=" << m_buffsR
1798      return m_id;      return m_id;
1799    }    }
1800      // from this point on we must have m_op!=IDENTITY and m_readytype=='E'      // from this point on we must have m_op!=IDENTITY and m_readytype=='E'
   size_t threadbuffersize=m_maxsamplesize*(max(1,m_buffsRequired)); // Each thread needs to have enough  
     // storage to evaluate its expression  
   int numthreads=1;  
 #ifdef _OPENMP  
   numthreads=omp_get_max_threads();  
 #endif  
   ValueType v(numthreads*threadbuffersize);  
 LAZYDEBUG(cout << "Buffer created with size=" << v.size() << endl;)  
1801    DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(),  ValueType(getNoValues()));    DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(),  ValueType(getNoValues()));
1802    ValueType& resvec=result->getVectorRW();    ValueType& resvec=result->getVectorRW();
1803    DataReady_ptr resptr=DataReady_ptr(result);    DataReady_ptr resptr=DataReady_ptr(result);
1804    
1805    int sample;    int sample;
   size_t outoffset;     // offset in the output data  
1806    int totalsamples=getNumSamples();    int totalsamples=getNumSamples();
1807    const ValueType* res=0;   // Vector storing the answer    const ValueType* res=0;   // Storage for answer
   size_t resoffset=0;       // where in the vector to find the answer  
1808  LAZYDEBUG(cout << "Total number of samples=" <<totalsamples << endl;)  LAZYDEBUG(cout << "Total number of samples=" <<totalsamples << endl;)
1809    #pragma omp parallel for private(sample,resoffset,outoffset,res) schedule(static)    #pragma omp parallel private(sample,res)
   for (sample=0;sample<totalsamples;++sample)  
1810    {    {
1811        if (sample==0)  {ENABLEDEBUG}      size_t roffset=0;
1812    #ifdef LAZY_STACK_PROF
1813  //       if (sample==5800/getNumDPPSample())  {ENABLEDEBUG}      stackstart[omp_get_thread_num()]=&roffset;
1814  LAZYDEBUG(cout << "################################# " << sample << endl;)      stackend[omp_get_thread_num()]=&roffset;
1815    #endif
1816        #pragma omp for schedule(static)
1817        for (sample=0;sample<totalsamples;++sample)
1818        {
1819            roffset=0;
1820  #ifdef _OPENMP  #ifdef _OPENMP
1821      res=resolveSample(v,threadbuffersize*omp_get_thread_num(),sample,resoffset);              res=resolveNodeSample(omp_get_thread_num(),sample,roffset);
1822  #else  #else
1823      res=resolveSample(v,0,sample,resoffset);   // res would normally be v, but not if its a single IDENTITY op.              res=resolveNodeSample(0,sample,roffset);
1824  #endif  #endif
1825  LAZYDEBUG(cerr << "-------------------------------- " << endl;)  LAZYDEBUG(cout << "Sample #" << sample << endl;)
1826  LAZYDEBUG(cerr<< "Copying sample#" << sample << endl;)  LAZYDEBUG(cout << "Final res[" << roffset<< "]=" << (*res)[roffset] << (*res)[roffset]<< endl; )
1827      outoffset=result->getPointOffset(sample,0);              RealVectorType::size_type outoffset=result->getPointOffset(sample,0);
1828  LAZYDEBUG(cerr << "offset=" << outoffset << " from offset=" << resoffset << " " << m_samplesize << " doubles" << endl;)              memcpy(&(resvec[outoffset]),&((*res)[roffset]),m_samplesize*sizeof(RealVectorType::ElementType));
1829      for (unsigned int i=0;i<m_samplesize;++i,++outoffset,++resoffset)   // copy values into the output vector      }
1830      {    }
1831  LAZYDEBUG(cerr << "outoffset=" << outoffset << " resoffset=" << resoffset << " " << (*res)[resoffset]<< endl;)  #ifdef LAZY_STACK_PROF
1832      resvec[outoffset]=(*res)[resoffset];    for (int i=0;i<getNumberOfThreads();++i)
1833      }    {
1834  LAZYDEBUG(cerr << DataTypes::pointToString(resvec,getShape(),outoffset-m_samplesize+DataTypes::noValues(getShape()),"Final result:") << endl;)      size_t r=((size_t)stackstart[i] - (size_t)stackend[i]);
1835  LAZYDEBUG(cerr << "*********************************" << endl;)  //  cout << i << " " << stackstart[i] << " .. " << stackend[i] << " = " <<  r << endl;
1836      DISABLEDEBUG      if (r>maxstackuse)
1837        {
1838            maxstackuse=r;
1839        }
1840    }    }
1841      cout << "Max resolve Stack use=" << maxstackuse << endl;
1842    #endif
1843    return resptr;    return resptr;
1844  }  }
1845    
# Line 1708  std::string Line 1847  std::string
1847  DataLazy::toString() const  DataLazy::toString() const
1848  {  {
1849    ostringstream oss;    ostringstream oss;
1850    oss << "Lazy Data:";    oss << "Lazy Data: [depth=" << m_height<< "] ";
1851    intoString(oss);    switch (escriptParams.getLAZY_STR_FMT())
1852      {
1853      case 1:   // tree format
1854        oss << endl;
1855        intoTreeString(oss,"");
1856        break;
1857      case 2:   // just the depth
1858        break;
1859      default:
1860        intoString(oss);
1861        break;
1862      }
1863    return oss.str();    return oss.str();
1864  }  }
1865    
# Line 1750  DataLazy::intoString(ostringstream& oss) Line 1900  DataLazy::intoString(ostringstream& oss)
1900    case G_UNARY_P:    case G_UNARY_P:
1901    case G_NP1OUT:    case G_NP1OUT:
1902    case G_NP1OUT_P:    case G_NP1OUT_P:
1903      case G_REDUCTION:
1904      oss << opToString(m_op) << '(';      oss << opToString(m_op) << '(';
1905      m_left->intoString(oss);      m_left->intoString(oss);
1906      oss << ')';      oss << ')';
# Line 1767  DataLazy::intoString(ostringstream& oss) Line 1918  DataLazy::intoString(ostringstream& oss)
1918      oss << ", " << m_axis_offset << ", " << m_transpose;      oss << ", " << m_axis_offset << ", " << m_transpose;
1919      oss << ')';      oss << ')';
1920      break;      break;
1921      case G_CONDEVAL:
1922        oss << opToString(m_op)<< '(' ;
1923        m_mask->intoString(oss);
1924        oss << " ? ";
1925        m_left->intoString(oss);
1926        oss << " : ";
1927        m_right->intoString(oss);
1928        oss << ')';
1929        break;
1930    default:    default:
1931      oss << "UNKNOWN";      oss << "UNKNOWN";
1932    }    }
1933  }  }
1934    
1935    
1936    void
1937    DataLazy::intoTreeString(ostringstream& oss, string indent) const
1938    {
1939      oss << '[' << m_rank << ':' << setw(3) << m_samplesize << "] " << indent;
1940      switch (getOpgroup(m_op))
1941      {
1942      case G_IDENTITY:
1943        if (m_id->isExpanded())
1944        {
1945           oss << "E";
1946        }
1947        else if (m_id->isTagged())
1948        {
1949          oss << "T";
1950        }
1951        else if (m_id->isConstant())
1952        {
1953          oss << "C";
1954        }
1955        else
1956        {
1957          oss << "?";
1958        }
1959        oss << '@' << m_id.get() << endl;
1960        break;
1961      case G_BINARY:
1962        oss << opToString(m_op) << endl;
1963        indent+='.';
1964        m_left->intoTreeString(oss, indent);
1965        m_right->intoTreeString(oss, indent);
1966        break;
1967      case G_UNARY:
1968      case G_UNARY_P:
1969      case G_NP1OUT:
1970      case G_NP1OUT_P:
1971      case G_REDUCTION:
1972        oss << opToString(m_op) << endl;
1973        indent+='.';
1974        m_left->intoTreeString(oss, indent);
1975        break;
1976      case G_TENSORPROD:
1977        oss << opToString(m_op) << endl;
1978        indent+='.';
1979        m_left->intoTreeString(oss, indent);
1980        m_right->intoTreeString(oss, indent);
1981        break;
1982      case G_NP1OUT_2P:
1983        oss << opToString(m_op) << ", " << m_axis_offset << ", " << m_transpose<< endl;
1984        indent+='.';
1985        m_left->intoTreeString(oss, indent);
1986        break;
1987      default:
1988        oss << "UNKNOWN";
1989      }
1990    }
1991    
1992    
1993  DataAbstract*  DataAbstract*
1994  DataLazy::deepCopy()  DataLazy::deepCopy() const
1995  {  {
1996    switch (getOpgroup(m_op))    switch (getOpgroup(m_op))
1997    {    {
1998    case G_IDENTITY:  return new DataLazy(m_id->deepCopy()->getPtr());    case G_IDENTITY:  return new DataLazy(m_id->deepCopy()->getPtr());
1999    case G_UNARY: return new DataLazy(m_left->deepCopy()->getPtr(),m_op);    case G_UNARY:
2000      case G_REDUCTION:      return new DataLazy(m_left->deepCopy()->getPtr(),m_op);
2001      case G_UNARY_P:   return new DataLazy(m_left->deepCopy()->getPtr(), m_op, m_tol);
2002    case G_BINARY:    return new DataLazy(m_left->deepCopy()->getPtr(),m_right->deepCopy()->getPtr(),m_op);    case G_BINARY:    return new DataLazy(m_left->deepCopy()->getPtr(),m_right->deepCopy()->getPtr(),m_op);
2003    case G_NP1OUT: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(),m_op);    case G_NP1OUT: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(),m_op);
2004    case G_TENSORPROD: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);    case G_TENSORPROD: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
2005      case G_NP1OUT_P:   return new DataLazy(m_left->deepCopy()->getPtr(),m_op,  m_axis_offset);
2006      case G_NP1OUT_2P:  return new DataLazy(m_left->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
2007    default:    default:
2008      throw DataException("Programmer error - do not know how to deepcopy operator "+opToString(m_op)+".");      throw DataException("Programmer error - do not know how to deepcopy operator "+opToString(m_op)+".");
2009    }    }
2010  }  }
2011    
2012    
2013    
2014  // There is no single, natural interpretation of getLength on DataLazy.  // There is no single, natural interpretation of getLength on DataLazy.
2015  // Instances of DataReady can look at the size of their vectors.  // Instances of DataReady can look at the size of their vectors.
2016  // For lazy though, it could be the size the data would be if it were resolved;  // For lazy though, it could be the size the data would be if it were resolved;
2017  // or it could be some function of the lengths of the DataReady instances which  // or it could be some function of the lengths of the DataReady instances which
2018  // form part of the expression.  // form part of the expression.
2019  // Rather than have people making assumptions, I have disabled the method.  // Rather than have people making assumptions, I have disabled the method.
2020  DataTypes::ValueType::size_type  DataTypes::RealVectorType::size_type
2021  DataLazy::getLength() const  DataLazy::getLength() const
2022  {  {
2023    throw DataException("getLength() does not make sense for lazy data.");    throw DataException("getLength() does not make sense for lazy data.");
# Line 1809  DataLazy::getSlice(const DataTypes::Regi Line 2032  DataLazy::getSlice(const DataTypes::Regi
2032    
2033    
2034  // To do this we need to rely on our child nodes  // To do this we need to rely on our child nodes
2035  DataTypes::ValueType::size_type  DataTypes::RealVectorType::size_type
2036  DataLazy::getPointOffset(int sampleNo,  DataLazy::getPointOffset(int sampleNo,
2037                   int dataPointNo)                   int dataPointNo)
2038  {  {
# Line 1835  DataLazy::getPointOffset(int sampleNo, Line 2058  DataLazy::getPointOffset(int sampleNo,
2058  }  }
2059    
2060  // To do this we need to rely on our child nodes  // To do this we need to rely on our child nodes
2061  DataTypes::ValueType::size_type  DataTypes::RealVectorType::size_type
2062  DataLazy::getPointOffset(int sampleNo,  DataLazy::getPointOffset(int sampleNo,
2063                   int dataPointNo) const                   int dataPointNo) const
2064  {  {
# Line 1868  DataLazy::getPointOffset(int sampleNo, Line 2091  DataLazy::getPointOffset(int sampleNo,
2091  void  void
2092  DataLazy::setToZero()  DataLazy::setToZero()
2093  {  {
2094  //   DataTypes::ValueType v(getNoValues(),0);  //   DataTypes::RealVectorType v(getNoValues(),0);
2095  //   m_id=DataReady_ptr(new DataConstant(getFunctionSpace(),getShape(),v));  //   m_id=DataReady_ptr(new DataConstant(getFunctionSpace(),getShape(),v));
2096  //   m_op=IDENTITY;  //   m_op=IDENTITY;
2097  //   m_right.reset();    //   m_right.reset();  
# Line 1876  DataLazy::setToZero() Line 2099  DataLazy::setToZero()
2099  //   m_readytype='C';  //   m_readytype='C';
2100  //   m_buffsRequired=1;  //   m_buffsRequired=1;
2101    
2102      (void)privdebug;  // to stop the compiler complaining about unused privdebug
2103    throw DataException("Programmer error - setToZero not supported for DataLazy (DataLazy objects should be read only).");    throw DataException("Programmer error - setToZero not supported for DataLazy (DataLazy objects should be read only).");
   
2104  }  }
2105    
2106  bool  bool

Legend:
Removed from v.2496  
changed lines
  Added in v.5938

  ViewVC Help
Powered by ViewVC 1.1.26