/[escript]/branches/clazy/escriptcore/src/DataLazy.cpp
ViewVC logotype

Diff of /branches/clazy/escriptcore/src/DataLazy.cpp

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

trunk/escript/src/DataLazy.cpp revision 2496 by jfenwick, Fri Jun 26 06:09:47 2009 UTC trunk/escriptcore/src/DataLazy.cpp revision 4657 by jfenwick, Thu Feb 6 06:12:20 2014 UTC
# Line 1  Line 1 
1    
2  /*******************************************************  /*****************************************************************************
3  *  *
4  * Copyright (c) 2003-2008 by University of Queensland  * Copyright (c) 2003-2014 by University of Queensland
5  * Earth Systems Science Computational Center (ESSCC)  * http://www.uq.edu.au
 * http://www.uq.edu.au/esscc  
6  *  *
7  * Primary Business: Queensland, Australia  * Primary Business: Queensland, Australia
8  * Licensed under the Open Software License version 3.0  * Licensed under the Open Software License version 3.0
9  * http://www.opensource.org/licenses/osl-3.0.php  * http://www.opensource.org/licenses/osl-3.0.php
10  *  *
11  *******************************************************/  * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12    * Development 2012-2013 by School of Earth Sciences
13    * Development from 2014 by Centre for Geoscience Computing (GeoComp)
14    *
15    *****************************************************************************/
16    
17    
18  #include "DataLazy.h"  #include "DataLazy.h"
19  #ifdef USE_NETCDF  #include "esysUtils/Esys_MPI.h"
 #include <netcdfcpp.h>  
 #endif  
 #ifdef PASO_MPI  
 #include <mpi.h>  
 #endif  
20  #ifdef _OPENMP  #ifdef _OPENMP
21  #include <omp.h>  #include <omp.h>
22  #endif  #endif
# Line 30  Line 28 
28    
29  #include "EscriptParams.h"  #include "EscriptParams.h"
30    
31    #ifdef USE_NETCDF
32    #include <netcdfcpp.h>
33    #endif
34    
35  #include <iomanip>      // for some fancy formatting in debug  #include <iomanip>      // for some fancy formatting in debug
36    
37  // #define LAZYDEBUG(X) if (privdebug){X;}  // #define LAZYDEBUG(X) if (privdebug){X;}
# Line 42  bool privdebug=false; Line 44  bool privdebug=false;
44  #define DISABLEDEBUG privdebug=false;  #define DISABLEDEBUG privdebug=false;
45  }  }
46    
47  // #define SIZELIMIT  // #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {cerr << "\n!!!!!!! SIZE LIMIT EXCEEDED " << m_children << ";" << m_height << endl << toString() << endl;resolveToIdentity();}
48  // #define SIZELIMIT if ((m_height>7) || (m_children>127)) {cerr << "\n!!!!!!! SIZE LIMIT EXCEEDED " << m_children << ";" << m_height << endl << toString() << endl; resolveToIdentity();}  
49  //#define SIZELIMIT if ((m_height>7) || (m_children>127)) {resolveToIdentity();}  // #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {cerr << "SIZE LIMIT EXCEEDED " << m_height << endl;resolveToIdentity();}
50    
51  #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {resolveToIdentity();}  
52    #define SIZELIMIT if (m_height>escript::escriptParams.getTOO_MANY_LEVELS())  {if (escript::escriptParams.getLAZY_VERBOSE()){cerr << "SIZE LIMIT EXCEEDED height=" << m_height << endl;}resolveToIdentity();}
53    
54  /*  /*
55  How does DataLazy work?  How does DataLazy work?
# Line 59  A special operation, IDENTITY, stores an Line 62  A special operation, IDENTITY, stores an
62  This means that all "internal" nodes in the structure are instances of DataLazy.  This means that all "internal" nodes in the structure are instances of DataLazy.
63    
64  Each operation has a string representation as well as an opgroup - eg G_IDENTITY, G_BINARY, ...  Each operation has a string representation as well as an opgroup - eg G_IDENTITY, G_BINARY, ...
65  Note that IDENITY is not considered a unary operation.  Note that IDENTITY is not considered a unary operation.
66    
67  I am avoiding calling the structure formed a tree because it is not guaranteed to be one (eg c=a+a).  I am avoiding calling the structure formed a tree because it is not guaranteed to be one (eg c=a+a).
68  It must however form a DAG (directed acyclic graph).  It must however form a DAG (directed acyclic graph).
# Line 68  I will refer to individual DataLazy obje Line 71  I will refer to individual DataLazy obje
71  Each node also stores:  Each node also stores:
72  - m_readytype \in {'E','T','C','?'} ~ indicates what sort of DataReady would be produced if the expression was  - m_readytype \in {'E','T','C','?'} ~ indicates what sort of DataReady would be produced if the expression was
73      evaluated.      evaluated.
74  - m_buffsrequired ~ the larged number of samples which would need to be kept simultaneously in order to  - m_buffsrequired ~ the large number of samples which would need to be kept simultaneously in order to
75      evaluate the expression.      evaluate the expression.
76  - m_samplesize ~ the number of doubles stored in a sample.  - m_samplesize ~ the number of doubles stored in a sample.
77    
# Line 110  namespace escript Line 113  namespace escript
113  namespace  namespace
114  {  {
115    
116    
117    // enabling this will print out when ever the maximum stacksize used by resolve increases
118    // it assumes _OPENMP is also in use
119    //#define LAZY_STACK_PROF
120    
121    
122    
123    #ifndef _OPENMP
124      #ifdef LAZY_STACK_PROF
125      #undef LAZY_STACK_PROF
126      #endif
127    #endif
128    
129    
130    #ifdef LAZY_STACK_PROF
131    std::vector<void*> stackstart(getNumberOfThreads());
132    std::vector<void*> stackend(getNumberOfThreads());
133    size_t maxstackuse=0;
134    #endif
135    
136  enum ES_opgroup  enum ES_opgroup
137  {  {
138     G_UNKNOWN,     G_UNKNOWN,
# Line 120  enum ES_opgroup Line 143  enum ES_opgroup
143     G_NP1OUT,        // non-pointwise op with one output     G_NP1OUT,        // non-pointwise op with one output
144     G_NP1OUT_P,      // non-pointwise op with one output requiring a parameter     G_NP1OUT_P,      // non-pointwise op with one output requiring a parameter
145     G_TENSORPROD,    // general tensor product     G_TENSORPROD,    // general tensor product
146     G_NP1OUT_2P      // non-pointwise op with one output requiring two params     G_NP1OUT_2P,     // non-pointwise op with one output requiring two params
147       G_REDUCTION,     // non-pointwise unary op with a scalar output
148       G_CONDEVAL
149  };  };
150    
151    
# Line 135  string ES_opstrings[]={"UNKNOWN","IDENTI Line 160  string ES_opstrings[]={"UNKNOWN","IDENTI
160              "symmetric","nonsymmetric",              "symmetric","nonsymmetric",
161              "prod",              "prod",
162              "transpose", "trace",              "transpose", "trace",
163              "swapaxes"};              "swapaxes",
164  int ES_opcount=41;              "minval", "maxval",
165                "condEval"};
166    int ES_opcount=44;
167  ES_opgroup opgroups[]={G_UNKNOWN,G_IDENTITY,G_BINARY,G_BINARY,G_BINARY,G_BINARY, G_BINARY,  ES_opgroup opgroups[]={G_UNKNOWN,G_IDENTITY,G_BINARY,G_BINARY,G_BINARY,G_BINARY, G_BINARY,
168              G_UNARY,G_UNARY,G_UNARY, //10              G_UNARY,G_UNARY,G_UNARY, //10
169              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,    // 17              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,    // 17
# Line 146  ES_opgroup opgroups[]={G_UNKNOWN,G_IDENT Line 173  ES_opgroup opgroups[]={G_UNKNOWN,G_IDENT
173              G_NP1OUT,G_NP1OUT,              G_NP1OUT,G_NP1OUT,
174              G_TENSORPROD,              G_TENSORPROD,
175              G_NP1OUT_P, G_NP1OUT_P,              G_NP1OUT_P, G_NP1OUT_P,
176              G_NP1OUT_2P};              G_NP1OUT_2P,
177                G_REDUCTION, G_REDUCTION,
178                G_CONDEVAL};
179  inline  inline
180  ES_opgroup  ES_opgroup
181  getOpgroup(ES_optype op)  getOpgroup(ES_optype op)
# Line 167  resultFS(DataAbstract_ptr left, DataAbst Line 196  resultFS(DataAbstract_ptr left, DataAbst
196    FunctionSpace r=right->getFunctionSpace();    FunctionSpace r=right->getFunctionSpace();
197    if (l!=r)    if (l!=r)
198    {    {
199      if (r.probeInterpolation(l))      signed char res=r.getDomain()->preferredInterpolationOnDomain(r.getTypeCode(), l.getTypeCode());
200        if (res==1)
201      {      {
202      return l;      return l;
203      }      }
204      if (l.probeInterpolation(r))      if (res==-1)
205      {      {
206      return r;      return r;
207      }      }
# Line 191  resultShape(DataAbstract_ptr left, DataA Line 221  resultShape(DataAbstract_ptr left, DataA
221        {        {
222          throw DataException("Shapes not the name - shapes must match for (point)binary operations.");          throw DataException("Shapes not the name - shapes must match for (point)binary operations.");
223        }        }
224    
225        if (left->getRank()==0)   // we need to allow scalar * anything        if (left->getRank()==0)   // we need to allow scalar * anything
226        {        {
227          return right->getShape();          return right->getShape();
# Line 218  resultShape(DataAbstract_ptr left, ES_op Line 249  resultShape(DataAbstract_ptr left, ES_op
249          int rank=left->getRank();          int rank=left->getRank();
250          if (axis_offset<0 || axis_offset>rank)          if (axis_offset<0 || axis_offset>rank)
251          {          {
252                 throw DataException("Error - Data::transpose must have 0 <= axis_offset <= rank=" + rank);              stringstream e;
253              }              e << "Error - Data::transpose must have 0 <= axis_offset <= rank=" << rank;
254              for (int i=0; i<rank; i++)              throw DataException(e.str());
255            }
256            for (int i=0; i<rank; i++)
257          {          {
258             int index = (axis_offset+i)%rank;             int index = (axis_offset+i)%rank;
259                 sh.push_back(s[index]); // Append to new shape             sh.push_back(s[index]); // Append to new shape
260              }          }
261          return sh;          return sh;
262         }         }
263      break;      break;
# Line 303  SwapShape(DataAbstract_ptr left, const i Line 336  SwapShape(DataAbstract_ptr left, const i
336          throw DataException("Error - Data::swapaxes argument must have at least rank 2.");          throw DataException("Error - Data::swapaxes argument must have at least rank 2.");
337       }       }
338       if (axis0<0 || axis0>rank-1) {       if (axis0<0 || axis0>rank-1) {
339          throw DataException("Error - Data::swapaxes: axis0 must be between 0 and rank-1=" + rank-1);          stringstream e;
340            e << "Error - Data::swapaxes: axis0 must be between 0 and rank-1=" << (rank-1);
341            throw DataException(e.str());
342       }       }
343       if (axis1<0 || axis1>rank-1) {       if (axis1<0 || axis1>rank-1) {
344           throw DataException("Error - Data::swapaxes: axis1 must be between 0 and rank-1=" + rank-1);          stringstream e;
345            e << "Error - Data::swapaxes: axis1 must be between 0 and rank-1=" << (rank-1);
346            throw DataException(e.str());
347       }       }
348       if (axis0 == axis1) {       if (axis0 == axis1) {
349           throw DataException("Error - Data::swapaxes: axis indices must be different.");           throw DataException("Error - Data::swapaxes: axis indices must be different.");
# Line 395  GTPShape(DataAbstract_ptr left, DataAbst Line 432  GTPShape(DataAbstract_ptr left, DataAbst
432    return shape2;    return shape2;
433  }  }
434    
 // determine the number of samples requires to evaluate an expression combining left and right  
 // NP1OUT needs an extra buffer because we can't write the answers over the top of the input.  
 // The same goes for G_TENSORPROD  
 // It might seem that pointwise binary ops (G_BINARY) could be written over the top of the lefts.  
 // This would be true were it not for the possibility that the LHS could be a scalar which needs to be examined  
 // multiple times  
 int  
 calcBuffs(const DataLazy_ptr& left, const DataLazy_ptr& right, ES_optype op)  
 {  
    switch(getOpgroup(op))  
    {  
    case G_IDENTITY: return 1;  
    case G_BINARY: return 1+max(left->getBuffsRequired(),right->getBuffsRequired()+1);  
    case G_UNARY:  
    case G_UNARY_P: return max(left->getBuffsRequired(),1);  
    case G_NP1OUT: return 1+max(left->getBuffsRequired(),1);  
    case G_NP1OUT_P: return 1+max(left->getBuffsRequired(),1);  
    case G_TENSORPROD: return 1+max(left->getBuffsRequired(),right->getBuffsRequired()+1);  
    case G_NP1OUT_2P: return 1+max(left->getBuffsRequired(),1);  
    default:  
     throw DataException("Programmer Error - attempt to calcBuffs() for operator "+opToString(op)+".");  
    }  
 }  
   
   
435  }   // end anonymous namespace  }   // end anonymous namespace
436    
437    
# Line 435  opToString(ES_optype op) Line 447  opToString(ES_optype op)
447    return ES_opstrings[op];    return ES_opstrings[op];
448  }  }
449    
450    void DataLazy::LazyNodeSetup()
451    {
452    #ifdef _OPENMP
453        int numthreads=omp_get_max_threads();
454        m_samples.resize(numthreads*m_samplesize);
455        m_sampleids=new int[numthreads];
456        for (int i=0;i<numthreads;++i)
457        {
458            m_sampleids[i]=-1;  
459        }
460    #else
461        m_samples.resize(m_samplesize);
462        m_sampleids=new int[1];
463        m_sampleids[0]=-1;
464    #endif  // _OPENMP
465    }
466    
467    
468    // Creates an identity node
469  DataLazy::DataLazy(DataAbstract_ptr p)  DataLazy::DataLazy(DataAbstract_ptr p)
470      : parent(p->getFunctionSpace(),p->getShape())      : parent(p->getFunctionSpace(),p->getShape())
471        ,m_sampleids(0),
472        m_samples(1)
473  {  {
474     if (p->isLazy())     if (p->isLazy())
475     {     {
# Line 456  LAZYDEBUG(cout << "Wrapping " << dr.get( Line 488  LAZYDEBUG(cout << "Wrapping " << dr.get(
488  LAZYDEBUG(cout << "(1)Lazy created with " << m_samplesize << endl;)  LAZYDEBUG(cout << "(1)Lazy created with " << m_samplesize << endl;)
489  }  }
490    
   
   
   
491  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op)  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op)
492      : parent(left->getFunctionSpace(),left->getShape()),      : parent(left->getFunctionSpace(),(getOpgroup(op)!=G_REDUCTION)?left->getShape():DataTypes::scalarShape),
493      m_op(op),      m_op(op),
494      m_axis_offset(0),      m_axis_offset(0),
495      m_transpose(0),      m_transpose(0),
496      m_SL(0), m_SM(0), m_SR(0)      m_SL(0), m_SM(0), m_SR(0)
497  {  {
498     if ((getOpgroup(op)!=G_UNARY) && (getOpgroup(op)!=G_NP1OUT))     if ((getOpgroup(op)!=G_UNARY) && (getOpgroup(op)!=G_NP1OUT) && (getOpgroup(op)!=G_REDUCTION))
499     {     {
500      throw DataException("Programmer error - constructor DataLazy(left, op) will only process UNARY operations.");      throw DataException("Programmer error - constructor DataLazy(left, op) will only process UNARY operations.");
501     }     }
# Line 482  DataLazy::DataLazy(DataAbstract_ptr left Line 511  DataLazy::DataLazy(DataAbstract_ptr left
511     }     }
512     m_readytype=lleft->m_readytype;     m_readytype=lleft->m_readytype;
513     m_left=lleft;     m_left=lleft;
    m_buffsRequired=calcBuffs(m_left, m_right,m_op); // yeah m_right will be null at this point  
514     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
    m_maxsamplesize=max(m_samplesize,m_left->getMaxSampleSize());  
515     m_children=m_left->m_children+1;     m_children=m_left->m_children+1;
516     m_height=m_left->m_height+1;     m_height=m_left->m_height+1;
517       LazyNodeSetup();
518     SIZELIMIT     SIZELIMIT
519  }  }
520    
# Line 553  LAZYDEBUG(cout << "Right " << right.get( Line 581  LAZYDEBUG(cout << "Right " << right.get(
581      m_readytype='C';      m_readytype='C';
582     }     }
583     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
    m_maxsamplesize=max(max(m_samplesize,m_right->getMaxSampleSize()),m_left->getMaxSampleSize());    
    m_buffsRequired=calcBuffs(m_left, m_right,m_op);  
584     m_children=m_left->m_children+m_right->m_children+2;     m_children=m_left->m_children+m_right->m_children+2;
585     m_height=max(m_left->m_height,m_right->m_height)+1;     m_height=max(m_left->m_height,m_right->m_height)+1;
586       LazyNodeSetup();
587     SIZELIMIT     SIZELIMIT
588  LAZYDEBUG(cout << "(3)Lazy created with " << m_samplesize << endl;)  LAZYDEBUG(cout << "(3)Lazy created with " << m_samplesize << endl;)
589  }  }
# Line 620  DataLazy::DataLazy(DataAbstract_ptr left Line 647  DataLazy::DataLazy(DataAbstract_ptr left
647      m_readytype='C';      m_readytype='C';
648     }     }
649     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
    m_maxsamplesize=max(max(m_samplesize,m_right->getMaxSampleSize()),m_left->getMaxSampleSize());    
    m_buffsRequired=calcBuffs(m_left, m_right,m_op);  
650     m_children=m_left->m_children+m_right->m_children+2;     m_children=m_left->m_children+m_right->m_children+2;
651     m_height=max(m_left->m_height,m_right->m_height)+1;     m_height=max(m_left->m_height,m_right->m_height)+1;
652       LazyNodeSetup();
653     SIZELIMIT     SIZELIMIT
654  LAZYDEBUG(cout << "(4)Lazy created with " << m_samplesize << endl;)  LAZYDEBUG(cout << "(4)Lazy created with " << m_samplesize << endl;)
655  }  }
# Line 651  DataLazy::DataLazy(DataAbstract_ptr left Line 677  DataLazy::DataLazy(DataAbstract_ptr left
677     }     }
678     m_readytype=lleft->m_readytype;     m_readytype=lleft->m_readytype;
679     m_left=lleft;     m_left=lleft;
    m_buffsRequired=calcBuffs(m_left, m_right,m_op); // yeah m_right will be null at this point  
680     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
    m_maxsamplesize=max(m_samplesize,m_left->getMaxSampleSize());  
681     m_children=m_left->m_children+1;     m_children=m_left->m_children+1;
682     m_height=m_left->m_height+1;     m_height=m_left->m_height+1;
683       LazyNodeSetup();
684     SIZELIMIT     SIZELIMIT
685  LAZYDEBUG(cout << "(5)Lazy created with " << m_samplesize << endl;)  LAZYDEBUG(cout << "(5)Lazy created with " << m_samplesize << endl;)
686  }  }
# Line 682  DataLazy::DataLazy(DataAbstract_ptr left Line 707  DataLazy::DataLazy(DataAbstract_ptr left
707     }     }
708     m_readytype=lleft->m_readytype;     m_readytype=lleft->m_readytype;
709     m_left=lleft;     m_left=lleft;
    m_buffsRequired=calcBuffs(m_left, m_right,m_op); // yeah m_right will be null at this point  
710     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
    m_maxsamplesize=max(m_samplesize,m_left->getMaxSampleSize());  
711     m_children=m_left->m_children+1;     m_children=m_left->m_children+1;
712     m_height=m_left->m_height+1;     m_height=m_left->m_height+1;
713       LazyNodeSetup();
714     SIZELIMIT     SIZELIMIT
715  LAZYDEBUG(cout << "(6)Lazy created with " << m_samplesize << endl;)  LAZYDEBUG(cout << "(6)Lazy created with " << m_samplesize << endl;)
716  }  }
# Line 714  DataLazy::DataLazy(DataAbstract_ptr left Line 738  DataLazy::DataLazy(DataAbstract_ptr left
738     }     }
739     m_readytype=lleft->m_readytype;     m_readytype=lleft->m_readytype;
740     m_left=lleft;     m_left=lleft;
    m_buffsRequired=calcBuffs(m_left, m_right,m_op); // yeah m_right will be null at this point  
741     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
    m_maxsamplesize=max(m_samplesize,m_left->getMaxSampleSize());  
742     m_children=m_left->m_children+1;     m_children=m_left->m_children+1;
743     m_height=m_left->m_height+1;     m_height=m_left->m_height+1;
744       LazyNodeSetup();
745     SIZELIMIT     SIZELIMIT
746  LAZYDEBUG(cout << "(7)Lazy created with " << m_samplesize << endl;)  LAZYDEBUG(cout << "(7)Lazy created with " << m_samplesize << endl;)
747  }  }
748    
749  DataLazy::~DataLazy()  
750    namespace
751  {  {
 }  
752    
753        inline int max3(int a, int b, int c)
754        {
755        int t=(a>b?a:b);
756        return (t>c?t:c);
757    
758  int      }
 DataLazy::getBuffsRequired() const  
 {  
     return m_buffsRequired;  
759  }  }
760    
761    DataLazy::DataLazy(DataAbstract_ptr mask, DataAbstract_ptr left, DataAbstract_ptr right/*, double tol*/)
762  size_t      : parent(left->getFunctionSpace(), left->getShape()),
763  DataLazy::getMaxSampleSize() const      m_op(CONDEVAL),
764        m_axis_offset(0),
765        m_transpose(0),
766        m_tol(0)
767  {  {
768      return m_maxsamplesize;  
769       DataLazy_ptr lmask;
770       DataLazy_ptr lleft;
771       DataLazy_ptr lright;
772       if (!mask->isLazy())
773       {
774        lmask=DataLazy_ptr(new DataLazy(mask));
775       }
776       else
777       {
778        lmask=dynamic_pointer_cast<DataLazy>(mask);
779       }
780       if (!left->isLazy())
781       {
782        lleft=DataLazy_ptr(new DataLazy(left));
783       }
784       else
785       {
786        lleft=dynamic_pointer_cast<DataLazy>(left);
787       }
788       if (!right->isLazy())
789       {
790        lright=DataLazy_ptr(new DataLazy(right));
791       }
792       else
793       {
794        lright=dynamic_pointer_cast<DataLazy>(right);
795       }
796       m_readytype=lmask->m_readytype;
797       if ((lleft->m_readytype!=lright->m_readytype) || (lmask->m_readytype!=lleft->m_readytype))
798       {
799        throw DataException("Programmer Error - condEval arguments must have the same readytype");
800       }
801       m_left=lleft;
802       m_right=lright;
803       m_mask=lmask;
804       m_samplesize=getNumDPPSample()*getNoValues();
805       m_children=m_left->m_children+m_right->m_children+m_mask->m_children+1;
806       m_height=max3(m_left->m_height,m_right->m_height,m_mask->m_height)+1;
807       LazyNodeSetup();
808       SIZELIMIT
809    LAZYDEBUG(cout << "(8)Lazy created with " << m_samplesize << endl;)
810  }  }
811    
812    
813    
814  size_t  DataLazy::~DataLazy()
 DataLazy::getSampleBufferSize() const  
815  {  {
816      return m_maxsamplesize*(max(1,m_buffsRequired));     delete[] m_sampleids;
817  }  }
818    
819    
820  /*  /*
821    \brief Evaluates the expression using methods on Data.    \brief Evaluates the expression using methods on Data.
822    This does the work for the collapse method.    This does the work for the collapse method.
823    For reasons of efficiency do not call this method on DataExpanded nodes.    For reasons of efficiency do not call this method on DataExpanded nodes.
824  */  */
825  DataReady_ptr  DataReady_ptr
826  DataLazy::collapseToReady()  DataLazy::collapseToReady() const
827  {  {
828    if (m_readytype=='E')    if (m_readytype=='E')
829    { // this is more an efficiency concern than anything else    { // this is more an efficiency concern than anything else
# Line 891  DataLazy::collapseToReady() Line 959  DataLazy::collapseToReady()
959      case SWAP:      case SWAP:
960      result=left.swapaxes(m_axis_offset, m_transpose);      result=left.swapaxes(m_axis_offset, m_transpose);
961      break;      break;
962        case MINVAL:
963        result=left.minval();
964        break;
965        case MAXVAL:
966        result=left.minval();
967        break;
968      default:      default:
969      throw DataException("Programmer error - collapseToReady does not know how to resolve operator "+opToString(m_op)+".");      throw DataException("Programmer error - collapseToReady does not know how to resolve operator "+opToString(m_op)+".");
970    }    }
# Line 904  DataLazy::collapseToReady() Line 978  DataLazy::collapseToReady()
978     the purpose of using DataLazy in the first place).     the purpose of using DataLazy in the first place).
979  */  */
980  void  void
981  DataLazy::collapse()  DataLazy::collapse() const
982  {  {
983    if (m_op==IDENTITY)    if (m_op==IDENTITY)
984    {    {
# Line 918  DataLazy::collapse() Line 992  DataLazy::collapse()
992    m_op=IDENTITY;    m_op=IDENTITY;
993  }  }
994    
995  /*  
996    \brief Compute the value of the expression (unary operation) for the given sample.  
997    \return Vector which stores the value of the subexpression for the given sample.  
998    \param v A vector to store intermediate results.  
999    \param offset Index in v to begin storing results.  
1000    \param sampleNo Sample number to evaluate.  #define PROC_OP(TYPE,X)                               \
1001    \param roffset (output parameter) the offset in the return vector where the result begins.      for (int j=0;j<onumsteps;++j)\
1002        {\
1003    The return value will be an existing vector so do not deallocate it.        for (int i=0;i<numsteps;++i,resultp+=resultStep) \
1004    If the result is stored in v it should be stored at the offset given.        { \
1005    Everything from offset to the end of v should be considered available for this method to use.  LAZYDEBUG(cout << "[left,right]=[" << lroffset << "," << rroffset << "]" << endl;)\
1006  */  LAZYDEBUG(cout << "{left,right}={" << (*left)[lroffset] << "," << (*right)[rroffset] << "}\n";)\
1007  DataTypes::ValueType*           tensor_binary_operation< TYPE >(chunksize, &((*left)[lroffset]), &((*right)[rroffset]), resultp, X); \
1008  DataLazy::resolveUnary(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const  LAZYDEBUG(cout << " result=      " << resultp[0] << endl;) \
1009             lroffset+=leftstep; \
1010             rroffset+=rightstep; \
1011          }\
1012          lroffset+=oleftstep;\
1013          rroffset+=orightstep;\
1014        }
1015    
1016    
1017    // The result will be stored in m_samples
1018    // The return value is a pointer to the DataVector, offset is the offset within the return value
1019    const DataTypes::ValueType*
1020    DataLazy::resolveNodeSample(int tid, int sampleNo, size_t& roffset) const
1021    {
1022    LAZYDEBUG(cout << "Resolve sample " << toString() << endl;)
1023        // collapse so we have a 'E' node or an IDENTITY for some other type
1024      if (m_readytype!='E' && m_op!=IDENTITY)
1025      {
1026        collapse();
1027      }
1028      if (m_op==IDENTITY)  
1029      {
1030        const ValueType& vec=m_id->getVectorRO();
1031        roffset=m_id->getPointOffset(sampleNo, 0);
1032    #ifdef LAZY_STACK_PROF
1033    int x;
1034    if (&x<stackend[omp_get_thread_num()])
1035    {
1036           stackend[omp_get_thread_num()]=&x;
1037    }
1038    #endif
1039        return &(vec);
1040      }
1041      if (m_readytype!='E')
1042      {
1043        throw DataException("Programmer Error - Collapse did not produce an expanded node.");
1044      }
1045      if (m_sampleids[tid]==sampleNo)
1046      {
1047        roffset=tid*m_samplesize;
1048        return &(m_samples);        // sample is already resolved
1049      }
1050      m_sampleids[tid]=sampleNo;
1051    
1052      switch (getOpgroup(m_op))
1053      {
1054      case G_UNARY:
1055      case G_UNARY_P: return resolveNodeUnary(tid, sampleNo, roffset);
1056      case G_BINARY: return resolveNodeBinary(tid, sampleNo, roffset);
1057      case G_NP1OUT: return resolveNodeNP1OUT(tid, sampleNo, roffset);
1058      case G_NP1OUT_P: return resolveNodeNP1OUT_P(tid, sampleNo, roffset);
1059      case G_TENSORPROD: return resolveNodeTProd(tid, sampleNo, roffset);
1060      case G_NP1OUT_2P: return resolveNodeNP1OUT_2P(tid, sampleNo, roffset);
1061      case G_REDUCTION: return resolveNodeReduction(tid, sampleNo, roffset);
1062      case G_CONDEVAL: return resolveNodeCondEval(tid, sampleNo, roffset);
1063      default:
1064        throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");
1065      }
1066    }
1067    
1068    const DataTypes::ValueType*
1069    DataLazy::resolveNodeUnary(int tid, int sampleNo, size_t& roffset) const
1070  {  {
1071      // we assume that any collapsing has been done before we get here      // we assume that any collapsing has been done before we get here
1072      // since we only have one argument we don't need to think about only      // since we only have one argument we don't need to think about only
1073      // processing single points.      // processing single points.
1074        // we will also know we won't get identity nodes
1075    if (m_readytype!='E')    if (m_readytype!='E')
1076    {    {
1077      throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");      throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
1078    }    }
1079    const ValueType* vleft=m_left->resolveSample(v,offset,sampleNo,roffset);    if (m_op==IDENTITY)
1080    const double* left=&((*vleft)[roffset]);    {
1081    double* result=&(v[offset]);      throw DataException("Programmer error - resolveNodeUnary should not be called on identity nodes.");
1082    roffset=offset;    }
1083      const DataTypes::ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, roffset);
1084      const double* left=&((*leftres)[roffset]);
1085      roffset=m_samplesize*tid;
1086      double* result=&(m_samples[roffset]);
1087    switch (m_op)    switch (m_op)
1088    {    {
1089      case SIN:        case SIN:  
# Line 1053  DataLazy::resolveUnary(ValueType& v, siz Line 1193  DataLazy::resolveUnary(ValueType& v, siz
1193      default:      default:
1194      throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");      throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");
1195    }    }
1196    return &v;    return &(m_samples);
1197  }  }
1198    
1199    
1200    const DataTypes::ValueType*
1201    DataLazy::resolveNodeReduction(int tid, int sampleNo, size_t& roffset) const
1202    {
1203        // we assume that any collapsing has been done before we get here
1204        // since we only have one argument we don't need to think about only
1205        // processing single points.
1206        // we will also know we won't get identity nodes
1207      if (m_readytype!='E')
1208      {
1209        throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
1210      }
1211      if (m_op==IDENTITY)
1212      {
1213        throw DataException("Programmer error - resolveNodeUnary should not be called on identity nodes.");
1214      }
1215      size_t loffset=0;
1216      const DataTypes::ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, loffset);
1217    
1218      roffset=m_samplesize*tid;
1219      unsigned int ndpps=getNumDPPSample();
1220      unsigned int psize=DataTypes::noValues(m_left->getShape());
1221      double* result=&(m_samples[roffset]);
1222      switch (m_op)
1223      {
1224        case MINVAL:
1225        {
1226          for (unsigned int z=0;z<ndpps;++z)
1227          {
1228            FMin op;
1229            *result=DataMaths::reductionOp(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max());
1230            loffset+=psize;
1231            result++;
1232          }
1233        }
1234        break;
1235        case MAXVAL:
1236        {
1237          for (unsigned int z=0;z<ndpps;++z)
1238          {
1239          FMax op;
1240          *result=DataMaths::reductionOp(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max()*-1);
1241          loffset+=psize;
1242          result++;
1243          }
1244        }
1245        break;
1246        default:
1247        throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");
1248      }
1249      return &(m_samples);
1250    }
1251    
1252    const DataTypes::ValueType*
1253    DataLazy::resolveNodeNP1OUT(int tid, int sampleNo, size_t& roffset) const
 /*  
   \brief Compute the value of the expression (unary operation) for the given sample.  
   \return Vector which stores the value of the subexpression for the given sample.  
   \param v A vector to store intermediate results.  
   \param offset Index in v to begin storing results.  
   \param sampleNo Sample number to evaluate.  
   \param roffset (output parameter) the offset in the return vector where the result begins.  
   
   The return value will be an existing vector so do not deallocate it.  
   If the result is stored in v it should be stored at the offset given.  
   Everything from offset to the end of v should be considered available for this method to use.  
 */  
 DataTypes::ValueType*  
 DataLazy::resolveNP1OUT(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const  
1254  {  {
1255      // we assume that any collapsing has been done before we get here      // we assume that any collapsing has been done before we get here
1256      // since we only have one argument we don't need to think about only      // since we only have one argument we don't need to think about only
1257      // processing single points.      // processing single points.
1258    if (m_readytype!='E')    if (m_readytype!='E')
1259    {    {
1260      throw DataException("Programmer error - resolveNP1OUT should only be called on expanded Data.");      throw DataException("Programmer error - resolveNodeNP1OUT should only be called on expanded Data.");
1261    }    }
1262      // since we can't write the result over the input, we need a result offset further along    if (m_op==IDENTITY)
1263    size_t subroffset=roffset+m_samplesize;    {
1264  LAZYDEBUG(cerr << "subroffset=" << subroffset << endl;)      throw DataException("Programmer error - resolveNodeNP1OUT should not be called on identity nodes.");
1265    const ValueType* vleft=m_left->resolveSample(v,offset+m_left->m_samplesize,sampleNo,subroffset);    }
1266    roffset=offset;    size_t subroffset;
1267      const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1268      roffset=m_samplesize*tid;
1269    size_t loop=0;    size_t loop=0;
1270    size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;    size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1271    size_t step=getNoValues();    size_t step=getNoValues();
1272      size_t offset=roffset;
1273    switch (m_op)    switch (m_op)
1274    {    {
1275      case SYM:      case SYM:
1276      for (loop=0;loop<numsteps;++loop)      for (loop=0;loop<numsteps;++loop)
1277      {      {
1278          DataMaths::symmetric(*vleft,m_left->getShape(),subroffset, v, getShape(), offset);          DataMaths::symmetric(*leftres,m_left->getShape(),subroffset, m_samples, getShape(), offset);
1279          subroffset+=step;          subroffset+=step;
1280          offset+=step;          offset+=step;
1281      }      }
# Line 1104  LAZYDEBUG(cerr << "subroffset=" << subro Line 1283  LAZYDEBUG(cerr << "subroffset=" << subro
1283      case NSYM:      case NSYM:
1284      for (loop=0;loop<numsteps;++loop)      for (loop=0;loop<numsteps;++loop)
1285      {      {
1286          DataMaths::nonsymmetric(*vleft,m_left->getShape(),subroffset, v, getShape(), offset);          DataMaths::nonsymmetric(*leftres,m_left->getShape(),subroffset, m_samples, getShape(), offset);
1287          subroffset+=step;          subroffset+=step;
1288          offset+=step;          offset+=step;
1289      }      }
# Line 1112  LAZYDEBUG(cerr << "subroffset=" << subro Line 1291  LAZYDEBUG(cerr << "subroffset=" << subro
1291      default:      default:
1292      throw DataException("Programmer error - resolveNP1OUT can not resolve operator "+opToString(m_op)+".");      throw DataException("Programmer error - resolveNP1OUT can not resolve operator "+opToString(m_op)+".");
1293    }    }
1294    return &v;    return &m_samples;
1295  }  }
1296    
1297  /*  const DataTypes::ValueType*
1298    \brief Compute the value of the expression (unary operation) for the given sample.  DataLazy::resolveNodeNP1OUT_P(int tid, int sampleNo, size_t& roffset) const
   \return Vector which stores the value of the subexpression for the given sample.  
   \param v A vector to store intermediate results.  
   \param offset Index in v to begin storing results.  
   \param sampleNo Sample number to evaluate.  
   \param roffset (output parameter) the offset in the return vector where the result begins.  
   
   The return value will be an existing vector so do not deallocate it.  
   If the result is stored in v it should be stored at the offset given.  
   Everything from offset to the end of v should be considered available for this method to use.  
 */  
 DataTypes::ValueType*  
 DataLazy::resolveNP1OUT_P(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const  
1299  {  {
1300      // we assume that any collapsing has been done before we get here      // we assume that any collapsing has been done before we get here
1301      // since we only have one argument we don't need to think about only      // since we only have one argument we don't need to think about only
1302      // processing single points.      // processing single points.
1303    if (m_readytype!='E')    if (m_readytype!='E')
1304    {    {
1305      throw DataException("Programmer error - resolveNP1OUT_P should only be called on expanded Data.");      throw DataException("Programmer error - resolveNodeNP1OUT_P should only be called on expanded Data.");
1306      }
1307      if (m_op==IDENTITY)
1308      {
1309        throw DataException("Programmer error - resolveNodeNP1OUT_P should not be called on identity nodes.");
1310    }    }
     // since we can't write the result over the input, we need a result offset further along  
1311    size_t subroffset;    size_t subroffset;
1312    const ValueType* vleft=m_left->resolveSample(v,offset+m_left->m_samplesize,sampleNo,subroffset);    size_t offset;
1313  LAZYDEBUG(cerr << "srcsamplesize=" << offset+m_left->m_samplesize << " beg=" << subroffset << endl;)    const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1314  LAZYDEBUG(cerr << "Offset for 5800=" << getPointOffset(5800/getNumDPPSample(),5800%getNumDPPSample()) << endl;)    roffset=m_samplesize*tid;
1315    roffset=offset;    offset=roffset;
1316    size_t loop=0;    size_t loop=0;
1317    size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;    size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1318    size_t outstep=getNoValues();    size_t outstep=getNoValues();
1319    size_t instep=m_left->getNoValues();    size_t instep=m_left->getNoValues();
 LAZYDEBUG(cerr << "instep=" << instep << " outstep=" << outstep<< " numsteps=" << numsteps << endl;)  
1320    switch (m_op)    switch (m_op)
1321    {    {
1322      case TRACE:      case TRACE:
1323      for (loop=0;loop<numsteps;++loop)      for (loop=0;loop<numsteps;++loop)
1324      {      {
1325  size_t zz=sampleNo*getNumDPPSample()+loop;              DataMaths::trace(*leftres,m_left->getShape(),subroffset, m_samples ,getShape(),offset,m_axis_offset);
 if (zz==5800)  
 {  
 LAZYDEBUG(cerr << "point=" <<  zz<< endl;)  
 LAZYDEBUG(cerr << "Input to  trace=" << DataTypes::pointToString(*vleft,m_left->getShape(),subroffset,"") << endl;)  
 LAZYDEBUG(cerr << "Offset for point=" << getPointOffset(5800/getNumDPPSample(),5800%getNumDPPSample()) << " vs ";)  
 LAZYDEBUG(cerr << subroffset << endl;)  
 LAZYDEBUG(cerr << "output=" << offset << endl;)  
 }  
             DataMaths::trace(*vleft,m_left->getShape(),subroffset, v ,getShape(),offset,m_axis_offset);  
 if (zz==5800)  
 {  
 LAZYDEBUG(cerr << "Result of trace=" << DataTypes::pointToString(v,getShape(),offset,"") << endl;)  
 }  
1326          subroffset+=instep;          subroffset+=instep;
1327          offset+=outstep;          offset+=outstep;
1328      }      }
# Line 1174  LAZYDEBUG(cerr << "Result of trace=" << Line 1330  LAZYDEBUG(cerr << "Result of trace=" <<
1330      case TRANS:      case TRANS:
1331      for (loop=0;loop<numsteps;++loop)      for (loop=0;loop<numsteps;++loop)
1332      {      {
1333              DataMaths::transpose(*vleft,m_left->getShape(),subroffset, v,getShape(),offset,m_axis_offset);              DataMaths::transpose(*leftres,m_left->getShape(),subroffset, m_samples, getShape(),offset,m_axis_offset);
1334          subroffset+=instep;          subroffset+=instep;
1335          offset+=outstep;          offset+=outstep;
1336      }      }
# Line 1182  LAZYDEBUG(cerr << "Result of trace=" << Line 1338  LAZYDEBUG(cerr << "Result of trace=" <<
1338      default:      default:
1339      throw DataException("Programmer error - resolveNP1OUTP can not resolve operator "+opToString(m_op)+".");      throw DataException("Programmer error - resolveNP1OUTP can not resolve operator "+opToString(m_op)+".");
1340    }    }
1341    return &v;    return &m_samples;
1342  }  }
1343    
1344    
1345  /*  const DataTypes::ValueType*
1346    \brief Compute the value of the expression (unary operation with int params) for the given sample.  DataLazy::resolveNodeNP1OUT_2P(int tid, int sampleNo, size_t& roffset) const
   \return Vector which stores the value of the subexpression for the given sample.  
   \param v A vector to store intermediate results.  
   \param offset Index in v to begin storing results.  
   \param sampleNo Sample number to evaluate.  
   \param roffset (output parameter) the offset in the return vector where the result begins.  
   
   The return value will be an existing vector so do not deallocate it.  
   If the result is stored in v it should be stored at the offset given.  
   Everything from offset to the end of v should be considered available for this method to use.  
 */  
 DataTypes::ValueType*  
 DataLazy::resolveNP1OUT_2P(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const  
1347  {  {
     // we assume that any collapsing has been done before we get here  
     // since we only have one argument we don't need to think about only  
     // processing single points.  
1348    if (m_readytype!='E')    if (m_readytype!='E')
1349    {    {
1350      throw DataException("Programmer error - resolveNP1OUT_2P should only be called on expanded Data.");      throw DataException("Programmer error - resolveNodeNP1OUT_2P should only be called on expanded Data.");
1351      }
1352      if (m_op==IDENTITY)
1353      {
1354        throw DataException("Programmer error - resolveNodeNP1OUT_2P should not be called on identity nodes.");
1355    }    }
     // since we can't write the result over the input, we need a result offset further along  
1356    size_t subroffset;    size_t subroffset;
1357    const ValueType* vleft=m_left->resolveSample(v,offset+m_left->m_samplesize,sampleNo,subroffset);    size_t offset;
1358  LAZYDEBUG(cerr << "srcsamplesize=" << offset+m_left->m_samplesize << " beg=" << subroffset << endl;)    const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1359  LAZYDEBUG(cerr << "Offset for 5800=" << getPointOffset(5800/getNumDPPSample(),5800%getNumDPPSample()) << endl;)    roffset=m_samplesize*tid;
1360    roffset=offset;    offset=roffset;
1361    size_t loop=0;    size_t loop=0;
1362    size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;    size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1363    size_t outstep=getNoValues();    size_t outstep=getNoValues();
1364    size_t instep=m_left->getNoValues();    size_t instep=m_left->getNoValues();
 LAZYDEBUG(cerr << "instep=" << instep << " outstep=" << outstep<< " numsteps=" << numsteps << endl;)  
1365    switch (m_op)    switch (m_op)
1366    {    {
1367      case SWAP:      case SWAP:
1368      for (loop=0;loop<numsteps;++loop)      for (loop=0;loop<numsteps;++loop)
1369      {      {
1370              DataMaths::swapaxes(*vleft,m_left->getShape(),subroffset, v,getShape(),offset,m_axis_offset, m_transpose);              DataMaths::swapaxes(*leftres,m_left->getShape(),subroffset, m_samples, getShape(),offset, m_axis_offset, m_transpose);
1371          subroffset+=instep;          subroffset+=instep;
1372          offset+=outstep;          offset+=outstep;
1373      }      }
1374      break;      break;
1375      default:      default:
1376      throw DataException("Programmer error - resolveNP1OUT2P can not resolve operator "+opToString(m_op)+".");      throw DataException("Programmer error - resolveNodeNP1OUT2P can not resolve operator "+opToString(m_op)+".");
1377    }    }
1378    return &v;    return &m_samples;
1379  }  }
1380    
1381    const DataTypes::ValueType*
1382    DataLazy::resolveNodeCondEval(int tid, int sampleNo, size_t& roffset) const
1383    {
1384      if (m_readytype!='E')
1385      {
1386        throw DataException("Programmer error - resolveNodeCondEval should only be called on expanded Data.");
1387      }
1388      if (m_op!=CONDEVAL)
1389      {
1390        throw DataException("Programmer error - resolveNodeCondEval should only be called on CONDEVAL nodes.");
1391      }
1392      size_t subroffset;
1393    
1394      const ValueType* maskres=m_mask->resolveNodeSample(tid, sampleNo, subroffset);
1395      const ValueType* srcres=0;
1396      if ((*maskres)[subroffset]>0)
1397      {
1398        srcres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1399      }
1400      else
1401      {
1402        srcres=m_right->resolveNodeSample(tid, sampleNo, subroffset);
1403      }
1404    
1405  #define PROC_OP(TYPE,X)                               \    // Now we need to copy the result
1406      for (int j=0;j<onumsteps;++j)\  
1407      {\    roffset=m_samplesize*tid;
1408        for (int i=0;i<numsteps;++i,resultp+=resultStep) \    for (int i=0;i<m_samplesize;++i)
1409        { \    {
1410  LAZYDEBUG(cout << "[left,right]=[" << lroffset << "," << rroffset << "]" << endl;)\      m_samples[roffset+i]=(*srcres)[subroffset+i];  
1411  LAZYDEBUG(cout << "{left,right}={" << (*left)[lroffset] << "," << (*right)[rroffset] << "}\n";)\    }
1412           tensor_binary_operation< TYPE >(chunksize, &((*left)[lroffset]), &((*right)[rroffset]), resultp, X); \  
1413  LAZYDEBUG(cout << " result=      " << resultp[0] << endl;) \    return &m_samples;
1414           lroffset+=leftstep; \  }
          rroffset+=rightstep; \  
       }\  
       lroffset+=oleftstep;\  
       rroffset+=orightstep;\  
     }  
1415    
 /*  
   \brief Compute the value of the expression (binary operation) for the given sample.  
   \return Vector which stores the value of the subexpression for the given sample.  
   \param v A vector to store intermediate results.  
   \param offset Index in v to begin storing results.  
   \param sampleNo Sample number to evaluate.  
   \param roffset (output parameter) the offset in the return vector where the result begins.  
   
   The return value will be an existing vector so do not deallocate it.  
   If the result is stored in v it should be stored at the offset given.  
   Everything from offset to the end of v should be considered available for this method to use.  
 */  
1416  // This method assumes that any subexpressions which evaluate to Constant or Tagged Data  // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1417  // have already been collapsed to IDENTITY. So we must have at least one expanded child.  // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1418  // If both children are expanded, then we can process them in a single operation (we treat  // If both children are expanded, then we can process them in a single operation (we treat
# Line 1274  LAZYDEBUG(cout << " result=      " << re Line 1422  LAZYDEBUG(cout << " result=      " << re
1422  // There is an additional complication when scalar operations are considered.  // There is an additional complication when scalar operations are considered.
1423  // For example, 2+Vector.  // For example, 2+Vector.
1424  // In this case each double within the point is treated individually  // In this case each double within the point is treated individually
1425  DataTypes::ValueType*  const DataTypes::ValueType*
1426  DataLazy::resolveBinary(ValueType& v,  size_t offset, int sampleNo, size_t& roffset) const  DataLazy::resolveNodeBinary(int tid, int sampleNo, size_t& roffset) const
1427  {  {
1428  LAZYDEBUG(cout << "Resolve binary: " << toString() << endl;)  LAZYDEBUG(cout << "Resolve binary: " << toString() << endl;)
1429    
# Line 1387  LAZYDEBUG(cout << "Resolve binary: " << Line 1535  LAZYDEBUG(cout << "Resolve binary: " <<
1535    
1536    int resultStep=max(leftstep,rightstep);   // only one (at most) should be !=0    int resultStep=max(leftstep,rightstep);   // only one (at most) should be !=0
1537      // Get the values of sub-expressions      // Get the values of sub-expressions
1538    const ValueType* left=m_left->resolveSample(v,offset+getMaxSampleSize(),sampleNo,lroffset);   // see note on    const ValueType* left=m_left->resolveNodeSample(tid,sampleNo,lroffset);  
1539      // calcBufss for why we can't put offset as the 2nd param above    const ValueType* right=m_right->resolveNodeSample(tid,sampleNo,rroffset);
   const ValueType* right=m_right->resolveSample(v,offset+2*getMaxSampleSize(),sampleNo,rroffset); // Note  
     // the right child starts further along.  
1540  LAZYDEBUG(cout << "Post sub calls in " << toString() << endl;)  LAZYDEBUG(cout << "Post sub calls in " << toString() << endl;)
1541  LAZYDEBUG(cout << "shapes=" << DataTypes::shapeToString(m_left->getShape()) << "," << DataTypes::shapeToString(m_right->getShape()) << endl;)  LAZYDEBUG(cout << "shapes=" << DataTypes::shapeToString(m_left->getShape()) << "," << DataTypes::shapeToString(m_right->getShape()) << endl;)
1542  LAZYDEBUG(cout << "chunksize=" << chunksize << endl << "leftstep=" << leftstep << " rightstep=" << rightstep;)  LAZYDEBUG(cout << "chunksize=" << chunksize << endl << "leftstep=" << leftstep << " rightstep=" << rightstep;)
# Line 1399  LAZYDEBUG(cout << "onumsteps=" << onumst Line 1545  LAZYDEBUG(cout << "onumsteps=" << onumst
1545  LAZYDEBUG(cout << " DPPS=" << m_left->getNumDPPSample() << "," <<m_right->getNumDPPSample() << endl;)  LAZYDEBUG(cout << " DPPS=" << m_left->getNumDPPSample() << "," <<m_right->getNumDPPSample() << endl;)
1546  LAZYDEBUG(cout << "" << LS << RS << LN << RN << LES << RES <<LEN << REN <<   endl;)  LAZYDEBUG(cout << "" << LS << RS << LN << RN << LES << RES <<LEN << REN <<   endl;)
1547    
1548    LAZYDEBUG(cout << "Left res["<< lroffset<< "]=" << (*left)[lroffset] << endl;)
1549    LAZYDEBUG(cout << "Right res["<< rroffset<< "]=" << (*right)[rroffset] << endl;)
1550    
1551    double* resultp=&(v[offset]);     // results are stored at the vector offset we recieved  
1552      roffset=m_samplesize*tid;
1553      double* resultp=&(m_samples[roffset]);        // results are stored at the vector offset we received
1554    switch(m_op)    switch(m_op)
1555    {    {
1556      case ADD:      case ADD:
# Line 1421  LAZYDEBUG(cout << "" << LS << RS << LN < Line 1571  LAZYDEBUG(cout << "" << LS << RS << LN <
1571      default:      default:
1572      throw DataException("Programmer error - resolveBinary can not resolve operator "+opToString(m_op)+".");      throw DataException("Programmer error - resolveBinary can not resolve operator "+opToString(m_op)+".");
1573    }    }
1574    roffset=offset;  LAZYDEBUG(cout << "Result res[" << roffset<< "]" << m_samples[roffset] << endl;)
1575    return &v;    return &m_samples;
1576  }  }
1577    
1578    
   
 /*  
   \brief Compute the value of the expression (tensor product) for the given sample.  
   \return Vector which stores the value of the subexpression for the given sample.  
   \param v A vector to store intermediate results.  
   \param offset Index in v to begin storing results.  
   \param sampleNo Sample number to evaluate.  
   \param roffset (output parameter) the offset in the return vector where the result begins.  
   
   The return value will be an existing vector so do not deallocate it.  
   If the result is stored in v it should be stored at the offset given.  
   Everything from offset to the end of v should be considered available for this method to use.  
 */  
1579  // This method assumes that any subexpressions which evaluate to Constant or Tagged Data  // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1580  // have already been collapsed to IDENTITY. So we must have at least one expanded child.  // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1581  // unlike the other resolve helpers, we must treat these datapoints separately.  // unlike the other resolve helpers, we must treat these datapoints separately.
1582  DataTypes::ValueType*  const DataTypes::ValueType*
1583  DataLazy::resolveTProd(ValueType& v,  size_t offset, int sampleNo, size_t& roffset) const  DataLazy::resolveNodeTProd(int tid, int sampleNo, size_t& roffset) const
1584  {  {
1585  LAZYDEBUG(cout << "Resolve TensorProduct: " << toString()  << " to offset " << offset<< endl;)  LAZYDEBUG(cout << "Resolve TensorProduct: " << toString() << endl;)
1586    
1587    size_t lroffset=0, rroffset=0;    // offsets in the left and right result vectors    size_t lroffset=0, rroffset=0;    // offsets in the left and right result vectors
1588      // first work out which of the children are expanded      // first work out which of the children are expanded
1589    bool leftExp=(m_left->m_readytype=='E');    bool leftExp=(m_left->m_readytype=='E');
1590    bool rightExp=(m_right->m_readytype=='E');    bool rightExp=(m_right->m_readytype=='E');
1591    int steps=getNumDPPSample();    int steps=getNumDPPSample();
 /*  int leftStep=((leftExp && !rightExp)? m_right->getNoValues() : 0);  
   int rightStep=((rightExp && !leftExp)? m_left->getNoValues() : 0);*/  
1592    int leftStep=(leftExp? m_left->getNoValues() : 0);        // do not have scalars as input to this method    int leftStep=(leftExp? m_left->getNoValues() : 0);        // do not have scalars as input to this method
1593    int rightStep=(rightExp?m_right->getNoValues() : 0);    int rightStep=(rightExp?m_right->getNoValues() : 0);
1594    
1595    int resultStep=getNoValues();    int resultStep=getNoValues();
1596      // Get the values of sub-expressions (leave a gap of one sample for the result).    roffset=m_samplesize*tid;
1597    int gap=offset+m_samplesize;      size_t offset=roffset;
   
 LAZYDEBUG(cout << "Query left with offset=" << gap << endl;)  
   
   const ValueType* left=m_left->resolveSample(v,gap,sampleNo,lroffset);  
   gap+=m_left->getMaxSampleSize();  
   
1598    
1599  LAZYDEBUG(cout << "Query right with offset=" << gap << endl;)    const ValueType* left=m_left->resolveNodeSample(tid, sampleNo, lroffset);
1600    
1601      const ValueType* right=m_right->resolveNodeSample(tid, sampleNo, rroffset);
   const ValueType* right=m_right->resolveSample(v,gap,sampleNo,rroffset);  
1602    
1603  LAZYDEBUG(cerr << "[Left shape]=" << DataTypes::shapeToString(m_left->getShape()) << "\n[Right shape]=" << DataTypes::shapeToString(m_right->getShape()) << " result=" <<DataTypes::shapeToString(getShape()) <<  endl;  LAZYDEBUG(cerr << "[Left shape]=" << DataTypes::shapeToString(m_left->getShape()) << "\n[Right shape]=" << DataTypes::shapeToString(m_right->getShape()) << " result=" <<DataTypes::shapeToString(getShape()) <<  endl;
1604  cout << getNoValues() << endl;)  cout << getNoValues() << endl;)
 LAZYDEBUG(cerr << "Result of left=";)  
 LAZYDEBUG(cerr << "[" << lroffset << " .. " << lroffset+m_left->getNoValues() << "]" << endl;  
1605    
1606  for (int i=lroffset, limit=lroffset+(leftExp?m_left->getNoValues()*m_left->getNumDPPSample():m_left->getNoValues());i<limit;++i)  
 {  
 cout << "[" << setw(2) << i-lroffset << "] " << setw(10) << (*left)[i] << " ";  
 if (i%4==0) cout << endl;  
 })  
 LAZYDEBUG(cerr << "\nResult of right=" << endl;)  
 LAZYDEBUG(  
 for (int i=rroffset, limit=rroffset+(rightExp?m_right->getNoValues()*m_right->getNumDPPSample():m_right->getNoValues());i<limit;++i)  
 {  
 cerr << "[" <<  setw(2)<< i-rroffset << "] " << setw(10) << (*right)[i] << " ";  
 if (i%4==0) cout << endl;  
 }  
 cerr << endl;  
 )  
1607  LAZYDEBUG(cerr << "Post sub calls: " << toString() << endl;)  LAZYDEBUG(cerr << "Post sub calls: " << toString() << endl;)
1608  LAZYDEBUG(cout << "LeftExp=" << leftExp << " rightExp=" << rightExp << endl;)  LAZYDEBUG(cout << "LeftExp=" << leftExp << " rightExp=" << rightExp << endl;)
1609  LAZYDEBUG(cout << "LeftR=" << m_left->getRank() << " rightExp=" << m_right->getRank() << endl;)  LAZYDEBUG(cout << "LeftR=" << m_left->getRank() << " rightExp=" << m_right->getRank() << endl;)
# Line 1499  LAZYDEBUG(cout << "m_samplesize=" << m_s Line 1612  LAZYDEBUG(cout << "m_samplesize=" << m_s
1612  LAZYDEBUG(cout << "outputshape=" << DataTypes::shapeToString(getShape()) << endl;)  LAZYDEBUG(cout << "outputshape=" << DataTypes::shapeToString(getShape()) << endl;)
1613  LAZYDEBUG(cout << "DPPS=" << m_right->getNumDPPSample() <<"."<<endl;)  LAZYDEBUG(cout << "DPPS=" << m_right->getNumDPPSample() <<"."<<endl;)
1614    
1615    double* resultp=&(v[offset]);     // results are stored at the vector offset we recieved    double* resultp=&(m_samples[offset]);     // results are stored at the vector offset we received
1616    switch(m_op)    switch(m_op)
1617    {    {
1618      case PROD:      case PROD:
1619      for (int i=0;i<steps;++i,resultp+=resultStep)      for (int i=0;i<steps;++i,resultp+=resultStep)
1620      {      {
   
 LAZYDEBUG(cout << "lroffset=" << lroffset << "rroffset=" << rroffset << endl;)  
 LAZYDEBUG(cout << "l*=" << left << " r*=" << right << endl;)  
 LAZYDEBUG(cout << "m_SL=" << m_SL << " m_SM=" << m_SM << " m_SR=" << m_SR << endl;)  
   
1621            const double *ptr_0 = &((*left)[lroffset]);            const double *ptr_0 = &((*left)[lroffset]);
1622            const double *ptr_1 = &((*right)[rroffset]);            const double *ptr_1 = &((*right)[rroffset]);
1623    
# Line 1518  LAZYDEBUG(cout << DataTypes::pointToStri Line 1626  LAZYDEBUG(cout << DataTypes::pointToStri
1626    
1627            matrix_matrix_product(m_SL, m_SM, m_SR, ptr_0, ptr_1, resultp, m_transpose);            matrix_matrix_product(m_SL, m_SM, m_SR, ptr_0, ptr_1, resultp, m_transpose);
1628    
 LAZYDEBUG(cout << "Results--\n";  
 {  
   DataVector dv(getNoValues());  
 for (int z=0;z<getNoValues();++z)  
 {  
   cout << "[" << setw(2) << z<< "] " << setw(10) << resultp[z] << " ";  
   if (z%4==0) cout << endl;  
   dv[z]=resultp[z];  
 }  
 cout << endl << DataTypes::pointToString(dv,getShape(),0,"RESLT");  
 cout << "\nWritten to: " << resultp << " resultStep=" << resultStep << endl;  
 }  
 )  
1629        lroffset+=leftStep;        lroffset+=leftStep;
1630        rroffset+=rightStep;        rroffset+=rightStep;
1631      }      }
# Line 1539  cout << "\nWritten to: " << resultp << " Line 1634  cout << "\nWritten to: " << resultp << "
1634      throw DataException("Programmer error - resolveTProduct can not resolve operator "+opToString(m_op)+".");      throw DataException("Programmer error - resolveTProduct can not resolve operator "+opToString(m_op)+".");
1635    }    }
1636    roffset=offset;    roffset=offset;
1637    return &v;    return &m_samples;
1638  }  }
1639    
1640    
   
 /*  
   \brief Compute the value of the expression for the given sample.  
   \return Vector which stores the value of the subexpression for the given sample.  
   \param v A vector to store intermediate results.  
   \param offset Index in v to begin storing results.  
   \param sampleNo Sample number to evaluate.  
   \param roffset (output parameter) the offset in the return vector where the result begins.  
   
   The return value will be an existing vector so do not deallocate it.  
 */  
 // the vector and the offset are a place where the method could write its data if it wishes  
 // it is not obligated to do so. For example, if it has its own storage already, it can use that.  
 // Hence the return value to indicate where the data is actually stored.  
 // Regardless, the storage should be assumed to be used, even if it isn't.  
   
 // the roffset is the offset within the returned vector where the data begins  
1641  const DataTypes::ValueType*  const DataTypes::ValueType*
1642  DataLazy::resolveSample(ValueType& v, size_t offset, int sampleNo, size_t& roffset)  DataLazy::resolveSample(int sampleNo, size_t& roffset) const
 {  
 LAZYDEBUG(cout << "Resolve sample " << toString() << endl;)  
     // collapse so we have a 'E' node or an IDENTITY for some other type  
   if (m_readytype!='E' && m_op!=IDENTITY)  
   {  
     collapse();  
   }  
   if (m_op==IDENTITY)    
   {  
     const ValueType& vec=m_id->getVectorRO();  
     if (m_readytype=='C')  
     {  
     roffset=0;  
 LAZYDEBUG(cout << "Finish  sample " << toString() << endl;)  
     return &(vec);  
     }  
     roffset=m_id->getPointOffset(sampleNo, 0);  
 LAZYDEBUG(cout << "Finish  sample " << toString() << endl;)  
     return &(vec);  
   }  
   if (m_readytype!='E')  
   {  
     throw DataException("Programmer Error - Collapse did not produce an expanded node.");  
   }  
   switch (getOpgroup(m_op))  
   {  
   case G_UNARY:  
   case G_UNARY_P: return resolveUnary(v, offset,sampleNo,roffset);  
   case G_BINARY: return resolveBinary(v, offset,sampleNo,roffset);  
   case G_NP1OUT: return resolveNP1OUT(v, offset, sampleNo,roffset);  
   case G_NP1OUT_P: return resolveNP1OUT_P(v, offset, sampleNo,roffset);  
   case G_TENSORPROD: return resolveTProd(v,offset, sampleNo,roffset);  
   case G_NP1OUT_2P: return resolveNP1OUT_2P(v, offset, sampleNo, roffset);  
   default:  
     throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");  
   }  
   
 }  
   
 const DataTypes::ValueType*  
 DataLazy::resolveSample(BufferGroup& bg, int sampleNo, size_t& roffset)  
1643  {  {
1644  #ifdef _OPENMP  #ifdef _OPENMP
1645      int tid=omp_get_thread_num();      int tid=omp_get_thread_num();
1646  #else  #else
1647      int tid=0;      int tid=0;
1648  #endif  #endif
1649      return resolveSample(bg.getBuffer(tid),bg.getOffset(tid),sampleNo,roffset);  
1650    #ifdef LAZY_STACK_PROF
1651        stackstart[tid]=&tid;
1652        stackend[tid]=&tid;
1653        const DataTypes::ValueType* r=resolveNodeSample(tid, sampleNo, roffset);
1654        size_t d=(size_t)stackstart[tid]-(size_t)stackend[tid];
1655        #pragma omp critical
1656        if (d>maxstackuse)
1657        {
1658    cout << "Max resolve Stack use " << d << endl;
1659            maxstackuse=d;
1660        }
1661        return r;
1662    #else
1663        return resolveNodeSample(tid, sampleNo, roffset);
1664    #endif
1665  }  }
1666    
1667    
1668  // This needs to do the work of the idenity constructor  // This needs to do the work of the identity constructor
1669  void  void
1670  DataLazy::resolveToIdentity()  DataLazy::resolveToIdentity()
1671  {  {
1672     if (m_op==IDENTITY)     if (m_op==IDENTITY)
1673      return;      return;
1674     DataReady_ptr p=resolve();     DataReady_ptr p=resolveNodeWorker();
1675     makeIdentity(p);     makeIdentity(p);
1676  }  }
1677    
# Line 1635  void DataLazy::makeIdentity(const DataRe Line 1687  void DataLazy::makeIdentity(const DataRe
1687     else if(p->isExpanded()) {m_readytype='E';}     else if(p->isExpanded()) {m_readytype='E';}
1688     else if (p->isTagged()) {m_readytype='T';}     else if (p->isTagged()) {m_readytype='T';}
1689     else {throw DataException("Unknown DataReady instance in convertToIdentity constructor.");}     else {throw DataException("Unknown DataReady instance in convertToIdentity constructor.");}
    m_buffsRequired=1;  
1690     m_samplesize=p->getNumDPPSample()*p->getNoValues();     m_samplesize=p->getNumDPPSample()*p->getNoValues();
    m_maxsamplesize=m_samplesize;  
1691     m_left.reset();     m_left.reset();
1692     m_right.reset();     m_right.reset();
1693  }  }
1694    
1695  // To simplify the memory management, all threads operate on one large vector, rather than one each.  
 // Each sample is evaluated independently and copied into the result DataExpanded.  
1696  DataReady_ptr  DataReady_ptr
1697  DataLazy::resolve()  DataLazy::resolve()
1698  {  {
1699        resolveToIdentity();
1700        return m_id;
1701    }
1702    
1703    
1704    /* This is really a static method but I think that caused problems in windows */
1705    void
1706    DataLazy::resolveGroupWorker(std::vector<DataLazy*>& dats)
1707    {
1708      if (dats.empty())
1709      {
1710        return;
1711      }
1712      vector<DataLazy*> work;
1713      FunctionSpace fs=dats[0]->getFunctionSpace();
1714      bool match=true;
1715      for (int i=dats.size()-1;i>=0;--i)
1716      {
1717        if (dats[i]->m_readytype!='E')
1718        {
1719            dats[i]->collapse();
1720        }
1721        if (dats[i]->m_op!=IDENTITY)
1722        {
1723            work.push_back(dats[i]);
1724            if (fs!=dats[i]->getFunctionSpace())
1725            {
1726                match=false;
1727            }
1728        }
1729      }
1730      if (work.empty())
1731      {
1732        return;     // no work to do
1733      }
1734      if (match)    // all functionspaces match.  Yes I realise this is overly strict
1735      {     // it is possible that dats[0] is one of the objects which we discarded and
1736            // all the other functionspaces match.
1737        vector<DataExpanded*> dep;
1738        vector<ValueType*> vecs;
1739        for (int i=0;i<work.size();++i)
1740        {
1741            dep.push_back(new DataExpanded(fs,work[i]->getShape(), ValueType(work[i]->getNoValues())));
1742            vecs.push_back(&(dep[i]->getVectorRW()));
1743        }
1744        int totalsamples=work[0]->getNumSamples();
1745        const ValueType* res=0; // Storage for answer
1746        int sample;
1747        #pragma omp parallel private(sample, res)
1748        {
1749            size_t roffset=0;
1750            #pragma omp for schedule(static)
1751            for (sample=0;sample<totalsamples;++sample)
1752            {
1753            roffset=0;
1754            int j;
1755            for (j=work.size()-1;j>=0;--j)
1756            {
1757    #ifdef _OPENMP
1758                    res=work[j]->resolveNodeSample(omp_get_thread_num(),sample,roffset);
1759    #else
1760                    res=work[j]->resolveNodeSample(0,sample,roffset);
1761    #endif
1762                    DataVector::size_type outoffset=dep[j]->getPointOffset(sample,0);
1763                    memcpy(&((*vecs[j])[outoffset]),&((*res)[roffset]),work[j]->m_samplesize*sizeof(DataVector::ElementType));
1764            }
1765            }
1766        }
1767        // Now we need to load the new results as identity ops into the lazy nodes
1768        for (int i=work.size()-1;i>=0;--i)
1769        {
1770            work[i]->makeIdentity(boost::dynamic_pointer_cast<DataReady>(dep[i]->getPtr()));
1771        }
1772      }
1773      else  // functionspaces do not match
1774      {
1775        for (int i=0;i<work.size();++i)
1776        {
1777            work[i]->resolveToIdentity();
1778        }
1779      }
1780    }
1781    
1782    
1783  LAZYDEBUG(cout << "Sample size=" << m_samplesize << endl;)  
1784  LAZYDEBUG(cout << "Buffers=" << m_buffsRequired << endl;)  // This version of resolve uses storage in each node to hold results
1785    DataReady_ptr
1786    DataLazy::resolveNodeWorker()
1787    {
1788    if (m_readytype!='E')     // if the whole sub-expression is Constant or Tagged, then evaluate it normally    if (m_readytype!='E')     // if the whole sub-expression is Constant or Tagged, then evaluate it normally
1789    {    {
1790      collapse();      collapse();
# Line 1659  LAZYDEBUG(cout << "Buffers=" << m_buffsR Line 1794  LAZYDEBUG(cout << "Buffers=" << m_buffsR
1794      return m_id;      return m_id;
1795    }    }
1796      // from this point on we must have m_op!=IDENTITY and m_readytype=='E'      // from this point on we must have m_op!=IDENTITY and m_readytype=='E'
   size_t threadbuffersize=m_maxsamplesize*(max(1,m_buffsRequired)); // Each thread needs to have enough  
     // storage to evaluate its expression  
   int numthreads=1;  
 #ifdef _OPENMP  
   numthreads=omp_get_max_threads();  
 #endif  
   ValueType v(numthreads*threadbuffersize);  
 LAZYDEBUG(cout << "Buffer created with size=" << v.size() << endl;)  
1797    DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(),  ValueType(getNoValues()));    DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(),  ValueType(getNoValues()));
1798    ValueType& resvec=result->getVectorRW();    ValueType& resvec=result->getVectorRW();
1799    DataReady_ptr resptr=DataReady_ptr(result);    DataReady_ptr resptr=DataReady_ptr(result);
1800    
1801    int sample;    int sample;
   size_t outoffset;     // offset in the output data  
1802    int totalsamples=getNumSamples();    int totalsamples=getNumSamples();
1803    const ValueType* res=0;   // Vector storing the answer    const ValueType* res=0;   // Storage for answer
   size_t resoffset=0;       // where in the vector to find the answer  
1804  LAZYDEBUG(cout << "Total number of samples=" <<totalsamples << endl;)  LAZYDEBUG(cout << "Total number of samples=" <<totalsamples << endl;)
1805    #pragma omp parallel for private(sample,resoffset,outoffset,res) schedule(static)    #pragma omp parallel private(sample,res)
   for (sample=0;sample<totalsamples;++sample)  
1806    {    {
1807        if (sample==0)  {ENABLEDEBUG}      size_t roffset=0;
1808    #ifdef LAZY_STACK_PROF
1809  //       if (sample==5800/getNumDPPSample())  {ENABLEDEBUG}      stackstart[omp_get_thread_num()]=&roffset;
1810  LAZYDEBUG(cout << "################################# " << sample << endl;)      stackend[omp_get_thread_num()]=&roffset;
1811    #endif
1812        #pragma omp for schedule(static)
1813        for (sample=0;sample<totalsamples;++sample)
1814        {
1815            roffset=0;
1816  #ifdef _OPENMP  #ifdef _OPENMP
1817      res=resolveSample(v,threadbuffersize*omp_get_thread_num(),sample,resoffset);              res=resolveNodeSample(omp_get_thread_num(),sample,roffset);
1818  #else  #else
1819      res=resolveSample(v,0,sample,resoffset);   // res would normally be v, but not if its a single IDENTITY op.              res=resolveNodeSample(0,sample,roffset);
1820  #endif  #endif
1821  LAZYDEBUG(cerr << "-------------------------------- " << endl;)  LAZYDEBUG(cout << "Sample #" << sample << endl;)
1822  LAZYDEBUG(cerr<< "Copying sample#" << sample << endl;)  LAZYDEBUG(cout << "Final res[" << roffset<< "]=" << (*res)[roffset] << (*res)[roffset]<< endl; )
1823      outoffset=result->getPointOffset(sample,0);              DataVector::size_type outoffset=result->getPointOffset(sample,0);
1824  LAZYDEBUG(cerr << "offset=" << outoffset << " from offset=" << resoffset << " " << m_samplesize << " doubles" << endl;)              memcpy(&(resvec[outoffset]),&((*res)[roffset]),m_samplesize*sizeof(DataVector::ElementType));
1825      for (unsigned int i=0;i<m_samplesize;++i,++outoffset,++resoffset)   // copy values into the output vector      }
1826      {    }
1827  LAZYDEBUG(cerr << "outoffset=" << outoffset << " resoffset=" << resoffset << " " << (*res)[resoffset]<< endl;)  #ifdef LAZY_STACK_PROF
1828      resvec[outoffset]=(*res)[resoffset];    for (int i=0;i<getNumberOfThreads();++i)
1829      }    {
1830  LAZYDEBUG(cerr << DataTypes::pointToString(resvec,getShape(),outoffset-m_samplesize+DataTypes::noValues(getShape()),"Final result:") << endl;)      size_t r=((size_t)stackstart[i] - (size_t)stackend[i]);
1831  LAZYDEBUG(cerr << "*********************************" << endl;)  //  cout << i << " " << stackstart[i] << " .. " << stackend[i] << " = " <<  r << endl;
1832      DISABLEDEBUG      if (r>maxstackuse)
1833        {
1834            maxstackuse=r;
1835        }
1836    }    }
1837      cout << "Max resolve Stack use=" << maxstackuse << endl;
1838    #endif
1839    return resptr;    return resptr;
1840  }  }
1841    
# Line 1708  std::string Line 1843  std::string
1843  DataLazy::toString() const  DataLazy::toString() const
1844  {  {
1845    ostringstream oss;    ostringstream oss;
1846    oss << "Lazy Data:";    oss << "Lazy Data: [depth=" << m_height<< "] ";
1847    intoString(oss);    switch (escriptParams.getLAZY_STR_FMT())
1848      {
1849      case 1:   // tree format
1850        oss << endl;
1851        intoTreeString(oss,"");
1852        break;
1853      case 2:   // just the depth
1854        break;
1855      default:
1856        intoString(oss);
1857        break;
1858      }
1859    return oss.str();    return oss.str();
1860  }  }
1861    
# Line 1750  DataLazy::intoString(ostringstream& oss) Line 1896  DataLazy::intoString(ostringstream& oss)
1896    case G_UNARY_P:    case G_UNARY_P:
1897    case G_NP1OUT:    case G_NP1OUT:
1898    case G_NP1OUT_P:    case G_NP1OUT_P:
1899      case G_REDUCTION:
1900      oss << opToString(m_op) << '(';      oss << opToString(m_op) << '(';
1901      m_left->intoString(oss);      m_left->intoString(oss);
1902      oss << ')';      oss << ')';
# Line 1767  DataLazy::intoString(ostringstream& oss) Line 1914  DataLazy::intoString(ostringstream& oss)
1914      oss << ", " << m_axis_offset << ", " << m_transpose;      oss << ", " << m_axis_offset << ", " << m_transpose;
1915      oss << ')';      oss << ')';
1916      break;      break;
1917      case G_CONDEVAL:
1918        oss << opToString(m_op)<< '(' ;
1919        m_mask->intoString(oss);
1920        oss << " ? ";
1921        m_left->intoString(oss);
1922        oss << " : ";
1923        m_right->intoString(oss);
1924        oss << ')';
1925        break;
1926    default:    default:
1927      oss << "UNKNOWN";      oss << "UNKNOWN";
1928    }    }
1929  }  }
1930    
1931    
1932    void
1933    DataLazy::intoTreeString(ostringstream& oss, string indent) const
1934    {
1935      oss << '[' << m_rank << ':' << setw(3) << m_samplesize << "] " << indent;
1936      switch (getOpgroup(m_op))
1937      {
1938      case G_IDENTITY:
1939        if (m_id->isExpanded())
1940        {
1941           oss << "E";
1942        }
1943        else if (m_id->isTagged())
1944        {
1945          oss << "T";
1946        }
1947        else if (m_id->isConstant())
1948        {
1949          oss << "C";
1950        }
1951        else
1952        {
1953          oss << "?";
1954        }
1955        oss << '@' << m_id.get() << endl;
1956        break;
1957      case G_BINARY:
1958        oss << opToString(m_op) << endl;
1959        indent+='.';
1960        m_left->intoTreeString(oss, indent);
1961        m_right->intoTreeString(oss, indent);
1962        break;
1963      case G_UNARY:
1964      case G_UNARY_P:
1965      case G_NP1OUT:
1966      case G_NP1OUT_P:
1967      case G_REDUCTION:
1968        oss << opToString(m_op) << endl;
1969        indent+='.';
1970        m_left->intoTreeString(oss, indent);
1971        break;
1972      case G_TENSORPROD:
1973        oss << opToString(m_op) << endl;
1974        indent+='.';
1975        m_left->intoTreeString(oss, indent);
1976        m_right->intoTreeString(oss, indent);
1977        break;
1978      case G_NP1OUT_2P:
1979        oss << opToString(m_op) << ", " << m_axis_offset << ", " << m_transpose<< endl;
1980        indent+='.';
1981        m_left->intoTreeString(oss, indent);
1982        break;
1983      default:
1984        oss << "UNKNOWN";
1985      }
1986    }
1987    
1988    
1989  DataAbstract*  DataAbstract*
1990  DataLazy::deepCopy()  DataLazy::deepCopy()
1991  {  {
1992    switch (getOpgroup(m_op))    switch (getOpgroup(m_op))
1993    {    {
1994    case G_IDENTITY:  return new DataLazy(m_id->deepCopy()->getPtr());    case G_IDENTITY:  return new DataLazy(m_id->deepCopy()->getPtr());
1995    case G_UNARY: return new DataLazy(m_left->deepCopy()->getPtr(),m_op);    case G_UNARY:
1996      case G_REDUCTION:      return new DataLazy(m_left->deepCopy()->getPtr(),m_op);
1997      case G_UNARY_P:   return new DataLazy(m_left->deepCopy()->getPtr(), m_op, m_tol);
1998    case G_BINARY:    return new DataLazy(m_left->deepCopy()->getPtr(),m_right->deepCopy()->getPtr(),m_op);    case G_BINARY:    return new DataLazy(m_left->deepCopy()->getPtr(),m_right->deepCopy()->getPtr(),m_op);
1999    case G_NP1OUT: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(),m_op);    case G_NP1OUT: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(),m_op);
2000    case G_TENSORPROD: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);    case G_TENSORPROD: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
2001      case G_NP1OUT_P:   return new DataLazy(m_left->deepCopy()->getPtr(),m_op,  m_axis_offset);
2002      case G_NP1OUT_2P:  return new DataLazy(m_left->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
2003    default:    default:
2004      throw DataException("Programmer error - do not know how to deepcopy operator "+opToString(m_op)+".");      throw DataException("Programmer error - do not know how to deepcopy operator "+opToString(m_op)+".");
2005    }    }
2006  }  }
2007    
2008    
2009    
2010  // There is no single, natural interpretation of getLength on DataLazy.  // There is no single, natural interpretation of getLength on DataLazy.
2011  // Instances of DataReady can look at the size of their vectors.  // Instances of DataReady can look at the size of their vectors.
2012  // For lazy though, it could be the size the data would be if it were resolved;  // For lazy though, it could be the size the data would be if it were resolved;
# Line 1876  DataLazy::setToZero() Line 2095  DataLazy::setToZero()
2095  //   m_readytype='C';  //   m_readytype='C';
2096  //   m_buffsRequired=1;  //   m_buffsRequired=1;
2097    
2098      (void)privdebug;  // to stop the compiler complaining about unused privdebug
2099    throw DataException("Programmer error - setToZero not supported for DataLazy (DataLazy objects should be read only).");    throw DataException("Programmer error - setToZero not supported for DataLazy (DataLazy objects should be read only).");
   
2100  }  }
2101    
2102  bool  bool

Legend:
Removed from v.2496  
changed lines
  Added in v.4657

  ViewVC Help
Powered by ViewVC 1.1.26