/[escript]/branches/clazy/escriptcore/src/DataLazy.cpp
ViewVC logotype

Diff of /branches/clazy/escriptcore/src/DataLazy.cpp

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

trunk/escript/src/DataLazy.cpp revision 2496 by jfenwick, Fri Jun 26 06:09:47 2009 UTC trunk/escriptcore/src/DataLazy.cpp revision 5464 by jfenwick, Sat Feb 14 00:25:03 2015 UTC
# Line 1  Line 1 
1    
2  /*******************************************************  /*****************************************************************************
3  *  *
4  * Copyright (c) 2003-2008 by University of Queensland  * Copyright (c) 2003-2015 by University of Queensland
5  * Earth Systems Science Computational Center (ESSCC)  * http://www.uq.edu.au
 * http://www.uq.edu.au/esscc  
6  *  *
7  * Primary Business: Queensland, Australia  * Primary Business: Queensland, Australia
8  * Licensed under the Open Software License version 3.0  * Licensed under the Open Software License version 3.0
9  * http://www.opensource.org/licenses/osl-3.0.php  * http://www.opensource.org/licenses/osl-3.0.php
10  *  *
11  *******************************************************/  * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12    * Development 2012-2013 by School of Earth Sciences
13    * Development from 2014 by Centre for Geoscience Computing (GeoComp)
14    *
15    *****************************************************************************/
16    
17    #define ESNEEDPYTHON
18    #include "esysUtils/first.h"
19    
20  #include "DataLazy.h"  #include "DataLazy.h"
21  #ifdef USE_NETCDF  #include "esysUtils/Esys_MPI.h"
 #include <netcdfcpp.h>  
 #endif  
 #ifdef PASO_MPI  
 #include <mpi.h>  
 #endif  
22  #ifdef _OPENMP  #ifdef _OPENMP
23  #include <omp.h>  #include <omp.h>
24  #endif  #endif
# Line 30  Line 30 
30    
31  #include "EscriptParams.h"  #include "EscriptParams.h"
32    
33    #ifdef USE_NETCDF
34    #include <netcdfcpp.h>
35    #endif
36    
37  #include <iomanip>      // for some fancy formatting in debug  #include <iomanip>      // for some fancy formatting in debug
38    
39  // #define LAZYDEBUG(X) if (privdebug){X;}  // #define LAZYDEBUG(X) if (privdebug){X;}
# Line 42  bool privdebug=false; Line 46  bool privdebug=false;
46  #define DISABLEDEBUG privdebug=false;  #define DISABLEDEBUG privdebug=false;
47  }  }
48    
49  // #define SIZELIMIT  // #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {cerr << "\n!!!!!!! SIZE LIMIT EXCEEDED " << m_children << ";" << m_height << endl << toString() << endl;resolveToIdentity();}
50  // #define SIZELIMIT if ((m_height>7) || (m_children>127)) {cerr << "\n!!!!!!! SIZE LIMIT EXCEEDED " << m_children << ";" << m_height << endl << toString() << endl; resolveToIdentity();}  
51  //#define SIZELIMIT if ((m_height>7) || (m_children>127)) {resolveToIdentity();}  // #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {cerr << "SIZE LIMIT EXCEEDED " << m_height << endl;resolveToIdentity();}
52    
53  #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {resolveToIdentity();}  
54    #define SIZELIMIT if (m_height>escript::escriptParams.getTOO_MANY_LEVELS())  {if (escript::escriptParams.getLAZY_VERBOSE()){cerr << "SIZE LIMIT EXCEEDED height=" << m_height << endl;}resolveToIdentity();}
55    
56  /*  /*
57  How does DataLazy work?  How does DataLazy work?
# Line 59  A special operation, IDENTITY, stores an Line 64  A special operation, IDENTITY, stores an
64  This means that all "internal" nodes in the structure are instances of DataLazy.  This means that all "internal" nodes in the structure are instances of DataLazy.
65    
66  Each operation has a string representation as well as an opgroup - eg G_IDENTITY, G_BINARY, ...  Each operation has a string representation as well as an opgroup - eg G_IDENTITY, G_BINARY, ...
67  Note that IDENITY is not considered a unary operation.  Note that IDENTITY is not considered a unary operation.
68    
69  I am avoiding calling the structure formed a tree because it is not guaranteed to be one (eg c=a+a).  I am avoiding calling the structure formed a tree because it is not guaranteed to be one (eg c=a+a).
70  It must however form a DAG (directed acyclic graph).  It must however form a DAG (directed acyclic graph).
# Line 68  I will refer to individual DataLazy obje Line 73  I will refer to individual DataLazy obje
73  Each node also stores:  Each node also stores:
74  - m_readytype \in {'E','T','C','?'} ~ indicates what sort of DataReady would be produced if the expression was  - m_readytype \in {'E','T','C','?'} ~ indicates what sort of DataReady would be produced if the expression was
75      evaluated.      evaluated.
76  - m_buffsrequired ~ the larged number of samples which would need to be kept simultaneously in order to  - m_buffsrequired ~ the large number of samples which would need to be kept simultaneously in order to
77      evaluate the expression.      evaluate the expression.
78  - m_samplesize ~ the number of doubles stored in a sample.  - m_samplesize ~ the number of doubles stored in a sample.
79    
# Line 110  namespace escript Line 115  namespace escript
115  namespace  namespace
116  {  {
117    
118    
119    // enabling this will print out when ever the maximum stacksize used by resolve increases
120    // it assumes _OPENMP is also in use
121    //#define LAZY_STACK_PROF
122    
123    
124    
125    #ifndef _OPENMP
126      #ifdef LAZY_STACK_PROF
127      #undef LAZY_STACK_PROF
128      #endif
129    #endif
130    
131    
132    #ifdef LAZY_STACK_PROF
133    std::vector<void*> stackstart(getNumberOfThreads());
134    std::vector<void*> stackend(getNumberOfThreads());
135    size_t maxstackuse=0;
136    #endif
137    
138  enum ES_opgroup  enum ES_opgroup
139  {  {
140     G_UNKNOWN,     G_UNKNOWN,
# Line 120  enum ES_opgroup Line 145  enum ES_opgroup
145     G_NP1OUT,        // non-pointwise op with one output     G_NP1OUT,        // non-pointwise op with one output
146     G_NP1OUT_P,      // non-pointwise op with one output requiring a parameter     G_NP1OUT_P,      // non-pointwise op with one output requiring a parameter
147     G_TENSORPROD,    // general tensor product     G_TENSORPROD,    // general tensor product
148     G_NP1OUT_2P      // non-pointwise op with one output requiring two params     G_NP1OUT_2P,     // non-pointwise op with one output requiring two params
149       G_REDUCTION,     // non-pointwise unary op with a scalar output
150       G_CONDEVAL
151  };  };
152    
153    
# Line 135  string ES_opstrings[]={"UNKNOWN","IDENTI Line 162  string ES_opstrings[]={"UNKNOWN","IDENTI
162              "symmetric","nonsymmetric",              "symmetric","nonsymmetric",
163              "prod",              "prod",
164              "transpose", "trace",              "transpose", "trace",
165              "swapaxes"};              "swapaxes",
166  int ES_opcount=41;              "minval", "maxval",
167                "condEval"};
168    int ES_opcount=44;
169  ES_opgroup opgroups[]={G_UNKNOWN,G_IDENTITY,G_BINARY,G_BINARY,G_BINARY,G_BINARY, G_BINARY,  ES_opgroup opgroups[]={G_UNKNOWN,G_IDENTITY,G_BINARY,G_BINARY,G_BINARY,G_BINARY, G_BINARY,
170              G_UNARY,G_UNARY,G_UNARY, //10              G_UNARY,G_UNARY,G_UNARY, //10
171              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,    // 17              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,    // 17
# Line 146  ES_opgroup opgroups[]={G_UNKNOWN,G_IDENT Line 175  ES_opgroup opgroups[]={G_UNKNOWN,G_IDENT
175              G_NP1OUT,G_NP1OUT,              G_NP1OUT,G_NP1OUT,
176              G_TENSORPROD,              G_TENSORPROD,
177              G_NP1OUT_P, G_NP1OUT_P,              G_NP1OUT_P, G_NP1OUT_P,
178              G_NP1OUT_2P};              G_NP1OUT_2P,
179                G_REDUCTION, G_REDUCTION,
180                G_CONDEVAL};
181  inline  inline
182  ES_opgroup  ES_opgroup
183  getOpgroup(ES_optype op)  getOpgroup(ES_optype op)
# Line 167  resultFS(DataAbstract_ptr left, DataAbst Line 198  resultFS(DataAbstract_ptr left, DataAbst
198    FunctionSpace r=right->getFunctionSpace();    FunctionSpace r=right->getFunctionSpace();
199    if (l!=r)    if (l!=r)
200    {    {
201      if (r.probeInterpolation(l))      signed char res=r.getDomain()->preferredInterpolationOnDomain(r.getTypeCode(), l.getTypeCode());
202        if (res==1)
203      {      {
204      return l;      return l;
205      }      }
206      if (l.probeInterpolation(r))      if (res==-1)
207      {      {
208      return r;      return r;
209      }      }
# Line 191  resultShape(DataAbstract_ptr left, DataA Line 223  resultShape(DataAbstract_ptr left, DataA
223        {        {
224          throw DataException("Shapes not the name - shapes must match for (point)binary operations.");          throw DataException("Shapes not the name - shapes must match for (point)binary operations.");
225        }        }
226    
227        if (left->getRank()==0)   // we need to allow scalar * anything        if (left->getRank()==0)   // we need to allow scalar * anything
228        {        {
229          return right->getShape();          return right->getShape();
# Line 218  resultShape(DataAbstract_ptr left, ES_op Line 251  resultShape(DataAbstract_ptr left, ES_op
251          int rank=left->getRank();          int rank=left->getRank();
252          if (axis_offset<0 || axis_offset>rank)          if (axis_offset<0 || axis_offset>rank)
253          {          {
254                 throw DataException("Error - Data::transpose must have 0 <= axis_offset <= rank=" + rank);              stringstream e;
255              }              e << "Error - Data::transpose must have 0 <= axis_offset <= rank=" << rank;
256              for (int i=0; i<rank; i++)              throw DataException(e.str());
257            }
258            for (int i=0; i<rank; i++)
259          {          {
260             int index = (axis_offset+i)%rank;             int index = (axis_offset+i)%rank;
261                 sh.push_back(s[index]); // Append to new shape             sh.push_back(s[index]); // Append to new shape
262              }          }
263          return sh;          return sh;
264         }         }
265      break;      break;
# Line 303  SwapShape(DataAbstract_ptr left, const i Line 338  SwapShape(DataAbstract_ptr left, const i
338          throw DataException("Error - Data::swapaxes argument must have at least rank 2.");          throw DataException("Error - Data::swapaxes argument must have at least rank 2.");
339       }       }
340       if (axis0<0 || axis0>rank-1) {       if (axis0<0 || axis0>rank-1) {
341          throw DataException("Error - Data::swapaxes: axis0 must be between 0 and rank-1=" + rank-1);          stringstream e;
342            e << "Error - Data::swapaxes: axis0 must be between 0 and rank-1=" << (rank-1);
343            throw DataException(e.str());
344       }       }
345       if (axis1<0 || axis1>rank-1) {       if (axis1<0 || axis1>rank-1) {
346           throw DataException("Error - Data::swapaxes: axis1 must be between 0 and rank-1=" + rank-1);          stringstream e;
347            e << "Error - Data::swapaxes: axis1 must be between 0 and rank-1=" << (rank-1);
348            throw DataException(e.str());
349       }       }
350       if (axis0 == axis1) {       if (axis0 == axis1) {
351           throw DataException("Error - Data::swapaxes: axis indices must be different.");           throw DataException("Error - Data::swapaxes: axis indices must be different.");
# Line 395  GTPShape(DataAbstract_ptr left, DataAbst Line 434  GTPShape(DataAbstract_ptr left, DataAbst
434    return shape2;    return shape2;
435  }  }
436    
 // determine the number of samples requires to evaluate an expression combining left and right  
 // NP1OUT needs an extra buffer because we can't write the answers over the top of the input.  
 // The same goes for G_TENSORPROD  
 // It might seem that pointwise binary ops (G_BINARY) could be written over the top of the lefts.  
 // This would be true were it not for the possibility that the LHS could be a scalar which needs to be examined  
 // multiple times  
 int  
 calcBuffs(const DataLazy_ptr& left, const DataLazy_ptr& right, ES_optype op)  
 {  
    switch(getOpgroup(op))  
    {  
    case G_IDENTITY: return 1;  
    case G_BINARY: return 1+max(left->getBuffsRequired(),right->getBuffsRequired()+1);  
    case G_UNARY:  
    case G_UNARY_P: return max(left->getBuffsRequired(),1);  
    case G_NP1OUT: return 1+max(left->getBuffsRequired(),1);  
    case G_NP1OUT_P: return 1+max(left->getBuffsRequired(),1);  
    case G_TENSORPROD: return 1+max(left->getBuffsRequired(),right->getBuffsRequired()+1);  
    case G_NP1OUT_2P: return 1+max(left->getBuffsRequired(),1);  
    default:  
     throw DataException("Programmer Error - attempt to calcBuffs() for operator "+opToString(op)+".");  
    }  
 }  
   
   
437  }   // end anonymous namespace  }   // end anonymous namespace
438    
439    
# Line 435  opToString(ES_optype op) Line 449  opToString(ES_optype op)
449    return ES_opstrings[op];    return ES_opstrings[op];
450  }  }
451    
452    void DataLazy::LazyNodeSetup()
453    {
454    #ifdef _OPENMP
455        int numthreads=omp_get_max_threads();
456        m_samples.resize(numthreads*m_samplesize);
457        m_sampleids=new int[numthreads];
458        for (int i=0;i<numthreads;++i)
459        {
460            m_sampleids[i]=-1;  
461        }
462    #else
463        m_samples.resize(m_samplesize);
464        m_sampleids=new int[1];
465        m_sampleids[0]=-1;
466    #endif  // _OPENMP
467    }
468    
469    
470    // Creates an identity node
471  DataLazy::DataLazy(DataAbstract_ptr p)  DataLazy::DataLazy(DataAbstract_ptr p)
472      : parent(p->getFunctionSpace(),p->getShape())      : parent(p->getFunctionSpace(),p->getShape())
473        ,m_sampleids(0),
474        m_samples(1)
475  {  {
476     if (p->isLazy())     if (p->isLazy())
477     {     {
# Line 456  LAZYDEBUG(cout << "Wrapping " << dr.get( Line 490  LAZYDEBUG(cout << "Wrapping " << dr.get(
490  LAZYDEBUG(cout << "(1)Lazy created with " << m_samplesize << endl;)  LAZYDEBUG(cout << "(1)Lazy created with " << m_samplesize << endl;)
491  }  }
492    
   
   
   
493  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op)  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op)
494      : parent(left->getFunctionSpace(),left->getShape()),      : parent(left->getFunctionSpace(),(getOpgroup(op)!=G_REDUCTION)?left->getShape():DataTypes::scalarShape),
495      m_op(op),      m_op(op),
496      m_axis_offset(0),      m_axis_offset(0),
497      m_transpose(0),      m_transpose(0),
498      m_SL(0), m_SM(0), m_SR(0)      m_SL(0), m_SM(0), m_SR(0)
499  {  {
500     if ((getOpgroup(op)!=G_UNARY) && (getOpgroup(op)!=G_NP1OUT))     if ((getOpgroup(op)!=G_UNARY) && (getOpgroup(op)!=G_NP1OUT) && (getOpgroup(op)!=G_REDUCTION))
501     {     {
502      throw DataException("Programmer error - constructor DataLazy(left, op) will only process UNARY operations.");      throw DataException("Programmer error - constructor DataLazy(left, op) will only process UNARY operations.");
503     }     }
# Line 482  DataLazy::DataLazy(DataAbstract_ptr left Line 513  DataLazy::DataLazy(DataAbstract_ptr left
513     }     }
514     m_readytype=lleft->m_readytype;     m_readytype=lleft->m_readytype;
515     m_left=lleft;     m_left=lleft;
    m_buffsRequired=calcBuffs(m_left, m_right,m_op); // yeah m_right will be null at this point  
516     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
    m_maxsamplesize=max(m_samplesize,m_left->getMaxSampleSize());  
517     m_children=m_left->m_children+1;     m_children=m_left->m_children+1;
518     m_height=m_left->m_height+1;     m_height=m_left->m_height+1;
519       LazyNodeSetup();
520     SIZELIMIT     SIZELIMIT
521  }  }
522    
# Line 553  LAZYDEBUG(cout << "Right " << right.get( Line 583  LAZYDEBUG(cout << "Right " << right.get(
583      m_readytype='C';      m_readytype='C';
584     }     }
585     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
    m_maxsamplesize=max(max(m_samplesize,m_right->getMaxSampleSize()),m_left->getMaxSampleSize());    
    m_buffsRequired=calcBuffs(m_left, m_right,m_op);  
586     m_children=m_left->m_children+m_right->m_children+2;     m_children=m_left->m_children+m_right->m_children+2;
587     m_height=max(m_left->m_height,m_right->m_height)+1;     m_height=max(m_left->m_height,m_right->m_height)+1;
588       LazyNodeSetup();
589     SIZELIMIT     SIZELIMIT
590  LAZYDEBUG(cout << "(3)Lazy created with " << m_samplesize << endl;)  LAZYDEBUG(cout << "(3)Lazy created with " << m_samplesize << endl;)
591  }  }
# Line 620  DataLazy::DataLazy(DataAbstract_ptr left Line 649  DataLazy::DataLazy(DataAbstract_ptr left
649      m_readytype='C';      m_readytype='C';
650     }     }
651     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
    m_maxsamplesize=max(max(m_samplesize,m_right->getMaxSampleSize()),m_left->getMaxSampleSize());    
    m_buffsRequired=calcBuffs(m_left, m_right,m_op);  
652     m_children=m_left->m_children+m_right->m_children+2;     m_children=m_left->m_children+m_right->m_children+2;
653     m_height=max(m_left->m_height,m_right->m_height)+1;     m_height=max(m_left->m_height,m_right->m_height)+1;
654       LazyNodeSetup();
655     SIZELIMIT     SIZELIMIT
656  LAZYDEBUG(cout << "(4)Lazy created with " << m_samplesize << endl;)  LAZYDEBUG(cout << "(4)Lazy created with " << m_samplesize << endl;)
657  }  }
# Line 651  DataLazy::DataLazy(DataAbstract_ptr left Line 679  DataLazy::DataLazy(DataAbstract_ptr left
679     }     }
680     m_readytype=lleft->m_readytype;     m_readytype=lleft->m_readytype;
681     m_left=lleft;     m_left=lleft;
    m_buffsRequired=calcBuffs(m_left, m_right,m_op); // yeah m_right will be null at this point  
682     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
    m_maxsamplesize=max(m_samplesize,m_left->getMaxSampleSize());  
683     m_children=m_left->m_children+1;     m_children=m_left->m_children+1;
684     m_height=m_left->m_height+1;     m_height=m_left->m_height+1;
685       LazyNodeSetup();
686     SIZELIMIT     SIZELIMIT
687  LAZYDEBUG(cout << "(5)Lazy created with " << m_samplesize << endl;)  LAZYDEBUG(cout << "(5)Lazy created with " << m_samplesize << endl;)
688  }  }
# Line 682  DataLazy::DataLazy(DataAbstract_ptr left Line 709  DataLazy::DataLazy(DataAbstract_ptr left
709     }     }
710     m_readytype=lleft->m_readytype;     m_readytype=lleft->m_readytype;
711     m_left=lleft;     m_left=lleft;
    m_buffsRequired=calcBuffs(m_left, m_right,m_op); // yeah m_right will be null at this point  
712     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
    m_maxsamplesize=max(m_samplesize,m_left->getMaxSampleSize());  
713     m_children=m_left->m_children+1;     m_children=m_left->m_children+1;
714     m_height=m_left->m_height+1;     m_height=m_left->m_height+1;
715       LazyNodeSetup();
716     SIZELIMIT     SIZELIMIT
717  LAZYDEBUG(cout << "(6)Lazy created with " << m_samplesize << endl;)  LAZYDEBUG(cout << "(6)Lazy created with " << m_samplesize << endl;)
718  }  }
# Line 714  DataLazy::DataLazy(DataAbstract_ptr left Line 740  DataLazy::DataLazy(DataAbstract_ptr left
740     }     }
741     m_readytype=lleft->m_readytype;     m_readytype=lleft->m_readytype;
742     m_left=lleft;     m_left=lleft;
    m_buffsRequired=calcBuffs(m_left, m_right,m_op); // yeah m_right will be null at this point  
743     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
    m_maxsamplesize=max(m_samplesize,m_left->getMaxSampleSize());  
744     m_children=m_left->m_children+1;     m_children=m_left->m_children+1;
745     m_height=m_left->m_height+1;     m_height=m_left->m_height+1;
746       LazyNodeSetup();
747     SIZELIMIT     SIZELIMIT
748  LAZYDEBUG(cout << "(7)Lazy created with " << m_samplesize << endl;)  LAZYDEBUG(cout << "(7)Lazy created with " << m_samplesize << endl;)
749  }  }
750    
751  DataLazy::~DataLazy()  
752    namespace
753  {  {
 }  
754    
755        inline int max3(int a, int b, int c)
756        {
757        int t=(a>b?a:b);
758        return (t>c?t:c);
759    
760  int      }
 DataLazy::getBuffsRequired() const  
 {  
     return m_buffsRequired;  
761  }  }
762    
763    DataLazy::DataLazy(DataAbstract_ptr mask, DataAbstract_ptr left, DataAbstract_ptr right/*, double tol*/)
764  size_t      : parent(left->getFunctionSpace(), left->getShape()),
765  DataLazy::getMaxSampleSize() const      m_op(CONDEVAL),
766        m_axis_offset(0),
767        m_transpose(0),
768        m_tol(0)
769  {  {
770      return m_maxsamplesize;  
771       DataLazy_ptr lmask;
772       DataLazy_ptr lleft;
773       DataLazy_ptr lright;
774       if (!mask->isLazy())
775       {
776        lmask=DataLazy_ptr(new DataLazy(mask));
777       }
778       else
779       {
780        lmask=dynamic_pointer_cast<DataLazy>(mask);
781       }
782       if (!left->isLazy())
783       {
784        lleft=DataLazy_ptr(new DataLazy(left));
785       }
786       else
787       {
788        lleft=dynamic_pointer_cast<DataLazy>(left);
789       }
790       if (!right->isLazy())
791       {
792        lright=DataLazy_ptr(new DataLazy(right));
793       }
794       else
795       {
796        lright=dynamic_pointer_cast<DataLazy>(right);
797       }
798       m_readytype=lmask->m_readytype;
799       if ((lleft->m_readytype!=lright->m_readytype) || (lmask->m_readytype!=lleft->m_readytype))
800       {
801        throw DataException("Programmer Error - condEval arguments must have the same readytype");
802       }
803       m_left=lleft;
804       m_right=lright;
805       m_mask=lmask;
806       m_samplesize=getNumDPPSample()*getNoValues();
807       m_children=m_left->m_children+m_right->m_children+m_mask->m_children+1;
808       m_height=max3(m_left->m_height,m_right->m_height,m_mask->m_height)+1;
809       LazyNodeSetup();
810       SIZELIMIT
811    LAZYDEBUG(cout << "(8)Lazy created with " << m_samplesize << endl;)
812  }  }
813    
814    
815    
816  size_t  DataLazy::~DataLazy()
 DataLazy::getSampleBufferSize() const  
817  {  {
818      return m_maxsamplesize*(max(1,m_buffsRequired));     delete[] m_sampleids;
819  }  }
820    
821    
822  /*  /*
823    \brief Evaluates the expression using methods on Data.    \brief Evaluates the expression using methods on Data.
824    This does the work for the collapse method.    This does the work for the collapse method.
825    For reasons of efficiency do not call this method on DataExpanded nodes.    For reasons of efficiency do not call this method on DataExpanded nodes.
826  */  */
827  DataReady_ptr  DataReady_ptr
828  DataLazy::collapseToReady()  DataLazy::collapseToReady() const
829  {  {
830    if (m_readytype=='E')    if (m_readytype=='E')
831    { // this is more an efficiency concern than anything else    { // this is more an efficiency concern than anything else
# Line 891  DataLazy::collapseToReady() Line 961  DataLazy::collapseToReady()
961      case SWAP:      case SWAP:
962      result=left.swapaxes(m_axis_offset, m_transpose);      result=left.swapaxes(m_axis_offset, m_transpose);
963      break;      break;
964        case MINVAL:
965        result=left.minval();
966        break;
967        case MAXVAL:
968        result=left.minval();
969        break;
970      default:      default:
971      throw DataException("Programmer error - collapseToReady does not know how to resolve operator "+opToString(m_op)+".");      throw DataException("Programmer error - collapseToReady does not know how to resolve operator "+opToString(m_op)+".");
972    }    }
# Line 904  DataLazy::collapseToReady() Line 980  DataLazy::collapseToReady()
980     the purpose of using DataLazy in the first place).     the purpose of using DataLazy in the first place).
981  */  */
982  void  void
983  DataLazy::collapse()  DataLazy::collapse() const
984  {  {
985    if (m_op==IDENTITY)    if (m_op==IDENTITY)
986    {    {
# Line 918  DataLazy::collapse() Line 994  DataLazy::collapse()
994    m_op=IDENTITY;    m_op=IDENTITY;
995  }  }
996    
997  /*  
998    \brief Compute the value of the expression (unary operation) for the given sample.  
999    \return Vector which stores the value of the subexpression for the given sample.  
1000    \param v A vector to store intermediate results.  
1001    \param offset Index in v to begin storing results.  
1002    \param sampleNo Sample number to evaluate.  #define PROC_OP(TYPE,X)                               \
1003    \param roffset (output parameter) the offset in the return vector where the result begins.      for (int j=0;j<onumsteps;++j)\
1004        {\
1005    The return value will be an existing vector so do not deallocate it.        for (int i=0;i<numsteps;++i,resultp+=resultStep) \
1006    If the result is stored in v it should be stored at the offset given.        { \
1007    Everything from offset to the end of v should be considered available for this method to use.  LAZYDEBUG(cout << "[left,right]=[" << lroffset << "," << rroffset << "]" << endl;)\
1008  */  LAZYDEBUG(cout << "{left,right}={" << (*left)[lroffset] << "," << (*right)[rroffset] << "}\n";)\
1009  DataTypes::ValueType*           tensor_binary_operation< TYPE >(chunksize, &((*left)[lroffset]), &((*right)[rroffset]), resultp, X); \
1010  DataLazy::resolveUnary(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const  LAZYDEBUG(cout << " result=      " << resultp[0] << endl;) \
1011             lroffset+=leftstep; \
1012             rroffset+=rightstep; \
1013          }\
1014          lroffset+=oleftstep;\
1015          rroffset+=orightstep;\
1016        }
1017    
1018    
1019    // The result will be stored in m_samples
1020    // The return value is a pointer to the DataVector, offset is the offset within the return value
1021    const DataTypes::ValueType*
1022    DataLazy::resolveNodeSample(int tid, int sampleNo, size_t& roffset) const
1023    {
1024    LAZYDEBUG(cout << "Resolve sample " << toString() << endl;)
1025        // collapse so we have a 'E' node or an IDENTITY for some other type
1026      if (m_readytype!='E' && m_op!=IDENTITY)
1027      {
1028        collapse();
1029      }
1030      if (m_op==IDENTITY)  
1031      {
1032        const ValueType& vec=m_id->getVectorRO();
1033        roffset=m_id->getPointOffset(sampleNo, 0);
1034    #ifdef LAZY_STACK_PROF
1035    int x;
1036    if (&x<stackend[omp_get_thread_num()])
1037    {
1038           stackend[omp_get_thread_num()]=&x;
1039    }
1040    #endif
1041        return &(vec);
1042      }
1043      if (m_readytype!='E')
1044      {
1045        throw DataException("Programmer Error - Collapse did not produce an expanded node.");
1046      }
1047      if (m_sampleids[tid]==sampleNo)
1048      {
1049        roffset=tid*m_samplesize;
1050        return &(m_samples);        // sample is already resolved
1051      }
1052      m_sampleids[tid]=sampleNo;
1053    
1054      switch (getOpgroup(m_op))
1055      {
1056      case G_UNARY:
1057      case G_UNARY_P: return resolveNodeUnary(tid, sampleNo, roffset);
1058      case G_BINARY: return resolveNodeBinary(tid, sampleNo, roffset);
1059      case G_NP1OUT: return resolveNodeNP1OUT(tid, sampleNo, roffset);
1060      case G_NP1OUT_P: return resolveNodeNP1OUT_P(tid, sampleNo, roffset);
1061      case G_TENSORPROD: return resolveNodeTProd(tid, sampleNo, roffset);
1062      case G_NP1OUT_2P: return resolveNodeNP1OUT_2P(tid, sampleNo, roffset);
1063      case G_REDUCTION: return resolveNodeReduction(tid, sampleNo, roffset);
1064      case G_CONDEVAL: return resolveNodeCondEval(tid, sampleNo, roffset);
1065      default:
1066        throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");
1067      }
1068    }
1069    
1070    const DataTypes::ValueType*
1071    DataLazy::resolveNodeUnary(int tid, int sampleNo, size_t& roffset) const
1072  {  {
1073      // we assume that any collapsing has been done before we get here      // we assume that any collapsing has been done before we get here
1074      // since we only have one argument we don't need to think about only      // since we only have one argument we don't need to think about only
1075      // processing single points.      // processing single points.
1076        // we will also know we won't get identity nodes
1077    if (m_readytype!='E')    if (m_readytype!='E')
1078    {    {
1079      throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");      throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
1080    }    }
1081    const ValueType* vleft=m_left->resolveSample(v,offset,sampleNo,roffset);    if (m_op==IDENTITY)
1082    const double* left=&((*vleft)[roffset]);    {
1083    double* result=&(v[offset]);      throw DataException("Programmer error - resolveNodeUnary should not be called on identity nodes.");
1084    roffset=offset;    }
1085      const DataTypes::ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, roffset);
1086      const double* left=&((*leftres)[roffset]);
1087      roffset=m_samplesize*tid;
1088      double* result=&(m_samples[roffset]);
1089    switch (m_op)    switch (m_op)
1090    {    {
1091      case SIN:        case SIN:  
# Line 1053  DataLazy::resolveUnary(ValueType& v, siz Line 1195  DataLazy::resolveUnary(ValueType& v, siz
1195      default:      default:
1196      throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");      throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");
1197    }    }
1198    return &v;    return &(m_samples);
1199  }  }
1200    
1201    
1202    const DataTypes::ValueType*
1203    DataLazy::resolveNodeReduction(int tid, int sampleNo, size_t& roffset) const
1204    {
1205        // we assume that any collapsing has been done before we get here
1206        // since we only have one argument we don't need to think about only
1207        // processing single points.
1208        // we will also know we won't get identity nodes
1209      if (m_readytype!='E')
1210      {
1211        throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
1212      }
1213      if (m_op==IDENTITY)
1214      {
1215        throw DataException("Programmer error - resolveNodeUnary should not be called on identity nodes.");
1216      }
1217      size_t loffset=0;
1218      const DataTypes::ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, loffset);
1219    
1220      roffset=m_samplesize*tid;
1221      unsigned int ndpps=getNumDPPSample();
1222      unsigned int psize=DataTypes::noValues(m_left->getShape());
1223      double* result=&(m_samples[roffset]);
1224      switch (m_op)
1225      {
1226        case MINVAL:
1227        {
1228          for (unsigned int z=0;z<ndpps;++z)
1229          {
1230            FMin op;
1231            *result=DataMaths::reductionOp(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max());
1232            loffset+=psize;
1233            result++;
1234          }
1235        }
1236        break;
1237        case MAXVAL:
1238        {
1239          for (unsigned int z=0;z<ndpps;++z)
1240          {
1241          FMax op;
1242          *result=DataMaths::reductionOp(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max()*-1);
1243          loffset+=psize;
1244          result++;
1245          }
1246        }
1247        break;
1248        default:
1249        throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");
1250      }
1251      return &(m_samples);
1252    }
1253    
1254    const DataTypes::ValueType*
1255    DataLazy::resolveNodeNP1OUT(int tid, int sampleNo, size_t& roffset) const
 /*  
   \brief Compute the value of the expression (unary operation) for the given sample.  
   \return Vector which stores the value of the subexpression for the given sample.  
   \param v A vector to store intermediate results.  
   \param offset Index in v to begin storing results.  
   \param sampleNo Sample number to evaluate.  
   \param roffset (output parameter) the offset in the return vector where the result begins.  
   
   The return value will be an existing vector so do not deallocate it.  
   If the result is stored in v it should be stored at the offset given.  
   Everything from offset to the end of v should be considered available for this method to use.  
 */  
 DataTypes::ValueType*  
 DataLazy::resolveNP1OUT(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const  
1256  {  {
1257      // we assume that any collapsing has been done before we get here      // we assume that any collapsing has been done before we get here
1258      // since we only have one argument we don't need to think about only      // since we only have one argument we don't need to think about only
1259      // processing single points.      // processing single points.
1260    if (m_readytype!='E')    if (m_readytype!='E')
1261    {    {
1262      throw DataException("Programmer error - resolveNP1OUT should only be called on expanded Data.");      throw DataException("Programmer error - resolveNodeNP1OUT should only be called on expanded Data.");
1263    }    }
1264      // since we can't write the result over the input, we need a result offset further along    if (m_op==IDENTITY)
1265    size_t subroffset=roffset+m_samplesize;    {
1266  LAZYDEBUG(cerr << "subroffset=" << subroffset << endl;)      throw DataException("Programmer error - resolveNodeNP1OUT should not be called on identity nodes.");
1267    const ValueType* vleft=m_left->resolveSample(v,offset+m_left->m_samplesize,sampleNo,subroffset);    }
1268    roffset=offset;    size_t subroffset;
1269      const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1270      roffset=m_samplesize*tid;
1271    size_t loop=0;    size_t loop=0;
1272    size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;    size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1273    size_t step=getNoValues();    size_t step=getNoValues();
1274      size_t offset=roffset;
1275    switch (m_op)    switch (m_op)
1276    {    {
1277      case SYM:      case SYM:
1278      for (loop=0;loop<numsteps;++loop)      for (loop=0;loop<numsteps;++loop)
1279      {      {
1280          DataMaths::symmetric(*vleft,m_left->getShape(),subroffset, v, getShape(), offset);          DataMaths::symmetric(*leftres,m_left->getShape(),subroffset, m_samples, getShape(), offset);
1281          subroffset+=step;          subroffset+=step;
1282          offset+=step;          offset+=step;
1283      }      }
# Line 1104  LAZYDEBUG(cerr << "subroffset=" << subro Line 1285  LAZYDEBUG(cerr << "subroffset=" << subro
1285      case NSYM:      case NSYM:
1286      for (loop=0;loop<numsteps;++loop)      for (loop=0;loop<numsteps;++loop)
1287      {      {
1288          DataMaths::nonsymmetric(*vleft,m_left->getShape(),subroffset, v, getShape(), offset);          DataMaths::nonsymmetric(*leftres,m_left->getShape(),subroffset, m_samples, getShape(), offset);
1289          subroffset+=step;          subroffset+=step;
1290          offset+=step;          offset+=step;
1291      }      }
# Line 1112  LAZYDEBUG(cerr << "subroffset=" << subro Line 1293  LAZYDEBUG(cerr << "subroffset=" << subro
1293      default:      default:
1294      throw DataException("Programmer error - resolveNP1OUT can not resolve operator "+opToString(m_op)+".");      throw DataException("Programmer error - resolveNP1OUT can not resolve operator "+opToString(m_op)+".");
1295    }    }
1296    return &v;    return &m_samples;
1297  }  }
1298    
1299  /*  const DataTypes::ValueType*
1300    \brief Compute the value of the expression (unary operation) for the given sample.  DataLazy::resolveNodeNP1OUT_P(int tid, int sampleNo, size_t& roffset) const
   \return Vector which stores the value of the subexpression for the given sample.  
   \param v A vector to store intermediate results.  
   \param offset Index in v to begin storing results.  
   \param sampleNo Sample number to evaluate.  
   \param roffset (output parameter) the offset in the return vector where the result begins.  
   
   The return value will be an existing vector so do not deallocate it.  
   If the result is stored in v it should be stored at the offset given.  
   Everything from offset to the end of v should be considered available for this method to use.  
 */  
 DataTypes::ValueType*  
 DataLazy::resolveNP1OUT_P(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const  
1301  {  {
1302      // we assume that any collapsing has been done before we get here      // we assume that any collapsing has been done before we get here
1303      // since we only have one argument we don't need to think about only      // since we only have one argument we don't need to think about only
1304      // processing single points.      // processing single points.
1305    if (m_readytype!='E')    if (m_readytype!='E')
1306    {    {
1307      throw DataException("Programmer error - resolveNP1OUT_P should only be called on expanded Data.");      throw DataException("Programmer error - resolveNodeNP1OUT_P should only be called on expanded Data.");
1308      }
1309      if (m_op==IDENTITY)
1310      {
1311        throw DataException("Programmer error - resolveNodeNP1OUT_P should not be called on identity nodes.");
1312    }    }
     // since we can't write the result over the input, we need a result offset further along  
1313    size_t subroffset;    size_t subroffset;
1314    const ValueType* vleft=m_left->resolveSample(v,offset+m_left->m_samplesize,sampleNo,subroffset);    size_t offset;
1315  LAZYDEBUG(cerr << "srcsamplesize=" << offset+m_left->m_samplesize << " beg=" << subroffset << endl;)    const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1316  LAZYDEBUG(cerr << "Offset for 5800=" << getPointOffset(5800/getNumDPPSample(),5800%getNumDPPSample()) << endl;)    roffset=m_samplesize*tid;
1317    roffset=offset;    offset=roffset;
1318    size_t loop=0;    size_t loop=0;
1319    size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;    size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1320    size_t outstep=getNoValues();    size_t outstep=getNoValues();
1321    size_t instep=m_left->getNoValues();    size_t instep=m_left->getNoValues();
 LAZYDEBUG(cerr << "instep=" << instep << " outstep=" << outstep<< " numsteps=" << numsteps << endl;)  
1322    switch (m_op)    switch (m_op)
1323    {    {
1324      case TRACE:      case TRACE:
1325      for (loop=0;loop<numsteps;++loop)      for (loop=0;loop<numsteps;++loop)
1326      {      {
1327  size_t zz=sampleNo*getNumDPPSample()+loop;              DataMaths::trace(*leftres,m_left->getShape(),subroffset, m_samples ,getShape(),offset,m_axis_offset);
 if (zz==5800)  
 {  
 LAZYDEBUG(cerr << "point=" <<  zz<< endl;)  
 LAZYDEBUG(cerr << "Input to  trace=" << DataTypes::pointToString(*vleft,m_left->getShape(),subroffset,"") << endl;)  
 LAZYDEBUG(cerr << "Offset for point=" << getPointOffset(5800/getNumDPPSample(),5800%getNumDPPSample()) << " vs ";)  
 LAZYDEBUG(cerr << subroffset << endl;)  
 LAZYDEBUG(cerr << "output=" << offset << endl;)  
 }  
             DataMaths::trace(*vleft,m_left->getShape(),subroffset, v ,getShape(),offset,m_axis_offset);  
 if (zz==5800)  
 {  
 LAZYDEBUG(cerr << "Result of trace=" << DataTypes::pointToString(v,getShape(),offset,"") << endl;)  
 }  
1328          subroffset+=instep;          subroffset+=instep;
1329          offset+=outstep;          offset+=outstep;
1330      }      }
# Line 1174  LAZYDEBUG(cerr << "Result of trace=" << Line 1332  LAZYDEBUG(cerr << "Result of trace=" <<
1332      case TRANS:      case TRANS:
1333      for (loop=0;loop<numsteps;++loop)      for (loop=0;loop<numsteps;++loop)
1334      {      {
1335              DataMaths::transpose(*vleft,m_left->getShape(),subroffset, v,getShape(),offset,m_axis_offset);              DataMaths::transpose(*leftres,m_left->getShape(),subroffset, m_samples, getShape(),offset,m_axis_offset);
1336          subroffset+=instep;          subroffset+=instep;
1337          offset+=outstep;          offset+=outstep;
1338      }      }
# Line 1182  LAZYDEBUG(cerr << "Result of trace=" << Line 1340  LAZYDEBUG(cerr << "Result of trace=" <<
1340      default:      default:
1341      throw DataException("Programmer error - resolveNP1OUTP can not resolve operator "+opToString(m_op)+".");      throw DataException("Programmer error - resolveNP1OUTP can not resolve operator "+opToString(m_op)+".");
1342    }    }
1343    return &v;    return &m_samples;
1344  }  }
1345    
1346    
1347  /*  const DataTypes::ValueType*
1348    \brief Compute the value of the expression (unary operation with int params) for the given sample.  DataLazy::resolveNodeNP1OUT_2P(int tid, int sampleNo, size_t& roffset) const
   \return Vector which stores the value of the subexpression for the given sample.  
   \param v A vector to store intermediate results.  
   \param offset Index in v to begin storing results.  
   \param sampleNo Sample number to evaluate.  
   \param roffset (output parameter) the offset in the return vector where the result begins.  
   
   The return value will be an existing vector so do not deallocate it.  
   If the result is stored in v it should be stored at the offset given.  
   Everything from offset to the end of v should be considered available for this method to use.  
 */  
 DataTypes::ValueType*  
 DataLazy::resolveNP1OUT_2P(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const  
1349  {  {
     // we assume that any collapsing has been done before we get here  
     // since we only have one argument we don't need to think about only  
     // processing single points.  
1350    if (m_readytype!='E')    if (m_readytype!='E')
1351    {    {
1352      throw DataException("Programmer error - resolveNP1OUT_2P should only be called on expanded Data.");      throw DataException("Programmer error - resolveNodeNP1OUT_2P should only be called on expanded Data.");
1353      }
1354      if (m_op==IDENTITY)
1355      {
1356        throw DataException("Programmer error - resolveNodeNP1OUT_2P should not be called on identity nodes.");
1357    }    }
     // since we can't write the result over the input, we need a result offset further along  
1358    size_t subroffset;    size_t subroffset;
1359    const ValueType* vleft=m_left->resolveSample(v,offset+m_left->m_samplesize,sampleNo,subroffset);    size_t offset;
1360  LAZYDEBUG(cerr << "srcsamplesize=" << offset+m_left->m_samplesize << " beg=" << subroffset << endl;)    const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1361  LAZYDEBUG(cerr << "Offset for 5800=" << getPointOffset(5800/getNumDPPSample(),5800%getNumDPPSample()) << endl;)    roffset=m_samplesize*tid;
1362    roffset=offset;    offset=roffset;
1363    size_t loop=0;    size_t loop=0;
1364    size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;    size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1365    size_t outstep=getNoValues();    size_t outstep=getNoValues();
1366    size_t instep=m_left->getNoValues();    size_t instep=m_left->getNoValues();
 LAZYDEBUG(cerr << "instep=" << instep << " outstep=" << outstep<< " numsteps=" << numsteps << endl;)  
1367    switch (m_op)    switch (m_op)
1368    {    {
1369      case SWAP:      case SWAP:
1370      for (loop=0;loop<numsteps;++loop)      for (loop=0;loop<numsteps;++loop)
1371      {      {
1372              DataMaths::swapaxes(*vleft,m_left->getShape(),subroffset, v,getShape(),offset,m_axis_offset, m_transpose);              DataMaths::swapaxes(*leftres,m_left->getShape(),subroffset, m_samples, getShape(),offset, m_axis_offset, m_transpose);
1373          subroffset+=instep;          subroffset+=instep;
1374          offset+=outstep;          offset+=outstep;
1375      }      }
1376      break;      break;
1377      default:      default:
1378      throw DataException("Programmer error - resolveNP1OUT2P can not resolve operator "+opToString(m_op)+".");      throw DataException("Programmer error - resolveNodeNP1OUT2P can not resolve operator "+opToString(m_op)+".");
1379    }    }
1380    return &v;    return &m_samples;
1381  }  }
1382    
1383    const DataTypes::ValueType*
1384    DataLazy::resolveNodeCondEval(int tid, int sampleNo, size_t& roffset) const
1385    {
1386      if (m_readytype!='E')
1387      {
1388        throw DataException("Programmer error - resolveNodeCondEval should only be called on expanded Data.");
1389      }
1390      if (m_op!=CONDEVAL)
1391      {
1392        throw DataException("Programmer error - resolveNodeCondEval should only be called on CONDEVAL nodes.");
1393      }
1394      size_t subroffset;
1395    
1396      const ValueType* maskres=m_mask->resolveNodeSample(tid, sampleNo, subroffset);
1397      const ValueType* srcres=0;
1398      if ((*maskres)[subroffset]>0)
1399      {
1400        srcres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1401      }
1402      else
1403      {
1404        srcres=m_right->resolveNodeSample(tid, sampleNo, subroffset);
1405      }
1406    
1407  #define PROC_OP(TYPE,X)                               \    // Now we need to copy the result
1408      for (int j=0;j<onumsteps;++j)\  
1409      {\    roffset=m_samplesize*tid;
1410        for (int i=0;i<numsteps;++i,resultp+=resultStep) \    for (int i=0;i<m_samplesize;++i)
1411        { \    {
1412  LAZYDEBUG(cout << "[left,right]=[" << lroffset << "," << rroffset << "]" << endl;)\      m_samples[roffset+i]=(*srcres)[subroffset+i];  
1413  LAZYDEBUG(cout << "{left,right}={" << (*left)[lroffset] << "," << (*right)[rroffset] << "}\n";)\    }
1414           tensor_binary_operation< TYPE >(chunksize, &((*left)[lroffset]), &((*right)[rroffset]), resultp, X); \  
1415  LAZYDEBUG(cout << " result=      " << resultp[0] << endl;) \    return &m_samples;
1416           lroffset+=leftstep; \  }
          rroffset+=rightstep; \  
       }\  
       lroffset+=oleftstep;\  
       rroffset+=orightstep;\  
     }  
1417    
 /*  
   \brief Compute the value of the expression (binary operation) for the given sample.  
   \return Vector which stores the value of the subexpression for the given sample.  
   \param v A vector to store intermediate results.  
   \param offset Index in v to begin storing results.  
   \param sampleNo Sample number to evaluate.  
   \param roffset (output parameter) the offset in the return vector where the result begins.  
   
   The return value will be an existing vector so do not deallocate it.  
   If the result is stored in v it should be stored at the offset given.  
   Everything from offset to the end of v should be considered available for this method to use.  
 */  
1418  // This method assumes that any subexpressions which evaluate to Constant or Tagged Data  // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1419  // have already been collapsed to IDENTITY. So we must have at least one expanded child.  // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1420  // If both children are expanded, then we can process them in a single operation (we treat  // If both children are expanded, then we can process them in a single operation (we treat
# Line 1274  LAZYDEBUG(cout << " result=      " << re Line 1424  LAZYDEBUG(cout << " result=      " << re
1424  // There is an additional complication when scalar operations are considered.  // There is an additional complication when scalar operations are considered.
1425  // For example, 2+Vector.  // For example, 2+Vector.
1426  // In this case each double within the point is treated individually  // In this case each double within the point is treated individually
1427  DataTypes::ValueType*  const DataTypes::ValueType*
1428  DataLazy::resolveBinary(ValueType& v,  size_t offset, int sampleNo, size_t& roffset) const  DataLazy::resolveNodeBinary(int tid, int sampleNo, size_t& roffset) const
1429  {  {
1430  LAZYDEBUG(cout << "Resolve binary: " << toString() << endl;)  LAZYDEBUG(cout << "Resolve binary: " << toString() << endl;)
1431    
# Line 1387  LAZYDEBUG(cout << "Resolve binary: " << Line 1537  LAZYDEBUG(cout << "Resolve binary: " <<
1537    
1538    int resultStep=max(leftstep,rightstep);   // only one (at most) should be !=0    int resultStep=max(leftstep,rightstep);   // only one (at most) should be !=0
1539      // Get the values of sub-expressions      // Get the values of sub-expressions
1540    const ValueType* left=m_left->resolveSample(v,offset+getMaxSampleSize(),sampleNo,lroffset);   // see note on    const ValueType* left=m_left->resolveNodeSample(tid,sampleNo,lroffset);  
1541      // calcBufss for why we can't put offset as the 2nd param above    const ValueType* right=m_right->resolveNodeSample(tid,sampleNo,rroffset);
   const ValueType* right=m_right->resolveSample(v,offset+2*getMaxSampleSize(),sampleNo,rroffset); // Note  
     // the right child starts further along.  
1542  LAZYDEBUG(cout << "Post sub calls in " << toString() << endl;)  LAZYDEBUG(cout << "Post sub calls in " << toString() << endl;)
1543  LAZYDEBUG(cout << "shapes=" << DataTypes::shapeToString(m_left->getShape()) << "," << DataTypes::shapeToString(m_right->getShape()) << endl;)  LAZYDEBUG(cout << "shapes=" << DataTypes::shapeToString(m_left->getShape()) << "," << DataTypes::shapeToString(m_right->getShape()) << endl;)
1544  LAZYDEBUG(cout << "chunksize=" << chunksize << endl << "leftstep=" << leftstep << " rightstep=" << rightstep;)  LAZYDEBUG(cout << "chunksize=" << chunksize << endl << "leftstep=" << leftstep << " rightstep=" << rightstep;)
# Line 1399  LAZYDEBUG(cout << "onumsteps=" << onumst Line 1547  LAZYDEBUG(cout << "onumsteps=" << onumst
1547  LAZYDEBUG(cout << " DPPS=" << m_left->getNumDPPSample() << "," <<m_right->getNumDPPSample() << endl;)  LAZYDEBUG(cout << " DPPS=" << m_left->getNumDPPSample() << "," <<m_right->getNumDPPSample() << endl;)
1548  LAZYDEBUG(cout << "" << LS << RS << LN << RN << LES << RES <<LEN << REN <<   endl;)  LAZYDEBUG(cout << "" << LS << RS << LN << RN << LES << RES <<LEN << REN <<   endl;)
1549    
1550    LAZYDEBUG(cout << "Left res["<< lroffset<< "]=" << (*left)[lroffset] << endl;)
1551    LAZYDEBUG(cout << "Right res["<< rroffset<< "]=" << (*right)[rroffset] << endl;)
1552    
1553    double* resultp=&(v[offset]);     // results are stored at the vector offset we recieved  
1554      roffset=m_samplesize*tid;
1555      double* resultp=&(m_samples[roffset]);        // results are stored at the vector offset we received
1556    switch(m_op)    switch(m_op)
1557    {    {
1558      case ADD:      case ADD:
# Line 1421  LAZYDEBUG(cout << "" << LS << RS << LN < Line 1573  LAZYDEBUG(cout << "" << LS << RS << LN <
1573      default:      default:
1574      throw DataException("Programmer error - resolveBinary can not resolve operator "+opToString(m_op)+".");      throw DataException("Programmer error - resolveBinary can not resolve operator "+opToString(m_op)+".");
1575    }    }
1576    roffset=offset;  LAZYDEBUG(cout << "Result res[" << roffset<< "]" << m_samples[roffset] << endl;)
1577    return &v;    return &m_samples;
1578  }  }
1579    
1580    
   
 /*  
   \brief Compute the value of the expression (tensor product) for the given sample.  
   \return Vector which stores the value of the subexpression for the given sample.  
   \param v A vector to store intermediate results.  
   \param offset Index in v to begin storing results.  
   \param sampleNo Sample number to evaluate.  
   \param roffset (output parameter) the offset in the return vector where the result begins.  
   
   The return value will be an existing vector so do not deallocate it.  
   If the result is stored in v it should be stored at the offset given.  
   Everything from offset to the end of v should be considered available for this method to use.  
 */  
1581  // This method assumes that any subexpressions which evaluate to Constant or Tagged Data  // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1582  // have already been collapsed to IDENTITY. So we must have at least one expanded child.  // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1583  // unlike the other resolve helpers, we must treat these datapoints separately.  // unlike the other resolve helpers, we must treat these datapoints separately.
1584  DataTypes::ValueType*  const DataTypes::ValueType*
1585  DataLazy::resolveTProd(ValueType& v,  size_t offset, int sampleNo, size_t& roffset) const  DataLazy::resolveNodeTProd(int tid, int sampleNo, size_t& roffset) const
1586  {  {
1587  LAZYDEBUG(cout << "Resolve TensorProduct: " << toString()  << " to offset " << offset<< endl;)  LAZYDEBUG(cout << "Resolve TensorProduct: " << toString() << endl;)
1588    
1589    size_t lroffset=0, rroffset=0;    // offsets in the left and right result vectors    size_t lroffset=0, rroffset=0;    // offsets in the left and right result vectors
1590      // first work out which of the children are expanded      // first work out which of the children are expanded
1591    bool leftExp=(m_left->m_readytype=='E');    bool leftExp=(m_left->m_readytype=='E');
1592    bool rightExp=(m_right->m_readytype=='E');    bool rightExp=(m_right->m_readytype=='E');
1593    int steps=getNumDPPSample();    int steps=getNumDPPSample();
 /*  int leftStep=((leftExp && !rightExp)? m_right->getNoValues() : 0);  
   int rightStep=((rightExp && !leftExp)? m_left->getNoValues() : 0);*/  
1594    int leftStep=(leftExp? m_left->getNoValues() : 0);        // do not have scalars as input to this method    int leftStep=(leftExp? m_left->getNoValues() : 0);        // do not have scalars as input to this method
1595    int rightStep=(rightExp?m_right->getNoValues() : 0);    int rightStep=(rightExp?m_right->getNoValues() : 0);
1596    
1597    int resultStep=getNoValues();    int resultStep=getNoValues();
1598      // Get the values of sub-expressions (leave a gap of one sample for the result).    roffset=m_samplesize*tid;
1599    int gap=offset+m_samplesize;      size_t offset=roffset;
   
 LAZYDEBUG(cout << "Query left with offset=" << gap << endl;)  
   
   const ValueType* left=m_left->resolveSample(v,gap,sampleNo,lroffset);  
   gap+=m_left->getMaxSampleSize();  
   
1600    
1601  LAZYDEBUG(cout << "Query right with offset=" << gap << endl;)    const ValueType* left=m_left->resolveNodeSample(tid, sampleNo, lroffset);
1602    
1603      const ValueType* right=m_right->resolveNodeSample(tid, sampleNo, rroffset);
   const ValueType* right=m_right->resolveSample(v,gap,sampleNo,rroffset);  
1604    
1605  LAZYDEBUG(cerr << "[Left shape]=" << DataTypes::shapeToString(m_left->getShape()) << "\n[Right shape]=" << DataTypes::shapeToString(m_right->getShape()) << " result=" <<DataTypes::shapeToString(getShape()) <<  endl;  LAZYDEBUG(cerr << "[Left shape]=" << DataTypes::shapeToString(m_left->getShape()) << "\n[Right shape]=" << DataTypes::shapeToString(m_right->getShape()) << " result=" <<DataTypes::shapeToString(getShape()) <<  endl;
1606  cout << getNoValues() << endl;)  cout << getNoValues() << endl;)
 LAZYDEBUG(cerr << "Result of left=";)  
 LAZYDEBUG(cerr << "[" << lroffset << " .. " << lroffset+m_left->getNoValues() << "]" << endl;  
1607    
1608  for (int i=lroffset, limit=lroffset+(leftExp?m_left->getNoValues()*m_left->getNumDPPSample():m_left->getNoValues());i<limit;++i)  
 {  
 cout << "[" << setw(2) << i-lroffset << "] " << setw(10) << (*left)[i] << " ";  
 if (i%4==0) cout << endl;  
 })  
 LAZYDEBUG(cerr << "\nResult of right=" << endl;)  
 LAZYDEBUG(  
 for (int i=rroffset, limit=rroffset+(rightExp?m_right->getNoValues()*m_right->getNumDPPSample():m_right->getNoValues());i<limit;++i)  
 {  
 cerr << "[" <<  setw(2)<< i-rroffset << "] " << setw(10) << (*right)[i] << " ";  
 if (i%4==0) cout << endl;  
 }  
 cerr << endl;  
 )  
1609  LAZYDEBUG(cerr << "Post sub calls: " << toString() << endl;)  LAZYDEBUG(cerr << "Post sub calls: " << toString() << endl;)
1610  LAZYDEBUG(cout << "LeftExp=" << leftExp << " rightExp=" << rightExp << endl;)  LAZYDEBUG(cout << "LeftExp=" << leftExp << " rightExp=" << rightExp << endl;)
1611  LAZYDEBUG(cout << "LeftR=" << m_left->getRank() << " rightExp=" << m_right->getRank() << endl;)  LAZYDEBUG(cout << "LeftR=" << m_left->getRank() << " rightExp=" << m_right->getRank() << endl;)
# Line 1499  LAZYDEBUG(cout << "m_samplesize=" << m_s Line 1614  LAZYDEBUG(cout << "m_samplesize=" << m_s
1614  LAZYDEBUG(cout << "outputshape=" << DataTypes::shapeToString(getShape()) << endl;)  LAZYDEBUG(cout << "outputshape=" << DataTypes::shapeToString(getShape()) << endl;)
1615  LAZYDEBUG(cout << "DPPS=" << m_right->getNumDPPSample() <<"."<<endl;)  LAZYDEBUG(cout << "DPPS=" << m_right->getNumDPPSample() <<"."<<endl;)
1616    
1617    double* resultp=&(v[offset]);     // results are stored at the vector offset we recieved    double* resultp=&(m_samples[offset]);     // results are stored at the vector offset we received
1618    switch(m_op)    switch(m_op)
1619    {    {
1620      case PROD:      case PROD:
1621      for (int i=0;i<steps;++i,resultp+=resultStep)      for (int i=0;i<steps;++i,resultp+=resultStep)
1622      {      {
   
 LAZYDEBUG(cout << "lroffset=" << lroffset << "rroffset=" << rroffset << endl;)  
 LAZYDEBUG(cout << "l*=" << left << " r*=" << right << endl;)  
 LAZYDEBUG(cout << "m_SL=" << m_SL << " m_SM=" << m_SM << " m_SR=" << m_SR << endl;)  
   
1623            const double *ptr_0 = &((*left)[lroffset]);            const double *ptr_0 = &((*left)[lroffset]);
1624            const double *ptr_1 = &((*right)[rroffset]);            const double *ptr_1 = &((*right)[rroffset]);
1625    
# Line 1518  LAZYDEBUG(cout << DataTypes::pointToStri Line 1628  LAZYDEBUG(cout << DataTypes::pointToStri
1628    
1629            matrix_matrix_product(m_SL, m_SM, m_SR, ptr_0, ptr_1, resultp, m_transpose);            matrix_matrix_product(m_SL, m_SM, m_SR, ptr_0, ptr_1, resultp, m_transpose);
1630    
 LAZYDEBUG(cout << "Results--\n";  
 {  
   DataVector dv(getNoValues());  
 for (int z=0;z<getNoValues();++z)  
 {  
   cout << "[" << setw(2) << z<< "] " << setw(10) << resultp[z] << " ";  
   if (z%4==0) cout << endl;  
   dv[z]=resultp[z];  
 }  
 cout << endl << DataTypes::pointToString(dv,getShape(),0,"RESLT");  
 cout << "\nWritten to: " << resultp << " resultStep=" << resultStep << endl;  
 }  
 )  
1631        lroffset+=leftStep;        lroffset+=leftStep;
1632        rroffset+=rightStep;        rroffset+=rightStep;
1633      }      }
# Line 1539  cout << "\nWritten to: " << resultp << " Line 1636  cout << "\nWritten to: " << resultp << "
1636      throw DataException("Programmer error - resolveTProduct can not resolve operator "+opToString(m_op)+".");      throw DataException("Programmer error - resolveTProduct can not resolve operator "+opToString(m_op)+".");
1637    }    }
1638    roffset=offset;    roffset=offset;
1639    return &v;    return &m_samples;
1640  }  }
1641    
1642    
   
 /*  
   \brief Compute the value of the expression for the given sample.  
   \return Vector which stores the value of the subexpression for the given sample.  
   \param v A vector to store intermediate results.  
   \param offset Index in v to begin storing results.  
   \param sampleNo Sample number to evaluate.  
   \param roffset (output parameter) the offset in the return vector where the result begins.  
   
   The return value will be an existing vector so do not deallocate it.  
 */  
 // the vector and the offset are a place where the method could write its data if it wishes  
 // it is not obligated to do so. For example, if it has its own storage already, it can use that.  
 // Hence the return value to indicate where the data is actually stored.  
 // Regardless, the storage should be assumed to be used, even if it isn't.  
   
 // the roffset is the offset within the returned vector where the data begins  
1643  const DataTypes::ValueType*  const DataTypes::ValueType*
1644  DataLazy::resolveSample(ValueType& v, size_t offset, int sampleNo, size_t& roffset)  DataLazy::resolveSample(int sampleNo, size_t& roffset) const
 {  
 LAZYDEBUG(cout << "Resolve sample " << toString() << endl;)  
     // collapse so we have a 'E' node or an IDENTITY for some other type  
   if (m_readytype!='E' && m_op!=IDENTITY)  
   {  
     collapse();  
   }  
   if (m_op==IDENTITY)    
   {  
     const ValueType& vec=m_id->getVectorRO();  
     if (m_readytype=='C')  
     {  
     roffset=0;  
 LAZYDEBUG(cout << "Finish  sample " << toString() << endl;)  
     return &(vec);  
     }  
     roffset=m_id->getPointOffset(sampleNo, 0);  
 LAZYDEBUG(cout << "Finish  sample " << toString() << endl;)  
     return &(vec);  
   }  
   if (m_readytype!='E')  
   {  
     throw DataException("Programmer Error - Collapse did not produce an expanded node.");  
   }  
   switch (getOpgroup(m_op))  
   {  
   case G_UNARY:  
   case G_UNARY_P: return resolveUnary(v, offset,sampleNo,roffset);  
   case G_BINARY: return resolveBinary(v, offset,sampleNo,roffset);  
   case G_NP1OUT: return resolveNP1OUT(v, offset, sampleNo,roffset);  
   case G_NP1OUT_P: return resolveNP1OUT_P(v, offset, sampleNo,roffset);  
   case G_TENSORPROD: return resolveTProd(v,offset, sampleNo,roffset);  
   case G_NP1OUT_2P: return resolveNP1OUT_2P(v, offset, sampleNo, roffset);  
   default:  
     throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");  
   }  
   
 }  
   
 const DataTypes::ValueType*  
 DataLazy::resolveSample(BufferGroup& bg, int sampleNo, size_t& roffset)  
1645  {  {
1646  #ifdef _OPENMP  #ifdef _OPENMP
1647      int tid=omp_get_thread_num();      int tid=omp_get_thread_num();
1648  #else  #else
1649      int tid=0;      int tid=0;
1650  #endif  #endif
1651      return resolveSample(bg.getBuffer(tid),bg.getOffset(tid),sampleNo,roffset);  
1652    #ifdef LAZY_STACK_PROF
1653        stackstart[tid]=&tid;
1654        stackend[tid]=&tid;
1655        const DataTypes::ValueType* r=resolveNodeSample(tid, sampleNo, roffset);
1656        size_t d=(size_t)stackstart[tid]-(size_t)stackend[tid];
1657        #pragma omp critical
1658        if (d>maxstackuse)
1659        {
1660    cout << "Max resolve Stack use " << d << endl;
1661            maxstackuse=d;
1662        }
1663        return r;
1664    #else
1665        return resolveNodeSample(tid, sampleNo, roffset);
1666    #endif
1667  }  }
1668    
1669    
1670  // This needs to do the work of the idenity constructor  // This needs to do the work of the identity constructor
1671  void  void
1672  DataLazy::resolveToIdentity()  DataLazy::resolveToIdentity()
1673  {  {
1674     if (m_op==IDENTITY)     if (m_op==IDENTITY)
1675      return;      return;
1676     DataReady_ptr p=resolve();     DataReady_ptr p=resolveNodeWorker();
1677     makeIdentity(p);     makeIdentity(p);
1678  }  }
1679    
# Line 1635  void DataLazy::makeIdentity(const DataRe Line 1689  void DataLazy::makeIdentity(const DataRe
1689     else if(p->isExpanded()) {m_readytype='E';}     else if(p->isExpanded()) {m_readytype='E';}
1690     else if (p->isTagged()) {m_readytype='T';}     else if (p->isTagged()) {m_readytype='T';}
1691     else {throw DataException("Unknown DataReady instance in convertToIdentity constructor.");}     else {throw DataException("Unknown DataReady instance in convertToIdentity constructor.");}
    m_buffsRequired=1;  
1692     m_samplesize=p->getNumDPPSample()*p->getNoValues();     m_samplesize=p->getNumDPPSample()*p->getNoValues();
    m_maxsamplesize=m_samplesize;  
1693     m_left.reset();     m_left.reset();
1694     m_right.reset();     m_right.reset();
1695  }  }
1696    
1697  // To simplify the memory management, all threads operate on one large vector, rather than one each.  
 // Each sample is evaluated independently and copied into the result DataExpanded.  
1698  DataReady_ptr  DataReady_ptr
1699  DataLazy::resolve()  DataLazy::resolve()
1700  {  {
1701        resolveToIdentity();
1702        return m_id;
1703    }
1704    
1705    
1706    /* This is really a static method but I think that caused problems in windows */
1707    void
1708    DataLazy::resolveGroupWorker(std::vector<DataLazy*>& dats)
1709    {
1710      if (dats.empty())
1711      {
1712        return;
1713      }
1714      vector<DataLazy*> work;
1715      FunctionSpace fs=dats[0]->getFunctionSpace();
1716      bool match=true;
1717      for (int i=dats.size()-1;i>=0;--i)
1718      {
1719        if (dats[i]->m_readytype!='E')
1720        {
1721            dats[i]->collapse();
1722        }
1723        if (dats[i]->m_op!=IDENTITY)
1724        {
1725            work.push_back(dats[i]);
1726            if (fs!=dats[i]->getFunctionSpace())
1727            {
1728                match=false;
1729            }
1730        }
1731      }
1732      if (work.empty())
1733      {
1734        return;     // no work to do
1735      }
1736      if (match)    // all functionspaces match.  Yes I realise this is overly strict
1737      {     // it is possible that dats[0] is one of the objects which we discarded and
1738            // all the other functionspaces match.
1739        vector<DataExpanded*> dep;
1740        vector<ValueType*> vecs;
1741        for (int i=0;i<work.size();++i)
1742        {
1743            dep.push_back(new DataExpanded(fs,work[i]->getShape(), ValueType(work[i]->getNoValues())));
1744            vecs.push_back(&(dep[i]->getVectorRW()));
1745        }
1746        int totalsamples=work[0]->getNumSamples();
1747        const ValueType* res=0; // Storage for answer
1748        int sample;
1749        #pragma omp parallel private(sample, res)
1750        {
1751            size_t roffset=0;
1752            #pragma omp for schedule(static)
1753            for (sample=0;sample<totalsamples;++sample)
1754            {
1755            roffset=0;
1756            int j;
1757            for (j=work.size()-1;j>=0;--j)
1758            {
1759    #ifdef _OPENMP
1760                    res=work[j]->resolveNodeSample(omp_get_thread_num(),sample,roffset);
1761    #else
1762                    res=work[j]->resolveNodeSample(0,sample,roffset);
1763    #endif
1764                    DataVector::size_type outoffset=dep[j]->getPointOffset(sample,0);
1765                    memcpy(&((*vecs[j])[outoffset]),&((*res)[roffset]),work[j]->m_samplesize*sizeof(DataVector::ElementType));
1766            }
1767            }
1768        }
1769        // Now we need to load the new results as identity ops into the lazy nodes
1770        for (int i=work.size()-1;i>=0;--i)
1771        {
1772            work[i]->makeIdentity(boost::dynamic_pointer_cast<DataReady>(dep[i]->getPtr()));
1773        }
1774      }
1775      else  // functionspaces do not match
1776      {
1777        for (int i=0;i<work.size();++i)
1778        {
1779            work[i]->resolveToIdentity();
1780        }
1781      }
1782    }
1783    
1784    
1785  LAZYDEBUG(cout << "Sample size=" << m_samplesize << endl;)  
1786  LAZYDEBUG(cout << "Buffers=" << m_buffsRequired << endl;)  // This version of resolve uses storage in each node to hold results
1787    DataReady_ptr
1788    DataLazy::resolveNodeWorker()
1789    {
1790    if (m_readytype!='E')     // if the whole sub-expression is Constant or Tagged, then evaluate it normally    if (m_readytype!='E')     // if the whole sub-expression is Constant or Tagged, then evaluate it normally
1791    {    {
1792      collapse();      collapse();
# Line 1659  LAZYDEBUG(cout << "Buffers=" << m_buffsR Line 1796  LAZYDEBUG(cout << "Buffers=" << m_buffsR
1796      return m_id;      return m_id;
1797    }    }
1798      // from this point on we must have m_op!=IDENTITY and m_readytype=='E'      // from this point on we must have m_op!=IDENTITY and m_readytype=='E'
   size_t threadbuffersize=m_maxsamplesize*(max(1,m_buffsRequired)); // Each thread needs to have enough  
     // storage to evaluate its expression  
   int numthreads=1;  
 #ifdef _OPENMP  
   numthreads=omp_get_max_threads();  
 #endif  
   ValueType v(numthreads*threadbuffersize);  
 LAZYDEBUG(cout << "Buffer created with size=" << v.size() << endl;)  
1799    DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(),  ValueType(getNoValues()));    DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(),  ValueType(getNoValues()));
1800    ValueType& resvec=result->getVectorRW();    ValueType& resvec=result->getVectorRW();
1801    DataReady_ptr resptr=DataReady_ptr(result);    DataReady_ptr resptr=DataReady_ptr(result);
1802    
1803    int sample;    int sample;
   size_t outoffset;     // offset in the output data  
1804    int totalsamples=getNumSamples();    int totalsamples=getNumSamples();
1805    const ValueType* res=0;   // Vector storing the answer    const ValueType* res=0;   // Storage for answer
   size_t resoffset=0;       // where in the vector to find the answer  
1806  LAZYDEBUG(cout << "Total number of samples=" <<totalsamples << endl;)  LAZYDEBUG(cout << "Total number of samples=" <<totalsamples << endl;)
1807    #pragma omp parallel for private(sample,resoffset,outoffset,res) schedule(static)    #pragma omp parallel private(sample,res)
   for (sample=0;sample<totalsamples;++sample)  
1808    {    {
1809        if (sample==0)  {ENABLEDEBUG}      size_t roffset=0;
1810    #ifdef LAZY_STACK_PROF
1811  //       if (sample==5800/getNumDPPSample())  {ENABLEDEBUG}      stackstart[omp_get_thread_num()]=&roffset;
1812  LAZYDEBUG(cout << "################################# " << sample << endl;)      stackend[omp_get_thread_num()]=&roffset;
1813    #endif
1814        #pragma omp for schedule(static)
1815        for (sample=0;sample<totalsamples;++sample)
1816        {
1817            roffset=0;
1818  #ifdef _OPENMP  #ifdef _OPENMP
1819      res=resolveSample(v,threadbuffersize*omp_get_thread_num(),sample,resoffset);              res=resolveNodeSample(omp_get_thread_num(),sample,roffset);
1820  #else  #else
1821      res=resolveSample(v,0,sample,resoffset);   // res would normally be v, but not if its a single IDENTITY op.              res=resolveNodeSample(0,sample,roffset);
1822  #endif  #endif
1823  LAZYDEBUG(cerr << "-------------------------------- " << endl;)  LAZYDEBUG(cout << "Sample #" << sample << endl;)
1824  LAZYDEBUG(cerr<< "Copying sample#" << sample << endl;)  LAZYDEBUG(cout << "Final res[" << roffset<< "]=" << (*res)[roffset] << (*res)[roffset]<< endl; )
1825      outoffset=result->getPointOffset(sample,0);              DataVector::size_type outoffset=result->getPointOffset(sample,0);
1826  LAZYDEBUG(cerr << "offset=" << outoffset << " from offset=" << resoffset << " " << m_samplesize << " doubles" << endl;)              memcpy(&(resvec[outoffset]),&((*res)[roffset]),m_samplesize*sizeof(DataVector::ElementType));
1827      for (unsigned int i=0;i<m_samplesize;++i,++outoffset,++resoffset)   // copy values into the output vector      }
1828      {    }
1829  LAZYDEBUG(cerr << "outoffset=" << outoffset << " resoffset=" << resoffset << " " << (*res)[resoffset]<< endl;)  #ifdef LAZY_STACK_PROF
1830      resvec[outoffset]=(*res)[resoffset];    for (int i=0;i<getNumberOfThreads();++i)
1831      }    {
1832  LAZYDEBUG(cerr << DataTypes::pointToString(resvec,getShape(),outoffset-m_samplesize+DataTypes::noValues(getShape()),"Final result:") << endl;)      size_t r=((size_t)stackstart[i] - (size_t)stackend[i]);
1833  LAZYDEBUG(cerr << "*********************************" << endl;)  //  cout << i << " " << stackstart[i] << " .. " << stackend[i] << " = " <<  r << endl;
1834      DISABLEDEBUG      if (r>maxstackuse)
1835        {
1836            maxstackuse=r;
1837        }
1838    }    }
1839      cout << "Max resolve Stack use=" << maxstackuse << endl;
1840    #endif
1841    return resptr;    return resptr;
1842  }  }
1843    
# Line 1708  std::string Line 1845  std::string
1845  DataLazy::toString() const  DataLazy::toString() const
1846  {  {
1847    ostringstream oss;    ostringstream oss;
1848    oss << "Lazy Data:";    oss << "Lazy Data: [depth=" << m_height<< "] ";
1849    intoString(oss);    switch (escriptParams.getLAZY_STR_FMT())
1850      {
1851      case 1:   // tree format
1852        oss << endl;
1853        intoTreeString(oss,"");
1854        break;
1855      case 2:   // just the depth
1856        break;
1857      default:
1858        intoString(oss);
1859        break;
1860      }
1861    return oss.str();    return oss.str();
1862  }  }
1863    
# Line 1750  DataLazy::intoString(ostringstream& oss) Line 1898  DataLazy::intoString(ostringstream& oss)
1898    case G_UNARY_P:    case G_UNARY_P:
1899    case G_NP1OUT:    case G_NP1OUT:
1900    case G_NP1OUT_P:    case G_NP1OUT_P:
1901      case G_REDUCTION:
1902      oss << opToString(m_op) << '(';      oss << opToString(m_op) << '(';
1903      m_left->intoString(oss);      m_left->intoString(oss);
1904      oss << ')';      oss << ')';
# Line 1767  DataLazy::intoString(ostringstream& oss) Line 1916  DataLazy::intoString(ostringstream& oss)
1916      oss << ", " << m_axis_offset << ", " << m_transpose;      oss << ", " << m_axis_offset << ", " << m_transpose;
1917      oss << ')';      oss << ')';
1918      break;      break;
1919      case G_CONDEVAL:
1920        oss << opToString(m_op)<< '(' ;
1921        m_mask->intoString(oss);
1922        oss << " ? ";
1923        m_left->intoString(oss);
1924        oss << " : ";
1925        m_right->intoString(oss);
1926        oss << ')';
1927        break;
1928    default:    default:
1929      oss << "UNKNOWN";      oss << "UNKNOWN";
1930    }    }
1931  }  }
1932    
1933    
1934    void
1935    DataLazy::intoTreeString(ostringstream& oss, string indent) const
1936    {
1937      oss << '[' << m_rank << ':' << setw(3) << m_samplesize << "] " << indent;
1938      switch (getOpgroup(m_op))
1939      {
1940      case G_IDENTITY:
1941        if (m_id->isExpanded())
1942        {
1943           oss << "E";
1944        }
1945        else if (m_id->isTagged())
1946        {
1947          oss << "T";
1948        }
1949        else if (m_id->isConstant())
1950        {
1951          oss << "C";
1952        }
1953        else
1954        {
1955          oss << "?";
1956        }
1957        oss << '@' << m_id.get() << endl;
1958        break;
1959      case G_BINARY:
1960        oss << opToString(m_op) << endl;
1961        indent+='.';
1962        m_left->intoTreeString(oss, indent);
1963        m_right->intoTreeString(oss, indent);
1964        break;
1965      case G_UNARY:
1966      case G_UNARY_P:
1967      case G_NP1OUT:
1968      case G_NP1OUT_P:
1969      case G_REDUCTION:
1970        oss << opToString(m_op) << endl;
1971        indent+='.';
1972        m_left->intoTreeString(oss, indent);
1973        break;
1974      case G_TENSORPROD:
1975        oss << opToString(m_op) << endl;
1976        indent+='.';
1977        m_left->intoTreeString(oss, indent);
1978        m_right->intoTreeString(oss, indent);
1979        break;
1980      case G_NP1OUT_2P:
1981        oss << opToString(m_op) << ", " << m_axis_offset << ", " << m_transpose<< endl;
1982        indent+='.';
1983        m_left->intoTreeString(oss, indent);
1984        break;
1985      default:
1986        oss << "UNKNOWN";
1987      }
1988    }
1989    
1990    
1991  DataAbstract*  DataAbstract*
1992  DataLazy::deepCopy()  DataLazy::deepCopy()
1993  {  {
1994    switch (getOpgroup(m_op))    switch (getOpgroup(m_op))
1995    {    {
1996    case G_IDENTITY:  return new DataLazy(m_id->deepCopy()->getPtr());    case G_IDENTITY:  return new DataLazy(m_id->deepCopy()->getPtr());
1997    case G_UNARY: return new DataLazy(m_left->deepCopy()->getPtr(),m_op);    case G_UNARY:
1998      case G_REDUCTION:      return new DataLazy(m_left->deepCopy()->getPtr(),m_op);
1999      case G_UNARY_P:   return new DataLazy(m_left->deepCopy()->getPtr(), m_op, m_tol);
2000    case G_BINARY:    return new DataLazy(m_left->deepCopy()->getPtr(),m_right->deepCopy()->getPtr(),m_op);    case G_BINARY:    return new DataLazy(m_left->deepCopy()->getPtr(),m_right->deepCopy()->getPtr(),m_op);
2001    case G_NP1OUT: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(),m_op);    case G_NP1OUT: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(),m_op);
2002    case G_TENSORPROD: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);    case G_TENSORPROD: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
2003      case G_NP1OUT_P:   return new DataLazy(m_left->deepCopy()->getPtr(),m_op,  m_axis_offset);
2004      case G_NP1OUT_2P:  return new DataLazy(m_left->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
2005    default:    default:
2006      throw DataException("Programmer error - do not know how to deepcopy operator "+opToString(m_op)+".");      throw DataException("Programmer error - do not know how to deepcopy operator "+opToString(m_op)+".");
2007    }    }
2008  }  }
2009    
2010    
2011    
2012  // There is no single, natural interpretation of getLength on DataLazy.  // There is no single, natural interpretation of getLength on DataLazy.
2013  // Instances of DataReady can look at the size of their vectors.  // Instances of DataReady can look at the size of their vectors.
2014  // For lazy though, it could be the size the data would be if it were resolved;  // For lazy though, it could be the size the data would be if it were resolved;
# Line 1876  DataLazy::setToZero() Line 2097  DataLazy::setToZero()
2097  //   m_readytype='C';  //   m_readytype='C';
2098  //   m_buffsRequired=1;  //   m_buffsRequired=1;
2099    
2100      (void)privdebug;  // to stop the compiler complaining about unused privdebug
2101    throw DataException("Programmer error - setToZero not supported for DataLazy (DataLazy objects should be read only).");    throw DataException("Programmer error - setToZero not supported for DataLazy (DataLazy objects should be read only).");
   
2102  }  }
2103    
2104  bool  bool

Legend:
Removed from v.2496  
changed lines
  Added in v.5464

  ViewVC Help
Powered by ViewVC 1.1.26