/[escript]/branches/clazy/escriptcore/src/DataLazy.cpp
ViewVC logotype

Diff of /branches/clazy/escriptcore/src/DataLazy.cpp

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 2496 by jfenwick, Fri Jun 26 06:09:47 2009 UTC revision 2777 by jfenwick, Thu Nov 26 01:06:00 2009 UTC
# Line 1  Line 1 
1    
2  /*******************************************************  /*******************************************************
3  *  *
4  * Copyright (c) 2003-2008 by University of Queensland  * Copyright (c) 2003-2009 by University of Queensland
5  * Earth Systems Science Computational Center (ESSCC)  * Earth Systems Science Computational Center (ESSCC)
6  * http://www.uq.edu.au/esscc  * http://www.uq.edu.au/esscc
7  *  *
# Line 42  bool privdebug=false; Line 42  bool privdebug=false;
42  #define DISABLEDEBUG privdebug=false;  #define DISABLEDEBUG privdebug=false;
43  }  }
44    
45  // #define SIZELIMIT  // #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {cerr << "\n!!!!!!! SIZE LIMIT EXCEEDED " << m_children << ";" << m_height << endl << toString() << endl;resolveToIdentity();}
 // #define SIZELIMIT if ((m_height>7) || (m_children>127)) {cerr << "\n!!!!!!! SIZE LIMIT EXCEEDED " << m_children << ";" << m_height << endl << toString() << endl; resolveToIdentity();}  
 //#define SIZELIMIT if ((m_height>7) || (m_children>127)) {resolveToIdentity();}  
46    
47  #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {resolveToIdentity();}  #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {resolveToIdentity();}
48    
49    
50  /*  /*
51  How does DataLazy work?  How does DataLazy work?
52  ~~~~~~~~~~~~~~~~~~~~~~~  ~~~~~~~~~~~~~~~~~~~~~~~
# Line 110  namespace escript Line 109  namespace escript
109  namespace  namespace
110  {  {
111    
112    
113    // enabling this will print out when ever the maximum stacksize used by resolve increases
114    // it assumes _OPENMP is also in use
115    //#define LAZY_STACK_PROF
116    
117    
118    
119    #ifndef _OPENMP
120      #ifdef LAZY_STACK_PROF
121      #undef LAZY_STACK_PROF
122      #endif
123    #endif
124    
125    
126    #ifdef LAZY_STACK_PROF
127    std::vector<void*> stackstart(getNumberOfThreads());
128    std::vector<void*> stackend(getNumberOfThreads());
129    size_t maxstackuse=0;
130    #endif
131    
132  enum ES_opgroup  enum ES_opgroup
133  {  {
134     G_UNKNOWN,     G_UNKNOWN,
# Line 120  enum ES_opgroup Line 139  enum ES_opgroup
139     G_NP1OUT,        // non-pointwise op with one output     G_NP1OUT,        // non-pointwise op with one output
140     G_NP1OUT_P,      // non-pointwise op with one output requiring a parameter     G_NP1OUT_P,      // non-pointwise op with one output requiring a parameter
141     G_TENSORPROD,    // general tensor product     G_TENSORPROD,    // general tensor product
142     G_NP1OUT_2P      // non-pointwise op with one output requiring two params     G_NP1OUT_2P,     // non-pointwise op with one output requiring two params
143       G_REDUCTION      // non-pointwise unary op with a scalar output
144  };  };
145    
146    
# Line 135  string ES_opstrings[]={"UNKNOWN","IDENTI Line 155  string ES_opstrings[]={"UNKNOWN","IDENTI
155              "symmetric","nonsymmetric",              "symmetric","nonsymmetric",
156              "prod",              "prod",
157              "transpose", "trace",              "transpose", "trace",
158              "swapaxes"};              "swapaxes",
159  int ES_opcount=41;              "minval", "maxval"};
160    int ES_opcount=43;
161  ES_opgroup opgroups[]={G_UNKNOWN,G_IDENTITY,G_BINARY,G_BINARY,G_BINARY,G_BINARY, G_BINARY,  ES_opgroup opgroups[]={G_UNKNOWN,G_IDENTITY,G_BINARY,G_BINARY,G_BINARY,G_BINARY, G_BINARY,
162              G_UNARY,G_UNARY,G_UNARY, //10              G_UNARY,G_UNARY,G_UNARY, //10
163              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,    // 17              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,    // 17
# Line 146  ES_opgroup opgroups[]={G_UNKNOWN,G_IDENT Line 167  ES_opgroup opgroups[]={G_UNKNOWN,G_IDENT
167              G_NP1OUT,G_NP1OUT,              G_NP1OUT,G_NP1OUT,
168              G_TENSORPROD,              G_TENSORPROD,
169              G_NP1OUT_P, G_NP1OUT_P,              G_NP1OUT_P, G_NP1OUT_P,
170              G_NP1OUT_2P};              G_NP1OUT_2P,
171                G_REDUCTION, G_REDUCTION};
172  inline  inline
173  ES_opgroup  ES_opgroup
174  getOpgroup(ES_optype op)  getOpgroup(ES_optype op)
# Line 191  resultShape(DataAbstract_ptr left, DataA Line 213  resultShape(DataAbstract_ptr left, DataA
213        {        {
214          throw DataException("Shapes not the name - shapes must match for (point)binary operations.");          throw DataException("Shapes not the name - shapes must match for (point)binary operations.");
215        }        }
216    
217        if (left->getRank()==0)   // we need to allow scalar * anything        if (left->getRank()==0)   // we need to allow scalar * anything
218        {        {
219          return right->getShape();          return right->getShape();
# Line 395  GTPShape(DataAbstract_ptr left, DataAbst Line 418  GTPShape(DataAbstract_ptr left, DataAbst
418    return shape2;    return shape2;
419  }  }
420    
 // determine the number of samples requires to evaluate an expression combining left and right  
 // NP1OUT needs an extra buffer because we can't write the answers over the top of the input.  
 // The same goes for G_TENSORPROD  
 // It might seem that pointwise binary ops (G_BINARY) could be written over the top of the lefts.  
 // This would be true were it not for the possibility that the LHS could be a scalar which needs to be examined  
 // multiple times  
 int  
 calcBuffs(const DataLazy_ptr& left, const DataLazy_ptr& right, ES_optype op)  
 {  
    switch(getOpgroup(op))  
    {  
    case G_IDENTITY: return 1;  
    case G_BINARY: return 1+max(left->getBuffsRequired(),right->getBuffsRequired()+1);  
    case G_UNARY:  
    case G_UNARY_P: return max(left->getBuffsRequired(),1);  
    case G_NP1OUT: return 1+max(left->getBuffsRequired(),1);  
    case G_NP1OUT_P: return 1+max(left->getBuffsRequired(),1);  
    case G_TENSORPROD: return 1+max(left->getBuffsRequired(),right->getBuffsRequired()+1);  
    case G_NP1OUT_2P: return 1+max(left->getBuffsRequired(),1);  
    default:  
     throw DataException("Programmer Error - attempt to calcBuffs() for operator "+opToString(op)+".");  
    }  
 }  
   
   
421  }   // end anonymous namespace  }   // end anonymous namespace
422    
423    
# Line 435  opToString(ES_optype op) Line 433  opToString(ES_optype op)
433    return ES_opstrings[op];    return ES_opstrings[op];
434  }  }
435    
436    void DataLazy::LazyNodeSetup()
437    {
438    #ifdef _OPENMP
439        int numthreads=omp_get_max_threads();
440        m_samples.resize(numthreads*m_samplesize);
441        m_sampleids=new int[numthreads];
442        for (int i=0;i<numthreads;++i)
443        {
444            m_sampleids[i]=-1;  
445        }
446    #else
447        m_samples.resize(m_samplesize);
448        m_sampleids=new int[1];
449        m_sampleids[0]=-1;
450    #endif  // _OPENMP
451    }
452    
453    
454    // Creates an identity node
455  DataLazy::DataLazy(DataAbstract_ptr p)  DataLazy::DataLazy(DataAbstract_ptr p)
456      : parent(p->getFunctionSpace(),p->getShape())      : parent(p->getFunctionSpace(),p->getShape())
457        ,m_sampleids(0),
458        m_samples(1)
459  {  {
460     if (p->isLazy())     if (p->isLazy())
461     {     {
# Line 456  LAZYDEBUG(cout << "Wrapping " << dr.get( Line 474  LAZYDEBUG(cout << "Wrapping " << dr.get(
474  LAZYDEBUG(cout << "(1)Lazy created with " << m_samplesize << endl;)  LAZYDEBUG(cout << "(1)Lazy created with " << m_samplesize << endl;)
475  }  }
476    
   
   
   
477  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op)  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op)
478      : parent(left->getFunctionSpace(),left->getShape()),      : parent(left->getFunctionSpace(),(getOpgroup(op)!=G_REDUCTION)?left->getShape():DataTypes::scalarShape),
479      m_op(op),      m_op(op),
480      m_axis_offset(0),      m_axis_offset(0),
481      m_transpose(0),      m_transpose(0),
482      m_SL(0), m_SM(0), m_SR(0)      m_SL(0), m_SM(0), m_SR(0)
483  {  {
484     if ((getOpgroup(op)!=G_UNARY) && (getOpgroup(op)!=G_NP1OUT))     if ((getOpgroup(op)!=G_UNARY) && (getOpgroup(op)!=G_NP1OUT) && (getOpgroup(op)!=G_REDUCTION))
485     {     {
486      throw DataException("Programmer error - constructor DataLazy(left, op) will only process UNARY operations.");      throw DataException("Programmer error - constructor DataLazy(left, op) will only process UNARY operations.");
487     }     }
# Line 482  DataLazy::DataLazy(DataAbstract_ptr left Line 497  DataLazy::DataLazy(DataAbstract_ptr left
497     }     }
498     m_readytype=lleft->m_readytype;     m_readytype=lleft->m_readytype;
499     m_left=lleft;     m_left=lleft;
    m_buffsRequired=calcBuffs(m_left, m_right,m_op); // yeah m_right will be null at this point  
500     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
    m_maxsamplesize=max(m_samplesize,m_left->getMaxSampleSize());  
501     m_children=m_left->m_children+1;     m_children=m_left->m_children+1;
502     m_height=m_left->m_height+1;     m_height=m_left->m_height+1;
503       LazyNodeSetup();
504     SIZELIMIT     SIZELIMIT
505  }  }
506    
# Line 553  LAZYDEBUG(cout << "Right " << right.get( Line 567  LAZYDEBUG(cout << "Right " << right.get(
567      m_readytype='C';      m_readytype='C';
568     }     }
569     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
    m_maxsamplesize=max(max(m_samplesize,m_right->getMaxSampleSize()),m_left->getMaxSampleSize());    
    m_buffsRequired=calcBuffs(m_left, m_right,m_op);  
570     m_children=m_left->m_children+m_right->m_children+2;     m_children=m_left->m_children+m_right->m_children+2;
571     m_height=max(m_left->m_height,m_right->m_height)+1;     m_height=max(m_left->m_height,m_right->m_height)+1;
572       LazyNodeSetup();
573     SIZELIMIT     SIZELIMIT
574  LAZYDEBUG(cout << "(3)Lazy created with " << m_samplesize << endl;)  LAZYDEBUG(cout << "(3)Lazy created with " << m_samplesize << endl;)
575  }  }
# Line 620  DataLazy::DataLazy(DataAbstract_ptr left Line 633  DataLazy::DataLazy(DataAbstract_ptr left
633      m_readytype='C';      m_readytype='C';
634     }     }
635     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
    m_maxsamplesize=max(max(m_samplesize,m_right->getMaxSampleSize()),m_left->getMaxSampleSize());    
    m_buffsRequired=calcBuffs(m_left, m_right,m_op);  
636     m_children=m_left->m_children+m_right->m_children+2;     m_children=m_left->m_children+m_right->m_children+2;
637     m_height=max(m_left->m_height,m_right->m_height)+1;     m_height=max(m_left->m_height,m_right->m_height)+1;
638       LazyNodeSetup();
639     SIZELIMIT     SIZELIMIT
640  LAZYDEBUG(cout << "(4)Lazy created with " << m_samplesize << endl;)  LAZYDEBUG(cout << "(4)Lazy created with " << m_samplesize << endl;)
641  }  }
# Line 651  DataLazy::DataLazy(DataAbstract_ptr left Line 663  DataLazy::DataLazy(DataAbstract_ptr left
663     }     }
664     m_readytype=lleft->m_readytype;     m_readytype=lleft->m_readytype;
665     m_left=lleft;     m_left=lleft;
    m_buffsRequired=calcBuffs(m_left, m_right,m_op); // yeah m_right will be null at this point  
666     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
    m_maxsamplesize=max(m_samplesize,m_left->getMaxSampleSize());  
667     m_children=m_left->m_children+1;     m_children=m_left->m_children+1;
668     m_height=m_left->m_height+1;     m_height=m_left->m_height+1;
669       LazyNodeSetup();
670     SIZELIMIT     SIZELIMIT
671  LAZYDEBUG(cout << "(5)Lazy created with " << m_samplesize << endl;)  LAZYDEBUG(cout << "(5)Lazy created with " << m_samplesize << endl;)
672  }  }
# Line 682  DataLazy::DataLazy(DataAbstract_ptr left Line 693  DataLazy::DataLazy(DataAbstract_ptr left
693     }     }
694     m_readytype=lleft->m_readytype;     m_readytype=lleft->m_readytype;
695     m_left=lleft;     m_left=lleft;
    m_buffsRequired=calcBuffs(m_left, m_right,m_op); // yeah m_right will be null at this point  
696     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
    m_maxsamplesize=max(m_samplesize,m_left->getMaxSampleSize());  
697     m_children=m_left->m_children+1;     m_children=m_left->m_children+1;
698     m_height=m_left->m_height+1;     m_height=m_left->m_height+1;
699       LazyNodeSetup();
700     SIZELIMIT     SIZELIMIT
701  LAZYDEBUG(cout << "(6)Lazy created with " << m_samplesize << endl;)  LAZYDEBUG(cout << "(6)Lazy created with " << m_samplesize << endl;)
702  }  }
# Line 714  DataLazy::DataLazy(DataAbstract_ptr left Line 724  DataLazy::DataLazy(DataAbstract_ptr left
724     }     }
725     m_readytype=lleft->m_readytype;     m_readytype=lleft->m_readytype;
726     m_left=lleft;     m_left=lleft;
    m_buffsRequired=calcBuffs(m_left, m_right,m_op); // yeah m_right will be null at this point  
727     m_samplesize=getNumDPPSample()*getNoValues();     m_samplesize=getNumDPPSample()*getNoValues();
    m_maxsamplesize=max(m_samplesize,m_left->getMaxSampleSize());  
728     m_children=m_left->m_children+1;     m_children=m_left->m_children+1;
729     m_height=m_left->m_height+1;     m_height=m_left->m_height+1;
730       LazyNodeSetup();
731     SIZELIMIT     SIZELIMIT
732  LAZYDEBUG(cout << "(7)Lazy created with " << m_samplesize << endl;)  LAZYDEBUG(cout << "(7)Lazy created with " << m_samplesize << endl;)
733  }  }
734    
735  DataLazy::~DataLazy()  DataLazy::~DataLazy()
736  {  {
737       delete[] m_sampleids;
738  }  }
739    
740    
 int  
 DataLazy::getBuffsRequired() const  
 {  
     return m_buffsRequired;  
 }  
   
   
 size_t  
 DataLazy::getMaxSampleSize() const  
 {  
     return m_maxsamplesize;  
 }  
   
   
   
 size_t  
 DataLazy::getSampleBufferSize() const  
 {  
     return m_maxsamplesize*(max(1,m_buffsRequired));  
 }  
   
741  /*  /*
742    \brief Evaluates the expression using methods on Data.    \brief Evaluates the expression using methods on Data.
743    This does the work for the collapse method.    This does the work for the collapse method.
# Line 891  DataLazy::collapseToReady() Line 880  DataLazy::collapseToReady()
880      case SWAP:      case SWAP:
881      result=left.swapaxes(m_axis_offset, m_transpose);      result=left.swapaxes(m_axis_offset, m_transpose);
882      break;      break;
883        case MINVAL:
884        result=left.minval();
885        break;
886        case MAXVAL:
887        result=left.minval();
888        break;
889      default:      default:
890      throw DataException("Programmer error - collapseToReady does not know how to resolve operator "+opToString(m_op)+".");      throw DataException("Programmer error - collapseToReady does not know how to resolve operator "+opToString(m_op)+".");
891    }    }
# Line 918  DataLazy::collapse() Line 913  DataLazy::collapse()
913    m_op=IDENTITY;    m_op=IDENTITY;
914  }  }
915    
916  /*  
917    \brief Compute the value of the expression (unary operation) for the given sample.  
918    \return Vector which stores the value of the subexpression for the given sample.  
919    \param v A vector to store intermediate results.  
920    \param offset Index in v to begin storing results.  
921    \param sampleNo Sample number to evaluate.  #define PROC_OP(TYPE,X)                               \
922    \param roffset (output parameter) the offset in the return vector where the result begins.      for (int j=0;j<onumsteps;++j)\
923        {\
924    The return value will be an existing vector so do not deallocate it.        for (int i=0;i<numsteps;++i,resultp+=resultStep) \
925    If the result is stored in v it should be stored at the offset given.        { \
926    Everything from offset to the end of v should be considered available for this method to use.  LAZYDEBUG(cout << "[left,right]=[" << lroffset << "," << rroffset << "]" << endl;)\
927  */  LAZYDEBUG(cout << "{left,right}={" << (*left)[lroffset] << "," << (*right)[rroffset] << "}\n";)\
928  DataTypes::ValueType*           tensor_binary_operation< TYPE >(chunksize, &((*left)[lroffset]), &((*right)[rroffset]), resultp, X); \
929  DataLazy::resolveUnary(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const  LAZYDEBUG(cout << " result=      " << resultp[0] << endl;) \
930             lroffset+=leftstep; \
931             rroffset+=rightstep; \
932          }\
933          lroffset+=oleftstep;\
934          rroffset+=orightstep;\
935        }
936    
937    
938    // The result will be stored in m_samples
939    // The return value is a pointer to the DataVector, offset is the offset within the return value
940    const DataTypes::ValueType*
941    DataLazy::resolveNodeSample(int tid, int sampleNo, size_t& roffset)
942    {
943    LAZYDEBUG(cout << "Resolve sample " << toString() << endl;)
944        // collapse so we have a 'E' node or an IDENTITY for some other type
945      if (m_readytype!='E' && m_op!=IDENTITY)
946      {
947        collapse();
948      }
949      if (m_op==IDENTITY)  
950      {
951        const ValueType& vec=m_id->getVectorRO();
952        roffset=m_id->getPointOffset(sampleNo, 0);
953    #ifdef LAZY_STACK_PROF
954    int x;
955    if (&x<stackend[omp_get_thread_num()])
956    {
957           stackend[omp_get_thread_num()]=&x;
958    }
959    #endif
960        return &(vec);
961      }
962      if (m_readytype!='E')
963      {
964        throw DataException("Programmer Error - Collapse did not produce an expanded node.");
965      }
966      if (m_sampleids[tid]==sampleNo)
967      {
968        roffset=tid*m_samplesize;
969        return &(m_samples);        // sample is already resolved
970      }
971      m_sampleids[tid]=sampleNo;
972      switch (getOpgroup(m_op))
973      {
974      case G_UNARY:
975      case G_UNARY_P: return resolveNodeUnary(tid, sampleNo, roffset);
976      case G_BINARY: return resolveNodeBinary(tid, sampleNo, roffset);
977      case G_NP1OUT: return resolveNodeNP1OUT(tid, sampleNo, roffset);
978      case G_NP1OUT_P: return resolveNodeNP1OUT_P(tid, sampleNo, roffset);
979      case G_TENSORPROD: return resolveNodeTProd(tid, sampleNo, roffset);
980      case G_NP1OUT_2P: return resolveNodeNP1OUT_2P(tid, sampleNo, roffset);
981      case G_REDUCTION: return resolveNodeReduction(tid, sampleNo, roffset);
982      default:
983        throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");
984      }
985    }
986    
987    const DataTypes::ValueType*
988    DataLazy::resolveNodeUnary(int tid, int sampleNo, size_t& roffset)
989  {  {
990      // we assume that any collapsing has been done before we get here      // we assume that any collapsing has been done before we get here
991      // since we only have one argument we don't need to think about only      // since we only have one argument we don't need to think about only
992      // processing single points.      // processing single points.
993        // we will also know we won't get identity nodes
994    if (m_readytype!='E')    if (m_readytype!='E')
995    {    {
996      throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");      throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
997    }    }
998    const ValueType* vleft=m_left->resolveSample(v,offset,sampleNo,roffset);    if (m_op==IDENTITY)
999    const double* left=&((*vleft)[roffset]);    {
1000    double* result=&(v[offset]);      throw DataException("Programmer error - resolveNodeUnary should not be called on identity nodes.");
1001    roffset=offset;    }
1002      const DataTypes::ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, roffset);
1003      const double* left=&((*leftres)[roffset]);
1004      roffset=m_samplesize*tid;
1005      double* result=&(m_samples[roffset]);
1006    switch (m_op)    switch (m_op)
1007    {    {
1008      case SIN:        case SIN:  
# Line 1053  DataLazy::resolveUnary(ValueType& v, siz Line 1112  DataLazy::resolveUnary(ValueType& v, siz
1112      default:      default:
1113      throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");      throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");
1114    }    }
1115    return &v;    return &(m_samples);
1116  }  }
1117    
1118    
1119    const DataTypes::ValueType*
1120    DataLazy::resolveNodeReduction(int tid, int sampleNo, size_t& roffset)
1121    {
1122        // we assume that any collapsing has been done before we get here
1123        // since we only have one argument we don't need to think about only
1124        // processing single points.
1125        // we will also know we won't get identity nodes
1126      if (m_readytype!='E')
1127      {
1128        throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
1129      }
1130      if (m_op==IDENTITY)
1131      {
1132        throw DataException("Programmer error - resolveNodeUnary should not be called on identity nodes.");
1133      }
1134      size_t loffset=0;
1135      const DataTypes::ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, loffset);
1136    
1137      roffset=m_samplesize*tid;
1138      unsigned int ndpps=getNumDPPSample();
1139      unsigned int psize=DataTypes::noValues(getShape());
1140      double* result=&(m_samples[roffset]);
1141      switch (m_op)
1142      {
1143        case MINVAL:
1144        {
1145          for (unsigned int z=0;z<ndpps;++z)
1146          {
1147            FMin op;
1148            *result=DataMaths::reductionOp(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max());
1149            loffset+=psize;
1150            result++;
1151          }
1152        }
1153        break;
1154        case MAXVAL:
1155        {
1156          for (unsigned int z=0;z<ndpps;++z)
1157          {
1158          FMax op;
1159          *result=DataMaths::reductionOp(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max()*-1);
1160          loffset+=psize;
1161          result++;
1162          }
1163        }
1164        break;
1165        default:
1166        throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");
1167      }
1168      return &(m_samples);
1169    }
1170    
1171    const DataTypes::ValueType*
1172    DataLazy::resolveNodeNP1OUT(int tid, int sampleNo, size_t& roffset)
 /*  
   \brief Compute the value of the expression (unary operation) for the given sample.  
   \return Vector which stores the value of the subexpression for the given sample.  
   \param v A vector to store intermediate results.  
   \param offset Index in v to begin storing results.  
   \param sampleNo Sample number to evaluate.  
   \param roffset (output parameter) the offset in the return vector where the result begins.  
   
   The return value will be an existing vector so do not deallocate it.  
   If the result is stored in v it should be stored at the offset given.  
   Everything from offset to the end of v should be considered available for this method to use.  
 */  
 DataTypes::ValueType*  
 DataLazy::resolveNP1OUT(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const  
1173  {  {
1174      // we assume that any collapsing has been done before we get here      // we assume that any collapsing has been done before we get here
1175      // since we only have one argument we don't need to think about only      // since we only have one argument we don't need to think about only
1176      // processing single points.      // processing single points.
1177    if (m_readytype!='E')    if (m_readytype!='E')
1178    {    {
1179      throw DataException("Programmer error - resolveNP1OUT should only be called on expanded Data.");      throw DataException("Programmer error - resolveNodeNP1OUT should only be called on expanded Data.");
1180    }    }
1181      // since we can't write the result over the input, we need a result offset further along    if (m_op==IDENTITY)
1182    size_t subroffset=roffset+m_samplesize;    {
1183  LAZYDEBUG(cerr << "subroffset=" << subroffset << endl;)      throw DataException("Programmer error - resolveNodeNP1OUT should not be called on identity nodes.");
1184    const ValueType* vleft=m_left->resolveSample(v,offset+m_left->m_samplesize,sampleNo,subroffset);    }
1185    roffset=offset;    size_t subroffset;
1186      const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1187      roffset=m_samplesize*tid;
1188    size_t loop=0;    size_t loop=0;
1189    size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;    size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1190    size_t step=getNoValues();    size_t step=getNoValues();
1191      size_t offset=roffset;
1192    switch (m_op)    switch (m_op)
1193    {    {
1194      case SYM:      case SYM:
1195      for (loop=0;loop<numsteps;++loop)      for (loop=0;loop<numsteps;++loop)
1196      {      {
1197          DataMaths::symmetric(*vleft,m_left->getShape(),subroffset, v, getShape(), offset);          DataMaths::symmetric(*leftres,m_left->getShape(),subroffset, m_samples, getShape(), offset);
1198          subroffset+=step;          subroffset+=step;
1199          offset+=step;          offset+=step;
1200      }      }
# Line 1104  LAZYDEBUG(cerr << "subroffset=" << subro Line 1202  LAZYDEBUG(cerr << "subroffset=" << subro
1202      case NSYM:      case NSYM:
1203      for (loop=0;loop<numsteps;++loop)      for (loop=0;loop<numsteps;++loop)
1204      {      {
1205          DataMaths::nonsymmetric(*vleft,m_left->getShape(),subroffset, v, getShape(), offset);          DataMaths::nonsymmetric(*leftres,m_left->getShape(),subroffset, m_samples, getShape(), offset);
1206          subroffset+=step;          subroffset+=step;
1207          offset+=step;          offset+=step;
1208      }      }
# Line 1112  LAZYDEBUG(cerr << "subroffset=" << subro Line 1210  LAZYDEBUG(cerr << "subroffset=" << subro
1210      default:      default:
1211      throw DataException("Programmer error - resolveNP1OUT can not resolve operator "+opToString(m_op)+".");      throw DataException("Programmer error - resolveNP1OUT can not resolve operator "+opToString(m_op)+".");
1212    }    }
1213    return &v;    return &m_samples;
1214  }  }
1215    
1216  /*  const DataTypes::ValueType*
1217    \brief Compute the value of the expression (unary operation) for the given sample.  DataLazy::resolveNodeNP1OUT_P(int tid, int sampleNo, size_t& roffset)
   \return Vector which stores the value of the subexpression for the given sample.  
   \param v A vector to store intermediate results.  
   \param offset Index in v to begin storing results.  
   \param sampleNo Sample number to evaluate.  
   \param roffset (output parameter) the offset in the return vector where the result begins.  
   
   The return value will be an existing vector so do not deallocate it.  
   If the result is stored in v it should be stored at the offset given.  
   Everything from offset to the end of v should be considered available for this method to use.  
 */  
 DataTypes::ValueType*  
 DataLazy::resolveNP1OUT_P(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const  
1218  {  {
1219      // we assume that any collapsing has been done before we get here      // we assume that any collapsing has been done before we get here
1220      // since we only have one argument we don't need to think about only      // since we only have one argument we don't need to think about only
1221      // processing single points.      // processing single points.
1222    if (m_readytype!='E')    if (m_readytype!='E')
1223    {    {
1224      throw DataException("Programmer error - resolveNP1OUT_P should only be called on expanded Data.");      throw DataException("Programmer error - resolveNodeNP1OUT_P should only be called on expanded Data.");
1225      }
1226      if (m_op==IDENTITY)
1227      {
1228        throw DataException("Programmer error - resolveNodeNP1OUT_P should not be called on identity nodes.");
1229    }    }
     // since we can't write the result over the input, we need a result offset further along  
1230    size_t subroffset;    size_t subroffset;
1231    const ValueType* vleft=m_left->resolveSample(v,offset+m_left->m_samplesize,sampleNo,subroffset);    size_t offset;
1232  LAZYDEBUG(cerr << "srcsamplesize=" << offset+m_left->m_samplesize << " beg=" << subroffset << endl;)    const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1233  LAZYDEBUG(cerr << "Offset for 5800=" << getPointOffset(5800/getNumDPPSample(),5800%getNumDPPSample()) << endl;)    roffset=m_samplesize*tid;
1234    roffset=offset;    offset=roffset;
1235    size_t loop=0;    size_t loop=0;
1236    size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;    size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1237    size_t outstep=getNoValues();    size_t outstep=getNoValues();
1238    size_t instep=m_left->getNoValues();    size_t instep=m_left->getNoValues();
 LAZYDEBUG(cerr << "instep=" << instep << " outstep=" << outstep<< " numsteps=" << numsteps << endl;)  
1239    switch (m_op)    switch (m_op)
1240    {    {
1241      case TRACE:      case TRACE:
1242      for (loop=0;loop<numsteps;++loop)      for (loop=0;loop<numsteps;++loop)
1243      {      {
1244  size_t zz=sampleNo*getNumDPPSample()+loop;              DataMaths::trace(*leftres,m_left->getShape(),subroffset, m_samples ,getShape(),offset,m_axis_offset);
 if (zz==5800)  
 {  
 LAZYDEBUG(cerr << "point=" <<  zz<< endl;)  
 LAZYDEBUG(cerr << "Input to  trace=" << DataTypes::pointToString(*vleft,m_left->getShape(),subroffset,"") << endl;)  
 LAZYDEBUG(cerr << "Offset for point=" << getPointOffset(5800/getNumDPPSample(),5800%getNumDPPSample()) << " vs ";)  
 LAZYDEBUG(cerr << subroffset << endl;)  
 LAZYDEBUG(cerr << "output=" << offset << endl;)  
 }  
             DataMaths::trace(*vleft,m_left->getShape(),subroffset, v ,getShape(),offset,m_axis_offset);  
 if (zz==5800)  
 {  
 LAZYDEBUG(cerr << "Result of trace=" << DataTypes::pointToString(v,getShape(),offset,"") << endl;)  
 }  
1245          subroffset+=instep;          subroffset+=instep;
1246          offset+=outstep;          offset+=outstep;
1247      }      }
# Line 1174  LAZYDEBUG(cerr << "Result of trace=" << Line 1249  LAZYDEBUG(cerr << "Result of trace=" <<
1249      case TRANS:      case TRANS:
1250      for (loop=0;loop<numsteps;++loop)      for (loop=0;loop<numsteps;++loop)
1251      {      {
1252              DataMaths::transpose(*vleft,m_left->getShape(),subroffset, v,getShape(),offset,m_axis_offset);              DataMaths::transpose(*leftres,m_left->getShape(),subroffset, m_samples, getShape(),offset,m_axis_offset);
1253          subroffset+=instep;          subroffset+=instep;
1254          offset+=outstep;          offset+=outstep;
1255      }      }
# Line 1182  LAZYDEBUG(cerr << "Result of trace=" << Line 1257  LAZYDEBUG(cerr << "Result of trace=" <<
1257      default:      default:
1258      throw DataException("Programmer error - resolveNP1OUTP can not resolve operator "+opToString(m_op)+".");      throw DataException("Programmer error - resolveNP1OUTP can not resolve operator "+opToString(m_op)+".");
1259    }    }
1260    return &v;    return &m_samples;
1261  }  }
1262    
1263    
1264  /*  const DataTypes::ValueType*
1265    \brief Compute the value of the expression (unary operation with int params) for the given sample.  DataLazy::resolveNodeNP1OUT_2P(int tid, int sampleNo, size_t& roffset)
   \return Vector which stores the value of the subexpression for the given sample.  
   \param v A vector to store intermediate results.  
   \param offset Index in v to begin storing results.  
   \param sampleNo Sample number to evaluate.  
   \param roffset (output parameter) the offset in the return vector where the result begins.  
   
   The return value will be an existing vector so do not deallocate it.  
   If the result is stored in v it should be stored at the offset given.  
   Everything from offset to the end of v should be considered available for this method to use.  
 */  
 DataTypes::ValueType*  
 DataLazy::resolveNP1OUT_2P(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const  
1266  {  {
     // we assume that any collapsing has been done before we get here  
     // since we only have one argument we don't need to think about only  
     // processing single points.  
1267    if (m_readytype!='E')    if (m_readytype!='E')
1268    {    {
1269      throw DataException("Programmer error - resolveNP1OUT_2P should only be called on expanded Data.");      throw DataException("Programmer error - resolveNodeNP1OUT_2P should only be called on expanded Data.");
1270      }
1271      if (m_op==IDENTITY)
1272      {
1273        throw DataException("Programmer error - resolveNodeNP1OUT_2P should not be called on identity nodes.");
1274    }    }
     // since we can't write the result over the input, we need a result offset further along  
1275    size_t subroffset;    size_t subroffset;
1276    const ValueType* vleft=m_left->resolveSample(v,offset+m_left->m_samplesize,sampleNo,subroffset);    size_t offset;
1277  LAZYDEBUG(cerr << "srcsamplesize=" << offset+m_left->m_samplesize << " beg=" << subroffset << endl;)    const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1278  LAZYDEBUG(cerr << "Offset for 5800=" << getPointOffset(5800/getNumDPPSample(),5800%getNumDPPSample()) << endl;)    roffset=m_samplesize*tid;
1279    roffset=offset;    offset=roffset;
1280    size_t loop=0;    size_t loop=0;
1281    size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;    size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1282    size_t outstep=getNoValues();    size_t outstep=getNoValues();
1283    size_t instep=m_left->getNoValues();    size_t instep=m_left->getNoValues();
 LAZYDEBUG(cerr << "instep=" << instep << " outstep=" << outstep<< " numsteps=" << numsteps << endl;)  
1284    switch (m_op)    switch (m_op)
1285    {    {
1286      case SWAP:      case SWAP:
1287      for (loop=0;loop<numsteps;++loop)      for (loop=0;loop<numsteps;++loop)
1288      {      {
1289              DataMaths::swapaxes(*vleft,m_left->getShape(),subroffset, v,getShape(),offset,m_axis_offset, m_transpose);              DataMaths::swapaxes(*leftres,m_left->getShape(),subroffset, m_samples, getShape(),offset, m_axis_offset, m_transpose);
1290          subroffset+=instep;          subroffset+=instep;
1291          offset+=outstep;          offset+=outstep;
1292      }      }
1293      break;      break;
1294      default:      default:
1295      throw DataException("Programmer error - resolveNP1OUT2P can not resolve operator "+opToString(m_op)+".");      throw DataException("Programmer error - resolveNodeNP1OUT2P can not resolve operator "+opToString(m_op)+".");
1296    }    }
1297    return &v;    return &m_samples;
1298  }  }
1299    
1300    
1301    
 #define PROC_OP(TYPE,X)                               \  
     for (int j=0;j<onumsteps;++j)\  
     {\  
       for (int i=0;i<numsteps;++i,resultp+=resultStep) \  
       { \  
 LAZYDEBUG(cout << "[left,right]=[" << lroffset << "," << rroffset << "]" << endl;)\  
 LAZYDEBUG(cout << "{left,right}={" << (*left)[lroffset] << "," << (*right)[rroffset] << "}\n";)\  
          tensor_binary_operation< TYPE >(chunksize, &((*left)[lroffset]), &((*right)[rroffset]), resultp, X); \  
 LAZYDEBUG(cout << " result=      " << resultp[0] << endl;) \  
          lroffset+=leftstep; \  
          rroffset+=rightstep; \  
       }\  
       lroffset+=oleftstep;\  
       rroffset+=orightstep;\  
     }  
   
 /*  
   \brief Compute the value of the expression (binary operation) for the given sample.  
   \return Vector which stores the value of the subexpression for the given sample.  
   \param v A vector to store intermediate results.  
   \param offset Index in v to begin storing results.  
   \param sampleNo Sample number to evaluate.  
   \param roffset (output parameter) the offset in the return vector where the result begins.  
   
   The return value will be an existing vector so do not deallocate it.  
   If the result is stored in v it should be stored at the offset given.  
   Everything from offset to the end of v should be considered available for this method to use.  
 */  
1302  // This method assumes that any subexpressions which evaluate to Constant or Tagged Data  // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1303  // have already been collapsed to IDENTITY. So we must have at least one expanded child.  // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1304  // If both children are expanded, then we can process them in a single operation (we treat  // If both children are expanded, then we can process them in a single operation (we treat
# Line 1274  LAZYDEBUG(cout << " result=      " << re Line 1308  LAZYDEBUG(cout << " result=      " << re
1308  // There is an additional complication when scalar operations are considered.  // There is an additional complication when scalar operations are considered.
1309  // For example, 2+Vector.  // For example, 2+Vector.
1310  // In this case each double within the point is treated individually  // In this case each double within the point is treated individually
1311  DataTypes::ValueType*  const DataTypes::ValueType*
1312  DataLazy::resolveBinary(ValueType& v,  size_t offset, int sampleNo, size_t& roffset) const  DataLazy::resolveNodeBinary(int tid, int sampleNo, size_t& roffset)
1313  {  {
1314  LAZYDEBUG(cout << "Resolve binary: " << toString() << endl;)  LAZYDEBUG(cout << "Resolve binary: " << toString() << endl;)
1315    
# Line 1387  LAZYDEBUG(cout << "Resolve binary: " << Line 1421  LAZYDEBUG(cout << "Resolve binary: " <<
1421    
1422    int resultStep=max(leftstep,rightstep);   // only one (at most) should be !=0    int resultStep=max(leftstep,rightstep);   // only one (at most) should be !=0
1423      // Get the values of sub-expressions      // Get the values of sub-expressions
1424    const ValueType* left=m_left->resolveSample(v,offset+getMaxSampleSize(),sampleNo,lroffset);   // see note on    const ValueType* left=m_left->resolveNodeSample(tid,sampleNo,lroffset);  
1425      // calcBufss for why we can't put offset as the 2nd param above    const ValueType* right=m_right->resolveNodeSample(tid,sampleNo,rroffset);
   const ValueType* right=m_right->resolveSample(v,offset+2*getMaxSampleSize(),sampleNo,rroffset); // Note  
     // the right child starts further along.  
1426  LAZYDEBUG(cout << "Post sub calls in " << toString() << endl;)  LAZYDEBUG(cout << "Post sub calls in " << toString() << endl;)
1427  LAZYDEBUG(cout << "shapes=" << DataTypes::shapeToString(m_left->getShape()) << "," << DataTypes::shapeToString(m_right->getShape()) << endl;)  LAZYDEBUG(cout << "shapes=" << DataTypes::shapeToString(m_left->getShape()) << "," << DataTypes::shapeToString(m_right->getShape()) << endl;)
1428  LAZYDEBUG(cout << "chunksize=" << chunksize << endl << "leftstep=" << leftstep << " rightstep=" << rightstep;)  LAZYDEBUG(cout << "chunksize=" << chunksize << endl << "leftstep=" << leftstep << " rightstep=" << rightstep;)
# Line 1399  LAZYDEBUG(cout << "onumsteps=" << onumst Line 1431  LAZYDEBUG(cout << "onumsteps=" << onumst
1431  LAZYDEBUG(cout << " DPPS=" << m_left->getNumDPPSample() << "," <<m_right->getNumDPPSample() << endl;)  LAZYDEBUG(cout << " DPPS=" << m_left->getNumDPPSample() << "," <<m_right->getNumDPPSample() << endl;)
1432  LAZYDEBUG(cout << "" << LS << RS << LN << RN << LES << RES <<LEN << REN <<   endl;)  LAZYDEBUG(cout << "" << LS << RS << LN << RN << LES << RES <<LEN << REN <<   endl;)
1433    
1434    LAZYDEBUG(cout << "Left res["<< lroffset<< "]=" << (*left)[lroffset] << endl;)
1435    LAZYDEBUG(cout << "Right res["<< rroffset<< "]=" << (*right)[rroffset] << endl;)
1436    
1437    double* resultp=&(v[offset]);     // results are stored at the vector offset we recieved  
1438      roffset=m_samplesize*tid;
1439      double* resultp=&(m_samples[roffset]);        // results are stored at the vector offset we recieved
1440    switch(m_op)    switch(m_op)
1441    {    {
1442      case ADD:      case ADD:
# Line 1421  LAZYDEBUG(cout << "" << LS << RS << LN < Line 1457  LAZYDEBUG(cout << "" << LS << RS << LN <
1457      default:      default:
1458      throw DataException("Programmer error - resolveBinary can not resolve operator "+opToString(m_op)+".");      throw DataException("Programmer error - resolveBinary can not resolve operator "+opToString(m_op)+".");
1459    }    }
1460    roffset=offset;  LAZYDEBUG(cout << "Result res[" << roffset<< "]" << m_samples[roffset] << endl;)
1461    return &v;    return &m_samples;
1462  }  }
1463    
1464    
   
 /*  
   \brief Compute the value of the expression (tensor product) for the given sample.  
   \return Vector which stores the value of the subexpression for the given sample.  
   \param v A vector to store intermediate results.  
   \param offset Index in v to begin storing results.  
   \param sampleNo Sample number to evaluate.  
   \param roffset (output parameter) the offset in the return vector where the result begins.  
   
   The return value will be an existing vector so do not deallocate it.  
   If the result is stored in v it should be stored at the offset given.  
   Everything from offset to the end of v should be considered available for this method to use.  
 */  
1465  // This method assumes that any subexpressions which evaluate to Constant or Tagged Data  // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1466  // have already been collapsed to IDENTITY. So we must have at least one expanded child.  // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1467  // unlike the other resolve helpers, we must treat these datapoints separately.  // unlike the other resolve helpers, we must treat these datapoints separately.
1468  DataTypes::ValueType*  const DataTypes::ValueType*
1469  DataLazy::resolveTProd(ValueType& v,  size_t offset, int sampleNo, size_t& roffset) const  DataLazy::resolveNodeTProd(int tid, int sampleNo, size_t& roffset)
1470  {  {
1471  LAZYDEBUG(cout << "Resolve TensorProduct: " << toString()  << " to offset " << offset<< endl;)  LAZYDEBUG(cout << "Resolve TensorProduct: " << toString() << endl;)
1472    
1473    size_t lroffset=0, rroffset=0;    // offsets in the left and right result vectors    size_t lroffset=0, rroffset=0;    // offsets in the left and right result vectors
1474      // first work out which of the children are expanded      // first work out which of the children are expanded
1475    bool leftExp=(m_left->m_readytype=='E');    bool leftExp=(m_left->m_readytype=='E');
1476    bool rightExp=(m_right->m_readytype=='E');    bool rightExp=(m_right->m_readytype=='E');
1477    int steps=getNumDPPSample();    int steps=getNumDPPSample();
 /*  int leftStep=((leftExp && !rightExp)? m_right->getNoValues() : 0);  
   int rightStep=((rightExp && !leftExp)? m_left->getNoValues() : 0);*/  
1478    int leftStep=(leftExp? m_left->getNoValues() : 0);        // do not have scalars as input to this method    int leftStep=(leftExp? m_left->getNoValues() : 0);        // do not have scalars as input to this method
1479    int rightStep=(rightExp?m_right->getNoValues() : 0);    int rightStep=(rightExp?m_right->getNoValues() : 0);
1480    
1481    int resultStep=getNoValues();    int resultStep=getNoValues();
1482      // Get the values of sub-expressions (leave a gap of one sample for the result).    roffset=m_samplesize*tid;
1483    int gap=offset+m_samplesize;      size_t offset=roffset;
   
 LAZYDEBUG(cout << "Query left with offset=" << gap << endl;)  
1484    
1485    const ValueType* left=m_left->resolveSample(v,gap,sampleNo,lroffset);    const ValueType* left=m_left->resolveNodeSample(tid, sampleNo, lroffset);
   gap+=m_left->getMaxSampleSize();  
1486    
1487      const ValueType* right=m_right->resolveNodeSample(tid, sampleNo, rroffset);
 LAZYDEBUG(cout << "Query right with offset=" << gap << endl;)  
   
   
   const ValueType* right=m_right->resolveSample(v,gap,sampleNo,rroffset);  
1488    
1489  LAZYDEBUG(cerr << "[Left shape]=" << DataTypes::shapeToString(m_left->getShape()) << "\n[Right shape]=" << DataTypes::shapeToString(m_right->getShape()) << " result=" <<DataTypes::shapeToString(getShape()) <<  endl;  LAZYDEBUG(cerr << "[Left shape]=" << DataTypes::shapeToString(m_left->getShape()) << "\n[Right shape]=" << DataTypes::shapeToString(m_right->getShape()) << " result=" <<DataTypes::shapeToString(getShape()) <<  endl;
1490  cout << getNoValues() << endl;)  cout << getNoValues() << endl;)
 LAZYDEBUG(cerr << "Result of left=";)  
 LAZYDEBUG(cerr << "[" << lroffset << " .. " << lroffset+m_left->getNoValues() << "]" << endl;  
1491    
1492  for (int i=lroffset, limit=lroffset+(leftExp?m_left->getNoValues()*m_left->getNumDPPSample():m_left->getNoValues());i<limit;++i)  
 {  
 cout << "[" << setw(2) << i-lroffset << "] " << setw(10) << (*left)[i] << " ";  
 if (i%4==0) cout << endl;  
 })  
 LAZYDEBUG(cerr << "\nResult of right=" << endl;)  
 LAZYDEBUG(  
 for (int i=rroffset, limit=rroffset+(rightExp?m_right->getNoValues()*m_right->getNumDPPSample():m_right->getNoValues());i<limit;++i)  
 {  
 cerr << "[" <<  setw(2)<< i-rroffset << "] " << setw(10) << (*right)[i] << " ";  
 if (i%4==0) cout << endl;  
 }  
 cerr << endl;  
 )  
1493  LAZYDEBUG(cerr << "Post sub calls: " << toString() << endl;)  LAZYDEBUG(cerr << "Post sub calls: " << toString() << endl;)
1494  LAZYDEBUG(cout << "LeftExp=" << leftExp << " rightExp=" << rightExp << endl;)  LAZYDEBUG(cout << "LeftExp=" << leftExp << " rightExp=" << rightExp << endl;)
1495  LAZYDEBUG(cout << "LeftR=" << m_left->getRank() << " rightExp=" << m_right->getRank() << endl;)  LAZYDEBUG(cout << "LeftR=" << m_left->getRank() << " rightExp=" << m_right->getRank() << endl;)
# Line 1499  LAZYDEBUG(cout << "m_samplesize=" << m_s Line 1498  LAZYDEBUG(cout << "m_samplesize=" << m_s
1498  LAZYDEBUG(cout << "outputshape=" << DataTypes::shapeToString(getShape()) << endl;)  LAZYDEBUG(cout << "outputshape=" << DataTypes::shapeToString(getShape()) << endl;)
1499  LAZYDEBUG(cout << "DPPS=" << m_right->getNumDPPSample() <<"."<<endl;)  LAZYDEBUG(cout << "DPPS=" << m_right->getNumDPPSample() <<"."<<endl;)
1500    
1501    double* resultp=&(v[offset]);     // results are stored at the vector offset we recieved    double* resultp=&(m_samples[offset]);     // results are stored at the vector offset we recieved
1502    switch(m_op)    switch(m_op)
1503    {    {
1504      case PROD:      case PROD:
1505      for (int i=0;i<steps;++i,resultp+=resultStep)      for (int i=0;i<steps;++i,resultp+=resultStep)
1506      {      {
   
 LAZYDEBUG(cout << "lroffset=" << lroffset << "rroffset=" << rroffset << endl;)  
 LAZYDEBUG(cout << "l*=" << left << " r*=" << right << endl;)  
 LAZYDEBUG(cout << "m_SL=" << m_SL << " m_SM=" << m_SM << " m_SR=" << m_SR << endl;)  
   
1507            const double *ptr_0 = &((*left)[lroffset]);            const double *ptr_0 = &((*left)[lroffset]);
1508            const double *ptr_1 = &((*right)[rroffset]);            const double *ptr_1 = &((*right)[rroffset]);
1509    
# Line 1518  LAZYDEBUG(cout << DataTypes::pointToStri Line 1512  LAZYDEBUG(cout << DataTypes::pointToStri
1512    
1513            matrix_matrix_product(m_SL, m_SM, m_SR, ptr_0, ptr_1, resultp, m_transpose);            matrix_matrix_product(m_SL, m_SM, m_SR, ptr_0, ptr_1, resultp, m_transpose);
1514    
 LAZYDEBUG(cout << "Results--\n";  
 {  
   DataVector dv(getNoValues());  
 for (int z=0;z<getNoValues();++z)  
 {  
   cout << "[" << setw(2) << z<< "] " << setw(10) << resultp[z] << " ";  
   if (z%4==0) cout << endl;  
   dv[z]=resultp[z];  
 }  
 cout << endl << DataTypes::pointToString(dv,getShape(),0,"RESLT");  
 cout << "\nWritten to: " << resultp << " resultStep=" << resultStep << endl;  
 }  
 )  
1515        lroffset+=leftStep;        lroffset+=leftStep;
1516        rroffset+=rightStep;        rroffset+=rightStep;
1517      }      }
# Line 1539  cout << "\nWritten to: " << resultp << " Line 1520  cout << "\nWritten to: " << resultp << "
1520      throw DataException("Programmer error - resolveTProduct can not resolve operator "+opToString(m_op)+".");      throw DataException("Programmer error - resolveTProduct can not resolve operator "+opToString(m_op)+".");
1521    }    }
1522    roffset=offset;    roffset=offset;
1523    return &v;    return &m_samples;
1524  }  }
1525    
1526    
   
 /*  
   \brief Compute the value of the expression for the given sample.  
   \return Vector which stores the value of the subexpression for the given sample.  
   \param v A vector to store intermediate results.  
   \param offset Index in v to begin storing results.  
   \param sampleNo Sample number to evaluate.  
   \param roffset (output parameter) the offset in the return vector where the result begins.  
   
   The return value will be an existing vector so do not deallocate it.  
 */  
 // the vector and the offset are a place where the method could write its data if it wishes  
 // it is not obligated to do so. For example, if it has its own storage already, it can use that.  
 // Hence the return value to indicate where the data is actually stored.  
 // Regardless, the storage should be assumed to be used, even if it isn't.  
   
 // the roffset is the offset within the returned vector where the data begins  
1527  const DataTypes::ValueType*  const DataTypes::ValueType*
1528  DataLazy::resolveSample(ValueType& v, size_t offset, int sampleNo, size_t& roffset)  DataLazy::resolveSample(int sampleNo, size_t& roffset)
 {  
 LAZYDEBUG(cout << "Resolve sample " << toString() << endl;)  
     // collapse so we have a 'E' node or an IDENTITY for some other type  
   if (m_readytype!='E' && m_op!=IDENTITY)  
   {  
     collapse();  
   }  
   if (m_op==IDENTITY)    
   {  
     const ValueType& vec=m_id->getVectorRO();  
     if (m_readytype=='C')  
     {  
     roffset=0;  
 LAZYDEBUG(cout << "Finish  sample " << toString() << endl;)  
     return &(vec);  
     }  
     roffset=m_id->getPointOffset(sampleNo, 0);  
 LAZYDEBUG(cout << "Finish  sample " << toString() << endl;)  
     return &(vec);  
   }  
   if (m_readytype!='E')  
   {  
     throw DataException("Programmer Error - Collapse did not produce an expanded node.");  
   }  
   switch (getOpgroup(m_op))  
   {  
   case G_UNARY:  
   case G_UNARY_P: return resolveUnary(v, offset,sampleNo,roffset);  
   case G_BINARY: return resolveBinary(v, offset,sampleNo,roffset);  
   case G_NP1OUT: return resolveNP1OUT(v, offset, sampleNo,roffset);  
   case G_NP1OUT_P: return resolveNP1OUT_P(v, offset, sampleNo,roffset);  
   case G_TENSORPROD: return resolveTProd(v,offset, sampleNo,roffset);  
   case G_NP1OUT_2P: return resolveNP1OUT_2P(v, offset, sampleNo, roffset);  
   default:  
     throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");  
   }  
   
 }  
   
 const DataTypes::ValueType*  
 DataLazy::resolveSample(BufferGroup& bg, int sampleNo, size_t& roffset)  
1529  {  {
1530  #ifdef _OPENMP  #ifdef _OPENMP
1531      int tid=omp_get_thread_num();      int tid=omp_get_thread_num();
1532  #else  #else
1533      int tid=0;      int tid=0;
1534  #endif  #endif
1535      return resolveSample(bg.getBuffer(tid),bg.getOffset(tid),sampleNo,roffset);  
1536    #ifdef LAZY_STACK_PROF
1537        stackstart[tid]=&tid;
1538        stackend[tid]=&tid;
1539        const DataTypes::ValueType* r=resolveNodeSample(tid, sampleNo, roffset);
1540        size_t d=(size_t)stackstart[tid]-(size_t)stackend[tid];
1541        #pragma omp critical
1542        if (d>maxstackuse)
1543        {
1544    cout << "Max resolve Stack use " << d << endl;
1545            maxstackuse=d;
1546        }
1547        return r;
1548    #else
1549        return resolveNodeSample(tid, sampleNo, roffset);
1550    #endif
1551  }  }
1552    
1553    
1554  // This needs to do the work of the idenity constructor  // This needs to do the work of the identity constructor
1555  void  void
1556  DataLazy::resolveToIdentity()  DataLazy::resolveToIdentity()
1557  {  {
1558     if (m_op==IDENTITY)     if (m_op==IDENTITY)
1559      return;      return;
1560     DataReady_ptr p=resolve();     DataReady_ptr p=resolveNodeWorker();
1561     makeIdentity(p);     makeIdentity(p);
1562  }  }
1563    
# Line 1635  void DataLazy::makeIdentity(const DataRe Line 1573  void DataLazy::makeIdentity(const DataRe
1573     else if(p->isExpanded()) {m_readytype='E';}     else if(p->isExpanded()) {m_readytype='E';}
1574     else if (p->isTagged()) {m_readytype='T';}     else if (p->isTagged()) {m_readytype='T';}
1575     else {throw DataException("Unknown DataReady instance in convertToIdentity constructor.");}     else {throw DataException("Unknown DataReady instance in convertToIdentity constructor.");}
    m_buffsRequired=1;  
1576     m_samplesize=p->getNumDPPSample()*p->getNoValues();     m_samplesize=p->getNumDPPSample()*p->getNoValues();
    m_maxsamplesize=m_samplesize;  
1577     m_left.reset();     m_left.reset();
1578     m_right.reset();     m_right.reset();
1579  }  }
1580    
1581  // To simplify the memory management, all threads operate on one large vector, rather than one each.  
 // Each sample is evaluated independently and copied into the result DataExpanded.  
1582  DataReady_ptr  DataReady_ptr
1583  DataLazy::resolve()  DataLazy::resolve()
1584  {  {
1585        resolveToIdentity();
1586        return m_id;
1587    }
1588    
1589  LAZYDEBUG(cout << "Sample size=" << m_samplesize << endl;)  // This version of resolve uses storage in each node to hold results
1590  LAZYDEBUG(cout << "Buffers=" << m_buffsRequired << endl;)  DataReady_ptr
1591    DataLazy::resolveNodeWorker()
1592    {
1593    if (m_readytype!='E')     // if the whole sub-expression is Constant or Tagged, then evaluate it normally    if (m_readytype!='E')     // if the whole sub-expression is Constant or Tagged, then evaluate it normally
1594    {    {
1595      collapse();      collapse();
# Line 1659  LAZYDEBUG(cout << "Buffers=" << m_buffsR Line 1599  LAZYDEBUG(cout << "Buffers=" << m_buffsR
1599      return m_id;      return m_id;
1600    }    }
1601      // from this point on we must have m_op!=IDENTITY and m_readytype=='E'      // from this point on we must have m_op!=IDENTITY and m_readytype=='E'
   size_t threadbuffersize=m_maxsamplesize*(max(1,m_buffsRequired)); // Each thread needs to have enough  
     // storage to evaluate its expression  
   int numthreads=1;  
 #ifdef _OPENMP  
   numthreads=omp_get_max_threads();  
 #endif  
   ValueType v(numthreads*threadbuffersize);  
 LAZYDEBUG(cout << "Buffer created with size=" << v.size() << endl;)  
1602    DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(),  ValueType(getNoValues()));    DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(),  ValueType(getNoValues()));
1603    ValueType& resvec=result->getVectorRW();    ValueType& resvec=result->getVectorRW();
1604    DataReady_ptr resptr=DataReady_ptr(result);    DataReady_ptr resptr=DataReady_ptr(result);
1605    
1606    int sample;    int sample;
   size_t outoffset;     // offset in the output data  
1607    int totalsamples=getNumSamples();    int totalsamples=getNumSamples();
1608    const ValueType* res=0;   // Vector storing the answer    const ValueType* res=0;   // Storage for answer
   size_t resoffset=0;       // where in the vector to find the answer  
1609  LAZYDEBUG(cout << "Total number of samples=" <<totalsamples << endl;)  LAZYDEBUG(cout << "Total number of samples=" <<totalsamples << endl;)
1610    #pragma omp parallel for private(sample,resoffset,outoffset,res) schedule(static)    #pragma omp parallel private(sample,res)
   for (sample=0;sample<totalsamples;++sample)  
1611    {    {
1612        if (sample==0)  {ENABLEDEBUG}      size_t roffset=0;
1613    #ifdef LAZY_STACK_PROF
1614  //       if (sample==5800/getNumDPPSample())  {ENABLEDEBUG}      stackstart[omp_get_thread_num()]=&roffset;
1615  LAZYDEBUG(cout << "################################# " << sample << endl;)      stackend[omp_get_thread_num()]=&roffset;
1616    #endif
1617        #pragma omp for schedule(static)
1618        for (sample=0;sample<totalsamples;++sample)
1619        {
1620            roffset=0;
1621  #ifdef _OPENMP  #ifdef _OPENMP
1622      res=resolveSample(v,threadbuffersize*omp_get_thread_num(),sample,resoffset);              res=resolveNodeSample(omp_get_thread_num(),sample,roffset);
1623  #else  #else
1624      res=resolveSample(v,0,sample,resoffset);   // res would normally be v, but not if its a single IDENTITY op.              res=resolveNodeSample(0,sample,roffset);
1625  #endif  #endif
1626  LAZYDEBUG(cerr << "-------------------------------- " << endl;)  LAZYDEBUG(cout << "Sample #" << sample << endl;)
1627  LAZYDEBUG(cerr<< "Copying sample#" << sample << endl;)  LAZYDEBUG(cout << "Final res[" << roffset<< "]=" << (*res)[roffset] << (*res)[roffset]<< endl; )
1628      outoffset=result->getPointOffset(sample,0);              DataVector::size_type outoffset=result->getPointOffset(sample,0);
1629  LAZYDEBUG(cerr << "offset=" << outoffset << " from offset=" << resoffset << " " << m_samplesize << " doubles" << endl;)              memcpy(&(resvec[outoffset]),&((*res)[roffset]),m_samplesize*sizeof(DataVector::ElementType));
1630      for (unsigned int i=0;i<m_samplesize;++i,++outoffset,++resoffset)   // copy values into the output vector      }
1631      {    }
1632  LAZYDEBUG(cerr << "outoffset=" << outoffset << " resoffset=" << resoffset << " " << (*res)[resoffset]<< endl;)  #ifdef LAZY_STACK_PROF
1633      resvec[outoffset]=(*res)[resoffset];    for (int i=0;i<getNumberOfThreads();++i)
1634      }    {
1635  LAZYDEBUG(cerr << DataTypes::pointToString(resvec,getShape(),outoffset-m_samplesize+DataTypes::noValues(getShape()),"Final result:") << endl;)      size_t r=((size_t)stackstart[i] - (size_t)stackend[i]);
1636  LAZYDEBUG(cerr << "*********************************" << endl;)  //  cout << i << " " << stackstart[i] << " .. " << stackend[i] << " = " <<  r << endl;
1637      DISABLEDEBUG      if (r>maxstackuse)
1638        {
1639            maxstackuse=r;
1640        }
1641    }    }
1642      cout << "Max resolve Stack use=" << maxstackuse << endl;
1643    #endif
1644    return resptr;    return resptr;
1645  }  }
1646    
# Line 1709  DataLazy::toString() const Line 1649  DataLazy::toString() const
1649  {  {
1650    ostringstream oss;    ostringstream oss;
1651    oss << "Lazy Data:";    oss << "Lazy Data:";
1652    intoString(oss);    if (escriptParams.getPRINT_LAZY_TREE()==0)
1653      {
1654          intoString(oss);
1655      }
1656      else
1657      {
1658        oss << endl;
1659        intoTreeString(oss,"");
1660      }
1661    return oss.str();    return oss.str();
1662  }  }
1663    
# Line 1750  DataLazy::intoString(ostringstream& oss) Line 1698  DataLazy::intoString(ostringstream& oss)
1698    case G_UNARY_P:    case G_UNARY_P:
1699    case G_NP1OUT:    case G_NP1OUT:
1700    case G_NP1OUT_P:    case G_NP1OUT_P:
1701      case G_REDUCTION:
1702      oss << opToString(m_op) << '(';      oss << opToString(m_op) << '(';
1703      m_left->intoString(oss);      m_left->intoString(oss);
1704      oss << ')';      oss << ')';
# Line 1772  DataLazy::intoString(ostringstream& oss) Line 1721  DataLazy::intoString(ostringstream& oss)
1721    }    }
1722  }  }
1723    
1724    
1725    void
1726    DataLazy::intoTreeString(ostringstream& oss, string indent) const
1727    {
1728      oss << '[' << m_rank << ':' << setw(3) << m_samplesize << "] " << indent;
1729      switch (getOpgroup(m_op))
1730      {
1731      case G_IDENTITY:
1732        if (m_id->isExpanded())
1733        {
1734           oss << "E";
1735        }
1736        else if (m_id->isTagged())
1737        {
1738          oss << "T";
1739        }
1740        else if (m_id->isConstant())
1741        {
1742          oss << "C";
1743        }
1744        else
1745        {
1746          oss << "?";
1747        }
1748        oss << '@' << m_id.get() << endl;
1749        break;
1750      case G_BINARY:
1751        oss << opToString(m_op) << endl;
1752        indent+='.';
1753        m_left->intoTreeString(oss, indent);
1754        m_right->intoTreeString(oss, indent);
1755        break;
1756      case G_UNARY:
1757      case G_UNARY_P:
1758      case G_NP1OUT:
1759      case G_NP1OUT_P:
1760      case G_REDUCTION:
1761        oss << opToString(m_op) << endl;
1762        indent+='.';
1763        m_left->intoTreeString(oss, indent);
1764        break;
1765      case G_TENSORPROD:
1766        oss << opToString(m_op) << endl;
1767        indent+='.';
1768        m_left->intoTreeString(oss, indent);
1769        m_right->intoTreeString(oss, indent);
1770        break;
1771      case G_NP1OUT_2P:
1772        oss << opToString(m_op) << ", " << m_axis_offset << ", " << m_transpose<< endl;
1773        indent+='.';
1774        m_left->intoTreeString(oss, indent);
1775        break;
1776      default:
1777        oss << "UNKNOWN";
1778      }
1779    }
1780    
1781    
1782  DataAbstract*  DataAbstract*
1783  DataLazy::deepCopy()  DataLazy::deepCopy()
1784  {  {
1785    switch (getOpgroup(m_op))    switch (getOpgroup(m_op))
1786    {    {
1787    case G_IDENTITY:  return new DataLazy(m_id->deepCopy()->getPtr());    case G_IDENTITY:  return new DataLazy(m_id->deepCopy()->getPtr());
1788    case G_UNARY: return new DataLazy(m_left->deepCopy()->getPtr(),m_op);    case G_UNARY:
1789      case G_REDUCTION:      return new DataLazy(m_left->deepCopy()->getPtr(),m_op);
1790      case G_UNARY_P:   return new DataLazy(m_left->deepCopy()->getPtr(), m_op, m_tol);
1791    case G_BINARY:    return new DataLazy(m_left->deepCopy()->getPtr(),m_right->deepCopy()->getPtr(),m_op);    case G_BINARY:    return new DataLazy(m_left->deepCopy()->getPtr(),m_right->deepCopy()->getPtr(),m_op);
1792    case G_NP1OUT: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(),m_op);    case G_NP1OUT: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(),m_op);
1793    case G_TENSORPROD: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);    case G_TENSORPROD: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
1794      case G_NP1OUT_P:   return new DataLazy(m_left->deepCopy()->getPtr(),m_op,  m_axis_offset);
1795      case G_NP1OUT_2P:  return new DataLazy(m_left->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
1796    default:    default:
1797      throw DataException("Programmer error - do not know how to deepcopy operator "+opToString(m_op)+".");      throw DataException("Programmer error - do not know how to deepcopy operator "+opToString(m_op)+".");
1798    }    }
1799  }  }
1800    
1801    
1802    
1803  // There is no single, natural interpretation of getLength on DataLazy.  // There is no single, natural interpretation of getLength on DataLazy.
1804  // Instances of DataReady can look at the size of their vectors.  // Instances of DataReady can look at the size of their vectors.
1805  // For lazy though, it could be the size the data would be if it were resolved;  // For lazy though, it could be the size the data would be if it were resolved;
# Line 1876  DataLazy::setToZero() Line 1888  DataLazy::setToZero()
1888  //   m_readytype='C';  //   m_readytype='C';
1889  //   m_buffsRequired=1;  //   m_buffsRequired=1;
1890    
1891      privdebug=privdebug;  // to stop the compiler complaining about unused privdebug
1892    throw DataException("Programmer error - setToZero not supported for DataLazy (DataLazy objects should be read only).");    throw DataException("Programmer error - setToZero not supported for DataLazy (DataLazy objects should be read only).");
   
1893  }  }
1894    
1895  bool  bool

Legend:
Removed from v.2496  
changed lines
  Added in v.2777

  ViewVC Help
Powered by ViewVC 1.1.26