/[escript]/branches/clazy/escriptcore/src/DataLazy.cpp

Diff of /branches/clazy/escriptcore/src/DataLazy.cpp


revision 2472 by jfenwick, Thu Jun 11 23:33:47 2009 UTC → revision 2737 by jfenwick, Tue Nov 3 00:44:00 2009 UTC
# Line 1  Line 1 
1    
2  /*******************************************************  /*******************************************************
3  *  *
4  * Copyright (c) 2003-2008 by University of Queensland  * Copyright (c) 2003-2009 by University of Queensland
5  * Earth Systems Science Computational Center (ESSCC)  * Earth Systems Science Computational Center (ESSCC)
6  * http://www.uq.edu.au/esscc  * http://www.uq.edu.au/esscc
7  *  *
# Line 42  bool privdebug=false; Line 42  bool privdebug=false;
42  #define DISABLEDEBUG privdebug=false;  #define DISABLEDEBUG privdebug=false;
43  }  }
44    
45  // #define SIZELIMIT  // #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {cerr << "\n!!!!!!! SIZE LIMIT EXCEEDED " << m_children << ";" << m_height << endl << toString() << endl;resolveToIdentity();}
 // #define SIZELIMIT if ((m_height>7) || (m_children>127)) {cerr << "\n!!!!!!! SIZE LIMIT EXCEEDED " << m_children << ";" << m_height << endl << toString() << endl; resolveToIdentity();}  
 //#define SIZELIMIT if ((m_height>7) || (m_children>127)) {resolveToIdentity();}  
46    
47  #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {resolveToIdentity();}  #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {resolveToIdentity();}
48    
49    
50  /*  /*
51  How does DataLazy work?  How does DataLazy work?
52  ~~~~~~~~~~~~~~~~~~~~~~~  ~~~~~~~~~~~~~~~~~~~~~~~
# Line 119  enum ES_opgroup Line 118  enum ES_opgroup
118     G_UNARY_P,       // pointwise operations with one argument, requiring a parameter     G_UNARY_P,       // pointwise operations with one argument, requiring a parameter
119     G_NP1OUT,        // non-pointwise op with one output     G_NP1OUT,        // non-pointwise op with one output
120     G_NP1OUT_P,      // non-pointwise op with one output requiring a parameter     G_NP1OUT_P,      // non-pointwise op with one output requiring a parameter
121     G_TENSORPROD     // general tensor product     G_TENSORPROD,    // general tensor product
122       G_NP1OUT_2P,     // non-pointwise op with one output requiring two params
123       G_REDUCTION      // non-pointwise unary op with a scalar output
124  };  };
125    
126    
# Line 133  string ES_opstrings[]={"UNKNOWN","IDENTI Line 134  string ES_opstrings[]={"UNKNOWN","IDENTI
134              "1/","where>0","where<0","where>=0","where<=0", "where<>0","where=0",              "1/","where>0","where<0","where>=0","where<=0", "where<>0","where=0",
135              "symmetric","nonsymmetric",              "symmetric","nonsymmetric",
136              "prod",              "prod",
137              "transpose", "trace"};              "transpose", "trace",
138  int ES_opcount=40;              "swapaxes",
139                "minval", "maxval"};
140    int ES_opcount=43;
141  ES_opgroup opgroups[]={G_UNKNOWN,G_IDENTITY,G_BINARY,G_BINARY,G_BINARY,G_BINARY, G_BINARY,  ES_opgroup opgroups[]={G_UNKNOWN,G_IDENTITY,G_BINARY,G_BINARY,G_BINARY,G_BINARY, G_BINARY,
142              G_UNARY,G_UNARY,G_UNARY, //10              G_UNARY,G_UNARY,G_UNARY, //10
143              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,    // 17              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY,    // 17
# Line 143  ES_opgroup opgroups[]={G_UNKNOWN,G_IDENT Line 146  ES_opgroup opgroups[]={G_UNKNOWN,G_IDENT
146              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY, G_UNARY_P, G_UNARY_P,      // 35              G_UNARY,G_UNARY,G_UNARY,G_UNARY,G_UNARY, G_UNARY_P, G_UNARY_P,      // 35
147              G_NP1OUT,G_NP1OUT,              G_NP1OUT,G_NP1OUT,
148              G_TENSORPROD,              G_TENSORPROD,
149              G_NP1OUT_P, G_NP1OUT_P};              G_NP1OUT_P, G_NP1OUT_P,
150                G_NP1OUT_2P,
151                G_REDUCTION, G_REDUCTION};
152  inline  inline
153  ES_opgroup  ES_opgroup
154  getOpgroup(ES_optype op)  getOpgroup(ES_optype op)
# Line 188  resultShape(DataAbstract_ptr left, DataA Line 193  resultShape(DataAbstract_ptr left, DataA
193        {        {
194          throw DataException("Shapes not the same - shapes must match for (point)binary operations.");          throw DataException("Shapes not the same - shapes must match for (point)binary operations.");
195        }        }
196    
197        if (left->getRank()==0)   // we need to allow scalar * anything        if (left->getRank()==0)   // we need to allow scalar * anything
198        {        {
199          return right->getShape();          return right->getShape();
# Line 285  resultShape(DataAbstract_ptr left, ES_op Line 291  resultShape(DataAbstract_ptr left, ES_op
291      }      }
292  }  }
293    
294    DataTypes::ShapeType
295    SwapShape(DataAbstract_ptr left, const int axis0, const int axis1)
296    {
297         // This code taken from the Data.cpp swapaxes() method
298         // Some of the checks are probably redundant here
299         int axis0_tmp,axis1_tmp;
300         const DataTypes::ShapeType& s=left->getShape();
301         DataTypes::ShapeType out_shape;
302         // Build the output shape by copying the input shape and
303         // exchanging the entries at axis0 and axis1.
304         int rank=left->getRank();
305         if (rank<2) {
306            throw DataException("Error - Data::swapaxes argument must have at least rank 2.");
307         }
308         if (axis0<0 || axis0>rank-1) {
309          throw DataException("Error - Data::swapaxes: axis0 must be between 0 and rank-1.");
310         }
311         if (axis1<0 || axis1>rank-1) {
312           throw DataException("Error - Data::swapaxes: axis1 must be between 0 and rank-1.");
313         }
314         if (axis0 == axis1) {
315             throw DataException("Error - Data::swapaxes: axis indices must be different.");
316         }
317         if (axis0 > axis1) {
318             axis0_tmp=axis1;
319             axis1_tmp=axis0;
320         } else {
321             axis0_tmp=axis0;
322             axis1_tmp=axis1;
323         }
324         for (int i=0; i<rank; i++) {
325           if (i == axis0_tmp) {
326              out_shape.push_back(s[axis1_tmp]);
327           } else if (i == axis1_tmp) {
328              out_shape.push_back(s[axis0_tmp]);
329           } else {
330              out_shape.push_back(s[i]);
331           }
332         }
333        return out_shape;
334    }
335    
336    
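The new SwapShape() helper above does nothing more than copy the input shape and exchange the entries at the two named axes. A minimal self-contained sketch of that computation, using std::vector<int> in place of escript's DataTypes::ShapeType (names here are illustrative, not escript API):

#include <vector>
#include <stdexcept>
#include <utility>
#include <cassert>

std::vector<int> swapShape(const std::vector<int>& s, int axis0, int axis1)
{
    const int rank = static_cast<int>(s.size());
    if (rank < 2)
        throw std::runtime_error("swapaxes needs rank >= 2");
    if (axis0 < 0 || axis0 >= rank || axis1 < 0 || axis1 >= rank || axis0 == axis1)
        throw std::runtime_error("invalid axis arguments");
    std::vector<int> out(s);
    std::swap(out[axis0], out[axis1]);   // only the two named axes trade places
    return out;
}

int main()
{
    std::vector<int> shape = {2, 3, 4};
    std::vector<int> swapped = swapShape(shape, 0, 2);
    assert((swapped == std::vector<int>{4, 3, 2}));
    return 0;
}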
337  // determine the output shape for the general tensor product operation  // determine the output shape for the general tensor product operation
338  // the additional parameters return information required later for the product  // the additional parameters return information required later for the product
339  // the majority of this code is copy pasted from C_General_Tensor_Product  // the majority of this code is copy pasted from C_General_Tensor_Product
# Line 362  calcBuffs(const DataLazy_ptr& left, cons Line 411  calcBuffs(const DataLazy_ptr& left, cons
411     {     {
412     case G_IDENTITY: return 1;     case G_IDENTITY: return 1;
413     case G_BINARY: return 1+max(left->getBuffsRequired(),right->getBuffsRequired()+1);     case G_BINARY: return 1+max(left->getBuffsRequired(),right->getBuffsRequired()+1);
414       case G_REDUCTION:
415     case G_UNARY:     case G_UNARY:
416     case G_UNARY_P: return max(left->getBuffsRequired(),1);     case G_UNARY_P: return max(left->getBuffsRequired(),1);
417     case G_NP1OUT: return 1+max(left->getBuffsRequired(),1);     case G_NP1OUT: return 1+max(left->getBuffsRequired(),1);
418     case G_NP1OUT_P: return 1+max(left->getBuffsRequired(),1);     case G_NP1OUT_P: return 1+max(left->getBuffsRequired(),1);
419     case G_TENSORPROD: return 1+max(left->getBuffsRequired(),right->getBuffsRequired()+1);     case G_TENSORPROD: return 1+max(left->getBuffsRequired(),right->getBuffsRequired()+1);
420       case G_NP1OUT_2P: return 1+max(left->getBuffsRequired(),1);
421     default:     default:
422      throw DataException("Programmer Error - attempt to calcBuffs() for operator "+opToString(op)+".");      throw DataException("Programmer Error - attempt to calcBuffs() for operator "+opToString(op)+".");
423     }     }
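The calcBuffs() cases above encode a simple recurrence over the expression tree: an identity leaf needs one buffer, unary-style nodes can reuse the left child's buffers, and binary-style nodes need a buffer for their own result plus one so the right child does not overwrite the left child's value. A reduced sketch of that recurrence on a bare tree (illustrative only, not escript's node types):

#include <algorithm>

struct Node {
    const Node* left  = nullptr;
    const Node* right = nullptr;

    int buffsRequired() const
    {
        if (!left && !right) return 1;                                    // identity leaf
        if (left && !right)  return std::max(left->buffsRequired(), 1);  // unary-style
        return 1 + std::max(left->buffsRequired(),
                            right->buffsRequired() + 1);                 // binary-style
    }
};

int main()
{
    Node a, b, sum;          // sum models a binary node such as ADD
    sum.left = &a;
    sum.right = &b;
    return (sum.buffsRequired() == 3) ? 0 : 1;   // 1 + max(1, 1+1) = 3
}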
# Line 388  opToString(ES_optype op) Line 439  opToString(ES_optype op)
439    return ES_opstrings[op];    return ES_opstrings[op];
440  }  }
441    
442    #ifdef LAZY_NODE_STORAGE
443    void DataLazy::LazyNodeSetup()
444    {
445    #ifdef _OPENMP
446        int numthreads=omp_get_max_threads();
447        m_samples.resize(numthreads*m_samplesize);
448        m_sampleids=new int[numthreads];
449        for (int i=0;i<numthreads;++i)
450        {
451            m_sampleids[i]=-1;  
452        }
453    #else
454        m_samples.resize(m_samplesize);
455        m_sampleids=new int[1];
456        m_sampleids[0]=-1;
457    #endif  // _OPENMP
458    }
459    #endif   // LAZY_NODE_STORAGE
460    
461    
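LazyNodeSetup() above gives each OpenMP thread its own slice of m_samples plus a "last sample resolved" marker, so a repeated request for the same sample by the same thread can be answered from the cached slice. A stand-alone sketch of that layout (the NodeScratch type and its names are hypothetical, not part of escript):

#include <cstddef>
#include <vector>
#ifdef _OPENMP
#include <omp.h>
#endif

struct NodeScratch {
    std::vector<double> samples;   // numthreads * samplesize doubles, one slice per thread
    std::vector<int> sampleids;    // sample last resolved by each thread (-1 = none yet)
    std::size_t samplesize;

    explicit NodeScratch(std::size_t sampleSize) : samplesize(sampleSize)
    {
#ifdef _OPENMP
        int numthreads = omp_get_max_threads();
#else
        int numthreads = 1;
#endif
        samples.resize(numthreads * samplesize);
        sampleids.assign(numthreads, -1);
    }

    // the region of the shared vector that a given thread may use as workspace
    double* slice(int tid) { return &samples[tid * samplesize]; }
};

int main()
{
    NodeScratch scratch(16);     // 16 doubles per sample, per thread
    scratch.slice(0)[0] = 1.0;   // thread 0 writes only into its own slice
    return 0;
}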
462    // Creates an identity node
463  DataLazy::DataLazy(DataAbstract_ptr p)  DataLazy::DataLazy(DataAbstract_ptr p)
464      : parent(p->getFunctionSpace(),p->getShape())      : parent(p->getFunctionSpace(),p->getShape())
465    #ifdef LAZY_NODE_STORAGE
466        ,m_sampleids(0),
467        m_samples(1)
468    #endif
469  {  {
470     if (p->isLazy())     if (p->isLazy())
471     {     {
# Line 409  LAZYDEBUG(cout << "Wrapping " << dr.get( Line 484  LAZYDEBUG(cout << "Wrapping " << dr.get(
484  LAZYDEBUG(cout << "(1)Lazy created with " << m_samplesize << endl;)  LAZYDEBUG(cout << "(1)Lazy created with " << m_samplesize << endl;)
485  }  }
486    
   
   
   
487  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op)  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op)
488      : parent(left->getFunctionSpace(),left->getShape()),      : parent(left->getFunctionSpace(),(getOpgroup(op)!=G_REDUCTION)?left->getShape():DataTypes::scalarShape),
489      m_op(op),      m_op(op),
490      m_axis_offset(0),      m_axis_offset(0),
491      m_transpose(0),      m_transpose(0),
492      m_SL(0), m_SM(0), m_SR(0)      m_SL(0), m_SM(0), m_SR(0)
493  {  {
494     if ((getOpgroup(op)!=G_UNARY) && (getOpgroup(op)!=G_NP1OUT))     if ((getOpgroup(op)!=G_UNARY) && (getOpgroup(op)!=G_NP1OUT) && (getOpgroup(op)!=G_REDUCTION))
495     {     {
496      throw DataException("Programmer error - constructor DataLazy(left, op) will only process UNARY operations.");      throw DataException("Programmer error - constructor DataLazy(left, op) will only process UNARY operations.");
497     }     }
# Line 440  DataLazy::DataLazy(DataAbstract_ptr left Line 512  DataLazy::DataLazy(DataAbstract_ptr left
512     m_maxsamplesize=max(m_samplesize,m_left->getMaxSampleSize());     m_maxsamplesize=max(m_samplesize,m_left->getMaxSampleSize());
513     m_children=m_left->m_children+1;     m_children=m_left->m_children+1;
514     m_height=m_left->m_height+1;     m_height=m_left->m_height+1;
515    #ifdef LAZY_NODE_STORAGE
516       LazyNodeSetup();
517    #endif
518     SIZELIMIT     SIZELIMIT
519  }  }
520    
# Line 510  LAZYDEBUG(cout << "Right " << right.get( Line 585  LAZYDEBUG(cout << "Right " << right.get(
585     m_buffsRequired=calcBuffs(m_left, m_right,m_op);     m_buffsRequired=calcBuffs(m_left, m_right,m_op);
586     m_children=m_left->m_children+m_right->m_children+2;     m_children=m_left->m_children+m_right->m_children+2;
587     m_height=max(m_left->m_height,m_right->m_height)+1;     m_height=max(m_left->m_height,m_right->m_height)+1;
588    #ifdef LAZY_NODE_STORAGE
589       LazyNodeSetup();
590    #endif
591     SIZELIMIT     SIZELIMIT
592  LAZYDEBUG(cout << "(3)Lazy created with " << m_samplesize << endl;)  LAZYDEBUG(cout << "(3)Lazy created with " << m_samplesize << endl;)
593  }  }
# Line 577  DataLazy::DataLazy(DataAbstract_ptr left Line 655  DataLazy::DataLazy(DataAbstract_ptr left
655     m_buffsRequired=calcBuffs(m_left, m_right,m_op);     m_buffsRequired=calcBuffs(m_left, m_right,m_op);
656     m_children=m_left->m_children+m_right->m_children+2;     m_children=m_left->m_children+m_right->m_children+2;
657     m_height=max(m_left->m_height,m_right->m_height)+1;     m_height=max(m_left->m_height,m_right->m_height)+1;
658    #ifdef LAZY_NODE_STORAGE
659       LazyNodeSetup();
660    #endif
661     SIZELIMIT     SIZELIMIT
662  LAZYDEBUG(cout << "(4)Lazy created with " << m_samplesize << endl;)  LAZYDEBUG(cout << "(4)Lazy created with " << m_samplesize << endl;)
663  }  }
# Line 609  DataLazy::DataLazy(DataAbstract_ptr left Line 690  DataLazy::DataLazy(DataAbstract_ptr left
690     m_maxsamplesize=max(m_samplesize,m_left->getMaxSampleSize());     m_maxsamplesize=max(m_samplesize,m_left->getMaxSampleSize());
691     m_children=m_left->m_children+1;     m_children=m_left->m_children+1;
692     m_height=m_left->m_height+1;     m_height=m_left->m_height+1;
693    #ifdef LAZY_NODE_STORAGE
694       LazyNodeSetup();
695    #endif
696     SIZELIMIT     SIZELIMIT
697  LAZYDEBUG(cout << "(5)Lazy created with " << m_samplesize << endl;)  LAZYDEBUG(cout << "(5)Lazy created with " << m_samplesize << endl;)
698  }  }
# Line 640  DataLazy::DataLazy(DataAbstract_ptr left Line 724  DataLazy::DataLazy(DataAbstract_ptr left
724     m_maxsamplesize=max(m_samplesize,m_left->getMaxSampleSize());     m_maxsamplesize=max(m_samplesize,m_left->getMaxSampleSize());
725     m_children=m_left->m_children+1;     m_children=m_left->m_children+1;
726     m_height=m_left->m_height+1;     m_height=m_left->m_height+1;
727    #ifdef LAZY_NODE_STORAGE
728       LazyNodeSetup();
729    #endif
730     SIZELIMIT     SIZELIMIT
731  LAZYDEBUG(cout << "(6)Lazy created with " << m_samplesize << endl;)  LAZYDEBUG(cout << "(6)Lazy created with " << m_samplesize << endl;)
732  }  }
733    
734    
735    DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op, const int axis0, const int axis1)
736        : parent(left->getFunctionSpace(), SwapShape(left,axis0,axis1)),
737        m_op(op),
738        m_axis_offset(axis0),
739        m_transpose(axis1),
740        m_tol(0)
741    {
742       if ((getOpgroup(op)!=G_NP1OUT_2P))
743       {
744        throw DataException("Programmer error - constructor DataLazy(left, op, axis0, axis1) will only process operations which require two integer parameters.");
745       }
746       DataLazy_ptr lleft;
747       if (!left->isLazy())
748       {
749        lleft=DataLazy_ptr(new DataLazy(left));
750       }
751       else
752       {
753        lleft=dynamic_pointer_cast<DataLazy>(left);
754       }
755       m_readytype=lleft->m_readytype;
756       m_left=lleft;
757       m_buffsRequired=calcBuffs(m_left, m_right,m_op); // m_right is null here; calcBuffs does not use it for this op group
758       m_samplesize=getNumDPPSample()*getNoValues();
759       m_maxsamplesize=max(m_samplesize,m_left->getMaxSampleSize());
760       m_children=m_left->m_children+1;
761       m_height=m_left->m_height+1;
762    #ifdef LAZY_NODE_STORAGE
763       LazyNodeSetup();
764    #endif
765       SIZELIMIT
766    LAZYDEBUG(cout << "(7)Lazy created with " << m_samplesize << endl;)
767    }
768    
769  DataLazy::~DataLazy()  DataLazy::~DataLazy()
770  {  {
771    #ifdef LAZY_NODE_STORAGE
772       delete[] m_sampleids;
773       // m_samples is a DataVector member and releases its own storage
774    #endif
775  }  }
776    
777    
# Line 809  DataLazy::collapseToReady() Line 935  DataLazy::collapseToReady()
935      case TRACE:      case TRACE:
936      result=left.trace(m_axis_offset);      result=left.trace(m_axis_offset);
937      break;      break;
938        case SWAP:
939        result=left.swapaxes(m_axis_offset, m_transpose);
940        break;
941        case MINVAL:
942        result=left.minval();
943        break;
944        case MAXVAL:
945        result=left.maxval();
946        break;
947      default:      default:
948      throw DataException("Programmer error - collapseToReady does not know how to resolve operator "+opToString(m_op)+".");      throw DataException("Programmer error - collapseToReady does not know how to resolve operator "+opToString(m_op)+".");
949    }    }
# Line 858  DataLazy::resolveUnary(ValueType& v, siz Line 993  DataLazy::resolveUnary(ValueType& v, siz
993    {    {
994      throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");      throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
995    }    }
996    const ValueType* vleft=m_left->resolveSample(v,offset,sampleNo,roffset);    const ValueType* vleft=m_left->resolveVectorSample(v,offset,sampleNo,roffset);
997    const double* left=&((*vleft)[roffset]);    const double* left=&((*vleft)[roffset]);
998    double* result=&(v[offset]);    double* result=&(v[offset]);
999    roffset=offset;    roffset=offset;
# Line 975  DataLazy::resolveUnary(ValueType& v, siz Line 1110  DataLazy::resolveUnary(ValueType& v, siz
1110  }  }
1111    
1112    
1113    /*
1114      \brief Compute the value of the expression (reduction operation) for the given sample.
1115      \return Vector which stores the value of the subexpression for the given sample.
1116      \param v A vector to store intermediate results.
1117      \param offset Index in v to begin storing results.
1118      \param sampleNo Sample number to evaluate.
1119      \param roffset (output parameter) the offset in the return vector where the result begins.
1120    
1121      The return value will be an existing vector so do not deallocate it.
1122      If the result is stored in v it should be stored at the offset given.
1123      Everything from offset to the end of v should be considered available for this method to use.
1124    */
1125    DataTypes::ValueType*
1126    DataLazy::resolveReduction(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const
1127    {
1128        // we assume that any collapsing has been done before we get here
1129        // since we only have one argument we don't need to think about only
1130        // processing single points.
1131      if (m_readytype!='E')
1132      {
1133      throw DataException("Programmer error - resolveReduction should only be called on expanded Data.");
1134      }
1135      const ValueType* vleft=m_left->resolveVectorSample(v,offset,sampleNo,roffset);
1136      double* result=&(v[offset]);
1137      roffset=offset;
1138      unsigned int ndpps=getNumDPPSample();
1139      unsigned int psize=DataTypes::noValues(getShape());
1140      switch (m_op)
1141      {
1142        case MINVAL:
1143        {
1144          for (unsigned int z=0;z<ndpps;++z)
1145          {
1146             FMin op;
1147             *result=DataMaths::reductionOp(*vleft, m_left->getShape(), roffset, op, numeric_limits<double>::max());
1148             roffset+=psize;
1149             result++;
1150          }
1151        }
1152        break;
1153        case MAXVAL:
1154        {
1155          for (unsigned int z=0;z<ndpps;++z)
1156          {
1157             FMax op;
1158             *result=DataMaths::reductionOp(*vleft, m_left->getShape(), roffset, op, numeric_limits<double>::max()*-1);
1159             roffset+=psize;
1160             result++;
1161          }
1162        }
1163        break;
1164        default:
1165        throw DataException("Programmer error - resolveReduction can not resolve operator "+opToString(m_op)+".");
1166      }
1167      return &v;
1168    }
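The MINVAL and MAXVAL branches above collapse each data point of the child's sample to a single scalar through DataMaths::reductionOp with an FMin/FMax functor. Stripped of the escript types, the per-point minimum reduction looks roughly like this (a sketch on plain buffers, hypothetical names):

#include <cstddef>
#include <limits>
#include <algorithm>
#include <cassert>

void minvalPerPoint(const double* sample, std::size_t ndpps, std::size_t psize, double* result)
{
    for (std::size_t z = 0; z < ndpps; ++z)
    {
        double acc = std::numeric_limits<double>::max();   // same seed as the code above
        for (std::size_t k = 0; k < psize; ++k)
            acc = std::min(acc, sample[z * psize + k]);
        result[z] = acc;     // one scalar per data point, matching the scalar output shape
    }
}

int main()
{
    const double sample[] = {3.0, 1.0, 2.0,  5.0, 4.0, 6.0};   // 2 points, psize = 3
    double out[2];
    minvalPerPoint(sample, 2, 3, out);
    assert(out[0] == 1.0 && out[1] == 4.0);
    return 0;
}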
1169    
1170    
1171    
# Line 1004  DataLazy::resolveNP1OUT(ValueType& v, si Line 1194  DataLazy::resolveNP1OUT(ValueType& v, si
1194      // since we can't write the result over the input, we need a result offset further along      // since we can't write the result over the input, we need a result offset further along
1195    size_t subroffset=roffset+m_samplesize;    size_t subroffset=roffset+m_samplesize;
1196  LAZYDEBUG(cerr << "subroffset=" << subroffset << endl;)  LAZYDEBUG(cerr << "subroffset=" << subroffset << endl;)
1197    const ValueType* vleft=m_left->resolveSample(v,offset+m_left->m_samplesize,sampleNo,subroffset);    const ValueType* vleft=m_left->resolveVectorSample(v,offset+m_left->m_samplesize,sampleNo,subroffset);
1198    roffset=offset;    roffset=offset;
1199    size_t loop=0;    size_t loop=0;
1200    size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;    size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
# Line 1057  DataLazy::resolveNP1OUT_P(ValueType& v, Line 1247  DataLazy::resolveNP1OUT_P(ValueType& v,
1247    }    }
1248      // since we can't write the result over the input, we need a result offset further along      // since we can't write the result over the input, we need a result offset further along
1249    size_t subroffset;    size_t subroffset;
1250    const ValueType* vleft=m_left->resolveSample(v,offset+m_left->m_samplesize,sampleNo,subroffset);    const ValueType* vleft=m_left->resolveVectorSample(v,offset+m_left->m_samplesize,sampleNo,subroffset);
1251  LAZYDEBUG(cerr << "srcsamplesize=" << offset+m_left->m_samplesize << " beg=" << subroffset << endl;)  LAZYDEBUG(cerr << "srcsamplesize=" << offset+m_left->m_samplesize << " beg=" << subroffset << endl;)
1252  LAZYDEBUG(cerr << "Offset for 5800=" << getPointOffset(5800/getNumDPPSample(),5800%getNumDPPSample()) << endl;)  LAZYDEBUG(cerr << "Offset for 5800=" << getPointOffset(5800/getNumDPPSample(),5800%getNumDPPSample()) << endl;)
1253    roffset=offset;    roffset=offset;
# Line 1104  LAZYDEBUG(cerr << "Result of trace=" << Line 1294  LAZYDEBUG(cerr << "Result of trace=" <<
1294  }  }
1295    
1296    
1297    /*
1298      \brief Compute the value of the expression (unary operation with int params) for the given sample.
1299      \return Vector which stores the value of the subexpression for the given sample.
1300      \param v A vector to store intermediate results.
1301      \param offset Index in v to begin storing results.
1302      \param sampleNo Sample number to evaluate.
1303      \param roffset (output parameter) the offset in the return vector where the result begins.
1304    
1305      The return value will be an existing vector so do not deallocate it.
1306      If the result is stored in v it should be stored at the offset given.
1307      Everything from offset to the end of v should be considered available for this method to use.
1308    */
1309    DataTypes::ValueType*
1310    DataLazy::resolveNP1OUT_2P(ValueType& v, size_t offset, int sampleNo, size_t& roffset) const
1311    {
1312        // we assume that any collapsing has been done before we get here
1313        // since we only have one argument we don't need to think about only
1314        // processing single points.
1315      if (m_readytype!='E')
1316      {
1317        throw DataException("Programmer error - resolveNP1OUT_2P should only be called on expanded Data.");
1318      }
1319        // since we can't write the result over the input, we need a result offset further along
1320      size_t subroffset;
1321      const ValueType* vleft=m_left->resolveVectorSample(v,offset+m_left->m_samplesize,sampleNo,subroffset);
1322    LAZYDEBUG(cerr << "srcsamplesize=" << offset+m_left->m_samplesize << " beg=" << subroffset << endl;)
1323    LAZYDEBUG(cerr << "Offset for 5800=" << getPointOffset(5800/getNumDPPSample(),5800%getNumDPPSample()) << endl;)
1324      roffset=offset;
1325      size_t loop=0;
1326      size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1327      size_t outstep=getNoValues();
1328      size_t instep=m_left->getNoValues();
1329    LAZYDEBUG(cerr << "instep=" << instep << " outstep=" << outstep<< " numsteps=" << numsteps << endl;)
1330      switch (m_op)
1331      {
1332        case SWAP:
1333        for (loop=0;loop<numsteps;++loop)
1334        {
1335                DataMaths::swapaxes(*vleft,m_left->getShape(),subroffset, v,getShape(),offset,m_axis_offset, m_transpose);
1336            subroffset+=instep;
1337            offset+=outstep;
1338        }
1339        break;
1340        default:
1341        throw DataException("Programmer error - resolveNP1OUT2P can not resolve operator "+opToString(m_op)+".");
1342      }
1343      return &v;
1344    }
1345    
1346    
1347    
1348  #define PROC_OP(TYPE,X)                               \  #define PROC_OP(TYPE,X)                               \
1349      for (int j=0;j<onumsteps;++j)\      for (int j=0;j<onumsteps;++j)\
1350      {\      {\
# Line 1254  LAZYDEBUG(cout << "Resolve binary: " << Line 1495  LAZYDEBUG(cout << "Resolve binary: " <<
1495    
1496    int resultStep=max(leftstep,rightstep);   // only one (at most) should be !=0    int resultStep=max(leftstep,rightstep);   // only one (at most) should be !=0
1497      // Get the values of sub-expressions      // Get the values of sub-expressions
1498    const ValueType* left=m_left->resolveSample(v,offset+getMaxSampleSize(),sampleNo,lroffset);   // see note on    const ValueType* left=m_left->resolveVectorSample(v,offset+getMaxSampleSize(),sampleNo,lroffset); // see note on
1499      // calcBuffs for why we can't put offset as the 2nd param above      // calcBuffs for why we can't put offset as the 2nd param above
1500    const ValueType* right=m_right->resolveSample(v,offset+2*getMaxSampleSize(),sampleNo,rroffset); // Note    const ValueType* right=m_right->resolveVectorSample(v,offset+2*getMaxSampleSize(),sampleNo,rroffset); // Note
1501      // the right child starts further along.      // the right child starts further along.
1502  LAZYDEBUG(cout << "Post sub calls in " << toString() << endl;)  LAZYDEBUG(cout << "Post sub calls in " << toString() << endl;)
1503  LAZYDEBUG(cout << "shapes=" << DataTypes::shapeToString(m_left->getShape()) << "," << DataTypes::shapeToString(m_right->getShape()) << endl;)  LAZYDEBUG(cout << "shapes=" << DataTypes::shapeToString(m_left->getShape()) << "," << DataTypes::shapeToString(m_right->getShape()) << endl;)
# Line 1330  LAZYDEBUG(cout << "Resolve TensorProduct Line 1571  LAZYDEBUG(cout << "Resolve TensorProduct
1571    
1572  LAZYDEBUG(cout << "Query left with offset=" << gap << endl;)  LAZYDEBUG(cout << "Query left with offset=" << gap << endl;)
1573    
1574    const ValueType* left=m_left->resolveSample(v,gap,sampleNo,lroffset);    const ValueType* left=m_left->resolveVectorSample(v,gap,sampleNo,lroffset);
1575    gap+=m_left->getMaxSampleSize();    gap+=m_left->getMaxSampleSize();
1576    
1577    
1578  LAZYDEBUG(cout << "Query right with offset=" << gap << endl;)  LAZYDEBUG(cout << "Query right with offset=" << gap << endl;)
1579    
1580    
1581    const ValueType* right=m_right->resolveSample(v,gap,sampleNo,rroffset);    const ValueType* right=m_right->resolveVectorSample(v,gap,sampleNo,rroffset);
1582    
1583  LAZYDEBUG(cerr << "[Left shape]=" << DataTypes::shapeToString(m_left->getShape()) << "\n[Right shape]=" << DataTypes::shapeToString(m_right->getShape()) << " result=" <<DataTypes::shapeToString(getShape()) <<  endl;  LAZYDEBUG(cerr << "[Left shape]=" << DataTypes::shapeToString(m_left->getShape()) << "\n[Right shape]=" << DataTypes::shapeToString(m_right->getShape()) << " result=" <<DataTypes::shapeToString(getShape()) <<  endl;
1584  cout << getNoValues() << endl;)  cout << getNoValues() << endl;)
# Line 1410  cout << "\nWritten to: " << resultp << " Line 1651  cout << "\nWritten to: " << resultp << "
1651  }  }
1652    
1653    
1654    #ifdef LAZY_NODE_STORAGE
1655    
1656    // The result will be stored in m_samples
1657    // The return value is a pointer to the DataVector, offset is the offset within the return value
1658    const DataTypes::ValueType*
1659    DataLazy::resolveNodeSample(int tid, int sampleNo, size_t& roffset)
1660    {
1661    LAZYDEBUG(cout << "Resolve sample " << toString() << endl;)
1662        // collapse so we have a 'E' node or an IDENTITY for some other type
1663      if (m_readytype!='E' && m_op!=IDENTITY)
1664      {
1665        collapse();
1666      }
1667      if (m_op==IDENTITY)  
1668      {
1669        const ValueType& vec=m_id->getVectorRO();
1670    //     if (m_readytype=='C')
1671    //     {
1672    //  roffset=0;      // all samples read from the same position
1673    //  return &(m_samples);
1674    //     }
1675        roffset=m_id->getPointOffset(sampleNo, 0);
1676        return &(vec);
1677      }
1678      if (m_readytype!='E')
1679      {
1680        throw DataException("Programmer Error - Collapse did not produce an expanded node.");
1681      }
1682      if (m_sampleids[tid]==sampleNo)
1683      {
1684        roffset=tid*m_samplesize;
1685        return &(m_samples);        // sample is already resolved
1686      }
1687      m_sampleids[tid]=sampleNo;
1688      switch (getOpgroup(m_op))
1689      {
1690      case G_UNARY:
1691      case G_UNARY_P: return resolveNodeUnary(tid, sampleNo, roffset);
1692      case G_BINARY: return resolveNodeBinary(tid, sampleNo, roffset);
1693      case G_NP1OUT: return resolveNodeNP1OUT(tid, sampleNo, roffset);
1694      case G_NP1OUT_P: return resolveNodeNP1OUT_P(tid, sampleNo, roffset);
1695      case G_TENSORPROD: return resolveNodeTProd(tid, sampleNo, roffset);
1696      case G_NP1OUT_2P: return resolveNodeNP1OUT_2P(tid, sampleNo, roffset);
1697      case G_REDUCTION: return resolveNodeReduction(tid, sampleNo, roffset);
1698      default:
1699        throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");
1700      }
1701    }
1702    
1703    const DataTypes::ValueType*
1704    DataLazy::resolveNodeUnary(int tid, int sampleNo, size_t& roffset)
1705    {
1706        // we assume that any collapsing has been done before we get here
1707        // since we only have one argument we don't need to think about only
1708        // processing single points.
1709        // we will also know we won't get identity nodes
1710      if (m_readytype!='E')
1711      {
1712        throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
1713      }
1714      if (m_op==IDENTITY)
1715      {
1716        throw DataException("Programmer error - resolveNodeUnary should not be called on identity nodes.");
1717      }
1718      const DataTypes::ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, roffset);
1719      const double* left=&((*leftres)[roffset]);
1720      roffset=m_samplesize*tid;
1721      double* result=&(m_samples[roffset]);
1722      switch (m_op)
1723      {
1724        case SIN:  
1725        tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sin);
1726        break;
1727        case COS:
1728        tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cos);
1729        break;
1730        case TAN:
1731        tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tan);
1732        break;
1733        case ASIN:
1734        tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::asin);
1735        break;
1736        case ACOS:
1737        tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::acos);
1738        break;
1739        case ATAN:
1740        tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::atan);
1741        break;
1742        case SINH:
1743        tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sinh);
1744        break;
1745        case COSH:
1746        tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cosh);
1747        break;
1748        case TANH:
1749        tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tanh);
1750        break;
1751        case ERF:
1752    #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1753        throw DataException("Error - Data:: erf function is not supported on _WIN32 platforms.");
1754    #else
1755        tensor_unary_operation(m_samplesize, left, result, ::erf);
1756        break;
1757    #endif
1758       case ASINH:
1759    #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1760        tensor_unary_operation(m_samplesize, left, result, escript::asinh_substitute);
1761    #else
1762        tensor_unary_operation(m_samplesize, left, result, ::asinh);
1763    #endif  
1764        break;
1765       case ACOSH:
1766    #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1767        tensor_unary_operation(m_samplesize, left, result, escript::acosh_substitute);
1768    #else
1769        tensor_unary_operation(m_samplesize, left, result, ::acosh);
1770    #endif  
1771        break;
1772       case ATANH:
1773    #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1774        tensor_unary_operation(m_samplesize, left, result, escript::atanh_substitute);
1775    #else
1776        tensor_unary_operation(m_samplesize, left, result, ::atanh);
1777    #endif  
1778        break;
1779        case LOG10:
1780        tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log10);
1781        break;
1782        case LOG:
1783        tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log);
1784        break;
1785        case SIGN:
1786        tensor_unary_operation(m_samplesize, left, result, escript::fsign);
1787        break;
1788        case ABS:
1789        tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::fabs);
1790        break;
1791        case NEG:
1792        tensor_unary_operation(m_samplesize, left, result, negate<double>());
1793        break;
1794        case POS:
1795        // it doesn't mean anything for delayed.
1796        // it will just trigger a deep copy of the lazy object
1797        throw DataException("Programmer error - POS not supported for lazy data.");
1798        break;
1799        case EXP:
1800        tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::exp);
1801        break;
1802        case SQRT:
1803        tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sqrt);
1804        break;
1805        case RECIP:
1806        tensor_unary_operation(m_samplesize, left, result, bind1st(divides<double>(),1.));
1807        break;
1808        case GZ:
1809        tensor_unary_operation(m_samplesize, left, result, bind2nd(greater<double>(),0.0));
1810        break;
1811        case LZ:
1812        tensor_unary_operation(m_samplesize, left, result, bind2nd(less<double>(),0.0));
1813        break;
1814        case GEZ:
1815        tensor_unary_operation(m_samplesize, left, result, bind2nd(greater_equal<double>(),0.0));
1816        break;
1817        case LEZ:
1818        tensor_unary_operation(m_samplesize, left, result, bind2nd(less_equal<double>(),0.0));
1819        break;
1820    // There are actually G_UNARY_P but I don't see a compelling reason to treat them differently
1821        case NEZ:
1822        tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsGT(),m_tol));
1823        break;
1824        case EZ:
1825        tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsLTE(),m_tol));
1826        break;
1827    
1828        default:
1829        throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");
1830      }
1831      return &(m_samples);
1832    }
1833    
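Every case in resolveNodeUnary() above funnels into tensor_unary_operation, which simply applies a scalar function to each double of the resolved sample. A reduced sketch of that pattern (assuming a free-function form; the real template lives in escript's local-ops headers):

#include <cstddef>
#include <cmath>
#include <cassert>

template <class UnaryFunction>
void tensorUnary(std::size_t n, const double* in, double* out, UnaryFunction f)
{
    for (std::size_t i = 0; i < n; ++i)
        out[i] = f(in[i]);          // pointwise, no coupling between values
}

int main()
{
    const double in[3] = {0.0, 1.0, 4.0};
    double out[3];
    tensorUnary(3, in, out, static_cast<double (*)(double)>(std::sqrt));
    assert(out[2] == 2.0);
    return 0;
}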
1834    
1835    const DataTypes::ValueType*
1836    DataLazy::resolveNodeReduction(int tid, int sampleNo, size_t& roffset)
1837    {
1838        // we assume that any collapsing has been done before we get here
1839        // since we only have one argument we don't need to think about only
1840        // processing single points.
1841        // we will also know we won't get identity nodes
1842      if (m_readytype!='E')
1843      {
1844      throw DataException("Programmer error - resolveNodeReduction should only be called on expanded Data.");
1845      }
1846      if (m_op==IDENTITY)
1847      {
1848      throw DataException("Programmer error - resolveNodeReduction should not be called on identity nodes.");
1849      }
1850      size_t loffset=0;
1851      const DataTypes::ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, loffset);
1852    
1853      roffset=m_samplesize*tid;
1854      unsigned int ndpps=getNumDPPSample();
1855      unsigned int psize=DataTypes::noValues(getShape());
1856      double* result=&(m_samples[roffset]);
1857      switch (m_op)
1858      {
1859        case MINVAL:
1860        {
1861          for (unsigned int z=0;z<ndpps;++z)
1862          {
1863            FMin op;
1864            *result=DataMaths::reductionOp(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max());
1865            loffset+=psize;
1866            result++;
1867          }
1868        }
1869        break;
1870        case MAXVAL:
1871        {
1872          for (unsigned int z=0;z<ndpps;++z)
1873          {
1874          FMax op;
1875          *result=DataMaths::reductionOp(*leftres, m_left->getShape(), loffset, op, numeric_limits<double>::max()*-1);
1876          loffset+=psize;
1877          result++;
1878          }
1879        }
1880        break;
1881        default:
1882      throw DataException("Programmer error - resolveNodeReduction can not resolve operator "+opToString(m_op)+".");
1883      }
1884      return &(m_samples);
1885    }
1886    
1887    const DataTypes::ValueType*
1888    DataLazy::resolveNodeNP1OUT(int tid, int sampleNo, size_t& roffset)
1889    {
1890        // we assume that any collapsing has been done before we get here
1891        // since we only have one argument we don't need to think about only
1892        // processing single points.
1893      if (m_readytype!='E')
1894      {
1895        throw DataException("Programmer error - resolveNodeNP1OUT should only be called on expanded Data.");
1896      }
1897      if (m_op==IDENTITY)
1898      {
1899        throw DataException("Programmer error - resolveNodeNP1OUT should not be called on identity nodes.");
1900      }
1901      size_t subroffset;
1902      const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1903      roffset=m_samplesize*tid;
1904      size_t loop=0;
1905      size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1906      size_t step=getNoValues();
1907      size_t offset=roffset;
1908      switch (m_op)
1909      {
1910        case SYM:
1911        for (loop=0;loop<numsteps;++loop)
1912        {
1913            DataMaths::symmetric(*leftres,m_left->getShape(),subroffset, m_samples, getShape(), offset);
1914            subroffset+=step;
1915            offset+=step;
1916        }
1917        break;
1918        case NSYM:
1919        for (loop=0;loop<numsteps;++loop)
1920        {
1921            DataMaths::nonsymmetric(*leftres,m_left->getShape(),subroffset, m_samples, getShape(), offset);
1922            subroffset+=step;
1923            offset+=step;
1924        }
1925        break;
1926        default:
1927        throw DataException("Programmer error - resolveNP1OUT can not resolve operator "+opToString(m_op)+".");
1928      }
1929      return &m_samples;
1930    }
1931    
1932    const DataTypes::ValueType*
1933    DataLazy::resolveNodeNP1OUT_P(int tid, int sampleNo, size_t& roffset)
1934    {
1935        // we assume that any collapsing has been done before we get here
1936        // since we only have one argument we don't need to think about only
1937        // processing single points.
1938      if (m_readytype!='E')
1939      {
1940        throw DataException("Programmer error - resolveNodeNP1OUT_P should only be called on expanded Data.");
1941      }
1942      if (m_op==IDENTITY)
1943      {
1944        throw DataException("Programmer error - resolveNodeNP1OUT_P should not be called on identity nodes.");
1945      }
1946      size_t subroffset;
1947      size_t offset;
1948      const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1949      roffset=m_samplesize*tid;
1950      offset=roffset;
1951      size_t loop=0;
1952      size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1953      size_t outstep=getNoValues();
1954      size_t instep=m_left->getNoValues();
1955      switch (m_op)
1956      {
1957        case TRACE:
1958        for (loop=0;loop<numsteps;++loop)
1959        {
1960                DataMaths::trace(*leftres,m_left->getShape(),subroffset, m_samples ,getShape(),offset,m_axis_offset);
1961            subroffset+=instep;
1962            offset+=outstep;
1963        }
1964        break;
1965        case TRANS:
1966        for (loop=0;loop<numsteps;++loop)
1967        {
1968                DataMaths::transpose(*leftres,m_left->getShape(),subroffset, m_samples, getShape(),offset,m_axis_offset);
1969            subroffset+=instep;
1970            offset+=outstep;
1971        }
1972        break;
1973        default:
1974        throw DataException("Programmer error - resolveNP1OUTP can not resolve operator "+opToString(m_op)+".");
1975      }
1976      return &m_samples;
1977    }
1978    
1979    
1980    const DataTypes::ValueType*
1981    DataLazy::resolveNodeNP1OUT_2P(int tid, int sampleNo, size_t& roffset)
1982    {
1983      if (m_readytype!='E')
1984      {
1985        throw DataException("Programmer error - resolveNodeNP1OUT_2P should only be called on expanded Data.");
1986      }
1987      if (m_op==IDENTITY)
1988      {
1989        throw DataException("Programmer error - resolveNodeNP1OUT_2P should not be called on identity nodes.");
1990      }
1991      size_t subroffset;
1992      size_t offset;
1993      const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1994      roffset=m_samplesize*tid;
1995      offset=roffset;
1996      size_t loop=0;
1997      size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1998      size_t outstep=getNoValues();
1999      size_t instep=m_left->getNoValues();
2000      switch (m_op)
2001      {
2002        case SWAP:
2003        for (loop=0;loop<numsteps;++loop)
2004        {
2005                DataMaths::swapaxes(*leftres,m_left->getShape(),subroffset, m_samples, getShape(),offset, m_axis_offset, m_transpose);
2006            subroffset+=instep;
2007            offset+=outstep;
2008        }
2009        break;
2010        default:
2011        throw DataException("Programmer error - resolveNodeNP1OUT2P can not resolve operator "+opToString(m_op)+".");
2012      }
2013      return &m_samples;
2014    }
2015    
2016    
2017    
2018    // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
2019    // have already been collapsed to IDENTITY. So we must have at least one expanded child.
2020    // If both children are expanded, then we can process them in a single operation (we treat
2021    // the whole sample as one big datapoint.
2022    // If one of the children is not expanded, then we need to treat each point in the sample
2023    // individually.
2024    // There is an additional complication when scalar operations are considered.
2025    // For example, 2+Vector.
2026    // In this case each double within the point is treated individually
2027    const DataTypes::ValueType*
2028    DataLazy::resolveNodeBinary(int tid, int sampleNo, size_t& roffset)
2029    {
2030    LAZYDEBUG(cout << "Resolve binary: " << toString() << endl;)
2031    
2032      size_t lroffset=0, rroffset=0;    // offsets in the left and right result vectors
2033        // first work out which of the children are expanded
2034      bool leftExp=(m_left->m_readytype=='E');
2035      bool rightExp=(m_right->m_readytype=='E');
2036      if (!leftExp && !rightExp)
2037      {
2038        throw DataException("Programmer Error - please use collapse if neither argument has type 'E'.");
2039      }
2040      bool leftScalar=(m_left->getRank()==0);
2041      bool rightScalar=(m_right->getRank()==0);
2042      if ((m_left->getRank()!=m_right->getRank()) && (!leftScalar && !rightScalar))
2043      {
2044        throw DataException("resolveBinary - ranks of arguments must match unless one of them is scalar.");
2045      }
2046      size_t leftsize=m_left->getNoValues();
2047      size_t rightsize=m_right->getNoValues();
2048      size_t chunksize=1;           // how many doubles will be processed in one go
2049      int leftstep=0;       // how far should the left offset advance after each step
2050      int rightstep=0;
2051      int numsteps=0;       // total number of steps for the inner loop
2052      int oleftstep=0;  // the o variables refer to the outer loop
2053      int orightstep=0; // The outer loop is only required in cases where there is an extended scalar
2054      int onumsteps=1;
2055      
2056      bool LES=(leftExp && leftScalar); // Left is an expanded scalar
2057      bool RES=(rightExp && rightScalar);
2058      bool LS=(!leftExp && leftScalar); // left is a single scalar
2059      bool RS=(!rightExp && rightScalar);
2060      bool LN=(!leftExp && !leftScalar);    // left is a single non-scalar
2061      bool RN=(!rightExp && !rightScalar);
2062      bool LEN=(leftExp && !leftScalar);    // left is an expanded non-scalar
2063      bool REN=(rightExp && !rightScalar);
2064    
2065      if ((LES && RES) || (LEN && REN)) // both are Expanded scalars or both are expanded non-scalars
2066      {
2067        chunksize=m_left->getNumDPPSample()*leftsize;
2068        leftstep=0;
2069        rightstep=0;
2070        numsteps=1;
2071      }
2072      else if (LES || RES)
2073      {
2074        chunksize=1;
2075        if (LES)        // left is an expanded scalar
2076        {
2077            if (RS)
2078            {
2079               leftstep=1;
2080               rightstep=0;
2081               numsteps=m_left->getNumDPPSample();
2082            }
2083            else        // RN or REN
2084            {
2085               leftstep=0;
2086               oleftstep=1;
2087               rightstep=1;
2088               orightstep=(RN ? -(int)rightsize : 0);
2089               numsteps=rightsize;
2090               onumsteps=m_left->getNumDPPSample();
2091            }
2092        }
2093        else        // right is an expanded scalar
2094        {
2095            if (LS)
2096            {
2097               rightstep=1;
2098               leftstep=0;
2099               numsteps=m_right->getNumDPPSample();
2100            }
2101            else
2102            {
2103               rightstep=0;
2104               orightstep=1;
2105               leftstep=1;
2106               oleftstep=(LN ? -(int)leftsize : 0);
2107               numsteps=leftsize;
2108               onumsteps=m_right->getNumDPPSample();
2109            }
2110        }
2111      }
2112      else  // this leaves (LEN, RS), (LEN, RN) and their transposes
2113      {
2114        if (LEN)    // and Right will be a single value
2115        {
2116            chunksize=rightsize;
2117            leftstep=rightsize;
2118            rightstep=0;
2119            numsteps=m_left->getNumDPPSample();
2120            if (RS)
2121            {
2122               numsteps*=leftsize;
2123            }
2124        }
2125        else    // REN
2126        {
2127            chunksize=leftsize;
2128            rightstep=leftsize;
2129            leftstep=0;
2130            numsteps=m_right->getNumDPPSample();
2131            if (LS)
2132            {
2133               numsteps*=rightsize;
2134            }
2135        }
2136      }
2137    
2138      int resultStep=max(leftstep,rightstep);   // only one (at most) should be !=0
2139        // Get the values of sub-expressions
2140      const ValueType* left=m_left->resolveNodeSample(tid,sampleNo,lroffset);  
2141      const ValueType* right=m_right->resolveNodeSample(tid,sampleNo,rroffset);
2142    LAZYDEBUG(cout << "Post sub calls in " << toString() << endl;)
2143    LAZYDEBUG(cout << "shapes=" << DataTypes::shapeToString(m_left->getShape()) << "," << DataTypes::shapeToString(m_right->getShape()) << endl;)
2144    LAZYDEBUG(cout << "chunksize=" << chunksize << endl << "leftstep=" << leftstep << " rightstep=" << rightstep;)
2145    LAZYDEBUG(cout << " numsteps=" << numsteps << endl << "oleftstep=" << oleftstep << " orightstep=" << orightstep;)
2146    LAZYDEBUG(cout << "onumsteps=" << onumsteps << endl;)
2147    LAZYDEBUG(cout << " DPPS=" << m_left->getNumDPPSample() << "," <<m_right->getNumDPPSample() << endl;)
2148    LAZYDEBUG(cout << "" << LS << RS << LN << RN << LES << RES <<LEN << REN <<   endl;)
2149    
2150    LAZYDEBUG(cout << "Left res["<< lroffset<< "]=" << (*left)[lroffset] << endl;)
2151    LAZYDEBUG(cout << "Right res["<< rroffset<< "]=" << (*right)[rroffset] << endl;)
2152    
2153    
2154      roffset=m_samplesize*tid;
2155      double* resultp=&(m_samples[roffset]);        // results are stored at the vector offset we received
2156      switch(m_op)
2157      {
2158        case ADD:
2159            PROC_OP(NO_ARG,plus<double>());
2160        break;
2161        case SUB:
2162        PROC_OP(NO_ARG,minus<double>());
2163        break;
2164        case MUL:
2165        PROC_OP(NO_ARG,multiplies<double>());
2166        break;
2167        case DIV:
2168        PROC_OP(NO_ARG,divides<double>());
2169        break;
2170        case POW:
2171           PROC_OP(double (double,double),::pow);
2172        break;
2173        default:
2174        throw DataException("Programmer error - resolveBinary can not resolve operator "+opToString(m_op)+".");
2175      }
2176    LAZYDEBUG(cout << "Result res[" << roffset<< "]" << m_samples[roffset] << endl;)
2177      return &m_samples;
2178    }
2179    
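The stepping logic above exists to handle broadcasts such as 2+Vector, where one operand is a (possibly non-expanded) scalar and the other an expanded non-scalar; in that case every double of the expanded operand is paired with the single scalar value. A minimal sketch of that particular case on plain buffers (a hypothetical helper, not the PROC_OP machinery):

#include <cstddef>
#include <functional>
#include <cassert>

template <typename BinOp>
void scalarOpExpanded(double scalarLeft, const double* right, std::size_t n,
                      double* result, BinOp op)
{
    for (std::size_t i = 0; i < n; ++i)
        result[i] = op(scalarLeft, right[i]);   // the one scalar pairs with every value
}

int main()
{
    const double vec[3] = {1.0, 2.0, 3.0};
    double out[3];
    scalarOpExpanded(2.0, vec, 3, out, std::plus<double>());   // models 2 + Vector
    assert(out[0] == 3.0 && out[1] == 4.0 && out[2] == 5.0);
    return 0;
}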
2180    
2181    // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
2182    // have already been collapsed to IDENTITY. So we must have at least one expanded child.
2183    // unlike the other resolve helpers, we must treat these datapoints separately.
2184    const DataTypes::ValueType*
2185    DataLazy::resolveNodeTProd(int tid, int sampleNo, size_t& roffset)
2186    {
2187    LAZYDEBUG(cout << "Resolve TensorProduct: " << toString() << endl;)
2188    
2189      size_t lroffset=0, rroffset=0;    // offsets in the left and right result vectors
2190        // first work out which of the children are expanded
2191      bool leftExp=(m_left->m_readytype=='E');
2192      bool rightExp=(m_right->m_readytype=='E');
2193      int steps=getNumDPPSample();
2194      int leftStep=(leftExp? m_left->getNoValues() : 0);        // do not have scalars as input to this method
2195      int rightStep=(rightExp?m_right->getNoValues() : 0);
2196    
2197      int resultStep=getNoValues();
2198      roffset=m_samplesize*tid;
2199      size_t offset=roffset;
2200    
2201      const ValueType* left=m_left->resolveNodeSample(tid, sampleNo, lroffset);
2202    
2203      const ValueType* right=m_right->resolveNodeSample(tid, sampleNo, rroffset);
2204    
2205    LAZYDEBUG(cerr << "[Left shape]=" << DataTypes::shapeToString(m_left->getShape()) << "\n[Right shape]=" << DataTypes::shapeToString(m_right->getShape()) << " result=" <<DataTypes::shapeToString(getShape()) <<  endl;
2206    cout << getNoValues() << endl;)
2207    
2208    
2209    LAZYDEBUG(cerr << "Post sub calls: " << toString() << endl;)
2210    LAZYDEBUG(cout << "LeftExp=" << leftExp << " rightExp=" << rightExp << endl;)
2211    LAZYDEBUG(cout << "LeftR=" << m_left->getRank() << " rightExp=" << m_right->getRank() << endl;)
2212    LAZYDEBUG(cout << "LeftSize=" << m_left->getNoValues() << " RightSize=" << m_right->getNoValues() << endl;)
2213    LAZYDEBUG(cout << "m_samplesize=" << m_samplesize << endl;)
2214    LAZYDEBUG(cout << "outputshape=" << DataTypes::shapeToString(getShape()) << endl;)
2215    LAZYDEBUG(cout << "DPPS=" << m_right->getNumDPPSample() <<"."<<endl;)
2216    
2217      double* resultp=&(m_samples[offset]);     // results are stored at the vector offset we received
2218      switch(m_op)
2219      {
2220        case PROD:
2221        for (int i=0;i<steps;++i,resultp+=resultStep)
2222        {
2223              const double *ptr_0 = &((*left)[lroffset]);
2224              const double *ptr_1 = &((*right)[rroffset]);
2225    
2226    LAZYDEBUG(cout << DataTypes::pointToString(*left, m_left->getShape(),lroffset,"LEFT") << endl;)
2227    LAZYDEBUG(cout << DataTypes::pointToString(*right,m_right->getShape(),rroffset, "RIGHT") << endl;)
2228    
2229              matrix_matrix_product(m_SL, m_SM, m_SR, ptr_0, ptr_1, resultp, m_transpose);
2230    
2231          lroffset+=leftStep;
2232          rroffset+=rightStep;
2233        }
2234        break;
2235        default:
2236        throw DataException("Programmer error - resolveTProduct can not resolve operator "+opToString(m_op)+".");
2237      }
2238      roffset=offset;
2239      return &m_samples;
2240    }
2241    #endif //LAZY_NODE_STORAGE
2242    
2243  /*  /*
2244    \brief Compute the value of the expression for the given sample.    \brief Compute the value of the expression for the given sample.
# Line 1428  cout << "\nWritten to: " << resultp << " Line 2257  cout << "\nWritten to: " << resultp << "
2257    
2258  // the roffset is the offset within the returned vector where the data begins  // the roffset is the offset within the returned vector where the data begins
2259  const DataTypes::ValueType*  const DataTypes::ValueType*
2260  DataLazy::resolveSample(ValueType& v, size_t offset, int sampleNo, size_t& roffset)  DataLazy::resolveVectorSample(ValueType& v, size_t offset, int sampleNo, size_t& roffset)
2261  {  {
2262  LAZYDEBUG(cout << "Resolve sample " << toString() << endl;)  LAZYDEBUG(cout << "Resolve sample " << toString() << endl;)
2263      // collapse so we have a 'E' node or an IDENTITY for some other type      // collapse so we have a 'E' node or an IDENTITY for some other type
# Line 1461  LAZYDEBUG(cout << "Finish  sample " << t Line 2290  LAZYDEBUG(cout << "Finish  sample " << t
2290    case G_NP1OUT: return resolveNP1OUT(v, offset, sampleNo,roffset);    case G_NP1OUT: return resolveNP1OUT(v, offset, sampleNo,roffset);
2291    case G_NP1OUT_P: return resolveNP1OUT_P(v, offset, sampleNo,roffset);    case G_NP1OUT_P: return resolveNP1OUT_P(v, offset, sampleNo,roffset);
2292    case G_TENSORPROD: return resolveTProd(v,offset, sampleNo,roffset);    case G_TENSORPROD: return resolveTProd(v,offset, sampleNo,roffset);
2293      case G_NP1OUT_2P: return resolveNP1OUT_2P(v, offset, sampleNo, roffset);
2294      case G_REDUCTION: return resolveReduction(v, offset, sampleNo, roffset);
2295    default:    default:
2296      throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");      throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");
2297    }    }
# Line 1475  DataLazy::resolveSample(BufferGroup& bg, Line 2306  DataLazy::resolveSample(BufferGroup& bg,
2306  #else  #else
2307      int tid=0;      int tid=0;
2308  #endif  #endif
2309      return resolveSample(bg.getBuffer(tid),bg.getOffset(tid),sampleNo,roffset);  #ifdef LAZY_NODE_STORAGE
2310        return resolveNodeSample(tid, sampleNo, roffset);
2311    #else
2312        return resolveVectorSample(bg.getBuffer(tid),bg.getOffset(tid),sampleNo,roffset);
2313    #endif
2314  }  }
2315    
2316    
2317  // This needs to do the work of the idenity constructor  // This needs to do the work of the identity constructor
2318  void  void
2319  DataLazy::resolveToIdentity()  DataLazy::resolveToIdentity()
2320  {  {
2321     if (m_op==IDENTITY)     if (m_op==IDENTITY)
2322      return;      return;
2323     DataReady_ptr p=resolve();  #ifndef LAZY_NODE_STORAGE
2324       DataReady_ptr p=resolveVectorWorker();
2325    #else
2326       DataReady_ptr p=resolveNodeWorker();
2327    #endif
2328     makeIdentity(p);     makeIdentity(p);
2329  }  }
2330    
# Line 1508  void DataLazy::makeIdentity(const DataRe Line 2347  void DataLazy::makeIdentity(const DataRe
2347     m_right.reset();     m_right.reset();
2348  }  }
2349    
2350    
2351    DataReady_ptr
2352    DataLazy::resolve()
2353    {
2354        resolveToIdentity();
2355        return m_id;
2356    }
2357    
2358    #ifdef LAZY_NODE_STORAGE
2359    
2360    // This version of resolve uses storage in each node to hold results
2361    DataReady_ptr
2362    DataLazy::resolveNodeWorker()
2363    {
2364      if (m_readytype!='E')     // if the whole sub-expression is Constant or Tagged, then evaluate it normally
2365      {
2366        collapse();
2367      }
2368      if (m_op==IDENTITY)       // So a lazy expression of Constant or Tagged data will be returned here.
2369      {
2370        return m_id;
2371      }
2372        // from this point on we must have m_op!=IDENTITY and m_readytype=='E'
2373      DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(),  ValueType(getNoValues()));
2374      ValueType& resvec=result->getVectorRW();
2375      DataReady_ptr resptr=DataReady_ptr(result);
2376    
2377      int sample;
2378      int totalsamples=getNumSamples();
2379      const ValueType* res=0;   // Storage for answer
2380    LAZYDEBUG(cout << "Total number of samples=" <<totalsamples << endl;)
2381      #pragma omp parallel for private(sample,res) schedule(static)
2382      for (sample=0;sample<totalsamples;++sample)
2383      {
2384        size_t roffset=0;
2385    #ifdef _OPENMP
2386        res=resolveNodeSample(omp_get_thread_num(),sample,roffset);
2387    #else
2388        res=resolveNodeSample(0,sample,roffset);
2389    #endif
2390    LAZYDEBUG(cout << "Sample #" << sample << endl;)
2391    LAZYDEBUG(cout << "Final res[" << roffset<< "]=" << (*res)[roffset] << (*res)[roffset]<< endl; )
2392        DataVector::size_type outoffset=result->getPointOffset(sample,0);
2393        memcpy(&(resvec[outoffset]),&((*res)[roffset]),m_samplesize*sizeof(DataVector::ElementType));
2394      }
2395      return resptr;
2396    }
2397    
2398    #endif // LAZY_NODE_STORAGE
2399    
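[Editorial note, not part of the diff.] LAZY_NODE_STORAGE is a compile-time switch: it selects resolveNodeWorker/resolveNodeSample over the shared-vector path when the file is built, not at run time. Enabling it therefore means adding the preprocessor define to the C++ flags, e.g. (the build-system hook is an assumption; only the macro name comes from the code above):

    CXXFLAGS += -DLAZY_NODE_STORAGE    # hypothetical make-style fragment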
2400  // To simplify the memory management, all threads operate on one large vector, rather than one each.  // To simplify the memory management, all threads operate on one large vector, rather than one each.
2401  // Each sample is evaluated independently and copied into the result DataExpanded.  // Each sample is evaluated independently and copied into the result DataExpanded.
2402  DataReady_ptr  DataReady_ptr
2403  DataLazy::resolve()  DataLazy::resolveVectorWorker()
2404  {  {
2405    
2406  LAZYDEBUG(cout << "Sample size=" << m_samplesize << endl;)  LAZYDEBUG(cout << "Sample size=" << m_samplesize << endl;)
# Line 1545  LAZYDEBUG(cout << "Total number of sampl Line 2434  LAZYDEBUG(cout << "Total number of sampl
2434    #pragma omp parallel for private(sample,resoffset,outoffset,res) schedule(static)    #pragma omp parallel for private(sample,resoffset,outoffset,res) schedule(static)
2435    for (sample=0;sample<totalsamples;++sample)    for (sample=0;sample<totalsamples;++sample)
2436    {    {
       if (sample==0)  {ENABLEDEBUG}  
   
 //       if (sample==5800/getNumDPPSample())  {ENABLEDEBUG}  
2437  LAZYDEBUG(cout << "################################# " << sample << endl;)  LAZYDEBUG(cout << "################################# " << sample << endl;)
2438  #ifdef _OPENMP  #ifdef _OPENMP
2439      res=resolveSample(v,threadbuffersize*omp_get_thread_num(),sample,resoffset);      res=resolveVectorSample(v,threadbuffersize*omp_get_thread_num(),sample,resoffset);
2440  #else  #else
2441      res=resolveSample(v,0,sample,resoffset);   // res would normally be v, but not if its a single IDENTITY op.      res=resolveVectorSample(v,0,sample,resoffset);   // res would normally be v, but not if its a single IDENTITY op.
2442  #endif  #endif
2443  LAZYDEBUG(cerr << "-------------------------------- " << endl;)  LAZYDEBUG(cerr << "-------------------------------- " << endl;)
2444  LAZYDEBUG(cerr<< "Copying sample#" << sample << endl;)  LAZYDEBUG(cerr<< "Copying sample#" << sample << endl;)
# Line 1565  LAZYDEBUG(cerr << "outoffset=" << outoff Line 2451  LAZYDEBUG(cerr << "outoffset=" << outoff
2451      }      }
2452  LAZYDEBUG(cerr << DataTypes::pointToString(resvec,getShape(),outoffset-m_samplesize+DataTypes::noValues(getShape()),"Final result:") << endl;)  LAZYDEBUG(cerr << DataTypes::pointToString(resvec,getShape(),outoffset-m_samplesize+DataTypes::noValues(getShape()),"Final result:") << endl;)
2453  LAZYDEBUG(cerr << "*********************************" << endl;)  LAZYDEBUG(cerr << "*********************************" << endl;)
     DISABLEDEBUG  
2454    }    }
2455    return resptr;    return resptr;
2456  }  }
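[Editorial sketch, not part of the diff.] The comments at the top of resolveVectorWorker describe its buffer layout: a single shared ValueType is carved into one contiguous slice per OpenMP thread, and each thread passes the start of its own slice as the offset argument, so no locking is needed. A hedged restatement of that layout (the sizing expression and `buffersPerThread` are assumptions; only the offset arithmetic is taken from the calls visible above):

    // Sketch only: how the shared scratch vector is partitioned per thread.
    const int numthreads = omp_get_max_threads();                       // assumed helper
    const size_t threadbuffersize = m_samplesize * buffersPerThread;    // buffersPerThread: assumed
    ValueType v(numthreads * threadbuffersize);                         // one shared scratch vector
    // Thread t only touches [t*threadbuffersize, (t+1)*threadbuffersize):
    size_t resoffset = 0;
    const ValueType* res = resolveVectorSample(v, threadbuffersize * omp_get_thread_num(),
                                               sample, resoffset);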
# Line 1575  DataLazy::toString() const Line 2460  DataLazy::toString() const
2460  {  {
2461    ostringstream oss;    ostringstream oss;
2462    oss << "Lazy Data:";    oss << "Lazy Data:";
2463    intoString(oss);    if (escriptParams.getPRINT_LAZY_TREE()==0)
2464      {
2465          intoString(oss);
2466      }
2467      else
2468      {
2469        oss << endl;
2470        intoTreeString(oss,"");
2471      }
2472    return oss.str();    return oss.str();
2473  }  }
2474    
# Line 1616  DataLazy::intoString(ostringstream& oss) Line 2509  DataLazy::intoString(ostringstream& oss)
2509    case G_UNARY_P:    case G_UNARY_P:
2510    case G_NP1OUT:    case G_NP1OUT:
2511    case G_NP1OUT_P:    case G_NP1OUT_P:
2512      case G_REDUCTION:
2513      oss << opToString(m_op) << '(';      oss << opToString(m_op) << '(';
2514      m_left->intoString(oss);      m_left->intoString(oss);
2515      oss << ')';      oss << ')';
# Line 1627  DataLazy::intoString(ostringstream& oss) Line 2521  DataLazy::intoString(ostringstream& oss)
2521      m_right->intoString(oss);      m_right->intoString(oss);
2522      oss << ')';      oss << ')';
2523      break;      break;
2524      case G_NP1OUT_2P:
2525        oss << opToString(m_op) << '(';
2526        m_left->intoString(oss);
2527        oss << ", " << m_axis_offset << ", " << m_transpose;
2528        oss << ')';
2529        break;
2530      default:
2531        oss << "UNKNOWN";
2532      }
2533    }
2534    
2535    
2536    void
2537    DataLazy::intoTreeString(ostringstream& oss, string indent) const
2538    {
2539      oss << '[' << m_rank << ':' << setw(3) << m_samplesize << "] " << indent;
2540      switch (getOpgroup(m_op))
2541      {
2542      case G_IDENTITY:
2543        if (m_id->isExpanded())
2544        {
2545           oss << "E";
2546        }
2547        else if (m_id->isTagged())
2548        {
2549          oss << "T";
2550        }
2551        else if (m_id->isConstant())
2552        {
2553          oss << "C";
2554        }
2555        else
2556        {
2557          oss << "?";
2558        }
2559        oss << '@' << m_id.get() << endl;
2560        break;
2561      case G_BINARY:
2562        oss << opToString(m_op) << endl;
2563        indent+='.';
2564        m_left->intoTreeString(oss, indent);
2565        m_right->intoTreeString(oss, indent);
2566        break;
2567      case G_UNARY:
2568      case G_UNARY_P:
2569      case G_NP1OUT:
2570      case G_NP1OUT_P:
2571      case G_REDUCTION:
2572        oss << opToString(m_op) << endl;
2573        indent+='.';
2574        m_left->intoTreeString(oss, indent);
2575        break;
2576      case G_TENSORPROD:
2577        oss << opToString(m_op) << endl;
2578        indent+='.';
2579        m_left->intoTreeString(oss, indent);
2580        m_right->intoTreeString(oss, indent);
2581        break;
2582      case G_NP1OUT_2P:
2583        oss << opToString(m_op) << ", " << m_axis_offset << ", " << m_transpose<< endl;
2584        indent+='.';
2585        m_left->intoTreeString(oss, indent);
2586        break;
2587    default:    default:
2588      oss << "UNKNOWN";      oss << "UNKNOWN";
2589    }    }
2590  }  }
2591    
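[Editorial example, not part of the diff.] To make the tree format concrete: each node prints a "[rank:samplesize]" prefix, the indent grows by one '.' per level, and identity leaves print E/T/C plus the address of their DataReady. For a lazy expression like sin(A+B) with expanded A and constant B, the output might look like the following (the expression, rank, sample size, addresses, and the rendering of the operators as "sin" and "+" are all illustrative assumptions):

    [0:  1] sin
    [0:  1] .+
    [0:  1] ..E@0x7f3a50
    [0:  1] ..C@0x7f3a90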
2592    
2593  DataAbstract*  DataAbstract*
2594  DataLazy::deepCopy()  DataLazy::deepCopy()
2595  {  {
2596    switch (getOpgroup(m_op))    switch (getOpgroup(m_op))
2597    {    {
2598    case G_IDENTITY:  return new DataLazy(m_id->deepCopy()->getPtr());    case G_IDENTITY:  return new DataLazy(m_id->deepCopy()->getPtr());
2599    case G_UNARY: return new DataLazy(m_left->deepCopy()->getPtr(),m_op);    case G_UNARY:
2600      case G_REDUCTION:      return new DataLazy(m_left->deepCopy()->getPtr(),m_op);
2601      case G_UNARY_P:   return new DataLazy(m_left->deepCopy()->getPtr(), m_op, m_tol);
2602    case G_BINARY:    return new DataLazy(m_left->deepCopy()->getPtr(),m_right->deepCopy()->getPtr(),m_op);    case G_BINARY:    return new DataLazy(m_left->deepCopy()->getPtr(),m_right->deepCopy()->getPtr(),m_op);
2603    case G_NP1OUT: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(),m_op);    case G_NP1OUT: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(),m_op);
2604    case G_TENSORPROD: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);    case G_TENSORPROD: return new DataLazy(m_left->deepCopy()->getPtr(), m_right->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
2605      case G_NP1OUT_P:   return new DataLazy(m_left->deepCopy()->getPtr(),m_op,  m_axis_offset);
2606      case G_NP1OUT_2P:  return new DataLazy(m_left->deepCopy()->getPtr(), m_op, m_axis_offset, m_transpose);
2607    default:    default:
2608      throw DataException("Programmer error - do not know how to deepcopy operator "+opToString(m_op)+".");      throw DataException("Programmer error - do not know how to deepcopy operator "+opToString(m_op)+".");
2609    }    }
2610  }  }
2611    
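[Editorial sketch, not part of the diff.] Every case in the dispatch above deep-copies its children first and then rebuilds an equivalent node with the same operator and parameters, so the copy shares no DataAbstract instances with the original tree. A hedged usage sketch (variable names are assumptions):

    // Sketch only: `lazy` is an assumed DataLazy* for some expression tree.
    DataAbstract* copy = lazy->deepCopy();   // recursively copies children, then rebuilds this node
    DataAbstract_ptr owned(copy->getPtr());  // assumed smart-pointer wrap
    // `owned` can now be resolved independently of the tree behind `lazy`.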
2612    
2613    
2614  // There is no single, natural interpretation of getLength on DataLazy.  // There is no single, natural interpretation of getLength on DataLazy.
2615  // Instances of DataReady can look at the size of their vectors.  // Instances of DataReady can look at the size of their vectors.
2616  // For lazy though, it could be the size the data would be if it were resolved;  // For lazy though, it could be the size the data would be if it were resolved;
# Line 1736  DataLazy::setToZero() Line 2699  DataLazy::setToZero()
2699  //   m_readytype='C';  //   m_readytype='C';
2700  //   m_buffsRequired=1;  //   m_buffsRequired=1;
2701    
2702      privdebug=privdebug;  // to stop the compiler complaining about unused privdebug
2703    throw DataException("Programmer error - setToZero not supported for DataLazy (DataLazy objects should be read only).");    throw DataException("Programmer error - setToZero not supported for DataLazy (DataLazy objects should be read only).");
   
2704  }  }
2705    
2706  bool  bool
