/[escript]/branches/clazy/escriptcore/src/DataLazy.cpp
ViewVC logotype

Diff of /branches/clazy/escriptcore/src/DataLazy.cpp

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 2147 by jfenwick, Wed Dec 10 04:41:26 2008 UTC revision 2157 by jfenwick, Mon Dec 15 06:05:58 2008 UTC
# Line 28  Line 28 
28  #include "UnaryFuncs.h"     // for escript::fsign  #include "UnaryFuncs.h"     // for escript::fsign
29  #include "Utils.h"  #include "Utils.h"
30    
31  // #define LAZYDEBUG(X) X;  // #define LAZYDEBUG(X) if (privdebug){X;}
32  #define LAZYDEBUG(X)  #define LAZYDEBUG(X)
33    namespace
34    {
35    bool privdebug=false;
36    
37    #define ENABLEDEBUG privdebug=true;
38    #define DISABLEDEBUG privdebug=false;
39    }
40    
41  /*  /*
42  How does DataLazy work?  How does DataLazy work?
# Line 285  GTPShape(DataAbstract_ptr left, DataAbst Line 292  GTPShape(DataAbstract_ptr left, DataAbst
292  // determine the number of samples requires to evaluate an expression combining left and right  // determine the number of samples requires to evaluate an expression combining left and right
293  // NP1OUT needs an extra buffer because we can't write the answers over the top of the input.  // NP1OUT needs an extra buffer because we can't write the answers over the top of the input.
294  // The same goes for G_TENSORPROD  // The same goes for G_TENSORPROD
295    // It might seem that pointwise binary ops (G_BINARY) could be written over the top of the lefths.
296    // This would be true were it not for the possibility that the LHS could be a scalar which needs to be examined
297    // multiple times
298  int  int
299  calcBuffs(const DataLazy_ptr& left, const DataLazy_ptr& right, ES_optype op)  calcBuffs(const DataLazy_ptr& left, const DataLazy_ptr& right, ES_optype op)
300  {  {
301     switch(getOpgroup(op))     switch(getOpgroup(op))
302     {     {
303     case G_IDENTITY: return 1;     case G_IDENTITY: return 1;
304     case G_BINARY: return max(left->getBuffsRequired(),right->getBuffsRequired()+1);     case G_BINARY: return 1+max(left->getBuffsRequired(),right->getBuffsRequired()+1);
305     case G_UNARY:     case G_UNARY:
306     case G_UNARY_P: return max(left->getBuffsRequired(),1);     case G_UNARY_P: return max(left->getBuffsRequired(),1);
307     case G_NP1OUT: return 1+max(left->getBuffsRequired(),1);     case G_NP1OUT: return 1+max(left->getBuffsRequired(),1);
# Line 912  DataLazy::resolveNP1OUT(ValueType& v, si Line 922  DataLazy::resolveNP1OUT(ValueType& v, si
922    }    }
923      // since we can't write the result over the input, we need a result offset further along      // since we can't write the result over the input, we need a result offset further along
924    size_t subroffset=roffset+m_samplesize;    size_t subroffset=roffset+m_samplesize;
925    const ValueType* vleft=m_left->resolveSample(v,offset,sampleNo,subroffset);  LAZYDEBUG(cerr << "subroffset=" << subroffset << endl;)
926      const ValueType* vleft=m_left->resolveSample(v,offset+m_left->m_samplesize,sampleNo,subroffset);
927    roffset=offset;    roffset=offset;
928      size_t loop=0;
929      size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
930      size_t step=getNoValues();
931    switch (m_op)    switch (m_op)
932    {    {
933      case SYM:      case SYM:
934      DataMaths::symmetric(*vleft,m_left->getShape(),subroffset, v, getShape(), offset);      for (loop=0;loop<numsteps;++loop)
935        {
936            DataMaths::symmetric(*vleft,m_left->getShape(),subroffset, v, getShape(), offset);
937            subroffset+=step;
938            offset+=step;
939        }
940      break;      break;
941      case NSYM:      case NSYM:
942      DataMaths::nonsymmetric(*vleft,m_left->getShape(),subroffset, v, getShape(), offset);      for (loop=0;loop<numsteps;++loop)
943        {
944            DataMaths::nonsymmetric(*vleft,m_left->getShape(),subroffset, v, getShape(), offset);
945            subroffset+=step;
946            offset+=step;
947        }
948      break;      break;
949      default:      default:
950      throw DataException("Programmer error - resolveNP1OUT can not resolve operator "+opToString(m_op)+".");      throw DataException("Programmer error - resolveNP1OUT can not resolve operator "+opToString(m_op)+".");
# Line 951  DataLazy::resolveNP1OUT_P(ValueType& v, Line 975  DataLazy::resolveNP1OUT_P(ValueType& v,
975      throw DataException("Programmer error - resolveNP1OUT_P should only be called on expanded Data.");      throw DataException("Programmer error - resolveNP1OUT_P should only be called on expanded Data.");
976    }    }
977      // since we can't write the result over the input, we need a result offset further along      // since we can't write the result over the input, we need a result offset further along
978    size_t subroffset=roffset+m_samplesize;    size_t subroffset;
979    const ValueType* vleft=m_left->resolveSample(v,offset,sampleNo,subroffset);    const ValueType* vleft=m_left->resolveSample(v,offset+m_left->m_samplesize,sampleNo,subroffset);
980    LAZYDEBUG(cerr << "from=" << offset+m_left->m_samplesize << " to=" << subroffset << " ret=" << roffset << endl;)
981    roffset=offset;    roffset=offset;
982      size_t loop=0;
983      size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
984      size_t step=getNoValues();
985    switch (m_op)    switch (m_op)
986    {    {
987      case TRACE:      case TRACE:
988           DataMaths::trace(*vleft,m_left->getShape(),subroffset, v,getShape(),offset,m_axis_offset);      for (loop=0;loop<numsteps;++loop)
989        {
990                DataMaths::trace(*vleft,m_left->getShape(),subroffset, v,getShape(),offset,m_axis_offset);
991            subroffset+=step;
992            offset+=step;
993        }
994      break;      break;
995      case TRANS:      case TRANS:
996           DataMaths::transpose(*vleft,m_left->getShape(),subroffset, v,getShape(),offset,m_axis_offset);      for (loop=0;loop<numsteps;++loop)
997        {
998                DataMaths::transpose(*vleft,m_left->getShape(),subroffset, v,getShape(),offset,m_axis_offset);
999            subroffset+=step;
1000            offset+=step;
1001        }
1002      break;      break;
1003      default:      default:
1004      throw DataException("Programmer error - resolveNP1OUTP can not resolve operator "+opToString(m_op)+".");      throw DataException("Programmer error - resolveNP1OUTP can not resolve operator "+opToString(m_op)+".");
# Line 970  DataLazy::resolveNP1OUT_P(ValueType& v, Line 1008  DataLazy::resolveNP1OUT_P(ValueType& v,
1008    
1009    
1010  #define PROC_OP(TYPE,X)                               \  #define PROC_OP(TYPE,X)                               \
1011      for (int i=0;i<steps;++i,resultp+=resultStep) \      for (int j=0;j<onumsteps;++j)\
1012      { \      {\
1013         tensor_binary_operation< TYPE >(chunksize, &((*left)[lroffset]), &((*right)[rroffset]), resultp, X); \        for (int i=0;i<numsteps;++i,resultp+=resultStep) \
1014         lroffset+=leftStep; \        { \
1015         rroffset+=rightStep; \  LAZYDEBUG(cout << "[left,right]=[" << lroffset << "," << rroffset << "]" << endl;)\
1016    LAZYDEBUG(cout << "{left,right}={" << (*left)[lroffset] << "," << (*right)[rroffset] << "}\n";)\
1017             tensor_binary_operation< TYPE >(chunksize, &((*left)[lroffset]), &((*right)[rroffset]), resultp, X); \
1018    LAZYDEBUG(cout << " result=      " << resultp[0] << endl;) \
1019             lroffset+=leftstep; \
1020             rroffset+=rightstep; \
1021          }\
1022          lroffset+=oleftstep;\
1023          rroffset+=orightstep;\
1024      }      }
1025    
1026  /*  /*
# Line 1013  LAZYDEBUG(cout << "Resolve binary: " << Line 1059  LAZYDEBUG(cout << "Resolve binary: " <<
1059    }    }
1060    bool leftScalar=(m_left->getRank()==0);    bool leftScalar=(m_left->getRank()==0);
1061    bool rightScalar=(m_right->getRank()==0);    bool rightScalar=(m_right->getRank()==0);
1062    bool bigloops=((leftExp && rightExp) || (!leftExp && !rightExp)); // is processing in single step?    if ((m_left->getRank()!=m_right->getRank()) && (!leftScalar && !rightScalar))
   int steps=(bigloops?1:getNumDPPSample());  
   size_t chunksize=(bigloops? m_samplesize : getNoValues());    // if bigloops, pretend the whole sample is a datapoint  
   if (m_left->getRank()!=m_right->getRank())    // need to deal with scalar * ? ops  
1063    {    {
1064      if (!leftScalar && !rightScalar)      throw DataException("resolveBinary - ranks of arguments must match unless one of them is scalar.");
1065      }
1066      size_t leftsize=m_left->getNoValues();
1067      size_t rightsize=m_right->getNoValues();
1068      size_t chunksize=1;           // how many doubles will be processed in one go
1069      int leftstep=0;       // how far should the left offset advance after each step
1070      int rightstep=0;
1071      int numsteps=0;       // total number of steps for the inner loop
1072      int oleftstep=0;  // the o variables refer to the outer loop
1073      int orightstep=0; // The outer loop is only required in cases where there is an extended scalar
1074      int onumsteps=1;
1075      
1076      bool LES=(leftExp && leftScalar); // Left is an expanded scalar
1077      bool RES=(rightExp && rightScalar);
1078      bool LS=(!leftExp && leftScalar); // left is a single scalar
1079      bool RS=(!rightExp && rightScalar);
1080      bool LN=(!leftExp && !leftScalar);    // left is a single non-scalar
1081      bool RN=(!rightExp && !rightScalar);
1082      bool LEN=(leftExp && !leftScalar);    // left is an expanded non-scalar
1083      bool REN=(rightExp && !rightScalar);
1084    
1085      if ((LES && RES) || (LEN && REN)) // both are Expanded scalars or both are expanded non-scalars
1086      {
1087        chunksize=m_left->getNumDPPSample()*leftsize;
1088        leftstep=0;
1089        rightstep=0;
1090        numsteps=1;
1091      }
1092      else if (LES || RES)
1093      {
1094        chunksize=1;
1095        if (LES)        // left is an expanded scalar
1096      {      {
1097         throw DataException("resolveBinary - ranks of arguments must match unless one of them is scalar.");          if (RS)
1098            {
1099               leftstep=1;
1100               rightstep=0;
1101               numsteps=m_left->getNumDPPSample();
1102            }
1103            else        // RN or REN
1104            {
1105               leftstep=0;
1106               oleftstep=1;
1107               rightstep=1;
1108               orightstep=(RN?-rightsize:0);
1109               numsteps=rightsize;
1110               onumsteps=m_left->getNumDPPSample();
1111            }
1112      }      }
1113      steps=getNumDPPSample()*max(m_left->getNoValues(),m_right->getNoValues());      else        // right is an expanded scalar
1114      chunksize=1;    // for scalar      {
1115    }              if (LS)
1116    int leftStep=((leftExp && (!rightExp || rightScalar))? m_right->getNoValues() : 0);          {
1117    int rightStep=((rightExp && (!leftExp || leftScalar))? m_left->getNoValues() : 0);             rightstep=1;
1118    int resultStep=max(leftStep,rightStep);   // only one (at most) should be !=0             leftstep=0;
1119               numsteps=m_right->getNumDPPSample();
1120            }
1121            else
1122            {
1123               rightstep=0;
1124               orightstep=1;
1125               leftstep=1;
1126               oleftstep=(LN?-leftsize:0);
1127               numsteps=leftsize;
1128               onumsteps=m_right->getNumDPPSample();
1129            }
1130        }
1131      }
1132      else  // this leaves (LEN, RS), (LEN, RN) and their transposes
1133      {
1134        if (LEN)    // and Right will be a single value
1135        {
1136            chunksize=rightsize;
1137            leftstep=rightsize;
1138            rightstep=0;
1139            numsteps=m_left->getNumDPPSample();
1140            if (RS)
1141            {
1142               numsteps*=leftsize;
1143            }
1144        }
1145        else    // REN
1146        {
1147            chunksize=leftsize;
1148            rightstep=leftsize;
1149            leftstep=0;
1150            numsteps=m_right->getNumDPPSample();
1151            if (LS)
1152            {
1153               numsteps*=rightsize;
1154            }
1155        }
1156      }
1157    
1158      int resultStep=max(leftstep,rightstep);   // only one (at most) should be !=0
1159      // Get the values of sub-expressions      // Get the values of sub-expressions
1160    const ValueType* left=m_left->resolveSample(v,offset,sampleNo,lroffset);    const ValueType* left=m_left->resolveSample(v,offset+getMaxSampleSize(),sampleNo,lroffset);   // see note on
1161    const ValueType* right=m_right->resolveSample(v,offset+m_samplesize,sampleNo,rroffset); // Note      // calcBufss for why we can't put offset as the 2nd param above
1162      const ValueType* right=m_right->resolveSample(v,offset+2*getMaxSampleSize(),sampleNo,rroffset); // Note
1163      // the right child starts further along.      // the right child starts further along.
1164    LAZYDEBUG(cout << "Post sub calls in " << toString() << endl;)
1165    LAZYDEBUG(cout << "shapes=" << DataTypes::shapeToString(m_left->getShape()) << "," << DataTypes::shapeToString(m_right->getShape()) << endl;)
1166    LAZYDEBUG(cout << "chunksize=" << chunksize << endl << "leftstep=" << leftstep << " rightstep=" << rightstep;)
1167    LAZYDEBUG(cout << " numsteps=" << numsteps << endl << "oleftstep=" << oleftstep << " orightstep=" << orightstep;)
1168    LAZYDEBUG(cout << "onumsteps=" << onumsteps << endl;)
1169    LAZYDEBUG(cout << " DPPS=" << m_left->getNumDPPSample() << "," <<m_right->getNumDPPSample() << endl;)
1170    LAZYDEBUG(cout << "" << LS << RS << LN << RN << LES << RES <<LEN << REN <<   endl;)
1171    double* resultp=&(v[offset]);     // results are stored at the vector offset we recieved    double* resultp=&(v[offset]);     // results are stored at the vector offset we recieved
1172    switch(m_op)    switch(m_op)
1173    {    {
# Line 1058  LAZYDEBUG(cout << "Resolve binary: " << Line 1194  LAZYDEBUG(cout << "Resolve binary: " <<
1194  }  }
1195    
1196    
1197    
1198  /*  /*
1199    \brief Compute the value of the expression (tensor product) for the given sample.    \brief Compute the value of the expression (tensor product) for the given sample.
1200    \return Vector which stores the value of the subexpression for the given sample.    \return Vector which stores the value of the subexpression for the given sample.
# Line 1087  LAZYDEBUG(cout << "Resolve TensorProduct Line 1224  LAZYDEBUG(cout << "Resolve TensorProduct
1224    int rightStep=((rightExp && !leftExp)? m_left->getNoValues() : 0);    int rightStep=((rightExp && !leftExp)? m_left->getNoValues() : 0);
1225    int resultStep=max(leftStep,rightStep);   // only one (at most) should be !=0    int resultStep=max(leftStep,rightStep);   // only one (at most) should be !=0
1226      // Get the values of sub-expressions (leave a gap of one sample for the result).      // Get the values of sub-expressions (leave a gap of one sample for the result).
1227    const ValueType* left=m_left->resolveSample(v,offset+m_samplesize,sampleNo,lroffset);    int gap=offset+m_left->getMaxSampleSize();    // actually only needs to be m_left->m_samplesize
1228    const ValueType* right=m_right->resolveSample(v,offset+2*m_samplesize,sampleNo,rroffset);    const ValueType* left=m_left->resolveSample(v,gap,sampleNo,lroffset);
1229      gap+=m_right->getMaxSampleSize();
1230      const ValueType* right=m_right->resolveSample(v,gap,sampleNo,rroffset);
1231    LAZYDEBUG(cout << "Post sub calls: " << toString() << endl;)
1232    LAZYDEBUG(cout << "LeftExp=" << leftExp << " rightExp=" << rightExp << endl;)
1233    LAZYDEBUG(cout << "LeftR=" << m_left->getRank() << " rightExp=" << m_right->getRank() << endl;)
1234    LAZYDEBUG(cout << "LeftSize=" << m_left->getNoValues() << " RightSize=" << m_right->getNoValues() << endl;)
1235    LAZYDEBUG(cout << "m_samplesize=" << m_samplesize << endl;)
1236    LAZYDEBUG(cout << "outputshape=" << DataTypes::shapeToString(getShape()) << endl;)
1237    double* resultp=&(v[offset]);     // results are stored at the vector offset we recieved    double* resultp=&(v[offset]);     // results are stored at the vector offset we recieved
1238    switch(m_op)    switch(m_op)
1239    {    {
1240      case PROD:      case PROD:
1241      for (int i=0;i<steps;++i,resultp+=resultStep)      for (int i=0;i<steps;++i,resultp+=resultStep)
1242      {      {
1243    LAZYDEBUG(cout << "lroffset=" << lroffset << "rroffset=" << rroffset << endl;)
1244    LAZYDEBUG(cout << "l*=" << left << " r*=" << right << endl;)
1245    LAZYDEBUG(cout << "m_SL=" << m_SL << " m_SM=" << m_SM << " m_SR=" << m_SR << endl;)
1246            const double *ptr_0 = &((*left)[lroffset]);            const double *ptr_0 = &((*left)[lroffset]);
1247            const double *ptr_1 = &((*right)[rroffset]);            const double *ptr_1 = &((*right)[rroffset]);
1248    LAZYDEBUG(cout << DataTypes::pointToString(*left, m_left->getShape(),lroffset,"LEFT") << endl;)
1249    LAZYDEBUG(cout << DataTypes::pointToString(*right,m_right->getShape(),rroffset, "RIGHT") << endl;)
1250            matrix_matrix_product(m_SL, m_SM, m_SR, ptr_0, ptr_1, resultp, m_transpose);            matrix_matrix_product(m_SL, m_SM, m_SR, ptr_0, ptr_1, resultp, m_transpose);
1251        lroffset+=leftStep;        lroffset+=leftStep;
1252        rroffset+=rightStep;        rroffset+=rightStep;
# Line 1142  LAZYDEBUG(cout << "Resolve sample " << t Line 1292  LAZYDEBUG(cout << "Resolve sample " << t
1292      if (m_readytype=='C')      if (m_readytype=='C')
1293      {      {
1294      roffset=0;      roffset=0;
1295    LAZYDEBUG(cout << "Finish  sample " << toString() << endl;)
1296      return &(vec);      return &(vec);
1297      }      }
1298      roffset=m_id->getPointOffset(sampleNo, 0);      roffset=m_id->getPointOffset(sampleNo, 0);
1299    LAZYDEBUG(cout << "Finish  sample " << toString() << endl;)
1300      return &(vec);      return &(vec);
1301    }    }
1302    if (m_readytype!='E')    if (m_readytype!='E')
# Line 1162  LAZYDEBUG(cout << "Resolve sample " << t Line 1314  LAZYDEBUG(cout << "Resolve sample " << t
1314    default:    default:
1315      throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");      throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");
1316    }    }
1317    
1318  }  }
1319    
1320    
# Line 1199  LAZYDEBUG(cout << "Buffer created with s Line 1352  LAZYDEBUG(cout << "Buffer created with s
1352    int totalsamples=getNumSamples();    int totalsamples=getNumSamples();
1353    const ValueType* res=0;   // Vector storing the answer    const ValueType* res=0;   // Vector storing the answer
1354    size_t resoffset=0;       // where in the vector to find the answer    size_t resoffset=0;       // where in the vector to find the answer
1355    LAZYDEBUG(cout << "Total number of samples=" <<totalsamples << endl;)
1356    #pragma omp parallel for private(sample,resoffset,outoffset,res) schedule(static)    #pragma omp parallel for private(sample,resoffset,outoffset,res) schedule(static)
1357    for (sample=0;sample<totalsamples;++sample)    for (sample=0;sample<totalsamples;++sample)
1358    {    {
1359          if (sample==0)  {ENABLEDEBUG}
1360  LAZYDEBUG(cout << "################################# " << sample << endl;)  LAZYDEBUG(cout << "################################# " << sample << endl;)
1361  #ifdef _OPENMP  #ifdef _OPENMP
1362      res=resolveSample(v,threadbuffersize*omp_get_thread_num(),sample,resoffset);      res=resolveSample(v,threadbuffersize*omp_get_thread_num(),sample,resoffset);
# Line 1209  LAZYDEBUG(cout << "##################### Line 1364  LAZYDEBUG(cout << "#####################
1364      res=resolveSample(v,0,sample,resoffset);   // res would normally be v, but not if its a single IDENTITY op.      res=resolveSample(v,0,sample,resoffset);   // res would normally be v, but not if its a single IDENTITY op.
1365  #endif  #endif
1366  LAZYDEBUG(cerr << "-------------------------------- " << endl;)  LAZYDEBUG(cerr << "-------------------------------- " << endl;)
1367    LAZYDEBUG(cerr<< "Copying sample#" << sample << endl;)
1368      outoffset=result->getPointOffset(sample,0);      outoffset=result->getPointOffset(sample,0);
1369  LAZYDEBUG(cerr << "offset=" << outoffset << endl;)  LAZYDEBUG(cerr << "offset=" << outoffset << " from offset=" << resoffset << " " << m_samplesize << " doubles" << endl;)
1370      for (unsigned int i=0;i<m_samplesize;++i,++outoffset,++resoffset)   // copy values into the output vector      for (unsigned int i=0;i<m_samplesize;++i,++outoffset,++resoffset)   // copy values into the output vector
1371      {      {
1372    // LAZYDEBUG(cerr << "outoffset=" << outoffset << " resoffset=" << resoffset << endl;)
1373      resvec[outoffset]=(*res)[resoffset];      resvec[outoffset]=(*res)[resoffset];
1374      }      }
1375    LAZYDEBUG(cerr << DataTypes::pointToString(resvec,getShape(),outoffset-m_samplesize+DataTypes::noValues(getShape()),"Final result:") << endl;)
1376  LAZYDEBUG(cerr << "*********************************" << endl;)  LAZYDEBUG(cerr << "*********************************" << endl;)
1377        DISABLEDEBUG
1378    }    }
1379    return resptr;    return resptr;
1380  }  }

Legend:
Removed from v.2147  
changed lines
  Added in v.2157

  ViewVC Help
Powered by ViewVC 1.1.26