/[escript]/branches/clazy/escriptcore/src/DataLazy.cpp
ViewVC logotype

Diff of /branches/clazy/escriptcore/src/DataLazy.cpp

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 2147 by jfenwick, Wed Dec 10 04:41:26 2008 UTC revision 2171 by phornby, Wed Dec 17 08:21:29 2008 UTC
# Line 28  Line 28 
28  #include "UnaryFuncs.h"     // for escript::fsign  #include "UnaryFuncs.h"     // for escript::fsign
29  #include "Utils.h"  #include "Utils.h"
30    
31  // #define LAZYDEBUG(X) X;  // #define LAZYDEBUG(X) if (privdebug){X;}
32  #define LAZYDEBUG(X)  #define LAZYDEBUG(X)
33    namespace
34    {
35    bool privdebug=false;
36    
37    #define ENABLEDEBUG privdebug=true;
38    #define DISABLEDEBUG privdebug=false;
39    }
40    
41  /*  /*
42  How does DataLazy work?  How does DataLazy work?
# Line 285  GTPShape(DataAbstract_ptr left, DataAbst Line 292  GTPShape(DataAbstract_ptr left, DataAbst
292  // determine the number of samples requires to evaluate an expression combining left and right  // determine the number of samples requires to evaluate an expression combining left and right
293  // NP1OUT needs an extra buffer because we can't write the answers over the top of the input.  // NP1OUT needs an extra buffer because we can't write the answers over the top of the input.
294  // The same goes for G_TENSORPROD  // The same goes for G_TENSORPROD
295    // It might seem that pointwise binary ops (G_BINARY) could be written over the top of the lefths.
296    // This would be true were it not for the possibility that the LHS could be a scalar which needs to be examined
297    // multiple times
298  int  int
299  calcBuffs(const DataLazy_ptr& left, const DataLazy_ptr& right, ES_optype op)  calcBuffs(const DataLazy_ptr& left, const DataLazy_ptr& right, ES_optype op)
300  {  {
301     switch(getOpgroup(op))     switch(getOpgroup(op))
302     {     {
303     case G_IDENTITY: return 1;     case G_IDENTITY: return 1;
304     case G_BINARY: return max(left->getBuffsRequired(),right->getBuffsRequired()+1);     case G_BINARY: return 1+max(left->getBuffsRequired(),right->getBuffsRequired()+1);
305     case G_UNARY:     case G_UNARY:
306     case G_UNARY_P: return max(left->getBuffsRequired(),1);     case G_UNARY_P: return max(left->getBuffsRequired(),1);
307     case G_NP1OUT: return 1+max(left->getBuffsRequired(),1);     case G_NP1OUT: return 1+max(left->getBuffsRequired(),1);
# Line 912  DataLazy::resolveNP1OUT(ValueType& v, si Line 922  DataLazy::resolveNP1OUT(ValueType& v, si
922    }    }
923      // since we can't write the result over the input, we need a result offset further along      // since we can't write the result over the input, we need a result offset further along
924    size_t subroffset=roffset+m_samplesize;    size_t subroffset=roffset+m_samplesize;
925    const ValueType* vleft=m_left->resolveSample(v,offset,sampleNo,subroffset);  LAZYDEBUG(cerr << "subroffset=" << subroffset << endl;)
926      const ValueType* vleft=m_left->resolveSample(v,offset+m_left->m_samplesize,sampleNo,subroffset);
927    roffset=offset;    roffset=offset;
928      size_t loop=0;
929      size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
930      size_t step=getNoValues();
931    switch (m_op)    switch (m_op)
932    {    {
933      case SYM:      case SYM:
934      DataMaths::symmetric(*vleft,m_left->getShape(),subroffset, v, getShape(), offset);      for (loop=0;loop<numsteps;++loop)
935        {
936            DataMaths::symmetric(*vleft,m_left->getShape(),subroffset, v, getShape(), offset);
937            subroffset+=step;
938            offset+=step;
939        }
940      break;      break;
941      case NSYM:      case NSYM:
942      DataMaths::nonsymmetric(*vleft,m_left->getShape(),subroffset, v, getShape(), offset);      for (loop=0;loop<numsteps;++loop)
943        {
944            DataMaths::nonsymmetric(*vleft,m_left->getShape(),subroffset, v, getShape(), offset);
945            subroffset+=step;
946            offset+=step;
947        }
948      break;      break;
949      default:      default:
950      throw DataException("Programmer error - resolveNP1OUT can not resolve operator "+opToString(m_op)+".");      throw DataException("Programmer error - resolveNP1OUT can not resolve operator "+opToString(m_op)+".");
# Line 951  DataLazy::resolveNP1OUT_P(ValueType& v, Line 975  DataLazy::resolveNP1OUT_P(ValueType& v,
975      throw DataException("Programmer error - resolveNP1OUT_P should only be called on expanded Data.");      throw DataException("Programmer error - resolveNP1OUT_P should only be called on expanded Data.");
976    }    }
977      // since we can't write the result over the input, we need a result offset further along      // since we can't write the result over the input, we need a result offset further along
978    size_t subroffset=roffset+m_samplesize;    size_t subroffset;
979    const ValueType* vleft=m_left->resolveSample(v,offset,sampleNo,subroffset);    const ValueType* vleft=m_left->resolveSample(v,offset+m_left->m_samplesize,sampleNo,subroffset);
980    LAZYDEBUG(cerr << "srcsamplesize=" << offset+m_left->m_samplesize << " beg=" << subroffset << endl;)
981    LAZYDEBUG(cerr << "Offset for 5800=" << getPointOffset(5800/getNumDPPSample(),5800%getNumDPPSample()) << endl;)
982    roffset=offset;    roffset=offset;
983      size_t loop=0;
984      size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
985      size_t outstep=getNoValues();
986      size_t instep=m_left->getNoValues();
987    LAZYDEBUG(cerr << "instep=" << instep << " outstep=" << outstep<< " numsteps=" << numsteps << endl;)
988    switch (m_op)    switch (m_op)
989    {    {
990      case TRACE:      case TRACE:
991           DataMaths::trace(*vleft,m_left->getShape(),subroffset, v,getShape(),offset,m_axis_offset);      for (loop=0;loop<numsteps;++loop)
992        {
993    size_t zz=sampleNo*getNumDPPSample()+loop;
994    if (zz==5800)
995    {
996    LAZYDEBUG(cerr << "point=" <<  zz<< endl;)
997    LAZYDEBUG(cerr << "Input to  trace=" << DataTypes::pointToString(*vleft,m_left->getShape(),subroffset,"") << endl;)
998    LAZYDEBUG(cerr << "Offset for point=" << getPointOffset(5800/getNumDPPSample(),5800%getNumDPPSample()) << " vs ";)
999    LAZYDEBUG(cerr << subroffset << endl;)
1000    LAZYDEBUG(cerr << "output=" << offset << endl;)
1001    }
1002                DataMaths::trace(*vleft,m_left->getShape(),subroffset, v ,getShape(),offset,m_axis_offset);
1003    if (zz==5800)
1004    {
1005    LAZYDEBUG(cerr << "Result of trace=" << DataTypes::pointToString(v,getShape(),offset,"") << endl;)
1006    }
1007            subroffset+=instep;
1008            offset+=outstep;
1009        }
1010      break;      break;
1011      case TRANS:      case TRANS:
1012           DataMaths::transpose(*vleft,m_left->getShape(),subroffset, v,getShape(),offset,m_axis_offset);      for (loop=0;loop<numsteps;++loop)
1013        {
1014                DataMaths::transpose(*vleft,m_left->getShape(),subroffset, v,getShape(),offset,m_axis_offset);
1015            subroffset+=instep;
1016            offset+=outstep;
1017        }
1018      break;      break;
1019      default:      default:
1020      throw DataException("Programmer error - resolveNP1OUTP can not resolve operator "+opToString(m_op)+".");      throw DataException("Programmer error - resolveNP1OUTP can not resolve operator "+opToString(m_op)+".");
# Line 970  DataLazy::resolveNP1OUT_P(ValueType& v, Line 1024  DataLazy::resolveNP1OUT_P(ValueType& v,
1024    
1025    
1026  #define PROC_OP(TYPE,X)                               \  #define PROC_OP(TYPE,X)                               \
1027      for (int i=0;i<steps;++i,resultp+=resultStep) \      for (int j=0;j<onumsteps;++j)\
1028      { \      {\
1029         tensor_binary_operation< TYPE >(chunksize, &((*left)[lroffset]), &((*right)[rroffset]), resultp, X); \        for (int i=0;i<numsteps;++i,resultp+=resultStep) \
1030         lroffset+=leftStep; \        { \
1031         rroffset+=rightStep; \  LAZYDEBUG(cout << "[left,right]=[" << lroffset << "," << rroffset << "]" << endl;)\
1032    LAZYDEBUG(cout << "{left,right}={" << (*left)[lroffset] << "," << (*right)[rroffset] << "}\n";)\
1033             tensor_binary_operation< TYPE >(chunksize, &((*left)[lroffset]), &((*right)[rroffset]), resultp, X); \
1034    LAZYDEBUG(cout << " result=      " << resultp[0] << endl;) \
1035             lroffset+=leftstep; \
1036             rroffset+=rightstep; \
1037          }\
1038          lroffset+=oleftstep;\
1039          rroffset+=orightstep;\
1040      }      }
1041    
1042  /*  /*
# Line 1013  LAZYDEBUG(cout << "Resolve binary: " << Line 1075  LAZYDEBUG(cout << "Resolve binary: " <<
1075    }    }
1076    bool leftScalar=(m_left->getRank()==0);    bool leftScalar=(m_left->getRank()==0);
1077    bool rightScalar=(m_right->getRank()==0);    bool rightScalar=(m_right->getRank()==0);
1078    bool bigloops=((leftExp && rightExp) || (!leftExp && !rightExp)); // is processing in single step?    if ((m_left->getRank()!=m_right->getRank()) && (!leftScalar && !rightScalar))
   int steps=(bigloops?1:getNumDPPSample());  
   size_t chunksize=(bigloops? m_samplesize : getNoValues());    // if bigloops, pretend the whole sample is a datapoint  
   if (m_left->getRank()!=m_right->getRank())    // need to deal with scalar * ? ops  
1079    {    {
1080      if (!leftScalar && !rightScalar)      throw DataException("resolveBinary - ranks of arguments must match unless one of them is scalar.");
1081      }
1082      size_t leftsize=m_left->getNoValues();
1083      size_t rightsize=m_right->getNoValues();
1084      size_t chunksize=1;           // how many doubles will be processed in one go
1085      int leftstep=0;       // how far should the left offset advance after each step
1086      int rightstep=0;
1087      int numsteps=0;       // total number of steps for the inner loop
1088      int oleftstep=0;  // the o variables refer to the outer loop
1089      int orightstep=0; // The outer loop is only required in cases where there is an extended scalar
1090      int onumsteps=1;
1091      
1092      bool LES=(leftExp && leftScalar); // Left is an expanded scalar
1093      bool RES=(rightExp && rightScalar);
1094      bool LS=(!leftExp && leftScalar); // left is a single scalar
1095      bool RS=(!rightExp && rightScalar);
1096      bool LN=(!leftExp && !leftScalar);    // left is a single non-scalar
1097      bool RN=(!rightExp && !rightScalar);
1098      bool LEN=(leftExp && !leftScalar);    // left is an expanded non-scalar
1099      bool REN=(rightExp && !rightScalar);
1100    
1101      if ((LES && RES) || (LEN && REN)) // both are Expanded scalars or both are expanded non-scalars
1102      {
1103        chunksize=m_left->getNumDPPSample()*leftsize;
1104        leftstep=0;
1105        rightstep=0;
1106        numsteps=1;
1107      }
1108      else if (LES || RES)
1109      {
1110        chunksize=1;
1111        if (LES)        // left is an expanded scalar
1112      {      {
1113         throw DataException("resolveBinary - ranks of arguments must match unless one of them is scalar.");          if (RS)
1114            {
1115               leftstep=1;
1116               rightstep=0;
1117               numsteps=m_left->getNumDPPSample();
1118            }
1119            else        // RN or REN
1120            {
1121               leftstep=0;
1122               oleftstep=1;
1123               rightstep=1;
1124               orightstep=(RN ? -(int)rightsize : 0);
1125               numsteps=rightsize;
1126               onumsteps=m_left->getNumDPPSample();
1127            }
1128      }      }
1129      steps=getNumDPPSample()*max(m_left->getNoValues(),m_right->getNoValues());      else        // right is an expanded scalar
1130      chunksize=1;    // for scalar      {
1131    }              if (LS)
1132    int leftStep=((leftExp && (!rightExp || rightScalar))? m_right->getNoValues() : 0);          {
1133    int rightStep=((rightExp && (!leftExp || leftScalar))? m_left->getNoValues() : 0);             rightstep=1;
1134    int resultStep=max(leftStep,rightStep);   // only one (at most) should be !=0             leftstep=0;
1135               numsteps=m_right->getNumDPPSample();
1136            }
1137            else
1138            {
1139               rightstep=0;
1140               orightstep=1;
1141               leftstep=1;
1142               oleftstep=(RN ? -(int)leftsize : 0);
1143               numsteps=leftsize;
1144               onumsteps=m_right->getNumDPPSample();
1145            }
1146        }
1147      }
1148      else  // this leaves (LEN, RS), (LEN, RN) and their transposes
1149      {
1150        if (LEN)    // and Right will be a single value
1151        {
1152            chunksize=rightsize;
1153            leftstep=rightsize;
1154            rightstep=0;
1155            numsteps=m_left->getNumDPPSample();
1156            if (RS)
1157            {
1158               numsteps*=leftsize;
1159            }
1160        }
1161        else    // REN
1162        {
1163            chunksize=leftsize;
1164            rightstep=leftsize;
1165            leftstep=0;
1166            numsteps=m_right->getNumDPPSample();
1167            if (LS)
1168            {
1169               numsteps*=rightsize;
1170            }
1171        }
1172      }
1173    
1174      int resultStep=max(leftstep,rightstep);   // only one (at most) should be !=0
1175      // Get the values of sub-expressions      // Get the values of sub-expressions
1176    const ValueType* left=m_left->resolveSample(v,offset,sampleNo,lroffset);    const ValueType* left=m_left->resolveSample(v,offset+getMaxSampleSize(),sampleNo,lroffset);   // see note on
1177    const ValueType* right=m_right->resolveSample(v,offset+m_samplesize,sampleNo,rroffset); // Note      // calcBufss for why we can't put offset as the 2nd param above
1178      const ValueType* right=m_right->resolveSample(v,offset+2*getMaxSampleSize(),sampleNo,rroffset); // Note
1179      // the right child starts further along.      // the right child starts further along.
1180    LAZYDEBUG(cout << "Post sub calls in " << toString() << endl;)
1181    LAZYDEBUG(cout << "shapes=" << DataTypes::shapeToString(m_left->getShape()) << "," << DataTypes::shapeToString(m_right->getShape()) << endl;)
1182    LAZYDEBUG(cout << "chunksize=" << chunksize << endl << "leftstep=" << leftstep << " rightstep=" << rightstep;)
1183    LAZYDEBUG(cout << " numsteps=" << numsteps << endl << "oleftstep=" << oleftstep << " orightstep=" << orightstep;)
1184    LAZYDEBUG(cout << "onumsteps=" << onumsteps << endl;)
1185    LAZYDEBUG(cout << " DPPS=" << m_left->getNumDPPSample() << "," <<m_right->getNumDPPSample() << endl;)
1186    LAZYDEBUG(cout << "" << LS << RS << LN << RN << LES << RES <<LEN << REN <<   endl;)
1187    double* resultp=&(v[offset]);     // results are stored at the vector offset we recieved    double* resultp=&(v[offset]);     // results are stored at the vector offset we recieved
1188    switch(m_op)    switch(m_op)
1189    {    {
# Line 1058  LAZYDEBUG(cout << "Resolve binary: " << Line 1210  LAZYDEBUG(cout << "Resolve binary: " <<
1210  }  }
1211    
1212    
1213    
1214  /*  /*
1215    \brief Compute the value of the expression (tensor product) for the given sample.    \brief Compute the value of the expression (tensor product) for the given sample.
1216    \return Vector which stores the value of the subexpression for the given sample.    \return Vector which stores the value of the subexpression for the given sample.
# Line 1087  LAZYDEBUG(cout << "Resolve TensorProduct Line 1240  LAZYDEBUG(cout << "Resolve TensorProduct
1240    int rightStep=((rightExp && !leftExp)? m_left->getNoValues() : 0);    int rightStep=((rightExp && !leftExp)? m_left->getNoValues() : 0);
1241    int resultStep=max(leftStep,rightStep);   // only one (at most) should be !=0    int resultStep=max(leftStep,rightStep);   // only one (at most) should be !=0
1242      // Get the values of sub-expressions (leave a gap of one sample for the result).      // Get the values of sub-expressions (leave a gap of one sample for the result).
1243    const ValueType* left=m_left->resolveSample(v,offset+m_samplesize,sampleNo,lroffset);    int gap=offset+m_left->getMaxSampleSize();    // actually only needs to be m_left->m_samplesize
1244    const ValueType* right=m_right->resolveSample(v,offset+2*m_samplesize,sampleNo,rroffset);    const ValueType* left=m_left->resolveSample(v,gap,sampleNo,lroffset);
1245      gap+=m_right->getMaxSampleSize();
1246      const ValueType* right=m_right->resolveSample(v,gap,sampleNo,rroffset);
1247    LAZYDEBUG(cout << "Post sub calls: " << toString() << endl;)
1248    LAZYDEBUG(cout << "LeftExp=" << leftExp << " rightExp=" << rightExp << endl;)
1249    LAZYDEBUG(cout << "LeftR=" << m_left->getRank() << " rightExp=" << m_right->getRank() << endl;)
1250    LAZYDEBUG(cout << "LeftSize=" << m_left->getNoValues() << " RightSize=" << m_right->getNoValues() << endl;)
1251    LAZYDEBUG(cout << "m_samplesize=" << m_samplesize << endl;)
1252    LAZYDEBUG(cout << "outputshape=" << DataTypes::shapeToString(getShape()) << endl;)
1253    double* resultp=&(v[offset]);     // results are stored at the vector offset we recieved    double* resultp=&(v[offset]);     // results are stored at the vector offset we recieved
1254    switch(m_op)    switch(m_op)
1255    {    {
1256      case PROD:      case PROD:
1257      for (int i=0;i<steps;++i,resultp+=resultStep)      for (int i=0;i<steps;++i,resultp+=resultStep)
1258      {      {
1259    LAZYDEBUG(cout << "lroffset=" << lroffset << "rroffset=" << rroffset << endl;)
1260    LAZYDEBUG(cout << "l*=" << left << " r*=" << right << endl;)
1261    LAZYDEBUG(cout << "m_SL=" << m_SL << " m_SM=" << m_SM << " m_SR=" << m_SR << endl;)
1262            const double *ptr_0 = &((*left)[lroffset]);            const double *ptr_0 = &((*left)[lroffset]);
1263            const double *ptr_1 = &((*right)[rroffset]);            const double *ptr_1 = &((*right)[rroffset]);
1264    LAZYDEBUG(cout << DataTypes::pointToString(*left, m_left->getShape(),lroffset,"LEFT") << endl;)
1265    LAZYDEBUG(cout << DataTypes::pointToString(*right,m_right->getShape(),rroffset, "RIGHT") << endl;)
1266            matrix_matrix_product(m_SL, m_SM, m_SR, ptr_0, ptr_1, resultp, m_transpose);            matrix_matrix_product(m_SL, m_SM, m_SR, ptr_0, ptr_1, resultp, m_transpose);
1267        lroffset+=leftStep;        lroffset+=leftStep;
1268        rroffset+=rightStep;        rroffset+=rightStep;
# Line 1142  LAZYDEBUG(cout << "Resolve sample " << t Line 1308  LAZYDEBUG(cout << "Resolve sample " << t
1308      if (m_readytype=='C')      if (m_readytype=='C')
1309      {      {
1310      roffset=0;      roffset=0;
1311    LAZYDEBUG(cout << "Finish  sample " << toString() << endl;)
1312      return &(vec);      return &(vec);
1313      }      }
1314      roffset=m_id->getPointOffset(sampleNo, 0);      roffset=m_id->getPointOffset(sampleNo, 0);
1315    LAZYDEBUG(cout << "Finish  sample " << toString() << endl;)
1316      return &(vec);      return &(vec);
1317    }    }
1318    if (m_readytype!='E')    if (m_readytype!='E')
# Line 1162  LAZYDEBUG(cout << "Resolve sample " << t Line 1330  LAZYDEBUG(cout << "Resolve sample " << t
1330    default:    default:
1331      throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");      throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");
1332    }    }
1333    
1334  }  }
1335    
1336    
# Line 1199  LAZYDEBUG(cout << "Buffer created with s Line 1368  LAZYDEBUG(cout << "Buffer created with s
1368    int totalsamples=getNumSamples();    int totalsamples=getNumSamples();
1369    const ValueType* res=0;   // Vector storing the answer    const ValueType* res=0;   // Vector storing the answer
1370    size_t resoffset=0;       // where in the vector to find the answer    size_t resoffset=0;       // where in the vector to find the answer
1371    LAZYDEBUG(cout << "Total number of samples=" <<totalsamples << endl;)
1372    #pragma omp parallel for private(sample,resoffset,outoffset,res) schedule(static)    #pragma omp parallel for private(sample,resoffset,outoffset,res) schedule(static)
1373    for (sample=0;sample<totalsamples;++sample)    for (sample=0;sample<totalsamples;++sample)
1374    {    {
1375          if (sample==0)  {ENABLEDEBUG}
1376    
1377    //       if (sample==5800/getNumDPPSample())  {ENABLEDEBUG}
1378  LAZYDEBUG(cout << "################################# " << sample << endl;)  LAZYDEBUG(cout << "################################# " << sample << endl;)
1379  #ifdef _OPENMP  #ifdef _OPENMP
1380      res=resolveSample(v,threadbuffersize*omp_get_thread_num(),sample,resoffset);      res=resolveSample(v,threadbuffersize*omp_get_thread_num(),sample,resoffset);
# Line 1209  LAZYDEBUG(cout << "##################### Line 1382  LAZYDEBUG(cout << "#####################
1382      res=resolveSample(v,0,sample,resoffset);   // res would normally be v, but not if its a single IDENTITY op.      res=resolveSample(v,0,sample,resoffset);   // res would normally be v, but not if its a single IDENTITY op.
1383  #endif  #endif
1384  LAZYDEBUG(cerr << "-------------------------------- " << endl;)  LAZYDEBUG(cerr << "-------------------------------- " << endl;)
1385    LAZYDEBUG(cerr<< "Copying sample#" << sample << endl;)
1386      outoffset=result->getPointOffset(sample,0);      outoffset=result->getPointOffset(sample,0);
1387  LAZYDEBUG(cerr << "offset=" << outoffset << endl;)  LAZYDEBUG(cerr << "offset=" << outoffset << " from offset=" << resoffset << " " << m_samplesize << " doubles" << endl;)
1388      for (unsigned int i=0;i<m_samplesize;++i,++outoffset,++resoffset)   // copy values into the output vector      for (unsigned int i=0;i<m_samplesize;++i,++outoffset,++resoffset)   // copy values into the output vector
1389      {      {
1390    LAZYDEBUG(cerr << "outoffset=" << outoffset << " resoffset=" << resoffset << " " << (*res)[resoffset]<< endl;)
1391      resvec[outoffset]=(*res)[resoffset];      resvec[outoffset]=(*res)[resoffset];
1392      }      }
1393    LAZYDEBUG(cerr << DataTypes::pointToString(resvec,getShape(),outoffset-m_samplesize+DataTypes::noValues(getShape()),"Final result:") << endl;)
1394  LAZYDEBUG(cerr << "*********************************" << endl;)  LAZYDEBUG(cerr << "*********************************" << endl;)
1395        DISABLEDEBUG
1396    }    }
1397    return resptr;    return resptr;
1398  }  }

Legend:
Removed from v.2147  
changed lines
  Added in v.2171

  ViewVC Help
Powered by ViewVC 1.1.26