/[escript]/trunk/escript/src/DataLazy.cpp
ViewVC logotype

Diff of /trunk/escript/src/DataLazy.cpp

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 2151 by jfenwick, Wed Dec 10 04:41:26 2008 UTC revision 2152 by jfenwick, Thu Dec 11 04:26:42 2008 UTC
# Line 970  DataLazy::resolveNP1OUT_P(ValueType& v, Line 970  DataLazy::resolveNP1OUT_P(ValueType& v,
970    
971    
972  #define PROC_OP(TYPE,X)                               \  #define PROC_OP(TYPE,X)                               \
973      for (int i=0;i<steps;++i,resultp+=resultStep) \      for (int j=0;j<onumsteps;++j)\
974      { \      {\
975         tensor_binary_operation< TYPE >(chunksize, &((*left)[lroffset]), &((*right)[rroffset]), resultp, X); \        for (int i=0;i<numsteps;++i,resultp+=resultStep) \
976         lroffset+=leftStep; \        { \
977         rroffset+=rightStep; \  LAZYDEBUG(cout << "[left,right]=[" << lroffset << "," << rroffset << "]" << endl;)\
978             tensor_binary_operation< TYPE >(chunksize, &((*left)[lroffset]), &((*right)[rroffset]), resultp, X); \
979             lroffset+=leftstep; \
980             rroffset+=rightstep; \
981          }\
982          lroffset+=oleftstep;\
983          rroffset+=orightstep;\
984      }      }
985    
986  /*  /*
# Line 1013  LAZYDEBUG(cout << "Resolve binary: " << Line 1019  LAZYDEBUG(cout << "Resolve binary: " <<
1019    }    }
1020    bool leftScalar=(m_left->getRank()==0);    bool leftScalar=(m_left->getRank()==0);
1021    bool rightScalar=(m_right->getRank()==0);    bool rightScalar=(m_right->getRank()==0);
1022    bool bigloops=((leftExp && rightExp) || (!leftExp && !rightExp)); // is processing in single step?    if ((m_left->getRank()!=m_right->getRank()) && (!leftScalar && !rightScalar))
   int steps=(bigloops?1:getNumDPPSample());  
   size_t chunksize=(bigloops? m_samplesize : getNoValues());    // if bigloops, pretend the whole sample is a datapoint  
   if (m_left->getRank()!=m_right->getRank())    // need to deal with scalar * ? ops  
1023    {    {
1024      if (!leftScalar && !rightScalar)      throw DataException("resolveBinary - ranks of arguments must match unless one of them is scalar.");
1025      }
1026      size_t leftsize=m_left->getNoValues();
1027      size_t rightsize=m_right->getNoValues();
1028      size_t chunksize=1;           // how many doubles will be processed in one go
1029      int leftstep=0;       // how far should the left offset advance after each step
1030      int rightstep=0;
1031      int numsteps=0;       // total number of steps for the inner loop
1032      int oleftstep=0;  // the o variables refer to the outer loop
1033      int orightstep=0; // The outer loop is only required in cases where there is an extended scalar
1034      int onumsteps=1;
1035      
1036      bool LES=(leftExp && leftScalar); // Left is an expanded scalar
1037      bool RES=(rightExp && rightScalar);
1038      bool LS=(!leftExp && leftScalar); // left is a single scalar
1039      bool RS=(!rightExp && rightScalar);
1040      bool LN=(!leftExp && !leftScalar);    // left is a single non-scalar
1041      bool RN=(!rightExp && !rightScalar);
1042      bool LEN=(leftExp && !leftScalar);    // left is an expanded non-scalar
1043      bool REN=(rightExp && !rightScalar);
1044    
1045      if ((LES && RES) || (LEN && REN)) // both are Expanded scalars or both are expanded non-scalars
1046      {
1047        chunksize=m_left->getNumDPPSample()*leftsize;
1048        leftstep=0;
1049        rightstep=0;
1050        numsteps=1;
1051      }
1052      else if (LES || RES)
1053      {
1054        chunksize=1;
1055        if (LES)        // left is an expanded scalar
1056      {      {
1057         throw DataException("resolveBinary - ranks of arguments must match unless one of them is scalar.");          if (RS)
1058            {
1059               leftstep=1;
1060               rightstep=0;
1061               numsteps=m_left->getNumDPPSample();
1062            }
1063            else        // RN or REN
1064            {
1065               leftstep=0;
1066               oleftstep=1;
1067               rightstep=1;
1068               orightstep=(RN?-rightsize:0);
1069               numsteps=rightsize;
1070               onumsteps=m_left->getNumDPPSample();
1071            }
1072      }      }
1073      steps=getNumDPPSample()*max(m_left->getNoValues(),m_right->getNoValues());      else        // right is an expanded scalar
1074      chunksize=1;    // for scalar      {
1075    }              if (LS)
1076    int leftStep=((leftExp && (!rightExp || rightScalar))? m_right->getNoValues() : 0);          {
1077    int rightStep=((rightExp && (!leftExp || leftScalar))? m_left->getNoValues() : 0);             rightstep=1;
1078    int resultStep=max(leftStep,rightStep);   // only one (at most) should be !=0             leftstep=0;
1079               numsteps=m_right->getNumDPPSample();
1080            }
1081            else
1082            {
1083               rightstep=0;
1084               orightstep=1;
1085               leftstep=1;
1086               oleftstep=(LN?-leftsize:0);
1087               numsteps=leftsize;
1088               onumsteps=m_right->getNumDPPSample();
1089            }
1090        }
1091      }
1092      else  // this leaves (LEN, RS), (LEN, RN) and their transposes
1093      {
1094        if (LEN)    // and Right will be a single value
1095        {
1096            chunksize=rightsize;
1097            leftstep=rightsize;
1098            rightstep=0;
1099            numsteps=m_left->getNumDPPSample();
1100            if (RS)
1101            {
1102               numsteps*=leftsize;
1103            }
1104        }
1105        else    // REN
1106        {
1107            chunksize=leftsize;
1108            rightstep=leftsize;
1109            leftstep=0;
1110            numsteps=m_right->getNumDPPSample();
1111            if (LS)
1112            {
1113               numsteps*=rightsize;
1114            }
1115        }
1116      }
1117    
1118      int resultStep=max(leftstep,rightstep);   // only one (at most) should be !=0
1119      // Get the values of sub-expressions      // Get the values of sub-expressions
1120    const ValueType* left=m_left->resolveSample(v,offset,sampleNo,lroffset);    const ValueType* left=m_left->resolveSample(v,offset,sampleNo,lroffset);
1121    const ValueType* right=m_right->resolveSample(v,offset+m_samplesize,sampleNo,rroffset); // Note    const ValueType* right=m_right->resolveSample(v,offset+m_samplesize,sampleNo,rroffset); // Note
1122      // the right child starts further along.      // the right child starts further along.
1123    LAZYDEBUG(cout << "Post sub calls in " << toString() << endl;)
1124    LAZYDEBUG(cout << "shapes=" << DataTypes::shapeToString(m_left->getShape()) << "," << DataTypes::shapeToString(m_right->getShape()) << endl;)
1125    LAZYDEBUG(cout << "chunksize=" << chunksize << endl << "leftstep=" << leftstep << " rightstep=" << rightstep;)
1126    LAZYDEBUG(cout << " numsteps=" << numsteps << endl << "oleftstep=" << oleftstep << " orightstep=" << orightstep;)
1127    LAZYDEBUG(cout << "onumsteps=" << onumsteps << endl;)
1128    LAZYDEBUG(cout << " DPPS=" << m_left->getNumDPPSample() << "," <<m_right->getNumDPPSample() << endl;)
1129    LAZYDEBUG(cout << "" << LS << RS << LN << RN << LES << RES <<LEN << REN <<   endl;)
1130    double* resultp=&(v[offset]);     // results are stored at the vector offset we recieved    double* resultp=&(v[offset]);     // results are stored at the vector offset we recieved
1131    switch(m_op)    switch(m_op)
1132    {    {
# Line 1058  LAZYDEBUG(cout << "Resolve binary: " << Line 1153  LAZYDEBUG(cout << "Resolve binary: " <<
1153  }  }
1154    
1155    
1156    
1157  /*  /*
1158    \brief Compute the value of the expression (tensor product) for the given sample.    \brief Compute the value of the expression (tensor product) for the given sample.
1159    \return Vector which stores the value of the subexpression for the given sample.    \return Vector which stores the value of the subexpression for the given sample.
# Line 1142  LAZYDEBUG(cout << "Resolve sample " << t Line 1238  LAZYDEBUG(cout << "Resolve sample " << t
1238      if (m_readytype=='C')      if (m_readytype=='C')
1239      {      {
1240      roffset=0;      roffset=0;
1241    LAZYDEBUG(cout << "Finish  sample " << toString() << endl;)
1242      return &(vec);      return &(vec);
1243      }      }
1244      roffset=m_id->getPointOffset(sampleNo, 0);      roffset=m_id->getPointOffset(sampleNo, 0);
1245    LAZYDEBUG(cout << "Finish  sample " << toString() << endl;)
1246      return &(vec);      return &(vec);
1247    }    }
1248    if (m_readytype!='E')    if (m_readytype!='E')
# Line 1162  LAZYDEBUG(cout << "Resolve sample " << t Line 1260  LAZYDEBUG(cout << "Resolve sample " << t
1260    default:    default:
1261      throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");      throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");
1262    }    }
1263    
1264  }  }
1265    
1266    
# Line 1199  LAZYDEBUG(cout << "Buffer created with s Line 1298  LAZYDEBUG(cout << "Buffer created with s
1298    int totalsamples=getNumSamples();    int totalsamples=getNumSamples();
1299    const ValueType* res=0;   // Vector storing the answer    const ValueType* res=0;   // Vector storing the answer
1300    size_t resoffset=0;       // where in the vector to find the answer    size_t resoffset=0;       // where in the vector to find the answer
1301    LAZYDEBUG(cout << "Total number of samples=" <<totalsamples << endl;)
1302    #pragma omp parallel for private(sample,resoffset,outoffset,res) schedule(static)    #pragma omp parallel for private(sample,resoffset,outoffset,res) schedule(static)
1303    for (sample=0;sample<totalsamples;++sample)    for (sample=0;sample<totalsamples;++sample)
1304    {    {

Legend:
Removed from v.2151  
changed lines
  Added in v.2152

  ViewVC Help
Powered by ViewVC 1.1.26