/[escript]/branches/clazy/escriptcore/src/DataLazy.cpp
ViewVC logotype

Diff of /branches/clazy/escriptcore/src/DataLazy.cpp

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 6516 by jfenwick, Mon Mar 6 03:55:11 2017 UTC revision 6517 by jfenwick, Mon Mar 6 06:35:31 2017 UTC
# Line 412  void DataLazy::LazyNodeSetup() Line 412  void DataLazy::LazyNodeSetup()
412  {  {
413  #ifdef _OPENMP  #ifdef _OPENMP
414      int numthreads=omp_get_max_threads();      int numthreads=omp_get_max_threads();
415      m_samples_r.resize(numthreads*m_samplesize);      if (m_iscompl)
416        {
417            m_samples_c.resize(numthreads*m_samplesize);
418        }
419        else
420        {
421            m_samples_r.resize(numthreads*m_samplesize);
422        }    
423      m_sampleids=new int[numthreads];      m_sampleids=new int[numthreads];
424      for (int i=0;i<numthreads;++i)      for (int i=0;i<numthreads;++i)
425      {      {
# Line 1055  if (&x<stackend[omp_get_thread_num()]) Line 1062  if (&x<stackend[omp_get_thread_num()])
1062    }    }
1063  }  }
1064    
1065    // The result will be stored in m_samples
1066    // The return value is a pointer to the DataVector, offset is the offset within the return value
1067    const DataTypes::CplxVectorType*
1068    DataLazy::resolveNodeSampleCplx(int tid, int sampleNo, size_t& roffset) const
1069    {
1070    LAZYDEBUG(cout << "Resolve sample " << toString() << endl;)
1071            // collapse so we have a 'E' node or an IDENTITY for some other type
1072      if (m_readytype!='E' && m_op!=IDENTITY)
1073      {
1074            collapse();
1075      }
1076      if (m_op==IDENTITY)  
1077      {
1078        const CplxVectorType& vec=m_id->getVectorROC();
1079        roffset=m_id->getPointOffset(sampleNo, 0);
1080    #ifdef LAZY_STACK_PROF
1081    int x;
1082    if (&x<stackend[omp_get_thread_num()])
1083    {
1084           stackend[omp_get_thread_num()]=&x;
1085    }
1086    #endif
1087        return &(vec);
1088      }
1089      if (m_readytype!='E')
1090      {
1091        throw DataException("Programmer Error - Collapse did not produce an expanded node.");
1092      }
1093      if (m_sampleids[tid]==sampleNo)
1094      {
1095            roffset=tid*m_samplesize;
1096            return &(m_samples_c);            // sample is already resolved
1097      }
1098      m_sampleids[tid]=sampleNo;
1099    
1100      switch (getOpgroup(m_op))
1101      {
1102      case G_UNARY:
1103      case G_UNARY_P: return resolveNodeUnaryCplx(tid, sampleNo, roffset);
1104      case G_BINARY: return resolveNodeBinaryCplx(tid, sampleNo, roffset);
1105      case G_NP1OUT: return resolveNodeNP1OUTCplx(tid, sampleNo, roffset);
1106      case G_NP1OUT_P: return resolveNodeNP1OUT_PCplx(tid, sampleNo, roffset);
1107      case G_TENSORPROD: return resolveNodeTProdCplx(tid, sampleNo, roffset);
1108      case G_NP1OUT_2P: return resolveNodeNP1OUT_2PCplx(tid, sampleNo, roffset);
1109      case G_REDUCTION: return resolveNodeReductionCplx(tid, sampleNo, roffset);
1110      case G_CONDEVAL: return resolveNodeCondEvalCplx(tid, sampleNo, roffset);
1111      default:
1112        throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");
1113      }
1114    }
1115    
1116  const DataTypes::RealVectorType*  const DataTypes::RealVectorType*
1117  DataLazy::resolveNodeUnary(int tid, int sampleNo, size_t& roffset) const  DataLazy::resolveNodeUnary(int tid, int sampleNo, size_t& roffset) const
1118  {  {
# Line 1088  DataLazy::resolveNodeUnary(int tid, int Line 1146  DataLazy::resolveNodeUnary(int tid, int
1146    return &(m_samples_r);    return &(m_samples_r);
1147  }  }
1148    
1149    const DataTypes::CplxVectorType*
1150    DataLazy::resolveNodeUnaryCplx(int tid, int sampleNo, size_t& roffset) const
1151    {
1152            // we assume that any collapsing has been done before we get here
1153            // since we only have one argument we don't need to think about only
1154            // processing single points.
1155            // we will also know we won't get identity nodes
1156      if (m_readytype!='E')
1157      {
1158        throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
1159      }
1160      if (m_op==IDENTITY)
1161      {
1162        throw DataException("Programmer error - resolveNodeUnary should not be called on identity nodes.");
1163      }
1164      const DataTypes::CplxVectorType* leftres=m_left->resolveNodeSampleCplx(tid, sampleNo, roffset);
1165      const DataTypes::cplx_t* left=&((*leftres)[roffset]);
1166      roffset=m_samplesize*tid;
1167      DataTypes::cplx_t* result=&(m_samples_c[roffset]);
1168      if (m_op==POS)
1169      {
1170        // this should be prevented earlier
1171        // operation is meaningless for lazy
1172            throw DataException("Programmer error - POS not supported for lazy data.");    
1173      }
1174      tensor_unary_array_operation(m_samplesize,
1175                                 left,
1176                                 result,
1177                                 m_op,
1178                                 m_tol);  
1179      return &(m_samples_c);
1180    }
1181    
1182    
1183    
1184  const DataTypes::RealVectorType*  const DataTypes::RealVectorType*
1185  DataLazy::resolveNodeReduction(int tid, int sampleNo, size_t& roffset) const  DataLazy::resolveNodeReduction(int tid, int sampleNo, size_t& roffset) const
# Line 1141  DataLazy::resolveNodeReduction(int tid, Line 1233  DataLazy::resolveNodeReduction(int tid,
1233    return &(m_samples_r);    return &(m_samples_r);
1234  }  }
1235    
1236    const DataTypes::CplxVectorType*
1237    DataLazy::resolveNodeReductionCplx(int tid, int sampleNo, size_t& roffset) const
1238    {
1239            // we assume that any collapsing has been done before we get here
1240            // since we only have one argument we don't need to think about only
1241            // processing single points.
1242            // we will also know we won't get identity nodes
1243      if (m_readytype!='E')
1244      {
1245        throw DataException("Programmer error - resolveReductionCplx should only be called on expanded Data.");
1246      }
1247      if (m_op==IDENTITY)
1248      {
1249        throw DataException("Programmer error - resolveNodeReductionCplx should not be called on identity nodes.");
1250      }
1251      throw DataException("Programmer error - reduction operations MIN and MAX not supported for complex values.");
1252    }
1253    
1254    
1255  const DataTypes::RealVectorType*  const DataTypes::RealVectorType*
1256  DataLazy::resolveNodeNP1OUT(int tid, int sampleNo, size_t& roffset) const  DataLazy::resolveNodeNP1OUT(int tid, int sampleNo, size_t& roffset) const
1257  {  {
# Line 1186  DataLazy::resolveNodeNP1OUT(int tid, int Line 1297  DataLazy::resolveNodeNP1OUT(int tid, int
1297    return &m_samples_r;    return &m_samples_r;
1298  }  }
1299    
1300    
1301    const DataTypes::CplxVectorType*
1302    DataLazy::resolveNodeNP1OUTCplx(int tid, int sampleNo, size_t& roffset) const
1303    {
1304            // we assume that any collapsing has been done before we get here
1305            // since we only have one argument we don't need to think about only
1306            // processing single points.
1307      if (m_readytype!='E')
1308      {
1309        throw DataException("Programmer error - resolveNodeNP1OUT should only be called on expanded Data.");
1310      }
1311      if (m_op==IDENTITY)
1312      {
1313        throw DataException("Programmer error - resolveNodeNP1OUT should not be called on identity nodes.");
1314      }
1315      size_t subroffset;
1316      const CplxVectorType* leftres=m_left->resolveNodeSampleCplx(tid, sampleNo, subroffset);
1317      roffset=m_samplesize*tid;
1318      size_t loop=0;
1319      size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1320      size_t step=getNoValues();
1321      size_t offset=roffset;
1322      switch (m_op)
1323      {
1324        case SYM:
1325            for (loop=0;loop<numsteps;++loop)
1326            {
1327                escript::symmetric(*leftres,m_left->getShape(),subroffset, m_samples_c, getShape(), offset);
1328                subroffset+=step;
1329                offset+=step;
1330            }
1331            break;
1332        case NSYM:
1333            for (loop=0;loop<numsteps;++loop)
1334            {
1335                escript::antisymmetric(*leftres,m_left->getShape(),subroffset, m_samples_c, getShape(), offset);
1336                subroffset+=step;
1337                offset+=step;
1338            }
1339            break;
1340        default:
1341            throw DataException("Programmer error - resolveNP1OUT can not resolve operator "+opToString(m_op)+".");
1342      }
1343      return &m_samples_c;
1344    }
1345    
1346  const DataTypes::RealVectorType*  const DataTypes::RealVectorType*
1347  DataLazy::resolveNodeNP1OUT_P(int tid, int sampleNo, size_t& roffset) const  DataLazy::resolveNodeNP1OUT_P(int tid, int sampleNo, size_t& roffset) const
1348  {  {
# Line 1233  DataLazy::resolveNodeNP1OUT_P(int tid, i Line 1390  DataLazy::resolveNodeNP1OUT_P(int tid, i
1390    return &m_samples_r;    return &m_samples_r;
1391  }  }
1392    
1393    const DataTypes::CplxVectorType*
1394    DataLazy::resolveNodeNP1OUT_PCplx(int tid, int sampleNo, size_t& roffset) const
1395    {
1396            // we assume that any collapsing has been done before we get here
1397            // since we only have one argument we don't need to think about only
1398            // processing single points.
1399      if (m_readytype!='E')
1400      {
1401        throw DataException("Programmer error - resolveNodeNP1OUT_P should only be called on expanded Data.");
1402      }
1403      if (m_op==IDENTITY)
1404      {
1405        throw DataException("Programmer error - resolveNodeNP1OUT_P should not be called on identity nodes.");
1406      }
1407      size_t subroffset;
1408      size_t offset;
1409      const CplxVectorType* leftres=m_left->resolveNodeSampleCplx(tid, sampleNo, subroffset);
1410      roffset=m_samplesize*tid;
1411      offset=roffset;
1412      size_t loop=0;
1413      size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1414      size_t outstep=getNoValues();
1415      size_t instep=m_left->getNoValues();
1416      switch (m_op)
1417      {
1418        case TRACE:
1419            for (loop=0;loop<numsteps;++loop)
1420            {
1421                escript::trace(*leftres,m_left->getShape(),subroffset, m_samples_c ,getShape(),offset,m_axis_offset);
1422                subroffset+=instep;
1423                offset+=outstep;
1424            }
1425            break;
1426        case TRANS:
1427            for (loop=0;loop<numsteps;++loop)
1428            {
1429                escript::transpose(*leftres,m_left->getShape(),subroffset, m_samples_c, getShape(),offset,m_axis_offset);
1430                subroffset+=instep;
1431                offset+=outstep;
1432            }
1433            break;
1434        default:
1435            throw DataException("Programmer error - resolveNP1OUTP can not resolve operator "+opToString(m_op)+".");
1436      }
1437      return &m_samples_c;
1438    }
1439    
1440  const DataTypes::RealVectorType*  const DataTypes::RealVectorType*
1441  DataLazy::resolveNodeNP1OUT_2P(int tid, int sampleNo, size_t& roffset) const  DataLazy::resolveNodeNP1OUT_2P(int tid, int sampleNo, size_t& roffset) const
# Line 1270  DataLazy::resolveNodeNP1OUT_2P(int tid, Line 1473  DataLazy::resolveNodeNP1OUT_2P(int tid,
1473    return &m_samples_r;    return &m_samples_r;
1474  }  }
1475    
1476    
1477    const DataTypes::CplxVectorType*
1478    DataLazy::resolveNodeNP1OUT_2PCplx(int tid, int sampleNo, size_t& roffset) const
1479    {
1480      if (m_readytype!='E')
1481      {
1482        throw DataException("Programmer error - resolveNodeNP1OUT_2P should only be called on expanded Data.");
1483      }
1484      if (m_op==IDENTITY)
1485      {
1486        throw DataException("Programmer error - resolveNodeNP1OUT_2P should not be called on identity nodes.");
1487      }
1488      size_t subroffset;
1489      size_t offset;
1490      const CplxVectorType* leftres=m_left->resolveNodeSampleCplx(tid, sampleNo, subroffset);
1491      roffset=m_samplesize*tid;
1492      offset=roffset;
1493      size_t loop=0;
1494      size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1495      size_t outstep=getNoValues();
1496      size_t instep=m_left->getNoValues();
1497      switch (m_op)
1498      {
1499        case SWAP:
1500            for (loop=0;loop<numsteps;++loop)
1501            {
1502                escript::swapaxes(*leftres,m_left->getShape(),subroffset, m_samples_c, getShape(),offset, m_axis_offset, m_transpose);
1503                subroffset+=instep;
1504                offset+=outstep;
1505            }
1506            break;
1507        default:
1508            throw DataException("Programmer error - resolveNodeNP1OUT2P can not resolve operator "+opToString(m_op)+".");
1509      }
1510      return &m_samples_c;
1511    }
1512    
1513  const DataTypes::RealVectorType*  const DataTypes::RealVectorType*
1514  DataLazy::resolveNodeCondEval(int tid, int sampleNo, size_t& roffset) const  DataLazy::resolveNodeCondEval(int tid, int sampleNo, size_t& roffset) const
1515  {  {
# Line 1305  DataLazy::resolveNodeCondEval(int tid, i Line 1545  DataLazy::resolveNodeCondEval(int tid, i
1545    return &m_samples_r;    return &m_samples_r;
1546  }  }
1547    
1548    const DataTypes::CplxVectorType*
1549    DataLazy::resolveNodeCondEvalCplx(int tid, int sampleNo, size_t& roffset) const
1550    {
1551      if (m_readytype!='E')
1552      {
1553        throw DataException("Programmer error - resolveNodeCondEval should only be called on expanded Data.");
1554      }
1555      if (m_op!=CONDEVAL)
1556      {
1557        throw DataException("Programmer error - resolveNodeCondEval should only be called on CONDEVAL nodes.");
1558      }
1559      size_t subroffset;
1560    
1561      const RealVectorType* maskres=m_mask->resolveNodeSample(tid, sampleNo, subroffset);
1562      const CplxVectorType* srcres=0;
1563      if ((*maskres)[subroffset]>0)
1564      {
1565            srcres=m_left->resolveNodeSampleCplx(tid, sampleNo, subroffset);
1566      }
1567      else
1568      {
1569            srcres=m_right->resolveNodeSampleCplx(tid, sampleNo, subroffset);
1570      }
1571    
1572      // Now we need to copy the result
1573    
1574      roffset=m_samplesize*tid;
1575      for (int i=0;i<m_samplesize;++i)
1576      {
1577            m_samples_c[roffset+i]=(*srcres)[subroffset+i];  
1578      }
1579    
1580      return &m_samples_c;
1581    }
1582    
1583    
1584  // This method assumes that any subexpressions which evaluate to Constant or Tagged Data  // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1585  // have already been collapsed to IDENTITY. So we must have at least one expanded child.  // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1586  // If both children are expanded, then we can process them in a single operation (we treat  // If both children are expanded, then we can process them in a single operation (we treat
# Line 1447  LAZYDEBUG(cout << "Right res["<< rroffse Line 1723  LAZYDEBUG(cout << "Right res["<< rroffse
1723    {    {
1724      case ADD:      case ADD:
1725          //PROC_OP(NO_ARG,plus<double>());          //PROC_OP(NO_ARG,plus<double>());
1726        escript::binaryOpVectorLazyHelper<real_t, real_t, real_t>(resultp,        escript::binaryOpVectorLazyArithmeticHelper<real_t, real_t, real_t>(resultp,
1727               &(*left)[0],               &(*left)[0],
1728               &(*right)[0],               &(*right)[0],
1729               chunksize,               chunksize,
# Line 1463  LAZYDEBUG(cout << "Right res["<< rroffse Line 1739  LAZYDEBUG(cout << "Right res["<< rroffse
1739               escript::ES_optype::ADD);                 escript::ES_optype::ADD);  
1740          break;          break;
1741      case SUB:      case SUB:
1742        escript::binaryOpVectorLazyHelper<real_t, real_t, real_t>(resultp,        escript::binaryOpVectorLazyArithmeticHelper<real_t, real_t, real_t>(resultp,
1743               &(*left)[0],               &(*left)[0],
1744               &(*right)[0],               &(*right)[0],
1745               chunksize,               chunksize,
# Line 1481  LAZYDEBUG(cout << "Right res["<< rroffse Line 1757  LAZYDEBUG(cout << "Right res["<< rroffse
1757          break;          break;
1758      case MUL:      case MUL:
1759          //PROC_OP(NO_ARG,multiplies<double>());          //PROC_OP(NO_ARG,multiplies<double>());
1760        escript::binaryOpVectorLazyHelper<real_t, real_t, real_t>(resultp,        escript::binaryOpVectorLazyArithmeticHelper<real_t, real_t, real_t>(resultp,
1761               &(*left)[0],               &(*left)[0],
1762               &(*right)[0],               &(*right)[0],
1763               chunksize,               chunksize,
# Line 1498  LAZYDEBUG(cout << "Right res["<< rroffse Line 1774  LAZYDEBUG(cout << "Right res["<< rroffse
1774          break;          break;
1775      case DIV:      case DIV:
1776          //PROC_OP(NO_ARG,divides<double>());          //PROC_OP(NO_ARG,divides<double>());
1777        escript::binaryOpVectorLazyHelper<real_t, real_t, real_t>(resultp,        escript::binaryOpVectorLazyArithmeticHelper<real_t, real_t, real_t>(resultp,
1778               &(*left)[0],               &(*left)[0],
1779               &(*right)[0],               &(*right)[0],
1780               chunksize,               chunksize,
# Line 1515  LAZYDEBUG(cout << "Right res["<< rroffse Line 1791  LAZYDEBUG(cout << "Right res["<< rroffse
1791          break;          break;
1792      case POW:      case POW:
1793         //PROC_OP(double (double,double),::pow);         //PROC_OP(double (double,double),::pow);
1794        escript::binaryOpVectorLazyHelper<real_t, real_t, real_t>(resultp,        escript::binaryOpVectorLazyArithmeticHelper<real_t, real_t, real_t>(resultp,
1795               &(*left)[0],               &(*left)[0],
1796               &(*right)[0],               &(*right)[0],
1797               chunksize,               chunksize,
# Line 1537  LAZYDEBUG(cout << "Result res[" << roffs Line 1813  LAZYDEBUG(cout << "Result res[" << roffs
1813    return &m_samples_r;    return &m_samples_r;
1814  }  }
1815    
1816    const DataTypes::CplxVectorType*
1817    DataLazy::resolveNodeBinaryCplx(int tid, int sampleNo, size_t& roffset) const
1818    {
1819    LAZYDEBUG(cout << "Resolve binary: " << toString() << endl;)
1820    
1821      size_t lroffset=0, rroffset=0;        // offsets in the left and right result vectors
1822            // first work out which of the children are expanded
1823      bool leftExp=(m_left->m_readytype=='E');
1824      bool rightExp=(m_right->m_readytype=='E');
1825      if (!leftExp && !rightExp)
1826      {
1827            throw DataException("Programmer Error - please use collapse if neither argument has type 'E'.");
1828      }
1829      bool leftScalar=(m_left->getRank()==0);
1830      bool rightScalar=(m_right->getRank()==0);
1831      if ((m_left->getRank()!=m_right->getRank()) && (!leftScalar && !rightScalar))
1832      {
1833            throw DataException("resolveBinary - ranks of arguments must match unless one of them is scalar.");
1834      }
1835      size_t leftsize=m_left->getNoValues();
1836      size_t rightsize=m_right->getNoValues();
1837      size_t chunksize=1;                   // how many doubles will be processed in one go
1838      int leftstep=0;               // how far should the left offset advance after each step
1839      int rightstep=0;
1840      int numsteps=0;               // total number of steps for the inner loop
1841      int oleftstep=0;      // the o variables refer to the outer loop
1842      int orightstep=0;     // The outer loop is only required in cases where there is an extended scalar
1843      int onumsteps=1;
1844      
1845      bool LES=(leftExp && leftScalar);     // Left is an expanded scalar
1846      bool RES=(rightExp && rightScalar);
1847      bool LS=(!leftExp && leftScalar);     // left is a single scalar
1848      bool RS=(!rightExp && rightScalar);
1849      bool LN=(!leftExp && !leftScalar);    // left is a single non-scalar
1850      bool RN=(!rightExp && !rightScalar);
1851      bool LEN=(leftExp && !leftScalar);    // left is an expanded non-scalar
1852      bool REN=(rightExp && !rightScalar);
1853    
1854      if ((LES && RES) || (LEN && REN))     // both are Expanded scalars or both are expanded non-scalars
1855      {
1856            chunksize=m_left->getNumDPPSample()*leftsize;
1857            leftstep=0;
1858            rightstep=0;
1859            numsteps=1;
1860      }
1861      else if (LES || RES)
1862      {
1863            chunksize=1;
1864            if (LES)                // left is an expanded scalar
1865            {
1866                    if (RS)
1867                    {
1868                       leftstep=1;
1869                       rightstep=0;
1870                       numsteps=m_left->getNumDPPSample();
1871                    }
1872                    else            // RN or REN
1873                    {
1874                       leftstep=0;
1875                       oleftstep=1;
1876                       rightstep=1;
1877                       orightstep=(RN ? -(int)rightsize : 0);
1878                       numsteps=rightsize;
1879                       onumsteps=m_left->getNumDPPSample();
1880                    }
1881            }
1882            else            // right is an expanded scalar
1883            {
1884                    if (LS)
1885                    {
1886                       rightstep=1;
1887                       leftstep=0;
1888                       numsteps=m_right->getNumDPPSample();
1889                    }
1890                    else
1891                    {
1892                       rightstep=0;
1893                       orightstep=1;
1894                       leftstep=1;
1895                       oleftstep=(LN ? -(int)leftsize : 0);
1896                       numsteps=leftsize;
1897                       onumsteps=m_right->getNumDPPSample();
1898                    }
1899            }
1900      }
1901      else  // this leaves (LEN, RS), (LEN, RN) and their transposes
1902      {
1903            if (LEN)        // and Right will be a single value
1904            {
1905                    chunksize=rightsize;
1906                    leftstep=rightsize;
1907                    rightstep=0;
1908                    numsteps=m_left->getNumDPPSample();
1909                    if (RS)
1910                    {
1911                       numsteps*=leftsize;
1912                    }
1913            }
1914            else    // REN
1915            {
1916                    chunksize=leftsize;
1917                    rightstep=leftsize;
1918                    leftstep=0;
1919                    numsteps=m_right->getNumDPPSample();
1920                    if (LS)
1921                    {
1922                       numsteps*=rightsize;
1923                    }
1924            }
1925      }
1926    
1927      int resultStep=max(leftstep,rightstep);       // only one (at most) should be !=0
1928            // Get the values of sub-expressions
1929      const CplxVectorType* left=m_left->resolveNodeSampleCplx(tid,sampleNo,lroffset);      
1930      const CplxVectorType* right=m_right->resolveNodeSampleCplx(tid,sampleNo,rroffset);
1931    LAZYDEBUG(cout << "Post sub calls in " << toString() << endl;)
1932    LAZYDEBUG(cout << "shapes=" << DataTypes::shapeToString(m_left->getShape()) << "," << DataTypes::shapeToString(m_right->getShape()) << endl;)
1933    LAZYDEBUG(cout << "chunksize=" << chunksize << endl << "leftstep=" << leftstep << " rightstep=" << rightstep;)
1934    LAZYDEBUG(cout << " numsteps=" << numsteps << endl << "oleftstep=" << oleftstep << " orightstep=" << orightstep;)
1935    LAZYDEBUG(cout << "onumsteps=" << onumsteps << endl;)
1936    LAZYDEBUG(cout << " DPPS=" << m_left->getNumDPPSample() << "," <<m_right->getNumDPPSample() << endl;)
1937    LAZYDEBUG(cout << "" << LS << RS << LN << RN << LES << RES <<LEN << REN <<   endl;)
1938    
1939    LAZYDEBUG(cout << "Left res["<< lroffset<< "]=" << (*left)[lroffset] << endl;)
1940    LAZYDEBUG(cout << "Right res["<< rroffset<< "]=" << (*right)[rroffset] << endl;)
1941    
1942    
1943      roffset=m_samplesize*tid;
1944      cplx_t* resultp=&(m_samples_c[roffset]);                // results are stored at the vector offset we received
1945      switch(m_op)
1946      {
1947        case ADD:
1948            //PROC_OP(NO_ARG,plus<double>());
1949          escript::binaryOpVectorLazyArithmeticHelper<cplx_t, cplx_t, cplx_t>(resultp,
1950                 &(*left)[0],
1951                 &(*right)[0],
1952                 chunksize,
1953                 onumsteps,
1954                 numsteps,
1955                 resultStep,
1956                 leftstep,
1957                 rightstep,
1958                 oleftstep,
1959                 orightstep,
1960                 lroffset,
1961                 rroffset,
1962                 escript::ES_optype::ADD);  
1963            break;
1964        case SUB:
1965          escript::binaryOpVectorLazyArithmeticHelper<cplx_t, cplx_t, cplx_t>(resultp,
1966                 &(*left)[0],
1967                 &(*right)[0],
1968                 chunksize,
1969                 onumsteps,
1970                 numsteps,
1971                 resultStep,
1972                 leftstep,
1973                 rightstep,
1974                 oleftstep,
1975                 orightstep,
1976                 lroffset,
1977                 rroffset,
1978                 escript::ES_optype::SUB);        
1979            //PROC_OP(NO_ARG,minus<double>());
1980            break;
1981        case MUL:
1982            //PROC_OP(NO_ARG,multiplies<double>());
1983          escript::binaryOpVectorLazyArithmeticHelper<cplx_t, cplx_t, cplx_t>(resultp,
1984                 &(*left)[0],
1985                 &(*right)[0],
1986                 chunksize,
1987                 onumsteps,
1988                 numsteps,
1989                 resultStep,
1990                 leftstep,
1991                 rightstep,
1992                 oleftstep,
1993                 orightstep,
1994                 lroffset,
1995                 rroffset,
1996                 escript::ES_optype::MUL);        
1997            break;
1998        case DIV:
1999            //PROC_OP(NO_ARG,divides<double>());
2000          escript::binaryOpVectorLazyArithmeticHelper<cplx_t, cplx_t, cplx_t>(resultp,
2001                 &(*left)[0],
2002                 &(*right)[0],
2003                 chunksize,
2004                 onumsteps,
2005                 numsteps,
2006                 resultStep,
2007                 leftstep,
2008                 rightstep,
2009                 oleftstep,
2010                 orightstep,
2011                 lroffset,
2012                 rroffset,
2013                 escript::ES_optype::DIV);        
2014            break;
2015        case POW:
2016           //PROC_OP(double (double,double),::pow);
2017          escript::binaryOpVectorLazyArithmeticHelper<cplx_t, cplx_t, cplx_t>(resultp,
2018                 &(*left)[0],
2019                 &(*right)[0],
2020                 chunksize,
2021                 onumsteps,
2022                 numsteps,
2023                 resultStep,
2024                 leftstep,
2025                 rightstep,
2026                 oleftstep,
2027                 orightstep,
2028                 lroffset,
2029                 rroffset,
2030                 escript::ES_optype::POW);        
2031            break;
2032        default:
2033            throw DataException("Programmer error - resolveBinary can not resolve operator "+opToString(m_op)+".");
2034      }
2035    LAZYDEBUG(cout << "Result res[" << roffset<< "]" << m_samples_c[roffset] << endl;)
2036      return &m_samples_c;
2037    }
2038    
2039    
2040  // This method assumes that any subexpressions which evaluate to Constant or Tagged Data  // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
2041  // have already been collapsed to IDENTITY. So we must have at least one expanded child.  // have already been collapsed to IDENTITY. So we must have at least one expanded child.
# Line 1599  LAZYDEBUG(cout << DataTypes::pointToStri Line 2098  LAZYDEBUG(cout << DataTypes::pointToStri
2098    return &m_samples_r;    return &m_samples_r;
2099  }  }
2100    
2101    const DataTypes::CplxVectorType*
2102    DataLazy::resolveNodeTProdCplx(int tid, int sampleNo, size_t& roffset) const
2103    {
2104    LAZYDEBUG(cout << "Resolve TensorProduct: " << toString() << endl;)
2105    
2106      size_t lroffset=0, rroffset=0;        // offsets in the left and right result vectors
2107            // first work out which of the children are expanded
2108      bool leftExp=(m_left->m_readytype=='E');
2109      bool rightExp=(m_right->m_readytype=='E');
2110      int steps=getNumDPPSample();
2111      int leftStep=(leftExp? m_left->getNoValues() : 0);            // do not have scalars as input to this method
2112      int rightStep=(rightExp?m_right->getNoValues() : 0);
2113    
2114      int resultStep=getNoValues();
2115      roffset=m_samplesize*tid;
2116      size_t offset=roffset;
2117    
2118      const CplxVectorType* left=m_left->resolveNodeSampleCplx(tid, sampleNo, lroffset);
2119    
2120      const CplxVectorType* right=m_right->resolveNodeSampleCplx(tid, sampleNo, rroffset);
2121    
2122    LAZYDEBUG(cerr << "[Left shape]=" << DataTypes::shapeToString(m_left->getShape()) << "\n[Right shape]=" << DataTypes::shapeToString(m_right->getShape()) << " result=" <<DataTypes::shapeToString(getShape()) <<  endl;
2123    cout << getNoValues() << endl;)
2124    
2125    
2126    LAZYDEBUG(cerr << "Post sub calls: " << toString() << endl;)
2127    LAZYDEBUG(cout << "LeftExp=" << leftExp << " rightExp=" << rightExp << endl;)
2128    LAZYDEBUG(cout << "LeftR=" << m_left->getRank() << " rightExp=" << m_right->getRank() << endl;)
2129    LAZYDEBUG(cout << "LeftSize=" << m_left->getNoValues() << " RightSize=" << m_right->getNoValues() << endl;)
2130    LAZYDEBUG(cout << "m_samplesize=" << m_samplesize << endl;)
2131    LAZYDEBUG(cout << "outputshape=" << DataTypes::shapeToString(getShape()) << endl;)
2132    LAZYDEBUG(cout << "DPPS=" << m_right->getNumDPPSample() <<"."<<endl;)
2133    
2134      cplx_t* resultp=&(m_samples_c[offset]);         // results are stored at the vector offset we received
2135      switch(m_op)
2136      {
2137        case PROD:
2138            for (int i=0;i<steps;++i,resultp+=resultStep)
2139            {
2140              const cplx_t *ptr_0 = &((*left)[lroffset]);
2141              const cplx_t *ptr_1 = &((*right)[rroffset]);
2142    
2143    LAZYDEBUG(cout << DataTypes::pointToString(*left, m_left->getShape(),lroffset,"LEFT") << endl;)
2144    LAZYDEBUG(cout << DataTypes::pointToString(*right,m_right->getShape(),rroffset, "RIGHT") << endl;)
2145    
2146              matrix_matrix_product(m_SL, m_SM, m_SR, ptr_0, ptr_1, resultp, m_transpose);
2147    
2148              lroffset+=leftStep;
2149              rroffset+=rightStep;
2150            }
2151            break;
2152        default:
2153            throw DataException("Programmer error - resolveTProduct can not resolve operator "+opToString(m_op)+".");
2154      }
2155      roffset=offset;
2156       return &m_samples_c;
2157    }
2158    
2159  const DataTypes::RealVectorType*  const DataTypes::RealVectorType*
2160  DataLazy::resolveSample(int sampleNo, size_t& roffset) const  DataLazy::resolveSample(int sampleNo, size_t& roffset) const
# Line 1633  DataLazy::resolveToIdentity() Line 2189  DataLazy::resolveToIdentity()
2189  {  {
2190     if (m_op==IDENTITY)     if (m_op==IDENTITY)
2191          return;          return;
2192     DataReady_ptr p=resolveNodeWorker();     if (isComplex())
2193     makeIdentity(p);     {
2194            DataReady_ptr p=resolveNodeWorkerCplx();
2195            makeIdentity(p);
2196       }
2197       else
2198       {
2199            DataReady_ptr p=resolveNodeWorker();
2200            makeIdentity(p);
2201       }
2202  }  }
2203    
2204  void DataLazy::makeIdentity(const DataReady_ptr& p)  void DataLazy::makeIdentity(const DataReady_ptr& p)
# Line 1802  LAZYDEBUG(cout << "Final res[" << roffse Line 2366  LAZYDEBUG(cout << "Final res[" << roffse
2366    return resptr;    return resptr;
2367  }  }
2368    
2369    // This version should only be called on complex lazy nodes
2370    DataReady_ptr
2371    DataLazy::resolveNodeWorkerCplx()
2372    {
2373      if (m_readytype!='E')         // if the whole sub-expression is Constant or Tagged, then evaluate it normally
2374      {
2375        collapse();
2376      }
2377      if (m_op==IDENTITY)           // So a lazy expression of Constant or Tagged data will be returned here.
2378      {
2379        return m_id;
2380      }
2381            // from this point on we must have m_op!=IDENTITY and m_readytype=='E'
2382      DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(),  CplxVectorType(getNoValues()));
2383      CplxVectorType& resvec=result->getVectorRWC();
2384      DataReady_ptr resptr=DataReady_ptr(result);
2385    
2386      int sample;
2387      int totalsamples=getNumSamples();
2388      const CplxVectorType* res=0;       // Storage for answer
2389    LAZYDEBUG(cout << "Total number of samples=" <<totalsamples << endl;)
2390      #pragma omp parallel private(sample,res)
2391      {
2392            size_t roffset=0;
2393    #ifdef LAZY_STACK_PROF
2394            stackstart[omp_get_thread_num()]=&roffset;
2395            stackend[omp_get_thread_num()]=&roffset;
2396    #endif
2397            #pragma omp for schedule(static)
2398            for (sample=0;sample<totalsamples;++sample)
2399            {
2400                    roffset=0;
2401    #ifdef _OPENMP
2402                    res=resolveNodeSampleCplx(omp_get_thread_num(),sample,roffset);
2403    #else
2404                    res=resolveNodeSampleCplx(0,sample,roffset);
2405    #endif
2406    LAZYDEBUG(cout << "Sample #" << sample << endl;)
2407    LAZYDEBUG(cout << "Final res[" << roffset<< "]=" << (*res)[roffset] << (*res)[roffset]<< endl; )
2408                    CplxVectorType::size_type outoffset=result->getPointOffset(sample,0);
2409                    memcpy(&(resvec[outoffset]),&((*res)[roffset]),m_samplesize*sizeof(CplxVectorType::ElementType));
2410            }
2411      }
2412    #ifdef LAZY_STACK_PROF
2413      for (int i=0;i<getNumberOfThreads();++i)
2414      {
2415            size_t r=((size_t)stackstart[i] - (size_t)stackend[i]);
2416    //      cout << i << " " << stackstart[i] << " .. " << stackend[i] << " = " <<  r << endl;
2417            if (r>maxstackuse)
2418            {
2419                    maxstackuse=r;
2420            }
2421      }
2422      cout << "Max resolve Stack use=" << maxstackuse << endl;
2423    #endif
2424      return resptr;
2425    }
2426    
2427    
2428  std::string  std::string
2429  DataLazy::toString() const  DataLazy::toString() const
2430  {  {

Legend:
Removed from v.6516  
changed lines
  Added in v.6517

  ViewVC Help
Powered by ViewVC 1.1.26