/[escript]/branches/clazy/escriptcore/src/DataLazy.cpp
ViewVC logotype

Diff of /branches/clazy/escriptcore/src/DataLazy.cpp

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 2496 by jfenwick, Fri Jun 26 06:09:47 2009 UTC revision 2514 by jfenwick, Fri Jul 3 00:57:45 2009 UTC
# Line 42  bool privdebug=false; Line 42  bool privdebug=false;
42  #define DISABLEDEBUG privdebug=false;  #define DISABLEDEBUG privdebug=false;
43  }  }
44    
45  // #define SIZELIMIT  // #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {cerr << "\n!!!!!!! SIZE LIMIT EXCEEDED " << m_children << ";" << m_height << endl << toString() << endl;resolveToIdentity();}
 // #define SIZELIMIT if ((m_height>7) || (m_children>127)) {cerr << "\n!!!!!!! SIZE LIMIT EXCEEDED " << m_children << ";" << m_height << endl << toString() << endl; resolveToIdentity();}  
 //#define SIZELIMIT if ((m_height>7) || (m_children>127)) {resolveToIdentity();}  
46    
47  #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {resolveToIdentity();}  #define SIZELIMIT if ((m_height>escript::escriptParams.getTOO_MANY_LEVELS()) || (m_children>escript::escriptParams.getTOO_MANY_NODES())) {resolveToIdentity();}
48    
49    
50  /*  /*
51  How does DataLazy work?  How does DataLazy work?
52  ~~~~~~~~~~~~~~~~~~~~~~~  ~~~~~~~~~~~~~~~~~~~~~~~
# Line 435  opToString(ES_optype op) Line 434  opToString(ES_optype op)
434    return ES_opstrings[op];    return ES_opstrings[op];
435  }  }
436    
437    #ifdef LAZY_NODE_STORAGE
438    void DataLazy::LazyNodeSetup()
439    {
440    #ifdef _OPENMP
441        int numthreads=omp_get_max_threads();
442        m_samples.resize(numthreads*m_samplesize);
443        m_sampleids=new int[numthreads];
444        for (int i=0;i<numthreads;++i)
445        {
446            m_sampleids[i]=-1;  
447        }
448    #else
449        m_samples.resize(m_samplesize);
450        m_sampleids=new int[1];
451        m_sampleids[0]=-1;
452    #endif  // _OPENMP
453    }
454    #endif   // LAZY_NODE_STORAGE
455    
456    
457    // Creates an identity node
458  DataLazy::DataLazy(DataAbstract_ptr p)  DataLazy::DataLazy(DataAbstract_ptr p)
459      : parent(p->getFunctionSpace(),p->getShape())      : parent(p->getFunctionSpace(),p->getShape())
460    #ifdef LAZY_NODE_STORAGE
461        ,m_sampleids(0),
462        m_samples(1)
463    #endif
464  {  {
465     if (p->isLazy())     if (p->isLazy())
466     {     {
# Line 456  LAZYDEBUG(cout << "Wrapping " << dr.get( Line 479  LAZYDEBUG(cout << "Wrapping " << dr.get(
479  LAZYDEBUG(cout << "(1)Lazy created with " << m_samplesize << endl;)  LAZYDEBUG(cout << "(1)Lazy created with " << m_samplesize << endl;)
480  }  }
481    
   
   
   
482  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op)  DataLazy::DataLazy(DataAbstract_ptr left, ES_optype op)
483      : parent(left->getFunctionSpace(),left->getShape()),      : parent(left->getFunctionSpace(),left->getShape()),
484      m_op(op),      m_op(op),
# Line 487  DataLazy::DataLazy(DataAbstract_ptr left Line 507  DataLazy::DataLazy(DataAbstract_ptr left
507     m_maxsamplesize=max(m_samplesize,m_left->getMaxSampleSize());     m_maxsamplesize=max(m_samplesize,m_left->getMaxSampleSize());
508     m_children=m_left->m_children+1;     m_children=m_left->m_children+1;
509     m_height=m_left->m_height+1;     m_height=m_left->m_height+1;
510    #ifdef LAZY_NODE_STORAGE
511       LazyNodeSetup();
512    #endif
513     SIZELIMIT     SIZELIMIT
514  }  }
515    
# Line 557  LAZYDEBUG(cout << "Right " << right.get( Line 580  LAZYDEBUG(cout << "Right " << right.get(
580     m_buffsRequired=calcBuffs(m_left, m_right,m_op);     m_buffsRequired=calcBuffs(m_left, m_right,m_op);
581     m_children=m_left->m_children+m_right->m_children+2;     m_children=m_left->m_children+m_right->m_children+2;
582     m_height=max(m_left->m_height,m_right->m_height)+1;     m_height=max(m_left->m_height,m_right->m_height)+1;
583    #ifdef LAZY_NODE_STORAGE
584       LazyNodeSetup();
585    #endif
586     SIZELIMIT     SIZELIMIT
587  LAZYDEBUG(cout << "(3)Lazy created with " << m_samplesize << endl;)  LAZYDEBUG(cout << "(3)Lazy created with " << m_samplesize << endl;)
588  }  }
# Line 624  DataLazy::DataLazy(DataAbstract_ptr left Line 650  DataLazy::DataLazy(DataAbstract_ptr left
650     m_buffsRequired=calcBuffs(m_left, m_right,m_op);     m_buffsRequired=calcBuffs(m_left, m_right,m_op);
651     m_children=m_left->m_children+m_right->m_children+2;     m_children=m_left->m_children+m_right->m_children+2;
652     m_height=max(m_left->m_height,m_right->m_height)+1;     m_height=max(m_left->m_height,m_right->m_height)+1;
653    #ifdef LAZY_NODE_STORAGE
654       LazyNodeSetup();
655    #endif
656     SIZELIMIT     SIZELIMIT
657  LAZYDEBUG(cout << "(4)Lazy created with " << m_samplesize << endl;)  LAZYDEBUG(cout << "(4)Lazy created with " << m_samplesize << endl;)
658  }  }
# Line 656  DataLazy::DataLazy(DataAbstract_ptr left Line 685  DataLazy::DataLazy(DataAbstract_ptr left
685     m_maxsamplesize=max(m_samplesize,m_left->getMaxSampleSize());     m_maxsamplesize=max(m_samplesize,m_left->getMaxSampleSize());
686     m_children=m_left->m_children+1;     m_children=m_left->m_children+1;
687     m_height=m_left->m_height+1;     m_height=m_left->m_height+1;
688    #ifdef LAZY_NODE_STORAGE
689       LazyNodeSetup();
690    #endif
691     SIZELIMIT     SIZELIMIT
692  LAZYDEBUG(cout << "(5)Lazy created with " << m_samplesize << endl;)  LAZYDEBUG(cout << "(5)Lazy created with " << m_samplesize << endl;)
693  }  }
# Line 687  DataLazy::DataLazy(DataAbstract_ptr left Line 719  DataLazy::DataLazy(DataAbstract_ptr left
719     m_maxsamplesize=max(m_samplesize,m_left->getMaxSampleSize());     m_maxsamplesize=max(m_samplesize,m_left->getMaxSampleSize());
720     m_children=m_left->m_children+1;     m_children=m_left->m_children+1;
721     m_height=m_left->m_height+1;     m_height=m_left->m_height+1;
722    #ifdef LAZY_NODE_STORAGE
723       LazyNodeSetup();
724    #endif
725     SIZELIMIT     SIZELIMIT
726  LAZYDEBUG(cout << "(6)Lazy created with " << m_samplesize << endl;)  LAZYDEBUG(cout << "(6)Lazy created with " << m_samplesize << endl;)
727  }  }
# Line 719  DataLazy::DataLazy(DataAbstract_ptr left Line 754  DataLazy::DataLazy(DataAbstract_ptr left
754     m_maxsamplesize=max(m_samplesize,m_left->getMaxSampleSize());     m_maxsamplesize=max(m_samplesize,m_left->getMaxSampleSize());
755     m_children=m_left->m_children+1;     m_children=m_left->m_children+1;
756     m_height=m_left->m_height+1;     m_height=m_left->m_height+1;
757    #ifdef LAZY_NODE_STORAGE
758       LazyNodeSetup();
759    #endif
760     SIZELIMIT     SIZELIMIT
761  LAZYDEBUG(cout << "(7)Lazy created with " << m_samplesize << endl;)  LAZYDEBUG(cout << "(7)Lazy created with " << m_samplesize << endl;)
762  }  }
763    
764  DataLazy::~DataLazy()  DataLazy::~DataLazy()
765  {  {
766    #ifdef LAZY_NODE_SETUP
767       delete[] m_sampleids;
768       delete[] m_samples;
769    #endif
770  }  }
771    
772    
# Line 1543  cout << "\nWritten to: " << resultp << " Line 1585  cout << "\nWritten to: " << resultp << "
1585  }  }
1586    
1587    
1588    #ifdef LAZY_NODE_STORAGE
1589    
1590    // The result will be stored in m_samples
1591    // The return value is a pointer to the DataVector, offset is the offset within the return value
1592    const DataTypes::ValueType*
1593    DataLazy::resolveNodeSample(int tid, int sampleNo, size_t& roffset)
1594    {
1595    LAZYDEBUG(cout << "Resolve sample " << toString() << endl;)
1596        // collapse so we have a 'E' node or an IDENTITY for some other type
1597      if (m_readytype!='E' && m_op!=IDENTITY)
1598      {
1599        collapse();
1600      }
1601      if (m_op==IDENTITY)  
1602      {
1603        const ValueType& vec=m_id->getVectorRO();
1604    //     if (m_readytype=='C')
1605    //     {
1606    //  roffset=0;      // all samples read from the same position
1607    //  return &(m_samples);
1608    //     }
1609        roffset=m_id->getPointOffset(sampleNo, 0);
1610        return &(vec);
1611      }
1612      if (m_readytype!='E')
1613      {
1614        throw DataException("Programmer Error - Collapse did not produce an expanded node.");
1615      }
1616      if (m_sampleids[tid]==sampleNo)
1617      {
1618        roffset=tid*m_samplesize;
1619        return &(m_samples);        // sample is already resolved
1620      }
1621      m_sampleids[tid]=sampleNo;
1622      switch (getOpgroup(m_op))
1623      {
1624      case G_UNARY:
1625      case G_UNARY_P: return resolveNodeUnary(tid, sampleNo, roffset);
1626      case G_BINARY: return resolveNodeBinary(tid, sampleNo, roffset);
1627      case G_NP1OUT: return resolveNodeNP1OUT(tid, sampleNo, roffset);
1628      case G_NP1OUT_P: return resolveNodeNP1OUT_P(tid, sampleNo, roffset);
1629      case G_TENSORPROD: return resolveNodeTProd(tid, sampleNo, roffset);
1630      case G_NP1OUT_2P: return resolveNodeNP1OUT_2P(tid, sampleNo, roffset);
1631      default:
1632        throw DataException("Programmer Error - resolveSample does not know how to process "+opToString(m_op)+".");
1633      }
1634    }
1635    
1636    const DataTypes::ValueType*
1637    DataLazy::resolveNodeUnary(int tid, int sampleNo, size_t& roffset)
1638    {
1639        // we assume that any collapsing has been done before we get here
1640        // since we only have one argument we don't need to think about only
1641        // processing single points.
1642        // we will also know we won't get identity nodes
1643      if (m_readytype!='E')
1644      {
1645        throw DataException("Programmer error - resolveUnary should only be called on expanded Data.");
1646      }
1647      if (m_op==IDENTITY)
1648      {
1649        throw DataException("Programmer error - resolveNodeUnary should not be called on identity nodes.");
1650      }
1651      const DataTypes::ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, roffset);
1652      const double* left=&((*leftres)[roffset]);
1653      roffset=m_samplesize*tid;
1654      double* result=&(m_samples[roffset]);
1655      switch (m_op)
1656      {
1657        case SIN:  
1658        tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sin);
1659        break;
1660        case COS:
1661        tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cos);
1662        break;
1663        case TAN:
1664        tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tan);
1665        break;
1666        case ASIN:
1667        tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::asin);
1668        break;
1669        case ACOS:
1670        tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::acos);
1671        break;
1672        case ATAN:
1673        tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::atan);
1674        break;
1675        case SINH:
1676        tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sinh);
1677        break;
1678        case COSH:
1679        tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::cosh);
1680        break;
1681        case TANH:
1682        tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::tanh);
1683        break;
1684        case ERF:
1685    #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1686        throw DataException("Error - Data:: erf function is not supported on _WIN32 platforms.");
1687    #else
1688        tensor_unary_operation(m_samplesize, left, result, ::erf);
1689        break;
1690    #endif
1691       case ASINH:
1692    #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1693        tensor_unary_operation(m_samplesize, left, result, escript::asinh_substitute);
1694    #else
1695        tensor_unary_operation(m_samplesize, left, result, ::asinh);
1696    #endif  
1697        break;
1698       case ACOSH:
1699    #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1700        tensor_unary_operation(m_samplesize, left, result, escript::acosh_substitute);
1701    #else
1702        tensor_unary_operation(m_samplesize, left, result, ::acosh);
1703    #endif  
1704        break;
1705       case ATANH:
1706    #if defined (_WIN32) && !defined(__INTEL_COMPILER)
1707        tensor_unary_operation(m_samplesize, left, result, escript::atanh_substitute);
1708    #else
1709        tensor_unary_operation(m_samplesize, left, result, ::atanh);
1710    #endif  
1711        break;
1712        case LOG10:
1713        tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log10);
1714        break;
1715        case LOG:
1716        tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::log);
1717        break;
1718        case SIGN:
1719        tensor_unary_operation(m_samplesize, left, result, escript::fsign);
1720        break;
1721        case ABS:
1722        tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::fabs);
1723        break;
1724        case NEG:
1725        tensor_unary_operation(m_samplesize, left, result, negate<double>());
1726        break;
1727        case POS:
1728        // it doesn't mean anything for delayed.
1729        // it will just trigger a deep copy of the lazy object
1730        throw DataException("Programmer error - POS not supported for lazy data.");
1731        break;
1732        case EXP:
1733        tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::exp);
1734        break;
1735        case SQRT:
1736        tensor_unary_operation<double (*)(double)>(m_samplesize, left, result, ::sqrt);
1737        break;
1738        case RECIP:
1739        tensor_unary_operation(m_samplesize, left, result, bind1st(divides<double>(),1.));
1740        break;
1741        case GZ:
1742        tensor_unary_operation(m_samplesize, left, result, bind2nd(greater<double>(),0.0));
1743        break;
1744        case LZ:
1745        tensor_unary_operation(m_samplesize, left, result, bind2nd(less<double>(),0.0));
1746        break;
1747        case GEZ:
1748        tensor_unary_operation(m_samplesize, left, result, bind2nd(greater_equal<double>(),0.0));
1749        break;
1750        case LEZ:
1751        tensor_unary_operation(m_samplesize, left, result, bind2nd(less_equal<double>(),0.0));
1752        break;
1753    // There are actually G_UNARY_P but I don't see a compelling reason to treat them differently
1754        case NEZ:
1755        tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsGT(),m_tol));
1756        break;
1757        case EZ:
1758        tensor_unary_operation(m_samplesize, left, result, bind2nd(AbsLTE(),m_tol));
1759        break;
1760    
1761        default:
1762        throw DataException("Programmer error - resolveUnary can not resolve operator "+opToString(m_op)+".");
1763      }
1764      return &(m_samples);
1765    }
1766    
1767    
1768    const DataTypes::ValueType*
1769    DataLazy::resolveNodeNP1OUT(int tid, int sampleNo, size_t& roffset)
1770    {
1771        // we assume that any collapsing has been done before we get here
1772        // since we only have one argument we don't need to think about only
1773        // processing single points.
1774      if (m_readytype!='E')
1775      {
1776        throw DataException("Programmer error - resolveNodeNP1OUT should only be called on expanded Data.");
1777      }
1778      if (m_op==IDENTITY)
1779      {
1780        throw DataException("Programmer error - resolveNodeNP1OUT should not be called on identity nodes.");
1781      }
1782      size_t subroffset;
1783      const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1784      roffset=m_samplesize*tid;
1785      size_t loop=0;
1786      size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1787      size_t step=getNoValues();
1788      size_t offset=roffset;
1789      switch (m_op)
1790      {
1791        case SYM:
1792        for (loop=0;loop<numsteps;++loop)
1793        {
1794            DataMaths::symmetric(*leftres,m_left->getShape(),subroffset, m_samples, getShape(), offset);
1795            subroffset+=step;
1796            offset+=step;
1797        }
1798        break;
1799        case NSYM:
1800        for (loop=0;loop<numsteps;++loop)
1801        {
1802            DataMaths::nonsymmetric(*leftres,m_left->getShape(),subroffset, m_samples, getShape(), offset);
1803            subroffset+=step;
1804            offset+=step;
1805        }
1806        break;
1807        default:
1808        throw DataException("Programmer error - resolveNP1OUT can not resolve operator "+opToString(m_op)+".");
1809      }
1810      return &m_samples;
1811    }
1812    
1813    const DataTypes::ValueType*
1814    DataLazy::resolveNodeNP1OUT_P(int tid, int sampleNo, size_t& roffset)
1815    {
1816        // we assume that any collapsing has been done before we get here
1817        // since we only have one argument we don't need to think about only
1818        // processing single points.
1819      if (m_readytype!='E')
1820      {
1821        throw DataException("Programmer error - resolveNodeNP1OUT_P should only be called on expanded Data.");
1822      }
1823      if (m_op==IDENTITY)
1824      {
1825        throw DataException("Programmer error - resolveNodeNP1OUT_P should not be called on identity nodes.");
1826      }
1827      size_t subroffset;
1828      size_t offset;
1829      const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1830      roffset=m_samplesize*tid;
1831      offset=roffset;
1832      size_t loop=0;
1833      size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1834      size_t outstep=getNoValues();
1835      size_t instep=m_left->getNoValues();
1836      switch (m_op)
1837      {
1838        case TRACE:
1839        for (loop=0;loop<numsteps;++loop)
1840        {
1841                DataMaths::trace(*leftres,m_left->getShape(),subroffset, m_samples ,getShape(),offset,m_axis_offset);
1842            subroffset+=instep;
1843            offset+=outstep;
1844        }
1845        break;
1846        case TRANS:
1847        for (loop=0;loop<numsteps;++loop)
1848        {
1849                DataMaths::transpose(*leftres,m_left->getShape(),subroffset, m_samples, getShape(),offset,m_axis_offset);
1850            subroffset+=instep;
1851            offset+=outstep;
1852        }
1853        break;
1854        default:
1855        throw DataException("Programmer error - resolveNP1OUTP can not resolve operator "+opToString(m_op)+".");
1856      }
1857      return &m_samples;
1858    }
1859    
1860    
1861    const DataTypes::ValueType*
1862    DataLazy::resolveNodeNP1OUT_2P(int tid, int sampleNo, size_t& roffset)
1863    {
1864      if (m_readytype!='E')
1865      {
1866        throw DataException("Programmer error - resolveNodeNP1OUT_2P should only be called on expanded Data.");
1867      }
1868      if (m_op==IDENTITY)
1869      {
1870        throw DataException("Programmer error - resolveNodeNP1OUT_2P should not be called on identity nodes.");
1871      }
1872      size_t subroffset;
1873      size_t offset;
1874      const ValueType* leftres=m_left->resolveNodeSample(tid, sampleNo, subroffset);
1875      roffset=m_samplesize*tid;
1876      offset=roffset;
1877      size_t loop=0;
1878      size_t numsteps=(m_readytype=='E')?getNumDPPSample():1;
1879      size_t outstep=getNoValues();
1880      size_t instep=m_left->getNoValues();
1881      switch (m_op)
1882      {
1883        case SWAP:
1884        for (loop=0;loop<numsteps;++loop)
1885        {
1886                DataMaths::swapaxes(*leftres,m_left->getShape(),subroffset, m_samples, getShape(),offset, m_axis_offset, m_transpose);
1887            subroffset+=instep;
1888            offset+=outstep;
1889        }
1890        break;
1891        default:
1892        throw DataException("Programmer error - resolveNodeNP1OUT2P can not resolve operator "+opToString(m_op)+".");
1893      }
1894      return &m_samples;
1895    }
1896    
1897    
1898    
1899    // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
1900    // have already been collapsed to IDENTITY. So we must have at least one expanded child.
1901    // If both children are expanded, then we can process them in a single operation (we treat
1902    // the whole sample as one big datapoint.
1903    // If one of the children is not expanded, then we need to treat each point in the sample
1904    // individually.
1905    // There is an additional complication when scalar operations are considered.
1906    // For example, 2+Vector.
1907    // In this case each double within the point is treated individually
1908    const DataTypes::ValueType*
1909    DataLazy::resolveNodeBinary(int tid, int sampleNo, size_t& roffset)
1910    {
1911    LAZYDEBUG(cout << "Resolve binary: " << toString() << endl;)
1912    
1913      size_t lroffset=0, rroffset=0;    // offsets in the left and right result vectors
1914        // first work out which of the children are expanded
1915      bool leftExp=(m_left->m_readytype=='E');
1916      bool rightExp=(m_right->m_readytype=='E');
1917      if (!leftExp && !rightExp)
1918      {
1919        throw DataException("Programmer Error - please use collapse if neither argument has type 'E'.");
1920      }
1921      bool leftScalar=(m_left->getRank()==0);
1922      bool rightScalar=(m_right->getRank()==0);
1923      if ((m_left->getRank()!=m_right->getRank()) && (!leftScalar && !rightScalar))
1924      {
1925        throw DataException("resolveBinary - ranks of arguments must match unless one of them is scalar.");
1926      }
1927      size_t leftsize=m_left->getNoValues();
1928      size_t rightsize=m_right->getNoValues();
1929      size_t chunksize=1;           // how many doubles will be processed in one go
1930      int leftstep=0;       // how far should the left offset advance after each step
1931      int rightstep=0;
1932      int numsteps=0;       // total number of steps for the inner loop
1933      int oleftstep=0;  // the o variables refer to the outer loop
1934      int orightstep=0; // The outer loop is only required in cases where there is an extended scalar
1935      int onumsteps=1;
1936      
1937      bool LES=(leftExp && leftScalar); // Left is an expanded scalar
1938      bool RES=(rightExp && rightScalar);
1939      bool LS=(!leftExp && leftScalar); // left is a single scalar
1940      bool RS=(!rightExp && rightScalar);
1941      bool LN=(!leftExp && !leftScalar);    // left is a single non-scalar
1942      bool RN=(!rightExp && !rightScalar);
1943      bool LEN=(leftExp && !leftScalar);    // left is an expanded non-scalar
1944      bool REN=(rightExp && !rightScalar);
1945    
1946      if ((LES && RES) || (LEN && REN)) // both are Expanded scalars or both are expanded non-scalars
1947      {
1948        chunksize=m_left->getNumDPPSample()*leftsize;
1949        leftstep=0;
1950        rightstep=0;
1951        numsteps=1;
1952      }
1953      else if (LES || RES)
1954      {
1955        chunksize=1;
1956        if (LES)        // left is an expanded scalar
1957        {
1958            if (RS)
1959            {
1960               leftstep=1;
1961               rightstep=0;
1962               numsteps=m_left->getNumDPPSample();
1963            }
1964            else        // RN or REN
1965            {
1966               leftstep=0;
1967               oleftstep=1;
1968               rightstep=1;
1969               orightstep=(RN ? -(int)rightsize : 0);
1970               numsteps=rightsize;
1971               onumsteps=m_left->getNumDPPSample();
1972            }
1973        }
1974        else        // right is an expanded scalar
1975        {
1976            if (LS)
1977            {
1978               rightstep=1;
1979               leftstep=0;
1980               numsteps=m_right->getNumDPPSample();
1981            }
1982            else
1983            {
1984               rightstep=0;
1985               orightstep=1;
1986               leftstep=1;
1987               oleftstep=(LN ? -(int)leftsize : 0);
1988               numsteps=leftsize;
1989               onumsteps=m_right->getNumDPPSample();
1990            }
1991        }
1992      }
1993      else  // this leaves (LEN, RS), (LEN, RN) and their transposes
1994      {
1995        if (LEN)    // and Right will be a single value
1996        {
1997            chunksize=rightsize;
1998            leftstep=rightsize;
1999            rightstep=0;
2000            numsteps=m_left->getNumDPPSample();
2001            if (RS)
2002            {
2003               numsteps*=leftsize;
2004            }
2005        }
2006        else    // REN
2007        {
2008            chunksize=leftsize;
2009            rightstep=leftsize;
2010            leftstep=0;
2011            numsteps=m_right->getNumDPPSample();
2012            if (LS)
2013            {
2014               numsteps*=rightsize;
2015            }
2016        }
2017      }
2018    
2019      int resultStep=max(leftstep,rightstep);   // only one (at most) should be !=0
2020        // Get the values of sub-expressions
2021      const ValueType* left=m_left->resolveNodeSample(tid,sampleNo,lroffset);  
2022      const ValueType* right=m_right->resolveNodeSample(tid,sampleNo,rroffset);
2023    LAZYDEBUG(cout << "Post sub calls in " << toString() << endl;)
2024    LAZYDEBUG(cout << "shapes=" << DataTypes::shapeToString(m_left->getShape()) << "," << DataTypes::shapeToString(m_right->getShape()) << endl;)
2025    LAZYDEBUG(cout << "chunksize=" << chunksize << endl << "leftstep=" << leftstep << " rightstep=" << rightstep;)
2026    LAZYDEBUG(cout << " numsteps=" << numsteps << endl << "oleftstep=" << oleftstep << " orightstep=" << orightstep;)
2027    LAZYDEBUG(cout << "onumsteps=" << onumsteps << endl;)
2028    LAZYDEBUG(cout << " DPPS=" << m_left->getNumDPPSample() << "," <<m_right->getNumDPPSample() << endl;)
2029    LAZYDEBUG(cout << "" << LS << RS << LN << RN << LES << RES <<LEN << REN <<   endl;)
2030    
2031    LAZYDEBUG(cout << "Left res["<< lroffset<< "]=" << (*left)[lroffset] << endl;)
2032    LAZYDEBUG(cout << "Right res["<< rroffset<< "]=" << (*right)[rroffset] << endl;)
2033    
2034    
2035      roffset=m_samplesize*tid;
2036      double* resultp=&(m_samples[roffset]);        // results are stored at the vector offset we recieved
2037      switch(m_op)
2038      {
2039        case ADD:
2040            PROC_OP(NO_ARG,plus<double>());
2041        break;
2042        case SUB:
2043        PROC_OP(NO_ARG,minus<double>());
2044        break;
2045        case MUL:
2046        PROC_OP(NO_ARG,multiplies<double>());
2047        break;
2048        case DIV:
2049        PROC_OP(NO_ARG,divides<double>());
2050        break;
2051        case POW:
2052           PROC_OP(double (double,double),::pow);
2053        break;
2054        default:
2055        throw DataException("Programmer error - resolveBinary can not resolve operator "+opToString(m_op)+".");
2056      }
2057    LAZYDEBUG(cout << "Result res[" << roffset<< "]" << m_samples[roffset] << endl;)
2058      return &m_samples;
2059    }
2060    
2061    
2062    // This method assumes that any subexpressions which evaluate to Constant or Tagged Data
2063    // have already been collapsed to IDENTITY. So we must have at least one expanded child.
2064    // unlike the other resolve helpers, we must treat these datapoints separately.
2065    const DataTypes::ValueType*
2066    DataLazy::resolveNodeTProd(int tid, int sampleNo, size_t& roffset)
2067    {
2068    LAZYDEBUG(cout << "Resolve TensorProduct: " << toString() << endl;)
2069    
2070      size_t lroffset=0, rroffset=0;    // offsets in the left and right result vectors
2071        // first work out which of the children are expanded
2072      bool leftExp=(m_left->m_readytype=='E');
2073      bool rightExp=(m_right->m_readytype=='E');
2074      int steps=getNumDPPSample();
2075      int leftStep=(leftExp? m_left->getNoValues() : 0);        // do not have scalars as input to this method
2076      int rightStep=(rightExp?m_right->getNoValues() : 0);
2077    
2078      int resultStep=getNoValues();
2079      roffset=m_samplesize*tid;
2080      size_t offset=roffset;
2081    
2082      const ValueType* left=m_left->resolveNodeSample(tid, sampleNo, lroffset);
2083    
2084      const ValueType* right=m_right->resolveNodeSample(tid, sampleNo, rroffset);
2085    
2086    LAZYDEBUG(cerr << "[Left shape]=" << DataTypes::shapeToString(m_left->getShape()) << "\n[Right shape]=" << DataTypes::shapeToString(m_right->getShape()) << " result=" <<DataTypes::shapeToString(getShape()) <<  endl;
2087    cout << getNoValues() << endl;)
2088    
2089    
2090    LAZYDEBUG(cerr << "Post sub calls: " << toString() << endl;)
2091    LAZYDEBUG(cout << "LeftExp=" << leftExp << " rightExp=" << rightExp << endl;)
2092    LAZYDEBUG(cout << "LeftR=" << m_left->getRank() << " rightExp=" << m_right->getRank() << endl;)
2093    LAZYDEBUG(cout << "LeftSize=" << m_left->getNoValues() << " RightSize=" << m_right->getNoValues() << endl;)
2094    LAZYDEBUG(cout << "m_samplesize=" << m_samplesize << endl;)
2095    LAZYDEBUG(cout << "outputshape=" << DataTypes::shapeToString(getShape()) << endl;)
2096    LAZYDEBUG(cout << "DPPS=" << m_right->getNumDPPSample() <<"."<<endl;)
2097    
2098      double* resultp=&(m_samples[offset]);     // results are stored at the vector offset we recieved
2099      switch(m_op)
2100      {
2101        case PROD:
2102        for (int i=0;i<steps;++i,resultp+=resultStep)
2103        {
2104              const double *ptr_0 = &((*left)[lroffset]);
2105              const double *ptr_1 = &((*right)[rroffset]);
2106    
2107    LAZYDEBUG(cout << DataTypes::pointToString(*left, m_left->getShape(),lroffset,"LEFT") << endl;)
2108    LAZYDEBUG(cout << DataTypes::pointToString(*right,m_right->getShape(),rroffset, "RIGHT") << endl;)
2109    
2110              matrix_matrix_product(m_SL, m_SM, m_SR, ptr_0, ptr_1, resultp, m_transpose);
2111    
2112          lroffset+=leftStep;
2113          rroffset+=rightStep;
2114        }
2115        break;
2116        default:
2117        throw DataException("Programmer error - resolveTProduct can not resolve operator "+opToString(m_op)+".");
2118      }
2119      roffset=offset;
2120      return &m_samples;
2121    }
2122    #endif //LAZY_NODE_STORAGE
2123    
2124  /*  /*
2125    \brief Compute the value of the expression for the given sample.    \brief Compute the value of the expression for the given sample.
# Line 1613  DataLazy::resolveSample(BufferGroup& bg, Line 2190  DataLazy::resolveSample(BufferGroup& bg,
2190  }  }
2191    
2192    
2193  // This needs to do the work of the idenity constructor  // This needs to do the work of the identity constructor
2194  void  void
2195  DataLazy::resolveToIdentity()  DataLazy::resolveToIdentity()
2196  {  {
2197     if (m_op==IDENTITY)     if (m_op==IDENTITY)
2198      return;      return;
2199     DataReady_ptr p=resolve();  #ifndef LAZY_NODE_STORAGE
2200       DataReady_ptr p=resolveVectorWorker();
2201    #else
2202       DataReady_ptr p=resolveNodeWorker();
2203    #endif
2204     makeIdentity(p);     makeIdentity(p);
2205  }  }
2206    
# Line 1642  void DataLazy::makeIdentity(const DataRe Line 2223  void DataLazy::makeIdentity(const DataRe
2223     m_right.reset();     m_right.reset();
2224  }  }
2225    
2226    
2227    DataReady_ptr
2228    DataLazy::resolve()
2229    {
2230        resolveToIdentity();
2231        return m_id;
2232    }
2233    
2234    #ifdef LAZY_NODE_STORAGE
2235    
2236    // This version of resolve uses storage in each node to hold results
2237    DataReady_ptr
2238    DataLazy::resolveNodeWorker()
2239    {
2240      if (m_readytype!='E')     // if the whole sub-expression is Constant or Tagged, then evaluate it normally
2241      {
2242        collapse();
2243      }
2244      if (m_op==IDENTITY)       // So a lazy expression of Constant or Tagged data will be returned here.
2245      {
2246        return m_id;
2247      }
2248        // from this point on we must have m_op!=IDENTITY and m_readytype=='E'
2249      DataExpanded* result=new DataExpanded(getFunctionSpace(),getShape(),  ValueType(getNoValues()));
2250      ValueType& resvec=result->getVectorRW();
2251      DataReady_ptr resptr=DataReady_ptr(result);
2252    
2253      int sample;
2254      int totalsamples=getNumSamples();
2255      const ValueType* res=0;   // Storage for answer
2256    LAZYDEBUG(cout << "Total number of samples=" <<totalsamples << endl;)
2257      #pragma omp parallel for private(sample,res) schedule(static)
2258      for (sample=0;sample<totalsamples;++sample)
2259      {
2260        size_t roffset=0;
2261    #ifdef _OPENMP
2262        res=resolveNodeSample(omp_get_thread_num(),sample,roffset);
2263    #else
2264        res=resolveNodeSample(0,sample,roffset);
2265    #endif
2266    LAZYDEBUG(cout << "Sample #" << sample << endl;)
2267    LAZYDEBUG(cout << "Final res[" << roffset<< "]=" << (*res)[roffset] << (*res)[roffset]<< endl; )
2268        DataVector::size_type outoffset=result->getPointOffset(sample,0);
2269        memcpy(&(resvec[outoffset]),&((*res)[roffset]),m_samplesize*sizeof(DataVector::ElementType));
2270      }
2271      return resptr;
2272    }
2273    
2274    #endif // LAZY_NODE_STORAGE
2275    
2276  // To simplify the memory management, all threads operate on one large vector, rather than one each.  // To simplify the memory management, all threads operate on one large vector, rather than one each.
2277  // Each sample is evaluated independently and copied into the result DataExpanded.  // Each sample is evaluated independently and copied into the result DataExpanded.
2278  DataReady_ptr  DataReady_ptr
2279  DataLazy::resolve()  DataLazy::resolveVectorWorker()
2280  {  {
2281    
2282  LAZYDEBUG(cout << "Sample size=" << m_samplesize << endl;)  LAZYDEBUG(cout << "Sample size=" << m_samplesize << endl;)
# Line 1679  LAZYDEBUG(cout << "Total number of sampl Line 2310  LAZYDEBUG(cout << "Total number of sampl
2310    #pragma omp parallel for private(sample,resoffset,outoffset,res) schedule(static)    #pragma omp parallel for private(sample,resoffset,outoffset,res) schedule(static)
2311    for (sample=0;sample<totalsamples;++sample)    for (sample=0;sample<totalsamples;++sample)
2312    {    {
       if (sample==0)  {ENABLEDEBUG}  
   
 //       if (sample==5800/getNumDPPSample())  {ENABLEDEBUG}  
2313  LAZYDEBUG(cout << "################################# " << sample << endl;)  LAZYDEBUG(cout << "################################# " << sample << endl;)
2314  #ifdef _OPENMP  #ifdef _OPENMP
2315      res=resolveSample(v,threadbuffersize*omp_get_thread_num(),sample,resoffset);      res=resolveSample(v,threadbuffersize*omp_get_thread_num(),sample,resoffset);
# Line 1699  LAZYDEBUG(cerr << "outoffset=" << outoff Line 2327  LAZYDEBUG(cerr << "outoffset=" << outoff
2327      }      }
2328  LAZYDEBUG(cerr << DataTypes::pointToString(resvec,getShape(),outoffset-m_samplesize+DataTypes::noValues(getShape()),"Final result:") << endl;)  LAZYDEBUG(cerr << DataTypes::pointToString(resvec,getShape(),outoffset-m_samplesize+DataTypes::noValues(getShape()),"Final result:") << endl;)
2329  LAZYDEBUG(cerr << "*********************************" << endl;)  LAZYDEBUG(cerr << "*********************************" << endl;)
     DISABLEDEBUG  
2330    }    }
2331    return resptr;    return resptr;
2332  }  }
# Line 1876  DataLazy::setToZero() Line 2503  DataLazy::setToZero()
2503  //   m_readytype='C';  //   m_readytype='C';
2504  //   m_buffsRequired=1;  //   m_buffsRequired=1;
2505    
2506      privdebug=privdebug;  // to stop the compiler complaining about unused privdebug
2507    throw DataException("Programmer error - setToZero not supported for DataLazy (DataLazy objects should be read only).");    throw DataException("Programmer error - setToZero not supported for DataLazy (DataLazy objects should be read only).");
   
2508  }  }
2509    
2510  bool  bool

Legend:
Removed from v.2496  
changed lines
  Added in v.2514

  ViewVC Help
Powered by ViewVC 1.1.26