# Diff of /trunk/ripley/src/Rectangle.cpp

revision 4675 by sshaw, Mon Feb 17 01:07:12 2014 UTC revision 4765 by sshaw, Wed Mar 19 00:17:16 2014 UTC
# Line 14  Line 14
14  *  *
15  *****************************************************************************/  *****************************************************************************/
16
17    #include <algorithm>
18
19  #include <ripley/Rectangle.h>  #include <ripley/Rectangle.h>
20  #include <paso/SystemMatrix.h>  #include <paso/SystemMatrix.h>
21  #include <esysUtils/esysFileWriter.h>  #include <esysUtils/esysFileWriter.h>
22  #include <ripley/DefaultAssembler2D.h>  #include <ripley/DefaultAssembler2D.h>
23  #include <ripley/WaveAssembler2D.h>  #include <ripley/WaveAssembler2D.h>
24    #include <ripley/LameAssembler2D.h>
25    #include <ripley/domainhelpers.h>
26  #include <boost/scoped_array.hpp>  #include <boost/scoped_array.hpp>
27  #include "esysUtils/EsysRandom.h"  #include "esysUtils/EsysRandom.h"
28    #include "blocktools.h"
29
30  #ifdef USE_NETCDF  #ifdef USE_NETCDF
31  #include <netcdfcpp.h>  #include <netcdfcpp.h>
# Line 54  Rectangle::Rectangle(int n0, int n1, dou Line 59  Rectangle::Rectangle(int n0, int n1, dou
59      }      }
60
61      bool warn=false;      bool warn=false;
62      // if number of subdivisions is non-positive, try to subdivide by the same      std::vector<int> factors;
63      // ratio as the number of elements      int ranks = m_mpiInfo->size;
64      if (d0<=0 && d1<=0) {      int epr[2] = {n0,n1};
65          warn=true;      int d[2] = {d0,d1};
66          d0=max(1, (int)(sqrt(m_mpiInfo->size*(n0+1)/(float)(n1+1))));      if (d0<=0 || d1<=0) {
67          d1=m_mpiInfo->size/d0;          for (int i = 0; i < 2; i++) {
68          if (d0*d1 != m_mpiInfo->size) {              if (d[i] < 1) {
69              // ratios not the same so subdivide side with more elements only                  d[i] = 1;
70              if (n0>n1) {                  continue;
d0=0;
d1=1;
} else {
d0=1;
d1=0;
71              }              }
72          }              epr[i] = -1; // can no longer be max
73                //remove
74                if (ranks % d[i] != 0) {
75                    throw RipleyException("Invalid number of spatial subdivisions");
76                }
77                ranks /= d[i];
78            }
79            factorise(factors, ranks);
80            if (factors.size() != 0) {
81                warn = true;
82            }
83        }
84
85        while (factors.size() > 0) {
86            int i = epr[0] > epr[1] ? 0 : 1;
87            int f = factors.back();
88            factors.pop_back();
89            d[i] *= f;
90            epr[i] /= f;
91      }      }
92      if (d0<=0) {      d0 = d[0]; d1 = d[1];
93          // d1 is preset, determine d0 - throw further down if result is no good
d0=m_mpiInfo->size/d1;
} else if (d1<=0) {
// d0 is preset, determine d1 - throw further down if result is no good
d1=m_mpiInfo->size/d0;
}

94      // ensure number of subdivisions is valid and nodes can be distributed      // ensure number of subdivisions is valid and nodes can be distributed
95      // among number of ranks      // among number of ranks
96      if (d0*d1 != m_mpiInfo->size)      if (d0*d1 != m_mpiInfo->size)
333      }      }
334  }  }
335
336    void Rectangle::readBinaryGridFromZipped(escript::Data& out, string filename,
338    {
339        // the mapping is not universally correct but should work on our
340        // supported platforms
341        switch (params.dataType) {
342            case DATATYPE_INT32:
344                break;
345            case DATATYPE_FLOAT32:
347                break;
348            case DATATYPE_FLOAT64:
350                break;
351            default:
352                throw RipleyException("readBinaryGridFromZipped(): invalid or unsupported datatype");
353        }
354    }
355
356  template<typename ValueType>  template<typename ValueType>
357  void Rectangle::readBinaryGridImpl(escript::Data& out, const string& filename,  void Rectangle::readBinaryGridImpl(escript::Data& out, const string& filename,
441      f.close();      f.close();
442  }  }
443
444    template<typename ValueType>
445    void Rectangle::readBinaryGridZippedImpl(escript::Data& out, const string& filename,
447    {
448        // check destination function space
449        int myN0, myN1;
450        if (out.getFunctionSpace().getTypeCode() == Nodes) {
451            myN0 = m_NN[0];
452            myN1 = m_NN[1];
453        } else if (out.getFunctionSpace().getTypeCode() == Elements ||
454                    out.getFunctionSpace().getTypeCode() == ReducedElements) {
455            myN0 = m_NE[0];
456            myN1 = m_NE[1];
457        } else
458            throw RipleyException("readBinaryGrid(): invalid function space for output data object");
459
460        // check file existence and size
461        ifstream f(filename.c_str(), ifstream::binary);
462        if (f.fail()) {
463            throw RipleyException("readBinaryGridFromZipped(): cannot open file");
464        }
465        f.seekg(0, ios::end);
466        const int numComp = out.getDataPointSize();
467        int filesize = f.tellg();
468        f.seekg(0, ios::beg);
469        std::vector<char> compressed(filesize);
471        f.close();
472        std::vector<char> decompressed = unzip(compressed);
473        filesize = decompressed.size();
474        const int reqsize = params.numValues[0]*params.numValues[1]*numComp*sizeof(ValueType);
475        if (filesize < reqsize) {
476            throw RipleyException("readBinaryGridFromZipped(): not enough data in file");
477        }
478
479        // check if this rank contributes anything
480        if (params.first[0] >= m_offset[0]+myN0 ||
481                params.first[0]+params.numValues[0] <= m_offset[0] ||
482                params.first[1] >= m_offset[1]+myN1 ||
483                params.first[1]+params.numValues[1] <= m_offset[1]) {
484            f.close();
485            return;
486        }
487
488        // now determine how much this rank has to write
489
490        // first coordinates in data object to write to
491        const int first0 = max(0, params.first[0]-m_offset[0]);
492        const int first1 = max(0, params.first[1]-m_offset[1]);
493        // indices to first value in file
494        const int idx0 = max(0, m_offset[0]-params.first[0]);
495        const int idx1 = max(0, m_offset[1]-params.first[1]);
496        // number of values to read
497        const int num0 = min(params.numValues[0]-idx0, myN0-first0);
498        const int num1 = min(params.numValues[1]-idx1, myN1-first1);
499
500        out.requireWrite();
501        vector<ValueType> values(num0*numComp);
502        const int dpp = out.getNumDataPointsPerSample();
503
504        for (int y=0; y<num1; y++) {
505            const int fileofs = numComp*(idx0+(idx1+y)*params.numValues[0]);
506                memcpy((char*)&values[0], (char*)&decompressed[fileofs*sizeof(ValueType)], num0*numComp*sizeof(ValueType));
507            for (int x=0; x<num0; x++) {
508                const int baseIndex = first0+x*params.multiplier[0]
509                                        +(first1+y*params.multiplier[1])*myN0;
510                for (int m1=0; m1<params.multiplier[1]; m1++) {
511                    for (int m0=0; m0<params.multiplier[0]; m0++) {
512                        const int dataIndex = baseIndex+m0+m1*myN0;
513                        double* dest = out.getSampleDataRW(dataIndex);
514                        for (int c=0; c<numComp; c++) {
515                            ValueType val = values[x*numComp+c];
516
517                            if (params.byteOrder != BYTEORDER_NATIVE) {
518                                char* cval = reinterpret_cast<char*>(&val);
519                                // this will alter val!!
520                                byte_swap32(cval);
521                            }
522                            if (!std::isnan(val)) {
523                                for (int q=0; q<dpp; q++) {
524                                    *dest++ = static_cast<double>(val);
525                                }
526                            }
527                        }
528                    }
529                }
530            }
531        }
532
533        f.close();
534    }
535
536  void Rectangle::writeBinaryGrid(const escript::Data& in, string filename,  void Rectangle::writeBinaryGrid(const escript::Data& in, string filename,
537                                  int byteOrder, int dataType) const                                  int byteOrder, int dataType) const
538  {  {
# Line 490  void Rectangle::dump(const string& fileN Line 614  void Rectangle::dump(const string& fileN
614          fn+=".silo";          fn+=".silo";
615      }      }
616
617      int driver=DB_HDF5;      int driver=DB_HDF5;
618      DBfile* dbfile = NULL;      DBfile* dbfile = NULL;
619      const char* blockDirFmt = "/block%04d";      const char* blockDirFmt = "/block%04d";
620
# Line 1511  void Rectangle::createPattern() Line 1635  void Rectangle::createPattern()
1635      RankVector neighbour;      RankVector neighbour;
1636      IndexVector offsetInShared(1,0);      IndexVector offsetInShared(1,0);
1637      IndexVector sendShared, recvShared;      IndexVector sendShared, recvShared;
1638      int numShared=0;      int numShared=0, expectedShared=0;
1639      const int x=m_mpiInfo->rank%m_NX[0];      const int x=m_mpiInfo->rank%m_NX[0];
1640      const int y=m_mpiInfo->rank/m_NX[0];      const int y=m_mpiInfo->rank/m_NX[0];
1641        if (x > 0)
1642            expectedShared += nDOF1;
1643        if (x < m_NX[0] - 1)
1644            expectedShared += nDOF1;
1645        if (y > 0)
1646            expectedShared += nDOF0;
1647        if (y < m_NX[1] - 1)
1648            expectedShared += nDOF0;
1649        if (x > 0 && y > 0) expectedShared++;
1650        if (x > 0 && y < m_NX[1] - 1) expectedShared++;
1651        if (x < m_NX[0] - 1 && y > 0) expectedShared++;
1652        if (x < m_NX[0] - 1 && y < m_NX[1] - 1) expectedShared++;
1653
1654        vector<IndexVector> rowIndices(expectedShared);
1655
1656      for (int i1=-1; i1<2; i1++) {      for (int i1=-1; i1<2; i1++) {
1657          for (int i0=-1; i0<2; i0++) {          for (int i0=-1; i0<2; i0++) {
1658              // skip this rank              // skip this rank
# Line 1533  void Rectangle::createPattern() Line 1672  void Rectangle::createPattern()
1672                          sendShared.push_back(firstDOF+i);                          sendShared.push_back(firstDOF+i);
1673                          recvShared.push_back(numDOF+numShared);                          recvShared.push_back(numDOF+numShared);
1674                          if (i>0)                          if (i>0)
1675                              colIndices[firstDOF+i-1].push_back(numShared);                              doublyLink(colIndices, rowIndices, firstDOF+i-1, numShared);
1676                          colIndices[firstDOF+i].push_back(numShared);                          doublyLink(colIndices, rowIndices, firstDOF+i, numShared);
1677                          if (i<nDOF0-1)                          if (i<nDOF0-1)
1678                              colIndices[firstDOF+i+1].push_back(numShared);                              doublyLink(colIndices, rowIndices, firstDOF+i+1, numShared);
1679                          m_dofMap[firstNode+i]=numDOF+numShared;                          m_dofMap[firstNode+i]=numDOF+numShared;
1680                      }                      }
1681                  } else if (i1==0) {                  } else if (i1==0) {
# Line 1548  void Rectangle::createPattern() Line 1687  void Rectangle::createPattern()
1687                          sendShared.push_back(firstDOF+i*nDOF0);                          sendShared.push_back(firstDOF+i*nDOF0);
1688                          recvShared.push_back(numDOF+numShared);                          recvShared.push_back(numDOF+numShared);
1689                          if (i>0)                          if (i>0)
1690                              colIndices[firstDOF+(i-1)*nDOF0].push_back(numShared);                              doublyLink(colIndices, rowIndices, firstDOF+(i-1)*nDOF0, numShared);
1691                          colIndices[firstDOF+i*nDOF0].push_back(numShared);                          doublyLink(colIndices, rowIndices, firstDOF+i*nDOF0, numShared);
1692                          if (i<nDOF1-1)                          if (i<nDOF1-1)
1693                              colIndices[firstDOF+(i+1)*nDOF0].push_back(numShared);                              doublyLink(colIndices, rowIndices, firstDOF+(i+1)*nDOF0, numShared);
1694                          m_dofMap[firstNode+i*m_NN[0]]=numDOF+numShared;                          m_dofMap[firstNode+i*m_NN[0]]=numDOF+numShared;
1695                      }                      }
1696                  } else {                  } else {
# Line 1561  void Rectangle::createPattern() Line 1700  void Rectangle::createPattern()
1700                      offsetInShared.push_back(offsetInShared.back()+1);                      offsetInShared.push_back(offsetInShared.back()+1);
1701                      sendShared.push_back(dof);                      sendShared.push_back(dof);
1702                      recvShared.push_back(numDOF+numShared);                      recvShared.push_back(numDOF+numShared);
1703                      colIndices[dof].push_back(numShared);                      doublyLink(colIndices, rowIndices, dof, numShared);
1704                      m_dofMap[node]=numDOF+numShared;                      m_dofMap[node]=numDOF+numShared;
1705                      ++numShared;                      ++numShared;
1706                  }                  }
1707              }              }
1708          }          }
1709      }      }
1710
1711    #pragma omp parallel for
1712        for (int i = 0; i < numShared; i++) {
1713            std::sort(rowIndices[i].begin(), rowIndices[i].end());
1714        }
1715
1716      // create connector      // create connector
1717      Paso_SharedComponents *snd_shcomp = Paso_SharedComponents_alloc(      Paso_SharedComponents *snd_shcomp = Paso_SharedComponents_alloc(
1718              numDOF, neighbour.size(), &neighbour[0], &sendShared[0],              numDOF, neighbour.size(), &neighbour[0], &sendShared[0],
# Line 1583  void Rectangle::createPattern() Line 1727  void Rectangle::createPattern()
1727      // create main and couple blocks      // create main and couple blocks
1728      Paso_Pattern *mainPattern = createMainPattern();      Paso_Pattern *mainPattern = createMainPattern();
1729      Paso_Pattern *colPattern, *rowPattern;      Paso_Pattern *colPattern, *rowPattern;
1730      createCouplePatterns(colIndices, numShared, &colPattern, &rowPattern);      createCouplePatterns(colIndices, rowIndices, numShared, &colPattern, &rowPattern);
1731
1732      // allocate paso distribution      // allocate paso distribution
1733      Paso_Distribution* distribution = Paso_Distribution_alloc(m_mpiInfo,      Paso_Distribution* distribution = Paso_Distribution_alloc(m_mpiInfo,
# Line 1870  void Rectangle::interpolateNodesOnFaces( Line 2014  void Rectangle::interpolateNodesOnFaces(
2014  namespace  namespace
2015  {  {
2016      // Calculates a guassian blur colvolution matrix for 2D      // Calculates a guassian blur colvolution matrix for 2D
2017      // See wiki article on the subject      // See wiki article on the subject
2019      {      {
# Line 1880  namespace Line 2024  namespace
2024          for (int y=-r;y<=r;++y)          for (int y=-r;y<=r;++y)
2025          {          {
2026              for (int x=-r;x<=r;++x)              for (int x=-r;x<=r;++x)
2027              {              {
2028                  arr[(x+r)+(y+r)*(r*2+1)]=common*exp(-(x*x+y*y)/(2*sigma*sigma));                  arr[(x+r)+(y+r)*(r*2+1)]=common*exp(-(x*x+y*y)/(2*sigma*sigma));
// cout << (x+y*(r*2+1)) << " " << arr[(x+r)+(y+r)*(r*2+1)] << endl;
2029                  total+=arr[(x+r)+(y+r)*(r*2+1)];                  total+=arr[(x+r)+(y+r)*(r*2+1)];
2030              }              }
2031          }          }
2032          double invtotal=1/total;          double invtotal=1/total;
//cout << "Inv total is "         << invtotal << endl;
2034          {          {
2035              arr[p]*=invtotal;              arr[p]*=invtotal;
//cout << p << "->" << arr[p] << endl;
2036          }          }
2037          return arr;          return arr;
2038      }      }
2039
2040      // applies conv to source to get a point.      // applies conv to source to get a point.
2041      // (xp, yp) are the coords in the source matrix not the destination matrix      // (xp, yp) are the coords in the source matrix not the destination matrix
2042      double Convolve2D(double* conv, double* source, size_t xp, size_t yp, unsigned radius, size_t width)      double Convolve2D(double* conv, double* source, size_t xp, size_t yp, unsigned radius, size_t width)
# Line 1904  namespace Line 2045  namespace
2045          size_t sbase=bx+by*width;          size_t sbase=bx+by*width;
2046          double result=0;          double result=0;
2048          {          {
2050              {              {
2052              }              }
2053          }          }
2054          return result;          return result;
2055      }      }
2056  }  }
2057
2058
2059    /* This is a wrapper for filtered (and non-filtered) randoms
2060     * For detailed doco see randomFillWorker
2061    */
2062    escript::Data Rectangle::randomFill(const escript::DataTypes::ShapeType& shape,
2063           const escript::FunctionSpace& what,
2064           long seed, const boost::python::tuple& filter) const
2065    {
2066        int numvals=escript::DataTypes::noValues(shape);
2067        if (len(filter)>0 && (numvals!=1))
2068        {
2069            throw RipleyException("Ripley only supports filters for scalar data.");
2070        }
2071        escript::Data res=randomFillWorker(shape, seed, filter);
2072        if (res.getFunctionSpace()!=what)
2073        {
2074            escript::Data r=escript::Data(res, what);
2075            return r;
2076        }
2077        return res;
2078    }
2079
2080
2081  /* This routine produces a Data object filled with smoothed random data.  /* This routine produces a Data object filled with smoothed random data.
2082  The dimensions of the rectangle being filled are internal[0] x internal[1] points.  The dimensions of the rectangle being filled are internal[0] x internal[1] points.
# Line 1922  A parameter radius  gives the size of th Line 2084  A parameter radius  gives the size of th
2084  A point on the left hand edge for example, will still require `radius` extra points to the left  A point on the left hand edge for example, will still require `radius` extra points to the left
2085  in order to complete the stencil.  in order to complete the stencil.
2086
2087  All local calculation is done on an array called `src`, with  All local calculation is done on an array called `src`, with
2088  dimensions = ext[0] * ext[1].  dimensions = ext[0] * ext[1].
2090
2091  Now for MPI there is overlap to deal with. We need to share both the overlapping  Now for MPI there is overlap to deal with. We need to share both the overlapping
2092  values themselves but also the external region.  values themselves but also the external region.
2093
2094  In a hypothetical 1-D case:  In a hypothetical 1-D case:
# Line 1945  need to be known. Line 2107  need to be known.
2107
2108  pp123(4)56   23(4)567pp  pp123(4)56   23(4)567pp
2109
2110  Now in our case, we wout set all the values 23456 on the left rank and send them to the  Now in our case, we wout set all the values 23456 on the left rank and send them to the
2111  right hand rank.  right hand rank.
2112
2113  So the edges _may_ need to be shared at a distance `inset` from all boundaries.  So the edges _may_ need to be shared at a distance `inset` from all boundaries.
2115  This is to ensure that values at distance `radius` from the shared/overlapped element  This is to ensure that values at distance `radius` from the shared/overlapped element
2116  that ripley has.  that ripley has.
2117
2118
2119  */  */
2120  escript::Data Rectangle::randomFill(long seed, const boost::python::tuple& filter) const  escript::Data Rectangle::randomFillWorker(const escript::DataTypes::ShapeType& shape,
2121           long seed, const boost::python::tuple& filter) const
2122  {  {
2123      if (m_numDim!=2)      if (m_numDim!=2)
2124      {      {
2125          throw RipleyException("Only 2D supported at this time.");          throw RipleyException("Only 2D supported at this time.");
2126      }      }
2127      if (len(filter)!=3) {
2128          throw RipleyException("Unsupported random filter");      unsigned int radius=0;      // these are only used by gaussian
2129      }      double sigma=0.5;
2130      boost::python::extract<string> ex(filter[0]);
2131      if (!ex.check() || (ex()!="gaussian"))      unsigned int numvals=escript::DataTypes::noValues(shape);
2132
2133
2134        if (len(filter)==0)
2135      {      {
2136          throw RipleyException("Unsupported random filter");          // nothing special required here yet
2137      }      }
2138      boost::python::extract<unsigned int> ex1(filter[1]);      else if (len(filter)==3)
if (!ex1.check())
2139      {      {
2140          throw RipleyException("Radius of gaussian filter must be a positive integer.");          boost::python::extract<string> ex(filter[0]);
2141            if (!ex.check() || (ex()!="gaussian"))
2142            {
2143                throw RipleyException("Unsupported random filter");
2144            }
2145            boost::python::extract<unsigned int> ex1(filter[1]);
2146            if (!ex1.check())
2147            {
2148                throw RipleyException("Radius of gaussian filter must be a positive integer.");
2149            }
2151            sigma=0.5;
2152            boost::python::extract<double> ex2(filter[2]);
2153            if (!ex2.check() || (sigma=ex2())<=0)
2154            {
2155                throw RipleyException("Sigma must be a postive floating point number.");
2156            }
2157      }      }
double sigma=0.5;
boost::python::extract<double> ex2(filter[2]);
if (!ex2.check() || (sigma=ex2())<=0)
2159      {      {
2160          throw RipleyException("Sigma must be a postive floating point number.");          throw RipleyException("Unsupported random filter for Rectangle.");
2161      }      }
2162
2163
2164
2165      size_t internal[2];      size_t internal[2];
2166      internal[0]=m_NE[0]+1;        // number of points in the internal region      internal[0]=m_NE[0]+1;      // number of points in the internal region
2167      internal[1]=m_NE[1]+1;        // that is, the ones we need smoothed versions of      internal[1]=m_NE[1]+1;      // that is, the ones we need smoothed versions of
2168      size_t ext[2];      size_t ext[2];
2169      ext[0]=internal[0]+2*radius;        // includes points we need as input      ext[0]=internal[0]+2*radius;        // includes points we need as input
2171
2172      // now we check to see if the radius is acceptable      // now we check to see if the radius is acceptable
2173      // That is, would not cross multiple ranks in MPI      // That is, would not cross multiple ranks in MPI
2174
2176      {      {
2177          throw RipleyException("Radius of gaussian filter must be less than half the width/height of a rank");          throw RipleyException("Radius of gaussian filter is too large for X dimension of a rank");
2178      }      }

double* src=new double[ext[0]*ext[1]];
esysUtils::randomFillArray(seed, src, ext[0]*ext[1]);

#ifdef ESYS_MPI
size_t Eheight=ext[1]-2*inset;        // how high the E (shared) region is
size_t Swidth=ext[0]-2*inset;

double* SWin=new double[inset*inset];  memset(SWin, 0, inset*inset*sizeof(double));
double* SEin=new double[inset*inset];  memset(SEin, 0, inset*inset*sizeof(double));
double* NWin=new double[inset*inset];  memset(NWin, 0, inset*inset*sizeof(double));
double* Sin=new double[inset*Swidth];  memset(Sin, 0, inset*Swidth*sizeof(double));
double* Win=new double[inset*Eheight];  memset(Win, 0, inset*Eheight*sizeof(double));

double* NEout=new double[inset*inset];  memset(NEout, 0, inset*inset*sizeof(double));
unsigned int base=ext[0]-inset+(ext[1]-inset)*ext[0];
for (unsigned int i=0;i<inset;++i)
{
memcpy(NEout+inset*i, src+base, inset*sizeof(double));
base+=ext[0];
}
double* NWout=new double[inset*inset];  memset(NWout, 0, inset*inset*sizeof(double));
base=(ext[1]-inset)*ext[0];
for (unsigned int i=0;i<inset;++i)
2180      {      {
2181          memcpy(NWout+inset*i, src+base, inset*sizeof(double));          throw RipleyException("Radius of gaussian filter is too large for Y dimension of a rank");
base+=ext[0];
2182      }      }
2183
2184      double* SEout=new double[inset*inset];  memset(SEout, 0, inset*inset*sizeof(double));      double* src=new double[ext[0]*ext[1]*numvals];
2185      base=ext[0]-inset;      esysUtils::randomFillArray(seed, src, ext[0]*ext[1]*numvals);
2186      for (int i=0;i<inset;++i)
{
memcpy(SEout+inset*i, src+base, inset*sizeof(double));
base+=ext[0];
}
double* Nout=new double[inset*Swidth];  memset(Nout, 0, inset*Swidth*sizeof(double));
base=inset+(ext[1]-inset)*ext[0];
for (unsigned int i=0;i<inset;++i)
{
memcpy(Nout+Swidth*i, src+base, Swidth*sizeof(double));
base+=ext[0];
}

double* Eout=new double[inset*Eheight];  memset(Eout, 0, inset*Eheight*sizeof(double));
base=ext[0]-inset+inset*ext[0];
for (unsigned int i=0;i<Eheight;++i)
{
memcpy(Eout+i*inset, src+base, inset*sizeof(double));
base+=ext[0];
}

MPI_Request reqs[10];
MPI_Status stats[10];
short rused=0;

dim_t X=m_mpiInfo->rank%m_NX[0];
dim_t Y=m_mpiInfo->rank/m_NX[0];
dim_t row=m_NX[0];
bool swused=false;                // These vars will be true if data needs to be copied out of them
bool seused=false;
bool nwused=false;
bool sused=false;
bool wused=false;

// Tags:
// 10 : EW transfer (middle)
// 8 : NS transfer (middle)
// 7 : NE corner -> to N, E and NE
// 11 : NW corner to SW corner (only used on the left hand edge
// 12 : SE corner to SW corner (only used on the bottom edge

int comserr=0;
if (Y!=0)        // not on bottom row,
{
if (X!=0)        // not on the left hand edge
{
// recv bottomleft from SW
comserr|=MPI_Irecv(SWin, inset*inset, MPI_DOUBLE, (X-1)+(Y-1)*row, 7, m_mpiInfo->comm, reqs+(rused++));
swused=true;
comserr|=MPI_Irecv(Win, Eheight*inset, MPI_DOUBLE, X-1+Y*row, 10, m_mpiInfo->comm, reqs+(rused++));
wused=true;
}
else        // on the left hand edge
{
comserr|=MPI_Irecv(SWin, inset*inset, MPI_DOUBLE, (Y-1)*row, 11, m_mpiInfo->comm, reqs+(rused++));
swused=true;
}
comserr|=MPI_Irecv(Sin, Swidth*inset, MPI_DOUBLE, X+(Y-1)*row, 8, m_mpiInfo->comm, reqs+(rused++));
sused=true;
comserr|=MPI_Irecv(SEin, inset*inset, MPI_DOUBLE, X+(Y-1)*row, 7, m_mpiInfo->comm, reqs+(rused++));
seused=true;

2187
}
else                // on the bottom row
{
if (X!=0)
{
comserr|=MPI_Irecv(Win, Eheight*inset, MPI_DOUBLE, X-1+Y*row, 10, m_mpiInfo->comm, reqs+(rused++));
wused=true;
// Need to use tag 12 here because SW is coming from the East not South East
comserr|=MPI_Irecv(SWin, inset*inset, MPI_DOUBLE, X-1+Y*row, 12, m_mpiInfo->comm, reqs+(rused++));
swused=true;
}
if (X!=(row-1))
{
comserr|=MPI_Isend(SEout, inset*inset, MPI_DOUBLE, X+1+(Y)*row, 12, m_mpiInfo->comm, reqs+(rused++));
}
}
2188
2189      if (Y!=(m_NX[1]-1))        // not on the top row  #ifdef ESYS_MPI
2190      {      if ((internal[0]<5) || (internal[1]<5))
comserr|=MPI_Isend(Nout, inset*Swidth, MPI_DOUBLE, X+(Y+1)*row, 8, m_mpiInfo->comm, reqs+(rused++));
comserr|=MPI_Isend(NEout, inset*inset, MPI_DOUBLE, X+(Y+1)*row, 7, m_mpiInfo->comm, reqs+(rused++));
if (X!=(row-1))        // not on right hand edge
{
comserr|=MPI_Isend(NEout, inset*inset, MPI_DOUBLE, X+1+(Y+1)*row, 7, m_mpiInfo->comm, reqs+(rused++));
}
if (X==0)        // left hand edge
{
comserr|=MPI_Isend(NWout, inset*inset, MPI_DOUBLE, (Y+1)*row,11, m_mpiInfo->comm, reqs+(rused++));
}
}
if (X!=(row-1))        // not on right hand edge
2191      {      {
2192          comserr|=MPI_Isend(NEout, inset*inset, MPI_DOUBLE, X+1+(Y)*row, 7, m_mpiInfo->comm, reqs+(rused++));          // since the dimensions are equal for all ranks, this exception
2193          comserr|=MPI_Isend(Eout, Eheight*inset, MPI_DOUBLE, X+1+(Y)*row, 10, m_mpiInfo->comm, reqs+(rused++));          // will be thrown on all ranks
2194            throw RipleyException("Random Data in Ripley requries at least five elements per side per rank.");
2195      }      }
2196      if (X!=0)      dim_t X=m_mpiInfo->rank%m_NX[0];
2197      {      dim_t Y=m_mpiInfo->rank%(m_NX[0]*m_NX[1])/m_NX[0];
2198          comserr|=MPI_Irecv(NWin, inset*inset, MPI_DOUBLE, (X-1)+Y*row, 7, m_mpiInfo->comm, reqs+(rused++));  #endif
nwused=true;
2199
2200    /*
2201        // if we wanted to test a repeating pattern
2202        size_t basex=0;
2203        size_t basey=0;
2204    #ifdef ESYS_MPI
2205        basex=X*m_gNE[0]/m_NX[0];
2206        basey=Y*m_gNE[1]/m_NX[1];
2207    #endif
2208
2209        esysUtils::patternFillArray2D(ext[0], ext[1], src, 4, basex, basey, numvals);
2210    */
2211
2212      }
2213    #ifdef ESYS_MPI
2214
2215        BlockGrid2 grid(m_NX[0]-1, m_NX[1]-1);
2216        size_t inset=2*radius+2;    // Its +2 not +1 because a whole element is shared (and hence
2217                    // there is an overlap of two points both of which need to have "radius" points on either side.
2218
2219        size_t xmidlen=ext[0]-2*inset;      // how wide is the x-dimension between the two insets
2220        size_t ymidlen=ext[1]-2*inset;
2221
2222        Block2 block(ext[0], ext[1], inset, xmidlen, ymidlen, numvals);
2223
2224        MPI_Request reqs[40];               // a non-tight upper bound on how many we need
2225        MPI_Status stats[40];
2226        short rused=0;
2227
2228        messvec incoms;
2229        messvec outcoms;
2230
2231        grid.generateInNeighbours(X, Y, incoms);
2232        grid.generateOutNeighbours(X, Y, outcoms);
2233
2234        block.copyAllToBuffer(src);
2235
2236        int comserr=0;
2237        for (size_t i=0;i<incoms.size();++i)
2238        {
2239            message& m=incoms[i];
2240            comserr|=MPI_Irecv(block.getInBuffer(m.destbuffid), block.getBuffSize(m.destbuffid) , MPI_DOUBLE, m.sourceID, m.tag, m_mpiInfo->comm, reqs+(rused++));
2241            block.setUsed(m.destbuffid);
2242        }
2243
2244        for (size_t i=0;i<outcoms.size();++i)
2245        {
2246            message& m=outcoms[i];
2247            comserr|=MPI_Isend(block.getOutBuffer(m.srcbuffid), block.getBuffSize(m.srcbuffid) , MPI_DOUBLE, m.destID, m.tag, m_mpiInfo->comm, reqs+(rused++));
2248        }
2249
2250      if (!comserr)      if (!comserr)
2251      {      {
2252          comserr=MPI_Waitall(rused, reqs, stats);          comserr=MPI_Waitall(rused, reqs, stats);
2253      }      }
2254
# Line 2148  escript::Data Rectangle::randomFill(long Line 2257  escript::Data Rectangle::randomFill(long
2257          // Yes this is throwing an exception as a result of an MPI error.          // Yes this is throwing an exception as a result of an MPI error.
2258          // and no we don't inform the other ranks that we are doing this.          // and no we don't inform the other ranks that we are doing this.
2259          // however, we have no reason to believe coms work at this point anyway          // however, we have no reason to believe coms work at this point anyway
2260          throw RipleyException("Error in coms for randomFill");          throw RipleyException("Error in coms for randomFill");
2261      }      }
2262
2263        block.copyUsedFromBuffer(src);
2264      // Need to copy the values back from the buffers
2265      // Copy from SW  #endif
2266
2267      if (swused)      if (radius==0 || numvals>1) // the truth of either should imply the truth of the other but let's be safe
2268      {      {
2269          base=0;
2270          for (unsigned int i=0;i<inset;++i)          escript::FunctionSpace fs(getPtr(), getContinuousFunctionCode());
2271            escript::Data resdat(0, shape, fs , true);
2272            // don't need to check for exwrite because we just made it
2273            escript::DataVector& dv=resdat.getExpandedVectorReference();
2274
2275
2276            // now we need to copy values over
2277            for (size_t y=0;y<(internal[1]);++y)
2278          {          {
2279              memcpy(src+base, SWin+i*inset, inset*sizeof(double));              for (size_t x=0;x<(internal[0]);++x)
2280              base+=ext[0];              {
2281          }                  for (unsigned int i=0;i<numvals;++i)
2282      }                  {
2283      if (seused)                      dv[i+(x+y*(internal[0]))*numvals]=src[i+(x+y*ext[0])*numvals];
2284      {                  }
2285          base=ext[0]-inset;              }
for (unsigned int i=0;i<inset;++i)
{
memcpy(src+base, SEin+i*inset, inset*sizeof(double));
base+=ext[0];
}
}
if (nwused)
{
base=(ext[1]-inset)*ext[0];
for (unsigned int i=0;i<inset;++i)
{
memcpy(src+base, NWin+i*inset, inset*sizeof(double));
base+=ext[0];
}
}
if (sused)
{
base=inset;
for (unsigned int i=0;i<inset;++i)
{
memcpy(src+base, Sin+i*Swidth, Swidth*sizeof(double));
base+=ext[0];
}
}
if (wused)
{
base=inset*ext[0];
for (unsigned int i=0;i<Eheight;++i)
{
memcpy(src+base, Win+i*inset, inset*sizeof(double));
base+=ext[0];
2286          }          }
2287            delete[] src;
2288            return resdat;
2289      }      }
2290        else                // filter enabled
2291      delete[] SWin;      {
2292      delete[] SEin;          escript::FunctionSpace fs(getPtr(), getContinuousFunctionCode());
2293      delete[] NWin;          escript::Data resdat(0, escript::DataTypes::scalarShape, fs , true);
2294      delete[] Sin;          // don't need to check for exwrite because we just made it
2295      delete[] Win;          escript::DataVector& dv=resdat.getExpandedVectorReference();
2297      delete[] NEout;          for (size_t y=0;y<(internal[1]);++y)
delete[] NWout;
delete[] SEout;
delete[] Nout;
delete[] Eout;
#endif
escript::FunctionSpace fs(getPtr(), getContinuousFunctionCode());
escript::Data resdat(0, escript::DataTypes::scalarShape, fs , true);
// don't need to check for exwrite because we just made it
escript::DataVector& dv=resdat.getExpandedVectorReference();
for (size_t y=0;y<(internal[1]);++y)
{
for (size_t x=0;x<(internal[0]);++x)
2298          {          {
2300                {
2302
2303                }
2304          }          }
2305            delete[] convolution;
2306            delete[] src;
2307            return resdat;
2308      }      }
delete[] convolution;
delete[] src;
return resdat;
2309  }  }
2310
2311  int Rectangle::findNode(const double *coords) const {  int Rectangle::findNode(const double *coords) const {
# Line 2264  int Rectangle::findNode(const double *co Line 2340  int Rectangle::findNode(const double *co
2340              double ydist = (y - (ey + dy)*m_dx[1]);              double ydist = (y - (ey + dy)*m_dx[1]);
2341              double total = xdist*xdist + ydist*ydist;              double total = xdist*xdist + ydist*ydist;
2342              if (total < minDist) {              if (total < minDist) {
2343                  closest = INDEX2(ex+dx-m_offset[0], ey+dy-m_offset[1], m_NE[0] + 1);                  closest = INDEX2(ex+dx-m_offset[0], ey+dy-m_offset[1], m_NE[0] + 1);
2344                  minDist = total;                  minDist = total;
2345              }              }
2346          }          }
# Line 2280  int Rectangle::findNode(const double *co Line 2356  int Rectangle::findNode(const double *co
2356  void Rectangle::setAssembler(std::string type, std::map<std::string,  void Rectangle::setAssembler(std::string type, std::map<std::string,
2357          escript::Data> constants) {          escript::Data> constants) {
2358      if (type.compare("WaveAssembler") == 0) {      if (type.compare("WaveAssembler") == 0) {
2359            if (assembler_type != WAVE_ASSEMBLER && assembler_type != DEFAULT_ASSEMBLER)
2360                throw RipleyException("Domain already using a different custom assembler");
2361            assembler_type = WAVE_ASSEMBLER;
2362          delete assembler;          delete assembler;
2363          assembler = new WaveAssembler2D(this, m_dx, m_NX, m_NE, m_NN, constants);          assembler = new WaveAssembler2D(this, m_dx, m_NX, m_NE, m_NN, constants);
2364        } else if (type.compare("LameAssembler") == 0) {
2365            if (assembler_type != LAME_ASSEMBLER && assembler_type != DEFAULT_ASSEMBLER)
2366                throw RipleyException("Domain already using a different custom assembler");
2367            assembler_type = LAME_ASSEMBLER;
2368            delete assembler;
2369            assembler = new LameAssembler2D(this, m_dx, m_NX, m_NE, m_NN);
2370      } else { //else ifs would go before this for other types      } else { //else ifs would go before this for other types
2371          throw RipleyException("Ripley::Rectangle does not support the"          throw RipleyException("Ripley::Rectangle does not support the"
2372                                  " requested assembler");                                  " requested assembler");

Legend:
 Removed from v.4675 changed lines Added in v.4765