# Diff of /branches/ripleygmg_from_3668/ripley/src/Rectangle.cpp

revision 3748 by caltinay, Thu Dec 15 07:36:19 2011 UTC revision 3756 by caltinay, Fri Jan 6 02:35:19 2012 UTC
# Line 43  Rectangle::Rectangle(int n0, int n1, dou Line 43  Rectangle::Rectangle(int n0, int n1, dou
43      if (m_NX*m_NY != m_mpiInfo->size)      if (m_NX*m_NY != m_mpiInfo->size)
44          throw RipleyException("Invalid number of spatial subdivisions");          throw RipleyException("Invalid number of spatial subdivisions");
45
46      if (n0%m_NX > 0 || n1%m_NY > 0)      if ((n0+1)%m_NX > 0 || (n1+1)%m_NY > 0)
47          throw RipleyException("Number of elements must be separable into number of ranks in each dimension");          throw RipleyException("Number of elements+1 must be separable into number of ranks in each dimension");
48
49      // local number of elements      if ((m_NX > 1 && (n0+1)/m_NX<2) || (m_NY > 1 && (n1+1)/m_NY<2))
50      m_NE0 = n0/m_NX;          throw RipleyException("Too few elements for the number of ranks");
51      m_NE1 = n1/m_NY;
52      // local number of nodes (not necessarily owned)      // local number of elements (including overlap)
53        m_NE0 = (m_NX>1 ? (n0+1)/m_NX : n0);
54        if (m_mpiInfo->rank%m_NX>0 && m_mpiInfo->rank%m_NX<m_NX-1)
55            m_NE0++;
56        m_NE1 = (m_NY>1 ? (n1+1)/m_NY : n1);
57        if (m_mpiInfo->rank/m_NX>0 && m_mpiInfo->rank/m_NX<m_NY-1)
58            m_NE1++;
59
60        // local number of nodes
61      m_N0 = m_NE0+1;      m_N0 = m_NE0+1;
62      m_N1 = m_NE1+1;      m_N1 = m_NE1+1;
63
64      // bottom-left node is at (offset0,offset1) in global mesh      // bottom-left node is at (offset0,offset1) in global mesh
65      m_offset0 = m_NE0*(m_mpiInfo->rank%m_NX);      m_offset0 = (n0+1)/m_NX*(m_mpiInfo->rank%m_NX);
66      m_offset1 = m_NE1*(m_mpiInfo->rank/m_NX);      if (m_offset0 > 0)
67            m_offset0--;
68        m_offset1 = (n1+1)/m_NY*(m_mpiInfo->rank/m_NX);
69        if (m_offset1 > 0)
70            m_offset1--;
71
72      populateSampleIds();      populateSampleIds();
73        createPattern();
74  }  }
75
76  Rectangle::~Rectangle()  Rectangle::~Rectangle()
# Line 228  const int* Rectangle::borrowSampleRefere Line 243  const int* Rectangle::borrowSampleRefere
243          case Nodes:          case Nodes:
244          case ReducedNodes: //FIXME: reduced          case ReducedNodes: //FIXME: reduced
245              return &m_nodeId[0];              return &m_nodeId[0];
246            case DegreesOfFreedom:
247            case ReducedDegreesOfFreedom: //FIXME: reduced
248                return &m_dofId[0];
249          case Elements:          case Elements:
250          case ReducedElements:          case ReducedElements:
251              return &m_elementId[0];              return &m_elementId[0];
# Line 247  const int* Rectangle::borrowSampleRefere Line 265  const int* Rectangle::borrowSampleRefere
265  bool Rectangle::ownSample(int fsCode, index_t id) const  bool Rectangle::ownSample(int fsCode, index_t id) const
266  {  {
267  #ifdef ESYS_MPI  #ifdef ESYS_MPI
268      if (fsCode != ReducedNodes) {      if (fsCode == Nodes) {
269          const index_t myFirst=m_nodeDistribution[m_mpiInfo->rank];          return (m_dofMap[id] < getNumDOF());
const index_t myLast=m_nodeDistribution[m_mpiInfo->rank+1]-1;
return (m_nodeId[id]>=myFirst && m_nodeId[id]<=myLast);
270      } else {      } else {
271          stringstream msg;          stringstream msg;
272          msg << "ownSample() not implemented for "          msg << "ownSample() not implemented for "
# Line 734  Paso_SystemMatrixPattern* Rectangle::get Line 750  Paso_SystemMatrixPattern* Rectangle::get
750      if (reducedRowOrder || reducedColOrder)      if (reducedRowOrder || reducedColOrder)
751          throw RipleyException("getPattern() not implemented for reduced order");          throw RipleyException("getPattern() not implemented for reduced order");
752
753      // connector      return m_pattern;
RankVector neighbour;
IndexVector offsetInShared(1,0);
IndexVector sendShared, recvShared;
const IndexVector faces=getNumFacesPerBoundary();
const index_t left = (m_offset0==0 ? 0 : 1);
const index_t bottom = (m_offset1==0 ? 0 : 1);
// corner node from bottom-left
if (faces[0] == 0 && faces[2] == 0) {
neighbour.push_back(m_mpiInfo->rank-m_NX-1);
offsetInShared.push_back(offsetInShared.back()+1);
sendShared.push_back(m_nodeId[m_N0+left]);
recvShared.push_back(m_nodeId[0]);
}
// bottom edge
if (faces[2] == 0) {
neighbour.push_back(m_mpiInfo->rank-m_NX);
offsetInShared.push_back(offsetInShared.back()+m_N0-left);
for (dim_t i=left; i<m_N0; i++) {
// easy case, we know the neighbour id's
sendShared.push_back(m_nodeId[m_N0+i]);
recvShared.push_back(m_nodeId[i]);
}
}
// corner node from bottom-right
if (faces[1] == 0 && faces[2] == 0) {
neighbour.push_back(m_mpiInfo->rank-m_NX+1);
const index_t N0=(neighbour.back()%m_NX == 0 ? m_N0 : m_N0-1);
const index_t N1=(neighbour.back()/m_NX == 0 ? m_N1 : m_N1-1);
const index_t first=m_nodeDistribution[neighbour.back()];
offsetInShared.push_back(offsetInShared.back()+1);
sendShared.push_back(m_nodeId[(bottom+1)*m_N0-1]);
recvShared.push_back(first+N0*(N1-1));
}
// left edge
if (faces[0] == 0) {
neighbour.push_back(m_mpiInfo->rank-1);
offsetInShared.push_back(offsetInShared.back()+m_N1-bottom);
for (dim_t i=bottom; i<m_N1; i++) {
// easy case, we know the neighbour id's
sendShared.push_back(m_nodeId[i*m_N0+1]);
recvShared.push_back(m_nodeId[i*m_N0]);
}
}
// right edge
if (faces[1] == 0) {
neighbour.push_back(m_mpiInfo->rank+1);
const index_t rightN0=(neighbour.back()%m_NX == 0 ? m_N0 : m_N0-1);
const index_t first=m_nodeDistribution[neighbour.back()];
offsetInShared.push_back(offsetInShared.back()+m_N1-bottom);
for (dim_t i=bottom; i<m_N1; i++) {
sendShared.push_back(m_nodeId[(i+1)*m_N0-1]);
recvShared.push_back(first+rightN0*(i-bottom));
}
}
// corner node from top-left
if (faces[0] == 0 && faces[3] == 0) {
neighbour.push_back(m_mpiInfo->rank+m_NX-1);
const index_t N0=(neighbour.back()%m_NX == 0 ? m_N0 : m_N0-1);
const index_t first=m_nodeDistribution[neighbour.back()];
offsetInShared.push_back(offsetInShared.back()+1);
sendShared.push_back(m_nodeId[m_N0*(m_N1-1)+left]);
recvShared.push_back(first+N0-1);
}
// top edge
if (faces[3] == 0) {
neighbour.push_back(m_mpiInfo->rank+m_NX);
const index_t first=m_nodeDistribution[neighbour.back()];
offsetInShared.push_back(offsetInShared.back()+m_N0-left);
for (dim_t i=left; i<m_N0; i++) {
sendShared.push_back(m_nodeId[m_N0*(m_N1-1)+i]);
recvShared.push_back(first+i-left);
}
}
// corner node from top-right
if (faces[1] == 0 && faces[3] == 0) {
neighbour.push_back(m_mpiInfo->rank+m_NX+1);
const index_t first=m_nodeDistribution[neighbour.back()];
offsetInShared.push_back(offsetInShared.back()+1);
sendShared.push_back(m_nodeId[m_N0*m_N1-1]);
recvShared.push_back(first);
}
const int numDOF=m_nodeDistribution[m_mpiInfo->rank+1]-m_nodeDistribution[m_mpiInfo->rank];
/*
cout << "--- rcv_shcomp ---" << endl;
cout << "numDOF=" << numDOF << ", numNeighbors=" << neighbour.size() << endl;
for (size_t i=0; i<neighbour.size(); i++) {
cout << "neighbor[" << i << "]=" << neighbour[i]
<< " offsetInShared[" << i+1 << "]=" << offsetInShared[i+1] << endl;
}
for (size_t i=0; i<recvShared.size(); i++) {
cout << "shared[" << i << "]=" << recvShared[i] << endl;
}
cout << "--- snd_shcomp ---" << endl;
for (size_t i=0; i<sendShared.size(); i++) {
cout << "shared[" << i << "]=" << sendShared[i] << endl;
}
*/

Paso_SharedComponents *snd_shcomp = Paso_SharedComponents_alloc(
numDOF, neighbour.size(), &neighbour[0], &sendShared[0],
&offsetInShared[0], 1, 0, m_mpiInfo);
Paso_SharedComponents *rcv_shcomp = Paso_SharedComponents_alloc(
numDOF, neighbour.size(), &neighbour[0], &recvShared[0],
&offsetInShared[0], 1, 0, m_mpiInfo);
Paso_Connector* connector = Paso_Connector_alloc(snd_shcomp, rcv_shcomp);
Paso_SharedComponents_free(snd_shcomp);
Paso_SharedComponents_free(rcv_shcomp);

// create patterns
dim_t M, N;
IndexVector ptr(1,0);
IndexVector index;

// main pattern
for (index_t i=0; i<numDOF; i++) {
// always add the node itself
index.push_back(i);
const int num=insertNeighbours(index, i);
ptr.push_back(ptr.back()+num+1);
}
M=N=ptr.size()-1;
// paso will manage the memory
index_t* indexC = MEMALLOC(index.size(),index_t);
index_t* ptrC = MEMALLOC(ptr.size(), index_t);
copy(index.begin(), index.end(), indexC);
copy(ptr.begin(), ptr.end(), ptrC);
Paso_Pattern *mainPattern = Paso_Pattern_alloc(MATRIX_FORMAT_DEFAULT,
M, N, ptrC, indexC);

/*
cout << "--- main_pattern ---" << endl;
cout << "M=" << M << ", N=" << N << endl;
for (size_t i=0; i<ptr.size(); i++) {
cout << "ptr[" << i << "]=" << ptr[i] << endl;
}
for (size_t i=0; i<index.size(); i++) {
cout << "index[" << i << "]=" << index[i] << endl;
}
*/

ptr.clear();
index.clear();

// column & row couple patterns
Paso_Pattern *colCouplePattern, *rowCouplePattern;
generateCouplePatterns(&colCouplePattern, &rowCouplePattern);

// allocate paso distribution
Paso_Distribution* distribution = Paso_Distribution_alloc(m_mpiInfo,
const_cast<index_t*>(&m_nodeDistribution[0]), 1, 0);

Paso_SystemMatrixPattern* pattern = Paso_SystemMatrixPattern_alloc(
MATRIX_FORMAT_DEFAULT, distribution, distribution,
mainPattern, colCouplePattern, rowCouplePattern,
connector, connector);
Paso_Pattern_free(mainPattern);
Paso_Pattern_free(colCouplePattern);
Paso_Pattern_free(rowCouplePattern);
Paso_Distribution_free(distribution);
return pattern;
754  }  }
755
756  void Rectangle::Print_Mesh_Info(const bool full) const  void Rectangle::Print_Mesh_Info(const bool full) const
# Line 959  pair<double,double> Rectangle::getFirstC Line 815  pair<double,double> Rectangle::getFirstC
815  }  }
816
817  //protected  //protected
818    dim_t Rectangle::getNumDOF() const
819    {
820        return (m_gNE0+1)/m_NX*(m_gNE1+1)/m_NY;
821    }
822
823    //protected
824  dim_t Rectangle::getNumFaceElements() const  dim_t Rectangle::getNumFaceElements() const
825  {  {
826      const IndexVector faces = getNumFacesPerBoundary();      const IndexVector faces = getNumFacesPerBoundary();
# Line 991  void Rectangle::assembleCoordinates(escr Line 853  void Rectangle::assembleCoordinates(escr
853      }      }
854  }  }
855
856  //private  //protected
857  void Rectangle::populateSampleIds()  dim_t Rectangle::insertNeighbourNodes(IndexVector& index, index_t node) const
858  {  {
859      // identifiers are ordered from left to right, bottom to top on each rank,      const dim_t nDOF0 = (m_gNE0+1)/m_NX;
860      // except for the shared nodes which are owned by the rank below / to the      const dim_t nDOF1 = (m_gNE1+1)/m_NY;
861      // left of the current rank      const int x=node%nDOF0;
862        const int y=node/nDOF0;
863      // build node distribution vector first.      dim_t num=0;
864      // m_nodeDistribution[i] is the first node id on rank i, that is      // loop through potential neighbours and add to index if positions are
865      // rank i owns m_nodeDistribution[i+1]-nodeDistribution[i] nodes      // within bounds
866      m_nodeDistribution.assign(m_mpiInfo->size+1, 0);      for (int i1=-1; i1<2; i1++) {
867      m_nodeDistribution[1]=getNumNodes();          for (int i0=-1; i0<2; i0++) {
868      for (dim_t k=1; k<m_mpiInfo->size-1; k++) {              // skip node itself
869          const index_t x=k%m_NX;              if (i0==0 && i1==0)
870          const index_t y=k/m_NX;                  continue;
871          index_t numNodes=getNumNodes();              // location of neighbour node
872          if (x>0)              const int nx=x+i0;
873              numNodes-=m_N1;              const int ny=y+i1;
874          if (y>0)              if (nx>=0 && ny>=0 && nx<nDOF0 && ny<nDOF1) {
875              numNodes-=m_N0;                  index.push_back(ny*nDOF0+nx);
876          if (x>0 && y>0)                  num++;
877              numNodes++; // subtracted corner twice -> fix that              }
878          m_nodeDistribution[k+1]=m_nodeDistribution[k]+numNodes;          }
879      }      }
m_nodeDistribution[m_mpiInfo->size]=getNumDataPointsGlobal();
880
881      m_nodeId.resize(getNumNodes());      return num;
882    }
883
884    //protected
885    void Rectangle::nodesToDOF(escript::Data& out, escript::Data& in) const
886    {
887        const dim_t numComp = in.getDataPointSize();
888        out.requireWrite();
889
// the bottom row and left column are not owned by this rank so the
// identifiers need to be computed accordingly
890      const index_t left = (m_offset0==0 ? 0 : 1);      const index_t left = (m_offset0==0 ? 0 : 1);
891      const index_t bottom = (m_offset1==0 ? 0 : 1);      const index_t bottom = (m_offset1==0 ? 0 : 1);
892      if (left>0) {      const dim_t nDOF0 = (m_gNE0+1)/m_NX;
893          const int neighbour=m_mpiInfo->rank-1;      const dim_t nDOF1 = (m_gNE1+1)/m_NY;
const index_t leftN0=(neighbour%m_NX == 0 ? m_N0 : m_N0-1);
894  #pragma omp parallel for  #pragma omp parallel for
895          for (dim_t i1=bottom; i1<m_N1; i1++) {      for (index_t i=0; i<nDOF1; i++) {
896              m_nodeId[i1*m_N0]=m_nodeDistribution[neighbour]          for (index_t j=0; j<nDOF0; j++) {
897                  + (i1-bottom+1)*leftN0-1;              const index_t n=j+left+(i+bottom)*m_N0;
898                const double* src=in.getSampleDataRO(n);
899                copy(src, src+numComp, out.getSampleDataRW(j+i*nDOF0));
900          }          }
901      }      }
902      if (bottom>0) {  }
903          const int neighbour=m_mpiInfo->rank-m_NX;
904          const index_t bottomN0=(neighbour%m_NX == 0 ? m_N0 : m_N0-1);  //protected
905          const index_t bottomN1=(neighbour/m_NX == 0 ? m_N1 : m_N1-1);  void Rectangle::dofToNodes(escript::Data& out, escript::Data& in) const
906    {
907        const dim_t numComp = in.getDataPointSize();
908        Paso_Coupler* coupler = Paso_Coupler_alloc(m_connector, numComp);
909        in.requireWrite();
910        Paso_Coupler_startCollect(coupler, in.getSampleDataRW(0));
911
912        const dim_t numDOF = getNumDOF();
913        out.requireWrite();
914        const double* buffer = Paso_Coupler_finishCollect(coupler);
915
916  #pragma omp parallel for  #pragma omp parallel for
917          for (dim_t i0=left; i0<m_N0; i0++) {      for (index_t i=0; i<getNumNodes(); i++) {
918              m_nodeId[i0]=m_nodeDistribution[neighbour]          const double* src=(m_dofMap[i]<numDOF ?
919                  + (bottomN1-1)*bottomN0 + i0 - left;                  in.getSampleDataRO(m_dofMap[i])
920          }                  : &buffer[(m_dofMap[i]-numDOF)*numComp]);
921            copy(src, src+numComp, out.getSampleDataRW(i));
922      }      }
923      if (left>0 && bottom>0) {  }
924          const int neighbour=m_mpiInfo->rank-m_NX-1;
925          const index_t N0=(neighbour%m_NX == 0 ? m_N0 : m_N0-1);  //private
926          const index_t N1=(neighbour/m_NX == 0 ? m_N1 : m_N1-1);  void Rectangle::populateSampleIds()
927          m_nodeId[0]=m_nodeDistribution[neighbour]+N0*N1-1;  {
928        // identifiers are ordered from left to right, bottom to top globablly.
929
930        // build node distribution vector first.
931        // rank i owns m_nodeDistribution[i+1]-nodeDistribution[i] nodes
932        m_nodeDistribution.assign(m_mpiInfo->size+1, 0);
933        const dim_t numDOF=getNumDOF();
934        for (dim_t k=1; k<m_mpiInfo->size; k++) {
935            m_nodeDistribution[k]=k*numDOF;
936      }      }
937        m_nodeDistribution[m_mpiInfo->size]=getNumDataPointsGlobal();
938        m_nodeId.resize(getNumNodes());
939        m_dofId.resize(numDOF);
940        m_elementId.resize(getNumElements());
941        m_faceId.resize(getNumFaceElements());
942
943      // the rest of the id's are contiguous  #pragma omp parallel
944      const index_t firstId=m_nodeDistribution[m_mpiInfo->rank];      {
945  #pragma omp parallel for          // nodes
946      for (dim_t i1=bottom; i1<m_N1; i1++) {  #pragma omp for nowait
947          for (dim_t i0=left; i0<m_N0; i0++) {          for (dim_t i1=0; i1<m_N1; i1++) {
948              m_nodeId[i0+i1*m_N0] = firstId+i0-left+(i1-bottom)*(m_N0-left);              for (dim_t i0=0; i0<m_N0; i0++) {
949                    m_nodeId[i0+i1*m_N0] = (m_offset1+i1)*(m_gNE0+1)+m_offset0+i0;
950                }
951          }          }
952      }
953            // degrees of freedom
954    #pragma omp for nowait
955            for (dim_t k=0; k<numDOF; k++)
956                m_dofId[k] = m_nodeDistribution[m_mpiInfo->rank]+k;
957
958            // elements
959    #pragma omp for nowait
960            for (dim_t i1=0; i1<m_NE1; i1++) {
961                for (dim_t i0=0; i0<m_NE0; i0++) {
962                    m_elementId[i0+i1*m_NE0]=(m_offset1+i1)*m_gNE0+m_offset0+i0;
963                }
964            }
965
966            // face elements
967    #pragma omp for
968            for (dim_t k=0; k<getNumFaceElements(); k++)
969                m_faceId[k]=k;
970        } // end parallel section
971
972      m_nodeTags.assign(getNumNodes(), 0);      m_nodeTags.assign(getNumNodes(), 0);
973      updateTagsInUse(Nodes);      updateTagsInUse(Nodes);
974
// elements
m_elementId.resize(getNumElements());
#pragma omp parallel for
for (dim_t k=0; k<getNumElements(); k++) {
m_elementId[k]=k;
}
975      m_elementTags.assign(getNumElements(), 0);      m_elementTags.assign(getNumElements(), 0);
976      updateTagsInUse(Elements);      updateTagsInUse(Elements);
977
// face element id's
m_faceId.resize(getNumFaceElements());
#pragma omp parallel for
for (dim_t k=0; k<getNumFaceElements(); k++) {
m_faceId[k]=k;
}

978      // generate face offset vector and set face tags      // generate face offset vector and set face tags
979      const IndexVector facesPerEdge = getNumFacesPerBoundary();      const IndexVector facesPerEdge = getNumFacesPerBoundary();
980      const index_t LEFT=1, RIGHT=2, BOTTOM=10, TOP=20;      const index_t LEFT=1, RIGHT=2, BOTTOM=10, TOP=20;
# Line 1098  void Rectangle::populateSampleIds() Line 997  void Rectangle::populateSampleIds()
997  }  }
998
999  //private  //private
1000  int Rectangle::insertNeighbours(IndexVector& index, index_t node) const  void Rectangle::createPattern()
1001  {  {
1002      const dim_t myN0 = (m_offset0==0 ? m_N0 : m_N0-1);      const dim_t nDOF0 = (m_gNE0+1)/m_NX;
1003      const dim_t myN1 = (m_offset1==0 ? m_N1 : m_N1-1);      const dim_t nDOF1 = (m_gNE1+1)/m_NY;
1004      const int x=node%myN0;      const index_t left = (m_offset0==0 ? 0 : 1);
1005      const int y=node/myN0;      const index_t bottom = (m_offset1==0 ? 0 : 1);
1006      int num=0;
1007      if (y>0) {      // populate node->DOF mapping with own degrees of freedom.
1008          if (x>0) {      // The rest is assigned in the loop further down
1009              // bottom-left      m_dofMap.assign(getNumNodes(), 0);
1010              index.push_back(node-myN0-1);  #pragma omp parallel for
1011              num++;      for (index_t i=bottom; i<m_N1; i++) {
1012          }          for (index_t j=left; j<m_N0; j++) {
1013          // bottom              m_dofMap[i*m_N0+j]=(i-bottom)*nDOF0+j-left;
index.push_back(node-myN0);
num++;
if (x<myN0-1) {
// bottom-right
index.push_back(node-myN0+1);
num++;
}
}
if (x<myN0-1) {
// right
index.push_back(node+1);
num++;
if (y<myN1-1) {
// top-right
index.push_back(node+myN0+1);
num++;
1014          }          }
1015      }      }
1016      if (y<myN1-1) {
1017          // top      // build list of shared components and neighbours by looping through
1018          index.push_back(node+myN0);      // all potential neighbouring ranks and checking if positions are
1019          num++;      // within bounds
1020          if (x>0) {      const dim_t numDOF=nDOF0*nDOF1;
1021              // top-left      vector<IndexVector> colIndices(numDOF); // for the couple blocks
1022              index.push_back(node+myN0-1);      RankVector neighbour;
1023              num++;      IndexVector offsetInShared(1,0);
1024        IndexVector sendShared, recvShared;
1025        int numShared=0;
1026        const int x=m_mpiInfo->rank%m_NX;
1027        const int y=m_mpiInfo->rank/m_NX;
1028        for (int i1=-1; i1<2; i1++) {
1029            for (int i0=-1; i0<2; i0++) {
1030                // skip this rank
1031                if (i0==0 && i1==0)
1032                    continue;
1033                // location of neighbour rank
1034                const int nx=x+i0;
1035                const int ny=y+i1;
1036                if (nx>=0 && ny>=0 && nx<m_NX && ny<m_NY) {
1037                    neighbour.push_back(ny*m_NX+nx);
1038                    if (i0==0) {
1039                        // sharing top or bottom edge
1040                        const int firstDOF=(i1==-1 ? 0 : numDOF-nDOF0);
1041                        const int firstNode=(i1==-1 ? left : m_N0*(m_N1-1)+left);
1042                        offsetInShared.push_back(offsetInShared.back()+nDOF0);
1043                        for (dim_t i=0; i<nDOF0; i++, numShared++) {
1044                            sendShared.push_back(firstDOF+i);
1045                            recvShared.push_back(numDOF+numShared);
1046                            if (i>0)
1047                                colIndices[firstDOF+i-1].push_back(numShared);
1048                            colIndices[firstDOF+i].push_back(numShared);
1049                            if (i<nDOF0-1)
1050                                colIndices[firstDOF+i+1].push_back(numShared);
1051                            m_dofMap[firstNode+i]=numDOF+numShared;
1052                        }
1053                    } else if (i1==0) {
1054                        // sharing left or right edge
1055                        const int firstDOF=(i0==-1 ? 0 : nDOF0-1);
1056                        const int firstNode=(i0==-1 ? bottom*m_N0 : (bottom+1)*m_N0-1);
1057                        offsetInShared.push_back(offsetInShared.back()+nDOF1);
1058                        for (dim_t i=0; i<nDOF1; i++, numShared++) {
1059                            sendShared.push_back(firstDOF+i*nDOF0);
1060                            recvShared.push_back(numDOF+numShared);
1061                            if (i>0)
1062                                colIndices[firstDOF+(i-1)*nDOF0].push_back(numShared);
1063                            colIndices[firstDOF+i*nDOF0].push_back(numShared);
1064                            if (i<nDOF1-1)
1065                                colIndices[firstDOF+(i+1)*nDOF0].push_back(numShared);
1066                            m_dofMap[firstNode+i*m_N0]=numDOF+numShared;
1067                        }
1068                    } else {
1069                        // sharing a node
1070                        const int dof=(i0+1)/2*(nDOF0-1)+(i1+1)/2*(numDOF-nDOF0);
1071                        const int node=(i0+1)/2*(m_N0-1)+(i1+1)/2*m_N0*(m_N1-1);
1072                        offsetInShared.push_back(offsetInShared.back()+1);
1073                        sendShared.push_back(dof);
1074                        recvShared.push_back(numDOF+numShared);
1075                        colIndices[dof].push_back(numShared);
1076                        m_dofMap[node]=numDOF+numShared;
1077                        ++numShared;
1078                    }
1079                }
1080          }          }
1081      }      }
if (x>0) {
// left
index.push_back(node-1);
num++;
}
1082
1083      return num;      // create connector
1084  }      Paso_SharedComponents *snd_shcomp = Paso_SharedComponents_alloc(
1085                numDOF, neighbour.size(), &neighbour[0], &sendShared[0],
1086                &offsetInShared[0], 1, 0, m_mpiInfo);
1087        Paso_SharedComponents *rcv_shcomp = Paso_SharedComponents_alloc(
1088                numDOF, neighbour.size(), &neighbour[0], &recvShared[0],
1089                &offsetInShared[0], 1, 0, m_mpiInfo);
1090        m_connector = Paso_Connector_alloc(snd_shcomp, rcv_shcomp);
1091        Paso_SharedComponents_free(snd_shcomp);
1092        Paso_SharedComponents_free(rcv_shcomp);
1093
1094  //private      // create main and couple blocks
1095  void Rectangle::generateCouplePatterns(Paso_Pattern** colPattern, Paso_Pattern** rowPattern) const      Paso_Pattern *mainPattern = createMainPattern();
1096  {      Paso_Pattern *colPattern, *rowPattern;
1097      IndexVector ptr(1,0);      createCouplePatterns(colIndices, numShared, &colPattern, &rowPattern);
IndexVector index;
const dim_t myN0 = (m_offset0==0 ? m_N0 : m_N0-1);
const dim_t myN1 = (m_offset1==0 ? m_N1 : m_N1-1);
const IndexVector faces=getNumFacesPerBoundary();
1098
1099      // bottom edge      // allocate paso distribution
1100      dim_t n=0;      Paso_Distribution* distribution = Paso_Distribution_alloc(m_mpiInfo,
1101      if (faces[0] == 0) {              const_cast<index_t*>(&m_nodeDistribution[0]), 1, 0);
index.push_back(2*(myN0+myN1+1));
index.push_back(2*(myN0+myN1+1)+1);
n+=2;
if (faces[2] == 0) {
index.push_back(0);
index.push_back(1);
index.push_back(2);
n+=3;
}
} else if (faces[2] == 0) {
index.push_back(1);
index.push_back(2);
n+=2;
}
// n=neighbours of bottom-left corner node
ptr.push_back(ptr.back()+n);
n=0;
if (faces[2] == 0) {
for (dim_t i=1; i<myN0-1; i++) {
index.push_back(i);
index.push_back(i+1);
index.push_back(i+2);
ptr.push_back(ptr.back()+3);
}
index.push_back(myN0-1);
index.push_back(myN0);
n+=2;
if (faces[1] == 0) {
index.push_back(myN0+1);
index.push_back(myN0+2);
index.push_back(myN0+3);
n+=3;
}
} else {
for (dim_t i=1; i<myN0-1; i++) {
ptr.push_back(ptr.back());
}
if (faces[1] == 0) {
index.push_back(myN0+2);
index.push_back(myN0+3);
n+=2;
}
}
// n=neighbours of bottom-right corner node
ptr.push_back(ptr.back()+n);
1102
1103      // 2nd row to 2nd last row      // finally create the system matrix
1104      for (dim_t i=1; i<myN1-1; i++) {      m_pattern = Paso_SystemMatrixPattern_alloc(MATRIX_FORMAT_DEFAULT,
1105          // left edge              distribution, distribution, mainPattern, colPattern, rowPattern,
1106          if (faces[0] == 0) {              m_connector, m_connector);
index.push_back(2*(myN0+myN1+2)-i);
index.push_back(2*(myN0+myN1+2)-i-1);
index.push_back(2*(myN0+myN1+2)-i-2);
ptr.push_back(ptr.back()+3);
} else {
ptr.push_back(ptr.back());
}
for (dim_t j=1; j<myN0-1; j++) {
ptr.push_back(ptr.back());
}
// right edge
if (faces[1] == 0) {
index.push_back(myN0+i+1);
index.push_back(myN0+i+2);
index.push_back(myN0+i+3);
ptr.push_back(ptr.back()+3);
} else {
ptr.push_back(ptr.back());
}
}
1107
1108      // top edge      Paso_Distribution_free(distribution);
n=0;
if (faces[0] == 0) {
index.push_back(2*myN0+myN1+5);
index.push_back(2*myN0+myN1+4);
n+=2;
if (faces[3] == 0) {
index.push_back(2*myN0+myN1+3);
index.push_back(2*myN0+myN1+2);
index.push_back(2*myN0+myN1+1);
n+=3;
}
} else if (faces[3] == 0) {
index.push_back(2*myN0+myN1+2);
index.push_back(2*myN0+myN1+1);
n+=2;
}
// n=neighbours of top-left corner node
ptr.push_back(ptr.back()+n);
n=0;
if (faces[3] == 0) {
for (dim_t i=1; i<myN0-1; i++) {
index.push_back(2*myN0+myN1+i+1);
index.push_back(2*myN0+myN1+i);
index.push_back(2*myN0+myN1+i-1);
ptr.push_back(ptr.back()+3);
}
index.push_back(myN0+myN1+4);
index.push_back(myN0+myN1+3);
n+=2;
if (faces[1] == 0) {
index.push_back(myN0+myN1+2);
index.push_back(myN0+myN1+1);
index.push_back(myN0+myN1);
n+=3;
}
} else {
for (dim_t i=1; i<myN0-1; i++) {
ptr.push_back(ptr.back());
}
if (faces[1] == 0) {
index.push_back(myN0+myN1+1);
index.push_back(myN0+myN1);
n+=2;
}
}
// n=neighbours of top-right corner node
ptr.push_back(ptr.back()+n);
1109
1110      dim_t M=ptr.size()-1;      // useful debug output
1111      map<index_t,index_t> idMap;      /*
1112      dim_t N=0;      cout << "--- rcv_shcomp ---" << endl;
1113      for (IndexVector::iterator it=index.begin(); it!=index.end(); it++) {      cout << "numDOF=" << numDOF << ", numNeighbors=" << neighbour.size() << endl;
1114          if (idMap.find(*it)==idMap.end()) {      for (size_t i=0; i<neighbour.size(); i++) {
1115              idMap[*it]=N;          cout << "neighbor[" << i << "]=" << neighbour[i]
1116              *it=N++;              << " offsetInShared[" << i+1 << "]=" << offsetInShared[i+1] << endl;
1117          } else {      }
1118              *it=idMap[*it];      for (size_t i=0; i<recvShared.size(); i++) {
1119          }          cout << "shared[" << i << "]=" << recvShared[i] << endl;
1120      }      }
1121        cout << "--- snd_shcomp ---" << endl;
1122        for (size_t i=0; i<sendShared.size(); i++) {
1123            cout << "shared[" << i << "]=" << sendShared[i] << endl;
1124        }
1125        cout << "--- dofMap ---" << endl;
1126        for (size_t i=0; i<m_dofMap.size(); i++) {
1127            cout << "m_dofMap[" << i << "]=" << m_dofMap[i] << endl;
1128        }
1129        cout << "--- colIndices ---" << endl;
1130        for (size_t i=0; i<colIndices.size(); i++) {
1131            cout << "colIndices[" << i << "].size()=" << colIndices[i].size() << endl;
1132        }
1133        */
1134
1135      /*      /*
1136      cout << "--- colCouple_pattern ---" << endl;      cout << "--- main_pattern ---" << endl;
1137      cout << "M=" << M << ", N=" << N << endl;      cout << "M=" << mainPattern->numOutput << ", N=" << mainPattern->numInput << endl;
1138      for (size_t i=0; i<ptr.size(); i++) {      for (size_t i=0; i<mainPattern->numOutput+1; i++) {
1139          cout << "ptr[" << i << "]=" << ptr[i] << endl;          cout << "ptr[" << i << "]=" << mainPattern->ptr[i] << endl;
1140      }      }
1141      for (size_t i=0; i<index.size(); i++) {      for (size_t i=0; i<mainPattern->ptr[mainPattern->numOutput]; i++) {
1142          cout << "index[" << i << "]=" << index[i] << endl;          cout << "index[" << i << "]=" << mainPattern->index[i] << endl;
1143      }      }
1144      */      */
1145
1146      // now build the row couple pattern      /*
1147      IndexVector ptr2(1,0);      cout << "--- colCouple_pattern ---" << endl;
1148      IndexVector index2;      cout << "M=" << colPattern->numOutput << ", N=" << colPattern->numInput << endl;
1149      for (dim_t id=0; id<N; id++) {      for (size_t i=0; i<colPattern->numOutput+1; i++) {
1150          n=0;          cout << "ptr[" << i << "]=" << colPattern->ptr[i] << endl;
for (dim_t i=0; i<M; i++) {
for (dim_t j=ptr[i]; j<ptr[i+1]; j++) {
if (index[j]==id) {
index2.push_back(i);
n++;
break;
}
}
}
ptr2.push_back(ptr2.back()+n);
1151      }      }
1152        for (size_t i=0; i<colPattern->ptr[colPattern->numOutput]; i++) {
1153            cout << "index[" << i << "]=" << colPattern->index[i] << endl;
1154        }
1155        */
1156
1157      /*      /*
1158      cout << "--- rowCouple_pattern ---" << endl;      cout << "--- rowCouple_pattern ---" << endl;
1159      cout << "M=" << N << ", N=" << M << endl;      cout << "M=" << rowPattern->numOutput << ", N=" << rowPattern->numInput << endl;
1160      for (size_t i=0; i<ptr2.size(); i++) {      for (size_t i=0; i<rowPattern->numOutput+1; i++) {
1161          cout << "ptr[" << i << "]=" << ptr2[i] << endl;          cout << "ptr[" << i << "]=" << rowPattern->ptr[i] << endl;
1162      }      }
1163      for (size_t i=0; i<index2.size(); i++) {      for (size_t i=0; i<rowPattern->ptr[rowPattern->numOutput]; i++) {
1164          cout << "index[" << i << "]=" << index2[i] << endl;          cout << "index[" << i << "]=" << rowPattern->index[i] << endl;
1165      }      }
1166      */      */
1167
1168      // paso will manage the memory      Paso_Pattern_free(mainPattern);
1169      index_t* indexC = MEMALLOC(index.size(), index_t);      Paso_Pattern_free(colPattern);
1170      index_t* ptrC = MEMALLOC(ptr.size(), index_t);      Paso_Pattern_free(rowPattern);
copy(index.begin(), index.end(), indexC);
copy(ptr.begin(), ptr.end(), ptrC);
*colPattern=Paso_Pattern_alloc(MATRIX_FORMAT_DEFAULT, M, N, ptrC, indexC);

// paso will manage the memory
indexC = MEMALLOC(index2.size(), index_t);
ptrC = MEMALLOC(ptr2.size(), index_t);
copy(index2.begin(), index2.end(), indexC);
copy(ptr2.begin(), ptr2.end(), ptrC);
*rowPattern=Paso_Pattern_alloc(MATRIX_FORMAT_DEFAULT, N, M, ptrC, indexC);
1171  }  }
1172
1173  //protected  //protected
# Line 1518  void Rectangle::assemblePDESingle(Paso_S Line 1341  void Rectangle::assemblePDESingle(Paso_S
1341  {  {
1342      const double h0 = m_l0/m_gNE0;      const double h0 = m_l0/m_gNE0;
1343      const double h1 = m_l1/m_gNE1;      const double h1 = m_l1/m_gNE1;
1344      /* GENERATOR SNIP_PDE_SINGLE_PRE TOP */      /*** GENERATOR SNIP_PDE_SINGLE_PRE TOP */
1345      const double w0 = -0.1555021169820365539*h1/h0;      const double w0 = -0.1555021169820365539*h1/h0;
1346      const double w1 = 0.041666666666666666667;      const double w1 = 0.041666666666666666667;
1347      const double w10 = -0.041666666666666666667*h0/h1;      const double w10 = -0.041666666666666666667*h0/h1;
# Line 1600  void Rectangle::assemblePDESingle(Paso_S Line 1423  void Rectangle::assemblePDESingle(Paso_S
1423      rhs.requireWrite();      rhs.requireWrite();
1424  #pragma omp parallel  #pragma omp parallel
1425      {      {
1426          for (index_t k1_0=0; k1_0<2; k1_0++) { // coloring          for (index_t k1_0=0; k1_0<2; k1_0++) { // colouring
1427  #pragma omp for  #pragma omp for
1428              for (index_t k1=k1_0; k1<m_NE1; k1+=2) {              for (index_t k1=k1_0; k1<m_NE1; k1+=2) {
1429                  for (index_t k0=0; k0<m_NE0; ++k0)  {                  for (index_t k0=0; k0<m_NE0; ++k0)  {
# Line 1609  void Rectangle::assemblePDESingle(Paso_S Line 1432  void Rectangle::assemblePDESingle(Paso_S
1432                      vector<double> EM_S(4*4, 0);                      vector<double> EM_S(4*4, 0);
1433                      vector<double> EM_F(4, 0);                      vector<double> EM_F(4, 0);
1434                      const index_t e = k0 + m_NE0*k1;                      const index_t e = k0 + m_NE0*k1;
1435                      /* GENERATOR SNIP_PDE_SINGLE TOP */                      /*** GENERATOR SNIP_PDE_SINGLE TOP */
1436                      ///////////////                      ///////////////
1437                      // process A //                      // process A //
1438                      ///////////////                      ///////////////
# Line 2220  void Rectangle::assemblePDESingle(Paso_S Line 2043  void Rectangle::assemblePDESingle(Paso_S
2043                      /* GENERATOR SNIP_PDE_SINGLE BOTTOM */                      /* GENERATOR SNIP_PDE_SINGLE BOTTOM */
2044
2046                        const index_t firstNode=m_N0*k1+k0;
2047                      IndexVector rowIndex;                      IndexVector rowIndex;
2048                      const index_t firstNode=k1*m_N0+k0;                      rowIndex.push_back(m_dofMap[firstNode]);
2049                      rowIndex.push_back(firstNode);                      rowIndex.push_back(m_dofMap[firstNode+1]);
2050                      rowIndex.push_back(firstNode+1);                      rowIndex.push_back(m_dofMap[firstNode+m_N0]);
2051                      rowIndex.push_back(firstNode+m_N0);                      rowIndex.push_back(m_dofMap[firstNode+m_N0+1]);
rowIndex.push_back(firstNode+1+m_N0);
addToSystemMatrix(mat, 4, rowIndex, 1, 4, rowIndex, 1, EM_S);
}
2053                            //cout << "-- AddtoRHS -- " << endl;
2054                          double *F_p=rhs.getSampleDataRW(0);                          double *F_p=rhs.getSampleDataRW(0);
2055                          for (index_t i=0; i<4; i++) {                          for (index_t i=0; i<rowIndex.size(); i++) {
2056                              if (rowIndex[i]<getNumNodes())                              if (rowIndex[i]<getNumDOF()) {
2057                                  F_p[rowIndex[i]]+=EM_F[i];                                  F_p[rowIndex[i]]+=EM_F[i];
2058                                    //cout << "F[" << rowIndex[i] << "]=" << F_p[rowIndex[i]] << endl;
2059                                }
2060                          }                          }
2061                            //cout << "---"<<endl;
2062                      }                      }
2064                            //cout << "-- AddtoSystem -- " << endl;
2065                            addToSystemMatrix(mat, rowIndex, 1, rowIndex, 1, EM_S);
2066                        }
2067
2068                  } // end k0 loop                  } // end k0 loop
2069              } // end k1 loop              } // end k1 loop
2070          } // end of coloring          } // end of coloring
2071      } // end of parallel region      } // end of parallel region
2072  }  }
2073
const IndexVector& nodes_Eq, dim_t num_Eq, dim_t NN_Sol,
const IndexVector& nodes_Sol, dim_t num_Sol, const vector<double>& array) const
{
const dim_t row_block_size = in->row_block_size;
const dim_t col_block_size = in->col_block_size;
const dim_t block_size = in->block_size;
const dim_t num_subblocks_Eq = num_Eq / row_block_size;
const dim_t num_subblocks_Sol = num_Sol / col_block_size;
const dim_t numMyCols = in->pattern->mainPattern->numInput;
const dim_t numMyRows = in->pattern->mainPattern->numOutput;

const index_t* mainBlock_ptr = in->mainBlock->pattern->ptr;
const index_t* mainBlock_index = in->mainBlock->pattern->index;
double* mainBlock_val = in->mainBlock->val;
const index_t* col_coupleBlock_ptr = in->col_coupleBlock->pattern->ptr;
const index_t* col_coupleBlock_index = in->col_coupleBlock->pattern->index;
double* col_coupleBlock_val = in->col_coupleBlock->val;
const index_t* row_coupleBlock_ptr = in->row_coupleBlock->pattern->ptr;
const index_t* row_coupleBlock_index = in->row_coupleBlock->pattern->index;
double* row_coupleBlock_val = in->row_coupleBlock->val;
for (dim_t k_Eq = 0; k_Eq < NN_Eq; ++k_Eq) {
// down columns of array
const dim_t j_Eq = nodes_Eq[k_Eq];
for (dim_t l_row = 0; l_row < num_subblocks_Eq; ++l_row) {
const dim_t i_row = j_Eq * num_subblocks_Eq + l_row;
// only look at the matrix rows stored on this processor
if (i_row < numMyRows) {
for (dim_t k_Sol = 0; k_Sol < NN_Sol; ++k_Sol) {
// across rows of array
const dim_t j_Sol = nodes_Sol[k_Sol];
for (dim_t l_col = 0; l_col < num_subblocks_Sol; ++l_col) {
// only look at the matrix rows stored on this processor
const dim_t i_col = j_Sol * num_subblocks_Sol + l_col;
if (i_col < numMyCols) {
for (dim_t k = mainBlock_ptr[i_row];
k < mainBlock_ptr[i_row + 1]; ++k) {
if (mainBlock_index[k] == i_col) {
// entry array(k_Sol, j_Eq) is a block
// (row_block_size x col_block_size)
for (dim_t ic = 0; ic < col_block_size; ++ic) {
const dim_t i_Sol = ic + col_block_size * l_col;
for (dim_t ir = 0; ir < row_block_size; ++ir) {
const dim_t i_Eq = ir + row_block_size * l_row;
mainBlock_val[k*block_size + ir + row_block_size*ic] +=
array[INDEX4
(i_Eq, i_Sol, k_Eq, k_Sol, num_Eq, num_Sol, NN_Eq)];
}
}
break;
}
}
} else {
for (dim_t k = col_coupleBlock_ptr[i_row];
k < col_coupleBlock_ptr[i_row + 1]; ++k) {
if (col_coupleBlock_index[k] == i_col - numMyCols) {
// entry array(k_Sol, j_Eq) is a block
// (row_block_size x col_block_size)
for (dim_t ic = 0; ic < col_block_size; ++ic) {
const dim_t i_Sol = ic + col_block_size * l_col;
for (dim_t ir = 0; ir < row_block_size; ++ir) {
const dim_t i_Eq = ir + row_block_size * l_row;
col_coupleBlock_val[k * block_size + ir + row_block_size * ic] +=
array[INDEX4
(i_Eq, i_Sol, k_Eq, k_Sol, num_Eq, num_Sol, NN_Eq)];
}
}
break;
}
}
}
}
}
} else {
for (dim_t k_Sol = 0; k_Sol < NN_Sol; ++k_Sol) {
// across rows of array
const dim_t j_Sol = nodes_Sol[k_Sol];
for (dim_t l_col = 0; l_col < num_subblocks_Sol; ++l_col) {
const dim_t i_col = j_Sol * num_subblocks_Sol + l_col;
if (i_col < numMyCols) {
for (dim_t k = row_coupleBlock_ptr[i_row - numMyRows];
k < row_coupleBlock_ptr[i_row - numMyRows + 1]; ++k)
{
if (row_coupleBlock_index[k] == i_col) {
// entry array(k_Sol, j_Eq) is a block
// (row_block_size x col_block_size)
for (dim_t ic = 0; ic < col_block_size; ++ic) {
const dim_t i_Sol = ic + col_block_size * l_col;
for (dim_t ir = 0; ir < row_block_size; ++ir) {
const dim_t i_Eq = ir + row_block_size * l_row;
row_coupleBlock_val[k * block_size + ir + row_block_size * ic] +=
array[INDEX4
(i_Eq, i_Sol, k_Eq, k_Sol, num_Eq, num_Sol, NN_Eq)];
}
}
break;
}
}
}
}
}
}
}
}
}

2074  } // end of namespace ripley  } // end of namespace ripley
2075

Legend:
 Removed from v.3748 changed lines Added in v.3756