# Diff of /branches/ripleygmg_from_3668/ripley/src/Rectangle.cpp

revision 3748 by caltinay, Thu Dec 15 07:36:19 2011 UTC revision 3754 by caltinay, Wed Jan 4 07:07:37 2012 UTC
# Line 43  Rectangle::Rectangle(int n0, int n1, dou Line 43  Rectangle::Rectangle(int n0, int n1, dou
43      if (m_NX*m_NY != m_mpiInfo->size)      if (m_NX*m_NY != m_mpiInfo->size)
44          throw RipleyException("Invalid number of spatial subdivisions");          throw RipleyException("Invalid number of spatial subdivisions");
45
46      if (n0%m_NX > 0 || n1%m_NY > 0)      if ((n0+1)%m_NX > 0 || (n1+1)%m_NY > 0)
47          throw RipleyException("Number of elements must be separable into number of ranks in each dimension");          throw RipleyException("Number of elements+1 must be separable into number of ranks in each dimension");
48
49      // local number of elements      if ((m_NX > 1 && (n0+1)/m_NX<2) || (m_NY > 1 && (n1+1)/m_NY<2))
50      m_NE0 = n0/m_NX;          throw RipleyException("Too few elements for the number of ranks");
51      m_NE1 = n1/m_NY;
52      // local number of nodes (not necessarily owned)      // local number of elements (including overlap)
53        m_NE0 = (m_NX>1 ? (n0+1)/m_NX : n0);
54        if (m_mpiInfo->rank%m_NX>0 && m_mpiInfo->rank%m_NX<m_NX-1)
55            m_NE0++;
56        m_NE1 = (m_NY>1 ? (n1+1)/m_NY : n1);
57        if (m_mpiInfo->rank/m_NX>0 && m_mpiInfo->rank/m_NX<m_NY-1)
58            m_NE1++;
59
60        // local number of nodes
61      m_N0 = m_NE0+1;      m_N0 = m_NE0+1;
62      m_N1 = m_NE1+1;      m_N1 = m_NE1+1;
63
64      // bottom-left node is at (offset0,offset1) in global mesh      // bottom-left node is at (offset0,offset1) in global mesh
65      m_offset0 = m_NE0*(m_mpiInfo->rank%m_NX);      m_offset0 = (n0+1)/m_NX*(m_mpiInfo->rank%m_NX);
66      m_offset1 = m_NE1*(m_mpiInfo->rank/m_NX);      if (m_offset0 > 0)
67            m_offset0--;
68        m_offset1 = (n1+1)/m_NY*(m_mpiInfo->rank/m_NX);
69        if (m_offset1 > 0)
70            m_offset1--;
71
72      populateSampleIds();      populateSampleIds();
73  }  }
74
# Line 228  const int* Rectangle::borrowSampleRefere Line 242  const int* Rectangle::borrowSampleRefere
242          case Nodes:          case Nodes:
243          case ReducedNodes: //FIXME: reduced          case ReducedNodes: //FIXME: reduced
244              return &m_nodeId[0];              return &m_nodeId[0];
245            case DegreesOfFreedom:
246            case ReducedDegreesOfFreedom: //FIXME: reduced
247                return &m_dofId[0];
248          case Elements:          case Elements:
249          case ReducedElements:          case ReducedElements:
250              return &m_elementId[0];              return &m_elementId[0];
# Line 247  const int* Rectangle::borrowSampleRefere Line 264  const int* Rectangle::borrowSampleRefere
264  bool Rectangle::ownSample(int fsCode, index_t id) const  bool Rectangle::ownSample(int fsCode, index_t id) const
265  {  {
266  #ifdef ESYS_MPI  #ifdef ESYS_MPI
267      if (fsCode != ReducedNodes) {      if (fsCode == Nodes) {
268          const index_t myFirst=m_nodeDistribution[m_mpiInfo->rank];          const index_t left = (m_offset0==0 ? 0 : 1);
269          const index_t myLast=m_nodeDistribution[m_mpiInfo->rank+1]-1;          const index_t bottom = (m_offset1==0 ? 0 : 1);
270          return (m_nodeId[id]>=myFirst && m_nodeId[id]<=myLast);          const index_t right = (m_mpiInfo->rank%m_NX==m_NX-1 ? m_N0 : m_N0-1);
271            const index_t top = (m_mpiInfo->rank/m_NX==m_NY-1 ? m_N1 : m_N1-1);
272            const index_t x=id%m_N0;
273            const index_t y=id/m_N0;
274            return (x>=left && x<right && y>=bottom && y<top);
275      } else {      } else {
276          stringstream msg;          stringstream msg;
277          msg << "ownSample() not implemented for "          msg << "ownSample() not implemented for "
# Line 739  Paso_SystemMatrixPattern* Rectangle::get Line 760  Paso_SystemMatrixPattern* Rectangle::get
760      IndexVector offsetInShared(1,0);      IndexVector offsetInShared(1,0);
761      IndexVector sendShared, recvShared;      IndexVector sendShared, recvShared;
762      const IndexVector faces=getNumFacesPerBoundary();      const IndexVector faces=getNumFacesPerBoundary();
763        const index_t nDOF0 = (m_gNE0+1)/m_NX;
764        const index_t nDOF1 = (m_gNE1+1)/m_NY;
765        const int numDOF=nDOF0*nDOF1;
766      const index_t left = (m_offset0==0 ? 0 : 1);      const index_t left = (m_offset0==0 ? 0 : 1);
767      const index_t bottom = (m_offset1==0 ? 0 : 1);      const index_t bottom = (m_offset1==0 ? 0 : 1);
768      // corner node from bottom-left      vector<IndexVector> colIndices(numDOF); // for the couple blocks
769      if (faces[0] == 0 && faces[2] == 0) {      int numShared=0;
770          neighbour.push_back(m_mpiInfo->rank-m_NX-1);
771          offsetInShared.push_back(offsetInShared.back()+1);      m_dofMap.assign(getNumNodes(), 0);
772          sendShared.push_back(m_nodeId[m_N0+left]);  #pragma omp parallel for
773          recvShared.push_back(m_nodeId[0]);      for (index_t i=bottom; i<m_N1; i++) {
774      }          for (index_t j=left; j<m_N0; j++) {
775      // bottom edge              m_dofMap[i*m_N0+j]=(i-bottom)*nDOF0+j-left;
if (faces[2] == 0) {
neighbour.push_back(m_mpiInfo->rank-m_NX);
offsetInShared.push_back(offsetInShared.back()+m_N0-left);
for (dim_t i=left; i<m_N0; i++) {
// easy case, we know the neighbour id's
sendShared.push_back(m_nodeId[m_N0+i]);
recvShared.push_back(m_nodeId[i]);
}
}
// corner node from bottom-right
if (faces[1] == 0 && faces[2] == 0) {
neighbour.push_back(m_mpiInfo->rank-m_NX+1);
const index_t N0=(neighbour.back()%m_NX == 0 ? m_N0 : m_N0-1);
const index_t N1=(neighbour.back()/m_NX == 0 ? m_N1 : m_N1-1);
const index_t first=m_nodeDistribution[neighbour.back()];
offsetInShared.push_back(offsetInShared.back()+1);
sendShared.push_back(m_nodeId[(bottom+1)*m_N0-1]);
recvShared.push_back(first+N0*(N1-1));
}
// left edge
if (faces[0] == 0) {
neighbour.push_back(m_mpiInfo->rank-1);
offsetInShared.push_back(offsetInShared.back()+m_N1-bottom);
for (dim_t i=bottom; i<m_N1; i++) {
// easy case, we know the neighbour id's
sendShared.push_back(m_nodeId[i*m_N0+1]);
recvShared.push_back(m_nodeId[i*m_N0]);
}
}
// right edge
if (faces[1] == 0) {
neighbour.push_back(m_mpiInfo->rank+1);
const index_t rightN0=(neighbour.back()%m_NX == 0 ? m_N0 : m_N0-1);
const index_t first=m_nodeDistribution[neighbour.back()];
offsetInShared.push_back(offsetInShared.back()+m_N1-bottom);
for (dim_t i=bottom; i<m_N1; i++) {
sendShared.push_back(m_nodeId[(i+1)*m_N0-1]);
recvShared.push_back(first+rightN0*(i-bottom));
776          }          }
777      }      }
778      // corner node from top-left
779      if (faces[0] == 0 && faces[3] == 0) {      // build list of shared components and neighbours by looping through
780          neighbour.push_back(m_mpiInfo->rank+m_NX-1);      // all potential neighbouring ranks and checking if positions are
781          const index_t N0=(neighbour.back()%m_NX == 0 ? m_N0 : m_N0-1);      // within bounds
782          const index_t first=m_nodeDistribution[neighbour.back()];      const int x=m_mpiInfo->rank%m_NX;
783          offsetInShared.push_back(offsetInShared.back()+1);      const int y=m_mpiInfo->rank/m_NX;
784          sendShared.push_back(m_nodeId[m_N0*(m_N1-1)+left]);      for (int i1=-1; i1<2; i1++) {
785          recvShared.push_back(first+N0-1);          for (int i0=-1; i0<2; i0++) {
786      }              // skip rank itself
787      // top edge              if (i0==0 && i1==0)
788      if (faces[3] == 0) {                  continue;
789          neighbour.push_back(m_mpiInfo->rank+m_NX);              // location of neighbour rank
790          const index_t first=m_nodeDistribution[neighbour.back()];              const int nx=x+i0;
791          offsetInShared.push_back(offsetInShared.back()+m_N0-left);              const int ny=y+i1;
792          for (dim_t i=left; i<m_N0; i++) {              if (nx>=0 && ny>=0 && nx<m_NX && ny<m_NY) {
793              sendShared.push_back(m_nodeId[m_N0*(m_N1-1)+i]);                  neighbour.push_back(ny*m_NX+nx);
794              recvShared.push_back(first+i-left);                  if (i0==0) {
795                        // sharing top or bottom edge
796                        const int firstDOF=(i1==-1 ? 0 : numDOF-nDOF0);
797                        const int firstNode=(i1==-1 ? left : m_N0*(m_N1-1)+left);
798                        offsetInShared.push_back(offsetInShared.back()+nDOF0);
799                        for (dim_t i=0; i<nDOF0; i++, numShared++) {
800                            sendShared.push_back(firstDOF+i);
801                            recvShared.push_back(numDOF+numShared);
802                            if (i>0)
803                                colIndices[firstDOF+i-1].push_back(numShared);
804                            colIndices[firstDOF+i].push_back(numShared);
805                            if (i<nDOF0-1)
806                                colIndices[firstDOF+i+1].push_back(numShared);
807                            m_dofMap[firstNode+i]=numDOF+numShared;
808                        }
809                    } else if (i1==0) {
810                        // sharing left or right edge
811                        const int firstDOF=(i0==-1 ? 0 : nDOF0-1);
812                        const int firstNode=(i0==-1 ? bottom*m_N0 : (bottom+1)*m_N0-1);
813                        offsetInShared.push_back(offsetInShared.back()+nDOF1);
814                        for (dim_t i=0; i<nDOF1; i++, numShared++) {
815                            sendShared.push_back(firstDOF+i*nDOF0);
816                            recvShared.push_back(numDOF+numShared);
817                            if (i>0)
818                                colIndices[firstDOF+(i-1)*nDOF0].push_back(numShared);
819                            colIndices[firstDOF+i*nDOF0].push_back(numShared);
820                            if (i<nDOF1-1)
821                                colIndices[firstDOF+(i+1)*nDOF0].push_back(numShared);
822                            m_dofMap[firstNode+i*m_N0]=numDOF+numShared;
823                        }
824                    } else {
825                        // sharing a node
826                        const int dof=(i0+1)/2*(nDOF0-1)+(i1+1)/2*(numDOF-nDOF0);
827                        const int node=(i0+1)/2*(m_N0-1)+(i1+1)/2*m_N0*(m_N1-1);
828                        offsetInShared.push_back(offsetInShared.back()+1);
829                        sendShared.push_back(dof);
830                        recvShared.push_back(numDOF+numShared);
831                        colIndices[dof].push_back(numShared);
832                        m_dofMap[node]=numDOF+numShared;
833                        ++numShared;
834                    }
835                }
836          }          }
837      }      }
838      // corner node from top-right
if (faces[1] == 0 && faces[3] == 0) {
neighbour.push_back(m_mpiInfo->rank+m_NX+1);
const index_t first=m_nodeDistribution[neighbour.back()];
offsetInShared.push_back(offsetInShared.back()+1);
sendShared.push_back(m_nodeId[m_N0*m_N1-1]);
recvShared.push_back(first);
}
const int numDOF=m_nodeDistribution[m_mpiInfo->rank+1]-m_nodeDistribution[m_mpiInfo->rank];
839      /*      /*
840      cout << "--- rcv_shcomp ---" << endl;      cout << "--- rcv_shcomp ---" << endl;
841      cout << "numDOF=" << numDOF << ", numNeighbors=" << neighbour.size() << endl;      cout << "numDOF=" << numDOF << ", numNeighbors=" << neighbour.size() << endl;
# Line 831  Paso_SystemMatrixPattern* Rectangle::get Line 850  Paso_SystemMatrixPattern* Rectangle::get
850      for (size_t i=0; i<sendShared.size(); i++) {      for (size_t i=0; i<sendShared.size(); i++) {
851          cout << "shared[" << i << "]=" << sendShared[i] << endl;          cout << "shared[" << i << "]=" << sendShared[i] << endl;
852      }      }
853        cout << "--- dofMap ---" << endl;
854        for (size_t i=0; i<m_dofMap.size(); i++) {
855            cout << "m_dofMap[" << i << "]=" << m_dofMap[i] << endl;
856        }
857        cout << "--- colIndices ---" << endl;
858        for (size_t i=0; i<colIndices.size(); i++) {
859            cout << "colIndices[" << i << "].size()=" << colIndices[i].size() << endl;
860        }
861      */      */
862
863      Paso_SharedComponents *snd_shcomp = Paso_SharedComponents_alloc(      Paso_SharedComponents *snd_shcomp = Paso_SharedComponents_alloc(
# Line 875  Paso_SystemMatrixPattern* Rectangle::get Line 902  Paso_SystemMatrixPattern* Rectangle::get
902      }      }
903      */      */
904
905      ptr.clear();      // column & row couple patterns
906        ptr.assign(1, 0);
907      index.clear();      index.clear();
908
909      // column & row couple patterns      for (index_t i=0; i<numDOF; i++) {
910      Paso_Pattern *colCouplePattern, *rowCouplePattern;          index.insert(index.end(), colIndices[i].begin(), colIndices[i].end());
911      generateCouplePatterns(&colCouplePattern, &rowCouplePattern);          ptr.push_back(ptr.back()+colIndices[i].size());
912        }
913
914        // paso will manage the memory
915        indexC = MEMALLOC(index.size(), index_t);
916        ptrC = MEMALLOC(ptr.size(), index_t);
917        copy(index.begin(), index.end(), indexC);
918        copy(ptr.begin(), ptr.end(), ptrC);
919        M=ptr.size()-1;
920        N=numShared;
921        Paso_Pattern *colCouplePattern=Paso_Pattern_alloc(MATRIX_FORMAT_DEFAULT,
922                M, N, ptrC, indexC);
923
924        /*
925        cout << "--- colCouple_pattern ---" << endl;
926        cout << "M=" << M << ", N=" << N << endl;
927        for (size_t i=0; i<ptr.size(); i++) {
928            cout << "ptr[" << i << "]=" << ptr[i] << endl;
929        }
930        for (size_t i=0; i<index.size(); i++) {
931            cout << "index[" << i << "]=" << index[i] << endl;
932        }
933        */
934
935        // now build the row couple pattern
936        IndexVector ptr2(1,0);
937        IndexVector index2;
938        for (dim_t id=0; id<N; id++) {
939            dim_t n=0;
940            for (dim_t i=0; i<M; i++) {
941                for (dim_t j=ptr[i]; j<ptr[i+1]; j++) {
942                    if (index[j]==id) {
943                        index2.push_back(i);
944                        n++;
945                        break;
946                    }
947                }
948            }
949            ptr2.push_back(ptr2.back()+n);
950        }
951
952        // paso will manage the memory
953        indexC = MEMALLOC(index2.size(), index_t);
954        ptrC = MEMALLOC(ptr2.size(), index_t);
955        copy(index2.begin(), index2.end(), indexC);
956        copy(ptr2.begin(), ptr2.end(), ptrC);
957        Paso_Pattern *rowCouplePattern=Paso_Pattern_alloc(MATRIX_FORMAT_DEFAULT,
958                N, M, ptrC, indexC);
959
960        /*
961        cout << "--- rowCouple_pattern ---" << endl;
962        cout << "M=" << N << ", N=" << M << endl;
963        for (size_t i=0; i<ptr2.size(); i++) {
964            cout << "ptr[" << i << "]=" << ptr2[i] << endl;
965        }
966        for (size_t i=0; i<index2.size(); i++) {
967            cout << "index[" << i << "]=" << index2[i] << endl;
968        }
969        */
970
971      // allocate paso distribution      // allocate paso distribution
972      Paso_Distribution* distribution = Paso_Distribution_alloc(m_mpiInfo,      Paso_Distribution* distribution = Paso_Distribution_alloc(m_mpiInfo,
# Line 959  pair<double,double> Rectangle::getFirstC Line 1045  pair<double,double> Rectangle::getFirstC
1045  }  }
1046
1047  //protected  //protected
1048    dim_t Rectangle::getNumDOF() const
1049    {
1050        return (m_gNE0+1)/m_NX*(m_gNE1+1)/m_NY;
1051    }
1052
1053    //protected
1054  dim_t Rectangle::getNumFaceElements() const  dim_t Rectangle::getNumFaceElements() const
1055  {  {
1056      const IndexVector faces = getNumFacesPerBoundary();      const IndexVector faces = getNumFacesPerBoundary();
# Line 994  void Rectangle::assembleCoordinates(escr Line 1086  void Rectangle::assembleCoordinates(escr
1086  //private  //private
1087  void Rectangle::populateSampleIds()  void Rectangle::populateSampleIds()
1088  {  {
1089      // identifiers are ordered from left to right, bottom to top on each rank,      // identifiers are ordered from left to right, bottom to top globablly.
// except for the shared nodes which are owned by the rank below / to the
// left of the current rank
1090
1091      // build node distribution vector first.      // build node distribution vector first.
// m_nodeDistribution[i] is the first node id on rank i, that is
1092      // rank i owns m_nodeDistribution[i+1]-nodeDistribution[i] nodes      // rank i owns m_nodeDistribution[i+1]-nodeDistribution[i] nodes
1093      m_nodeDistribution.assign(m_mpiInfo->size+1, 0);      m_nodeDistribution.assign(m_mpiInfo->size+1, 0);
1094      m_nodeDistribution[1]=getNumNodes();      const dim_t numDOF=getNumDOF();
1095      for (dim_t k=1; k<m_mpiInfo->size-1; k++) {      for (dim_t k=1; k<m_mpiInfo->size; k++) {
1096          const index_t x=k%m_NX;          m_nodeDistribution[k]=k*numDOF;
const index_t y=k/m_NX;
index_t numNodes=getNumNodes();
if (x>0)
numNodes-=m_N1;
if (y>0)
numNodes-=m_N0;
if (x>0 && y>0)
numNodes++; // subtracted corner twice -> fix that
m_nodeDistribution[k+1]=m_nodeDistribution[k]+numNodes;
1097      }      }
1098      m_nodeDistribution[m_mpiInfo->size]=getNumDataPointsGlobal();      m_nodeDistribution[m_mpiInfo->size]=getNumDataPointsGlobal();

1099      m_nodeId.resize(getNumNodes());      m_nodeId.resize(getNumNodes());
1100        m_dofId.resize(numDOF);
1101        m_elementId.resize(getNumElements());
1102        m_faceId.resize(getNumFaceElements());
1103
1104      // the bottom row and left column are not owned by this rank so the  #pragma omp parallel
1105      // identifiers need to be computed accordingly      {
1106      const index_t left = (m_offset0==0 ? 0 : 1);          // nodes
1107      const index_t bottom = (m_offset1==0 ? 0 : 1);  #pragma omp for nowait
1108      if (left>0) {          for (dim_t i1=0; i1<m_N1; i1++) {
1109          const int neighbour=m_mpiInfo->rank-1;              for (dim_t i0=0; i0<m_N0; i0++) {
1110          const index_t leftN0=(neighbour%m_NX == 0 ? m_N0 : m_N0-1);                  m_nodeId[i0+i1*m_N0] = (m_offset1+i1)*(m_gNE0+1)+m_offset0+i0;
1111  #pragma omp parallel for              }
for (dim_t i1=bottom; i1<m_N1; i1++) {
m_nodeId[i1*m_N0]=m_nodeDistribution[neighbour]
+ (i1-bottom+1)*leftN0-1;
}
}
if (bottom>0) {
const int neighbour=m_mpiInfo->rank-m_NX;
const index_t bottomN0=(neighbour%m_NX == 0 ? m_N0 : m_N0-1);
const index_t bottomN1=(neighbour/m_NX == 0 ? m_N1 : m_N1-1);
#pragma omp parallel for
for (dim_t i0=left; i0<m_N0; i0++) {
m_nodeId[i0]=m_nodeDistribution[neighbour]
+ (bottomN1-1)*bottomN0 + i0 - left;
1112          }          }
}
if (left>0 && bottom>0) {
const int neighbour=m_mpiInfo->rank-m_NX-1;
const index_t N0=(neighbour%m_NX == 0 ? m_N0 : m_N0-1);
const index_t N1=(neighbour/m_NX == 0 ? m_N1 : m_N1-1);
m_nodeId[0]=m_nodeDistribution[neighbour]+N0*N1-1;
}
1113
1114      // the rest of the id's are contiguous          // degrees of freedom
1115      const index_t firstId=m_nodeDistribution[m_mpiInfo->rank];  #pragma omp for nowait
1116  #pragma omp parallel for          for (dim_t k=0; k<numDOF; k++)
1117      for (dim_t i1=bottom; i1<m_N1; i1++) {              m_dofId[k] = m_nodeDistribution[m_mpiInfo->rank]+k;
1118          for (dim_t i0=left; i0<m_N0; i0++) {
1119              m_nodeId[i0+i1*m_N0] = firstId+i0-left+(i1-bottom)*(m_N0-left);          // elements
1120          }  #pragma omp for nowait
1121      }          for (dim_t k=0; k<getNumElements(); k++)
1122                m_elementId[k]=k;
1123
1124            // face elements
1125    #pragma omp for
1126            for (dim_t k=0; k<getNumFaceElements(); k++)
1127                m_faceId[k]=k;
1128        } // end parallel section
1129
1130      m_nodeTags.assign(getNumNodes(), 0);      m_nodeTags.assign(getNumNodes(), 0);
1131      updateTagsInUse(Nodes);      updateTagsInUse(Nodes);
1132
// elements
m_elementId.resize(getNumElements());
#pragma omp parallel for
for (dim_t k=0; k<getNumElements(); k++) {
m_elementId[k]=k;
}
1133      m_elementTags.assign(getNumElements(), 0);      m_elementTags.assign(getNumElements(), 0);
1134      updateTagsInUse(Elements);      updateTagsInUse(Elements);
1135
// face element id's
m_faceId.resize(getNumFaceElements());
#pragma omp parallel for
for (dim_t k=0; k<getNumFaceElements(); k++) {
m_faceId[k]=k;
}

1136      // generate face offset vector and set face tags      // generate face offset vector and set face tags
1137      const IndexVector facesPerEdge = getNumFacesPerBoundary();      const IndexVector facesPerEdge = getNumFacesPerBoundary();
1138      const index_t LEFT=1, RIGHT=2, BOTTOM=10, TOP=20;      const index_t LEFT=1, RIGHT=2, BOTTOM=10, TOP=20;
# Line 1100  void Rectangle::populateSampleIds() Line 1157  void Rectangle::populateSampleIds()
1157  //private  //private
1158  int Rectangle::insertNeighbours(IndexVector& index, index_t node) const  int Rectangle::insertNeighbours(IndexVector& index, index_t node) const
1159  {  {
1160      const dim_t myN0 = (m_offset0==0 ? m_N0 : m_N0-1);      const index_t nDOF0 = (m_gNE0+1)/m_NX;
1161      const dim_t myN1 = (m_offset1==0 ? m_N1 : m_N1-1);      const index_t nDOF1 = (m_gNE1+1)/m_NY;
1162      const int x=node%myN0;      const int x=node%nDOF0;
1163      const int y=node/myN0;      const int y=node/nDOF0;
1164      int num=0;      int num=0;
1165      if (y>0) {      // loop through potential neighbours and add to index if positions are
1166          if (x>0) {      // within bounds
1167              // bottom-left      for (int i1=-1; i1<2; i1++) {
1168              index.push_back(node-myN0-1);          for (int i0=-1; i0<2; i0++) {
1169              num++;              // skip node itself
1170          }              if (i0==0 && i1==0)
1171          // bottom                  continue;
1172          index.push_back(node-myN0);              // location of neighbour node
1173          num++;              const int nx=x+i0;
1174          if (x<myN0-1) {              const int ny=y+i1;
1175              // bottom-right              if (nx>=0 && ny>=0 && nx<nDOF0 && ny<nDOF1) {
1176              index.push_back(node-myN0+1);                  index.push_back(ny*nDOF0+nx);
1177              num++;                  num++;
1178          }              }
}
if (x<myN0-1) {
// right
index.push_back(node+1);
num++;
if (y<myN1-1) {
// top-right
index.push_back(node+myN0+1);
num++;
}
}
if (y<myN1-1) {
// top
index.push_back(node+myN0);
num++;
if (x>0) {
// top-left
index.push_back(node+myN0-1);
num++;
1179          }          }
1180      }      }
if (x>0) {
// left
index.push_back(node-1);
num++;
}
1181
1182      return num;      return num;
1183  }  }
1184
1185  //private  //protected
1186  void Rectangle::generateCouplePatterns(Paso_Pattern** colPattern, Paso_Pattern** rowPattern) const  void Rectangle::nodesToDOF(escript::Data& out, escript::Data& in) const
1187  {  {
1188      IndexVector ptr(1,0);      const dim_t numComp = in.getDataPointSize();
1189      IndexVector index;      out.requireWrite();
const dim_t myN0 = (m_offset0==0 ? m_N0 : m_N0-1);
const dim_t myN1 = (m_offset1==0 ? m_N1 : m_N1-1);
const IndexVector faces=getNumFacesPerBoundary();

// bottom edge
dim_t n=0;
if (faces[0] == 0) {
index.push_back(2*(myN0+myN1+1));
index.push_back(2*(myN0+myN1+1)+1);
n+=2;
if (faces[2] == 0) {
index.push_back(0);
index.push_back(1);
index.push_back(2);
n+=3;
}
} else if (faces[2] == 0) {
index.push_back(1);
index.push_back(2);
n+=2;
}
// n=neighbours of bottom-left corner node
ptr.push_back(ptr.back()+n);
n=0;
if (faces[2] == 0) {
for (dim_t i=1; i<myN0-1; i++) {
index.push_back(i);
index.push_back(i+1);
index.push_back(i+2);
ptr.push_back(ptr.back()+3);
}
index.push_back(myN0-1);
index.push_back(myN0);
n+=2;
if (faces[1] == 0) {
index.push_back(myN0+1);
index.push_back(myN0+2);
index.push_back(myN0+3);
n+=3;
}
} else {
for (dim_t i=1; i<myN0-1; i++) {
ptr.push_back(ptr.back());
}
if (faces[1] == 0) {
index.push_back(myN0+2);
index.push_back(myN0+3);
n+=2;
}
}
// n=neighbours of bottom-right corner node
ptr.push_back(ptr.back()+n);
1190
1191      // 2nd row to 2nd last row      const index_t left = (m_offset0==0 ? 0 : 1);
1192      for (dim_t i=1; i<myN1-1; i++) {      const index_t bottom = (m_offset1==0 ? 0 : 1);
1193          // left edge      const index_t right = (m_mpiInfo->rank%m_NX==m_NX-1 ? m_N0 : m_N0-1);
1194          if (faces[0] == 0) {      const index_t top = (m_mpiInfo->rank/m_NX==m_NY-1 ? m_N1 : m_N1-1);
1195              index.push_back(2*(myN0+myN1+2)-i);      index_t n=0;
1196              index.push_back(2*(myN0+myN1+2)-i-1);      for (index_t i=bottom; i<top; i++) {
1197              index.push_back(2*(myN0+myN1+2)-i-2);          for (index_t j=left; j<right; j++, n++) {
1198              ptr.push_back(ptr.back()+3);              const double* src=in.getSampleDataRO(j+i*m_N0);
1199          } else {              copy(src, src+numComp, out.getSampleDataRW(n));
ptr.push_back(ptr.back());
}
for (dim_t j=1; j<myN0-1; j++) {
ptr.push_back(ptr.back());
}
// right edge
if (faces[1] == 0) {
index.push_back(myN0+i+1);
index.push_back(myN0+i+2);
index.push_back(myN0+i+3);
ptr.push_back(ptr.back()+3);
} else {
ptr.push_back(ptr.back());
1200          }          }
1201      }      }
1202    }
1203
1204      // top edge  //protected
1206      if (faces[0] == 0) {         const IndexVector& nodes_Eq, dim_t num_Eq, const IndexVector& nodes_Sol,
1207          index.push_back(2*myN0+myN1+5);         dim_t num_Sol, const vector<double>& array) const
1208          index.push_back(2*myN0+myN1+4);  {
1209          n+=2;      const dim_t numMyCols = mat->pattern->mainPattern->numInput;
1210          if (faces[3] == 0) {      const dim_t numMyRows = mat->pattern->mainPattern->numOutput;
1211              index.push_back(2*myN0+myN1+3);
1212              index.push_back(2*myN0+myN1+2);      const index_t* mainBlock_ptr = mat->mainBlock->pattern->ptr;
1213              index.push_back(2*myN0+myN1+1);      const index_t* mainBlock_index = mat->mainBlock->pattern->index;
1214              n+=3;      double* mainBlock_val = mat->mainBlock->val;
1215          }      const index_t* col_coupleBlock_ptr = mat->col_coupleBlock->pattern->ptr;
1216      } else if (faces[3] == 0) {      const index_t* col_coupleBlock_index = mat->col_coupleBlock->pattern->index;
1217          index.push_back(2*myN0+myN1+2);      double* col_coupleBlock_val = mat->col_coupleBlock->val;
1218          index.push_back(2*myN0+myN1+1);      const index_t* row_coupleBlock_ptr = mat->row_coupleBlock->pattern->ptr;
1219          n+=2;      const index_t* row_coupleBlock_index = mat->row_coupleBlock->pattern->index;
1220      }      double* row_coupleBlock_val = mat->row_coupleBlock->val;
// n=neighbours of top-left corner node
ptr.push_back(ptr.back()+n);
n=0;
if (faces[3] == 0) {
for (dim_t i=1; i<myN0-1; i++) {
index.push_back(2*myN0+myN1+i+1);
index.push_back(2*myN0+myN1+i);
index.push_back(2*myN0+myN1+i-1);
ptr.push_back(ptr.back()+3);
}
index.push_back(myN0+myN1+4);
index.push_back(myN0+myN1+3);
n+=2;
if (faces[1] == 0) {
index.push_back(myN0+myN1+2);
index.push_back(myN0+myN1+1);
index.push_back(myN0+myN1);
n+=3;
}
} else {
for (dim_t i=1; i<myN0-1; i++) {
ptr.push_back(ptr.back());
}
if (faces[1] == 0) {
index.push_back(myN0+myN1+1);
index.push_back(myN0+myN1);
n+=2;
}
}
// n=neighbours of top-right corner node
ptr.push_back(ptr.back()+n);
1221
1222      dim_t M=ptr.size()-1;      for (dim_t k_Eq = 0; k_Eq < nodes_Eq.size(); ++k_Eq) {
1223      map<index_t,index_t> idMap;          // down columns of array
1224      dim_t N=0;          const dim_t j_Eq = nodes_Eq[k_Eq];
1225      for (IndexVector::iterator it=index.begin(); it!=index.end(); it++) {          const dim_t i_row = j_Eq;
1226          if (idMap.find(*it)==idMap.end()) {  //printf("row:%d\n", i_row);
1227              idMap[*it]=N;          // only look at the matrix rows stored on this processor
1228              *it=N++;          if (i_row < numMyRows) {
1229                for (dim_t k_Sol = 0; k_Sol < nodes_Sol.size(); ++k_Sol) {
1230                    const dim_t i_col = nodes_Sol[k_Sol];
1231                    if (i_col < numMyCols) {
1232                        for (dim_t k = mainBlock_ptr[i_row]; k < mainBlock_ptr[i_row + 1]; ++k) {
1233                            if (mainBlock_index[k] == i_col) {
1234                                mainBlock_val[k] += array[INDEX2(k_Eq, k_Sol, nodes_Eq.size())];
1235                                break;
1236                            }
1237                        }
1238                    } else {
1239                        for (dim_t k = col_coupleBlock_ptr[i_row]; k < col_coupleBlock_ptr[i_row + 1]; ++k) {
1240    //cout << "col:" << i_col-numMyCols << " colIdx:" << col_coupleBlock_index[k] << endl;
1241                            if (col_coupleBlock_index[k] == i_col - numMyCols) {
1242                                col_coupleBlock_val[k] += array[INDEX2(k_Eq, k_Sol, nodes_Eq.size())];
1243                                break;
1244                            }
1245                        }
1246                    }
1247                }
1248          } else {          } else {
1249              *it=idMap[*it];              for (dim_t k_Sol = 0; k_Sol < nodes_Sol.size(); ++k_Sol) {
1250          }                  // across rows of array
1251      }                  const dim_t i_col = nodes_Sol[k_Sol];
1252                    if (i_col < numMyCols) {
1253      /*                      for (dim_t k = row_coupleBlock_ptr[i_row - numMyRows];
1254      cout << "--- colCouple_pattern ---" << endl;                           k < row_coupleBlock_ptr[i_row - numMyRows + 1]; ++k)
1255      cout << "M=" << M << ", N=" << N << endl;                      {
1256      for (size_t i=0; i<ptr.size(); i++) {  //cout << "col:" << i_col << " rowIdx:" << row_coupleBlock_index[k] << endl;
1257          cout << "ptr[" << i << "]=" << ptr[i] << endl;                          if (row_coupleBlock_index[k] == i_col) {
1258      }                              row_coupleBlock_val[k] += array[INDEX2(k_Eq, k_Sol, nodes_Eq.size())];
1259      for (size_t i=0; i<index.size(); i++) {                              break;
1260          cout << "index[" << i << "]=" << index[i] << endl;                          }
1261      }                      }
*/

// now build the row couple pattern
IndexVector ptr2(1,0);
IndexVector index2;
for (dim_t id=0; id<N; id++) {
n=0;
for (dim_t i=0; i<M; i++) {
for (dim_t j=ptr[i]; j<ptr[i+1]; j++) {
if (index[j]==id) {
index2.push_back(i);
n++;
break;
1262                  }                  }
1263              }              }
1264          }          }
ptr2.push_back(ptr2.back()+n);
}

/*
cout << "--- rowCouple_pattern ---" << endl;
cout << "M=" << N << ", N=" << M << endl;
for (size_t i=0; i<ptr2.size(); i++) {
cout << "ptr[" << i << "]=" << ptr2[i] << endl;
}
for (size_t i=0; i<index2.size(); i++) {
cout << "index[" << i << "]=" << index2[i] << endl;
1265      }      }
*/

// paso will manage the memory
index_t* indexC = MEMALLOC(index.size(), index_t);
index_t* ptrC = MEMALLOC(ptr.size(), index_t);
copy(index.begin(), index.end(), indexC);
copy(ptr.begin(), ptr.end(), ptrC);
*colPattern=Paso_Pattern_alloc(MATRIX_FORMAT_DEFAULT, M, N, ptrC, indexC);

// paso will manage the memory
indexC = MEMALLOC(index2.size(), index_t);
ptrC = MEMALLOC(ptr2.size(), index_t);
copy(index2.begin(), index2.end(), indexC);
copy(ptr2.begin(), ptr2.end(), ptrC);
*rowPattern=Paso_Pattern_alloc(MATRIX_FORMAT_DEFAULT, N, M, ptrC, indexC);
1266  }  }
1267
1268  //protected  //protected
# Line 1518  void Rectangle::assemblePDESingle(Paso_S Line 1436  void Rectangle::assemblePDESingle(Paso_S
1436  {  {
1437      const double h0 = m_l0/m_gNE0;      const double h0 = m_l0/m_gNE0;
1438      const double h1 = m_l1/m_gNE1;      const double h1 = m_l1/m_gNE1;
1439      /* GENERATOR SNIP_PDE_SINGLE_PRE TOP */      /*** GENERATOR SNIP_PDE_SINGLE_PRE TOP */
1440      const double w0 = -0.1555021169820365539*h1/h0;      const double w0 = -0.1555021169820365539*h1/h0;
1441      const double w1 = 0.041666666666666666667;      const double w1 = 0.041666666666666666667;
1442      const double w10 = -0.041666666666666666667*h0/h1;      const double w10 = -0.041666666666666666667*h0/h1;
# Line 1600  void Rectangle::assemblePDESingle(Paso_S Line 1518  void Rectangle::assemblePDESingle(Paso_S
1518      rhs.requireWrite();      rhs.requireWrite();
1519  #pragma omp parallel  #pragma omp parallel
1520      {      {
1521          for (index_t k1_0=0; k1_0<2; k1_0++) { // coloring          for (index_t k1_0=0; k1_0<2; k1_0++) { // colouring
1522  #pragma omp for  #pragma omp for
1523              for (index_t k1=k1_0; k1<m_NE1; k1+=2) {              for (index_t k1=k1_0; k1<m_NE1; k1+=2) {
1524                  for (index_t k0=0; k0<m_NE0; ++k0)  {                  for (index_t k0=0; k0<m_NE0; ++k0)  {
# Line 1609  void Rectangle::assemblePDESingle(Paso_S Line 1527  void Rectangle::assemblePDESingle(Paso_S
1527                      vector<double> EM_S(4*4, 0);                      vector<double> EM_S(4*4, 0);
1528                      vector<double> EM_F(4, 0);                      vector<double> EM_F(4, 0);
1529                      const index_t e = k0 + m_NE0*k1;                      const index_t e = k0 + m_NE0*k1;
1530                      /* GENERATOR SNIP_PDE_SINGLE TOP */                      /*** GENERATOR SNIP_PDE_SINGLE TOP */
1531                      ///////////////                      ///////////////
1532                      // process A //                      // process A //
1533                      ///////////////                      ///////////////
# Line 2220  void Rectangle::assemblePDESingle(Paso_S Line 2138  void Rectangle::assemblePDESingle(Paso_S
2138                      /* GENERATOR SNIP_PDE_SINGLE BOTTOM */                      /* GENERATOR SNIP_PDE_SINGLE BOTTOM */
2139
2141                        const index_t firstNode=m_N0*k1+k0;
2142                      IndexVector rowIndex;                      IndexVector rowIndex;
2143                      const index_t firstNode=k1*m_N0+k0;                      rowIndex.push_back(m_dofMap[firstNode]);
2144                      rowIndex.push_back(firstNode);                      rowIndex.push_back(m_dofMap[firstNode+1]);
2145                      rowIndex.push_back(firstNode+1);                      rowIndex.push_back(m_dofMap[firstNode+m_N0]);
2146                      rowIndex.push_back(firstNode+m_N0);                      rowIndex.push_back(m_dofMap[firstNode+m_N0+1]);
rowIndex.push_back(firstNode+1+m_N0);
addToSystemMatrix(mat, 4, rowIndex, 1, 4, rowIndex, 1, EM_S);
}
2148                            //cout << "-- AddtoRHS -- " << endl;
2149                          double *F_p=rhs.getSampleDataRW(0);                          double *F_p=rhs.getSampleDataRW(0);
2150                          for (index_t i=0; i<4; i++) {                          for (index_t i=0; i<rowIndex.size(); i++) {
2151                              if (rowIndex[i]<getNumNodes())                              if (rowIndex[i]<getNumDOF()) {
2152                                  F_p[rowIndex[i]]+=EM_F[i];                                  F_p[rowIndex[i]]+=EM_F[i];
2153                                    //cout << "F[" << rowIndex[i] << "]=" << F_p[rowIndex[i]] << endl;
2154                                }
2155                          }                          }
2156                            //cout << "---"<<endl;
2157                        }
2159                            //cout << "-- AddtoSystem -- " << endl;
2160                            addToSystemMatrix(mat, rowIndex, 1, rowIndex, 1, EM_S);
2161                      }                      }
2162
2163                  } // end k0 loop                  } // end k0 loop
2164              } // end k1 loop              } // end k1 loop
2165          } // end of coloring          } // end of coloring
2166      } // end of parallel region      } // end of parallel region
2167  }  }
2168
const IndexVector& nodes_Eq, dim_t num_Eq, dim_t NN_Sol,
const IndexVector& nodes_Sol, dim_t num_Sol, const vector<double>& array) const
{
const dim_t row_block_size = in->row_block_size;
const dim_t col_block_size = in->col_block_size;
const dim_t block_size = in->block_size;
const dim_t num_subblocks_Eq = num_Eq / row_block_size;
const dim_t num_subblocks_Sol = num_Sol / col_block_size;
const dim_t numMyCols = in->pattern->mainPattern->numInput;
const dim_t numMyRows = in->pattern->mainPattern->numOutput;

const index_t* mainBlock_ptr = in->mainBlock->pattern->ptr;
const index_t* mainBlock_index = in->mainBlock->pattern->index;
double* mainBlock_val = in->mainBlock->val;
const index_t* col_coupleBlock_ptr = in->col_coupleBlock->pattern->ptr;
const index_t* col_coupleBlock_index = in->col_coupleBlock->pattern->index;
double* col_coupleBlock_val = in->col_coupleBlock->val;
const index_t* row_coupleBlock_ptr = in->row_coupleBlock->pattern->ptr;
const index_t* row_coupleBlock_index = in->row_coupleBlock->pattern->index;
double* row_coupleBlock_val = in->row_coupleBlock->val;
for (dim_t k_Eq = 0; k_Eq < NN_Eq; ++k_Eq) {
// down columns of array
const dim_t j_Eq = nodes_Eq[k_Eq];
for (dim_t l_row = 0; l_row < num_subblocks_Eq; ++l_row) {
const dim_t i_row = j_Eq * num_subblocks_Eq + l_row;
// only look at the matrix rows stored on this processor
if (i_row < numMyRows) {
for (dim_t k_Sol = 0; k_Sol < NN_Sol; ++k_Sol) {
// across rows of array
const dim_t j_Sol = nodes_Sol[k_Sol];
for (dim_t l_col = 0; l_col < num_subblocks_Sol; ++l_col) {
// only look at the matrix rows stored on this processor
const dim_t i_col = j_Sol * num_subblocks_Sol + l_col;
if (i_col < numMyCols) {
for (dim_t k = mainBlock_ptr[i_row];
k < mainBlock_ptr[i_row + 1]; ++k) {
if (mainBlock_index[k] == i_col) {
// entry array(k_Sol, j_Eq) is a block
// (row_block_size x col_block_size)
for (dim_t ic = 0; ic < col_block_size; ++ic) {
const dim_t i_Sol = ic + col_block_size * l_col;
for (dim_t ir = 0; ir < row_block_size; ++ir) {
const dim_t i_Eq = ir + row_block_size * l_row;
mainBlock_val[k*block_size + ir + row_block_size*ic] +=
array[INDEX4
(i_Eq, i_Sol, k_Eq, k_Sol, num_Eq, num_Sol, NN_Eq)];
}
}
break;
}
}
} else {
for (dim_t k = col_coupleBlock_ptr[i_row];
k < col_coupleBlock_ptr[i_row + 1]; ++k) {
if (col_coupleBlock_index[k] == i_col - numMyCols) {
// entry array(k_Sol, j_Eq) is a block
// (row_block_size x col_block_size)
for (dim_t ic = 0; ic < col_block_size; ++ic) {
const dim_t i_Sol = ic + col_block_size * l_col;
for (dim_t ir = 0; ir < row_block_size; ++ir) {
const dim_t i_Eq = ir + row_block_size * l_row;
col_coupleBlock_val[k * block_size + ir + row_block_size * ic] +=
array[INDEX4
(i_Eq, i_Sol, k_Eq, k_Sol, num_Eq, num_Sol, NN_Eq)];
}
}
break;
}
}
}
}
}
} else {
for (dim_t k_Sol = 0; k_Sol < NN_Sol; ++k_Sol) {
// across rows of array
const dim_t j_Sol = nodes_Sol[k_Sol];
for (dim_t l_col = 0; l_col < num_subblocks_Sol; ++l_col) {
const dim_t i_col = j_Sol * num_subblocks_Sol + l_col;
if (i_col < numMyCols) {
for (dim_t k = row_coupleBlock_ptr[i_row - numMyRows];
k < row_coupleBlock_ptr[i_row - numMyRows + 1]; ++k)
{
if (row_coupleBlock_index[k] == i_col) {
// entry array(k_Sol, j_Eq) is a block
// (row_block_size x col_block_size)
for (dim_t ic = 0; ic < col_block_size; ++ic) {
const dim_t i_Sol = ic + col_block_size * l_col;
for (dim_t ir = 0; ir < row_block_size; ++ir) {
const dim_t i_Eq = ir + row_block_size * l_row;
row_coupleBlock_val[k * block_size + ir + row_block_size * ic] +=
array[INDEX4
(i_Eq, i_Sol, k_Eq, k_Sol, num_Eq, num_Sol, NN_Eq)];
}
}
break;
}
}
}
}
}
}
}
}
}

2169  } // end of namespace ripley  } // end of namespace ripley
2170

Legend:
 Removed from v.3748 changed lines Added in v.3754