/[escript]/branches/trilinos_from_5897/ripley/src/MultiRectangle.cpp
ViewVC logotype

Diff of /branches/trilinos_from_5897/ripley/src/MultiRectangle.cpp

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 6099 by caltinay, Tue Mar 8 02:07:16 2016 UTC revision 6100 by caltinay, Tue Mar 29 04:56:51 2016 UTC
# Line 471  void MultiRectangle::dump(const string& Line 471  void MultiRectangle::dump(const string&
471    
472  void MultiRectangle::populateDofMap()  void MultiRectangle::populateDofMap()
473  {  {
474        // build node distribution vector first.
475        // rank i owns m_nodeDistribution[i+1]-nodeDistribution[i] nodes.
476        // Unlike regular ripley domains this is NOT constant for all ranks so
477        // we do an Allgather (we could have also computed per rank but it's a bit
478        // involved)
479        m_nodeDistribution.assign(m_mpiInfo->size+1, 0);
480        dim_t numDOF = getNumDOF();
481        if (m_mpiInfo->size > 1) {
482    #if ESYS_MPI
483            MPI_Allgather(&numDOF, 1, MPI_DIM_T, &m_nodeDistribution[0], 1,
484                          MPI_DIM_T, m_mpiInfo->comm);
485    
486            // accumulate
487            dim_t accu = 0;
488            for (int rank = 0; rank < m_mpiInfo->size; rank++) {
489                const dim_t n = m_nodeDistribution[rank];
490                m_nodeDistribution[rank] = accu;
491                accu += n;
492            }
493            ESYS_ASSERT(accu == getNumDataPointsGlobal(),
494                    "something went wrong computing the DOF distribution!");
495    
496            m_nodeDistribution[m_mpiInfo->size] = accu;
497    #endif
498        } else {
499            m_nodeDistribution[m_mpiInfo->size] = numDOF;
500        }
501    
502        // degrees of freedom are numbered from left to right, bottom to top in
503        // each rank, continuing on the next rank (ranks also go left-right,
504        // bottom-top).
505        // This means rank 0 has id 0...n0-1, rank 1 has id n0...n1-1 etc. which
506        // helps when writing out data rank after rank.
507    
508        try {
509            m_nodeId.assign(getNumNodes(), -1);
510            m_dofMap.assign(getNumNodes(), -1);
511            m_dofId.assign(numDOF, -1);
512        } catch (const std::length_error& le) {
513            throw RipleyException("The system does not have sufficient memory for a domain of this size.");
514        }
515    
516      const index_t left = getFirstInDim(0);      const index_t left = getFirstInDim(0);
517      const index_t bottom = getFirstInDim(1);      const index_t bottom = getFirstInDim(1);
518      const dim_t nDOF0 = getNumDOFInAxis(0);      const dim_t nDOF0 = getNumDOFInAxis(0);
519      const dim_t nDOF1 = getNumDOFInAxis(1);      const dim_t nDOF1 = getNumDOFInAxis(1);
520      // populate node->DOF mapping with own degrees of freedom.      // populate node->DOF mapping, DOF IDs and own node IDs.
521      // The rest is assigned in the loop further down      // The rest of the node IDs are communicated further down.
     m_dofMap.assign(getNumNodes(), -7);  
522  #pragma omp parallel for  #pragma omp parallel for
523      for (index_t i=bottom; i<bottom+nDOF1; i++) {      for (dim_t i=0; i<nDOF1; i++) {
524          for (index_t j=left; j<left+nDOF0; j++) {          for (dim_t j=0; j<nDOF0; j++) {
525              m_dofMap[i*m_NN[0]+j]=(i-bottom)*nDOF0+j-left;              const index_t nodeIdx = j+left + (i+bottom)*m_NN[0];
526                const index_t dofIdx = j + i*nDOF0;
527                m_dofMap[nodeIdx] = dofIdx;
528                m_dofId[dofIdx] = m_nodeId[nodeIdx]
529                    = m_nodeDistribution[m_mpiInfo->rank] + dofIdx;
530          }          }
531      }      }
532    
533      // build list of shared components and neighbours by looping through      // build list of shared components and neighbours
534      // all potential neighbouring ranks and checking if positions are      m_colIndices.assign(numDOF, IndexVector());
535      // within bounds      m_rowIndices.assign(getNumNodes() - numDOF, IndexVector());
     const dim_t numDOF=nDOF0*nDOF1;  
     m_colIndices.clear();  
     m_rowIndices.clear();  
     m_colIndices.resize(numDOF);  
     m_rowIndices.resize(getNumNodes() - numDOF);  
536    
537      RankVector neighbour;      RankVector neighbour;
538      IndexVector offsetInSharedSend(1,0);      IndexVector offsetInSharedSend(1,0);
539      IndexVector offsetInSharedRecv(1,0);      IndexVector offsetInSharedRecv(1,0);
540      IndexVector sendShared, recvShared;      IndexVector sendShared, recvShared;
541      const int x=m_mpiInfo->rank%m_NX[0];      const int x = m_mpiInfo->rank%m_NX[0];
542      const int y=m_mpiInfo->rank/m_NX[0];      const int y = m_mpiInfo->rank/m_NX[0];
543      // numShared will contain the number of shared DOFs after the following      // numShared will contain the number of shared DOFs after the following
544      // blocks      // blocks
545      dim_t numShared=0;      dim_t numShared = 0;
546      // sharing bottom edge      // sharing bottom edge
547      if (y > 0) {      if (y > 0) {
548          neighbour.push_back((y-1)*m_NX[0] + x);          neighbour.push_back((y-1)*m_NX[0] + x);
549          //joining edge, send and recv          //joining edge, send and recv
550          offsetInSharedSend.push_back(offsetInSharedSend.back()+nDOF0);          offsetInSharedSend.push_back(offsetInSharedSend.back()+nDOF0);
551          offsetInSharedRecv.push_back(offsetInSharedRecv.back()+nDOF0*m_subdivisions);          offsetInSharedRecv.push_back(offsetInSharedRecv.back()+nDOF0*m_subdivisions);
552          for (dim_t i=0; i < nDOF0; i++, numShared++) {          // add to send only
553            for (dim_t i=0; i < nDOF0; i++) {
554              sendShared.push_back(i);              sendShared.push_back(i);
             recvShared.push_back(numDOF+numShared);  
             m_dofMap[i+left]=numDOF + numShared;  
             const dim_t ind = i;  
             if (i > 0)  
                 doublyLink(m_colIndices, m_rowIndices, ind - 1, numShared);  
             doublyLink(m_colIndices, m_rowIndices, ind, numShared);  
             if (i < nDOF0 - 1)  
                 doublyLink(m_colIndices, m_rowIndices, ind + 1, numShared);  
555          }          }
556            
557          for (unsigned sy = 1; sy < m_subdivisions; sy++) {          for (unsigned sy = 0; sy < m_subdivisions; sy++) {
558              for (dim_t i=0; i < nDOF0; i++, numShared++) {              for (index_t i = 0; i < nDOF0; i++, numShared++) {
559                  recvShared.push_back(numDOF+numShared);                  const index_t nodeIdx = left + i + sy*m_NN[0];
560                  m_dofMap[left + i + sy*m_NN[0]] = numDOF + numShared;                  const index_t dofIdx = i;
561                  const dim_t ind = i;                  recvShared.push_back(nodeIdx);
562                    m_dofMap[nodeIdx] = numDOF + numShared;
563                  if (i > 0)                  if (i > 0)
564                      doublyLink(m_colIndices, m_rowIndices, ind - 1, numShared);                      doublyLink(m_colIndices, m_rowIndices, dofIdx - 1, numShared);
565                  doublyLink(m_colIndices, m_rowIndices, ind, numShared);                  doublyLink(m_colIndices, m_rowIndices, dofIdx, numShared);
566                  if (i < nDOF0 - 1)                  if (i < nDOF0 - 1)
567                      doublyLink(m_colIndices, m_rowIndices, ind + 1, numShared);                      doublyLink(m_colIndices, m_rowIndices, dofIdx + 1, numShared);
568              }              }
569          }          }
570      }      }
# Line 541  void MultiRectangle::populateDofMap() Line 575  void MultiRectangle::populateDofMap()
575          offsetInSharedRecv.push_back(offsetInSharedRecv.back()+nDOF0);          offsetInSharedRecv.push_back(offsetInSharedRecv.back()+nDOF0);
576          // add to send only          // add to send only
577          for (unsigned sy = 0; sy < m_subdivisions; sy++) {          for (unsigned sy = 0; sy < m_subdivisions; sy++) {
578              for (dim_t i=0; i < nDOF0; i++) {              for (index_t i = 0; i < nDOF0; i++) {
579                  sendShared.push_back(numDOF-nDOF0*(m_subdivisions - sy) + i);                  sendShared.push_back(numDOF-nDOF0*(m_subdivisions - sy) + i);
580              }              }
581          }          }
582          for (dim_t i=0; i < nDOF0; i++, numShared++) {          for (index_t i = 0; i < nDOF0; i++, numShared++) {
583              recvShared.push_back(numDOF+numShared);              const index_t nodeIdx = left + i + m_NN[0]*(m_NN[1]-1);
584              m_dofMap[m_NN[0]*(m_NN[1]-1)+left+i]=numDOF+numShared;              const index_t dofIdx = numDOF - nDOF0 + i;
585              const dim_t ind = numDOF-nDOF0+i;              recvShared.push_back(nodeIdx);
586                m_dofMap[nodeIdx] = numDOF+numShared;
587              if (i > 0)              if (i > 0)
588                  doublyLink(m_colIndices, m_rowIndices, ind - 1, numShared);                  doublyLink(m_colIndices, m_rowIndices, dofIdx - 1, numShared);
589              doublyLink(m_colIndices, m_rowIndices, ind, numShared);              doublyLink(m_colIndices, m_rowIndices, dofIdx, numShared);
590              if (i < nDOF0 - 1)              if (i < nDOF0 - 1)
591                  doublyLink(m_colIndices, m_rowIndices, ind + 1, numShared);                  doublyLink(m_colIndices, m_rowIndices, dofIdx + 1, numShared);
592          }          }
593      }      }
594      // sharing left edge      // sharing left edge
# Line 561  void MultiRectangle::populateDofMap() Line 596  void MultiRectangle::populateDofMap()
596          neighbour.push_back(y*m_NX[0] + x-1);          neighbour.push_back(y*m_NX[0] + x-1);
597          offsetInSharedSend.push_back(offsetInSharedSend.back()+nDOF1);          offsetInSharedSend.push_back(offsetInSharedSend.back()+nDOF1);
598          offsetInSharedRecv.push_back(offsetInSharedRecv.back()+nDOF1*m_subdivisions);          offsetInSharedRecv.push_back(offsetInSharedRecv.back()+nDOF1*m_subdivisions);
599          for (dim_t i=0; i < nDOF1; i++, numShared++) {          for (index_t i = 0; i < nDOF1; i++) {
600              for (unsigned sx = 0; sx < m_subdivisions - 1; sx++, numShared++) {              const index_t dofIdx = i*nDOF0;
601                  recvShared.push_back(numDOF+numShared);              sendShared.push_back(dofIdx);
602                  m_dofMap[(bottom+i)*m_NN[0] + sx] = numDOF + numShared;              for (unsigned sx = 0; sx < m_subdivisions; sx++, numShared++) {
603                  const dim_t ind = i*nDOF0;                  const index_t nodeIdx = (bottom+i)*m_NN[0] + sx;
604                    recvShared.push_back(nodeIdx);
605                    m_dofMap[nodeIdx] = numDOF + numShared;
606                  if (i > 0)                  if (i > 0)
607                      doublyLink(m_colIndices, m_rowIndices, ind - nDOF0, numShared);                      doublyLink(m_colIndices, m_rowIndices, dofIdx - nDOF0, numShared);
608                  doublyLink(m_colIndices, m_rowIndices, ind, numShared);                  doublyLink(m_colIndices, m_rowIndices, dofIdx, numShared);
609                  if (i < nDOF1 - 1)                  if (i < nDOF1 - 1)
610                      doublyLink(m_colIndices, m_rowIndices, ind + nDOF0, numShared);                      doublyLink(m_colIndices, m_rowIndices, dofIdx + nDOF0, numShared);
611              }              }
             sendShared.push_back(i*nDOF0);  
             recvShared.push_back(numDOF + numShared);  
             m_dofMap[(bottom+i)*m_NN[0] + m_subdivisions - 1]=numDOF + numShared;  
             const dim_t ind = i*nDOF0;  
             if (i > 0)  
                 doublyLink(m_colIndices, m_rowIndices, ind - nDOF0, numShared);  
             doublyLink(m_colIndices, m_rowIndices, ind, numShared);  
             if (i < nDOF1 - 1)  
                 doublyLink(m_colIndices, m_rowIndices, ind + nDOF0, numShared);  
612          }          }
613      }      }
614      // sharing right edge      // sharing right edge
# Line 588  void MultiRectangle::populateDofMap() Line 616  void MultiRectangle::populateDofMap()
616          neighbour.push_back(y*m_NX[0] + x+1);          neighbour.push_back(y*m_NX[0] + x+1);
617          offsetInSharedSend.push_back(offsetInSharedSend.back()+nDOF1*m_subdivisions);          offsetInSharedSend.push_back(offsetInSharedSend.back()+nDOF1*m_subdivisions);
618          offsetInSharedRecv.push_back(offsetInSharedRecv.back()+nDOF1);          offsetInSharedRecv.push_back(offsetInSharedRecv.back()+nDOF1);
619          for (dim_t i=0; i < nDOF1; i++, numShared++) {          for (index_t i = 0; i < nDOF1; i++, numShared++) {
620              for (unsigned sx = 0; sx < m_subdivisions - 1; sx++) {              for (unsigned sx = 0; sx < m_subdivisions - 1; sx++) {
621                  sendShared.push_back((i+1)*nDOF0-(m_subdivisions - sx));                  sendShared.push_back((i+1)*nDOF0-(m_subdivisions - sx));
622              }              }
623              sendShared.push_back((i+1)*nDOF0-1);              const index_t nodeIdx = (bottom+1+i)*m_NN[0] - 1;
624              recvShared.push_back(numDOF+numShared);              const index_t dofIdx = (i+1)*nDOF0 - 1;
625              m_dofMap[(bottom+1+i)*m_NN[0]- 1]=numDOF+numShared;              sendShared.push_back(dofIdx);
626              const dim_t ind = (i+1)*nDOF0 - 1;              recvShared.push_back(nodeIdx);
627                m_dofMap[nodeIdx] = numDOF + numShared;
628              if (i > 0)              if (i > 0)
629                  doublyLink(m_colIndices, m_rowIndices, ind - nDOF0, numShared);                  doublyLink(m_colIndices, m_rowIndices, dofIdx - nDOF0, numShared);
630              doublyLink(m_colIndices, m_rowIndices, ind, numShared);              doublyLink(m_colIndices, m_rowIndices, dofIdx, numShared);
631              if (i < nDOF1 - 1)              if (i < nDOF1 - 1)
632                  doublyLink(m_colIndices, m_rowIndices, ind + nDOF0, numShared);                  doublyLink(m_colIndices, m_rowIndices, dofIdx + nDOF0, numShared);
633          }          }
634      }      }
635      // sharing bottom-left node      // sharing bottom-left block
636      if (x > 0 && y > 0) {      if (x > 0 && y > 0) {
637          neighbour.push_back((y-1)*m_NX[0] + x-1);          neighbour.push_back((y-1)*m_NX[0] + x-1);
638          // sharing a node          // sharing a node
# Line 611  void MultiRectangle::populateDofMap() Line 640  void MultiRectangle::populateDofMap()
640          offsetInSharedRecv.push_back(offsetInSharedRecv.back()+m_subdivisions*m_subdivisions);          offsetInSharedRecv.push_back(offsetInSharedRecv.back()+m_subdivisions*m_subdivisions);
641          for (unsigned sy = 0; sy < m_subdivisions; sy++) {          for (unsigned sy = 0; sy < m_subdivisions; sy++) {
642              for (unsigned sx = 0; sx < m_subdivisions; sx++, numShared++) {              for (unsigned sx = 0; sx < m_subdivisions; sx++, numShared++) {
643                  m_dofMap[sx + sy*m_NN[0]] = numDOF + numShared;                  const index_t nodeIdx = sx + sy*m_NN[0];
644                  recvShared.push_back(numDOF+numShared);                  m_dofMap[nodeIdx] = numDOF + numShared;
645                    recvShared.push_back(nodeIdx);
646                  doublyLink(m_colIndices, m_rowIndices, 0, numShared);                  doublyLink(m_colIndices, m_rowIndices, 0, numShared);
647              }              }
648          }          }
649          sendShared.push_back(0);          sendShared.push_back(0);
650      }      }
651      // sharing top-left node      // sharing top-left block
652      if (x > 0 && y < m_NX[1]-1) {      if (x > 0 && y < m_NX[1]-1) {
653          neighbour.push_back((y+1)*m_NX[0] + x-1);          neighbour.push_back((y+1)*m_NX[0] + x-1);
654          offsetInSharedSend.push_back(offsetInSharedSend.back()+m_subdivisions);          offsetInSharedSend.push_back(offsetInSharedSend.back()+m_subdivisions);
655          offsetInSharedRecv.push_back(offsetInSharedRecv.back()+m_subdivisions);          offsetInSharedRecv.push_back(offsetInSharedRecv.back()+m_subdivisions);
656          for (int s = 0; s < m_subdivisions; s++, numShared++) {          for (int s = 0; s < m_subdivisions; s++, numShared++) {
657              sendShared.push_back(numDOF - (m_subdivisions - s)*nDOF0);              const index_t nodeIdx = m_NN[0]*(m_NN[1]-1) + s;
658              recvShared.push_back(numDOF + numShared);              const index_t dofIdx = numDOF - (m_subdivisions - s)*nDOF0;
659              m_dofMap[m_NN[0]*(m_NN[1]-1) + s] = numDOF + numShared;              sendShared.push_back(dofIdx);
660                recvShared.push_back(nodeIdx);
661                m_dofMap[nodeIdx] = numDOF + numShared;
662              if (s > 0)              if (s > 0)
663                  doublyLink(m_colIndices, m_rowIndices, numDOF - (m_subdivisions - s + 1)*nDOF0, numShared);                  doublyLink(m_colIndices, m_rowIndices, dofIdx - nDOF0, numShared);
664              doublyLink(m_colIndices, m_rowIndices, numDOF - (m_subdivisions - s)*nDOF0, numShared);              doublyLink(m_colIndices, m_rowIndices, dofIdx, numShared);
665              if (s < m_subdivisions - 1)              if (s < m_subdivisions - 1)
666                  doublyLink(m_colIndices, m_rowIndices, numDOF - (m_subdivisions - s - 1)*nDOF0, numShared);                              doublyLink(m_colIndices, m_rowIndices, dofIdx + nDOF0, numShared);            
667          }          }
668      }      }
669      // sharing bottom-right node      // sharing bottom-right block
670      if (x < m_NX[0]-1 && y > 0) {      if (x < m_NX[0]-1 && y > 0) {
671          neighbour.push_back((y-1)*m_NX[0] + x+1);          neighbour.push_back((y-1)*m_NX[0] + x+1);
672          offsetInSharedSend.push_back(offsetInSharedSend.back()+m_subdivisions);          offsetInSharedSend.push_back(offsetInSharedSend.back()+m_subdivisions);
673          offsetInSharedRecv.push_back(offsetInSharedRecv.back()+m_subdivisions);          offsetInSharedRecv.push_back(offsetInSharedRecv.back()+m_subdivisions);
674          for (int s = 0; s < m_subdivisions; s++, numShared++) {          for (int s = 0; s < m_subdivisions; s++, numShared++) {
675              recvShared.push_back(numDOF+numShared);              const index_t nodeIdx = (s+1)*m_NN[0] - 1;
             m_dofMap[(s+1)*m_NN[0] - 1] = numDOF + numShared;  
             sendShared.push_back(nDOF0-(m_subdivisions-s));  
676              const dim_t ind = nDOF0 - (m_subdivisions - s);              const dim_t ind = nDOF0 - (m_subdivisions - s);
677                recvShared.push_back(nodeIdx);
678                m_dofMap[nodeIdx] = numDOF + numShared;
679                sendShared.push_back(ind);
680              if (s > 0)              if (s > 0)
681                  doublyLink(m_colIndices, m_rowIndices, ind - 1, numShared);                  doublyLink(m_colIndices, m_rowIndices, ind - 1, numShared);
682              doublyLink(m_colIndices, m_rowIndices, ind, numShared);              doublyLink(m_colIndices, m_rowIndices, ind, numShared);
# Line 651  void MultiRectangle::populateDofMap() Line 684  void MultiRectangle::populateDofMap()
684                  doublyLink(m_colIndices, m_rowIndices, ind + 1, numShared);                  doublyLink(m_colIndices, m_rowIndices, ind + 1, numShared);
685          }          }
686      }      }
687      // sharing top-right node      // sharing top-right block
688      if (x < m_NX[0]-1 && y < m_NX[1]-1) {      if (x < m_NX[0]-1 && y < m_NX[1]-1) {
689          neighbour.push_back((y+1)*m_NX[0] + x+1);          neighbour.push_back((y+1)*m_NX[0] + x+1);
690          offsetInSharedSend.push_back(offsetInSharedSend.back()+m_subdivisions*m_subdivisions);          offsetInSharedSend.push_back(offsetInSharedSend.back()+m_subdivisions*m_subdivisions);
# Line 661  void MultiRectangle::populateDofMap() Line 694  void MultiRectangle::populateDofMap()
694                  sendShared.push_back(numDOF-(m_subdivisions - sy - 1)*nDOF0 - (m_subdivisions - sx));                  sendShared.push_back(numDOF-(m_subdivisions - sy - 1)*nDOF0 - (m_subdivisions - sx));
695              }              }
696          }          }
697          recvShared.push_back(numDOF+numShared);          const dim_t nodeIdx = m_NN[0]*m_NN[1] - 1;
698          m_dofMap[m_NN[0]*m_NN[1]-1]=numDOF+numShared;          recvShared.push_back(nodeIdx);
699            m_dofMap[nodeIdx] = numDOF+numShared;
700          doublyLink(m_colIndices, m_rowIndices, numDOF-1, numShared);          doublyLink(m_colIndices, m_rowIndices, numDOF-1, numShared);
701          ++numShared;          ++numShared;
702      }      }
703    
704    #ifdef ESYS_MPI
705        if (m_mpiInfo->size > 1) {
706            // now send off shared DOF IDs so nodeId will become a global node
707            // labelling.
708            const dim_t numSend = offsetInSharedSend.back();
709            const dim_t numRecv = offsetInSharedRecv.back();
710            IndexVector recvBuffer(numRecv);
711            IndexVector sendBuffer(numSend);
712            std::vector<MPI_Request> reqs(2*neighbour.size());
713            std::vector<MPI_Status> stats(2*neighbour.size());
714    
715            // prepare the send buffer
716    #pragma omp parallel for
717            for (index_t i = 0; i < numSend; ++i) {
718                sendBuffer[i] = m_dofId[sendShared[i]];
719            }
720            for (index_t i = 0; i < neighbour.size(); i++) {
721                MPI_Irecv(&recvBuffer[offsetInSharedRecv[i]],
722                        offsetInSharedRecv[i+1] - offsetInSharedRecv[i],
723                        MPI_DIM_T, neighbour[i],
724                        m_mpiInfo->counter()+neighbour[i],
725                        m_mpiInfo->comm, &reqs[2*i]);
726                MPI_Issend(&sendBuffer[offsetInSharedSend[i]],
727                        offsetInSharedSend[i+1] - offsetInSharedSend[i],
728                        MPI_DIM_T, neighbour[i],
729                        m_mpiInfo->counter()+m_mpiInfo->rank, m_mpiInfo->comm,
730                        &reqs[2*i+1]);
731            }
732            m_mpiInfo->incCounter(m_mpiInfo->size);
733    
734            // do something else here...
735    
736            MPI_Waitall(2*neighbour.size(), &reqs[0], &stats[0]);
737    
738            // now populate rest of node IDs
739    #pragma omp parallel for
740            for (index_t i=0; i < numRecv; i++) {
741                const index_t nodeIdx = recvShared[i];
742                m_nodeId[nodeIdx] = recvBuffer[m_dofMap[nodeIdx]-numDOF];
743            }
744        }
745    #endif // ESYS_MPI
746    
747      // TODO: paso::SharedComponents should take vectors to avoid this      // TODO: paso::SharedComponents should take vectors to avoid this
748      int* neighPtr = NULL;      int* neighPtr = NULL;
749      index_t* sendPtr = NULL;      index_t* sendPtr = NULL;
# Line 676  void MultiRectangle::populateDofMap() Line 753  void MultiRectangle::populateDofMap()
753          sendPtr = &sendShared[0];          sendPtr = &sendShared[0];
754          recvPtr = &recvShared[0];          recvPtr = &recvShared[0];
755      }      }
756      // create connector      // create Paso connector
757      paso::SharedComponents_ptr snd_shcomp(new paso::SharedComponents(      paso::SharedComponents_ptr snd_shcomp(new paso::SharedComponents(
758              numDOF, neighbour.size(), neighPtr, sendPtr,              numDOF, neighbour.size(), neighPtr, sendPtr,
759              &offsetInSharedSend[0], 1, 0, m_mpiInfo));              &offsetInSharedSend[0], 1, 0, m_mpiInfo));
# Line 688  void MultiRectangle::populateDofMap() Line 765  void MultiRectangle::populateDofMap()
765    
766  void MultiRectangle::populateSampleIds()  void MultiRectangle::populateSampleIds()
767  {  {
768      // degrees of freedom are numbered from left to right, bottom to top in      // label nodes and DOF first
769      // each rank, continuing on the next rank (ranks also go left-right,      populateDofMap();
     // bottom-top).  
     // This means rank 0 has id 0...n0-1, rank 1 has id n0...n1-1 etc. which  
     // helps when writing out data rank after rank.  
   
     // build node distribution vector first.  
     // rank i owns m_nodeDistribution[i+1]-nodeDistribution[i] nodes.  
     // Unlike regular ripley domains this is NOT constant for all ranks so  
     // we do an Allgather (we could have also computed per rank but it's a bit  
     // involved)  
     m_nodeDistribution.assign(m_mpiInfo->size+1, 0);  
     dim_t numDOF=getNumDOF();  
     if (m_mpiInfo->size > 1) {  
 #if ESYS_MPI  
         MPI_Allgather(&numDOF, 1, MPI_DIM_T, &m_nodeDistribution[0], 1,  
                       MPI_DIM_T, m_mpiInfo->comm);  
   
         // accumulate  
         dim_t accu = 0;  
         for (int rank=0; rank<m_mpiInfo->size; rank++) {  
             const dim_t n = m_nodeDistribution[rank];  
             m_nodeDistribution[rank] = accu;  
             accu += n;  
         }  
         ESYS_ASSERT(accu == getNumDataPointsGlobal(),  
                 "something went wrong computing the DOF distribution!");  
770    
771          m_nodeDistribution[m_mpiInfo->size] = accu;      m_elementId.assign(getNumElements(), -1);
 #endif  
     } else {  
         m_nodeDistribution[m_mpiInfo->size] = numDOF;  
     }  
   
     try {  
         m_nodeId.resize(getNumNodes());  
         m_dofId.resize(numDOF);  
         m_elementId.resize(getNumElements());  
     } catch (const std::length_error& le) {  
         throw RipleyException("The system does not have sufficient memory for a domain of this size.");  
     }  
772    
773      // populate face element counts      // populate face element counts
774      //left      //left
# Line 753  void MultiRectangle::populateSampleIds() Line 793  void MultiRectangle::populateSampleIds()
793          m_faceCount[3]=0;          m_faceCount[3]=0;
794    
795      const dim_t NFE = getNumFaceElements();      const dim_t NFE = getNumFaceElements();
     m_faceId.resize(NFE);  
   
     const index_t left = getFirstInDim(0);  
     const index_t bottom = getFirstInDim(1);  
     const dim_t nDOF0 = getNumDOFInAxis(0);  
     const dim_t nDOF1 = getNumDOFInAxis(1);  
796      const dim_t NE0 = m_NE[0];      const dim_t NE0 = m_NE[0];
797      const dim_t NE1 = m_NE[1];      const dim_t NE1 = m_NE[1];
798        m_faceId.resize(NFE);
 #define globalNodeId(x,y) \  
     ((m_offset[0]+x)/nDOF0)*nDOF0*nDOF1+(m_offset[0]+x)%nDOF0 \  
     + ((m_offset[1]+y)/nDOF1)*nDOF0*nDOF1*m_NX[0]+((m_offset[1]+y)%nDOF1)*nDOF0  
   
     // set corner id's outside the parallel region  
     m_nodeId[0] = globalNodeId(0, 0);  
     m_nodeId[m_NN[0]-1] = globalNodeId(m_NN[0]-1, 0);  
     m_nodeId[m_NN[0]*(m_NN[1]-1)] = globalNodeId(0, m_NN[1]-1);  
     m_nodeId[m_NN[0]*m_NN[1]-1] = globalNodeId(m_NN[0]-1,m_NN[1]-1);  
 #undef globalNodeId  
799    
800  #pragma omp parallel  #pragma omp parallel
801      {      {
802          // populate degrees of freedom and own nodes (identical id)          // populate element IDs
 #pragma omp for nowait  
         for (dim_t i=0; i<nDOF1; i++) {  
             for (dim_t j=0; j<nDOF0; j++) {  
                 const index_t nodeIdx=j+left+(i+bottom)*m_NN[0];  
                 const index_t dofIdx=j+i*nDOF0;  
                 m_dofId[dofIdx] = m_nodeId[nodeIdx]  
                     = m_nodeDistribution[m_mpiInfo->rank]+dofIdx;  
             }  
         }  
   
         // populate the rest of the nodes (shared with other ranks)  
         if (m_faceCount[0]==0) { // left column  
 #pragma omp for nowait  
             for (dim_t i=0; i<nDOF1; i++) {  
                 const index_t nodeIdx=(i+bottom)*m_NN[0];  
                 const index_t dofId=(i+1)*nDOF0-1;  
                 m_nodeId[nodeIdx]  
                     = m_nodeDistribution[m_mpiInfo->rank-1]+dofId;  
             }  
         }  
         if (m_faceCount[1]==0) { // right column  
 #pragma omp for nowait  
             for (dim_t i=0; i<nDOF1; i++) {  
                 const index_t nodeIdx=(i+bottom+1)*m_NN[0]-1;  
                 const index_t dofId=i*nDOF0;  
                 m_nodeId[nodeIdx]  
                     = m_nodeDistribution[m_mpiInfo->rank+1]+dofId;  
             }  
         }  
         if (m_faceCount[2]==0) { // bottom row  
 #pragma omp for nowait  
             for (dim_t i=0; i<nDOF0; i++) {  
                 const index_t nodeIdx=i+left;  
                 const index_t dofId=nDOF0*(nDOF1-1)+i;  
                 m_nodeId[nodeIdx]  
                     = m_nodeDistribution[m_mpiInfo->rank-m_NX[0]]+dofId;  
             }  
         }  
         if (m_faceCount[3]==0) { // top row  
 #pragma omp for nowait  
             for (dim_t i=0; i<nDOF0; i++) {  
                 const index_t nodeIdx=m_NN[0]*(m_NN[1]-1)+i+left;  
                 const index_t dofId=i;  
                 m_nodeId[nodeIdx]  
                     = m_nodeDistribution[m_mpiInfo->rank+m_NX[0]]+dofId;  
             }  
         }  
   
         // populate element id's  
803  #pragma omp for nowait  #pragma omp for nowait
804          for (dim_t i1=0; i1<NE1; i1++) {          for (index_t i1 = 0; i1 < NE1; i1++) {
805              for (dim_t i0=0; i0<NE0; i0++) {              for (index_t i0 = 0; i0 < NE0; i0++) {
806                  m_elementId[i0+i1*NE0]=(m_offset[1]+i1)*m_gNE[0]+m_offset[0]+i0;                  m_elementId[i0+i1*NE0]=(m_offset[1]+i1)*m_gNE[0]+m_offset[0]+i0;
807              }              }
808          }          }
809    
810          // face elements          // face elements
811  #pragma omp for  #pragma omp for
812          for (dim_t k=0; k<NFE; k++)          for (index_t k = 0; k < NFE; k++)
813              m_faceId[k]=k;              m_faceId[k] = k;
814      } // end parallel section      } // end parallel section
815    
816      m_nodeTags.assign(getNumNodes(), 0);      m_nodeTags.assign(getNumNodes(), 0);
# Line 849  void MultiRectangle::populateSampleIds() Line 824  void MultiRectangle::populateSampleIds()
824      const index_t faceTag[] = { LEFT, RIGHT, BOTTOM, TOP };      const index_t faceTag[] = { LEFT, RIGHT, BOTTOM, TOP };
825      m_faceOffset.assign(4, -1);      m_faceOffset.assign(4, -1);
826      m_faceTags.clear();      m_faceTags.clear();
827      index_t offset=0;      index_t offset = 0;
828      for (size_t i=0; i<4; i++) {      for (size_t i=0; i<4; i++) {
829          if (m_faceCount[i]>0) {          if (m_faceCount[i] > 0) {
830              m_faceOffset[i]=offset;              m_faceOffset[i]=offset;
831              offset+=m_faceCount[i];              offset += m_faceCount[i];
832              m_faceTags.insert(m_faceTags.end(), m_faceCount[i], faceTag[i]);              m_faceTags.insert(m_faceTags.end(), m_faceCount[i], faceTag[i]);
833          }          }
834      }      }
# Line 863  void MultiRectangle::populateSampleIds() Line 838  void MultiRectangle::populateSampleIds()
838      setTagMap("top", TOP);      setTagMap("top", TOP);
839      updateTagsInUse(FaceElements);      updateTagsInUse(FaceElements);
840    
     populateDofMap();  
841  }  }
842    
843  paso::SystemMatrixPattern_ptr MultiRectangle::getPasoMatrixPattern(  paso::SystemMatrixPattern_ptr MultiRectangle::getPasoMatrixPattern(
844                                                      bool reducedRowOrder,                                                      bool reducedRowOrder,
845                                                      bool reducedColOrder) const                                                      bool reducedColOrder) const
846  {  {
847      if (m_pattern.get())      if (!m_pattern) {
848          return m_pattern;          // first call - create pattern, then return
849            const dim_t numDOF = getNumDOF();
850      // first call - create pattern, then return          const dim_t numShared = getNumNodes() - numDOF;
     const dim_t numDOF = getNumDOF();  
     const dim_t numShared = getNumNodes() - numDOF;  
851  #pragma omp parallel for  #pragma omp parallel for
852      for (dim_t i = 0; i < numShared; i++) {          for (index_t i = 0; i < numShared; i++) {
853          sort(m_rowIndices[i].begin(), m_rowIndices[i].end());              sort(m_rowIndices[i].begin(), m_rowIndices[i].end());
854      }          }
855    
856      // create main and couple blocks          // create main and couple blocks
857      paso::Pattern_ptr mainPattern = createPasoPattern(getConnections(), numDOF);          paso::Pattern_ptr mainPattern = createPasoPattern(getConnections(), numDOF);
858      paso::Pattern_ptr colPattern = createPasoPattern(m_colIndices, numShared);          paso::Pattern_ptr colPattern = createPasoPattern(m_colIndices, numShared);
859      paso::Pattern_ptr rowPattern = createPasoPattern(m_rowIndices, numDOF);          paso::Pattern_ptr rowPattern = createPasoPattern(m_rowIndices, numDOF);
860    
861      // allocate paso distribution          // allocate Paso distribution
862      paso::Distribution_ptr distribution(new paso::Distribution(m_mpiInfo,          paso::Distribution_ptr distribution(new paso::Distribution(
863              const_cast<index_t*>(&m_nodeDistribution[0]), 1, 0));                                      m_mpiInfo, &m_nodeDistribution[0], 1, 0));
864    
865      // finally create the system matrix pattern          // finally create the system matrix pattern
866      m_pattern.reset(new paso::SystemMatrixPattern(MATRIX_FORMAT_DEFAULT,          m_pattern.reset(new paso::SystemMatrixPattern(MATRIX_FORMAT_DEFAULT,
867              distribution, distribution, mainPattern, colPattern, rowPattern,                  distribution, distribution, mainPattern, colPattern, rowPattern,
868              m_connector, m_connector));                  m_connector, m_connector));
869        }
870      return m_pattern;      return m_pattern;
871  }  }
872    

Legend:
Removed from v.6099  
changed lines
  Added in v.6100

  ViewVC Help
Powered by ViewVC 1.1.26