/*****************************************************************************
*
* Copyright (c) 2003-2016 by The University of Queensland
* http://www.uq.edu.au
*
* Primary Business: Queensland, Australia
* Licensed under the Apache License, version 2.0
* http://www.apache.org/licenses/LICENSE-2.0
*
* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
* Development 2012-2013 by School of Earth Sciences
* Development from 2014 by Centre for Geoscience Computing (GeoComp)
*
*****************************************************************************/


/****************************************************************************

  Finley: Mesh: optimizes the distribution of DOFs across processors
  using ParMETIS. On return a new distribution is given and the globalDOF
  numbers are relabeled accordingly, but the mesh is not redistributed yet.

*****************************************************************************/
#include "Mesh.h"
#include "IndexList.h"

#ifdef ESYS_HAVE_PARMETIS
#include <parmetis.h>
#ifndef REALTYPEWIDTH
typedef float real_t;
#endif
#endif

#include <iostream>
#include <boost/scoped_array.hpp>

namespace finley {

#ifdef ESYS_HAVE_PARMETIS
// Checks whether there is any rank which has no vertex. In case
// such a rank exists, we don't use parmetis since parmetis requires
// that every rank has at least 1 vertex (at line 129 of file
// "xyzpart.c" in parmetis 3.1.1, variable "nvtxs" would be 0 if
// any rank has no vertex).
bool allRanksHaveNodes(escript::JMPI& mpiInfo, const std::vector<index_t>& distribution)
{
    int ret = 1;

    if (mpiInfo->rank == 0) {
        for (int i = 0; i < mpiInfo->size; i++) {
            if (distribution[i+1] == distribution[i]) {
                ret = 0;
                break;
            }
        }
        if (ret == 0) {
            std::cout << "Mesh::optimizeDOFDistribution: "
                << "ParMETIS is not used since at least one rank has no vertex!"
                << std::endl;
        }
    }
    // ret is a plain C int, so broadcast it as MPI_INT
    MPI_Bcast(&ret, 1, MPI_INT, 0, mpiInfo->comm);
    return (ret == 1);
}
#endif


/****************************************************************************/

void Mesh::optimizeDOFDistribution(std::vector<index_t>& distribution)
{
    // mpiSize is not const because of the ParMETIS call
    int mpiSize=MPIInfo->size;
    const int myRank=MPIInfo->rank;
    const index_t myFirstVertex=distribution[myRank];
    const index_t myLastVertex=distribution[myRank+1];
    const dim_t myNumVertices=myLastVertex-myFirstVertex;
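    // Outline: (1) build the connectivity graph of the locally owned DOFs,
    // (2) have ParMETIS assign a new owner rank to each DOF, (3) exchange
    // the per-rank counts to build the new distribution, and (4) relabel
    // the DOFs, circulating the new labels so ghost DOFs are updated too.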

    // first step is to distribute the elements according to a global
    // numbering of the DOFs; len is the maximum number of DOFs owned by
    // any rank and is used when sending the partition around later on
    index_t len=0;
    for (int p=0; p<mpiSize; ++p)
        len=std::max(len, distribution[p+1]-distribution[p]);
    std::vector<index_t> partition(len);

#ifdef ESYS_HAVE_PARMETIS
    if (mpiSize>1 && allRanksHaveNodes(MPIInfo, distribution)) {
        boost::scoped_array<IndexList> index_list(new IndexList[myNumVertices]);
        int dim=Nodes->numDim;
        // create the adjacency structure xadj and adjncy
#pragma omp parallel
        {
            // insert contributions from the element matrices into the
            // column index lists
            IndexList_insertElementsWithRowRangeNoMainDiagonal(index_list.get(),
                    myFirstVertex, myLastVertex, Elements,
                    Nodes->globalDegreesOfFreedom, Nodes->globalDegreesOfFreedom);
            IndexList_insertElementsWithRowRangeNoMainDiagonal(index_list.get(),
                    myFirstVertex, myLastVertex, FaceElements,
                    Nodes->globalDegreesOfFreedom, Nodes->globalDegreesOfFreedom);
            IndexList_insertElementsWithRowRangeNoMainDiagonal(index_list.get(),
                    myFirstVertex, myLastVertex, ContactElements,
                    Nodes->globalDegreesOfFreedom, Nodes->globalDegreesOfFreedom);
            IndexList_insertElementsWithRowRangeNoMainDiagonal(index_list.get(),
                    myFirstVertex, myLastVertex, Points,
                    Nodes->globalDegreesOfFreedom, Nodes->globalDegreesOfFreedom);
        }
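        // each index_list[i] now holds the global ids of the DOFs coupled
        // to local DOF i through some element; the main diagonal (i itself)
        // is excluded since the ParMETIS graph format allows no self-loops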

        // create the local matrix pattern
        const dim_t globalNumVertices=distribution[mpiSize];
        paso::Pattern_ptr pattern(paso::Pattern::fromIndexListArray(0,
                myNumVertices, index_list.get(), 0, globalNumVertices, 0));
        // set the coordinates
        std::vector<real_t> xyz(myNumVertices*dim);
#pragma omp parallel for
        for (index_t i=0; i<Nodes->numNodes; ++i) {
            const index_t k=Nodes->globalDegreesOfFreedom[i]-myFirstVertex;
            if (k>=0 && k<myNumVertices) {
                for (int j=0; j<dim; ++j)
                    xyz[k*dim+j]=static_cast<real_t>(Nodes->Coordinates[INDEX2(j,i,dim)]);
            }
        }
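        // ParMETIS arguments: wgtflag=0 means no vertex or edge weights,
        // numflag=0 selects C-style (0-based) numbering, ncon=1 uses a
        // single balance constraint; tpwgts requests equal partition sizes
        // and ubvec allows 5% load imbalance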

        index_t wgtflag = 0;
        index_t numflag = 0;
        index_t ncon = 1;
        index_t edgecut;
        index_t impiSize = mpiSize;
        index_t idim = dim;
        // options[0]=1 -> non-default values, evaluate rest of options
        // options[1]=0 -> debug level (no output)
        // options[2] -> random seed
        index_t options[3] = { 1, 0, 0 };
        std::vector<real_t> tpwgts(ncon*mpiSize, 1.f/mpiSize);
        std::vector<real_t> ubvec(ncon, 1.05f);
        ParMETIS_V3_PartGeomKway(&distribution[0], pattern->ptr, pattern->index,
                                 NULL, NULL, &wgtflag, &numflag, &idim, &xyz[0],
                                 &ncon, &impiSize, &tpwgts[0], &ubvec[0], options,
                                 &edgecut, &partition[0], &MPIInfo->comm);
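        // on return partition[i] is the new owner rank of the i-th locally
        // owned vertex; edgecut (the number of cut edges) is not used here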
    } else {
        for (index_t i=0; i<myNumVertices; ++i)
            partition[i]=0; // CPU 0 owns all
    }
#else
    for (index_t i=0; i<myNumVertices; ++i)
        partition[i]=myRank; // CPU myRank owns all
#endif

    // create a new distribution and labeling of the DOF
    std::vector<index_t> new_distribution(mpiSize+1, 0);
#pragma omp parallel
    {
        std::vector<int> loc_partition_count(mpiSize, 0);
#pragma omp for
        for (index_t i=0; i<myNumVertices; ++i)
            loc_partition_count[partition[i]]++;
#pragma omp critical
        {
            for (int i=0; i<mpiSize; ++i)
                new_distribution[i]+=loc_partition_count[i];
        }
    }
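    // new_distribution[p] now holds the number of locally owned vertices
    // that move to rank p; gather these counts from all ranks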
    index_t *recvbuf=new index_t[mpiSize*mpiSize];
#ifdef ESYS_MPI
    // recvbuf will be the concatenation of each CPU's contribution to
    // new_distribution
    MPI_Allgather(&new_distribution[0], mpiSize, MPI_DIM_T, recvbuf, mpiSize,
                  MPI_DIM_T, MPIInfo->comm);
#else
    for (int i=0; i<mpiSize; ++i)
        recvbuf[i]=new_distribution[i];
#endif
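    // recvbuf is an mpiSize x mpiSize table: recvbuf[p+mpiSize*q] is the
    // number of vertices that rank q assigns to partition p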
    new_distribution[0]=0;
    std::vector<index_t> newGlobalDOFID(len);
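    // relabel: within partition 'rank' the vertices contributed by lower
    // ranks come first, so this rank's offset c is the sum of their counts;
    // local vertices moving to 'rank' then receive consecutive new ids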
    for (int rank=0; rank<mpiSize; rank++) {
        index_t c=0;
        for (int i=0; i<myRank; ++i)
            c+=recvbuf[rank+mpiSize*i];
        for (index_t i=0; i<myNumVertices; ++i) {
            if (rank==partition[i]) {
                newGlobalDOFID[i]=new_distribution[rank]+c;
                c++;
            }
        }
        for (int i=myRank+1; i<mpiSize; ++i)
            c+=recvbuf[rank+mpiSize*i];
        new_distribution[rank+1]=new_distribution[rank]+c;
    }
    delete[] recvbuf;

    // now the overlap needs to be created by sending the new DOF labels around
#ifdef ESYS_MPI
    int dest = MPIInfo->mod_rank(myRank + 1);
    int source = MPIInfo->mod_rank(myRank - 1);
#endif
    int current_rank=myRank;
    std::vector<short> setNewDOFId(Nodes->numNodes, 1);
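    // ring communication: in step p this rank holds the new labels for the
    // block of DOFs originally owned by current_rank and applies them to
    // any of its nodes (including ghost nodes) that fall into that block;
    // setNewDOFId ensures every node is relabeled exactly once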

    for (int p=0; p<mpiSize; ++p) {
        const index_t firstVertex=distribution[current_rank];
        const index_t lastVertex=distribution[current_rank+1];
#pragma omp parallel for
        for (index_t i=0; i<Nodes->numNodes; ++i) {
            const index_t k=Nodes->globalDegreesOfFreedom[i];
            if (setNewDOFId[i] && (firstVertex<=k) && (k<lastVertex)) {
                Nodes->globalDegreesOfFreedom[i]=newGlobalDOFID[k-firstVertex];
                setNewDOFId[i]=0;
            }
        }

        if (p<mpiSize-1) { // the final send can be skipped
#ifdef ESYS_MPI
            MPI_Status status;
            MPI_Sendrecv_replace(&newGlobalDOFID[0], len, MPI_DIM_T,
                                 dest, MPIInfo->counter(),
                                 source, MPIInfo->counter(),
                                 MPIInfo->comm, &status);
            MPIInfo->incCounter();
#endif
            current_rank=MPIInfo->mod_rank(current_rank-1);
        }
    }
    for (int i=0; i<mpiSize+1; ++i)
        distribution[i]=new_distribution[i];
}

} // namespace finley