
/*****************************************************************************
*
* Copyright (c) 2003-2013 by University of Queensland
* http://www.uq.edu.au
*
* Primary Business: Queensland, Australia
* Licensed under the Open Software License version 3.0
* http://www.opensource.org/licenses/osl-3.0.php
*
* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
* Development since 2012 by School of Earth Sciences
*
*****************************************************************************/


/************************************************************************************/

/* Dudley: Mesh: optimizes the distribution of DOFs across processors using ParMETIS. */
/* On return a new distribution is given and the globalDOFs are relabelled            */
/* accordingly, but the mesh has not been redistributed yet.                          */

/************************************************************************************/
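
/*
   In outline, Dudley_Mesh_optimizeDOFDistribution below:
     1. builds the distributed CSR adjacency graph of the locally owned DOFs from
        the element, face-element and point connectivity,
     2. (if compiled with ParMETIS and running on more than one rank) calls
        ParMETIS_V3_PartGeomKway to repartition that graph, using the DOF
        coordinates as geometric information,
     3. counts how many vertices each rank assigns to each partition, gathers
        these counts and derives both the new distribution vector and new
        contiguous global DOF ids, and
     4. rotates the new ids around all ranks so that every processor can
        relabel the DOFs it shares with others.
   The mesh data itself is not moved here; only 'distribution' and
   in->Nodes->globalDegreesOfFreedom are updated.
*/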

#include "Mesh.h"
#include "IndexList.h"
#ifdef _OPENMP
#include <omp.h>
#endif
#ifdef USE_PARMETIS
#include "parmetis.h"
#endif

/************************************************************************************
   Check whether there is any processor which has no vertex. If such a
   processor exists, we don't use ParMETIS, since ParMETIS requires that
   every processor has at least one vertex (at line 129 of file "xyzpart.c"
   in ParMETIS 3.1.1, the variable "nvtxs" would be 0 if any processor has
   no vertex).
************************************************************************************/
#ifdef USE_PARMETIS
int Check_Inputs_For_Parmetis(dim_t mpiSize, dim_t rank, dim_t * distribution, MPI_Comm * comm)
{
    dim_t i, len;
    int ret_val = 1;

    if (rank == 0)
    {
        for (i = 0; i < mpiSize; i++)
        {
            len = distribution[i + 1] - distribution[i];
            if (len == 0)
            {
                ret_val = 0;
                break;
            }
        }
    }
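    /* rank 0 has inspected the distribution; broadcast its verdict so that
       every rank takes the same branch when deciding whether to call ParMETIS */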
    MPI_Bcast(&ret_val, 1, MPI_INT, 0, *comm);    /* MPI_INT: ret_val is a C int */
    if (ret_val == 0)
        printf("INFO: Parmetis is not used since some processors have no vertex!\n");
    return ret_val;
}
#endif

/************************************************************************************/

void Dudley_Mesh_optimizeDOFDistribution(Dudley_Mesh * in, dim_t * distribution)
{
    dim_t dim, i, j, k, myNumVertices, p, mpiSize, len, globalNumVertices, *partition_count = NULL,
        *new_distribution = NULL, *loc_partition_count = NULL;
    bool_t *setNewDOFId = NULL;
    index_t myFirstVertex, myLastVertex, firstVertex, lastVertex, *newGlobalDOFID = NULL;
    size_t mpiSize_size;
    index_t *partition = NULL;
    Paso_Pattern *pattern = NULL;
    Esys_MPI_rank myRank, current_rank, rank;
    Dudley_IndexList *index_list = NULL;
    float *xyz = NULL;
    int c;

#ifdef ESYS_MPI
    Esys_MPI_rank dest, source;
    MPI_Status status;
#endif

    if (in == NULL)
        return;
    if (in->Nodes == NULL)
        return;

    myRank = in->MPIInfo->rank;
    mpiSize = in->MPIInfo->size;
    mpiSize_size = mpiSize * sizeof(dim_t);
    dim = in->Nodes->numDim;
    /* first step is to distribute the elements according to a global distribution of the DOFs */

    myFirstVertex = distribution[myRank];
    myLastVertex = distribution[myRank + 1];
    myNumVertices = myLastVertex - myFirstVertex;
    globalNumVertices = distribution[mpiSize];
    len = 0;
    for (p = 0; p < mpiSize; ++p)
        len = MAX(len, distribution[p + 1] - distribution[p]);
    partition = new index_t[len];    /* len is the maximum chunk size; it is used later when the new DOF labels are sent around */
    xyz = new float[myNumVertices * dim];
    partition_count = new dim_t[mpiSize + 1];
    new_distribution = new dim_t[mpiSize + 1];
    newGlobalDOFID = new index_t[len];
    setNewDOFId = new bool_t[in->Nodes->numNodes];
    if (!(Dudley_checkPtr(partition) || Dudley_checkPtr(xyz) || Dudley_checkPtr(partition_count)
          || Dudley_checkPtr(new_distribution) || Dudley_checkPtr(newGlobalDOFID) || Dudley_checkPtr(setNewDOFId)))
    {
        dim_t *recvbuf = new dim_t[mpiSize * mpiSize];

        /* set the coordinates: */
        /* it is assumed that at least one node on this processor provides a coordinate */
#pragma omp parallel for private(i,j,k)
        for (i = 0; i < in->Nodes->numNodes; ++i)
        {
            k = in->Nodes->globalDegreesOfFreedom[i] - myFirstVertex;
            if ((k >= 0) && (k < myNumVertices))
            {
                for (j = 0; j < dim; ++j)
                    xyz[k * dim + j] = (float)(in->Nodes->Coordinates[INDEX2(j, i, dim)]);
            }
        }
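
        /* xyz now holds, ordered by local DOF index, the coordinates of the
           vertices owned by this rank; ParMETIS uses them as the geometric
           information for the initial partitioning */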
        index_list = new Dudley_IndexList[myNumVertices];
        /* ksteube CSR of DOF IDs */
        /* create the adjacency structure xadj and adjncy */
        if (!Dudley_checkPtr(index_list))
        {
#pragma omp parallel private(i)
            {
#pragma omp for schedule(static)
                for (i = 0; i < myNumVertices; ++i)
                {
                    index_list[i].extension = NULL;
                    index_list[i].n = 0;
                }
                /* ksteube build CSR format */
                /* insert contributions from element matrices into the column index list index_list: */
                Dudley_IndexList_insertElementsWithRowRangeNoMainDiagonal(index_list, myFirstVertex, myLastVertex,
                                                                          in->Elements,
                                                                          in->Nodes->globalDegreesOfFreedom,
                                                                          in->Nodes->globalDegreesOfFreedom);
                Dudley_IndexList_insertElementsWithRowRangeNoMainDiagonal(index_list, myFirstVertex, myLastVertex,
                                                                          in->FaceElements,
                                                                          in->Nodes->globalDegreesOfFreedom,
                                                                          in->Nodes->globalDegreesOfFreedom);
                Dudley_IndexList_insertElementsWithRowRangeNoMainDiagonal(index_list, myFirstVertex, myLastVertex,
                                                                          in->Points, in->Nodes->globalDegreesOfFreedom,
                                                                          in->Nodes->globalDegreesOfFreedom);
            }

            /* create the local matrix pattern */
            pattern = Dudley_IndexList_createPattern(0, myNumVertices, index_list, 0, globalNumVertices, 0);
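            /* pattern->ptr and pattern->index now hold the local rows of the
               distributed CSR connectivity graph of the DOFs (the xadj/adjncy
               arrays expected by ParMETIS below) */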

            /* clean up index list */
            if (index_list != NULL)
            {
#pragma omp parallel for private(i)
                for (i = 0; i < myNumVertices; ++i)
                    Dudley_IndexList_free(index_list[i].extension);
            }

            if (Dudley_noError())
            {
#ifdef USE_PARMETIS
                if (mpiSize > 1 && Check_Inputs_For_Parmetis(mpiSize, myRank, distribution, &(in->MPIInfo->comm)) > 0)
                {
                    int i;
                    int wgtflag = 0;
                    int numflag = 0;    /* pattern->ptr is C style: starting from 0 instead of 1 */
                    int ncon = 1;
                    int edgecut;
                    int options[2];
                    float *tpwgts = new float[ncon * mpiSize];
                    float *ubvec = new float[ncon];
                    for (i = 0; i < ncon * mpiSize; i++)
                        tpwgts[i] = 1.0 / (float)mpiSize;
                    for (i = 0; i < ncon; i++)
                        ubvec[i] = 1.05;
                    options[0] = 3;
                    options[1] = 15;
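                    /* ParMETIS_V3_PartGeomKway arguments, in order: vtxdist (current DOF
                       distribution), xadj/adjncy (CSR graph), vwgt/adjwgt (NULL: no weights),
                       wgtflag, numflag, ndims, xyz (vertex coordinates), ncon, nparts
                       (= mpiSize), tpwgts (target partition fractions), ubvec (imbalance
                       tolerance), options, edgecut (output), part (output: new owner of each
                       local vertex), and the MPI communicator */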
                    ParMETIS_V3_PartGeomKway(distribution, pattern->ptr, pattern->index, NULL, NULL,
                                             &wgtflag, &numflag, &dim, xyz, &ncon, &mpiSize,
                                             tpwgts, ubvec, options, &edgecut,
                                             partition,    /* new CPU ownership of the local vertices (DOFs) */
                                             &(in->MPIInfo->comm));
                    /* printf("ParMETIS number of edges cut by partitioning per processor: %d\n", edgecut/MAX(in->MPIInfo->size,1)); */
                    delete[] ubvec;
                    delete[] tpwgts;
                }
                else
                {
                    for (i = 0; i < myNumVertices; ++i)
                        partition[i] = 0;    /* CPU 0 owns it */
                }
#else
                for (i = 0; i < myNumVertices; ++i)
                    partition[i] = myRank;    /* this rank keeps ownership */
#endif
            }

            Paso_Pattern_free(pattern);

            /* create a new distribution and labelling of the DOFs */
            memset(new_distribution, 0, mpiSize_size);
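            /* count how many of the local vertices were assigned to each partition;
               each thread counts into its own buffer and the results are summed into
               new_distribution inside the critical section */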
#pragma omp parallel private(loc_partition_count)
            {
                loc_partition_count = THREAD_MEMALLOC(mpiSize, dim_t);
                memset(loc_partition_count, 0, mpiSize_size);
#pragma omp for private(i)
                for (i = 0; i < myNumVertices; ++i)
                    loc_partition_count[partition[i]]++;
#pragma omp critical
                {
                    for (i = 0; i < mpiSize; ++i)
                        new_distribution[i] += loc_partition_count[i];
                }
                THREAD_MEMFREE(loc_partition_count);
            }
#ifdef ESYS_MPI
            /* recvbuf will be the concatenation of each CPU's contribution to new_distribution */
            MPI_Allgather(new_distribution, mpiSize, MPI_INT, recvbuf, mpiSize, MPI_INT, in->MPIInfo->comm);
#else
            for (i = 0; i < mpiSize; ++i)
                recvbuf[i] = new_distribution[i];
#endif
            new_distribution[0] = 0;
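            /* recvbuf[rank + mpiSize * q] is the number of vertices that processor q
               assigned to partition 'rank'; the new global id of a vertex is the start
               of its partition plus the number of vertices placed in that partition by
               lower-ranked processors (and counted so far on this processor) */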
            for (rank = 0; rank < mpiSize; rank++)
            {
                c = 0;
                for (i = 0; i < myRank; ++i)
                    c += recvbuf[rank + mpiSize * i];
                for (i = 0; i < myNumVertices; ++i)
                {
                    if (rank == partition[i])
                    {
                        newGlobalDOFID[i] = new_distribution[rank] + c;
                        c++;
                    }
                }
                for (i = myRank + 1; i < mpiSize; ++i)
                    c += recvbuf[rank + mpiSize * i];
                new_distribution[rank + 1] = new_distribution[rank] + c;
            }
            delete[] recvbuf;

            /* now the overlap needs to be created by sending the new DOF labelling around */
#ifdef ESYS_MPI
            dest = Esys_MPIInfo_mod(mpiSize, myRank + 1);
            source = Esys_MPIInfo_mod(mpiSize, myRank - 1);
#endif
            current_rank = myRank;
#pragma omp parallel for private(i)
            for (i = 0; i < in->Nodes->numNodes; ++i)
                setNewDOFId[i] = TRUE;
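
            /* rotate newGlobalDOFID through all ranks with MPI_Sendrecv_replace so that
               every processor sees every chunk of the new labelling exactly once;
               current_rank tracks whose chunk of the old distribution is currently held,
               and each shared (halo) DOF is relabelled the first time its chunk arrives */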
            for (p = 0; p < mpiSize; ++p)
            {
                firstVertex = distribution[current_rank];
                lastVertex = distribution[current_rank + 1];
#pragma omp parallel for private(i,j,k)
                for (i = 0; i < in->Nodes->numNodes; ++i)
                {
                    k = in->Nodes->globalDegreesOfFreedom[i];
                    if (setNewDOFId[i] && (firstVertex <= k) && (k < lastVertex))
                    {
                        in->Nodes->globalDegreesOfFreedom[i] = newGlobalDOFID[k - firstVertex];
                        setNewDOFId[i] = FALSE;
                    }
                }

                if (p < mpiSize - 1)
                {    /* the final send can be skipped */
#ifdef ESYS_MPI
                    MPI_Sendrecv_replace(newGlobalDOFID, len, MPI_INT,
                                         dest, in->MPIInfo->msg_tag_counter,
                                         source, in->MPIInfo->msg_tag_counter, in->MPIInfo->comm, &status);
#endif
                    in->MPIInfo->msg_tag_counter++;
                    current_rank = Esys_MPIInfo_mod(mpiSize, current_rank - 1);
                }
            }
            for (i = 0; i < mpiSize + 1; ++i)
                distribution[i] = new_distribution[i];
        }
        delete[] index_list;
    }
    delete[] newGlobalDOFID;
    delete[] setNewDOFId;
    delete[] new_distribution;
    delete[] partition_count;
    delete[] partition;
    delete[] xyz;
    return;
}