Contents of /trunk/dudley/src/Mesh_optimizeDOFDistribution.c
Revision 3231 - Fri Oct 1 01:53:46 2010 UTC - by jfenwick
Original Path: branches/domexper/dudley/src/Mesh_optimizeDOFDistribution.c
File MIME type: text/plain
File size: 9765 byte(s)
Fix MPI and OMP problems not detected in serial

/*******************************************************
*
* Copyright (c) 2003-2010 by University of Queensland
* Earth Systems Science Computational Center (ESSCC)
* http://www.uq.edu.au/esscc
*
* Primary Business: Queensland, Australia
* Licensed under the Open Software License version 3.0
* http://www.opensource.org/licenses/osl-3.0.php
*
*******************************************************/

/**************************************************************/

/* Dudley: Mesh: optimizes the distribution of DOFs across processors */
/* using ParMETIS. On return a new distribution is given and the global DOFs are relabelled */
/* accordingly, but the mesh itself has not been redistributed yet. */

/**************************************************************/

#include "Mesh.h"
#include "IndexList.h"
#ifdef _OPENMP
#include <omp.h>
#endif
#ifdef USE_PARMETIS
#include "parmetis.h"
#endif

/**************************************************************
Check whether there is any rank that owns no vertex, i.e. whose
range in the DOF distribution is empty. If such a rank exists we
do not use ParMETIS, since ParMETIS requires that every rank has
at least 1 vertex (at line 129 of file "xyzpart.c" in ParMETIS
3.1.1, the variable "nvtxs" would be 0 for such a rank).
**************************************************************/
#ifdef USE_PARMETIS
int Check_Inputs_For_Parmetis(dim_t mpiSize, dim_t rank, dim_t * distribution, MPI_Comm * comm)
{
    dim_t i, len;
    int ret_val = 1;

    if (rank == 0)
    {
        for (i = 0; i < mpiSize; i++)
        {
            len = distribution[i + 1] - distribution[i];
            if (len == 0)
            {
                ret_val = 0;
                break;
            }
        }
    }
    MPI_Bcast(&ret_val, 1, MPI_INT, 0, *comm);  /* ret_val is a C int, so broadcast it as MPI_INT */
    if (ret_val == 0)
        printf("INFO: Parmetis is not used since some nodes have no vertex!\n");
    return ret_val;
}
#endif

/**************************************************************/

void Dudley_Mesh_optimizeDOFDistribution(Dudley_Mesh * in, dim_t * distribution)
{
    dim_t dim, i, j, k, myNumVertices, p, mpiSize, len, globalNumVertices;
    dim_t *partition_count = NULL, *new_distribution = NULL, *loc_partition_count = NULL;
    bool_t *setNewDOFId = NULL;
    index_t myFirstVertex, myLastVertex, firstVertex, lastVertex, *newGlobalDOFID = NULL;
    size_t mpiSize_size;
    index_t *partition = NULL;
    Paso_Pattern *pattern = NULL;
    Esys_MPI_rank myRank, dest, source, current_rank, rank;
    Dudley_IndexList *index_list = NULL;
    float *xyz = NULL;
    int c;

#ifdef ESYS_MPI
    MPI_Status status;
#endif

    if (in == NULL)
        return;
    if (in->Nodes == NULL)
        return;

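    /* Overview: gather the coordinates of the DOFs owned by this rank, build their
       adjacency graph from the element connectivity, partition the graph with ParMETIS
       (when available and applicable), derive the new DOF distribution and new global
       DOF numbers, and finally circulate the new numbers around the ranks so that
       shared (overlap) DOFs are relabelled consistently everywhere. */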
    myRank = in->MPIInfo->rank;
    mpiSize = in->MPIInfo->size;
    mpiSize_size = mpiSize * sizeof(dim_t);
    dim = in->Nodes->numDim;
    /* the first step is to distribute the elements according to the global distribution of the DOFs */

    myFirstVertex = distribution[myRank];
    myLastVertex = distribution[myRank + 1];
    myNumVertices = myLastVertex - myFirstVertex;
    globalNumVertices = distribution[mpiSize];
    len = 0;
    for (p = 0; p < mpiSize; ++p)
        len = MAX(len, distribution[p + 1] - distribution[p]);
    partition = TMPMEMALLOC(len, index_t);      /* len is the largest chunk in the distribution; buffers of this size are passed around the ranks later on */
    xyz = TMPMEMALLOC(myNumVertices * dim, float);
    partition_count = TMPMEMALLOC(mpiSize + 1, dim_t);
    new_distribution = TMPMEMALLOC(mpiSize + 1, dim_t);
    newGlobalDOFID = TMPMEMALLOC(len, index_t);
    setNewDOFId = TMPMEMALLOC(in->Nodes->numNodes, bool_t);
    if (!(Dudley_checkPtr(partition) || Dudley_checkPtr(xyz) || Dudley_checkPtr(partition_count)
          || Dudley_checkPtr(new_distribution) || Dudley_checkPtr(newGlobalDOFID) || Dudley_checkPtr(setNewDOFId)))
    {
        dim_t *recvbuf = TMPMEMALLOC(mpiSize * mpiSize, dim_t);

        /* set the coordinates: */
        /* it is assumed that at least one node on this processor provides a coordinate */
#pragma omp parallel for private(i,j,k)
        for (i = 0; i < in->Nodes->numNodes; ++i)
        {
            k = in->Nodes->globalDegreesOfFreedom[i] - myFirstVertex;
            if ((k >= 0) && (k < myNumVertices))
            {
                for (j = 0; j < dim; ++j)
                    xyz[k * dim + j] = (float)(in->Nodes->Coordinates[INDEX2(j, i, dim)]);
            }
        }

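        /* xyz now holds, ordered by local DOF index, the coordinates of the DOFs owned by
           this rank; ParMETIS uses them below for the geometric part of the partitioning. */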
        index_list = TMPMEMALLOC(myNumVertices, Dudley_IndexList);
        /* ksteube CSR of DOF IDs */
        /* create the adjacency structure xadj and adjncy */
        if (!Dudley_checkPtr(index_list))
        {
#pragma omp parallel private(i)
            {
#pragma omp for schedule(static)
                for (i = 0; i < myNumVertices; ++i)
                {
                    index_list[i].extension = NULL;
                    index_list[i].n = 0;
                }
                /* ksteube build CSR format */
                /* insert contributions from the element matrices into the column index list index_list: */
                Dudley_IndexList_insertElementsWithRowRangeNoMainDiagonal(index_list, myFirstVertex, myLastVertex,
                                                                          in->Elements,
                                                                          in->Nodes->globalDegreesOfFreedom,
                                                                          in->Nodes->globalDegreesOfFreedom);
                Dudley_IndexList_insertElementsWithRowRangeNoMainDiagonal(index_list, myFirstVertex, myLastVertex,
                                                                          in->FaceElements,
                                                                          in->Nodes->globalDegreesOfFreedom,
                                                                          in->Nodes->globalDegreesOfFreedom);
                Dudley_IndexList_insertElementsWithRowRangeNoMainDiagonal(index_list, myFirstVertex, myLastVertex,
                                                                          in->Points,
                                                                          in->Nodes->globalDegreesOfFreedom,
                                                                          in->Nodes->globalDegreesOfFreedom);
            }

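            /* The per-row index lists are condensed into a Paso_Pattern below; its ptr and
               index arrays are the CSR arrays (xadj/adjncy) that ParMETIS expects. */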
            /* create the local matrix pattern */
            pattern = Dudley_IndexList_createPattern(0, myNumVertices, index_list, 0, globalNumVertices, 0);

            /* clean up index list */
            if (index_list != NULL)
            {
#pragma omp parallel for private(i)
                for (i = 0; i < myNumVertices; ++i)
                    Dudley_IndexList_free(index_list[i].extension);
            }

            if (Dudley_noError())
            {

#ifdef USE_PARMETIS

                if (mpiSize > 1 && Check_Inputs_For_Parmetis(mpiSize, myRank, distribution, &(in->MPIInfo->comm)) > 0)
                {
                    int i;
                    int wgtflag = 0;
                    int numflag = 0;    /* pattern->ptr is C style: starting from 0 instead of 1 */
                    int ncon = 1;
                    int edgecut;
                    int options[2];
                    float *tpwgts = TMPMEMALLOC(ncon * mpiSize, float);
                    float *ubvec = TMPMEMALLOC(ncon, float);
                    for (i = 0; i < ncon * mpiSize; i++)
                        tpwgts[i] = 1.0 / (float)mpiSize;
                    for (i = 0; i < ncon; i++)
                        ubvec[i] = 1.05;
                    options[0] = 3;
                    options[1] = 15;
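                    /* Partition the distributed DOF graph (vertex distribution "distribution",
                       CSR adjacency pattern->ptr/pattern->index, no vertex or edge weights) into
                       mpiSize parts, using both the coordinates in xyz and the connectivity.
                       The target rank of each locally owned DOF is returned in partition[];
                       edgecut reports how many graph edges the partitioning cuts. */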
                    ParMETIS_V3_PartGeomKway(distribution, pattern->ptr, pattern->index, NULL, NULL,
                                             &wgtflag, &numflag, &dim, xyz, &ncon, &mpiSize,
                                             tpwgts, ubvec, options, &edgecut,
                                             partition,   /* new CPU ownership of elements */
                                             &(in->MPIInfo->comm));
                    /* printf("ParMETIS number of edges cut by partitioning per processor: %d\n", edgecut/MAX(in->MPIInfo->size,1)); */
                    TMPMEMFREE(ubvec);
                    TMPMEMFREE(tpwgts);
                }
                else
                {
                    for (i = 0; i < myNumVertices; ++i)
                        partition[i] = 0;       /* CPU 0 owns them all */
                }
#else
                for (i = 0; i < myNumVertices; ++i)
                    partition[i] = myRank;      /* without ParMETIS each rank keeps its own DOFs */
#endif

            }

            Paso_Pattern_free(pattern);

            /* create a new distribution and labelling of the DOFs */
            memset(new_distribution, 0, mpiSize_size);
#pragma omp parallel private(loc_partition_count)
            {
                loc_partition_count = THREAD_MEMALLOC(mpiSize, dim_t);
                memset(loc_partition_count, 0, mpiSize_size);
#pragma omp for private(i)
                for (i = 0; i < myNumVertices; ++i)
                    loc_partition_count[partition[i]]++;
#pragma omp critical
                {
                    for (i = 0; i < mpiSize; ++i)
                        new_distribution[i] += loc_partition_count[i];
                }
                THREAD_MEMFREE(loc_partition_count);
            }
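            /* new_distribution[p] now holds the number of locally owned DOFs assigned to rank p;
               the counts of all ranks are collected into recvbuf below (row i = counts from rank i). */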
#ifdef ESYS_MPI
            /* recvbuf will be the concatenation of each CPU's contribution to new_distribution */
            MPI_Allgather(new_distribution, mpiSize, MPI_INT, recvbuf, mpiSize, MPI_INT, in->MPIInfo->comm);
#else
            for (i = 0; i < mpiSize; ++i)
                recvbuf[i] = new_distribution[i];
#endif
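            /* Turn the gathered counts into the new cumulative distribution and hand out new
               global DOF IDs: for each target rank, c first counts the DOFs that lower-ranked
               processes contribute to that rank, the locally owned DOFs assigned to it are then
               numbered consecutively from new_distribution[rank] + c, and finally the
               contributions of the higher-ranked processes are added so that
               new_distribution[rank + 1] ends up correct. */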
            new_distribution[0] = 0;
            for (rank = 0; rank < mpiSize; rank++)
            {
                c = 0;
                for (i = 0; i < myRank; ++i)
                    c += recvbuf[rank + mpiSize * i];
                for (i = 0; i < myNumVertices; ++i)
                {
                    if (rank == partition[i])
                    {
                        newGlobalDOFID[i] = new_distribution[rank] + c;
                        c++;
                    }
                }
                for (i = myRank + 1; i < mpiSize; ++i)
                    c += recvbuf[rank + mpiSize * i];
                new_distribution[rank + 1] = new_distribution[rank] + c;
            }
            TMPMEMFREE(recvbuf);

            /* now the overlap needs to be created by sending the new DOF numbering around the ranks */

            dest = Esys_MPIInfo_mod(mpiSize, myRank + 1);
            source = Esys_MPIInfo_mod(mpiSize, myRank - 1);
            current_rank = myRank;
#pragma omp parallel for private(i)
            for (i = 0; i < in->Nodes->numNodes; ++i)
                setNewDOFId[i] = TRUE;

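            /* Ring exchange: in each of the mpiSize sweeps this rank relabels those of its nodes
               whose old global DOF lies in the chunk owned by current_rank, using the block of
               newGlobalDOFID it currently holds; the block is then forwarded to the next rank
               while the previous rank's block is received, so that eventually every node,
               including overlap nodes owned elsewhere, carries its new global DOF number. */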
            for (p = 0; p < mpiSize; ++p)
            {
                firstVertex = distribution[current_rank];
                lastVertex = distribution[current_rank + 1];
#pragma omp parallel for private(i,j,k)
                for (i = 0; i < in->Nodes->numNodes; ++i)
                {
                    k = in->Nodes->globalDegreesOfFreedom[i];
                    if (setNewDOFId[i] && (firstVertex <= k) && (k < lastVertex))
                    {
                        in->Nodes->globalDegreesOfFreedom[i] = newGlobalDOFID[k - firstVertex];
                        setNewDOFId[i] = FALSE;
                    }
                }

                if (p < mpiSize - 1)
                {               /* the final send can be skipped */
#ifdef ESYS_MPI
                    MPI_Sendrecv_replace(newGlobalDOFID, len, MPI_INT,
                                         dest, in->MPIInfo->msg_tag_counter,
                                         source, in->MPIInfo->msg_tag_counter, in->MPIInfo->comm, &status);
#endif
                    in->MPIInfo->msg_tag_counter++;
                    current_rank = Esys_MPIInfo_mod(mpiSize, current_rank - 1);
                }
            }
            for (i = 0; i < mpiSize + 1; ++i)
                distribution[i] = new_distribution[i];
        }
        TMPMEMFREE(index_list);
    }
    TMPMEMFREE(newGlobalDOFID);
    TMPMEMFREE(setNewDOFId);
    TMPMEMFREE(new_distribution);
    TMPMEMFREE(partition_count);
    TMPMEMFREE(partition);
    TMPMEMFREE(xyz);
    return;
}
