/[escript]/trunk/dudley/src/Mesh_optimizeDOFDistribution.c


Revision 3603 - Mon Sep 19 03:42:53 2011 UTC by caltinay
Fixed some unused var errors in non-mpi build.


/*******************************************************
*
* Copyright (c) 2003-2010 by University of Queensland
* Earth Systems Science Computational Center (ESSCC)
* http://www.uq.edu.au/esscc
*
* Primary Business: Queensland, Australia
* Licensed under the Open Software License version 3.0
* http://www.opensource.org/licenses/osl-3.0.php
*
*******************************************************/

/**************************************************************/

/* Dudley: Mesh: optimizes the distribution of DOFs across processors */
/* using ParMETIS. On return a new distribution is given and the globalDOF are relabelled */
/* accordingly but the mesh has not been redistributed yet */

/**************************************************************/

#include "Mesh.h"
#include "IndexList.h"
#ifdef _OPENMP
#include <omp.h>
#endif
#ifdef USE_PARMETIS
#include "parmetis.h"
#endif

/**************************************************************
   Check whether there is any processor which has no vertex. If such
   a processor exists, we don't use ParMETIS since ParMETIS requires
   that every processor has at least 1 vertex (at line 129 of file
   "xyzpart.c" in ParMETIS 3.1.1, variable "nvtxs" would be 0 if
   any processor has no vertex).
 **************************************************************/
#ifdef USE_PARMETIS
int Check_Inputs_For_Parmetis(dim_t mpiSize, dim_t rank, dim_t * distribution, MPI_Comm * comm)
{
    dim_t i, len;
    int ret_val = 1;

    if (rank == 0)
    {
        for (i = 0; i < mpiSize; i++)
        {
            len = distribution[i + 1] - distribution[i];
            if (len == 0)
            {
                ret_val = 0;
                break;
            }
        }
    }
    /* MPI_INT matches the C int ret_val (MPI_INTEGER is the Fortran integer type) */
    MPI_Bcast(&ret_val, 1, MPI_INT, 0, *comm);
    if (ret_val == 0)
        printf("INFO: ParMETIS is not used since some processors have no vertex!\n");
    return ret_val;
}
#endif

/**************************************************************/

void Dudley_Mesh_optimizeDOFDistribution(Dudley_Mesh * in, dim_t * distribution)
{
    dim_t dim, i, j, k, myNumVertices, p, mpiSize, len, globalNumVertices;
    dim_t *partition_count = NULL, *new_distribution = NULL, *loc_partition_count = NULL;
    bool_t *setNewDOFId = NULL;
    index_t myFirstVertex, myLastVertex, firstVertex, lastVertex, *newGlobalDOFID = NULL;
    size_t mpiSize_size;
    index_t *partition = NULL;
    Paso_Pattern *pattern = NULL;
    Esys_MPI_rank myRank, current_rank, rank;
    Dudley_IndexList *index_list = NULL;
    float *xyz = NULL;
    int c;

#ifdef ESYS_MPI
    Esys_MPI_rank dest, source;
    MPI_Status status;
#endif

    if (in == NULL)
        return;
    if (in->Nodes == NULL)
        return;

    myRank = in->MPIInfo->rank;
    mpiSize = in->MPIInfo->size;
    mpiSize_size = mpiSize * sizeof(dim_t);
    dim = in->Nodes->numDim;
    /* first step is to distribute the elements according to a global distribution of the DOFs */

    myFirstVertex = distribution[myRank];
    myLastVertex = distribution[myRank + 1];
    myNumVertices = myLastVertex - myFirstVertex;
    globalNumVertices = distribution[mpiSize];
    len = 0;
    for (p = 0; p < mpiSize; ++p)
        len = MAX(len, distribution[p + 1] - distribution[p]);
    partition = TMPMEMALLOC(len, index_t);    /* len is used for the sending around of partition later on */
    xyz = TMPMEMALLOC(myNumVertices * dim, float);
    partition_count = TMPMEMALLOC(mpiSize + 1, dim_t);
    new_distribution = TMPMEMALLOC(mpiSize + 1, dim_t);
    newGlobalDOFID = TMPMEMALLOC(len, index_t);
    setNewDOFId = TMPMEMALLOC(in->Nodes->numNodes, bool_t);
    if (!(Dudley_checkPtr(partition) || Dudley_checkPtr(xyz) || Dudley_checkPtr(partition_count)
          || Dudley_checkPtr(new_distribution) || Dudley_checkPtr(newGlobalDOFID) || Dudley_checkPtr(setNewDOFId)))
    {
        dim_t *recvbuf = TMPMEMALLOC(mpiSize * mpiSize, dim_t);

        /* set the coordinates: */
        /* it is assumed that at least one node on this processor provides a coordinate */
        #pragma omp parallel for private(i,j,k)
        for (i = 0; i < in->Nodes->numNodes; ++i)
        {
            k = in->Nodes->globalDegreesOfFreedom[i] - myFirstVertex;
            if ((k >= 0) && (k < myNumVertices))
            {
                for (j = 0; j < dim; ++j)
                    xyz[k * dim + j] = (float)(in->Nodes->Coordinates[INDEX2(j, i, dim)]);
            }
        }
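        /* xyz now holds, ordered by local DOF index, the coordinates of the DOFs
           owned by this rank; it feeds the geometric part of the partitioner below */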

        index_list = TMPMEMALLOC(myNumVertices, Dudley_IndexList);
        /* ksteube CSR of DOF IDs */
        /* create the adjacency structure xadj and adjncy */
        if (!Dudley_checkPtr(index_list))
        {
            #pragma omp parallel private(i)
            {
                #pragma omp for schedule(static)
                for (i = 0; i < myNumVertices; ++i)
                {
                    index_list[i].extension = NULL;
                    index_list[i].n = 0;
                }
                /* ksteube build CSR format */
                /* insert contributions from element matrices into the column index list index_list: */
                Dudley_IndexList_insertElementsWithRowRangeNoMainDiagonal(index_list, myFirstVertex, myLastVertex,
                                                                          in->Elements,
                                                                          in->Nodes->globalDegreesOfFreedom,
                                                                          in->Nodes->globalDegreesOfFreedom);
                Dudley_IndexList_insertElementsWithRowRangeNoMainDiagonal(index_list, myFirstVertex, myLastVertex,
                                                                          in->FaceElements,
                                                                          in->Nodes->globalDegreesOfFreedom,
                                                                          in->Nodes->globalDegreesOfFreedom);
                Dudley_IndexList_insertElementsWithRowRangeNoMainDiagonal(index_list, myFirstVertex, myLastVertex,
                                                                          in->Points,
                                                                          in->Nodes->globalDegreesOfFreedom,
                                                                          in->Nodes->globalDegreesOfFreedom);
            }

            /* create the local matrix pattern */
            pattern = Dudley_IndexList_createPattern(0, myNumVertices, index_list, 0, globalNumVertices, 0);
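            /* pattern->ptr and pattern->index now hold the distributed CSR adjacency
               structure of the local DOF graph (xadj/adjncy in ParMETIS terms), with
               global column indices and without main-diagonal entries */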

            /* clean up index list */
            if (index_list != NULL)
            {
                #pragma omp parallel for private(i)
                for (i = 0; i < myNumVertices; ++i)
                    Dudley_IndexList_free(index_list[i].extension);
            }

            if (Dudley_noError())
            {

#ifdef USE_PARMETIS

                if (mpiSize > 1 && Check_Inputs_For_Parmetis(mpiSize, myRank, distribution, &(in->MPIInfo->comm)) > 0)
                {
                    int i;
                    int wgtflag = 0;
                    int numflag = 0;    /* pattern->ptr is C style: starting from 0 instead of 1 */
                    int ncon = 1;
                    int edgecut;
                    int options[2];
                    float *tpwgts = TMPMEMALLOC(ncon * mpiSize, float);
                    float *ubvec = TMPMEMALLOC(ncon, float);
                    for (i = 0; i < ncon * mpiSize; i++)
                        tpwgts[i] = 1.0 / (float)mpiSize;
                    for (i = 0; i < ncon; i++)
                        ubvec[i] = 1.05;
                    options[0] = 3;
                    options[1] = 15;
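                    /* arguments below, in order: vtxdist=distribution, xadj=pattern->ptr,
                       adjncy=pattern->index, no vertex/edge weights, wgtflag, numflag,
                       ndims=dim, vertex coordinates xyz, ncon, nparts=mpiSize, tpwgts,
                       ubvec, options, and the outputs edgecut and partition (the new
                       owner rank of each locally owned vertex) */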
                    ParMETIS_V3_PartGeomKway(distribution, pattern->ptr, pattern->index, NULL, NULL,
                                             &wgtflag, &numflag, &dim, xyz, &ncon, &mpiSize,
                                             tpwgts, ubvec, options, &edgecut,
                                             partition,    /* new CPU ownership of elements */
                                             &(in->MPIInfo->comm));
                    /* printf("ParMETIS number of edges cut by partitioning per processor: %d\n", edgecut/MAX(in->MPIInfo->size,1)); */
                    TMPMEMFREE(ubvec);
                    TMPMEMFREE(tpwgts);
                }
                else
                {
                    for (i = 0; i < myNumVertices; ++i)
                        partition[i] = 0;    /* CPU 0 owns it */
                }
#else
                for (i = 0; i < myNumVertices; ++i)
                    partition[i] = myRank;    /* without ParMETIS each rank keeps its own vertices */
#endif

            }

            Paso_Pattern_free(pattern);

            /* create a new distribution and labeling of the DOF */
            memset(new_distribution, 0, mpiSize_size);
            #pragma omp parallel private(loc_partition_count)
            {
                loc_partition_count = THREAD_MEMALLOC(mpiSize, dim_t);
                memset(loc_partition_count, 0, mpiSize_size);
                #pragma omp for private(i)
                for (i = 0; i < myNumVertices; ++i)
                    loc_partition_count[partition[i]]++;
                #pragma omp critical
                {
                    for (i = 0; i < mpiSize; ++i)
                        new_distribution[i] += loc_partition_count[i];
                }
                THREAD_MEMFREE(loc_partition_count);
            }
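
            /* new_distribution[r] now counts the local vertices assigned to new rank r;
               the gather below collects these counts from all ranks so that
               recvbuf[r + mpiSize * p] is the number of vertices that rank p
               contributes to new rank r */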
#ifdef ESYS_MPI
            /* recvbuf will be the concatenation of each CPU's contribution to new_distribution */
            MPI_Allgather(new_distribution, mpiSize, MPI_INT, recvbuf, mpiSize, MPI_INT, in->MPIInfo->comm);
#else
            for (i = 0; i < mpiSize; ++i)
                recvbuf[i] = new_distribution[i];
#endif
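            /* build the new distribution and relabel the locally owned DOFs: a vertex
               assigned to new rank r gets the id new_distribution[r] plus the number of
               vertices that lower (old) ranks contribute to r plus a running counter */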
            new_distribution[0] = 0;
            for (rank = 0; rank < mpiSize; rank++)
            {
                c = 0;
                for (i = 0; i < myRank; ++i)
                    c += recvbuf[rank + mpiSize * i];
                for (i = 0; i < myNumVertices; ++i)
                {
                    if (rank == partition[i])
                    {
                        newGlobalDOFID[i] = new_distribution[rank] + c;
                        c++;
                    }
                }
                for (i = myRank + 1; i < mpiSize; ++i)
                    c += recvbuf[rank + mpiSize * i];
                new_distribution[rank + 1] = new_distribution[rank] + c;
            }
            TMPMEMFREE(recvbuf);

            /* now the overlap needs to be created by sending the partition around */
#ifdef ESYS_MPI
            dest = Esys_MPIInfo_mod(mpiSize, myRank + 1);
            source = Esys_MPIInfo_mod(mpiSize, myRank - 1);
#endif
            current_rank = myRank;
            #pragma omp parallel for private(i)
            for (i = 0; i < in->Nodes->numNodes; ++i)
                setNewDOFId[i] = TRUE;
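
            /* rotate the newGlobalDOFID buffer around the process ring: in pass p the
               buffer holds the new ids of the DOFs owned (in the old distribution) by
               current_rank, so every local node whose old global DOF id falls into
               current_rank's old range is relabelled; after mpiSize passes all nodes,
               including the overlap, carry their new global DOF ids */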
            for (p = 0; p < mpiSize; ++p)
            {
                firstVertex = distribution[current_rank];
                lastVertex = distribution[current_rank + 1];
                #pragma omp parallel for private(i,j,k)
                for (i = 0; i < in->Nodes->numNodes; ++i)
                {
                    k = in->Nodes->globalDegreesOfFreedom[i];
                    if (setNewDOFId[i] && (firstVertex <= k) && (k < lastVertex))
                    {
                        in->Nodes->globalDegreesOfFreedom[i] = newGlobalDOFID[k - firstVertex];
                        setNewDOFId[i] = FALSE;
                    }
                }

                if (p < mpiSize - 1)
                {    /* the final send can be skipped */
#ifdef ESYS_MPI
                    MPI_Sendrecv_replace(newGlobalDOFID, len, MPI_INT,
                                         dest, in->MPIInfo->msg_tag_counter,
                                         source, in->MPIInfo->msg_tag_counter, in->MPIInfo->comm, &status);
#endif
                    in->MPIInfo->msg_tag_counter++;
                    current_rank = Esys_MPIInfo_mod(mpiSize, current_rank - 1);
                }
            }
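            /* hand the new distribution back to the caller; only the global DOF
               numbering has been changed here, the mesh itself has not been
               redistributed yet */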
            for (i = 0; i < mpiSize + 1; ++i)
                distribution[i] = new_distribution[i];
        }
        TMPMEMFREE(index_list);
    }
    TMPMEMFREE(newGlobalDOFID);
    TMPMEMFREE(setNewDOFId);
    TMPMEMFREE(new_distribution);
    TMPMEMFREE(partition_count);
    TMPMEMFREE(partition);
    TMPMEMFREE(xyz);
    return;
}
