Contents of /trunk/dudley/src/Mesh_optimizeDOFDistribution.c

Revision 4286 - Thu Mar 7 04:28:11 2013 UTC by caltinay
File MIME type: text/plain
File size: 10034 byte(s)
Assorted spelling fixes.

/*****************************************************************************
*
* Copyright (c) 2003-2013 by University of Queensland
* http://www.uq.edu.au
*
* Primary Business: Queensland, Australia
* Licensed under the Open Software License version 3.0
* http://www.opensource.org/licenses/osl-3.0.php
*
* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
* Development since 2012 by School of Earth Sciences
*
*****************************************************************************/

/************************************************************************************/

/* Dudley: Mesh: optimizes the distribution of DOFs across processors */
/* using ParMETIS. On return a new distribution is given and the globalDOFs are relabeled */
/* accordingly, but the mesh has not been redistributed yet */

/************************************************************************************/

#include "Mesh.h"
#include "IndexList.h"
#ifdef _OPENMP
#include <omp.h>
#endif
#ifdef USE_PARMETIS
#include "parmetis.h"
#endif

/************************************************************************************
   Check whether there is any node which has no vertex. If such a
   node exists, we don't use parmetis since parmetis requires that
   every node has at least 1 vertex (at line 129 of file "xyzpart.c"
   in parmetis 3.1.1, variable "nvtxs" would be 0 if any node has no
   vertex).
************************************************************************************/
#ifdef USE_PARMETIS
int Check_Inputs_For_Parmetis(dim_t mpiSize, dim_t rank, dim_t * distribution, MPI_Comm * comm)
{
    dim_t i, len;
    int ret_val = 1;

    if (rank == 0)
    {
        for (i = 0; i < mpiSize; i++)
        {
            len = distribution[i + 1] - distribution[i];
            if (len == 0)
            {
                ret_val = 0;
                break;
            }
        }
    }
    MPI_Bcast(&ret_val, 1, MPI_INT, 0, *comm);
    if (ret_val == 0)
        printf("INFO: Parmetis is not used since some nodes have no vertex!\n");
    return ret_val;
}
#endif

/************************************************************************************/

void Dudley_Mesh_optimizeDOFDistribution(Dudley_Mesh * in, dim_t * distribution)
{
    dim_t dim, i, j, k, myNumVertices, p, mpiSize, len, globalNumVertices;
    dim_t *partition_count = NULL, *new_distribution = NULL, *loc_partition_count = NULL;
    bool_t *setNewDOFId = NULL;
    index_t myFirstVertex, myLastVertex, firstVertex, lastVertex, *newGlobalDOFID = NULL;
    size_t mpiSize_size;
    index_t *partition = NULL;
    Paso_Pattern *pattern = NULL;
    Esys_MPI_rank myRank, current_rank, rank;
    Dudley_IndexList *index_list = NULL;
    float *xyz = NULL;
    int c;

#ifdef ESYS_MPI
    Esys_MPI_rank dest, source;
    MPI_Status status;
#endif

    if (in == NULL)
        return;
    if (in->Nodes == NULL)
        return;

    myRank = in->MPIInfo->rank;
    mpiSize = in->MPIInfo->size;
    mpiSize_size = mpiSize * sizeof(dim_t);
    dim = in->Nodes->numDim;
    /* first step is to distribute the elements according to a global numbering of the DOFs */
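    /* distribution[] is the current block distribution of the global DOFs:
       rank p owns the contiguous index range [distribution[p], distribution[p+1]),
       and distribution[mpiSize] is the total number of DOFs. */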

    myFirstVertex = distribution[myRank];
    myLastVertex = distribution[myRank + 1];
    myNumVertices = myLastVertex - myFirstVertex;
    globalNumVertices = distribution[mpiSize];
    len = 0;
    for (p = 0; p < mpiSize; ++p)
        len = MAX(len, distribution[p + 1] - distribution[p]);
    partition = TMPMEMALLOC(len, index_t);    /* len is used for the sending around of partition later on */
    xyz = TMPMEMALLOC(myNumVertices * dim, float);
    partition_count = TMPMEMALLOC(mpiSize + 1, dim_t);
    new_distribution = TMPMEMALLOC(mpiSize + 1, dim_t);
    newGlobalDOFID = TMPMEMALLOC(len, index_t);
    setNewDOFId = TMPMEMALLOC(in->Nodes->numNodes, bool_t);
    if (!(Dudley_checkPtr(partition) || Dudley_checkPtr(xyz) || Dudley_checkPtr(partition_count)
          || Dudley_checkPtr(new_distribution) || Dudley_checkPtr(newGlobalDOFID) || Dudley_checkPtr(setNewDOFId)))
    {
        dim_t *recvbuf = TMPMEMALLOC(mpiSize * mpiSize, dim_t);

        /* set the coordinates: */
        /* it is assumed that at least one node on this processor provides a coordinate */
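        /* xyz is indexed by the local vertex number k = globalDOF - myFirstVertex, so it
           holds one coordinate tuple per DOF owned by this rank; it is later passed to
           ParMETIS' geometric partitioner as the vertex coordinates. */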
#pragma omp parallel for private(i,j,k)
        for (i = 0; i < in->Nodes->numNodes; ++i)
        {
            k = in->Nodes->globalDegreesOfFreedom[i] - myFirstVertex;
            if ((k >= 0) && (k < myNumVertices))
            {
                for (j = 0; j < dim; ++j)
                    xyz[k * dim + j] = (float)(in->Nodes->Coordinates[INDEX2(j, i, dim)]);
            }
        }

        index_list = TMPMEMALLOC(myNumVertices, Dudley_IndexList);
        /* ksteube CSR of DOF IDs */
        /* create the adjacency structure xadj and adjncy */
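        /* index_list[i] collects, for local vertex i (global DOF myFirstVertex+i), the global
           DOF numbers it shares an element with; the main diagonal (self-edges) is left out,
           as suggested by the "NoMainDiagonal" insert routines used below. */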
        if (!Dudley_checkPtr(index_list))
        {
#pragma omp parallel private(i)
            {
#pragma omp for schedule(static)
                for (i = 0; i < myNumVertices; ++i)
                {
                    index_list[i].extension = NULL;
                    index_list[i].n = 0;
                }
                /* ksteube build CSR format */
                /* insert contributions from element matrices into the column index list index_list: */
                Dudley_IndexList_insertElementsWithRowRangeNoMainDiagonal(index_list, myFirstVertex, myLastVertex,
                                                                          in->Elements,
                                                                          in->Nodes->globalDegreesOfFreedom,
                                                                          in->Nodes->globalDegreesOfFreedom);
                Dudley_IndexList_insertElementsWithRowRangeNoMainDiagonal(index_list, myFirstVertex, myLastVertex,
                                                                          in->FaceElements,
                                                                          in->Nodes->globalDegreesOfFreedom,
                                                                          in->Nodes->globalDegreesOfFreedom);
                Dudley_IndexList_insertElementsWithRowRangeNoMainDiagonal(index_list, myFirstVertex, myLastVertex,
                                                                          in->Points,
                                                                          in->Nodes->globalDegreesOfFreedom,
                                                                          in->Nodes->globalDegreesOfFreedom);
            }

            /* create the local matrix pattern */
            pattern = Dudley_IndexList_createPattern(0, myNumVertices, index_list, 0, globalNumVertices, 0);
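            /* pattern->ptr and pattern->index form the CSR arrays (xadj/adjncy in ParMETIS
               terms) of the distributed connectivity graph built above; they are handed to
               ParMETIS_V3_PartGeomKway further down. */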

            /* clean up index list */
            if (index_list != NULL)
            {
#pragma omp parallel for private(i)
                for (i = 0; i < myNumVertices; ++i)
                    Dudley_IndexList_free(index_list[i].extension);
            }

            if (Dudley_noError())
            {

#ifdef USE_PARMETIS

                if (mpiSize > 1 && Check_Inputs_For_Parmetis(mpiSize, myRank, distribution, &(in->MPIInfo->comm)) > 0)
                {
                    int i;
                    int wgtflag = 0;
                    int numflag = 0;    /* pattern->ptr is C style: starting from 0 instead of 1 */
                    int ncon = 1;
                    int edgecut;
                    int options[2];
                    float *tpwgts = TMPMEMALLOC(ncon * mpiSize, float);
                    float *ubvec = TMPMEMALLOC(ncon, float);
                    for (i = 0; i < ncon * mpiSize; i++)
                        tpwgts[i] = 1.0 / (float)mpiSize;
                    for (i = 0; i < ncon; i++)
                        ubvec[i] = 1.05;
                    options[0] = 3;
                    options[1] = 15;
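                    /* tpwgts requests equal target weights (1/mpiSize per partition and
                       constraint) and ubvec allows 5% load imbalance; the geometric k-way
                       partitioner below then assigns each local vertex a new owner rank,
                       returned in partition[]. */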
                    ParMETIS_V3_PartGeomKway(distribution, pattern->ptr, pattern->index, NULL, NULL,
                                             &wgtflag, &numflag, &dim, xyz, &ncon, &mpiSize,
                                             tpwgts, ubvec, options, &edgecut,
                                             partition,    /* new CPU ownership of elements */
                                             &(in->MPIInfo->comm));
                    /* printf("ParMETIS number of edges cut by partitioning per processor: %d\n", edgecut/MAX(in->MPIInfo->size,1)); */
                    TMPMEMFREE(ubvec);
                    TMPMEMFREE(tpwgts);
                }
                else
                {
                    for (i = 0; i < myNumVertices; ++i)
                        partition[i] = 0;    /* CPU 0 owns it */
                }
#else
                for (i = 0; i < myNumVertices; ++i)
                    partition[i] = myRank;    /* without ParMETIS each rank keeps its own vertices */
#endif

            }

            Paso_Pattern_free(pattern);

            /* create a new distribution and labeling of the DOF */
            memset(new_distribution, 0, mpiSize_size);
#pragma omp parallel private(loc_partition_count)
            {
                loc_partition_count = THREAD_MEMALLOC(mpiSize, dim_t);
                memset(loc_partition_count, 0, mpiSize_size);
#pragma omp for private(i)
                for (i = 0; i < myNumVertices; ++i)
                    loc_partition_count[partition[i]]++;
#pragma omp critical
                {
                    for (i = 0; i < mpiSize; ++i)
                        new_distribution[i] += loc_partition_count[i];
                }
                THREAD_MEMFREE(loc_partition_count);
            }
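            /* at this point new_distribution[p] holds the number of local vertices that the
               partitioning assigned to rank p (the thread-local counts have been merged in
               the critical section above). */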
#ifdef ESYS_MPI
            /* recvbuf will be the concatenation of each CPU's contribution to new_distribution */
            MPI_Allgather(new_distribution, mpiSize, MPI_INT, recvbuf, mpiSize, MPI_INT, in->MPIInfo->comm);
#else
            for (i = 0; i < mpiSize; ++i)
                recvbuf[i] = new_distribution[i];
#endif
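            /* recvbuf[p + mpiSize * r] is the number of vertices that rank r assigned to
               partition p. The loop below turns these counts into the new block distribution
               and gives every local vertex its new global DOF id; within each partition the
               new ids are numbered in rank order. */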
            new_distribution[0] = 0;
            for (rank = 0; rank < mpiSize; rank++)
            {
                c = 0;
                for (i = 0; i < myRank; ++i)
                    c += recvbuf[rank + mpiSize * i];
                for (i = 0; i < myNumVertices; ++i)
                {
                    if (rank == partition[i])
                    {
                        newGlobalDOFID[i] = new_distribution[rank] + c;
                        c++;
                    }
                }
                for (i = myRank + 1; i < mpiSize; ++i)
                    c += recvbuf[rank + mpiSize * i];
                new_distribution[rank + 1] = new_distribution[rank] + c;
            }
            TMPMEMFREE(recvbuf);

            /* now the overlap needs to be created by sending the partition around */
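            /* ring exchange: newGlobalDOFID currently holds the new ids for this rank's old
               vertex range. In each of the mpiSize rounds the range belonging to current_rank
               is applied to the matching local nodes, then the buffer is passed on
               (dest = myRank+1, received from myRank-1), so every rank eventually relabels
               the DOFs it shares with any other rank. */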
#ifdef ESYS_MPI
            dest = Esys_MPIInfo_mod(mpiSize, myRank + 1);
            source = Esys_MPIInfo_mod(mpiSize, myRank - 1);
#endif
            current_rank = myRank;
#pragma omp parallel for private(i)
            for (i = 0; i < in->Nodes->numNodes; ++i)
                setNewDOFId[i] = TRUE;

            for (p = 0; p < mpiSize; ++p)
            {
                firstVertex = distribution[current_rank];
                lastVertex = distribution[current_rank + 1];
#pragma omp parallel for private(i,j,k)
                for (i = 0; i < in->Nodes->numNodes; ++i)
                {
                    k = in->Nodes->globalDegreesOfFreedom[i];
                    if (setNewDOFId[i] && (firstVertex <= k) && (k < lastVertex))
                    {
                        in->Nodes->globalDegreesOfFreedom[i] = newGlobalDOFID[k - firstVertex];
                        setNewDOFId[i] = FALSE;
                    }
                }

                if (p < mpiSize - 1)
                {   /* the final send can be skipped */
#ifdef ESYS_MPI
                    MPI_Sendrecv_replace(newGlobalDOFID, len, MPI_INT,
                                         dest, in->MPIInfo->msg_tag_counter,
                                         source, in->MPIInfo->msg_tag_counter, in->MPIInfo->comm, &status);
#endif
                    in->MPIInfo->msg_tag_counter++;
                    current_rank = Esys_MPIInfo_mod(mpiSize, current_rank - 1);
                }
            }
            for (i = 0; i < mpiSize + 1; ++i)
                distribution[i] = new_distribution[i];
        }
        TMPMEMFREE(index_list);
    }
    TMPMEMFREE(newGlobalDOFID);
    TMPMEMFREE(setNewDOFId);
    TMPMEMFREE(new_distribution);
    TMPMEMFREE(partition_count);
    TMPMEMFREE(partition);
    TMPMEMFREE(xyz);
    return;
}
