/trunk/dudley/src/Mesh_optimizeDOFDistribution.cpp

Revision 4346 - Tue Apr 2 04:46:45 2013 UTC by jfenwick
File size: 9907 byte(s)
Bringing the changes from doubleplusgood branch.
Can't merge directly because svn doesn't transfer changes to renamed files (mutter grumble).

/*****************************************************************************
*
* Copyright (c) 2003-2013 by University of Queensland
* http://www.uq.edu.au
*
* Primary Business: Queensland, Australia
* Licensed under the Open Software License version 3.0
* http://www.opensource.org/licenses/osl-3.0.php
*
* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
* Development since 2012 by School of Earth Sciences
*
*****************************************************************************/

/************************************************************************************/

/* Dudley: Mesh: optimizes the distribution of DOFs across processors using ParMETIS. */
/* On return a new distribution is given and the global DOF ids are relabelled        */
/* accordingly, but the mesh itself has not been redistributed yet.                   */

/************************************************************************************/

#include "Mesh.h"
#include "IndexList.h"
#ifdef _OPENMP
#include <omp.h>
#endif
#ifdef USE_PARMETIS
#include "parmetis.h"
#endif

/************************************************************************************
   Check whether there is any node that has no vertex. If such a node exists, we
   do not use ParMETIS, since ParMETIS requires that every node has at least one
   vertex (at line 129 of file "xyzpart.c" in ParMETIS 3.1.1, the variable "nvtxs"
   would be 0 if any node has no vertex).
************************************************************************************/
#ifdef USE_PARMETIS
int Check_Inputs_For_Parmetis(dim_t mpiSize, dim_t rank, dim_t * distribution, MPI_Comm * comm)
{
    dim_t i, len;
    int ret_val = 1;

    if (rank == 0)
    {
        for (i = 0; i < mpiSize; i++)
        {
            len = distribution[i + 1] - distribution[i];
            if (len == 0)
            {
                ret_val = 0;
                break;
            }
        }
    }
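    /* rank 0 makes the decision; broadcast the verdict so that every rank takes the same branch */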
    MPI_Bcast(&ret_val, 1, MPI_INT, 0, *comm);
    if (ret_val == 0)
        printf("INFO: Parmetis is not used since some nodes have no vertex!\n");
    return ret_val;
}
#endif

/************************************************************************************/

void Dudley_Mesh_optimizeDOFDistribution(Dudley_Mesh * in, dim_t * distribution)
{
    dim_t dim, i, j, k, myNumVertices, p, mpiSize, len, globalNumVertices;
    dim_t *partition_count = NULL, *new_distribution = NULL, *loc_partition_count = NULL;
    bool_t *setNewDOFId = NULL;
    index_t myFirstVertex, myLastVertex, firstVertex, lastVertex, *newGlobalDOFID = NULL;
    size_t mpiSize_size;
    index_t *partition = NULL;
    Paso_Pattern *pattern = NULL;
    Esys_MPI_rank myRank, current_rank, rank;
    Dudley_IndexList *index_list = NULL;
    float *xyz = NULL;
    int c;

#ifdef ESYS_MPI
    Esys_MPI_rank dest, source;
    MPI_Status status;
#endif

    if (in == NULL)
        return;
    if (in->Nodes == NULL)
        return;

    myRank = in->MPIInfo->rank;
    mpiSize = in->MPIInfo->size;
    mpiSize_size = mpiSize * sizeof(dim_t);
    dim = in->Nodes->numDim;
    /* first step is to distribute the elements according to a global numbering of the DOFs */
    myFirstVertex = distribution[myRank];
    myLastVertex = distribution[myRank + 1];
    myNumVertices = myLastVertex - myFirstVertex;
    globalNumVertices = distribution[mpiSize];
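    /* in the partitioning graph every degree of freedom is a vertex; this rank currently owns
       the contiguous block [myFirstVertex, myLastVertex) of the global numbering */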
    len = 0;
    for (p = 0; p < mpiSize; ++p)
        len = MAX(len, distribution[p + 1] - distribution[p]);
    partition = new index_t[len];    /* len is used for the sending around of partition later on */
    xyz = new float[myNumVertices * dim];
    partition_count = new dim_t[mpiSize + 1];
    new_distribution = new dim_t[mpiSize + 1];
    newGlobalDOFID = new index_t[len];
    setNewDOFId = new bool_t[in->Nodes->numNodes];
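    /* setNewDOFId[i] stays TRUE while node i still carries its old global DOF id; it is cleared
       once the id has been replaced by the new numbering further below */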
    if (!(Dudley_checkPtr(partition) || Dudley_checkPtr(xyz) || Dudley_checkPtr(partition_count)
          || Dudley_checkPtr(new_distribution) || Dudley_checkPtr(newGlobalDOFID) || Dudley_checkPtr(setNewDOFId)))
    {
        dim_t *recvbuf = new dim_t[mpiSize * mpiSize];

        /* set the coordinates: */
        /* it is assumed that at least one node on this processor provides a coordinate */
        #pragma omp parallel for private(i,j,k)
        for (i = 0; i < in->Nodes->numNodes; ++i)
        {
            k = in->Nodes->globalDegreesOfFreedom[i] - myFirstVertex;
            if ((k >= 0) && (k < myNumVertices))
            {
                for (j = 0; j < dim; ++j)
                    xyz[k * dim + j] = (float)(in->Nodes->Coordinates[INDEX2(j, i, dim)]);
            }
        }

        index_list = new Dudley_IndexList[myNumVertices];
        /* ksteube CSR of DOF IDs */
        /* create the adjacency structure xadj and adjncy */
        if (!Dudley_checkPtr(index_list))
        {
            #pragma omp parallel private(i)
            {
                #pragma omp for schedule(static)
                for (i = 0; i < myNumVertices; ++i)
                {
                    index_list[i].extension = NULL;
                    index_list[i].n = 0;
                }
                /* ksteube build CSR format */
                /* insert contributions from element matrices into columns index index_list: */
                Dudley_IndexList_insertElementsWithRowRangeNoMainDiagonal(index_list, myFirstVertex, myLastVertex,
                                                                          in->Elements,
                                                                          in->Nodes->globalDegreesOfFreedom,
                                                                          in->Nodes->globalDegreesOfFreedom);
                Dudley_IndexList_insertElementsWithRowRangeNoMainDiagonal(index_list, myFirstVertex, myLastVertex,
                                                                          in->FaceElements,
                                                                          in->Nodes->globalDegreesOfFreedom,
                                                                          in->Nodes->globalDegreesOfFreedom);
                Dudley_IndexList_insertElementsWithRowRangeNoMainDiagonal(index_list, myFirstVertex, myLastVertex,
                                                                          in->Points, in->Nodes->globalDegreesOfFreedom,
                                                                          in->Nodes->globalDegreesOfFreedom);
            }

            /* create the local matrix pattern */
            pattern = Dudley_IndexList_createPattern(0, myNumVertices, index_list, 0, globalNumVertices, 0);

            /* clean up index list */
            if (index_list != NULL)
            {
                #pragma omp parallel for private(i)
                for (i = 0; i < myNumVertices; ++i)
                    Dudley_IndexList_free(index_list[i].extension);
            }

            if (Dudley_noError())
            {
#ifdef USE_PARMETIS
                if (mpiSize > 1 && Check_Inputs_For_Parmetis(mpiSize, myRank, distribution, &(in->MPIInfo->comm)) > 0)
                {
                    int i;
                    int wgtflag = 0;
                    int numflag = 0;    /* pattern->ptr is C style: starting from 0 instead of 1 */
                    int ncon = 1;
                    int edgecut;
                    int options[2];
                    float *tpwgts = new float[ncon * mpiSize];
                    float *ubvec = new float[ncon];
                    for (i = 0; i < ncon * mpiSize; i++)
                        tpwgts[i] = 1.0 / (float)mpiSize;
                    for (i = 0; i < ncon; i++)
                        ubvec[i] = 1.05;
                    options[0] = 3;
                    options[1] = 15;
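                    /* partition the distributed DOF graph (CSR arrays pattern->ptr / pattern->index,
                       vertex coordinates xyz) into mpiSize parts; the part assigned to each of the
                       local vertices is returned in partition[] */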
                    ParMETIS_V3_PartGeomKway(distribution, pattern->ptr, pattern->index, NULL, NULL,
                                             &wgtflag, &numflag, &dim, xyz, &ncon, &mpiSize,
                                             tpwgts, ubvec, options, &edgecut,
                                             partition,    /* new CPU ownership of elements */
                                             &(in->MPIInfo->comm));
                    /* printf("ParMETIS number of edges cut by partitioning per processor: %d\n", edgecut/MAX(in->MPIInfo->size,1)); */
                    delete[] ubvec;
                    delete[] tpwgts;
                }
                else
                {
                    for (i = 0; i < myNumVertices; ++i)
                        partition[i] = 0;    /* CPU 0 owns it */
                }
#else
                for (i = 0; i < myNumVertices; ++i)
                    partition[i] = myRank;    /* without ParMETIS each rank keeps its own vertices */
#endif
            }

            Paso_Pattern_free(pattern);

            /* create a new distribution and labelling of the DOF */
            memset(new_distribution, 0, mpiSize_size);
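            /* count, in parallel, how many of the local vertices have been assigned to each rank;
               per-thread counts are merged into new_distribution inside the critical section */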
            #pragma omp parallel private(loc_partition_count)
            {
                loc_partition_count = new dim_t[mpiSize];
                memset(loc_partition_count, 0, mpiSize_size);
                #pragma omp for private(i)
                for (i = 0; i < myNumVertices; ++i)
                    loc_partition_count[partition[i]]++;
                #pragma omp critical
                {
                    for (i = 0; i < mpiSize; ++i)
                        new_distribution[i] += loc_partition_count[i];
                }
                delete[] loc_partition_count;
            }
#ifdef ESYS_MPI
            /* recvbuf will be the concatenation of each CPU's contribution to new_distribution */
            MPI_Allgather(new_distribution, mpiSize, MPI_INT, recvbuf, mpiSize, MPI_INT, in->MPIInfo->comm);
#else
            for (i = 0; i < mpiSize; ++i)
                recvbuf[i] = new_distribution[i];
#endif
            new_distribution[0] = 0;
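            /* recvbuf[q + mpiSize * p] holds the number of vertices that process p has assigned to
               partition q; within the block owned by 'rank' the new global DOF ids are handed out in
               order of the contributing process, so this rank's vertices start after the counts of
               all lower-numbered ranks, and new_distribution[rank + 1] accumulates the block total */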
            for (rank = 0; rank < mpiSize; rank++)
            {
                c = 0;
                for (i = 0; i < myRank; ++i)
                    c += recvbuf[rank + mpiSize * i];
                for (i = 0; i < myNumVertices; ++i)
                {
                    if (rank == partition[i])
                    {
                        newGlobalDOFID[i] = new_distribution[rank] + c;
                        c++;
                    }
                }
                for (i = myRank + 1; i < mpiSize; ++i)
                    c += recvbuf[rank + mpiSize * i];
                new_distribution[rank + 1] = new_distribution[rank] + c;
            }
            delete[] recvbuf;

            /* now the overlap needs to be created by sending the new DOF numbering around */
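            /* newGlobalDOFID is passed around all processes in a ring; at each step the block
               [firstVertex, lastVertex) of the old distribution owned by current_rank is available
               locally, so every node still referring to an old DOF id in that block can be relabelled */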
#ifdef ESYS_MPI
            dest = Esys_MPIInfo_mod(mpiSize, myRank + 1);
            source = Esys_MPIInfo_mod(mpiSize, myRank - 1);
#endif
            current_rank = myRank;
            #pragma omp parallel for private(i)
            for (i = 0; i < in->Nodes->numNodes; ++i)
                setNewDOFId[i] = TRUE;

            for (p = 0; p < mpiSize; ++p)
            {
                firstVertex = distribution[current_rank];
                lastVertex = distribution[current_rank + 1];
                #pragma omp parallel for private(i,j,k)
                for (i = 0; i < in->Nodes->numNodes; ++i)
                {
                    k = in->Nodes->globalDegreesOfFreedom[i];
                    if (setNewDOFId[i] && (firstVertex <= k) && (k < lastVertex))
                    {
                        in->Nodes->globalDegreesOfFreedom[i] = newGlobalDOFID[k - firstVertex];
                        setNewDOFId[i] = FALSE;
                    }
                }

                if (p < mpiSize - 1)
                {   /* the final send can be skipped */
#ifdef ESYS_MPI
                    MPI_Sendrecv_replace(newGlobalDOFID, len, MPI_INT,
                                         dest, in->MPIInfo->msg_tag_counter,
                                         source, in->MPIInfo->msg_tag_counter, in->MPIInfo->comm, &status);
#endif
                    in->MPIInfo->msg_tag_counter++;
                    current_rank = Esys_MPIInfo_mod(mpiSize, current_rank - 1);
                }
            }
            for (i = 0; i < mpiSize + 1; ++i)
                distribution[i] = new_distribution[i];
        }
        delete[] index_list;
    }
    delete[] newGlobalDOFID;
    delete[] setNewDOFId;
    delete[] new_distribution;
    delete[] partition_count;
    delete[] partition;
    delete[] xyz;
    return;
}
