/[escript]/branches/doubleplusgood/dudley/src/Mesh_optimizeDOFDistribution.cpp

/*****************************************************************************
*
* Copyright (c) 2003-2013 by University of Queensland
* http://www.uq.edu.au
*
* Primary Business: Queensland, Australia
* Licensed under the Open Software License version 3.0
* http://www.opensource.org/licenses/osl-3.0.php
*
* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
* Development since 2012 by School of Earth Sciences
*
*****************************************************************************/

/************************************************************************************/

/* Dudley: Mesh: optimizes the distribution of DOFs across processors */
/* using ParMETIS. On return a new distribution is given and the globalDOFs are relabeled */
/* accordingly, but the mesh has not been redistributed yet */
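/*
   Outline of the algorithm:
     1. build a CSR adjacency structure (xadj/adjncy) of the locally owned
        DOFs from the element, face element and point lists;
     2. call ParMETIS_V3_PartGeomKway to assign each local DOF to a rank;
     3. derive the new distribution and a new global DOF numbering from the
        partition;
     4. pass the new numbering around the process ring so that every rank
        can relabel the DOFs it references.
*/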

/************************************************************************************/

#include "Mesh.h"
#include "IndexList.h"
#ifdef _OPENMP
#include <omp.h>
#endif
#ifdef USE_PARMETIS
#include "parmetis.h"
#endif

/************************************************************************************
   Check whether there is any node which has no vertex. If such a node
   exists, we do not use ParMETIS, since ParMETIS requires that every node
   has at least 1 vertex (at line 129 of file "xyzpart.c" in ParMETIS 3.1.1,
   the variable "nvtxs" would be 0 if any node had no vertex).
************************************************************************************/
#ifdef USE_PARMETIS
int Check_Inputs_For_Parmetis(dim_t mpiSize, dim_t rank, dim_t * distribution, MPI_Comm * comm)
{
    dim_t i, len;
    int ret_val = 1;

    if (rank == 0)
    {
        for (i = 0; i < mpiSize; i++)
        {
            len = distribution[i + 1] - distribution[i];
            if (len == 0)
            {
                ret_val = 0;
                break;
            }
        }
    }
    MPI_Bcast(&ret_val, 1, MPI_INT, 0, *comm);
    if (ret_val == 0)
        printf("INFO: Parmetis is not used since some nodes have no vertex!\n");
    return ret_val;
}
#endif
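
/* Note: distribution has mpiSize+1 entries; rank p owns the global DOFs
   [distribution[p], distribution[p+1]), and distribution[mpiSize] is the
   total number of DOFs. The check above rejects any empty ownership range. */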

/************************************************************************************/

void Dudley_Mesh_optimizeDOFDistribution(Dudley_Mesh * in, dim_t * distribution)
{
    dim_t dim, i, j, k, myNumVertices, p, mpiSize, len, globalNumVertices;
    dim_t *partition_count = NULL, *new_distribution = NULL, *loc_partition_count = NULL;
    bool_t *setNewDOFId = NULL;
    index_t myFirstVertex, myLastVertex, firstVertex, lastVertex, *newGlobalDOFID = NULL;
    size_t mpiSize_size;
    index_t *partition = NULL;
    Paso_Pattern *pattern = NULL;
    Esys_MPI_rank myRank, current_rank, rank;
    Dudley_IndexList *index_list = NULL;
    float *xyz = NULL;
    int c;

#ifdef ESYS_MPI
    Esys_MPI_rank dest, source;
    MPI_Status status;
#endif

    if (in == NULL)
        return;
    if (in->Nodes == NULL)
        return;

    myRank = in->MPIInfo->rank;
    mpiSize = in->MPIInfo->size;
    mpiSize_size = mpiSize * sizeof(dim_t);
    dim = in->Nodes->numDim;
    /* first step is to distribute the elements according to a global numbering of the DOFs */

    myFirstVertex = distribution[myRank];
    myLastVertex = distribution[myRank + 1];
    myNumVertices = myLastVertex - myFirstVertex;
    globalNumVertices = distribution[mpiSize];
    len = 0;
    for (p = 0; p < mpiSize; ++p)
        len = MAX(len, distribution[p + 1] - distribution[p]);
    partition = new index_t[len];    /* len is used for the sending around of partition later on */
    xyz = new float[myNumVertices * dim];
    partition_count = new dim_t[mpiSize + 1];
    new_distribution = new dim_t[mpiSize + 1];
    newGlobalDOFID = new index_t[len];
    setNewDOFId = new bool_t[in->Nodes->numNodes];
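    /* partition[] will receive the ParMETIS target rank of each locally owned
       vertex and xyz[] its coordinates; partition[] and newGlobalDOFID[] are
       sized len, the largest ownership range of any rank, so newGlobalDOFID[]
       can hold any rank's block when it is passed around the ring below. */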
    if (!
        (Dudley_checkPtr(partition) || Dudley_checkPtr(xyz) || Dudley_checkPtr(partition_count)
         || Dudley_checkPtr(new_distribution) || Dudley_checkPtr(newGlobalDOFID) || Dudley_checkPtr(setNewDOFId)))
    {
        dim_t *recvbuf = new dim_t[mpiSize * mpiSize];

        /* set the coordinates: */
        /* it is assumed that at least one node on this processor provides a coordinate */
#pragma omp parallel for private(i,j,k)
        for (i = 0; i < in->Nodes->numNodes; ++i)
        {
            k = in->Nodes->globalDegreesOfFreedom[i] - myFirstVertex;
            if ((k >= 0) && (k < myNumVertices))
            {
                for (j = 0; j < dim; ++j)
                    xyz[k * dim + j] = (float)(in->Nodes->Coordinates[INDEX2(j, i, dim)]);
            }
        }

        index_list = new Dudley_IndexList[myNumVertices];
        /* ksteube CSR of DOF IDs */
        /* create the adjacency structure xadj and adjncy */
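        /* In CSR form the neighbours of local vertex i end up in
           adjncy[xadj[i]] .. adjncy[xadj[i+1]-1]. index_list[i] first collects
           the column indices of row i; Dudley_IndexList_createPattern below
           compresses the lists into the ptr/index (xadj/adjncy) arrays that
           ParMETIS expects. */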
        if (!Dudley_checkPtr(index_list))
        {
#pragma omp parallel private(i)
            {
#pragma omp for schedule(static)
                for (i = 0; i < myNumVertices; ++i)
                {
                    index_list[i].extension = NULL;
                    index_list[i].n = 0;
                }
                /* ksteube build CSR format */
                /* insert contributions from element matrices into column index index_list: */
                Dudley_IndexList_insertElementsWithRowRangeNoMainDiagonal(index_list, myFirstVertex, myLastVertex,
                                                                          in->Elements,
                                                                          in->Nodes->globalDegreesOfFreedom,
                                                                          in->Nodes->globalDegreesOfFreedom);
                Dudley_IndexList_insertElementsWithRowRangeNoMainDiagonal(index_list, myFirstVertex, myLastVertex,
                                                                          in->FaceElements,
                                                                          in->Nodes->globalDegreesOfFreedom,
                                                                          in->Nodes->globalDegreesOfFreedom);
                Dudley_IndexList_insertElementsWithRowRangeNoMainDiagonal(index_list, myFirstVertex, myLastVertex,
                                                                          in->Points, in->Nodes->globalDegreesOfFreedom,
                                                                          in->Nodes->globalDegreesOfFreedom);
            }

            /* create the local matrix pattern */
            pattern = Dudley_IndexList_createPattern(0, myNumVertices, index_list, 0, globalNumVertices, 0);

            /* clean up index list */
            if (index_list != NULL)
            {
#pragma omp parallel for private(i)
                for (i = 0; i < myNumVertices; ++i)
                    Dudley_IndexList_free(index_list[i].extension);
            }

            if (Dudley_noError())
            {
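                /* ParMETIS_V3_PartGeomKway(vtxdist, xadj, adjncy, vwgt, adjwgt,
                   wgtflag, numflag, ndims, xyz, ncon, nparts, tpwgts, ubvec,
                   options, edgecut, part, comm) combines the coordinates in
                   xyz with the graph in xadj/adjncy. Here vwgt and adjwgt are
                   NULL and wgtflag == 0 (unweighted graph), tpwgts requests
                   equally sized parts and ubvec allows 5% load imbalance. */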
#ifdef USE_PARMETIS

                if (mpiSize > 1 && Check_Inputs_For_Parmetis(mpiSize, myRank, distribution, &(in->MPIInfo->comm)) > 0)
                {
                    int i;
                    int wgtflag = 0;
                    int numflag = 0;    /* pattern->ptr is C style: starting from 0 instead of 1 */
                    int ncon = 1;
                    int edgecut;
                    int options[2];
                    float *tpwgts = new float[ncon * mpiSize];
                    float *ubvec = new float[ncon];
                    for (i = 0; i < ncon * mpiSize; i++)
                        tpwgts[i] = 1.0 / (float)mpiSize;
                    for (i = 0; i < ncon; i++)
                        ubvec[i] = 1.05;
                    options[0] = 3;
                    options[1] = 15;
                    ParMETIS_V3_PartGeomKway(distribution, pattern->ptr, pattern->index, NULL, NULL,
                                             &wgtflag, &numflag, &dim, xyz, &ncon, &mpiSize,
                                             tpwgts, ubvec, options, &edgecut,
                                             partition,    /* new CPU ownership of elements */
                                             &(in->MPIInfo->comm));
                    /* printf("ParMETIS number of edges cut by partitioning per processor: %d\n", edgecut/MAX(in->MPIInfo->size,1)); */
                    delete[] ubvec;
                    delete[] tpwgts;
                }
                else
                {
                    for (i = 0; i < myNumVertices; ++i)
                        partition[i] = 0;    /* CPU 0 owns all vertices */
                }
#else
                for (i = 0; i < myNumVertices; ++i)
                    partition[i] = myRank;    /* without ParMETIS each CPU keeps its own vertices */
#endif

            }

            Paso_Pattern_free(pattern);

            /* create a new distribution and labeling of the DOF */
            memset(new_distribution, 0, mpiSize_size);
#pragma omp parallel private(loc_partition_count)
            {
                loc_partition_count = THREAD_MEMALLOC(mpiSize, dim_t);
                memset(loc_partition_count, 0, mpiSize_size);
#pragma omp for private(i)
                for (i = 0; i < myNumVertices; ++i)
                    loc_partition_count[partition[i]]++;
#pragma omp critical
                {
                    for (i = 0; i < mpiSize; ++i)
                        new_distribution[i] += loc_partition_count[i];
                }
                THREAD_MEMFREE(loc_partition_count);
            }
#ifdef ESYS_MPI
            /* recvbuf will be the concatenation of each CPU's contribution to new_distribution */
            MPI_Allgather(new_distribution, mpiSize, MPI_INT, recvbuf, mpiSize, MPI_INT, in->MPIInfo->comm);
#else
            for (i = 0; i < mpiSize; ++i)
                recvbuf[i] = new_distribution[i];
#endif
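            /* After the gather, recvbuf[rank + mpiSize * i] is the number of
               vertices CPU i hands to partition "rank". The loop below turns
               these counts into offsets: c is the position of this CPU's
               vertices within partition "rank", so each local vertex assigned
               to "rank" receives a new, globally unique DOF id. */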
            new_distribution[0] = 0;
            for (rank = 0; rank < mpiSize; rank++)
            {
                c = 0;
                for (i = 0; i < myRank; ++i)
                    c += recvbuf[rank + mpiSize * i];
                for (i = 0; i < myNumVertices; ++i)
                {
                    if (rank == partition[i])
                    {
                        newGlobalDOFID[i] = new_distribution[rank] + c;
                        c++;
                    }
                }
                for (i = myRank + 1; i < mpiSize; ++i)
                    c += recvbuf[rank + mpiSize * i];
                new_distribution[rank + 1] = new_distribution[rank] + c;
            }
            delete[] recvbuf;

            /* now the overlap needs to be created by sending the partition around */
#ifdef ESYS_MPI
            dest = Esys_MPIInfo_mod(mpiSize, myRank + 1);
            source = Esys_MPIInfo_mod(mpiSize, myRank - 1);
#endif
            current_rank = myRank;
#pragma omp parallel for private(i)
            for (i = 0; i < in->Nodes->numNodes; ++i)
                setNewDOFId[i] = TRUE;
263 jfenwick 3224 for (p = 0; p < mpiSize; ++p)
264     {
265 ksteube 1315
266 jfenwick 3224 firstVertex = distribution[current_rank];
267     lastVertex = distribution[current_rank + 1];
268     #pragma omp parallel for private(i,j,k)
269     for (i = 0; i < in->Nodes->numNodes; ++i)
270     {
271     k = in->Nodes->globalDegreesOfFreedom[i];
272     if (setNewDOFId[i] && (firstVertex <= k) && (k < lastVertex))
273     {
274     in->Nodes->globalDegreesOfFreedom[i] = newGlobalDOFID[k - firstVertex];
275     setNewDOFId[i] = FALSE;
276     }
277     }
278    
279     if (p < mpiSize - 1)
280     { /* the final send can be skipped */
281 jfenwick 3231 #ifdef ESYS_MPI
282 jfenwick 3224 MPI_Sendrecv_replace(newGlobalDOFID, len, MPI_INT,
283     dest, in->MPIInfo->msg_tag_counter,
284     source, in->MPIInfo->msg_tag_counter, in->MPIInfo->comm, &status);
285     #endif
286     in->MPIInfo->msg_tag_counter++;
287 jfenwick 3227 current_rank = Esys_MPIInfo_mod(mpiSize, current_rank - 1);
288 jfenwick 3224 }
289     }
290     for (i = 0; i < mpiSize + 1; ++i)
291     distribution[i] = new_distribution[i];
292     }
293 jfenwick 4332 delete[] index_list;
294 jfenwick 3224 }
    delete[] newGlobalDOFID;
    delete[] setNewDOFId;
    delete[] new_distribution;
    delete[] partition_count;
    delete[] partition;
    delete[] xyz;
    return;
}
