
Contents of /trunk/finley/src/Mesh_optimizeDOFDistribution.c



Revision 3603
Mon Sep 19 03:42:53 2011 UTC by caltinay
File MIME type: text/plain
File size: 11902 byte(s)
Fixed some unused var errors in non-mpi build.

/*******************************************************
*
* Copyright (c) 2003-2010 by University of Queensland
* Earth Systems Science Computational Center (ESSCC)
* http://www.uq.edu.au/esscc
*
* Primary Business: Queensland, Australia
* Licensed under the Open Software License version 3.0
* http://www.opensource.org/licenses/osl-3.0.php
*
*******************************************************/


/**************************************************************/

/* Finley: Mesh: optimizes the distribution of DOFs across processors */
/* using ParMETIS. On return a new distribution is given and the global DOFs are relabelled */
/* accordingly, but the mesh has not been redistributed yet. */

/**************************************************************/
#include "Mesh.h"
#include "IndexList.h"
#ifdef _OPENMP
#include <omp.h>
#endif
#ifdef USE_PARMETIS
#include "parmetis.h"
#endif
/**************************************************************
   Check whether there is any rank which has no vertex. If such
   a rank exists, we do not use ParMETIS, since ParMETIS requires
   that every rank has at least one vertex (at line 129 of file
   "xyzpart.c" in parmetis 3.1.1, variable "nvtxs" would be 0 if
   any rank has no vertex).
**************************************************************/
#ifdef USE_PARMETIS
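/* returns 1 if every rank owns at least one vertex (DOF) and 0 otherwise; */
/* the check is done on rank 0 and the result is broadcast to all ranks.   */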
int Check_Inputs_For_Parmetis(dim_t mpiSize, dim_t rank, dim_t *distribution, MPI_Comm *comm)
{
    dim_t i, len;
    int ret_val = 1;

    if (rank == 0) {
        for (i=0; i<mpiSize; i++) {
            len = distribution[i+1] - distribution[i];
            if (len == 0) {
                ret_val = 0;
                break;
            }
        }
    }
    MPI_Bcast(&ret_val, 1, MPI_INT, 0, *comm);
    if (ret_val == 0)
        printf("INFO: Parmetis is not used since some nodes have no vertex!\n");
    return ret_val;
}
#endif
/**************************************************************/
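/* in:           the mesh whose degree-of-freedom distribution is to be optimized           */
/* distribution: array of length mpiSize+1 holding the first global DOF index of each rank  */
/*               (distribution[mpiSize] is the global number of DOFs); overwritten with the */
/*               optimized distribution on return                                           */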
void Finley_Mesh_optimizeDOFDistribution(Finley_Mesh* in, dim_t *distribution) {

    dim_t dim, i, j, k, myNumVertices, p, mpiSize, len, globalNumVertices, *partition_count=NULL, *new_distribution=NULL, *loc_partition_count=NULL;
    bool_t *setNewDOFId=NULL;
    index_t myFirstVertex, myLastVertex, firstVertex, lastVertex, *newGlobalDOFID=NULL;
    size_t mpiSize_size;
    index_t* partition=NULL;
    Paso_Pattern *pattern=NULL;
    Esys_MPI_rank myRank, current_rank, rank;
    Finley_IndexList* index_list=NULL;
    float *xyz=NULL;
    int c;

#ifdef ESYS_MPI
    Esys_MPI_rank dest, source;
    MPI_Status status;
#endif

    if (in==NULL) return;
    if (in->Nodes == NULL) return;

    myRank=in->MPIInfo->rank;
    mpiSize=in->MPIInfo->size;
    mpiSize_size=mpiSize*sizeof(dim_t);
    dim=in->Nodes->numDim;
    /* first step is to distribute the elements according to a global numbering of the DOFs */

    myFirstVertex=distribution[myRank];
    myLastVertex=distribution[myRank+1];
    myNumVertices=myLastVertex-myFirstVertex;
    globalNumVertices=distribution[mpiSize];
    len=0;
    for (p=0;p<mpiSize;++p) len=MAX(len,distribution[p+1]-distribution[p]);
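    /* scratch buffers: partition holds the new owner of each local vertex, xyz the vertex   */
    /* coordinates handed to ParMETIS, newGlobalDOFID the relabelled IDs that are circulated */
    /* between ranks, and setNewDOFId marks the nodes that still await a new ID. len is the  */
    /* largest number of vertices owned by any rank.                                         */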
    partition=TMPMEMALLOC(len,index_t); /* len is used for the sending around of partition later on */
    xyz=TMPMEMALLOC(myNumVertices*dim,float);
    partition_count=TMPMEMALLOC(mpiSize+1,dim_t);
    new_distribution=TMPMEMALLOC(mpiSize+1,dim_t);
    newGlobalDOFID=TMPMEMALLOC(len,index_t);
    setNewDOFId=TMPMEMALLOC(in->Nodes->numNodes,bool_t);
    if (!(Finley_checkPtr(partition) || Finley_checkPtr(xyz) || Finley_checkPtr(partition_count) || Finley_checkPtr(new_distribution) || Finley_checkPtr(newGlobalDOFID) || Finley_checkPtr(setNewDOFId))) {
        dim_t *recvbuf=TMPMEMALLOC(mpiSize*mpiSize,dim_t);

        /* set the coordinates: */
        /* it is assumed that at least one node on this processor provides a coordinate */
        #pragma omp parallel for private(i,j,k)
        for (i=0;i<in->Nodes->numNodes;++i) {
            k=in->Nodes->globalDegreesOfFreedom[i]-myFirstVertex;
            if ((k>=0) && (k<myNumVertices)) {
                for (j=0;j<dim;++j) xyz[k*dim+j]=(float)(in->Nodes->Coordinates[INDEX2(j,i,dim)]);
            }
        }

        index_list=TMPMEMALLOC(myNumVertices,Finley_IndexList);
        /* ksteube CSR of DOF IDs */
        /* create the adjacency structure xadj and adjncy */
        if (! Finley_checkPtr(index_list)) {
            #pragma omp parallel private(i)
            {
                #pragma omp for schedule(static)
                for (i=0;i<myNumVertices;++i) {
                    index_list[i].extension=NULL;
                    index_list[i].n=0;
                }
                /* ksteube build CSR format */
                /* insert contributions from element matrices into the column index list: */
                Finley_IndexList_insertElementsWithRowRangeNoMainDiagonal(index_list, myFirstVertex, myLastVertex,
                        in->Elements, in->Nodes->globalDegreesOfFreedom,
                        in->Nodes->globalDegreesOfFreedom);
                Finley_IndexList_insertElementsWithRowRangeNoMainDiagonal(index_list, myFirstVertex, myLastVertex,
                        in->FaceElements, in->Nodes->globalDegreesOfFreedom,
                        in->Nodes->globalDegreesOfFreedom);
                Finley_IndexList_insertElementsWithRowRangeNoMainDiagonal(index_list, myFirstVertex, myLastVertex,
                        in->ContactElements, in->Nodes->globalDegreesOfFreedom,
                        in->Nodes->globalDegreesOfFreedom);
                Finley_IndexList_insertElementsWithRowRangeNoMainDiagonal(index_list, myFirstVertex, myLastVertex,
                        in->Points, in->Nodes->globalDegreesOfFreedom,
                        in->Nodes->globalDegreesOfFreedom);
            }

            /* create the local matrix pattern */
            pattern=Finley_IndexList_createPattern(0,myNumVertices,index_list,0,globalNumVertices,0);

            /* clean up index list */
            if (index_list!=NULL) {
                #pragma omp parallel for private(i)
                for (i=0;i<myNumVertices;++i) Finley_IndexList_free(index_list[i].extension);
            }

            if (Finley_noError()) {

#ifdef USE_PARMETIS

                if (mpiSize>1 &&
                    Check_Inputs_For_Parmetis(mpiSize, myRank, distribution, &(in->MPIInfo->comm))>0 ) {
                    int i;
                    int wgtflag = 0;
                    int numflag = 0; /* pattern->ptr is C style: starting from 0 instead of 1 */
                    int ncon = 1;
                    int edgecut;
                    int options[2];
                    float *tpwgts = TMPMEMALLOC(ncon*mpiSize,float);
                    float *ubvec = TMPMEMALLOC(ncon,float);
                    for (i=0; i<ncon*mpiSize; i++) tpwgts[i] = 1.0/(float)mpiSize;
                    for (i=0; i<ncon; i++) ubvec[i] = 1.05;
                    options[0] = 3;
                    options[1] = 15;
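                    /* arguments: vertex distribution across ranks, CSR adjacency graph       */
                    /* (xadj=pattern->ptr, adjncy=pattern->index), no vertex or edge weights  */
                    /* (wgtflag=0), C-style numbering (numflag=0), dim-dimensional vertex     */
                    /* coordinates in xyz, ncon balance constraints, mpiSize parts with       */
                    /* uniform target weights tpwgts and imbalance tolerance ubvec; on return */
                    /* edgecut holds the number of cut edges and partition the new owner of   */
                    /* each local vertex.                                                     */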
                    ParMETIS_V3_PartGeomKway(distribution,
                                             pattern->ptr,
                                             pattern->index,
                                             NULL,
                                             NULL,
                                             &wgtflag,
                                             &numflag,
                                             &dim,
                                             xyz,
                                             &ncon,
                                             &mpiSize,
                                             tpwgts,
                                             ubvec,
                                             options,
                                             &edgecut,
                                             partition, /* new CPU ownership of elements */
                                             &(in->MPIInfo->comm));
                    /* printf("ParMETIS number of edges cut by partitioning per processor: %d\n", edgecut/MAX(in->MPIInfo->size,1)); */
                    TMPMEMFREE(ubvec);
                    TMPMEMFREE(tpwgts);
                } else {
                    for (i=0;i<myNumVertices;++i) partition[i]=0; /* CPU 0 owns all vertices */
                }
#else
                for (i=0;i<myNumVertices;++i) partition[i]=myRank; /* this CPU keeps its vertices */
#endif

            }

            Paso_Pattern_free(pattern);

            /* create a new distribution and labelling of the DOFs */
            memset(new_distribution,0,mpiSize_size);
            #pragma omp parallel private(loc_partition_count)
            {
                loc_partition_count=THREAD_MEMALLOC(mpiSize,dim_t);
                memset(loc_partition_count,0,mpiSize_size);
                #pragma omp for private(i)
                for (i=0;i<myNumVertices;++i) loc_partition_count[partition[i]]++;
                #pragma omp critical
                {
                    for (i=0;i<mpiSize;++i) new_distribution[i]+=loc_partition_count[i];
                }
                THREAD_MEMFREE(loc_partition_count);
            }
#ifdef ESYS_MPI
            /* recvbuf will be the concatenation of each CPU's contribution to new_distribution */
            MPI_Allgather(new_distribution, mpiSize, MPI_INT, recvbuf, mpiSize, MPI_INT, in->MPIInfo->comm);
#else
            for (i=0;i<mpiSize;++i) recvbuf[i]=new_distribution[i];
#endif
            new_distribution[0]=0;
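            /* recvbuf[rank+mpiSize*i] is the number of vertices that CPU i assigns to         */
            /* partition 'rank'. For each partition, c first accumulates the counts of the     */
            /* lower ranks (their vertices are numbered before ours), then this CPU's vertices */
            /* receive consecutive labels, and finally the higher ranks are added so that      */
            /* new_distribution[rank+1] marks the start of the next partition's block.         */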
            for (rank=0; rank<mpiSize; rank++) {
                c=0;
                for (i=0;i<myRank;++i) c+=recvbuf[rank+mpiSize*i];
                for (i=0;i<myNumVertices;++i) {
                    if (rank==partition[i]) {
                        newGlobalDOFID[i]=new_distribution[rank]+c;
                        c++;
                    }
                }
                for (i=myRank+1;i<mpiSize;++i) c+=recvbuf[rank+mpiSize*i];
                new_distribution[rank+1]=new_distribution[rank]+c;
            }
            TMPMEMFREE(recvbuf);

            /* now the overlap needs to be created by sending the partition around */
#ifdef ESYS_MPI
            dest=Esys_MPIInfo_mod(mpiSize, myRank + 1);
            source=Esys_MPIInfo_mod(mpiSize, myRank - 1);
#endif
            current_rank=myRank;
            #pragma omp parallel for private(i)
            for (i=0;i<in->Nodes->numNodes;++i) setNewDOFId[i]=TRUE;
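            /* newGlobalDOFID currently holds the new IDs of this rank's block; it is passed   */
            /* around the ring (sent to myRank+1, received from myRank-1) so that every CPU    */
            /* can relabel the DOFs it shares with other ranks; setNewDOFId ensures each node  */
            /* is relabelled exactly once.                                                     */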
            for (p=0; p<mpiSize; ++p) {

                firstVertex=distribution[current_rank];
                lastVertex=distribution[current_rank+1];
                #pragma omp parallel for private(i,j,k)
                for (i=0;i<in->Nodes->numNodes;++i) {
                    k=in->Nodes->globalDegreesOfFreedom[i];
                    if (setNewDOFId[i] && (firstVertex<=k) && (k<lastVertex)) {
                        in->Nodes->globalDegreesOfFreedom[i]=newGlobalDOFID[k-firstVertex];
                        setNewDOFId[i]=FALSE;
                    }
                }

                if (p<mpiSize-1) { /* the final send can be skipped */
#ifdef ESYS_MPI
                    MPI_Sendrecv_replace(newGlobalDOFID, len, MPI_INT,
                                         dest, in->MPIInfo->msg_tag_counter,
                                         source, in->MPIInfo->msg_tag_counter,
                                         in->MPIInfo->comm, &status);
#endif
                    in->MPIInfo->msg_tag_counter++;
                    current_rank=Esys_MPIInfo_mod(mpiSize, current_rank-1);
                }
            }
            for (i=0;i<mpiSize+1;++i) distribution[i]=new_distribution[i];
        }
        TMPMEMFREE(index_list);
    }
    TMPMEMFREE(newGlobalDOFID);
    TMPMEMFREE(setNewDOFId);
    TMPMEMFREE(new_distribution);
    TMPMEMFREE(partition_count);
    TMPMEMFREE(partition);
    TMPMEMFREE(xyz);
    return;
}
