/* $Id$ */

/*******************************************************
 *
 * Copyright 2003-2007 by ACceSS MNRF
 * Copyright 2007 by University of Queensland
 *
 * http://esscc.uq.edu.au
 * Primary Business: Queensland, Australia
 * Licensed under the Open Software License version 3.0
 * http://www.opensource.org/licenses/osl-3.0.php
 *
 *******************************************************/

/**************************************************************/

/* Finley: Mesh: optimizes the distribution of DOFs across processors */
/* using ParMETIS. On return a new distribution is given and the global DOFs are relabelled */
/* accordingly, but the mesh has not been redistributed yet. */

/**************************************************************/

#include "Mesh.h"
#include "IndexList.h"
#ifdef _OPENMP
#include <omp.h>
#endif
#ifdef USE_PARMETIS
#include "parmetis.h"
#endif

/**************************************************************/

void Finley_Mesh_optimizeDOFDistribution(Finley_Mesh* in, dim_t* distribution) {

    dim_t dim, i, j, k, myNumVertices, p, mpiSize, len, globalNumVertices;
    dim_t *partition_count=NULL, *new_distribution=NULL, *loc_partition_count=NULL;
    bool_t *setNewDOFId=NULL;
    index_t myFirstVertex, myLastVertex, firstVertex, lastVertex, *newGlobalDOFID=NULL;
    size_t mpiSize_size;
    index_t *partition=NULL;
    Paso_Pattern *pattern=NULL;
    Paso_MPI_rank myRank, dest, source, current_rank, rank;
    Finley_IndexList *index_list=NULL;
    float *xyz=NULL;
    int c;

#ifdef PASO_MPI
    MPI_Status status;
#endif

    if (in==NULL) return;
    if (in->Nodes == NULL) return;
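    /*
     * Outline (this routine only relabels DOFs, the mesh itself is not moved):
     *   1. gather the coordinates of the locally owned DOFs and build their
     *      adjacency graph in CSR form,
     *   2. call ParMETIS_V3_PartGeomKway to assign each owned DOF to a new rank,
     *   3. compute the new distribution and contiguous new global DOF ids,
     *   4. circulate the new ids around all ranks so that halo DOFs are relabelled too.
     * On entry, rank p owns the global DOFs distribution[p] .. distribution[p+1]-1;
     * on exit, distribution[] describes the new ownership ranges.
     */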
    myRank=in->MPIInfo->rank;
    mpiSize=in->MPIInfo->size;
    mpiSize_size=mpiSize*sizeof(dim_t);
    dim=in->Nodes->numDim;
    /* first step is to distribute the elements according to the global numbering of the DOFs */

    myFirstVertex=distribution[myRank];
    myLastVertex=distribution[myRank+1];
    myNumVertices=myLastVertex-myFirstVertex;
    globalNumVertices=distribution[mpiSize];
    len=0;
    for (p=0; p<mpiSize; ++p) len=MAX(len, distribution[p+1]-distribution[p]);
    partition=TMPMEMALLOC(len, index_t);  /* len is used for the sending around of partition later on */
    xyz=TMPMEMALLOC(myNumVertices*dim, float);
    partition_count=TMPMEMALLOC(mpiSize+1, dim_t);
    new_distribution=TMPMEMALLOC(mpiSize+1, dim_t);
    newGlobalDOFID=TMPMEMALLOC(len, index_t);
    setNewDOFId=TMPMEMALLOC(in->Nodes->numNodes, bool_t);
    if (!(Finley_checkPtr(partition) || Finley_checkPtr(xyz) || Finley_checkPtr(partition_count) ||
          Finley_checkPtr(new_distribution) || Finley_checkPtr(newGlobalDOFID) || Finley_checkPtr(setNewDOFId))) {
        dim_t *recvbuf=TMPMEMALLOC(mpiSize*mpiSize, dim_t);
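        /*
         * Copy the coordinates of the locally owned vertices (= DOFs) into the dense
         * array xyz: dim floats per vertex, ordered by the local vertex index
         * k = globalDegreesOfFreedom[i]-myFirstVertex. ParMETIS uses these coordinates
         * for the geometric part of PartGeomKway.
         */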
        /* set the coordinates: */
        /* it is assumed that at least one node on this processor provides a coordinate */
        #pragma omp parallel for private(i,j,k)
        for (i=0; i<in->Nodes->numNodes; ++i) {
            k=in->Nodes->globalDegreesOfFreedom[i]-myFirstVertex;
            if ((k>=0) && (k<myNumVertices)) {
                for (j=0; j<dim; ++j) xyz[k*dim+j]=(float)(in->Nodes->Coordinates[INDEX2(j,i,dim)]);
            }
        }
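        /*
         * Build the adjacency graph of the locally owned DOFs in CSR form (the
         * xadj/adjncy arrays expected by ParMETIS): one Finley_IndexList per owned
         * vertex collects the global ids of the DOFs coupled to it through the
         * volume, face and contact elements and the points.
         */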
        index_list=TMPMEMALLOC(myNumVertices, Finley_IndexList);
        /* ksteube CSR of DOF IDs */
        /* create the adjacency structure xadj and adjncy */
        if (! Finley_checkPtr(index_list)) {
            #pragma omp parallel private(i)
            {
                #pragma omp for schedule(static)
                for (i=0; i<myNumVertices; ++i) {
                    index_list[i].extension=NULL;
                    index_list[i].n=0;
                }
                /* ksteube build CSR format */
                /* insert contributions from element matrices into the column index list index_list: */
                Finley_IndexList_insertElementsWithRowRangeNoMainDiagonal(index_list, myFirstVertex, myLastVertex,
                                                                          in->Elements, in->Nodes->globalDegreesOfFreedom,
                                                                          in->Nodes->globalDegreesOfFreedom);
                Finley_IndexList_insertElementsWithRowRangeNoMainDiagonal(index_list, myFirstVertex, myLastVertex,
                                                                          in->FaceElements, in->Nodes->globalDegreesOfFreedom,
                                                                          in->Nodes->globalDegreesOfFreedom);
                Finley_IndexList_insertElementsWithRowRangeNoMainDiagonal(index_list, myFirstVertex, myLastVertex,
                                                                          in->ContactElements, in->Nodes->globalDegreesOfFreedom,
                                                                          in->Nodes->globalDegreesOfFreedom);
                Finley_IndexList_insertElementsWithRowRangeNoMainDiagonal(index_list, myFirstVertex, myLastVertex,
                                                                          in->Points, in->Nodes->globalDegreesOfFreedom,
                                                                          in->Nodes->globalDegreesOfFreedom);
            }

            /* create the local matrix pattern */
            pattern=Finley_IndexList_createPattern(0, myNumVertices, index_list, 0, globalNumVertices, 0);

            /* clean up index list */
            if (index_list!=NULL) {
                #pragma omp parallel for private(i)
                for (i=0; i<myNumVertices; ++i) Finley_IndexList_free(index_list[i].extension);
            }
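            /*
             * ParMETIS does not allow self loops in the adjacency structure, so any
             * entry referring to the row's own global id (myFirstVertex+i) is squeezed
             * out of pattern->index in place and pattern->ptr is rewritten to the
             * compacted offsets.
             */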
            {
                int s=0, s0;
                for (i=0; i<myNumVertices; ++i) {
                    s0=s;
                    for (j=pattern->ptr[i]; j<pattern->ptr[i+1]; ++j) {
                        if (pattern->index[j] != myFirstVertex+i) {
                            pattern->index[s]=pattern->index[j];
                            s++;
                        }
                    }
                    pattern->ptr[i]=s0;
                }
                pattern->ptr[myNumVertices]=s;
            }

            if (Finley_noError()) {

#ifdef USE_PARMETIS
                if (in->MPIInfo->size>1) {
                    int i;
                    int wgtflag = 0;
                    int numflag = 0;  /* pattern->ptr is C style: starting from 0 instead of 1 */
                    int ncon = 1;
                    int edgecut;
                    int options[2];
                    float *tpwgts = TMPMEMALLOC(ncon*mpiSize, float);
                    float *ubvec = TMPMEMALLOC(ncon, float);
                    for (i=0; i<ncon*mpiSize; i++) tpwgts[i] = 1.0/(float)mpiSize;
                    for (i=0; i<ncon; i++) ubvec[i] = 1.05;
                    options[0] = 3;
                    options[1] = 15;
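                    /*
                     * Partition the distributed graph (pattern->ptr/pattern->index with
                     * vertex coordinates xyz) into mpiSize parts, using uniform target
                     * part weights (tpwgts) and a 5% load imbalance tolerance (ubvec).
                     * On return partition[i] holds the new owner rank of local vertex i.
                     */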
                    ParMETIS_V3_PartGeomKway(distribution,
                                             pattern->ptr,
                                             pattern->index,
                                             NULL,
                                             NULL,
                                             &wgtflag,
                                             &numflag,
                                             &dim,
                                             xyz,
                                             &ncon,
                                             &mpiSize,
                                             tpwgts,
                                             ubvec,
                                             options,
                                             &edgecut,
                                             partition,  /* new CPU ownership of elements */
                                             &(in->MPIInfo->comm));
                    printf("ParMETIS number of edges cut by partitioning: %d\n", edgecut);
                    TMPMEMFREE(ubvec);
                    TMPMEMFREE(tpwgts);
                } else {
                    for (i=0; i<myNumVertices; ++i) partition[i]=0;  /* CPU 0 owns it */
                }
#else
                for (i=0; i<myNumVertices; ++i) partition[i]=myRank;  /* without ParMETIS the current CPU keeps it */
#endif

            }

            Paso_Pattern_free(pattern);

            /* create a new distribution and labelling of the DOFs */
            memset(new_distribution, 0, mpiSize_size);
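            /*
             * Count, for every target rank, how many of the locally owned vertices were
             * assigned to it: each thread accumulates into its own loc_partition_count,
             * and the per-thread counts are reduced into new_distribution inside the
             * critical section.
             */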
            #pragma omp parallel private(loc_partition_count)
            {
                loc_partition_count=THREAD_MEMALLOC(mpiSize, dim_t);
                memset(loc_partition_count, 0, mpiSize_size);
                #pragma omp for private(i)
                for (i=0; i<myNumVertices; ++i) loc_partition_count[partition[i]]++;
                #pragma omp critical
                {
                    for (i=0; i<mpiSize; ++i) new_distribution[i]+=loc_partition_count[i];
                }
                THREAD_MEMFREE(loc_partition_count);
            }
#ifdef PASO_MPI
            /* recvbuf will be the concatenation of each CPU's contribution to new_distribution */
            MPI_Allgather(new_distribution, mpiSize, MPI_INT, recvbuf, mpiSize, MPI_INT, in->MPIInfo->comm);
#else
            for (i=0; i<mpiSize; ++i) recvbuf[i]=new_distribution[i];
#endif
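            /*
             * recvbuf[rank+mpiSize*q] now holds the number of vertices that rank q assigns
             * to rank 'rank'. For each target rank the loop below offsets c by the
             * contributions of the lower ranks, hands out the contiguous new global DOF
             * ids new_distribution[rank]+c to the local vertices assigned to that rank,
             * adds the contributions of the higher ranks, and finally sets the new
             * distribution boundary new_distribution[rank+1].
             */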
            new_distribution[0]=0;
            for (rank=0; rank<mpiSize; rank++) {
                c=0;
                for (i=0; i<myRank; ++i) c+=recvbuf[rank+mpiSize*i];
                for (i=0; i<myNumVertices; ++i) {
                    if (rank==partition[i]) {
                        newGlobalDOFID[i]=new_distribution[rank]+c;
                        c++;
                    }
                }
                for (i=myRank+1; i<mpiSize; ++i) c+=recvbuf[rank+mpiSize*i];
                new_distribution[rank+1]=new_distribution[rank]+c;
            }
            TMPMEMFREE(recvbuf);
            /* now the overlap needs to be created by sending the new global DOF ids around in a ring */

            dest=Paso_MPIInfo_mod(mpiSize, myRank + 1);
            source=Paso_MPIInfo_mod(mpiSize, myRank - 1);
            current_rank=myRank;
            #pragma omp parallel for private(i)
            for (i=0; i<in->Nodes->numNodes; ++i) setNewDOFId[i]=TRUE;
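            /*
             * Ring exchange: in round p each rank holds the newGlobalDOFID block of rank
             * current_rank and relabels all of its nodes (owned and halo) whose current
             * global DOF falls into current_rank's old range. setNewDOFId ensures every
             * node is relabelled only once; the block is then passed on to the next rank
             * in the ring.
             */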
            for (p=0; p<mpiSize; ++p) {

                firstVertex=distribution[current_rank];
                lastVertex=distribution[current_rank+1];
                #pragma omp parallel for private(i,j,k)
                for (i=0; i<in->Nodes->numNodes; ++i) {
                    k=in->Nodes->globalDegreesOfFreedom[i];
                    if (setNewDOFId[i] && (firstVertex<=k) && (k<lastVertex)) {
                        in->Nodes->globalDegreesOfFreedom[i]=newGlobalDOFID[k-firstVertex];
                        setNewDOFId[i]=FALSE;
                    }
                }

                if (p<mpiSize-1) {  /* the final send can be skipped */
#ifdef PASO_MPI
                    MPI_Sendrecv_replace(newGlobalDOFID, len, MPI_INT,
                                         dest, in->MPIInfo->msg_tag_counter,
                                         source, in->MPIInfo->msg_tag_counter,
                                         in->MPIInfo->comm, &status);
#endif
                    in->MPIInfo->msg_tag_counter++;
                    current_rank=Paso_MPIInfo_mod(mpiSize, current_rank-1);
                }
            }
            for (i=0; i<mpiSize+1; ++i) distribution[i]=new_distribution[i];

        }
        TMPMEMFREE(index_list);
    }
    TMPMEMFREE(newGlobalDOFID);
    TMPMEMFREE(setNewDOFId);
    TMPMEMFREE(new_distribution);
    TMPMEMFREE(partition_count);
    TMPMEMFREE(partition);
    TMPMEMFREE(xyz);
    return;
}