Contents of /trunk/finley/src/Mesh_optimizeDOFDistribution.c



Revision 1811
Thu Sep 25 23:11:13 2008 UTC by ksteube
File MIME type: text/plain
File size: 11264 byte(s)
Copyright updated in all files


/*******************************************************
*
* Copyright (c) 2003-2008 by University of Queensland
* Earth Systems Science Computational Center (ESSCC)
* http://www.uq.edu.au/esscc
*
* Primary Business: Queensland, Australia
* Licensed under the Open Software License version 3.0
* http://www.opensource.org/licenses/osl-3.0.php
*
*******************************************************/


/**************************************************************/

/* Finley: Mesh: optimizes the distribution of DOFs across processors */
/* using ParMETIS. On return a new distribution is given and the global DOFs are relabelled */
/* accordingly but the mesh has not been redistributed yet */

/**************************************************************/
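
/* Overview of the algorithm implemented below:                                     */
/*   1. gather the coordinates of the locally owned DOFs and build a distributed    */
/*      CSR graph whose vertices are global DOFs and whose edges connect DOFs that  */
/*      share an element;                                                            */
/*   2. hand this graph to ParMETIS (ParMETIS_V3_PartGeomKway) to compute a new     */
/*      owner process for every locally owned DOF;                                   */
/*   3. turn the partition into a new contiguous global DOF numbering and rotate it */
/*      around all ranks so that every process can relabel the DOFs it references;   */
/*      the caller still has to redistribute the mesh itself.                        */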

#include "Mesh.h"
#include "IndexList.h"
#ifdef _OPENMP
#include <omp.h>
#endif
#ifdef USE_PARMETIS
#include "parmetis.h"
#endif

/**************************************************************/

void Finley_Mesh_optimizeDOFDistribution(Finley_Mesh* in, dim_t *distribution) {

    dim_t dim, i, j, k, myNumVertices, p, mpiSize, len, globalNumVertices, *partition_count=NULL, *new_distribution=NULL, *loc_partition_count=NULL;
    bool_t *setNewDOFId=NULL;
    index_t myFirstVertex, myLastVertex, firstVertex, lastVertex, *newGlobalDOFID=NULL;
    size_t mpiSize_size;
    index_t* partition=NULL;
    Paso_Pattern *pattern=NULL;
    Paso_MPI_rank myRank, dest, source, current_rank, rank;
    Finley_IndexList* index_list=NULL;
    float *xyz=NULL;
    int c;

#ifdef PASO_MPI
    MPI_Status status;
#endif

    if (in==NULL) return;
    if (in->Nodes == NULL) return;

    myRank=in->MPIInfo->rank;
    mpiSize=in->MPIInfo->size;
    mpiSize_size=mpiSize*sizeof(dim_t);
    dim=in->Nodes->numDim;
    /* the first step is to distribute the elements according to a global distribution of the DOFs */

    myFirstVertex=distribution[myRank];
    myLastVertex=distribution[myRank+1];
    myNumVertices=myLastVertex-myFirstVertex;
    globalNumVertices=distribution[mpiSize];
    len=0;
    for (p=0;p<mpiSize;++p) len=MAX(len,distribution[p+1]-distribution[p]);
    partition=TMPMEMALLOC(len,index_t); /* len is used for the sending around of partition later on */
    xyz=TMPMEMALLOC(myNumVertices*dim,float);
    partition_count=TMPMEMALLOC(mpiSize+1,dim_t);
    new_distribution=TMPMEMALLOC(mpiSize+1,dim_t);
    newGlobalDOFID=TMPMEMALLOC(len,index_t);
    setNewDOFId=TMPMEMALLOC(in->Nodes->numNodes,bool_t);
    if (!(Finley_checkPtr(partition) || Finley_checkPtr(xyz) || Finley_checkPtr(partition_count) || Finley_checkPtr(new_distribution) || Finley_checkPtr(newGlobalDOFID) || Finley_checkPtr(setNewDOFId))) {
        dim_t *recvbuf=TMPMEMALLOC(mpiSize*mpiSize,dim_t);

        /* set the coordinates: */
        /* it is assumed that at least one node on this processor provides a coordinate */
#pragma omp parallel for private(i,j,k)
        for (i=0;i<in->Nodes->numNodes;++i) {
            k=in->Nodes->globalDegreesOfFreedom[i]-myFirstVertex;
            if ((k>=0) && (k<myNumVertices)) {
                for (j=0;j<dim;++j) xyz[k*dim+j]=(float)(in->Nodes->Coordinates[INDEX2(j,i,dim)]);
            }
        }

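        /* Next the distributed adjacency graph of the DOFs is assembled: one row per */
        /* locally owned DOF, with an entry for every other global DOF that appears   */
        /* in the same volume, face, contact or point element (the main diagonal is   */
        /* excluded). This is the CSR structure (xadj/adjncy) that ParMETIS expects.  */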
        index_list=TMPMEMALLOC(myNumVertices,Finley_IndexList);
        /* ksteube CSR of DOF IDs */
        /* create the adjacency structure xadj and adjncy */
        if (! Finley_checkPtr(index_list)) {
#pragma omp parallel private(i)
            {
#pragma omp for schedule(static)
                for (i=0;i<myNumVertices;++i) {
                    index_list[i].extension=NULL;
                    index_list[i].n=0;
                }
                /* ksteube build CSR format */
                /* insert contributions from element matrices into the column index list index_list: */
                Finley_IndexList_insertElementsWithRowRangeNoMainDiagonal(index_list, myFirstVertex, myLastVertex,
                                                                          in->Elements,in->Nodes->globalDegreesOfFreedom,
                                                                          in->Nodes->globalDegreesOfFreedom);
                Finley_IndexList_insertElementsWithRowRangeNoMainDiagonal(index_list, myFirstVertex, myLastVertex,
                                                                          in->FaceElements,in->Nodes->globalDegreesOfFreedom,
                                                                          in->Nodes->globalDegreesOfFreedom);
                Finley_IndexList_insertElementsWithRowRangeNoMainDiagonal(index_list, myFirstVertex, myLastVertex,
                                                                          in->ContactElements,in->Nodes->globalDegreesOfFreedom,
                                                                          in->Nodes->globalDegreesOfFreedom);
                Finley_IndexList_insertElementsWithRowRangeNoMainDiagonal(index_list, myFirstVertex, myLastVertex,
                                                                          in->Points,in->Nodes->globalDegreesOfFreedom,
                                                                          in->Nodes->globalDegreesOfFreedom);
            }

            /* create the local matrix pattern */
            pattern=Finley_IndexList_createPattern(0,myNumVertices,index_list,0,globalNumVertices,0);

            /* clean up index list */
            if (index_list!=NULL) {
#pragma omp parallel for private(i)
                for (i=0;i<myNumVertices;++i) Finley_IndexList_free(index_list[i].extension);
            }

            if (Finley_noError()) {

#ifdef USE_PARMETIS

                if (in->MPIInfo->size>1) {
                    int i;
                    int wgtflag = 0;
                    int numflag = 0;   /* pattern->ptr is C style: starting from 0 instead of 1 */
                    int ncon = 1;
                    int edgecut;
                    int options[2];
                    float *tpwgts = TMPMEMALLOC(ncon*mpiSize,float);
                    float *ubvec = TMPMEMALLOC(ncon,float);
                    for (i=0; i<ncon*mpiSize; i++) tpwgts[i] = 1.0/(float)mpiSize;
                    for (i=0; i<ncon; i++) ubvec[i] = 1.05;
                    options[0] = 3;
                    options[1] = 15;

                    /*
                    {
                        int k=distribution[in->MPIInfo->rank+1]-distribution[in->MPIInfo->rank];
                        int min_i,max_i;
                        printf("INPUT PARMETIS: %d\n",k);
                        for(i=0;i<in->MPIInfo->size+1;++i) printf("%d ",distribution[i]);
                        printf("\n");
                        min_i=pattern->index[0];
                        max_i=pattern->index[0];
                        for(i=0;i<pattern->ptr[k];++i) {
                            min_i=MIN(min_i,pattern->index[i]);
                            max_i=MAX(max_i,pattern->index[i]);
                        }
                        printf("index range = %d : %d\n",min_i,max_i);

                        for(i=0;i<k+1;++i) printf("%d ",pattern->ptr[i]);
                        printf("\n");
                        for(i=0;i<pattern->ptr[k];++i) printf("%d ",pattern->index[i]);
                        printf("\n");
                    }
                    */
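                    /* ParMETIS_V3_PartGeomKway combines the coordinates in xyz with the      */
                    /* graph (pattern->ptr/pattern->index) to compute mpiSize parts:          */
                    /*   wgtflag=0, vwgt=adjwgt=NULL : no vertex or edge weights,             */
                    /*   numflag=0                   : C-style 0-based indexing,              */
                    /*   ncon=1, tpwgts, ubvec       : one balance constraint with equal      */
                    /*                                 target weights and 5% imbalance,       */
                    /*   partition                   : on return, the new owning rank of      */
                    /*                                 each locally owned DOF.                */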
                    ParMETIS_V3_PartGeomKway(distribution,
                                             pattern->ptr,
                                             pattern->index,
                                             NULL,
                                             NULL,
                                             &wgtflag,
                                             &numflag,
                                             &dim,
                                             xyz,
                                             &ncon,
                                             &mpiSize,
                                             tpwgts,
                                             ubvec,
                                             options,
                                             &edgecut,
                                             partition,   /* new CPU ownership of the DOFs */
                                             &(in->MPIInfo->comm));
                    printf("ParMETIS number of edges cut by partitioning per processor: %d\n", edgecut/MAX(in->MPIInfo->size,1));
                    TMPMEMFREE(ubvec);
                    TMPMEMFREE(tpwgts);
                } else {
                    for (i=0;i<myNumVertices;++i) partition[i]=0;      /* CPU 0 owns everything */
                }
#else
                for (i=0;i<myNumVertices;++i) partition[i]=myRank;     /* without ParMETIS each CPU keeps its own DOFs */
#endif

            }

            Paso_Pattern_free(pattern);

            /* create a new distribution and labelling of the DOFs */
            memset(new_distribution,0,mpiSize_size);
#pragma omp parallel private(loc_partition_count)
            {
                loc_partition_count=THREAD_MEMALLOC(mpiSize,dim_t);
                memset(loc_partition_count,0,mpiSize_size);
#pragma omp for private(i)
                for (i=0;i<myNumVertices;++i) loc_partition_count[partition[i]]++;
#pragma omp critical
                {
                    for (i=0;i<mpiSize;++i) new_distribution[i]+=loc_partition_count[i];
                }
                THREAD_MEMFREE(loc_partition_count);
            }
#ifdef PASO_MPI
            /* recvbuf will be the concatenation of each CPU's contribution to new_distribution */
            MPI_Allgather(new_distribution, mpiSize, MPI_INT, recvbuf, mpiSize, MPI_INT, in->MPIInfo->comm);
#else
            for (i=0;i<mpiSize;++i) recvbuf[i]=new_distribution[i];
#endif
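            /* After the gather, recvbuf[q+mpiSize*p] holds the number of DOFs that      */
            /* process p wants to move to rank q. The loop below turns these counts into */
            /* the new distribution (a running prefix sum over the target ranks) and, on */
            /* this process, gives every locally owned DOF a consecutive new global ID   */
            /* inside the block of its new owner, offset by the contributions of the     */
            /* lower-ranked processes.                                                    */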
            new_distribution[0]=0;
            for (rank=0; rank<mpiSize; rank++) {
                c=0;
                for (i=0;i<myRank;++i) c+=recvbuf[rank+mpiSize*i];
                for (i=0;i<myNumVertices;++i) {
                    if (rank==partition[i]) {
                        newGlobalDOFID[i]=new_distribution[rank]+c;
                        c++;
                    }
                }
                for (i=myRank+1;i<mpiSize;++i) c+=recvbuf[rank+mpiSize*i];
                new_distribution[rank+1]=new_distribution[rank]+c;
            }
            TMPMEMFREE(recvbuf);

            /* now the overlap needs to be created by sending the partition around */

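            /* The new IDs are only known for the DOFs owned by this process, but the    */
            /* local mesh also references DOFs owned by other ranks. The newGlobalDOFID  */
            /* block is therefore rotated around all processes in a ring (send to        */
            /* rank+1, receive from rank-1); in each step the DOFs falling into the      */
            /* old range of the currently held owner are relabelled exactly once,        */
            /* tracked by setNewDOFId.                                                    */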
            dest=Paso_MPIInfo_mod(mpiSize, myRank + 1);
            source=Paso_MPIInfo_mod(mpiSize, myRank - 1);
            current_rank=myRank;
#pragma omp parallel for private(i)
            for (i=0;i<in->Nodes->numNodes;++i) setNewDOFId[i]=TRUE;

            for (p=0; p<mpiSize; ++p) {

                firstVertex=distribution[current_rank];
                lastVertex=distribution[current_rank+1];
#pragma omp parallel for private(i,j,k)
                for (i=0;i<in->Nodes->numNodes;++i) {
                    k=in->Nodes->globalDegreesOfFreedom[i];
                    if (setNewDOFId[i] && (firstVertex<=k) && (k<lastVertex)) {
                        in->Nodes->globalDegreesOfFreedom[i]=newGlobalDOFID[k-firstVertex];
                        setNewDOFId[i]=FALSE;
                    }
                }

                if (p<mpiSize-1) {  /* the final send can be skipped */
#ifdef PASO_MPI
                    MPI_Sendrecv_replace(newGlobalDOFID, len, MPI_INT,
                                         dest, in->MPIInfo->msg_tag_counter,
                                         source, in->MPIInfo->msg_tag_counter,
                                         in->MPIInfo->comm, &status);
#endif
                    in->MPIInfo->msg_tag_counter++;
                    current_rank=Paso_MPIInfo_mod(mpiSize, current_rank-1);
                }
            }
            for (i=0;i<mpiSize+1;++i) distribution[i]=new_distribution[i];
        }
        TMPMEMFREE(index_list);
    }
    TMPMEMFREE(newGlobalDOFID);
    TMPMEMFREE(setNewDOFId);
    TMPMEMFREE(new_distribution);
    TMPMEMFREE(partition_count);
    TMPMEMFREE(partition);
    TMPMEMFREE(xyz);
    return;
}
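
/* Illustrative sketch only (not part of the revision above): one way a caller    */
/* might set up an initial block distribution of the global DOFs and then invoke  */
/* Finley_Mesh_optimizeDOFDistribution. The helper name example_optimizeMeshDOF   */
/* and the even initial split are assumptions made for illustration; the actual   */
/* finley driver code may construct the distribution differently.                 */
#if 0
static void example_optimizeMeshDOF(Finley_Mesh* mesh, dim_t globalNumDOF) {
    dim_t mpiSize = mesh->MPIInfo->size;
    dim_t chunk = (globalNumDOF + mpiSize - 1) / mpiSize;   /* ceil(globalNumDOF/mpiSize) */
    dim_t p, *distribution = TMPMEMALLOC(mpiSize + 1, dim_t);
    if (Finley_checkPtr(distribution)) return;
    /* even block distribution: process p initially owns DOFs [distribution[p], distribution[p+1]) */
    for (p = 0; p < mpiSize; ++p) {
        distribution[p] = p * chunk;
        if (distribution[p] > globalNumDOF) distribution[p] = globalNumDOF;
    }
    distribution[mpiSize] = globalNumDOF;
    /* compute an optimized distribution; globalDegreesOfFreedom is relabelled in place */
    Finley_Mesh_optimizeDOFDistribution(mesh, distribution);
    /* distribution[] now holds the first new global DOF of every process; */
    /* the mesh itself still has to be redistributed in a separate step.   */
    TMPMEMFREE(distribution);
}
#endif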
