
Contents of /trunk/dudley/src/Mesh_optimizeDOFDistribution.c



Revision 1804
Wed Sep 24 07:52:19 2008 UTC by gross
Original Path: trunk/finley/src/Mesh_optimizeDOFDistribution.c
File MIME type: text/plain
File size: 11299 byte(s)
a more robust version of the upwinding scheme

/* $Id$ */

/*******************************************************
 *
 *  Copyright 2003-2007 by ACceSS MNRF
 *  Copyright 2007 by University of Queensland
 *
 *  http://esscc.uq.edu.au
 *  Primary Business: Queensland, Australia
 *  Licensed under the Open Software License version 3.0
 *  http://www.opensource.org/licenses/osl-3.0.php
 *
 *******************************************************/

/**************************************************************/

/*   Finley: Mesh: optimizes the distribution of DOFs across processors     */
/*   using ParMETIS. On return a new distribution is given and the          */
/*   globalDOF are relabelled accordingly, but the mesh has not been        */
/*   redistributed yet.                                                     */

/**************************************************************/

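/* Overview of the steps performed below:                                   */
/*   1. collect the coordinates of the locally owned DOFs and build the     */
/*      distributed adjacency graph of the DOFs (CSR pattern)               */
/*   2. call ParMETIS (if available and more than one rank) to assign each  */
/*      locally owned DOF to a new owning rank (array "partition")          */
/*   3. count the DOFs per new owner, gather the counts from all ranks and  */
/*      derive a new contiguous global DOF numbering and distribution       */
/*   4. pass the new DOF numbers around the processor ring so that copies   */
/*      of non-owned (overlap) DOFs are relabelled as well                  */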
#include "Mesh.h"
#include "IndexList.h"
#ifdef _OPENMP
#include <omp.h>
#endif
#ifdef USE_PARMETIS
#include "parmetis.h"
#endif

/**************************************************************/

void Finley_Mesh_optimizeDOFDistribution(Finley_Mesh* in, dim_t* distribution) {

    dim_t dim, i, j, k, myNumVertices, p, mpiSize, len, globalNumVertices, *partition_count=NULL, *new_distribution=NULL, *loc_partition_count=NULL;
    bool_t *setNewDOFId=NULL;
    index_t myFirstVertex, myLastVertex, firstVertex, lastVertex, *newGlobalDOFID=NULL;
    size_t mpiSize_size;
    index_t* partition=NULL;
    Paso_Pattern *pattern=NULL;
    Paso_MPI_rank myRank, dest, source, current_rank, rank;
    Finley_IndexList* index_list=NULL;
    float *xyz=NULL;
    int c;

    #ifdef PASO_MPI
    MPI_Status status;
    #endif

    if (in==NULL) return;
    if (in->Nodes == NULL) return;

    myRank=in->MPIInfo->rank;
    mpiSize=in->MPIInfo->size;
    mpiSize_size=mpiSize*sizeof(dim_t);
    dim=in->Nodes->numDim;
    /* first step is to distribute the elements according to a global ordering of the DOFs */
60
61 myFirstVertex=distribution[myRank];
62 myLastVertex=distribution[myRank+1];
63 myNumVertices=myLastVertex-myFirstVertex;
64 globalNumVertices=distribution[mpiSize];
65 len=0;
66 for (p=0;p<mpiSize;++p) len=MAX(len,distribution[p+1]-distribution[p]);
67 partition=TMPMEMALLOC(len,index_t); /* len is used for the sending around of partition later on */
68 xyz=TMPMEMALLOC(myNumVertices*dim,float);
69 partition_count=TMPMEMALLOC(mpiSize+1,dim_t);
70 new_distribution=TMPMEMALLOC(mpiSize+1,dim_t);
71 newGlobalDOFID=TMPMEMALLOC(len,index_t);
72 setNewDOFId=TMPMEMALLOC(in->Nodes->numNodes,bool_t);
73 if (!(Finley_checkPtr(partition) || Finley_checkPtr(xyz) || Finley_checkPtr(partition_count) || Finley_checkPtr(partition_count) || Finley_checkPtr(newGlobalDOFID) || Finley_checkPtr(setNewDOFId))) {
74 dim_t *recvbuf=TMPMEMALLOC(mpiSize*mpiSize,dim_t);
75
76 /* set the coordinates: */
77 /* it is assumed that at least one node on this processor provides a coordinate */
78 #pragma omp parallel for private(i,j,k)
79 for (i=0;i<in->Nodes->numNodes;++i) {
80 k=in->Nodes->globalDegreesOfFreedom[i]-myFirstVertex;
81 if ((k>=0) && (k<myNumVertices)) {
82 for (j=0;j<dim;++j) xyz[k*dim+j]=(float)(in->Nodes->Coordinates[INDEX2(j,i,dim)]);
83 }
84 }
85
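        /* xyz[k*dim .. k*dim+dim-1] now holds the coordinates of the DOF with local index k
           (k = global DOF number - myFirstVertex); they feed the geometric part of the
           ParMETIS partitioner below */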
        index_list=TMPMEMALLOC(myNumVertices,Finley_IndexList);
        /* ksteube CSR of DOF IDs */
        /* create the adjacency structure xadj and adjncy */
        if (! Finley_checkPtr(index_list)) {
            #pragma omp parallel private(i)
            {
                #pragma omp for schedule(static)
                for (i=0;i<myNumVertices;++i) {
                    index_list[i].extension=NULL;
                    index_list[i].n=0;
                }
                /* ksteube build CSR format */
                /* insert contributions from element matrices into the column index list index_list: */
                Finley_IndexList_insertElementsWithRowRangeNoMainDiagonal(index_list, myFirstVertex, myLastVertex,
                                                                          in->Elements, in->Nodes->globalDegreesOfFreedom,
                                                                          in->Nodes->globalDegreesOfFreedom);
                Finley_IndexList_insertElementsWithRowRangeNoMainDiagonal(index_list, myFirstVertex, myLastVertex,
                                                                          in->FaceElements, in->Nodes->globalDegreesOfFreedom,
                                                                          in->Nodes->globalDegreesOfFreedom);
                Finley_IndexList_insertElementsWithRowRangeNoMainDiagonal(index_list, myFirstVertex, myLastVertex,
                                                                          in->ContactElements, in->Nodes->globalDegreesOfFreedom,
                                                                          in->Nodes->globalDegreesOfFreedom);
                Finley_IndexList_insertElementsWithRowRangeNoMainDiagonal(index_list, myFirstVertex, myLastVertex,
                                                                          in->Points, in->Nodes->globalDegreesOfFreedom,
                                                                          in->Nodes->globalDegreesOfFreedom);
            }

            /* create the local matrix pattern */
            pattern=Finley_IndexList_createPattern(0,myNumVertices,index_list,0,globalNumVertices,0);

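            /* pattern->ptr and pattern->index form the distributed CSR adjacency structure
               (the xadj/adjncy arrays expected by ParMETIS) of the locally owned DOFs,
               with the main diagonal entries excluded */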
            /* clean up index list */
            if (index_list!=NULL) {
                #pragma omp parallel for private(i)
                for (i=0;i<myNumVertices;++i) Finley_IndexList_free(index_list[i].extension);
            }

            if (Finley_noError()) {

                #ifdef USE_PARMETIS

                if (in->MPIInfo->size>1) {
                    int i;
                    int wgtflag = 0;
                    int numflag = 0;   /* pattern->ptr is C style: starting from 0 instead of 1 */
                    int ncon = 1;
                    int edgecut;
                    int options[2];
                    float *tpwgts = TMPMEMALLOC(ncon*mpiSize,float);
                    float *ubvec = TMPMEMALLOC(ncon,float);
                    for (i=0; i<ncon*mpiSize; i++) tpwgts[i] = 1.0/(float)mpiSize;
                    for (i=0; i<ncon; i++) ubvec[i] = 1.05;
                    options[0] = 3;
                    options[1] = 15;

                    /*
                    {
                        int k=distribution[in->MPIInfo->rank+1]-distribution[in->MPIInfo->rank];
                        int min_i,max_i;
                        printf("INPUT PARMETIS: %d\n",k);
                        for(i=0;i<in->MPIInfo->size+1;++i) printf("%d ",distribution[i]);
                        printf("\n");
                        min_i=pattern->index[0];
                        max_i=pattern->index[0];
                        for(i=0;i<pattern->ptr[k];++i) {
                            min_i=MIN(min_i,pattern->index[i]);
                            max_i=MAX(max_i,pattern->index[i]);
                        }
                        printf("index range = %d : %d\n",min_i,max_i);

                        for(i=0;i<k+1;++i) printf("%d ",pattern->ptr[i]);
                        printf("\n");
                        for(i=0;i<pattern->ptr[k];++i) printf("%d ",pattern->index[i]);
                        printf("\n");
                    }
                    */
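                    /* partition the distributed DOF graph (CSR arrays pattern->ptr/pattern->index)
                       into mpiSize parts, using the vertex coordinates xyz for the initial geometric
                       partition; on return partition[i] is the new owning rank of local vertex i and
                       edgecut is the number of graph edges cut by the partitioning */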
                    ParMETIS_V3_PartGeomKway(distribution,
                                             pattern->ptr,
                                             pattern->index,
                                             NULL,
                                             NULL,
                                             &wgtflag,
                                             &numflag,
                                             &dim,
                                             xyz,
                                             &ncon,
                                             &mpiSize,
                                             tpwgts,
                                             ubvec,
                                             options,
                                             &edgecut,
                                             partition,   /* new CPU ownership of elements */
                                             &(in->MPIInfo->comm));
                    printf("ParMETIS number of edges cut by partitioning per processor: %d\n", edgecut/MAX(in->MPIInfo->size,1));
                    TMPMEMFREE(ubvec);
                    TMPMEMFREE(tpwgts);
                } else {
                    for (i=0;i<myNumVertices;++i) partition[i]=0; /* CPU 0 owns all vertices */
                }
                #else
                for (i=0;i<myNumVertices;++i) partition[i]=myRank; /* without ParMETIS each rank keeps its own vertices */
                #endif

            }

            Paso_Pattern_free(pattern);

            /* create a new distribution and labeling of the DOF */
            memset(new_distribution,0,mpiSize_size);
            #pragma omp parallel private(loc_partition_count)
            {
                loc_partition_count=THREAD_MEMALLOC(mpiSize,dim_t);
                memset(loc_partition_count,0,mpiSize_size);
                #pragma omp for private(i)
                for (i=0;i<myNumVertices;++i) loc_partition_count[partition[i]]++;
                #pragma omp critical
                {
                    for (i=0;i<mpiSize;++i) new_distribution[i]+=loc_partition_count[i];
                }
                THREAD_MEMFREE(loc_partition_count);
            }
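            /* new_distribution[p] now holds the number of locally owned DOFs assigned to rank p;
               the Allgather below collects these counts from all ranks into recvbuf, where
               recvbuf[p+mpiSize*r] is the count reported by rank r for target rank p */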
            #ifdef PASO_MPI
            /* recvbuf will be the concatenation of each CPU's contribution to new_distribution */
            MPI_Allgather(new_distribution, mpiSize, MPI_INT, recvbuf, mpiSize, MPI_INT, in->MPIInfo->comm);
            #else
            for (i=0;i<mpiSize;++i) recvbuf[i]=new_distribution[i];
            #endif
            new_distribution[0]=0;
            for (rank=0; rank<mpiSize; rank++) {
                c=0;
                for (i=0;i<myRank;++i) c+=recvbuf[rank+mpiSize*i];
                for (i=0;i<myNumVertices;++i) {
                    if (rank==partition[i]) {
                        newGlobalDOFID[i]=new_distribution[rank]+c;
                        c++;
                    }
                }
                for (i=myRank+1;i<mpiSize;++i) c+=recvbuf[rank+mpiSize*i];
                new_distribution[rank+1]=new_distribution[rank]+c;
            }
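            /* for each target rank, c first accumulates the number of DOFs that lower-numbered
               ranks assign to it (the offset of this rank's block), then the locally owned DOFs
               going to that rank receive consecutive new IDs; adding the contributions of the
               higher-numbered ranks yields new_distribution[rank+1], so new_distribution becomes
               the first-DOF-index table of the new numbering */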
            TMPMEMFREE(recvbuf);

            /* now the overlap needs to be created by sending the partition around */

            dest=Paso_MPIInfo_mod(mpiSize, myRank + 1);
            source=Paso_MPIInfo_mod(mpiSize, myRank - 1);
            current_rank=myRank;
            #pragma omp parallel for private(i)
            for (i=0;i<in->Nodes->numNodes;++i) setNewDOFId[i]=TRUE;
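            /* newGlobalDOFID holds the new IDs for the DOF block owned by current_rank (initially
               myRank); in each of the mpiSize passes below every node whose old global DOF lies in
               [firstVertex,lastVertex) of current_rank is relabelled exactly once (tracked by
               setNewDOFId), then the buffer is forwarded around the processor ring and
               current_rank is decreased accordingly */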
234
235 for (p=0; p< mpiSize; ++p) {
236
237 firstVertex=distribution[current_rank];
238 lastVertex=distribution[current_rank+1];
239 #pragma omp parallel for private(i,j,k)
240 for (i=0;i<in->Nodes->numNodes;++i) {
241 k=in->Nodes->globalDegreesOfFreedom[i];
242 if (setNewDOFId[i] && (firstVertex<=k) && (k<lastVertex)) {
243 in->Nodes->globalDegreesOfFreedom[i]=newGlobalDOFID[k-firstVertex];
244 setNewDOFId[i]=FALSE;
245 }
246 }
247
248 if (p<mpiSize-1) { /* the final send can be skipped */
249 #ifdef PASO_MPI
250 MPI_Sendrecv_replace(newGlobalDOFID,len, MPI_INT,
251 dest, in->MPIInfo->msg_tag_counter,
252 source, in->MPIInfo->msg_tag_counter,
253 in->MPIInfo->comm,&status);
254 #endif
255 in->MPIInfo->msg_tag_counter++;
256 current_rank=Paso_MPIInfo_mod(mpiSize, current_rank-1);
257 }
258 }
259 for (i=0;i<mpiSize+1;++i) distribution[i]=new_distribution[i];
260 }
        TMPMEMFREE(index_list);
    }
    TMPMEMFREE(newGlobalDOFID);
    TMPMEMFREE(setNewDOFId);
    TMPMEMFREE(new_distribution);
    TMPMEMFREE(partition_count);
    TMPMEMFREE(partition);
    TMPMEMFREE(xyz);
    return;
}
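
/* Usage sketch (illustrative only, not part of this file): the caller computes an initial DOF
   distribution, lets this routine optimize it, and must then still redistribute the mesh itself
   according to the relabelled globalDegreesOfFreedom; the follow-up function name below is an
   assumption and lives elsewhere in Finley:

       Finley_Mesh_optimizeDOFDistribution(mesh, distribution);
       if (Finley_noError()) {
           Finley_Mesh_distributeByRankOfDOF(mesh, distribution);   hypothetical redistribution step
       }
*/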
