/[escript]/trunk/finley/src/Mesh_optimizeDOFDistribution.c
Revision 1811 - Thu Sep 25 23:11:13 2008 UTC by ksteube: Copyright updated in all files


/*******************************************************
*
* Copyright (c) 2003-2008 by University of Queensland
* Earth Systems Science Computational Center (ESSCC)
* http://www.uq.edu.au/esscc
*
* Primary Business: Queensland, Australia
* Licensed under the Open Software License version 3.0
* http://www.opensource.org/licenses/osl-3.0.php
*
*******************************************************/


/**************************************************************/

/* Finley: Mesh: optimizes the distribution of DOFs across processors */
/* using ParMETIS. On return a new distribution is given and the globalDOFs are relabeled */
/* accordingly, but the mesh has not been redistributed yet. */

/**************************************************************/
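
/* distribution[] holds mpiSize+1 entries: DOFs distribution[p] to distribution[p+1]-1 are */
/* currently owned by rank p. The routine builds the DOF connectivity graph in CSR form,   */
/* partitions it (with ParMETIS when available), relabels the global DOF ids accordingly,  */
/* and overwrites distribution[] with the new first-DOF index of each rank.                */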

#include "Mesh.h"
#include "IndexList.h"
#ifdef _OPENMP
#include <omp.h>
#endif
#ifdef USE_PARMETIS
#include "parmetis.h"
#endif

/**************************************************************/

void Finley_Mesh_optimizeDOFDistribution(Finley_Mesh* in, dim_t* distribution) {

    dim_t dim, i, j, k, myNumVertices, p, mpiSize, len, globalNumVertices, *partition_count=NULL, *new_distribution=NULL, *loc_partition_count=NULL;
    bool_t *setNewDOFId=NULL;
    index_t myFirstVertex, myLastVertex, firstVertex, lastVertex, *newGlobalDOFID=NULL;
    size_t mpiSize_size;
    index_t *partition=NULL;
    Paso_Pattern *pattern=NULL;
    Paso_MPI_rank myRank, dest, source, current_rank, rank;
    Finley_IndexList* index_list=NULL;
    float *xyz=NULL;
    int c;

#ifdef PASO_MPI
    MPI_Status status;
#endif

    if (in==NULL) return;
    if (in->Nodes == NULL) return;

    myRank=in->MPIInfo->rank;
    mpiSize=in->MPIInfo->size;
    mpiSize_size=mpiSize*sizeof(dim_t);
    dim=in->Nodes->numDim;
    /* first step is to distribute the elements according to the global distribution of the DOFs */

    myFirstVertex=distribution[myRank];
    myLastVertex=distribution[myRank+1];
    myNumVertices=myLastVertex-myFirstVertex;
    globalNumVertices=distribution[mpiSize];
    len=0;
    for (p=0;p<mpiSize;++p) len=MAX(len,distribution[p+1]-distribution[p]);
    partition=TMPMEMALLOC(len,index_t);   /* len is used for sending the partition around later on */
    xyz=TMPMEMALLOC(myNumVertices*dim,float);
    partition_count=TMPMEMALLOC(mpiSize+1,dim_t);
    new_distribution=TMPMEMALLOC(mpiSize+1,dim_t);
    newGlobalDOFID=TMPMEMALLOC(len,index_t);
    setNewDOFId=TMPMEMALLOC(in->Nodes->numNodes,bool_t);
    if (!(Finley_checkPtr(partition) || Finley_checkPtr(xyz) || Finley_checkPtr(partition_count) || Finley_checkPtr(new_distribution) || Finley_checkPtr(newGlobalDOFID) || Finley_checkPtr(setNewDOFId))) {
        dim_t *recvbuf=TMPMEMALLOC(mpiSize*mpiSize,dim_t);

        /* set the coordinates: */
        /* it is assumed that at least one node on this processor provides a coordinate */
        #pragma omp parallel for private(i,j,k)
        for (i=0;i<in->Nodes->numNodes;++i) {
            k=in->Nodes->globalDegreesOfFreedom[i]-myFirstVertex;
            if ((k>=0) && (k<myNumVertices)) {
                for (j=0;j<dim;++j) xyz[k*dim+j]=(float)(in->Nodes->Coordinates[INDEX2(j,i,dim)]);
            }
        }

        index_list=TMPMEMALLOC(myNumVertices,Finley_IndexList);
        /* ksteube CSR of DOF IDs */
        /* create the adjacency structure xadj and adjncy */
        if (! Finley_checkPtr(index_list)) {
            #pragma omp parallel private(i)
            {
                #pragma omp for schedule(static)
                for (i=0;i<myNumVertices;++i) {
                    index_list[i].extension=NULL;
                    index_list[i].n=0;
                }
                /* ksteube build CSR format */
                /* insert contributions from the element matrices into the column index_list: */
                Finley_IndexList_insertElementsWithRowRangeNoMainDiagonal(index_list, myFirstVertex, myLastVertex,
                                                                          in->Elements, in->Nodes->globalDegreesOfFreedom,
                                                                          in->Nodes->globalDegreesOfFreedom);
                Finley_IndexList_insertElementsWithRowRangeNoMainDiagonal(index_list, myFirstVertex, myLastVertex,
                                                                          in->FaceElements, in->Nodes->globalDegreesOfFreedom,
                                                                          in->Nodes->globalDegreesOfFreedom);
                Finley_IndexList_insertElementsWithRowRangeNoMainDiagonal(index_list, myFirstVertex, myLastVertex,
                                                                          in->ContactElements, in->Nodes->globalDegreesOfFreedom,
                                                                          in->Nodes->globalDegreesOfFreedom);
                Finley_IndexList_insertElementsWithRowRangeNoMainDiagonal(index_list, myFirstVertex, myLastVertex,
                                                                          in->Points, in->Nodes->globalDegreesOfFreedom,
                                                                          in->Nodes->globalDegreesOfFreedom);
            }

            /* create the local matrix pattern */
            pattern=Finley_IndexList_createPattern(0, myNumVertices, index_list, 0, globalNumVertices, 0);
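
            /* pattern->ptr / pattern->index now describe the DOF coupling graph in zero-based */
            /* CSR form; they are used as the xadj / adjncy arguments when ParMETIS is invoked */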

            /* clean up index list */
            if (index_list!=NULL) {
                #pragma omp parallel for private(i)
                for (i=0;i<myNumVertices;++i) Finley_IndexList_free(index_list[i].extension);
            }

            if (Finley_noError()) {

#ifdef USE_PARMETIS

                if (in->MPIInfo->size>1) {
                    int i;
                    int wgtflag = 0;
                    int numflag = 0;   /* pattern->ptr is C style: starting from 0 instead of 1 */
                    int ncon = 1;
                    int edgecut;
                    int options[2];
                    float *tpwgts = TMPMEMALLOC(ncon*mpiSize,float);
                    float *ubvec = TMPMEMALLOC(ncon,float);
                    for (i=0; i<ncon*mpiSize; i++) tpwgts[i] = 1.0/(float)mpiSize;
                    for (i=0; i<ncon; i++) ubvec[i] = 1.05;
                    options[0] = 3;
                    options[1] = 15;

                    /*
                    {
                        int k=distribution[in->MPIInfo->rank+1]-distribution[in->MPIInfo->rank];
                        int min_i,max_i;
                        printf("INPUT PARMETIS: %d\n",k);
                        for (i=0;i<in->MPIInfo->size+1;++i) printf("%d ",distribution[i]);
                        printf("\n");
                        min_i=pattern->index[0];
                        max_i=pattern->index[0];
                        for (i=0;i<pattern->ptr[k];++i) {
                            min_i=MIN(min_i,pattern->index[i]);
                            max_i=MAX(max_i,pattern->index[i]);
                        }
                        printf("index range = %d : %d\n",min_i,max_i);

                        for (i=0;i<k+1;++i) printf("%d ",pattern->ptr[i]);
                        printf("\n");
                        for (i=0;i<pattern->ptr[k];++i) printf("%d ",pattern->index[i]);
                        printf("\n");
                    }
                    */
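
                    /* ParMETIS_V3_PartGeomKway arguments, in order: vtxdist=distribution,        */
                    /* xadj=pattern->ptr, adjncy=pattern->index, no vertex or edge weights        */
                    /* (NULL, NULL, wgtflag=0), numflag=0 for zero-based indices, ndims=dim with  */
                    /* the vertex coordinates in xyz, ncon=1 constraint with uniform target       */
                    /* weights tpwgts and imbalance tolerance ubvec; outputs are edgecut and      */
                    /* partition, the new owning rank of each local vertex.                       */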
                    ParMETIS_V3_PartGeomKway(distribution,
                                             pattern->ptr,
                                             pattern->index,
                                             NULL,
                                             NULL,
                                             &wgtflag,
                                             &numflag,
                                             &dim,
                                             xyz,
                                             &ncon,
                                             &mpiSize,
                                             tpwgts,
                                             ubvec,
                                             options,
                                             &edgecut,
                                             partition,              /* new CPU ownership of the local vertices (DOFs) */
                                             &(in->MPIInfo->comm));
                    printf("ParMETIS number of edges cut by partitioning per processor: %d\n", edgecut/MAX(in->MPIInfo->size,1));
                    TMPMEMFREE(ubvec);
                    TMPMEMFREE(tpwgts);
                } else {
                    for (i=0;i<myNumVertices;++i) partition[i]=0;      /* CPU 0 owns everything */
                }
#else
                for (i=0;i<myNumVertices;++i) partition[i]=myRank;     /* without ParMETIS every CPU keeps its DOFs */
#endif

            }

            Paso_Pattern_free(pattern);

            /* create a new distribution and labeling of the DOFs */
            memset(new_distribution,0,mpiSize_size);
            #pragma omp parallel private(loc_partition_count)
            {
                loc_partition_count=THREAD_MEMALLOC(mpiSize,dim_t);
                memset(loc_partition_count,0,mpiSize_size);
                #pragma omp for private(i)
                for (i=0;i<myNumVertices;++i) loc_partition_count[partition[i]]++;
                #pragma omp critical
                {
                    for (i=0;i<mpiSize;++i) new_distribution[i]+=loc_partition_count[i];
                }
                THREAD_MEMFREE(loc_partition_count);
            }
#ifdef PASO_MPI
            /* recvbuf will be the concatenation of each CPU's contribution to new_distribution */
            MPI_Allgather(new_distribution, mpiSize, MPI_INT, recvbuf, mpiSize, MPI_INT, in->MPIInfo->comm);
#else
            for (i=0;i<mpiSize;++i) recvbuf[i]=new_distribution[i];
#endif
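
            /* recvbuf[rank+mpiSize*q] holds the number of vertices CPU q assigns to rank.        */
            /* The loop below gives this CPU's vertices that go to rank consecutive new global    */
            /* ids, offset by the counts contributed by CPUs with a smaller rank, and accumulates */
            /* the totals into the cumulative new_distribution[].                                 */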
            new_distribution[0]=0;
            for (rank=0; rank<mpiSize; rank++) {
                c=0;
                for (i=0;i<myRank;++i) c+=recvbuf[rank+mpiSize*i];
                for (i=0;i<myNumVertices;++i) {
                    if (rank==partition[i]) {
                        newGlobalDOFID[i]=new_distribution[rank]+c;
                        c++;
                    }
                }
                for (i=myRank+1;i<mpiSize;++i) c+=recvbuf[rank+mpiSize*i];
                new_distribution[rank+1]=new_distribution[rank]+c;
            }
            TMPMEMFREE(recvbuf);

            /* now the overlap needs to be created by sending the partition around */
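
            /* newGlobalDOFID covers only the DOF range this CPU owned originally. It is passed  */
            /* around the ring of CPUs (the final send is skipped); in each round every CPU      */
            /* relabels those of its nodes whose old global DOF id falls into the range that     */
            /* current_rank owned, so overlapping DOFs receive the same new id on all CPUs.      */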

            dest=Paso_MPIInfo_mod(mpiSize, myRank + 1);
            source=Paso_MPIInfo_mod(mpiSize, myRank - 1);
            current_rank=myRank;
            #pragma omp parallel for private(i)
            for (i=0;i<in->Nodes->numNodes;++i) setNewDOFId[i]=TRUE;

            for (p=0; p<mpiSize; ++p) {

                firstVertex=distribution[current_rank];
                lastVertex=distribution[current_rank+1];
                #pragma omp parallel for private(i,j,k)
                for (i=0;i<in->Nodes->numNodes;++i) {
                    k=in->Nodes->globalDegreesOfFreedom[i];
                    if (setNewDOFId[i] && (firstVertex<=k) && (k<lastVertex)) {
                        in->Nodes->globalDegreesOfFreedom[i]=newGlobalDOFID[k-firstVertex];
                        setNewDOFId[i]=FALSE;
                    }
                }

                if (p<mpiSize-1) {  /* the final send can be skipped */
#ifdef PASO_MPI
                    MPI_Sendrecv_replace(newGlobalDOFID, len, MPI_INT,
                                         dest, in->MPIInfo->msg_tag_counter,
                                         source, in->MPIInfo->msg_tag_counter,
                                         in->MPIInfo->comm, &status);
#endif
                    in->MPIInfo->msg_tag_counter++;
                    current_rank=Paso_MPIInfo_mod(mpiSize, current_rank-1);
                }
            }
            for (i=0;i<mpiSize+1;++i) distribution[i]=new_distribution[i];
        }
        TMPMEMFREE(index_list);
    }
    TMPMEMFREE(newGlobalDOFID);
    TMPMEMFREE(setNewDOFId);
    TMPMEMFREE(new_distribution);
    TMPMEMFREE(partition_count);
    TMPMEMFREE(partition);
    TMPMEMFREE(xyz);
    return;
}
