
Annotation of /trunk/finley/src/Mesh_optimizeDOFDistribution.c



Revision 1722 - Fri Aug 22 04:20:30 2008 UTC by gross
File MIME type: text/plain
File size: 11019 byte(s)
The CSR matrix pattern handed over to ParMetis must not contain the main diagonal entry.
This revision fixes the problem.


/* $Id$ */

/*******************************************************
 *
 *       Copyright 2003-2007 by ACceSS MNRF
 *       Copyright 2007 by University of Queensland
 *
 *                http://esscc.uq.edu.au
 *        Primary Business: Queensland, Australia
 *  Licensed under the Open Software License version 3.0
 *     http://www.opensource.org/licenses/osl-3.0.php
 *
 *******************************************************/

/**************************************************************/

/* Finley: Mesh: optimizes the distribution of DOFs across processors          */
/* using ParMETIS. On return a new distribution is given and the globalDOFs    */
/* are relabelled accordingly, but the mesh has not been redistributed yet.    */

/**************************************************************/
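
/* Calling contract (summarized from the code below): the caller supplies, for every rank p,
 * the first global DOF owned by p in distribution[p], with distribution[mpiSize] holding the
 * total number of DOFs. On return distribution has been overwritten with the optimized
 * distribution and in->Nodes->globalDegreesOfFreedom has been relabelled accordingly. */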

#include "Mesh.h"
#include "IndexList.h"
#ifdef _OPENMP
#include <omp.h>
#endif
#ifdef USE_PARMETIS
#include "parmetis.h"
#endif

/**************************************************************/

void Finley_Mesh_optimizeDOFDistribution(Finley_Mesh* in, dim_t* distribution) {

    dim_t dim, i, j, k, myNumVertices, p, mpiSize, len, globalNumVertices, *partition_count=NULL, *new_distribution=NULL, *loc_partition_count=NULL;
    bool_t *setNewDOFId=NULL;
    index_t myFirstVertex, myLastVertex, firstVertex, lastVertex, *newGlobalDOFID=NULL;
    size_t mpiSize_size;
    index_t* partition=NULL;
    Paso_Pattern *pattern=NULL;
    Paso_MPI_rank myRank, dest, source, current_rank, rank;
    Finley_IndexList* index_list=NULL;
    float *xyz=NULL;
    int c;

    #ifdef PASO_MPI
    MPI_Status status;
    #endif

    if (in==NULL) return;
    if (in->Nodes == NULL) return;

    myRank=in->MPIInfo->rank;
    mpiSize=in->MPIInfo->size;
    mpiSize_size=mpiSize*sizeof(dim_t);
    dim=in->Nodes->numDim;
    /* first step is to distribute the elements according to a global numbering of the DOFs */

    myFirstVertex=distribution[myRank];
    myLastVertex=distribution[myRank+1];
    myNumVertices=myLastVertex-myFirstVertex;
    globalNumVertices=distribution[mpiSize];
    len=0;
    for (p=0;p<mpiSize;++p) len=MAX(len,distribution[p+1]-distribution[p]);
    partition=TMPMEMALLOC(len,index_t); /* len is used for the sending around of partition later on */
    xyz=TMPMEMALLOC(myNumVertices*dim,float);
    partition_count=TMPMEMALLOC(mpiSize+1,dim_t);
    new_distribution=TMPMEMALLOC(mpiSize+1,dim_t);
    newGlobalDOFID=TMPMEMALLOC(len,index_t);
    setNewDOFId=TMPMEMALLOC(in->Nodes->numNodes,bool_t);
    if (!(Finley_checkPtr(partition) || Finley_checkPtr(xyz) || Finley_checkPtr(partition_count) || Finley_checkPtr(new_distribution) || Finley_checkPtr(newGlobalDOFID) || Finley_checkPtr(setNewDOFId))) {
        dim_t *recvbuf=TMPMEMALLOC(mpiSize*mpiSize,dim_t);

        /* set the coordinates: */
        /* it is assumed that at least one node on this processor provides a coordinate */
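        /* k = globalDegreesOfFreedom[i] - myFirstVertex maps a node to the local index of the DOF it
           carries; only nodes whose DOF is owned by this rank (0 <= k < myNumVertices) contribute */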
        #pragma omp parallel for private(i,j,k)
        for (i=0;i<in->Nodes->numNodes;++i) {
            k=in->Nodes->globalDegreesOfFreedom[i]-myFirstVertex;
            if ((k>=0) && (k<myNumVertices)) {
                for (j=0;j<dim;++j) xyz[k*dim+j]=(float)(in->Nodes->Coordinates[INDEX2(j,i,dim)]);
            }
        }

        index_list=TMPMEMALLOC(myNumVertices,Finley_IndexList);
        /* ksteube CSR of DOF IDs */
        /* create the adjacency structure xadj and adjncy */
        if (! Finley_checkPtr(index_list)) {
            #pragma omp parallel private(i)
            {
                #pragma omp for schedule(static)
                for(i=0;i<myNumVertices;++i) {
                    index_list[i].extension=NULL;
                    index_list[i].n=0;
                }
                /* ksteube build CSR format */
                /* insert contributions from element matrices into the column index list index_list: */
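                /* the ...NoMainDiagonal variants skip the entry that would couple a vertex with itself,
                   as the adjacency lists handed to ParMETIS must not contain main diagonal entries */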
                Finley_IndexList_insertElementsWithRowRangeNoMainDiagonal(index_list, myFirstVertex, myLastVertex,
                                                                          in->Elements, in->Nodes->globalDegreesOfFreedom,
                                                                          in->Nodes->globalDegreesOfFreedom);
                Finley_IndexList_insertElementsWithRowRangeNoMainDiagonal(index_list, myFirstVertex, myLastVertex,
                                                                          in->FaceElements, in->Nodes->globalDegreesOfFreedom,
                                                                          in->Nodes->globalDegreesOfFreedom);
                Finley_IndexList_insertElementsWithRowRangeNoMainDiagonal(index_list, myFirstVertex, myLastVertex,
                                                                          in->ContactElements, in->Nodes->globalDegreesOfFreedom,
                                                                          in->Nodes->globalDegreesOfFreedom);
                Finley_IndexList_insertElementsWithRowRangeNoMainDiagonal(index_list, myFirstVertex, myLastVertex,
                                                                          in->Points, in->Nodes->globalDegreesOfFreedom,
                                                                          in->Nodes->globalDegreesOfFreedom);
            }

            /* create the local matrix pattern */
            pattern=Finley_IndexList_createPattern(0, myNumVertices, index_list, 0, globalNumVertices, 0);

            /* clean up index list */
            if (index_list!=NULL) {
                #pragma omp parallel for private(i)
                for(i=0;i<myNumVertices;++i) Finley_IndexList_free(index_list[i].extension);
            }
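            /* compact the pattern in place: drop any remaining entry that equals the row's own global
               vertex number (myFirstVertex+i), so that no self-loops are passed to ParMETIS (see the
               revision log for revision 1722) */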
            {
                int s=0, s0;
                for (i=0;i<myNumVertices;++i) {
                    s0=s;
                    for (j=pattern->ptr[i];j<pattern->ptr[i+1];++j) {
                        if (pattern->index[j] != myFirstVertex+i) {
                            pattern->index[s]=pattern->index[j];
                            s++;
                        }
                    }
                    pattern->ptr[i]=s0;
                }
                pattern->ptr[myNumVertices]=s;
            }

            if (Finley_noError()) {

                #ifdef USE_PARMETIS

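                /* ParMETIS_V3_PartGeomKway partitions the distributed graph described by
                   (distribution, pattern->ptr, pattern->index) using both the adjacency structure and
                   the vertex coordinates xyz; on return partition[i] holds the rank that should own
                   local vertex i. tpwgts requests partitions of equal size and ubvec allows a 5%
                   imbalance per constraint. */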
                if (in->MPIInfo->size>1) {
                    int i;
                    int wgtflag = 0;
                    int numflag = 0;   /* pattern->ptr is C style: starting from 0 instead of 1 */
                    int ncon = 1;
                    int edgecut;
                    int options[2];
                    float *tpwgts = TMPMEMALLOC(ncon*mpiSize,float);
                    float *ubvec = TMPMEMALLOC(ncon,float);
                    for (i=0; i<ncon*mpiSize; i++) tpwgts[i] = 1.0/(float)mpiSize;
                    for (i=0; i<ncon; i++) ubvec[i] = 1.05;
                    options[0] = 3;
                    options[1] = 15;
                    ParMETIS_V3_PartGeomKway(distribution,
                                             pattern->ptr,
                                             pattern->index,
                                             NULL,
                                             NULL,
                                             &wgtflag,
                                             &numflag,
                                             &dim,
                                             xyz,
                                             &ncon,
                                             &mpiSize,
                                             tpwgts,
                                             ubvec,
                                             options,
                                             &edgecut,
                                             partition,   /* new CPU ownership of elements */
                                             &(in->MPIInfo->comm));
                    printf("ParMETIS number of edges cut by partitioning: %d\n", edgecut);
                    TMPMEMFREE(ubvec);
                    TMPMEMFREE(tpwgts);
                } else {
                    for (i=0;i<myNumVertices;++i) partition[i]=0;   /* CPU 0 owns everything */
                }
                #else
                for (i=0;i<myNumVertices;++i) partition[i]=myRank;  /* without ParMETIS every rank keeps its current vertices */
                #endif

            }

            Paso_Pattern_free(pattern);

            /* create a new distribution and labelling of the DOFs */
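            /* new_distribution[p] will hold the number of this rank's vertices that were assigned to
               partition p; each thread counts into loc_partition_count and the per-thread counts are
               merged inside the critical section */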
            memset(new_distribution,0,mpiSize_size);
            #pragma omp parallel private(loc_partition_count)
            {
                loc_partition_count=THREAD_MEMALLOC(mpiSize,dim_t);
                memset(loc_partition_count,0,mpiSize_size);
                #pragma omp for private(i)
                for (i=0;i<myNumVertices;++i) loc_partition_count[partition[i]]++;
                #pragma omp critical
                {
                    for (i=0;i<mpiSize;++i) new_distribution[i]+=loc_partition_count[i];
                }
                THREAD_MEMFREE(loc_partition_count);
            }
            #ifdef PASO_MPI
            /* recvbuf will be the concatenation of each CPU's contribution to new_distribution */
            MPI_Allgather(new_distribution, mpiSize, MPI_INT, recvbuf, mpiSize, MPI_INT, in->MPIInfo->comm);
            #else
            for (i=0;i<mpiSize;++i) recvbuf[i]=new_distribution[i];
            #endif
            new_distribution[0]=0;
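            /* recvbuf[rank+mpiSize*p] is the number of vertices process p assigns to partition rank;
               c first skips the contributions of all processes below myRank, so this rank's vertices
               assigned to "rank" are numbered consecutively after them, and new_distribution[rank+1]
               ends up as the first new global DOF id of rank+1 */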
            for (rank=0; rank<mpiSize; rank++) {
                c=0;
                for (i=0;i<myRank;++i) c+=recvbuf[rank+mpiSize*i];
                for (i=0;i<myNumVertices;++i) {
                    if (rank==partition[i]) {
                        newGlobalDOFID[i]=new_distribution[rank]+c;
                        c++;
                    }
                }
                for (i=myRank+1;i<mpiSize;++i) c+=recvbuf[rank+mpiSize*i];
                new_distribution[rank+1]=new_distribution[rank]+c;
            }
            TMPMEMFREE(recvbuf);

            /* now the overlap needs to be created by sending the partition around */

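            /* newGlobalDOFID is passed around the ring of processes (mpiSize-1 hops); at every step the
               nodes whose old global DOF falls into the current sender's range are relabelled, so the
               overlapping (shared) DOFs also receive their new ids; setNewDOFId guards against
               relabelling a node twice */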
            dest=Paso_MPIInfo_mod(mpiSize, myRank + 1);
            source=Paso_MPIInfo_mod(mpiSize, myRank - 1);
            current_rank=myRank;
            #pragma omp parallel for private(i)
            for (i=0;i<in->Nodes->numNodes;++i) setNewDOFId[i]=TRUE;

            for (p=0; p<mpiSize; ++p) {

                firstVertex=distribution[current_rank];
                lastVertex=distribution[current_rank+1];
                #pragma omp parallel for private(i,j,k)
                for (i=0;i<in->Nodes->numNodes;++i) {
                    k=in->Nodes->globalDegreesOfFreedom[i];
                    if (setNewDOFId[i] && (firstVertex<=k) && (k<lastVertex)) {
                        in->Nodes->globalDegreesOfFreedom[i]=newGlobalDOFID[k-firstVertex];
                        setNewDOFId[i]=FALSE;
                    }
                }

                if (p<mpiSize-1) {   /* the final send can be skipped */
                    #ifdef PASO_MPI
                    MPI_Sendrecv_replace(newGlobalDOFID, len, MPI_INT,
                                         dest, in->MPIInfo->msg_tag_counter,
                                         source, in->MPIInfo->msg_tag_counter,
                                         in->MPIInfo->comm, &status);
                    #endif
                    in->MPIInfo->msg_tag_counter++;
                    current_rank=Paso_MPIInfo_mod(mpiSize, current_rank-1);
                }
            }
            for (i=0;i<mpiSize+1;++i) distribution[i]=new_distribution[i];

        }
        TMPMEMFREE(index_list);
    }
    TMPMEMFREE(newGlobalDOFID);
    TMPMEMFREE(setNewDOFId);
    TMPMEMFREE(new_distribution);
    TMPMEMFREE(partition_count);
    TMPMEMFREE(partition);
    TMPMEMFREE(xyz);
    return;
}
