/trunk/dudley/src/Mesh_optimizeDOFDistribution.c

Revision 3126 - Wed Sep 1 00:37:53 2010 UTC by jfenwick
Original Path: branches/domexper/dudley/src/Mesh_optimizeDOFDistribution.c
Log message: Updated referenceElements and enums

/*******************************************************
*
* Copyright (c) 2003-2010 by University of Queensland
* Earth Systems Science Computational Center (ESSCC)
* http://www.uq.edu.au/esscc
*
* Primary Business: Queensland, Australia
* Licensed under the Open Software License version 3.0
* http://www.opensource.org/licenses/osl-3.0.php
*
*******************************************************/


/**************************************************************/

/* Dudley: Mesh: optimizes the distribution of DOFs across processors */
/* using ParMETIS. On return a new distribution is given and the global DOF numbering is relabeled */
/* accordingly, but the mesh itself has not been redistributed yet */

/**************************************************************/

#include "Mesh.h"
#include "IndexList.h"
#ifdef _OPENMP
#include <omp.h>
#endif
#ifdef USE_PARMETIS
#include "parmetis.h"
#endif

/**************************************************************
   Check whether there is any processor (MPI rank) which owns no
   vertex. If such a rank exists, we do not use ParMETIS, since
   ParMETIS requires that every rank has at least 1 vertex (at
   line 129 of file "xyzpart.c" in ParMETIS 3.1.1, the variable
   "nvtxs" would be 0 if a rank owned no vertex).
 **************************************************************/
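/* Illustration (hypothetical values): with mpiSize=3 and distribution={0,10,10,25}, rank 1 would own
   10-10=0 vertices, so the check fails and the ParMETIS call is skipped on all ranks. */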
#ifdef USE_PARMETIS
int Check_Inputs_For_Parmetis(dim_t mpiSize, dim_t rank, dim_t * distribution, MPI_Comm * comm)
{
    dim_t i, len;
    int ret_val = 1;

    if (rank == 0) {
        for (i = 0; i < mpiSize; i++) {
            len = distribution[i + 1] - distribution[i];
            if (len == 0) {
                ret_val = 0;
                break;
            }
        }
    }
    MPI_Bcast(&ret_val, 1, MPI_INT, 0, *comm);	/* ret_val is a C int, so MPI_INT is the matching MPI type */
    if (ret_val == 0)
        printf("INFO: ParMETIS is not used since some processes have no vertex!\n");
    return ret_val;
}
#endif


/**************************************************************/

void Dudley_Mesh_optimizeDOFDistribution(Dudley_Mesh * in, dim_t * distribution) {

    dim_t dim, i, j, k, myNumVertices, p, mpiSize, len, globalNumVertices, *partition_count = NULL, *new_distribution = NULL, *loc_partition_count = NULL;
    bool_t *setNewDOFId = NULL;
    index_t myFirstVertex, myLastVertex, firstVertex, lastVertex, *newGlobalDOFID = NULL;
    size_t mpiSize_size;
    index_t *partition = NULL;
    Paso_Pattern *pattern = NULL;
    Paso_MPI_rank myRank, dest, source, current_rank, rank;
    Dudley_IndexList *index_list = NULL;
    float *xyz = NULL;
    int c;

#ifdef PASO_MPI
    MPI_Status status;
#endif

    if (in == NULL) return;
    if (in->Nodes == NULL) return;

    myRank = in->MPIInfo->rank;
    mpiSize = in->MPIInfo->size;
    mpiSize_size = mpiSize * sizeof(dim_t);
    dim = in->Nodes->numDim;
    /* first step is to distribute the elements according to a global X of DOF */

    myFirstVertex = distribution[myRank];
    myLastVertex = distribution[myRank + 1];
    myNumVertices = myLastVertex - myFirstVertex;
    globalNumVertices = distribution[mpiSize];
    len = 0;
    for (p = 0; p < mpiSize; ++p) len = MAX(len, distribution[p + 1] - distribution[p]);
    partition = TMPMEMALLOC(len, index_t);	/* len is used for the sending around of partition later on */
    xyz = TMPMEMALLOC(myNumVertices * dim, float);
    partition_count = TMPMEMALLOC(mpiSize + 1, dim_t);
    new_distribution = TMPMEMALLOC(mpiSize + 1, dim_t);
    newGlobalDOFID = TMPMEMALLOC(len, index_t);
    setNewDOFId = TMPMEMALLOC(in->Nodes->numNodes, bool_t);
    if (!(Dudley_checkPtr(partition) || Dudley_checkPtr(xyz) || Dudley_checkPtr(partition_count) ||
          Dudley_checkPtr(new_distribution) || Dudley_checkPtr(newGlobalDOFID) || Dudley_checkPtr(setNewDOFId))) {
        dim_t *recvbuf = TMPMEMALLOC(mpiSize * mpiSize, dim_t);

        /* set the coordinates: */
        /* it is assumed that at least one node on this processor provides a coordinate */
#pragma omp parallel for private(i,j,k)
        for (i = 0; i < in->Nodes->numNodes; ++i) {
            k = in->Nodes->globalDegreesOfFreedom[i] - myFirstVertex;
            if ((k >= 0) && (k < myNumVertices)) {
                for (j = 0; j < dim; ++j) xyz[k * dim + j] = (float)(in->Nodes->Coordinates[INDEX2(j, i, dim)]);
            }
        }

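        /* xyz now holds the dim coordinates of every DOF owned by this rank, stored vertex by vertex;
           this is the geometric information handed to ParMETIS_V3_PartGeomKway below */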
        index_list = TMPMEMALLOC(myNumVertices, Dudley_IndexList);
        /* ksteube CSR of DOF IDs */
        /* create the adjacency structure xadj and adjncy */
        if (!Dudley_checkPtr(index_list)) {
#pragma omp parallel private(i)
            {
#pragma omp for schedule(static)
                for (i = 0; i < myNumVertices; ++i) {
                    index_list[i].extension = NULL;
                    index_list[i].n = 0;
                }
                /* ksteube build CSR format */
                /* insert contributions from element matrices into the column index list index_list: */
                Dudley_IndexList_insertElementsWithRowRangeNoMainDiagonal(index_list, myFirstVertex, myLastVertex,
                                                                          in->Elements, in->Nodes->globalDegreesOfFreedom,
                                                                          in->Nodes->globalDegreesOfFreedom);
                Dudley_IndexList_insertElementsWithRowRangeNoMainDiagonal(index_list, myFirstVertex, myLastVertex,
                                                                          in->FaceElements, in->Nodes->globalDegreesOfFreedom,
                                                                          in->Nodes->globalDegreesOfFreedom);
                Dudley_IndexList_insertElementsWithRowRangeNoMainDiagonal(index_list, myFirstVertex, myLastVertex,
                                                                          in->Points, in->Nodes->globalDegreesOfFreedom,
                                                                          in->Nodes->globalDegreesOfFreedom);
            }
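
            /* each index_list[i] now stores the global DOF numbers that local DOF (row) i is coupled to
               through shared elements; this connectivity graph becomes the adjacency structure for ParMETIS */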

            /* create the local matrix pattern */
            pattern = Dudley_IndexList_createPattern(0, myNumVertices, index_list, 0, globalNumVertices, 0);

            /* clean up index list */
            if (index_list != NULL) {
#pragma omp parallel for private(i)
                for (i = 0; i < myNumVertices; ++i) Dudley_IndexList_free(index_list[i].extension);
            }

            if (Dudley_noError()) {

#ifdef USE_PARMETIS

                if (mpiSize > 1 &&
                    Check_Inputs_For_Parmetis(mpiSize, myRank, distribution, &(in->MPIInfo->comm)) > 0) {
                    int i;
                    int wgtflag = 0;
                    int numflag = 0;	/* pattern->ptr is C style: starting from 0 instead of 1 */
                    int ncon = 1;
                    int edgecut;
                    int options[2];
                    float *tpwgts = TMPMEMALLOC(ncon * mpiSize, float);
                    float *ubvec = TMPMEMALLOC(ncon, float);
                    for (i = 0; i < ncon * mpiSize; i++) tpwgts[i] = 1.0 / (float)mpiSize;
                    for (i = 0; i < ncon; i++) ubvec[i] = 1.05;
                    options[0] = 3;
                    options[1] = 15;
                    ParMETIS_V3_PartGeomKway(distribution,
                                             pattern->ptr,
                                             pattern->index,
                                             NULL,
                                             NULL,
                                             &wgtflag,
                                             &numflag,
                                             &dim,
                                             xyz,
                                             &ncon,
                                             &mpiSize,
                                             tpwgts,
                                             ubvec,
                                             options,
                                             &edgecut,
                                             partition,	/* new CPU ownership of the vertices */
                                             &(in->MPIInfo->comm));
                    /* printf("ParMETIS number of edges cut by partitioning per processor: %d\n", edgecut/MAX(in->MPIInfo->size,1)); */
                    TMPMEMFREE(ubvec);
                    TMPMEMFREE(tpwgts);
                } else {
                    for (i = 0; i < myNumVertices; ++i) partition[i] = 0;	/* CPU 0 owns all vertices */
                }
#else
                for (i = 0; i < myNumVertices; ++i) partition[i] = myRank;	/* without ParMETIS each rank keeps its own vertices */
#endif
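
                /* partition[i] now holds the MPI rank that will own local vertex i under the new distribution */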

            }

            Paso_Pattern_free(pattern);

            /* create a new distribution and labeling of the DOF */
            memset(new_distribution, 0, mpiSize_size);
#pragma omp parallel private(loc_partition_count)
            {
                loc_partition_count = THREAD_MEMALLOC(mpiSize, dim_t);
                memset(loc_partition_count, 0, mpiSize_size);
#pragma omp for private(i)
                for (i = 0; i < myNumVertices; ++i) loc_partition_count[partition[i]]++;
#pragma omp critical
                {
                    for (i = 0; i < mpiSize; ++i) new_distribution[i] += loc_partition_count[i];
                }
                THREAD_MEMFREE(loc_partition_count);
            }
#ifdef PASO_MPI
            /* recvbuf will be the concatenation of each CPU's contribution to new_distribution */
            MPI_Allgather(new_distribution, mpiSize, MPI_INT, recvbuf, mpiSize, MPI_INT, in->MPIInfo->comm);
#else
            for (i = 0; i < mpiSize; ++i) recvbuf[i] = new_distribution[i];
#endif
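            /* recvbuf is an mpiSize x mpiSize table: entry recvbuf[rank + mpiSize*q] is the number of
               vertices that process q assigns to process rank */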
            new_distribution[0] = 0;
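            /* relabel: a vertex that process q hands to process rank receives the id new_distribution[rank]
               plus the number of vertices assigned to rank by processes 0..q-1 plus its position among q's own
               vertices destined for rank; on exit new_distribution[] is the cumulative vertex count of the new
               partitioning */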
            for (rank = 0; rank < mpiSize; rank++) {
                c = 0;
                for (i = 0; i < myRank; ++i) c += recvbuf[rank + mpiSize * i];
                for (i = 0; i < myNumVertices; ++i) {
                    if (rank == partition[i]) {
                        newGlobalDOFID[i] = new_distribution[rank] + c;
                        c++;
                    }
                }
                for (i = myRank + 1; i < mpiSize; ++i) c += recvbuf[rank + mpiSize * i];
                new_distribution[rank + 1] = new_distribution[rank] + c;
            }
            TMPMEMFREE(recvbuf);

            /* now the overlap needs to be created by sending the partition around */

            dest = Paso_MPIInfo_mod(mpiSize, myRank + 1);
            source = Paso_MPIInfo_mod(mpiSize, myRank - 1);
            current_rank = myRank;
#pragma omp parallel for private(i)
            for (i = 0; i < in->Nodes->numNodes; ++i) setNewDOFId[i] = TRUE;

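            /* the newGlobalDOFID block is passed around all processes in a ring: in step p this process holds
               the block of rank current_rank and relabels every DOF (owned or overlap) whose old global id lies
               in [firstVertex, lastVertex) of that block */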
            for (p = 0; p < mpiSize; ++p) {

                firstVertex = distribution[current_rank];
                lastVertex = distribution[current_rank + 1];
#pragma omp parallel for private(i,j,k)
                for (i = 0; i < in->Nodes->numNodes; ++i) {
                    k = in->Nodes->globalDegreesOfFreedom[i];
                    if (setNewDOFId[i] && (firstVertex <= k) && (k < lastVertex)) {
                        in->Nodes->globalDegreesOfFreedom[i] = newGlobalDOFID[k - firstVertex];
                        setNewDOFId[i] = FALSE;
                    }
                }

                if (p < mpiSize - 1) {	/* the final send can be skipped */
#ifdef PASO_MPI
                    MPI_Sendrecv_replace(newGlobalDOFID, len, MPI_INT,
                                         dest, in->MPIInfo->msg_tag_counter,
                                         source, in->MPIInfo->msg_tag_counter,
                                         in->MPIInfo->comm, &status);
#endif
                    in->MPIInfo->msg_tag_counter++;
                    current_rank = Paso_MPIInfo_mod(mpiSize, current_rank - 1);
                }
            }
            for (i = 0; i < mpiSize + 1; ++i) distribution[i] = new_distribution[i];
        }
        TMPMEMFREE(index_list);
    }
    TMPMEMFREE(newGlobalDOFID);
    TMPMEMFREE(setNewDOFId);
    TMPMEMFREE(new_distribution);
    TMPMEMFREE(partition_count);
    TMPMEMFREE(partition);
    TMPMEMFREE(xyz);
    return;
}
