branches/doubleplusgood/finley/src/Mesh_optimizeDOFDistribution.cpp

Revision 4327 - Wed Mar 20 05:09:11 2013 UTC (by jfenwick)
File size: 11995 bytes
Log message: some finley memory
/*****************************************************************************
*
* Copyright (c) 2003-2013 by University of Queensland
* http://www.uq.edu.au
*
* Primary Business: Queensland, Australia
* Licensed under the Open Software License version 3.0
* http://www.opensource.org/licenses/osl-3.0.php
*
* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
* Development since 2012 by School of Earth Sciences
*
*****************************************************************************/


/************************************************************************************/

/* Finley: Mesh: optimizes the distribution of DOFs across processors        */
/* using ParMETIS. On return a new distribution is given and the globalDOF   */
/* numbers are relabeled accordingly, but the mesh is not redistributed yet. */

/************************************************************************************/
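
/* Outline (matching the code below):
   (1) collect the coordinates of the DOFs owned under the current distribution,
   (2) build the CSR adjacency graph of those DOFs from the element tables,
   (3) call ParMETIS_V3_PartGeomKway to partition the graph,
   (4) derive a new distribution and a contiguous global DOF numbering from the
       returned partition, and
   (5) circulate the new numbering around all ranks so that overlap (non-owned)
       DOFs are relabeled as well. */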

#include "Mesh.h"
#include "IndexList.h"
#ifdef _OPENMP
#include <omp.h>
#endif
#ifdef USE_PARMETIS
#include "parmetis.h"
#endif

/************************************************************************************
 Check whether there is any processor which has no vertex. If such a processor
 exists, we do not use ParMETIS, since ParMETIS requires that every processor
 has at least one vertex (at line 129 of file "xyzpart.c" in ParMETIS 3.1.1,
 the variable "nvtxs" would be 0 if any processor has no vertex).
************************************************************************************/
#ifdef USE_PARMETIS
int Check_Inputs_For_Parmetis(dim_t mpiSize, dim_t rank, dim_t *distribution, MPI_Comm *comm)
{
    dim_t i, len;
    int ret_val = 1;

    if (rank == 0) {
        for (i = 0; i < mpiSize; i++) {
            len = distribution[i+1] - distribution[i];
            if (len == 0) {
                ret_val = 0;
                break;
            }
        }
    }
    /* broadcast the decision made on rank 0 to all processors */
    MPI_Bcast(&ret_val, 1, MPI_INT, 0, *comm);
    if (ret_val == 0)
        printf("INFO: ParMETIS is not used since some processors have no vertex!\n");
    return ret_val;
}
#endif
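
/* Example: with mpiSize = 3, the distribution {0, 5, 5, 9} fails the check
   because rank 1 would own distribution[2]-distribution[1] = 0 vertices;
   0 is then broadcast and the caller falls back to a trivial partitioning. */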


/************************************************************************************/

void Finley_Mesh_optimizeDOFDistribution(Finley_Mesh* in, dim_t *distribution)
{
    dim_t dim, i, j, k, myNumVertices, p, mpiSize, len, globalNumVertices;
    dim_t *partition_count=NULL, *new_distribution=NULL, *loc_partition_count=NULL;
    bool_t *setNewDOFId=NULL;
    index_t myFirstVertex, myLastVertex, firstVertex, lastVertex, *newGlobalDOFID=NULL;
    size_t mpiSize_size;
    index_t *partition=NULL;
    Paso_Pattern *pattern=NULL;
    Esys_MPI_rank myRank, current_rank, rank;
    Finley_IndexList *index_list=NULL;
    float *xyz=NULL;
    int c;

#ifdef ESYS_MPI
    Esys_MPI_rank dest, source;
    MPI_Status status;
#endif

    if (in == NULL) return;
    if (in->Nodes == NULL) return;

    myRank = in->MPIInfo->rank;
    mpiSize = in->MPIInfo->size;
    mpiSize_size = mpiSize * sizeof(dim_t);
    dim = in->Nodes->numDim;
    /* first step is to distribute the elements according to a global numbering of the DOFs */

    myFirstVertex = distribution[myRank];
    myLastVertex = distribution[myRank+1];
    myNumVertices = myLastVertex - myFirstVertex;
    globalNumVertices = distribution[mpiSize];
    len = 0;
    for (p = 0; p < mpiSize; ++p) len = MAX(len, distribution[p+1]-distribution[p]);
    partition = new index_t[len]; /* len (the largest chunk) sizes the buffers sent around later on */
    xyz = new float[myNumVertices*dim];
    partition_count = new dim_t[mpiSize+1];
    new_distribution = new dim_t[mpiSize+1];
    newGlobalDOFID = new index_t[len];
    setNewDOFId = new bool_t[in->Nodes->numNodes];
    if (!(Finley_checkPtr(partition) || Finley_checkPtr(xyz) || Finley_checkPtr(partition_count) || Finley_checkPtr(new_distribution) || Finley_checkPtr(newGlobalDOFID) || Finley_checkPtr(setNewDOFId))) {
        dim_t *recvbuf = new dim_t[mpiSize*mpiSize];

        /* set the coordinates: */
        /* it is assumed that at least one node on this processor provides a coordinate */
        #pragma omp parallel for private(i,j,k)
        for (i = 0; i < in->Nodes->numNodes; ++i) {
            k = in->Nodes->globalDegreesOfFreedom[i] - myFirstVertex;
            if ((k >= 0) && (k < myNumVertices)) {
                for (j = 0; j < dim; ++j)
                    xyz[k*dim+j] = (float)(in->Nodes->Coordinates[INDEX2(j,i,dim)]);
            }
        }
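
        /* xyz now holds, for each locally owned vertex k (0 <= k < myNumVertices),
           the dim coordinates of the DOF with global number myFirstVertex+k;
           ParMETIS uses these for the geometric part of PartGeomKway */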

        index_list = new Finley_IndexList[myNumVertices];
        /* ksteube CSR of DOF IDs */
        /* create the adjacency structure xadj and adjncy */
        if (!Finley_checkPtr(index_list)) {
            #pragma omp parallel private(i)
            {
                #pragma omp for schedule(static)
                for (i = 0; i < myNumVertices; ++i) {
                    index_list[i].extension = NULL;
                    index_list[i].n = 0;
                }
                /* ksteube build CSR format */
                /* insert contributions from element matrices into the column index list index_list: */
                Finley_IndexList_insertElementsWithRowRangeNoMainDiagonal(index_list, myFirstVertex, myLastVertex,
                        in->Elements, in->Nodes->globalDegreesOfFreedom,
                        in->Nodes->globalDegreesOfFreedom);
                Finley_IndexList_insertElementsWithRowRangeNoMainDiagonal(index_list, myFirstVertex, myLastVertex,
                        in->FaceElements, in->Nodes->globalDegreesOfFreedom,
                        in->Nodes->globalDegreesOfFreedom);
                Finley_IndexList_insertElementsWithRowRangeNoMainDiagonal(index_list, myFirstVertex, myLastVertex,
                        in->ContactElements, in->Nodes->globalDegreesOfFreedom,
                        in->Nodes->globalDegreesOfFreedom);
                Finley_IndexList_insertElementsWithRowRangeNoMainDiagonal(index_list, myFirstVertex, myLastVertex,
                        in->Points, in->Nodes->globalDegreesOfFreedom,
                        in->Nodes->globalDegreesOfFreedom);
            }

            /* create the local matrix pattern */
            pattern = Finley_IndexList_createPattern(0, myNumVertices, index_list, 0, globalNumVertices, 0);
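
            /* pattern->ptr and pattern->index form the CSR graph that ParMETIS expects:
               pattern->ptr has myNumVertices+1 entries, and the neighbours of local
               vertex i are the global numbers
               pattern->index[pattern->ptr[i]] .. pattern->index[pattern->ptr[i+1]-1]
               (the main diagonal, i.e. the vertex itself, is excluded) */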

            /* clean up index list */
            if (index_list != NULL) {
                #pragma omp parallel for private(i)
                for (i = 0; i < myNumVertices; ++i) Finley_IndexList_free(index_list[i].extension);
            }

            if (Finley_noError()) {

#ifdef USE_PARMETIS

                if (mpiSize > 1 &&
                    Check_Inputs_For_Parmetis(mpiSize, myRank, distribution, &(in->MPIInfo->comm)) > 0) {
                    int i;
                    int wgtflag = 0; /* no vertex or edge weights */
                    int numflag = 0; /* pattern->ptr is C style: starting from 0 instead of 1 */
                    int ncon = 1;    /* one balance constraint per vertex */
                    int edgecut;
                    int options[2];
                    float *tpwgts = new float[ncon*mpiSize];
                    float *ubvec = new float[ncon];
                    for (i = 0; i < ncon*mpiSize; i++) tpwgts[i] = 1.0/(float)mpiSize; /* equal target weight per part */
                    for (i = 0; i < ncon; i++) ubvec[i] = 1.05; /* allow 5% load imbalance */
                    options[0] = 3;
                    options[1] = 15;
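                    /* ParMETIS 3.x convention (assumed here): a nonzero options[0]
                       activates the user-supplied settings and options[1] sets the
                       debug/reporting level */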
                    ParMETIS_V3_PartGeomKway(distribution,
                                             pattern->ptr,
                                             pattern->index,
                                             NULL,
                                             NULL,
                                             &wgtflag,
                                             &numflag,
                                             &dim,
                                             xyz,
                                             &ncon,
                                             &mpiSize,
                                             tpwgts,
                                             ubvec,
                                             options,
                                             &edgecut,
                                             partition, /* new CPU ownership of elements */
                                             &(in->MPIInfo->comm));
                    /* printf("ParMETIS number of edges cut by partitioning per processor: %d\n", edgecut/MAX(in->MPIInfo->size,1)); */
                    delete[] ubvec;
                    delete[] tpwgts;
                } else {
                    for (i = 0; i < myNumVertices; ++i) partition[i] = 0; /* CPU 0 owns it */
                }
#else
                for (i = 0; i < myNumVertices; ++i) partition[i] = myRank; /* CPU myRank owns it */
#endif

            }

            Paso_Pattern_free(pattern);

            /* create a new distribution and labeling of the DOF */
            memset(new_distribution, 0, mpiSize_size);
            #pragma omp parallel private(loc_partition_count)
            {
                loc_partition_count = THREAD_MEMALLOC(mpiSize, dim_t);
                memset(loc_partition_count, 0, mpiSize_size);
                #pragma omp for private(i)
                for (i = 0; i < myNumVertices; ++i) loc_partition_count[partition[i]]++;
                #pragma omp critical
                {
                    for (i = 0; i < mpiSize; ++i) new_distribution[i] += loc_partition_count[i];
                }
                THREAD_MEMFREE(loc_partition_count);
            }
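
            /* new_distribution[r] now holds the number of locally owned vertices that
               the partitioning assigned to rank r: each thread counted into its own
               loc_partition_count and the counts were merged in the critical section */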
#ifdef ESYS_MPI
            /* recvbuf will be the concatenation of each CPU's contribution to new_distribution */
            MPI_Allgather(new_distribution, mpiSize, MPI_INT, recvbuf, mpiSize, MPI_INT, in->MPIInfo->comm);
#else
            for (i = 0; i < mpiSize; ++i) recvbuf[i] = new_distribution[i];
#endif
            new_distribution[0] = 0;
            for (rank = 0; rank < mpiSize; rank++) {
                c = 0;
                for (i = 0; i < myRank; ++i) c += recvbuf[rank+mpiSize*i];
                for (i = 0; i < myNumVertices; ++i) {
                    if (rank == partition[i]) {
                        newGlobalDOFID[i] = new_distribution[rank]+c;
                        c++;
                    }
                }
                for (i = myRank+1; i < mpiSize; ++i) c += recvbuf[rank+mpiSize*i];
                new_distribution[rank+1] = new_distribution[rank]+c;
            }
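
            /* the numbering is globally consistent: recvbuf[rank+mpiSize*i] is the number
               of vertices rank i assigns to 'rank', so the offset c first skips the
               vertices contributed by lower-numbered processors before labeling our own,
               and new_distribution becomes the prefix sum of the per-rank totals */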
            delete[] recvbuf;

            /* now the overlap needs to be created by sending the new numbering around */
#ifdef ESYS_MPI
            dest = Esys_MPIInfo_mod(mpiSize, myRank + 1);
            source = Esys_MPIInfo_mod(mpiSize, myRank - 1);
#endif
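            /* ring exchange: in each of the mpiSize rounds this rank relabels the nodes
               whose DOFs fall into current_rank's chunk of the old distribution, then
               passes the newGlobalDOFID buffer on to its right neighbour; afterwards
               every DOF referenced here, owned or not, carries its new global number */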
            current_rank = myRank;
            #pragma omp parallel for private(i)
            for (i = 0; i < in->Nodes->numNodes; ++i) setNewDOFId[i] = TRUE;

            for (p = 0; p < mpiSize; ++p) {

                firstVertex = distribution[current_rank];
                lastVertex = distribution[current_rank+1];
                #pragma omp parallel for private(i,j,k)
                for (i = 0; i < in->Nodes->numNodes; ++i) {
                    k = in->Nodes->globalDegreesOfFreedom[i];
                    if (setNewDOFId[i] && (firstVertex <= k) && (k < lastVertex)) {
                        in->Nodes->globalDegreesOfFreedom[i] = newGlobalDOFID[k-firstVertex];
                        setNewDOFId[i] = FALSE;
                    }
                }

                if (p < mpiSize-1) { /* the final send can be skipped */
#ifdef ESYS_MPI
                    MPI_Sendrecv_replace(newGlobalDOFID, len, MPI_INT,
                                         dest, in->MPIInfo->msg_tag_counter,
                                         source, in->MPIInfo->msg_tag_counter,
                                         in->MPIInfo->comm, &status);
#endif
                    in->MPIInfo->msg_tag_counter++;
                    current_rank = Esys_MPIInfo_mod(mpiSize, current_rank-1);
                }
            }
            for (i = 0; i < mpiSize+1; ++i) distribution[i] = new_distribution[i];
        }
        delete[] index_list;
    }
    delete[] newGlobalDOFID;
    delete[] setNewDOFId;
    delete[] new_distribution;
    delete[] partition_count;
    delete[] partition;
    delete[] xyz;
    return;
}
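
/* Note: only the distribution array and the globalDegreesOfFreedom numbering are
   updated here; physically moving nodes and elements to their new owners is left
   to the subsequent redistribution step elsewhere in finley. */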
