/[escript]/branches/trilinos_from_5897/dudley/src/Mesh_optimizeDOFDistribution.cpp
Revision 6009 - Wed Mar 2 04:13:26 2016 UTC by caltinay
Much needed sync with trunk...

/*****************************************************************************
*
* Copyright (c) 2003-2016 by The University of Queensland
* http://www.uq.edu.au
*
* Primary Business: Queensland, Australia
* Licensed under the Open Software License version 3.0
* http://www.opensource.org/licenses/osl-3.0.php
*
* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
* Development 2012-2013 by School of Earth Sciences
* Development from 2014 by Centre for Geoscience Computing (GeoComp)
*
*****************************************************************************/

/****************************************************************************/

/* Dudley: Mesh: optimizes the distribution of DOFs across processors      */
/* using ParMETIS. On return a new distribution is given and the global    */
/* DOFs are relabelled accordingly, but the mesh itself has not been       */
/* redistributed yet.                                                       */

/****************************************************************************/

#include "Mesh.h"
#include "IndexList.h"

#ifdef USE_PARMETIS
#include "parmetis.h"
#ifndef REALTYPEWIDTH
typedef float real_t;
#endif
#endif

#include <boost/scoped_array.hpp>

namespace dudley {

/*****************************************************************************
   Check whether there is any processor which owns no vertex. If such a
   processor exists we do not use ParMETIS, since ParMETIS requires that
   every processor owns at least one vertex (at line 129 of file
   "xyzpart.c" in ParMETIS 3.1.1 the variable "nvtxs" would be 0 if a
   processor owned no vertex).
*****************************************************************************/
#ifdef USE_PARMETIS
int Check_Inputs_For_Parmetis(dim_t mpiSize, dim_t rank, dim_t* distribution, MPI_Comm* comm)
{
    dim_t i, len;
    int ret_val = 1;

    if (rank == 0) {
        for (i = 0; i < mpiSize; i++) {
            len = distribution[i + 1] - distribution[i];
            if (len == 0) {
                ret_val = 0;
                break;
            }
        }
    }
    // ret_val is a C int, so MPI_INT (not the Fortran MPI_INTEGER) is the matching datatype
    MPI_Bcast(&ret_val, 1, MPI_INT, 0, *comm);
    if (ret_val == 0)
        printf("INFO: ParMETIS is not used since some processors own no vertex!\n");
    return ret_val;
}
#endif
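// The check runs on rank 0 only and its result is broadcast, so all ranks agree
// on whether ParMETIS can be used (return value 1) or the trivial fallback
// partition below is taken (return value 0).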

/*****************************************************************************/

void Dudley_Mesh_optimizeDOFDistribution(Dudley_Mesh* in, dim_t* distribution)
{
    if (in == NULL || in->Nodes == NULL)
        return;

    dim_t i, k;
    int rank;
    int c;

    const int myRank = in->MPIInfo->rank;
    dim_t mpiSize = in->MPIInfo->size;

    // first step is to distribute the elements according to a global
    // numbering of the DOFs

    index_t myFirstVertex = distribution[myRank];
    index_t myLastVertex = distribution[myRank + 1];
    dim_t myNumVertices = myLastVertex - myFirstVertex;
    dim_t globalNumVertices = distribution[mpiSize];
    dim_t len = 0;
    for (dim_t p = 0; p < mpiSize; ++p)
        len = std::max(len, distribution[p + 1] - distribution[p]);

    index_t* partition = new index_t[len];
    dim_t* partition_count = new dim_t[mpiSize + 1];
    dim_t* new_distribution = new dim_t[mpiSize + 1];
    index_t* newGlobalDOFID = new index_t[len];
    bool* setNewDOFId = new bool[in->Nodes->numNodes];
    dim_t* recvbuf = new dim_t[mpiSize * mpiSize];

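    // Scratch arrays used below:
    //   partition[i]      - new owner rank of the i-th locally owned vertex (DOF)
    //   new_distribution  - first per-rank counts, later offsets, of the new DOF distribution
    //   newGlobalDOFID[i] - new global DOF number of the i-th locally owned vertex
    //   setNewDOFId[i]    - true while node i still needs its DOF relabelled
    //   recvbuf           - mpiSize x mpiSize table of assignment counts gathered from all ranks
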
#ifdef USE_PARMETIS
    dim_t dim = in->Nodes->numDim;
    real_t* xyz = new real_t[myNumVertices * dim];

    /* set the coordinates: */
    /* it is assumed that at least one node on this processor provides a coordinate */
#pragma omp parallel for private(i,k)
    for (i = 0; i < in->Nodes->numNodes; ++i)
    {
        k = in->Nodes->globalDegreesOfFreedom[i] - myFirstVertex;
        if ((k >= 0) && (k < myNumVertices))
        {
            for (dim_t j = 0; j < dim; ++j)
                xyz[k * dim + j] = (real_t)(in->Nodes->Coordinates[INDEX2(j, i, dim)]);
        }
    }
#endif // USE_PARMETIS

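    // xyz now holds the coordinates of every DOF owned by this rank, indexed by
    // its position within [myFirstVertex, myLastVertex); it feeds the geometric
    // part of ParMETIS_V3_PartGeomKway below.
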
    boost::scoped_array<IndexList> index_list(new IndexList[myNumVertices]);
    /* ksteube CSR of DOF IDs */
    /* create the adjacency structure xadj and adjncy */
#pragma omp parallel
    {
        /* ksteube build CSR format */
        /* insert contributions from element matrices into the column index list index_list: */
        Dudley_IndexList_insertElementsWithRowRangeNoMainDiagonal(index_list.get(),
                myFirstVertex, myLastVertex, in->Elements,
                in->Nodes->globalDegreesOfFreedom,
                in->Nodes->globalDegreesOfFreedom);
        Dudley_IndexList_insertElementsWithRowRangeNoMainDiagonal(index_list.get(),
                myFirstVertex, myLastVertex, in->FaceElements,
                in->Nodes->globalDegreesOfFreedom,
                in->Nodes->globalDegreesOfFreedom);
        Dudley_IndexList_insertElementsWithRowRangeNoMainDiagonal(index_list.get(),
                myFirstVertex, myLastVertex, in->Points,
                in->Nodes->globalDegreesOfFreedom,
                in->Nodes->globalDegreesOfFreedom);
    }

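    // Each IndexList row i now lists the global DOFs coupled to locally owned
    // DOF myFirstVertex+i through at least one element, excluding the diagonal
    // entry, i.e. the DOF connectivity graph that ParMETIS will partition.
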
    /* create the local matrix pattern */
    paso::Pattern_ptr pattern = paso::Pattern::fromIndexListArray(0,
            myNumVertices, index_list.get(), 0, globalNumVertices, 0);

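    // pattern->ptr and pattern->index are the CSR row offsets and column indices
    // of the local rows; together with 'distribution' they describe the
    // distributed graph handed to ParMETIS below.
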
#ifdef USE_PARMETIS
    if (mpiSize > 1 && Check_Inputs_For_Parmetis(mpiSize, myRank, distribution, &(in->MPIInfo->comm)) > 0)
    {
        int i;
        int wgtflag = 0;
        int numflag = 0; /* pattern->ptr is C style: starting from 0 instead of 1 */
        int ncon = 1;
        int edgecut;
        int options[3];
        real_t* tpwgts = new real_t[ncon * mpiSize];
        real_t* ubvec = new real_t[ncon];
        for (i = 0; i < ncon * mpiSize; i++)
            tpwgts[i] = 1.0 / (real_t)mpiSize;
        for (i = 0; i < ncon; i++)
            ubvec[i] = 1.05;
        options[0] = 1;
        options[1] = 15;
        options[2] = 0;
        ParMETIS_V3_PartGeomKway(distribution, pattern->ptr,
                pattern->index, NULL, NULL, &wgtflag, &numflag, &dim,
                xyz, &ncon, &mpiSize, tpwgts, ubvec, options, &edgecut,
                partition, /* new CPU ownership of elements */
                &in->MPIInfo->comm);
        //printf("ParMETIS number of edges cut by partitioning per processor: %d\n", edgecut/std::max(in->MPIInfo->size,1));
        delete[] xyz;
        delete[] ubvec;
        delete[] tpwgts;
    }
    else
    {
        for (i = 0; i < myNumVertices; ++i)
            partition[i] = 0; /* CPU 0 owns it */
        delete[] xyz; /* not needed when ParMETIS is skipped; avoids leaking the coordinate buffer */
    }
#else
    for (i = 0; i < myNumVertices; ++i)
        partition[i] = myRank; /* this rank keeps it */
#endif // USE_PARMETIS

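    // At this point partition[i] is the new owner rank of the i-th locally owned
    // vertex. Without ParMETIS every vertex stays on its current rank; when
    // ParMETIS is compiled in but cannot be used, everything is assigned to rank 0.
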
    // create a new distribution and labelling of the DOF
    const size_t mpiSize_size = mpiSize * sizeof(dim_t);
    memset(new_distribution, 0, mpiSize_size);
#pragma omp parallel
    {
        dim_t* loc_partition_count = new dim_t[mpiSize];
        memset(loc_partition_count, 0, mpiSize_size);
#pragma omp for private(i)
        for (i = 0; i < myNumVertices; ++i)
            loc_partition_count[partition[i]]++;
#pragma omp critical
        {
            for (i = 0; i < mpiSize; ++i)
                new_distribution[i] += loc_partition_count[i];
        }
        delete[] loc_partition_count;
    }
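    // new_distribution[p] now counts how many of this rank's vertices were
    // assigned to rank p (each thread counted locally and merged its counts in
    // the critical section above).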
#ifdef ESYS_MPI
    // recvbuf will be the concatenation of each CPU's contribution to
    // new_distribution
    MPI_Allgather(new_distribution, mpiSize, MPI_INT, recvbuf, mpiSize, MPI_INT, in->MPIInfo->comm);
#else
    for (i = 0; i < mpiSize; ++i)
        recvbuf[i] = new_distribution[i];
#endif
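    // recvbuf[q * mpiSize + p] is the number of vertices rank q assigns to rank p.
    // The loop below gives the vertices of each new owner 'rank' a contiguous block
    // of new global DOF IDs starting at new_distribution[rank], ordered by the old
    // owner rank, and accumulates the block sizes into the new distribution.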
    new_distribution[0] = 0;
    for (rank = 0; rank < mpiSize; rank++)
    {
        c = 0;
        for (i = 0; i < myRank; ++i)
            c += recvbuf[rank + mpiSize * i];
        for (i = 0; i < myNumVertices; ++i)
        {
            if (rank == partition[i])
            {
                newGlobalDOFID[i] = new_distribution[rank] + c;
                c++;
            }
        }
        for (i = myRank + 1; i < mpiSize; ++i)
            c += recvbuf[rank + mpiSize * i];
        new_distribution[rank + 1] = new_distribution[rank] + c;
    }
    delete[] recvbuf;

    // now the overlap needs to be created by sending the partition around
#ifdef ESYS_MPI
    int dest = in->MPIInfo->mod_rank(myRank + 1);
    int source = in->MPIInfo->mod_rank(myRank - 1);
#endif
    int current_rank = myRank;
#pragma omp parallel for private(i)
    for (i = 0; i < in->Nodes->numNodes; ++i)
        setNewDOFId[i] = true;

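    // Circulate the newGlobalDOFID arrays around the process ring: in step p this
    // rank holds the relabelling table of rank 'current_rank' and applies it to all
    // local nodes (including ghost nodes) whose old global DOF falls into that
    // rank's range; setNewDOFId guards against relabelling a node twice.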
    for (dim_t p = 0; p < mpiSize; ++p)
    {
        index_t firstVertex = distribution[current_rank];
        index_t lastVertex = distribution[current_rank + 1];
#pragma omp parallel for private(i,k)
        for (i = 0; i < in->Nodes->numNodes; ++i)
        {
            k = in->Nodes->globalDegreesOfFreedom[i];
            if (setNewDOFId[i] && (firstVertex <= k) && (k < lastVertex))
            {
                in->Nodes->globalDegreesOfFreedom[i] = newGlobalDOFID[k - firstVertex];
                setNewDOFId[i] = false;
            }
        }

        if (p < mpiSize - 1)
        { /* the final send can be skipped */
#ifdef ESYS_MPI
            MPI_Status status;
            MPI_Sendrecv_replace(newGlobalDOFID, len, MPI_INT,
                                 dest, in->MPIInfo->counter(),
                                 source, in->MPIInfo->counter(), in->MPIInfo->comm, &status);
            in->MPIInfo->incCounter();
#endif
            current_rank = in->MPIInfo->mod_rank(current_rank - 1);
        }
    }
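    // hand the new DOF distribution back to the caller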
    for (i = 0; i < mpiSize + 1; ++i)
        distribution[i] = new_distribution[i];

    delete[] newGlobalDOFID;
    delete[] setNewDOFId;
    delete[] new_distribution;
    delete[] partition_count;
    delete[] partition;
}

} // namespace dudley
