
Contents of /trunk/dudley/src/Mesh_optimizeDOFDistribution.cpp



Revision 6141
Wed Apr 6 03:51:30 2016 UTC by caltinay
File size: 8078 bytes
Log message: more namespacing of defines.

/*****************************************************************************
*
* Copyright (c) 2003-2016 by The University of Queensland
* http://www.uq.edu.au
*
* Primary Business: Queensland, Australia
* Licensed under the Apache License, version 2.0
* http://www.apache.org/licenses/LICENSE-2.0
*
* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
* Development 2012-2013 by School of Earth Sciences
* Development from 2014 by Centre for Geoscience Computing (GeoComp)
*
*****************************************************************************/

#include "Mesh.h"
#include "IndexList.h"

#ifdef ESYS_HAVE_PARMETIS
#include "parmetis.h"
#ifndef REALTYPEWIDTH
typedef float real_t;
#endif
#endif

#include <boost/scoped_array.hpp>

namespace dudley {

#ifdef ESYS_HAVE_PARMETIS
// Check whether any rank has no vertex. If such a rank exists we do not
// use ParMETIS, since ParMETIS requires every rank to have at least one
// vertex (at line 129 of file "xyzpart.c" in ParMETIS 3.1.1, the variable
// "nvtxs" would be 0 if a rank had no vertex).
static bool allRanksHaveNodes(escript::JMPI mpiInfo,
                              const std::vector<index_t>& distribution)
{
    int ret = 1;

    if (mpiInfo->rank == 0) {
        for (int i = 0; i < mpiInfo->size; i++) {
            if (distribution[i + 1] == distribution[i]) {
                ret = 0;
                break;
            }
        }
        if (ret == 0)
            std::cerr << "INFO: ParMetis is not used since at least one rank "
                         "has no vertex." << std::endl;
    }
    MPI_Bcast(&ret, 1, MPI_INT, 0, mpiInfo->comm);
    return ret == 1;
}
#endif
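// Example for allRanksHaveNodes() (hypothetical values): with 4 ranks and
// distribution = {0, 50, 50, 90, 120}, rank 1 owns no vertices (50 == 50),
// so the ParMETIS path is skipped and all local DOFs are assigned to rank 0
// in the fallback branch below.
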
/// Optimizes the distribution of DOFs across processors using ParMETIS.
/// On return a new distribution is given and the global DOFs are relabeled
/// accordingly, but the mesh itself has not been redistributed yet.
void Mesh::optimizeDOFDistribution(std::vector<index_t>& distribution)
{
    int mpiSize = MPIInfo->size;
    const int myRank = MPIInfo->rank;
    const index_t myFirstVertex = distribution[myRank];
    const index_t myLastVertex = distribution[myRank + 1];
    const dim_t myNumVertices = myLastVertex - myFirstVertex;
    const dim_t numNodes = Nodes->getNumNodes();

    // first step is to distribute the elements according to a global X of DOF
    dim_t len = 0;
    for (int p = 0; p < mpiSize; ++p)
        len = std::max(len, distribution[p + 1] - distribution[p]);
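    // 'len' is the maximum number of DOFs owned by any single rank; the
    // 'partition' and 'newGlobalDOFID' buffers are sized with it so they can
    // hold any rank's chunk when it is sent around the ranks further below.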

    index_t* partition = new index_t[len];

#ifdef ESYS_HAVE_PARMETIS
    if (mpiSize > 1 && allRanksHaveNodes(MPIInfo, distribution)) {
        boost::scoped_array<IndexList> index_list(new IndexList[myNumVertices]);
        index_t dim = Nodes->numDim;

        // create the adjacency structure xadj and adjncy
#pragma omp parallel
        {
            // insert contributions from element matrices into columns index
            IndexList_insertElementsWithRowRangeNoMainDiagonal(index_list.get(),
                    myFirstVertex, myLastVertex, Elements,
                    Nodes->globalDegreesOfFreedom);
            IndexList_insertElementsWithRowRangeNoMainDiagonal(index_list.get(),
                    myFirstVertex, myLastVertex, FaceElements,
                    Nodes->globalDegreesOfFreedom);
            IndexList_insertElementsWithRowRangeNoMainDiagonal(index_list.get(),
                    myFirstVertex, myLastVertex, Points,
                    Nodes->globalDegreesOfFreedom);
        }
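        // Note: the index lists collect, for each locally owned DOF, the other
        // DOFs it is coupled to through the element, face and point lists
        // (excluding the main diagonal). This connectivity graph becomes the
        // xadj/adjncy arrays handed to ParMETIS below.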

        // create the local matrix pattern
        const dim_t globalNumVertices = distribution[mpiSize];
        paso::Pattern_ptr pattern = paso::Pattern::fromIndexListArray(0,
                myNumVertices, index_list.get(), 0, globalNumVertices, 0);

        // set the coordinates
        real_t* xyz = new real_t[myNumVertices * dim];
#pragma omp parallel for
        for (index_t i = 0; i < numNodes; ++i) {
            const index_t k = Nodes->globalDegreesOfFreedom[i] - myFirstVertex;
            if (k >= 0 && k < myNumVertices) {
                for (int j = 0; j < dim; ++j)
                    xyz[k * dim + j] = (real_t)(Nodes->Coordinates[INDEX2(j, i, dim)]);
            }
        }
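        // At this point xyz holds, for every DOF owned by this rank, its
        // spatial coordinates indexed by the local vertex number k; ParMETIS
        // uses these for the geometric part of the partitioning.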

        index_t wgtflag = 0;
        index_t numflag = 0;
        index_t ncon = 1;
        index_t edgecut;
        index_t impiSize = mpiSize;
        index_t options[3] = { 1, 0, 0 };
        std::vector<real_t> tpwgts(ncon * mpiSize, 1.f / mpiSize);
        std::vector<real_t> ubvec(ncon, 1.05f);
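        // Argument mapping for the call below: distribution acts as vtxdist,
        // pattern->ptr/pattern->index as xadj/adjncy, no vertex or edge
        // weights (NULL, NULL), dim/xyz describe the geometry, impiSize is
        // the number of parts, and the result is written into partition.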
        ParMETIS_V3_PartGeomKway(&distribution[0], pattern->ptr,
                pattern->index, NULL, NULL, &wgtflag, &numflag, &dim,
                xyz, &ncon, &impiSize, &tpwgts[0], &ubvec[0], options,
                &edgecut, partition, &MPIInfo->comm);
        delete[] xyz;
    } else {
        for (index_t i = 0; i < myNumVertices; ++i)
            partition[i] = 0; // CPU 0 owns all
    }
#else
#pragma omp parallel for
    for (index_t i = 0; i < myNumVertices; ++i)
        partition[i] = myRank;
#endif // ESYS_HAVE_PARMETIS

    // create a new distribution and labeling of the DOF
    std::vector<index_t> new_distribution(mpiSize + 1);
#pragma omp parallel
    {
        std::vector<index_t> loc_partition_count(mpiSize);
#pragma omp for
        for (index_t i = 0; i < myNumVertices; ++i)
            loc_partition_count[partition[i]]++;
#pragma omp critical
        {
            for (int i = 0; i < mpiSize; ++i)
                new_distribution[i] += loc_partition_count[i];
        }
    }
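    // Here new_distribution[i] temporarily holds how many of this rank's DOFs
    // were assigned to rank i; the allgather below collects these counts from
    // every rank so that global offsets can be computed.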

    std::vector<index_t> recvbuf(mpiSize * mpiSize);
#ifdef ESYS_MPI
    // recvbuf will be the concatenation of each CPU's contribution to
    // new_distribution
    MPI_Allgather(&new_distribution[0], mpiSize, MPI_DIM_T, &recvbuf[0],
                  mpiSize, MPI_DIM_T, MPIInfo->comm);
#else
    for (int i = 0; i < mpiSize; ++i)
        recvbuf[i] = new_distribution[i];
#endif
    new_distribution[0] = 0;
    index_t* newGlobalDOFID = new index_t[len];
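    // For every target rank, the DOFs assigned to it receive consecutive new
    // global IDs starting at new_distribution[rank], offset by the number of
    // DOFs that lower-ranked processes contribute to that rank
    // (recvbuf[rank + mpiSize * i] is the count rank i sends to 'rank').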
    for (int rank = 0; rank < mpiSize; rank++) {
        index_t c = 0;
        for (int i = 0; i < myRank; ++i)
            c += recvbuf[rank + mpiSize * i];
        for (index_t i = 0; i < myNumVertices; ++i) {
            if (rank == partition[i]) {
                newGlobalDOFID[i] = new_distribution[rank] + c;
                c++;
            }
        }
        for (int i = myRank + 1; i < mpiSize; ++i)
            c += recvbuf[rank + mpiSize * i];
        new_distribution[rank + 1] = new_distribution[rank] + c;
    }

    // now the overlap needs to be created by sending the partition around
#ifdef ESYS_MPI
    int dest = MPIInfo->mod_rank(myRank + 1);
    int source = MPIInfo->mod_rank(myRank - 1);
#endif
    int current_rank = myRank;
    bool* setNewDOFId = new bool[numNodes];
#pragma omp parallel for
    for (index_t i = 0; i < numNodes; ++i)
        setNewDOFId[i] = true;

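    // Each pass of the loop below relabels the DOFs that fall into
    // current_rank's old ownership range, then rotates newGlobalDOFID one rank
    // along the ring, so after mpiSize passes every rank has also relabeled
    // the DOFs it merely references (its overlap).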
    for (int p = 0; p < mpiSize; ++p) {
        const index_t firstVertex = distribution[current_rank];
        const index_t lastVertex = distribution[current_rank + 1];
#pragma omp parallel for
        for (index_t i = 0; i < numNodes; ++i) {
            const index_t k = Nodes->globalDegreesOfFreedom[i];
            if (setNewDOFId[i] && firstVertex <= k && k < lastVertex) {
                Nodes->globalDegreesOfFreedom[i] = newGlobalDOFID[k - firstVertex];
                setNewDOFId[i] = false;
            }
        }

        if (p < mpiSize - 1) { // the final send can be skipped
#ifdef ESYS_MPI
            MPI_Status status;
            MPI_Sendrecv_replace(newGlobalDOFID, len, MPI_DIM_T,
                                 dest, MPIInfo->counter(),
                                 source, MPIInfo->counter(),
                                 MPIInfo->comm, &status);
            MPIInfo->incCounter();
#endif
            current_rank = MPIInfo->mod_rank(current_rank - 1);
        }
    }
    for (int i = 0; i < mpiSize + 1; ++i)
        distribution[i] = new_distribution[i];

    delete[] newGlobalDOFID;
    delete[] setNewDOFId;
    delete[] partition;
}

} // namespace dudley
