Contents of /trunk/finley/src/Mesh_optimizeDOFDistribution.cpp
Revision 6939 - Mon Jan 20 03:37:18 2020 UTC by uqaeller
File size: 9242 byte(s)
Log: Updated the copyright header.


/*****************************************************************************
*
* Copyright (c) 2003-2020 by The University of Queensland
* http://www.uq.edu.au
*
* Primary Business: Queensland, Australia
* Licensed under the Apache License, version 2.0
* http://www.apache.org/licenses/LICENSE-2.0
*
* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
* Development 2012-2013 by School of Earth Sciences
* Development from 2014-2017 by Centre for Geoscience Computing (GeoComp)
* Development from 2019 by School of Earth and Environmental Sciences
**
*****************************************************************************/

#include "FinleyDomain.h"
#include "IndexList.h"

#include <escript/index.h>

#ifdef ESYS_HAVE_PARMETIS
#include <parmetis.h>
#ifndef REALTYPEWIDTH
typedef float real_t;
#endif
#endif

#include <iostream>
#include <boost/scoped_array.hpp>

namespace finley {

#ifdef ESYS_HAVE_PARMETIS
// Checks whether there is any rank which has no vertex. If such a rank
// exists we do not use ParMETIS, since ParMETIS requires every rank to
// have at least 1 vertex (at line 129 of file "xyzpart.c" in
// parmetis 3.1.1, variable "nvtxs" would be 0 if any rank has no vertex).
static bool allRanksHaveNodes(escript::JMPI mpiInfo,
                              const IndexVector& distribution)
{
    int ret = 1;
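    // only rank 0 inspects the distribution; the result is broadcast below
    // so that all ranks take the same decision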

    if (mpiInfo->rank == 0) {
        for (int i = 0; i < mpiInfo->size; i++) {
            if (distribution[i + 1] == distribution[i]) {
                ret = 0;
                break;
            }
        }
        if (ret == 0) {
            std::cerr << "INFO: ParMetis is not used since at least one rank "
                         "has no vertex." << std::endl;
        }
    }
    MPI_Bcast(&ret, 1, MPI_INT, 0, mpiInfo->comm);
    return ret == 1;
}
#endif

/// Optimizes the distribution of DOFs across processors using ParMETIS.
/// On return a new distribution is given and the global DOFs are relabeled
/// accordingly, but the mesh has not been redistributed yet.
void FinleyDomain::optimizeDOFDistribution(IndexVector& distribution)
{
    int mpiSize = m_mpiInfo->size;
    const int myRank = m_mpiInfo->rank;
    const index_t myFirstVertex = distribution[myRank];
    const index_t myLastVertex = distribution[myRank + 1];
    const dim_t myNumVertices = myLastVertex - myFirstVertex;
    const dim_t numNodes = m_nodes->getNumNodes();

    // first step is to distribute the elements according to a global
    // numbering of the DOFs
    dim_t len = 0;
    for (int p = 0; p < mpiSize; ++p)
        len = std::max(len, distribution[p + 1] - distribution[p]);

    index_t* partition = new index_t[len];
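    // len is the largest number of DOFs owned by any rank; newGlobalDOFID
    // below uses the same size because it is rotated from rank to rank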

#ifdef ESYS_HAVE_PARMETIS
    if (mpiSize > 1 && allRanksHaveNodes(m_mpiInfo, distribution)) {
        boost::scoped_array<IndexList> index_list(new IndexList[myNumVertices]);
        int dim = m_nodes->numDim;

        // create the adjacency structure xadj and adjncy
#pragma omp parallel
        {
            // insert contributions from element matrices into columns index
            IndexList_insertElementsWithRowRangeNoMainDiagonal(index_list.get(),
                    myFirstVertex, myLastVertex, m_elements,
                    m_nodes->globalDegreesOfFreedom, m_nodes->globalDegreesOfFreedom);
            IndexList_insertElementsWithRowRangeNoMainDiagonal(index_list.get(),
                    myFirstVertex, myLastVertex, m_faceElements,
                    m_nodes->globalDegreesOfFreedom, m_nodes->globalDegreesOfFreedom);
            IndexList_insertElementsWithRowRangeNoMainDiagonal(index_list.get(),
                    myFirstVertex, myLastVertex, m_contactElements,
                    m_nodes->globalDegreesOfFreedom, m_nodes->globalDegreesOfFreedom);
            IndexList_insertElementsWithRowRangeNoMainDiagonal(index_list.get(),
                    myFirstVertex, myLastVertex, m_points,
                    m_nodes->globalDegreesOfFreedom, m_nodes->globalDegreesOfFreedom);
        }
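        // index_list[i] now holds, for local vertex i, the global DOF numbers
        // it shares an element with (excluding i itself); this is the graph
        // that is handed to ParMETIS in CSR form below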

        // set the coordinates
        real_t* xyz = new real_t[myNumVertices * dim];
#pragma omp parallel for
        for (index_t i = 0; i < numNodes; ++i) {
            const index_t k = m_nodes->globalDegreesOfFreedom[i] - myFirstVertex;
            if (k >= 0 && k < myNumVertices) {
                for (int j = 0; j < dim; ++j)
                    xyz[k * dim + j] = static_cast<real_t>(m_nodes->Coordinates[INDEX2(j, i, dim)]);
            }
        }

        // create the local CSR matrix pattern
        const dim_t globalNumVertices = distribution[mpiSize];
        index_t* ptr = new index_t[myNumVertices + 1];
#pragma omp parallel for
        for (index_t i = 0; i < myNumVertices; ++i) {
            ptr[i] = index_list[i].count(0, globalNumVertices);
        }
        // accumulate ptr
        dim_t s = 0;
        for (index_t i = 0; i < myNumVertices; ++i) {
            const index_t itmp = ptr[i];
            ptr[i] = s;
            s += itmp;
        }
        ptr[myNumVertices] = s;
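        // the loop above is an exclusive prefix sum turning per-row counts
        // into CSR row offsets, e.g. counts {2, 3, 1} become ptr = {0, 2, 5, 6},
        // so the neighbours of row i occupy index[ptr[i]..ptr[i+1])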

        // create index
        index_t* index = new index_t[s];
#pragma omp parallel for
        for (index_t i = 0; i < myNumVertices; ++i) {
            index_list[i].toArray(&index[ptr[i]], 0, globalNumVertices, 0);
        }

        index_t wgtflag = 0;
        index_t numflag = 0;
        index_t ncon = 1;
        index_t edgecut;
        index_t impiSize = mpiSize;
        index_t idim = dim;
        // options[0]=1 -> non-default values, evaluate rest of options
        // options[1]=0 -> debug level (no output)
        // options[2] -> random seed
        index_t options[3] = { 1, 0, 0 };
        std::vector<real_t> tpwgts(ncon * mpiSize, 1.f / mpiSize);
        std::vector<real_t> ubvec(ncon, 1.05f);
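        // tpwgts requests equally weighted partitions (1/mpiSize each);
        // ubvec allows up to 5% load imbalance for the single constraint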
        ParMETIS_V3_PartGeomKway(&distribution[0], ptr, index, NULL, NULL,
                                 &wgtflag, &numflag, &idim, xyz, &ncon,
                                 &impiSize, &tpwgts[0], &ubvec[0], options,
                                 &edgecut, partition, &m_mpiInfo->comm);
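        // on return partition[i] holds the rank that local vertex i should be
        // assigned to and edgecut the number of graph edges cut by the
        // partitioning; vertex and edge weights are not used (NULL, wgtflag=0)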
        delete[] xyz;
        delete[] index;
        delete[] ptr;
    } else {
        for (index_t i = 0; i < myNumVertices; ++i)
            partition[i] = 0; // CPU 0 owns all
    }
#else
#pragma omp parallel for
    for (index_t i = 0; i < myNumVertices; ++i)
        partition[i] = myRank; // CPU myRank owns all
#endif // ESYS_HAVE_PARMETIS
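    // without ParMETIS every rank keeps its own DOFs, so the relabeling below
    // leaves the existing distribution effectively unchanged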

    // create a new distribution and labeling of the DOF
    IndexVector new_distribution(mpiSize + 1);
#pragma omp parallel
    {
        IndexVector loc_partition_count(mpiSize);
#pragma omp for
        for (index_t i = 0; i < myNumVertices; ++i)
            loc_partition_count[partition[i]]++;
#pragma omp critical
        {
            for (int i = 0; i < mpiSize; ++i)
                new_distribution[i] += loc_partition_count[i];
        }
    }
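    // at this point new_distribution[i] holds the number of local vertices
    // assigned to rank i; it only becomes a cumulative distribution further
    // below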

    IndexVector recvbuf(mpiSize * mpiSize);
#ifdef ESYS_MPI
    // recvbuf will be the concatenation of each CPU's contribution to
    // new_distribution
    MPI_Allgather(&new_distribution[0], mpiSize, MPI_DIM_T, &recvbuf[0],
                  mpiSize, MPI_DIM_T, m_mpiInfo->comm);
#else
    for (int i = 0; i < mpiSize; ++i)
        recvbuf[i] = new_distribution[i];
#endif
    new_distribution[0] = 0;
    std::vector<index_t> newGlobalDOFID(len);
    for (int rank = 0; rank < mpiSize; rank++) {
        index_t c = 0;
        for (int i = 0; i < myRank; ++i)
            c += recvbuf[rank + mpiSize * i];
        for (index_t i = 0; i < myNumVertices; ++i) {
            if (rank == partition[i]) {
                newGlobalDOFID[i] = new_distribution[rank] + c;
                c++;
            }
        }
        for (int i = myRank + 1; i < mpiSize; ++i)
            c += recvbuf[rank + mpiSize * i];
        new_distribution[rank + 1] = new_distribution[rank] + c;
    }
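    // recvbuf[rank + mpiSize * i] is the number of vertices that rank i hands
    // over to rank 'rank'; adding the contributions of the ranks below and
    // above myRank places this rank's block of new, consecutive global IDs at
    // the right offset inside the new owner's range, and at the end of the
    // loop new_distribution is the cumulative DOF distribution of the new
    // ownership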

    // now the overlap needs to be created by sending the partition around
#ifdef ESYS_MPI
    int dest = m_mpiInfo->mod_rank(myRank + 1);
    int source = m_mpiInfo->mod_rank(myRank - 1);
#endif
    int current_rank = myRank;
    std::vector<short> setNewDOFId(numNodes, 1);
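    // newGlobalDOFID is passed around the ranks in a ring: in step p this rank
    // holds the table of current_rank and uses it to relabel all local nodes
    // whose DOF lies in that rank's old range; setNewDOFId ensures each node
    // is relabeled only once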

    for (int p = 0; p < mpiSize; ++p) {
        const index_t firstVertex = distribution[current_rank];
        const index_t lastVertex = distribution[current_rank + 1];
#pragma omp parallel for
        for (index_t i = 0; i < numNodes; ++i) {
            const index_t k = m_nodes->globalDegreesOfFreedom[i];
            if (setNewDOFId[i] && firstVertex <= k && k < lastVertex) {
                m_nodes->globalDegreesOfFreedom[i] = newGlobalDOFID[k - firstVertex];
                setNewDOFId[i] = 0;
            }
        }

        if (p < mpiSize - 1) { // the final send can be skipped
#ifdef ESYS_MPI
            MPI_Status status;
            MPI_Sendrecv_replace(&newGlobalDOFID[0], len, MPI_DIM_T,
                                 dest, m_mpiInfo->counter(),
                                 source, m_mpiInfo->counter(),
                                 m_mpiInfo->comm, &status);
            m_mpiInfo->incCounter();
#endif
            current_rank = m_mpiInfo->mod_rank(current_rank - 1);
        }
    }
    for (int i = 0; i < mpiSize + 1; ++i)
        distribution[i] = new_distribution[i];

    delete[] partition;
}

} // namespace finley

