/[escript]/trunk/finley/src/Mesh_optimizeDOFDistribution.cpp
Revision 6141 - Wed Apr 6 03:51:30 2016 UTC by caltinay
File size: 8623 byte(s)
more namespacing of defines.


/*****************************************************************************
*
* Copyright (c) 2003-2016 by The University of Queensland
* http://www.uq.edu.au
*
* Primary Business: Queensland, Australia
* Licensed under the Apache License, version 2.0
* http://www.apache.org/licenses/LICENSE-2.0
*
* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
* Development 2012-2013 by School of Earth Sciences
* Development from 2014 by Centre for Geoscience Computing (GeoComp)
*
*****************************************************************************/


/****************************************************************************

  Finley: Mesh: optimizes the distribution of DOFs across processors
  using ParMETIS. On return a new distribution is given and the global DOFs
  are relabeled accordingly, but the mesh is not redistributed yet.

*****************************************************************************/
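/* Summary comment (added for readability): the routine below proceeds in
 * three steps:
 *   1. build the connectivity graph of the locally owned DOFs from the
 *      element, face element, contact element and point lists,
 *   2. pass the graph and the DOF coordinates to ParMETIS
 *      (ParMETIS_V3_PartGeomKway) to obtain a target rank for each local DOF,
 *   3. exchange the per-rank counts, build the new distribution vector and
 *      relabel Nodes->globalDegreesOfFreedom by circulating the new labels
 *      around the ranks.
 */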
#include "Mesh.h"
#include "IndexList.h"

#ifdef ESYS_HAVE_PARMETIS
#include <parmetis.h>
#ifndef REALTYPEWIDTH
typedef float real_t;
#endif
#endif

#include <iostream>
#include <boost/scoped_array.hpp>

namespace finley {

#ifdef ESYS_HAVE_PARMETIS
// Checks whether there is any rank which has no vertex. If such a rank
// exists we do not use ParMETIS, since ParMETIS requires that every rank
// has at least 1 vertex (at line 129 of file "xyzpart.c" in ParMETIS 3.1.1,
// variable "nvtxs" would be 0 if any rank has no vertex).
bool allRanksHaveNodes(escript::JMPI& mpiInfo, const std::vector<index_t>& distribution)
{
    int ret = 1;

    if (mpiInfo->rank == 0) {
        for (int i=0; i<mpiInfo->size; i++) {
            if (distribution[i+1] == distribution[i]) {
                ret = 0;
                break;
            }
        }
        if (ret == 0) {
            std::cout << "Mesh::optimizeDOFDistribution: "
                << "Parmetis is not used since at least one rank has no vertex!"
                << std::endl;
        }
    }
    // 'ret' is a plain C int, so MPI_INT is the matching MPI datatype
    // (MPI_INTEGER is the Fortran integer type)
    MPI_Bcast(&ret, 1, MPI_INT, 0, mpiInfo->comm);
    return (ret==1);
}
#endif
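// Note on 'distribution' (illustrative example): the vector has length
// mpiSize+1 and rank p owns the global DOFs in the half-open range
// [distribution[p], distribution[p+1]). For instance, with three ranks
// {0, 100, 180, 260} means rank 0 owns DOFs 0..99, rank 1 owns 100..179 and
// rank 2 owns 180..259. A rank with distribution[p+1] == distribution[p]
// owns no DOFs, which is exactly the case rejected above.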


/****************************************************************************/

void Mesh::optimizeDOFDistribution(std::vector<index_t>& distribution)
{
    // mpiSize is not const because of the ParMETIS call
    int mpiSize=MPIInfo->size;
    const int myRank=MPIInfo->rank;
    const index_t myFirstVertex=distribution[myRank];
    const index_t myLastVertex=distribution[myRank+1];
    const dim_t myNumVertices=myLastVertex-myFirstVertex;

    // first step is to distribute the elements according to a global X of DOF
    // len is used for the sending around of partition later on
    index_t len=0;
    for (int p=0; p<mpiSize; ++p)
        len=std::max(len, distribution[p+1]-distribution[p]);
    std::vector<index_t> partition(len);

#ifdef ESYS_HAVE_PARMETIS
    if (mpiSize>1 && allRanksHaveNodes(MPIInfo, distribution)) {
        boost::scoped_array<IndexList> index_list(new IndexList[myNumVertices]);
        int dim=Nodes->numDim;
        // create the adjacency structure xadj and adjncy
#pragma omp parallel
        {
            // insert contributions from element matrices into columns index
            IndexList_insertElementsWithRowRangeNoMainDiagonal(index_list.get(),
                    myFirstVertex, myLastVertex, Elements,
                    Nodes->globalDegreesOfFreedom, Nodes->globalDegreesOfFreedom);
            IndexList_insertElementsWithRowRangeNoMainDiagonal(index_list.get(),
                    myFirstVertex, myLastVertex, FaceElements,
                    Nodes->globalDegreesOfFreedom, Nodes->globalDegreesOfFreedom);
            IndexList_insertElementsWithRowRangeNoMainDiagonal(index_list.get(),
                    myFirstVertex, myLastVertex, ContactElements,
                    Nodes->globalDegreesOfFreedom, Nodes->globalDegreesOfFreedom);
            IndexList_insertElementsWithRowRangeNoMainDiagonal(index_list.get(),
                    myFirstVertex, myLastVertex, Points,
                    Nodes->globalDegreesOfFreedom, Nodes->globalDegreesOfFreedom);
        }

        // create the local matrix pattern
        const dim_t globalNumVertices=distribution[mpiSize];
        paso::Pattern_ptr pattern(paso::Pattern::fromIndexListArray(0,
                myNumVertices, index_list.get(), 0, globalNumVertices, 0));
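        // The pattern's CSR arrays (pattern->ptr and pattern->index) serve as
        // the xadj/adjncy adjacency arrays passed to ParMETIS below.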
        // set the coordinates
        std::vector<real_t> xyz(myNumVertices*dim);
#pragma omp parallel for
        for (index_t i=0; i<Nodes->numNodes; ++i) {
            const index_t k=Nodes->globalDegreesOfFreedom[i]-myFirstVertex;
            if (k>=0 && k<myNumVertices) {
                for (int j=0; j<dim; ++j)
                    xyz[k*dim+j]=static_cast<real_t>(Nodes->Coordinates[INDEX2(j,i,dim)]);
            }
        }

        index_t wgtflag = 0;
        index_t numflag = 0;
        index_t ncon = 1;
        index_t edgecut;
        index_t impiSize = mpiSize;
        index_t idim = dim;
        // options[0]=1 -> non-default values, evaluate rest of options
        // options[1]=0 -> debug level (no output)
        // options[2] -> random seed
        index_t options[3] = { 1, 0, 0 };
        std::vector<real_t> tpwgts(ncon*mpiSize, 1.f/mpiSize);
        std::vector<real_t> ubvec(ncon, 1.05f);
        ParMETIS_V3_PartGeomKway(&distribution[0], pattern->ptr, pattern->index,
                NULL, NULL, &wgtflag, &numflag, &idim, &xyz[0],
                &ncon, &impiSize, &tpwgts[0], &ubvec[0], options,
                &edgecut, &partition[0], &MPIInfo->comm);
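        // On return partition[i] holds the target MPI rank chosen by ParMETIS
        // for the i-th locally owned DOF; edgecut receives the number of cut
        // graph edges and is not used any further here.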
    } else {
        for (index_t i=0; i<myNumVertices; ++i)
            partition[i]=0; // CPU 0 owns all
    }
#else
    for (index_t i=0; i<myNumVertices; ++i)
        partition[i]=myRank; // CPU myRank owns all
#endif

    // create a new distribution and labeling of the DOF
    std::vector<index_t> new_distribution(mpiSize+1, 0);
#pragma omp parallel
    {
        std::vector<int> loc_partition_count(mpiSize, 0);
#pragma omp for
        for (index_t i=0; i<myNumVertices; ++i)
            loc_partition_count[partition[i]]++;
#pragma omp critical
        {
            for (int i=0; i<mpiSize; ++i)
                new_distribution[i]+=loc_partition_count[i];
        }
    }
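    // At this point new_distribution[r] only holds the number of locally
    // owned DOFs assigned to rank r; the cumulative distribution is built
    // after the counts of all ranks have been exchanged below.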
    index_t *recvbuf=new index_t[mpiSize*mpiSize];
#ifdef ESYS_MPI
    // recvbuf will be the concatenation of each CPU's contribution to
    // new_distribution; the same MPI datatype is used for send and receive
    MPI_Allgather(&new_distribution[0], mpiSize, MPI_DIM_T, recvbuf, mpiSize,
                  MPI_DIM_T, MPIInfo->comm);
#else
    for (int i=0; i<mpiSize; ++i)
        recvbuf[i]=new_distribution[i];
#endif
    new_distribution[0]=0;
    std::vector<index_t> newGlobalDOFID(len);
    for (int rank=0; rank<mpiSize; rank++) {
        index_t c=0;
        for (int i=0; i<myRank; ++i)
            c+=recvbuf[rank+mpiSize*i];
        for (index_t i=0; i<myNumVertices; ++i) {
            if (rank==partition[i]) {
                newGlobalDOFID[i]=new_distribution[rank]+c;
                c++;
            }
        }
        for (int i=myRank+1; i<mpiSize; ++i)
            c+=recvbuf[rank+mpiSize*i];
        new_distribution[rank+1]=new_distribution[rank]+c;
    }
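    // Layout of recvbuf (worked example): recvbuf[r + mpiSize*i] is the
    // number of DOFs that rank i assigns to rank r. The new global ID of a
    // DOF moved to rank r is new_distribution[r] plus the number of DOFs
    // that lower-numbered ranks (and earlier local DOFs) contribute to r,
    // which is what the counter 'c' accumulates in the loop above.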
    delete[] recvbuf;

    // now the overlap needs to be created by sending the partition around
#ifdef ESYS_MPI
    int dest = MPIInfo->mod_rank(myRank + 1);
    int source = MPIInfo->mod_rank(myRank - 1);
#endif
    int current_rank=myRank;
    std::vector<short> setNewDOFId(Nodes->numNodes, 1);

    for (int p=0; p<mpiSize; ++p) {
        const index_t firstVertex=distribution[current_rank];
        const index_t lastVertex=distribution[current_rank+1];
#pragma omp parallel for
        for (index_t i=0; i<Nodes->numNodes; ++i) {
            const index_t k=Nodes->globalDegreesOfFreedom[i];
            if (setNewDOFId[i] && (firstVertex<=k) && (k<lastVertex)) {
                Nodes->globalDegreesOfFreedom[i]=newGlobalDOFID[k-firstVertex];
                setNewDOFId[i]=0;
            }
        }

        if (p<mpiSize-1) { // the final send can be skipped
#ifdef ESYS_MPI
            MPI_Status status;
            MPI_Sendrecv_replace(&newGlobalDOFID[0], len, MPI_DIM_T,
                    dest, MPIInfo->counter(),
                    source, MPIInfo->counter(),
                    MPIInfo->comm, &status);
            MPIInfo->incCounter();
#endif
            current_rank=MPIInfo->mod_rank(current_rank-1);
        }
    }
    for (int i=0; i<mpiSize+1; ++i)
        distribution[i]=new_distribution[i];
}
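// Usage sketch (hypothetical caller, not part of this file): a caller holding
// the current block distribution of the global DOFs could optimize it and
// then redistribute the mesh in a separate step, e.g.
//
//     std::vector<index_t> distribution(mpiSize + 1);
//     // ... fill with the current cumulative DOF distribution ...
//     mesh->optimizeDOFDistribution(distribution);
//     // 'distribution' now holds the optimized distribution and
//     // Nodes->globalDegreesOfFreedom carries the new labels; the mesh
//     // itself still has to be redistributed afterwards.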

} // namespace finley

