/[escript]/branches/trilinos_from_5897/dudley/src/ElementFile_distributeByRankOfDOF.cpp
Revision 6009 by caltinay, Wed Mar 2 04:13:26 2016 UTC
File size: 9336 bytes
Much needed sync with trunk...

/*****************************************************************************
*
* Copyright (c) 2003-2016 by The University of Queensland
* http://www.uq.edu.au
*
* Primary Business: Queensland, Australia
* Licensed under the Open Software License version 3.0
* http://www.opensource.org/licenses/osl-3.0.php
*
* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
* Development 2012-2013 by School of Earth Sciences
* Development from 2014 by Centre for Geoscience Computing (GeoComp)
*
*****************************************************************************/

/****************************************************************************

  Dudley: ElementFile: redistributes the elements, including overlap, by the
  rank of their DOFs: every element owned by this rank is sent to each rank
  that owns at least one of the element's DOFs.

*****************************************************************************/

#include "ElementFile.h"

namespace dudley {

void Dudley_ElementFile_distributeByRankOfDOF(Dudley_ElementFile* self, int* mpiRankOfDOF, index_t* Id)
{
    if (self == NULL)
        return;
    dim_t e, i;
    int myRank = self->MPIInfo->rank;
    dim_t NN = self->numNodes;
    dim_t size = self->MPIInfo->size;
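    // In the distributed case the function proceeds in four steps:
    //   1. count, per destination rank, the elements to be sent and pick the
    //      new owner of each element (OpenMP-parallel),
    //   2. exchange the counts with MPI_Alltoall,
    //   3. pack Id, Tag, Owner and Nodes into per-rank send buffers,
    //   4. exchange the buffers with non-blocking MPI and unpack them into
    //      the reallocated element tables.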
    if (size > 1)
    {
#ifdef ESYS_MPI
        int p, *Owner_buffer = NULL, loc_proc_mask_max;
        dim_t j, *send_count = NULL, *recv_count = NULL, *newOwner = NULL;
        dim_t *loc_proc_mask = NULL, *loc_send_count = NULL;
        dim_t newNumElements, numElementsInBuffer;
        index_t *send_offset = NULL, *recv_offset = NULL, *Id_buffer = NULL, *Tag_buffer = NULL, *Nodes_buffer = NULL, k;
        bool *proc_mask = NULL;
        size_t size_size = size * sizeof(dim_t);
        dim_t numRequests = 0;
        MPI_Request *mpi_requests = NULL;
        MPI_Status *mpi_stati = NULL;
        mpi_requests = new MPI_Request[8 * size];
        mpi_stati = new MPI_Status[8 * size];
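        // at most 4 non-blocking receives and 4 non-blocking sends are posted
        // per peer rank (Id, Tag, Owner and Nodes streams), hence 8*size requests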
        // count the number of elements that have to be sent to each processor
        // (send_count) and define the new owner of each element as the
        // processor holding the largest number of its DOFs (ties resolved
        // towards the smallest rank)
        send_count = new dim_t[size];
        recv_count = new dim_t[size];
        newOwner = new int[self->numElements];
        memset(send_count, 0, size_size);
#pragma omp parallel private(p,loc_proc_mask,loc_send_count)
        {
            loc_proc_mask = new dim_t[size];
            loc_send_count = new dim_t[size];
            memset(loc_send_count, 0, size_size);
#pragma omp for private(e,j,loc_proc_mask_max) schedule(static)
            for (e = 0; e < self->numElements; e++)
            {
                if (self->Owner[e] == myRank)
                {
                    newOwner[e] = myRank;
                    memset(loc_proc_mask, 0, size_size);
                    for (j = 0; j < NN; j++)
                    {
                        p = mpiRankOfDOF[self->Nodes[INDEX2(j, e, NN)]];
                        loc_proc_mask[p]++;
                    }
                    loc_proc_mask_max = 0;
                    for (p = 0; p < size; ++p)
                    {
                        if (loc_proc_mask[p] > 0)
                            loc_send_count[p]++;
                        if (loc_proc_mask[p] > loc_proc_mask_max)
                        {
                            newOwner[e] = p;
                            loc_proc_mask_max = loc_proc_mask[p];
                        }
                    }
                }
                else
                {
                    newOwner[e] = -1;
                }
            }
#pragma omp critical
            {
                for (p = 0; p < size; ++p)
                    send_count[p] += loc_send_count[p];
            }
            delete[] loc_proc_mask;
            delete[] loc_send_count;
        }
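        // exchange the per-rank send counts so each rank knows how many
        // elements it will receive from every other rank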
        MPI_Alltoall(send_count, 1, MPI_INT, recv_count, 1, MPI_INT, self->MPIInfo->comm);
        /* get the new number of elements for this processor */
        newNumElements = 0;
        for (p = 0; p < size; ++p)
            newNumElements += recv_count[p];

        /* get the number of elements that go into the send buffer */
        numElementsInBuffer = 0;
        for (p = 0; p < size; ++p)
            numElementsInBuffer += send_count[p];
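        // an element owned here is packed once for every rank that holds one
        // of its DOFs, so numElementsInBuffer may exceed self->numElements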
        /* allocate buffers */
        Id_buffer = new index_t[numElementsInBuffer];
        Tag_buffer = new index_t[numElementsInBuffer];
        Owner_buffer = new int[numElementsInBuffer];
        Nodes_buffer = new index_t[numElementsInBuffer * NN];
        send_offset = new index_t[size];
        recv_offset = new index_t[size];
        proc_mask = new bool[size];

        /* calculate the offsets for the processor buffers */
        recv_offset[0] = 0;
        for (p = 0; p < size - 1; ++p)
            recv_offset[p + 1] = recv_offset[p] + recv_count[p];
        send_offset[0] = 0;
        for (p = 0; p < size - 1; ++p)
            send_offset[p + 1] = send_offset[p] + send_count[p];

        memset(send_count, 0, size_size);
        /* copy elements into buffers. proc_mask makes sure that an
         * element is copied once only for each processor */
        for (e = 0; e < self->numElements; e++)
        {
            if (self->Owner[e] == myRank)
            {
                memset(proc_mask, true, size * sizeof(bool));
                for (j = 0; j < NN; j++)
                {
                    p = mpiRankOfDOF[self->Nodes[INDEX2(j, e, NN)]];
                    if (proc_mask[p])
                    {
                        k = send_offset[p] + send_count[p];
                        Id_buffer[k] = self->Id[e];
                        Tag_buffer[k] = self->Tag[e];
                        Owner_buffer[k] = newOwner[e];
                        for (i = 0; i < NN; i++)
                            Nodes_buffer[INDEX2(i, k, NN)] = Id[self->Nodes[INDEX2(i, e, NN)]];
                        send_count[p]++;
                        proc_mask[p] = false;
                    }
                }
            }
        }
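        // all locally owned elements are now in the send buffers, so the
        // element tables can safely be resized to the new local element count
        // before the receives are posted into them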
        /* allocate new tables */
        Dudley_ElementFile_allocTable(self, newNumElements);

        /* start to receive new elements */
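        // tag scheme: counter()+rank selects the Id stream, +size the Tag
        // stream, +2*size the Owner stream and +3*size the Nodes stream;
        // receives use the local rank, sends use the destination rank, so
        // matching send/receive pairs carry the same tag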
        for (p = 0; p < size; ++p)
        {
            if (recv_count[p] > 0)
            {
                MPI_Irecv(&(self->Id[recv_offset[p]]), recv_count[p],
                          MPI_INT, p, self->MPIInfo->counter() + myRank,
                          self->MPIInfo->comm, &mpi_requests[numRequests]);
                numRequests++;
                MPI_Irecv(&(self->Tag[recv_offset[p]]), recv_count[p],
                          MPI_INT, p, self->MPIInfo->counter() + size + myRank,
                          self->MPIInfo->comm, &mpi_requests[numRequests]);
                numRequests++;
                MPI_Irecv(&(self->Owner[recv_offset[p]]), recv_count[p],
                          MPI_INT, p, self->MPIInfo->counter() + 2 * size + myRank,
                          self->MPIInfo->comm, &mpi_requests[numRequests]);
                numRequests++;
                MPI_Irecv(&(self->Nodes[recv_offset[p] * NN]), recv_count[p] * NN,
                          MPI_INT, p, self->MPIInfo->counter() + 3 * size + myRank,
                          self->MPIInfo->comm, &mpi_requests[numRequests]);
                numRequests++;
            }
        }
        /* now the buffers can be sent off */
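        // MPI_Issend is a synchronous non-blocking send: it completes only
        // once the matching receive has started, so the MPI_Waitall below also
        // confirms that every destination has begun receiving its data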
        for (p = 0; p < size; ++p)
        {
            if (send_count[p] > 0)
            {
                MPI_Issend(&(Id_buffer[send_offset[p]]), send_count[p],
                           MPI_INT, p, self->MPIInfo->counter() + p,
                           self->MPIInfo->comm, &mpi_requests[numRequests]);
                numRequests++;
                MPI_Issend(&(Tag_buffer[send_offset[p]]), send_count[p],
                           MPI_INT, p, self->MPIInfo->counter() + size + p,
                           self->MPIInfo->comm, &mpi_requests[numRequests]);
                numRequests++;
                MPI_Issend(&(Owner_buffer[send_offset[p]]), send_count[p],
                           MPI_INT, p, self->MPIInfo->counter() + 2 * size + p,
                           self->MPIInfo->comm, &mpi_requests[numRequests]);
                numRequests++;
                MPI_Issend(&(Nodes_buffer[send_offset[p] * NN]), send_count[p] * NN,
                           MPI_INT, p, self->MPIInfo->counter() + 3 * size + p,
                           self->MPIInfo->comm, &mpi_requests[numRequests]);
                numRequests++;
            }
        }
        /* wait for the requests to complete */
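        // advance the shared tag counter past the four tag streams used above
        // so that subsequent communication does not reuse these tags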
        self->MPIInfo->incCounter(4 * size);
        MPI_Waitall(numRequests, mpi_requests, mpi_stati);
        /* free the buffers */
        delete[] Id_buffer;
        delete[] Tag_buffer;
        delete[] Owner_buffer;
        delete[] Nodes_buffer;
        delete[] send_offset;
        delete[] recv_offset;
        delete[] proc_mask;
        delete[] mpi_requests;
        delete[] mpi_stati;
        delete[] send_count;
        delete[] recv_count;
        delete[] newOwner;
#endif
    } else { // single rank: every element stays local, only node ids are mapped
#pragma omp parallel for private(e,i) schedule(static)
        for (e = 0; e < self->numElements; e++)
        {
            self->Owner[e] = myRank;
            for (i = 0; i < NN; i++)
                self->Nodes[INDEX2(i, e, NN)] = Id[self->Nodes[INDEX2(i, e, NN)]];
        }
    }
}

} // namespace dudley
