/[escript]/trunk/dudley/src/ElementFile_distributeByRankOfDOF.c
Revision 3259
Mon Oct 11 01:48:14 2010 UTC (9 years ago) by jfenwick
File MIME type: text/plain
File size: 8323 byte(s)
Merging dudley and scons updates from branches


/*******************************************************
 *
 * Copyright (c) 2003-2010 by University of Queensland
 * Earth Systems Science Computational Center (ESSCC)
 * http://www.uq.edu.au/esscc
 *
 * Primary Business: Queensland, Australia
 * Licensed under the Open Software License version 3.0
 * http://www.opensource.org/licenses/osl-3.0.php
 *
 *******************************************************/

/**************************************************************/

/* Dudley: ElementFile: this will redistribute the Elements including overlap by the rank of the DOFs */

/**************************************************************/

#include "ElementFile.h"
#ifdef _OPENMP
#include <omp.h>
#endif

/**************************************************************/

void Dudley_ElementFile_distributeByRankOfDOF(Dudley_ElementFile * self, Esys_MPI_rank * mpiRankOfDOF, index_t * Id)
{
    size_t size_size;
    Esys_MPI_rank myRank, p, *Owner_buffer = NULL, loc_proc_mask_max;
    dim_t e, j, i, size, *send_count = NULL, *recv_count = NULL, *newOwner = NULL, *loc_proc_mask = NULL,
        *loc_send_count = NULL, newNumElements, numElementsInBuffer, numNodes, numRequests, NN;
    index_t *send_offset = NULL, *recv_offset = NULL, *Id_buffer = NULL, *Tag_buffer = NULL, *Nodes_buffer = NULL, k;
    bool_t *proc_mask = NULL;
#ifdef ESYS_MPI
    MPI_Request *mpi_requests = NULL;
    MPI_Status *mpi_stati = NULL;
#endif
    if (self == NULL)
        return;
    myRank = self->MPIInfo->rank;
    size = self->MPIInfo->size;
    size_size = size * sizeof(dim_t);
    numNodes = self->numNodes;
    NN = self->numNodes;
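    /* Overview: with more than one MPI rank the elements are redistributed in three phases:
       (1) count, per destination rank, how many element copies have to be shipped and pick a
           new owner for every element,
       (2) pack Id, Tag, Owner and node lists into per-rank send buffers, and
       (3) exchange the buffers with non-blocking MPI calls and unpack them into freshly
           allocated element tables. */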
    if (size > 1)
    {
#ifdef ESYS_MPI
        mpi_requests = TMPMEMALLOC(8 * size, MPI_Request);
        mpi_stati = TMPMEMALLOC(8 * size, MPI_Status);
        Dudley_checkPtr(mpi_requests);
        Dudley_checkPtr(mpi_stati);
#endif

        /* count the number of elements that have to be sent to each processor (send_count)
           and define a new element owner as the processor that holds the largest number of
           the element's DOFs (ties go to the smallest rank) */
        send_count = TMPMEMALLOC(size, dim_t);
        recv_count = TMPMEMALLOC(size, dim_t);
        newOwner = TMPMEMALLOC(self->numElements, Esys_MPI_rank);
        if (!(Dudley_checkPtr(send_count) || Dudley_checkPtr(recv_count) || Dudley_checkPtr(newOwner)))
        {
            memset(send_count, 0, size_size);
#pragma omp parallel private(p,loc_proc_mask,loc_send_count)
            {
                loc_proc_mask = THREAD_MEMALLOC(size, dim_t);
                loc_send_count = THREAD_MEMALLOC(size, dim_t);
                memset(loc_send_count, 0, size_size);
#pragma omp for private(e,j,loc_proc_mask_max) schedule(static)
                for (e = 0; e < self->numElements; e++)
                {
                    if (self->Owner[e] == myRank)
                    {
                        newOwner[e] = myRank;
                        memset(loc_proc_mask, 0, size_size);
                        for (j = 0; j < numNodes; j++)
                        {
                            p = mpiRankOfDOF[self->Nodes[INDEX2(j, e, NN)]];
                            loc_proc_mask[p]++;
                        }
                        loc_proc_mask_max = 0;
                        for (p = 0; p < size; ++p)
                        {
                            if (loc_proc_mask[p] > 0)
                                loc_send_count[p]++;
                            if (loc_proc_mask[p] > loc_proc_mask_max)
                            {
                                newOwner[e] = p;
                                loc_proc_mask_max = loc_proc_mask[p];
                            }
                        }
                    }
                    else
                    {
                        newOwner[e] = -1;
                    }
                }
#pragma omp critical
                {
                    for (p = 0; p < size; ++p)
                        send_count[p] += loc_send_count[p];
                }
                THREAD_MEMFREE(loc_proc_mask);
                THREAD_MEMFREE(loc_send_count);
            }
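            /* send_count[p] now holds the number of element copies this rank ships to rank p;
               exchange the counts so that recv_count[p] gives the number of copies arriving from rank p */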
#ifdef ESYS_MPI
            MPI_Alltoall(send_count, 1, MPI_INT, recv_count, 1, MPI_INT, self->MPIInfo->comm);
#else
            for (p = 0; p < size; ++p)
                recv_count[p] = send_count[p];
#endif
            /* get the new number of elements for this processor */
            newNumElements = 0;
            for (p = 0; p < size; ++p)
                newNumElements += recv_count[p];

            /* get the number of elements this processor has to send (the size of the send buffers) */
            numElementsInBuffer = 0;
            for (p = 0; p < size; ++p)
                numElementsInBuffer += send_count[p];
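            /* an element is packed once for every rank that owns at least one of its DOFs, so
               numElementsInBuffer may exceed the number of elements currently owned by this rank */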
            /* allocate buffers */
            Id_buffer = TMPMEMALLOC(numElementsInBuffer, index_t);
            Tag_buffer = TMPMEMALLOC(numElementsInBuffer, index_t);
            Owner_buffer = TMPMEMALLOC(numElementsInBuffer, Esys_MPI_rank);
            Nodes_buffer = TMPMEMALLOC(numElementsInBuffer * NN, index_t);
            send_offset = TMPMEMALLOC(size, index_t);
            recv_offset = TMPMEMALLOC(size, index_t);
            proc_mask = TMPMEMALLOC(size, bool_t);
            if (!(Dudley_checkPtr(Id_buffer) || Dudley_checkPtr(Tag_buffer) || Dudley_checkPtr(Owner_buffer) ||
                  Dudley_checkPtr(Nodes_buffer) || Dudley_checkPtr(send_offset) || Dudley_checkPtr(recv_offset) ||
                  Dudley_checkPtr(proc_mask)))
            {

                /* calculate the offsets for the processor buffers */
                recv_offset[0] = 0;
                for (p = 0; p < size - 1; ++p)
                    recv_offset[p + 1] = recv_offset[p] + recv_count[p];
                send_offset[0] = 0;
                for (p = 0; p < size - 1; ++p)
                    send_offset[p + 1] = send_offset[p] + send_count[p];
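                /* send_offset[p] and recv_offset[p] are exclusive prefix sums of the counts:
                   they mark the first slot of rank p's contiguous block in the packed buffers */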

                memset(send_count, 0, size_size);
                /* copy the elements into the buffers; proc_mask makes sure that an element is copied only once for each processor */
                for (e = 0; e < self->numElements; e++)
                {
                    if (self->Owner[e] == myRank)
                    {
                        memset(proc_mask, TRUE, size_size);
                        for (j = 0; j < numNodes; j++)
                        {
                            p = mpiRankOfDOF[self->Nodes[INDEX2(j, e, NN)]];
                            if (proc_mask[p])
                            {
                                k = send_offset[p] + send_count[p];
                                Id_buffer[k] = self->Id[e];
                                Tag_buffer[k] = self->Tag[e];
                                Owner_buffer[k] = newOwner[e];
                                for (i = 0; i < numNodes; i++)
                                    Nodes_buffer[INDEX2(i, k, NN)] = Id[self->Nodes[INDEX2(i, e, NN)]];
                                send_count[p]++;
                                proc_mask[p] = FALSE;
                            }
                        }
                    }
                }
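                /* at this point the buffers are packed contiguously by destination rank and
                   send_count has been rebuilt to the same per-rank totals as counted above */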
                /* allocate new tables */
                Dudley_ElementFile_allocTable(self, newNumElements);
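                /* self now provides element tables with room for newNumElements entries; the
                   receives below write straight into the new Id, Tag, Owner and Nodes arrays */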

                /* start to receive new elements */
                numRequests = 0;
                for (p = 0; p < size; ++p)
                {
                    if (recv_count[p] > 0)
                    {
#ifdef ESYS_MPI
                        MPI_Irecv(&(self->Id[recv_offset[p]]), recv_count[p],
                                  MPI_INT, p, self->MPIInfo->msg_tag_counter + myRank,
                                  self->MPIInfo->comm, &mpi_requests[numRequests]);
                        numRequests++;
                        MPI_Irecv(&(self->Tag[recv_offset[p]]), recv_count[p],
                                  MPI_INT, p, self->MPIInfo->msg_tag_counter + size + myRank,
                                  self->MPIInfo->comm, &mpi_requests[numRequests]);
                        numRequests++;
                        MPI_Irecv(&(self->Owner[recv_offset[p]]), recv_count[p],
                                  MPI_INT, p, self->MPIInfo->msg_tag_counter + 2 * size + myRank,
                                  self->MPIInfo->comm, &mpi_requests[numRequests]);
                        numRequests++;
                        MPI_Irecv(&(self->Nodes[recv_offset[p] * NN]), recv_count[p] * NN,
                                  MPI_INT, p, self->MPIInfo->msg_tag_counter + 3 * size + myRank,
                                  self->MPIInfo->comm, &mpi_requests[numRequests]);
                        numRequests++;
#endif
                    }
                }
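                /* tag scheme: the four streams (Id, Tag, Owner, Nodes) each use their own block of
                   'size' tags; a receive posted above with tag base + k*size + myRank is matched by
                   the send issued below with tag base + k*size + destination rank */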
                /* now the buffers can be sent away */
                for (p = 0; p < size; ++p)
                {
                    if (send_count[p] > 0)
                    {
#ifdef ESYS_MPI
                        MPI_Issend(&(Id_buffer[send_offset[p]]), send_count[p],
                                   MPI_INT, p, self->MPIInfo->msg_tag_counter + p,
                                   self->MPIInfo->comm, &mpi_requests[numRequests]);
                        numRequests++;
                        MPI_Issend(&(Tag_buffer[send_offset[p]]), send_count[p],
                                   MPI_INT, p, self->MPIInfo->msg_tag_counter + size + p,
                                   self->MPIInfo->comm, &mpi_requests[numRequests]);
                        numRequests++;
                        MPI_Issend(&(Owner_buffer[send_offset[p]]), send_count[p],
                                   MPI_INT, p, self->MPIInfo->msg_tag_counter + 2 * size + p,
                                   self->MPIInfo->comm, &mpi_requests[numRequests]);
                        numRequests++;
                        MPI_Issend(&(Nodes_buffer[send_offset[p] * NN]), send_count[p] * NN,
                                   MPI_INT, p, self->MPIInfo->msg_tag_counter + 3 * size + p,
                                   self->MPIInfo->comm, &mpi_requests[numRequests]);
                        numRequests++;
#endif
                    }
                }
                self->MPIInfo->msg_tag_counter += 4 * size;
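                /* advance the tag counter past the 4*size tags used in this exchange so that the
                   next round of communication cannot collide with messages still in flight here */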
                /* wait for the requests to be finalized */
#ifdef ESYS_MPI
                MPI_Waitall(numRequests, mpi_requests, mpi_stati);
#endif
            }
            /* clear buffer */
            TMPMEMFREE(Id_buffer);
            TMPMEMFREE(Tag_buffer);
            TMPMEMFREE(Owner_buffer);
            TMPMEMFREE(Nodes_buffer);
            TMPMEMFREE(send_offset);
            TMPMEMFREE(recv_offset);
            TMPMEMFREE(proc_mask);
        }
#ifdef ESYS_MPI
        TMPMEMFREE(mpi_requests);
        TMPMEMFREE(mpi_stati);
#endif
        TMPMEMFREE(send_count);
        TMPMEMFREE(recv_count);
        TMPMEMFREE(newOwner);
    }
    else
    {
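        /* single rank: every element stays local, so just record this rank as owner and map the
           node indices through the Id array. Note that the omp for below is not enclosed in a
           parallel region and therefore effectively runs sequentially. */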
#pragma omp for private(e,i) schedule(static)
        for (e = 0; e < self->numElements; e++)
        {
            self->Owner[e] = myRank;
            for (i = 0; i < numNodes; i++)
                self->Nodes[INDEX2(i, e, NN)] = Id[self->Nodes[INDEX2(i, e, NN)]];
        }
    }
    return;
}
