/*******************************************************
*
* Copyright (c) 2003-2010 by University of Queensland
* Earth Systems Science Computational Center (ESSCC)
* http://www.uq.edu.au/esscc
*
* Primary Business: Queensland, Australia
* Licensed under the Open Software License version 3.0
* http://www.opensource.org/licenses/osl-3.0.php
*
*******************************************************/


/**************************************************************/

/* Finley: Mesh: NodeFile */

/* creates a dense labeling of the global degrees of freedom */
/* and returns the new number of global degrees of freedom   */

/**************************************************************/

#include "NodeFile.h"

/**************************************************************/

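/* Numbers the global degrees of freedom that are actually used by the nodes of
   this NodeFile densely as 0,...,newNumGlobalDOFs-1 and returns newNumGlobalDOFs.
   The DOF id range is split across the MPI ranks; each rank marks the ids it uses
   while the buffer sections are passed around the ranks in a ring, the marked
   entries are counted and offset via an Allreduce, and a second ring pass writes
   the new numbers back into globalDegreesOfFreedom. */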
dim_t Finley_NodeFile_createDenseDOFLabeling(Finley_NodeFile* in)
{
    index_t min_dof, max_dof, unset_dof=-1, set_dof=1, dof_0, dof_1, *DOF_buffer=NULL, k;
    Esys_MPI_rank buffer_rank, dest, source, *distribution=NULL;
    dim_t p, buffer_len, n, myDOFs, *offsets=NULL, *loc_offsets=NULL, new_numGlobalDOFs=0, myNewDOFs;
    bool_t *set_new_DOF=NULL;
#ifdef ESYS_MPI
    MPI_Status status;
#endif

    /* get the global range of DOF ids */
    Finley_NodeFile_setGlobalDOFRange(&min_dof, &max_dof, in);

    distribution=TMPMEMALLOC(in->MPIInfo->size+1, index_t);
    offsets=TMPMEMALLOC(in->MPIInfo->size, dim_t);
    loc_offsets=TMPMEMALLOC(in->MPIInfo->size, dim_t);
    set_new_DOF=TMPMEMALLOC(in->numNodes, bool_t);

    if (! (Finley_checkPtr(distribution) || Finley_checkPtr(offsets) || Finley_checkPtr(loc_offsets) || Finley_checkPtr(set_new_DOF)) ) {
        /* distribute the range of DOF ids */
        buffer_len=Esys_MPIInfo_setDistribution(in->MPIInfo, min_dof, max_dof, distribution);
        myDOFs=distribution[in->MPIInfo->rank+1]-distribution[in->MPIInfo->rank];
        /* allocate buffers */
        DOF_buffer=TMPMEMALLOC(buffer_len, index_t);
        if (! Finley_checkPtr(DOF_buffer)) {
            /* fill DOF_buffer with the unset_dof marker so it can be checked later which DOFs are in use */
            #pragma omp parallel for private(n) schedule(static)
            for (n=0; n<buffer_len; n++) DOF_buffer[n]=unset_dof;

            /* fill the buffer by sending portions around in a circle */
            dest=Esys_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank + 1);
            source=Esys_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank - 1);
            buffer_rank=in->MPIInfo->rank;
            for (p=0; p<in->MPIInfo->size; ++p) {
                if (p>0) { /* the initial send can be skipped */
#ifdef ESYS_MPI
                    MPI_Sendrecv_replace(DOF_buffer, buffer_len, MPI_INT,
                                         dest, in->MPIInfo->msg_tag_counter, source, in->MPIInfo->msg_tag_counter,
                                         in->MPIInfo->comm, &status);
#endif
                    in->MPIInfo->msg_tag_counter++;
                }
                buffer_rank=Esys_MPIInfo_mod(in->MPIInfo->size, buffer_rank-1);
                dof_0=distribution[buffer_rank];
                dof_1=distribution[buffer_rank+1];
                #pragma omp parallel for private(n,k) schedule(static)
                for (n=0; n<in->numNodes; n++) {
                    k=in->globalDegreesOfFreedom[n];
                    if ((dof_0<=k) && (k<dof_1)) {
                        DOF_buffer[k-dof_0]=set_dof;
                    }
                }
            }
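            /* at this point DOF_buffer[i] is set_dof exactly when DOF id
               distribution[in->MPIInfo->rank]+i is used by a node on some rank */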
            /* count the entries in the DOF_buffer */
            /* TODO: OMP parallel */
            myNewDOFs=0;
            for (n=0; n<myDOFs; ++n) {
                if (DOF_buffer[n] == set_dof) {
                    DOF_buffer[n]=myNewDOFs;
                    myNewDOFs++;
                }
            }
            memset(loc_offsets, 0, in->MPIInfo->size*sizeof(dim_t));
            loc_offsets[in->MPIInfo->rank]=myNewDOFs;
#ifdef ESYS_MPI
            MPI_Allreduce(loc_offsets, offsets, in->MPIInfo->size, MPI_INT, MPI_SUM, in->MPIInfo->comm);
            new_numGlobalDOFs=0;
            for (n=0; n<in->MPIInfo->size; ++n) {
                loc_offsets[n]=new_numGlobalDOFs;
                new_numGlobalDOFs+=offsets[n];
            }
#else
            new_numGlobalDOFs=loc_offsets[0];
            loc_offsets[0]=0;
#endif
            #pragma omp parallel
            {
                #pragma omp for private(n) schedule(static)
                for (n=0; n<myDOFs; ++n) DOF_buffer[n]+=loc_offsets[in->MPIInfo->rank];
                /* now entries are collected from the buffer again by sending the entries around in a circle */
                #pragma omp for private(n) schedule(static)
                for (n=0; n<in->numNodes; ++n) set_new_DOF[n]=TRUE;
            }
            dest=Esys_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank + 1);
            source=Esys_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank - 1);
            buffer_rank=in->MPIInfo->rank;
            for (p=0; p<in->MPIInfo->size; ++p) {
                dof_0=distribution[buffer_rank];
                dof_1=distribution[buffer_rank+1];
                #pragma omp parallel for private(n,k) schedule(static)
                for (n=0; n<in->numNodes; n++) {
                    k=in->globalDegreesOfFreedom[n];
                    if (set_new_DOF[n] && (dof_0<=k) && (k<dof_1)) {
                        in->globalDegreesOfFreedom[n]=DOF_buffer[k-dof_0];
                        set_new_DOF[n]=FALSE;
                    }
                }
                if (p<in->MPIInfo->size-1) { /* the last send can be skipped */
#ifdef ESYS_MPI
                    MPI_Sendrecv_replace(DOF_buffer, buffer_len, MPI_INT,
                                         dest, in->MPIInfo->msg_tag_counter, source, in->MPIInfo->msg_tag_counter,
                                         in->MPIInfo->comm, &status);
#endif
                    in->MPIInfo->msg_tag_counter+=1;
                }
                buffer_rank=Esys_MPIInfo_mod(in->MPIInfo->size, buffer_rank-1);
            }
        }
        TMPMEMFREE(DOF_buffer);
    }
    TMPMEMFREE(distribution);
    TMPMEMFREE(loc_offsets);
    TMPMEMFREE(offsets);
    TMPMEMFREE(set_new_DOF);
    return new_numGlobalDOFs;
}

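/* Assigns to each node the MPI rank that owns its degree of freedom, i.e. the
   rank p with distribution[p] <= globalDegreesOfFreedom[n] < distribution[p+1].
   The result is written to mpiRankOfDOF. */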
void Finley_NodeFile_assignMPIRankToDOFs(Finley_NodeFile* in, Esys_MPI_rank* mpiRankOfDOF, index_t* distribution)
{
    index_t min_DOF, max_DOF, k;
    dim_t n;
    Esys_MPI_rank p, p_min=in->MPIInfo->size, p_max=-1;
    /* first we calculate the min and max DOF on this processor to reduce the cost of searching */
    Finley_NodeFile_setDOFRange(&min_DOF, &max_DOF, in);

    for (p=0; p<in->MPIInfo->size; ++p) {
        if (distribution[p]<=min_DOF) p_min=p;
        if (distribution[p]<=max_DOF) p_max=p;
    }
    #pragma omp parallel for private(n,k,p) schedule(static)
    for (n=0; n<in->numNodes; ++n) {
        k=in->globalDegreesOfFreedom[n];
        for (p=p_min; p<=p_max; ++p) {
            if (k<distribution[p+1]) {
                mpiRankOfDOF[n]=p;
                break;
            }
        }
    }
}

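/* Same scheme as Finley_NodeFile_createDenseDOFLabeling, but restricted to the
   nodes with reducedNodeMask[n]>-1. The new numbers are written to
   globalReducedDOFIndex (nodes outside the reduced set are marked with -1) and
   the global number of reduced degrees of freedom is returned. */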
dim_t Finley_NodeFile_createDenseReducedDOFLabeling(Finley_NodeFile* in, index_t* reducedNodeMask)
{
    index_t min_dof, max_dof, unset_dof=-1, set_dof=1, dof_0, dof_1, *DOF_buffer=NULL, k;
    Esys_MPI_rank buffer_rank, dest, source, *distribution=NULL;
    dim_t p, buffer_len, n, myDOFs, *offsets=NULL, *loc_offsets=NULL, globalNumReducedDOFs=0, myNewDOFs;
#ifdef ESYS_MPI
    MPI_Status status;
#endif

    /* get the global range of DOF ids */
    Finley_NodeFile_setGlobalDOFRange(&min_dof, &max_dof, in);

    distribution=TMPMEMALLOC(in->MPIInfo->size+1, index_t);
    offsets=TMPMEMALLOC(in->MPIInfo->size, dim_t);
    loc_offsets=TMPMEMALLOC(in->MPIInfo->size, dim_t);

    if (! (Finley_checkPtr(distribution) || Finley_checkPtr(offsets) || Finley_checkPtr(loc_offsets)) ) {
        /* distribute the range of DOF ids */
        buffer_len=Esys_MPIInfo_setDistribution(in->MPIInfo, min_dof, max_dof, distribution);
        myDOFs=distribution[in->MPIInfo->rank+1]-distribution[in->MPIInfo->rank];
        /* allocate buffers */
        DOF_buffer=TMPMEMALLOC(buffer_len, index_t);
        if (! Finley_checkPtr(DOF_buffer)) {
            /* fill DOF_buffer with the unset_dof marker so it can be checked later which DOFs are in use */
            #pragma omp parallel for private(n) schedule(static)
            for (n=0; n<buffer_len; n++) DOF_buffer[n]=unset_dof;

            /* fill the buffer by sending portions around in a circle */
            dest=Esys_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank + 1);
            source=Esys_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank - 1);
            buffer_rank=in->MPIInfo->rank;
            for (p=0; p<in->MPIInfo->size; ++p) {
                if (p>0) { /* the initial send can be skipped */
#ifdef ESYS_MPI
                    MPI_Sendrecv_replace(DOF_buffer, buffer_len, MPI_INT,
                                         dest, in->MPIInfo->msg_tag_counter, source, in->MPIInfo->msg_tag_counter,
                                         in->MPIInfo->comm, &status);
#endif
                    in->MPIInfo->msg_tag_counter++;
                }
                buffer_rank=Esys_MPIInfo_mod(in->MPIInfo->size, buffer_rank-1);
                dof_0=distribution[buffer_rank];
                dof_1=distribution[buffer_rank+1];
                #pragma omp parallel for private(n,k) schedule(static)
                for (n=0; n<in->numNodes; n++) {
                    if (reducedNodeMask[n]>-1) {
                        k=in->globalDegreesOfFreedom[n];
                        if ((dof_0<=k) && (k<dof_1)) {
                            DOF_buffer[k-dof_0]=set_dof;
                        }
                    }
                }
            }
            /* count the entries in the DOF_buffer */
            /* TODO: OMP parallel */
            myNewDOFs=0;
            for (n=0; n<myDOFs; ++n) {
                if (DOF_buffer[n] == set_dof) {
                    DOF_buffer[n]=myNewDOFs;
                    myNewDOFs++;
                }
            }
            memset(loc_offsets, 0, in->MPIInfo->size*sizeof(dim_t));
            loc_offsets[in->MPIInfo->rank]=myNewDOFs;
#ifdef ESYS_MPI
            MPI_Allreduce(loc_offsets, offsets, in->MPIInfo->size, MPI_INT, MPI_SUM, in->MPIInfo->comm);
            globalNumReducedDOFs=0;
            for (n=0; n<in->MPIInfo->size; ++n) {
                loc_offsets[n]=globalNumReducedDOFs;
                globalNumReducedDOFs+=offsets[n];
            }
#else
            globalNumReducedDOFs=loc_offsets[0];
            loc_offsets[0]=0;
#endif
            #pragma omp parallel for private(n) schedule(static)
            for (n=0; n<myDOFs; ++n) DOF_buffer[n]+=loc_offsets[in->MPIInfo->rank];
            /* now entries are collected from the buffer again by sending the entries around in a circle */
            #pragma omp parallel for private(n) schedule(static)
            for (n=0; n<in->numNodes; ++n) in->globalReducedDOFIndex[n]=loc_offsets[0]-1;
            dest=Esys_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank + 1);
            source=Esys_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank - 1);
            buffer_rank=in->MPIInfo->rank;
            for (p=0; p<in->MPIInfo->size; ++p) {
                dof_0=distribution[buffer_rank];
                dof_1=distribution[buffer_rank+1];
                #pragma omp parallel for private(n,k) schedule(static)
                for (n=0; n<in->numNodes; n++) {
                    if (reducedNodeMask[n]>-1) {
                        k=in->globalDegreesOfFreedom[n];
                        if ((dof_0<=k) && (k<dof_1)) in->globalReducedDOFIndex[n]=DOF_buffer[k-dof_0];
                    }
                }
                if (p<in->MPIInfo->size-1) { /* the last send can be skipped */
#ifdef ESYS_MPI
                    MPI_Sendrecv_replace(DOF_buffer, buffer_len, MPI_INT,
                                         dest, in->MPIInfo->msg_tag_counter, source, in->MPIInfo->msg_tag_counter,
                                         in->MPIInfo->comm, &status);
#endif
                    in->MPIInfo->msg_tag_counter+=1;
                }
                buffer_rank=Esys_MPIInfo_mod(in->MPIInfo->size, buffer_rank-1);
            }
        }
        TMPMEMFREE(DOF_buffer);
    }
    TMPMEMFREE(distribution);
    TMPMEMFREE(loc_offsets);
    TMPMEMFREE(offsets);
    return globalNumReducedDOFs;
}

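/* Assigns a dense global numbering 0,...,globalNumNodes-1 to the node Ids whose
   degrees of freedom are owned by this rank (myFirstDOF<=dof<myLastDOF). The
   local counts are accumulated into node_distribution, and the relabeled buffer,
   with its Id range stored in the header_len leading entries, is passed around
   the ranks in a ring so that every rank can look up the new index of each of
   its nodes. The result is written to globalNodesIndex and the total number of
   global nodes is returned. */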
dim_t Finley_NodeFile_createDenseNodeLabeling(Finley_NodeFile* in, index_t* node_distribution, const index_t* dof_distribution)
{
    index_t myFirstDOF, myLastDOF, max_id, min_id, loc_max_id, loc_min_id, dof, id, itmp, nodeID_0, nodeID_1, dof_0, dof_1, *Node_buffer=NULL;
    dim_t n, my_buffer_len, buffer_len, globalNumNodes=0, myNewNumNodes;
    Esys_MPI_rank p, dest, source, buffer_rank;
    const index_t unset_nodeID=-1, set_nodeID=1;
    const dim_t header_len=2;
#ifdef ESYS_MPI
    MPI_Status status;
#endif
    Esys_MPI_rank myRank=in->MPIInfo->rank;

    /* find the range of node ids controlled by me */

    myFirstDOF=dof_distribution[myRank];
    myLastDOF=dof_distribution[myRank+1];
    max_id=-INDEX_T_MAX;
    min_id=INDEX_T_MAX;
    #pragma omp parallel private(loc_max_id,loc_min_id)
    {
        loc_max_id=max_id;
        loc_min_id=min_id;
        #pragma omp for private(n,dof,id) schedule(static)
        for (n=0; n<in->numNodes; n++) {
            dof=in->globalDegreesOfFreedom[n];
            id=in->Id[n];
            if ((myFirstDOF<=dof) && (dof<myLastDOF)) {
                loc_max_id=MAX(loc_max_id, id);
                loc_min_id=MIN(loc_min_id, id);
            }
        }
        #pragma omp critical
        {
            max_id=MAX(loc_max_id, max_id);
            min_id=MIN(loc_min_id, min_id);
        }
    }
    /* allocate a buffer */
    my_buffer_len = (max_id>=min_id) ? max_id-min_id+1 : 0;

#ifdef ESYS_MPI
    MPI_Allreduce(&my_buffer_len, &buffer_len, 1, MPI_INT, MPI_MAX, in->MPIInfo->comm);
#else
    buffer_len=my_buffer_len;
#endif

    Node_buffer=TMPMEMALLOC(buffer_len+header_len, index_t);
    if (! Finley_checkPtr(Node_buffer)) {
        /* mark and count the nodes in use */
        #pragma omp parallel
        {
            #pragma omp for private(n) schedule(static)
            for (n=0; n<buffer_len+header_len; n++) Node_buffer[n]=unset_nodeID;
            #pragma omp for private(n) schedule(static)
            for (n=0; n<in->numNodes; n++) in->globalNodesIndex[n]=-1;
            #pragma omp for private(n,dof,id) schedule(static)
            for (n=0; n<in->numNodes; n++) {
                dof=in->globalDegreesOfFreedom[n];
                id=in->Id[n];
                if ((myFirstDOF<=dof) && (dof<myLastDOF)) Node_buffer[id-min_id+header_len]=set_nodeID;
            }
        }
        myNewNumNodes=0;
        for (n=0; n<my_buffer_len; n++) {
            if (Node_buffer[header_len+n]==set_nodeID) {
                Node_buffer[header_len+n]=myNewNumNodes;
                myNewNumNodes++;
            }
        }
        /* make the local number of nodes globally available */
#ifdef ESYS_MPI
        MPI_Allgather(&myNewNumNodes, 1, MPI_INT, node_distribution, 1, MPI_INT, in->MPIInfo->comm);
#else
        node_distribution[0]=myNewNumNodes;
#endif

        globalNumNodes=0;
        for (p=0; p<in->MPIInfo->size; ++p) {
            itmp=node_distribution[p];
            node_distribution[p]=globalNumNodes;
            globalNumNodes+=itmp;
        }
        node_distribution[in->MPIInfo->size]=globalNumNodes;

        /* offset Node_buffer */
        itmp=node_distribution[in->MPIInfo->rank];
        #pragma omp parallel for private(n) schedule(static)
        for (n=0; n<my_buffer_len; n++) Node_buffer[n+header_len]+=itmp;

        /* now we send this buffer around to assign global node indices: */
        dest=Esys_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank + 1);
        source=Esys_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank - 1);
        Node_buffer[0]=min_id;
        Node_buffer[1]=max_id;
        buffer_rank=in->MPIInfo->rank;
        for (p=0; p<in->MPIInfo->size; ++p) {
            nodeID_0=Node_buffer[0];
            nodeID_1=Node_buffer[1];
            dof_0=dof_distribution[buffer_rank];
            dof_1=dof_distribution[buffer_rank+1];
            if (nodeID_0<=nodeID_1) {
                #pragma omp parallel for private(n,dof,id) schedule(static)
                for (n=0; n<in->numNodes; n++) {
                    dof=in->globalDegreesOfFreedom[n];
                    id=in->Id[n]-nodeID_0;
                    if ((dof_0<=dof) && (dof<dof_1) && (id>=0) && (id<=nodeID_1-nodeID_0)) in->globalNodesIndex[n]=Node_buffer[id+header_len];
                }
            }
            if (p<in->MPIInfo->size-1) { /* the last send can be skipped */
#ifdef ESYS_MPI
                MPI_Sendrecv_replace(Node_buffer, buffer_len+header_len, MPI_INT,
                                     dest, in->MPIInfo->msg_tag_counter, source, in->MPIInfo->msg_tag_counter,
                                     in->MPIInfo->comm, &status);
#endif
                in->MPIInfo->msg_tag_counter+=1;
            }
            buffer_rank=Esys_MPIInfo_mod(in->MPIInfo->size, buffer_rank-1);
        }
    }
    TMPMEMFREE(Node_buffer);
    return globalNumNodes;
}

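/* Numbers the nodes with reducedNodeMask[n]>-1 densely as
   0,...,globalNumReducedNodes-1, based on their globalNodesIndex, using the
   same ring-exchange scheme as the DOF labelings above. The result is written
   to globalReducedNodesIndex (nodes outside the reduced set are marked with -1)
   and the global number of reduced nodes is returned. */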
dim_t Finley_NodeFile_createDenseReducedNodeLabeling(Finley_NodeFile* in, index_t* reducedNodeMask)
{
    index_t min_node, max_node, unset_node=-1, set_node=1, node_0, node_1, *Nodes_buffer=NULL, k;
    Esys_MPI_rank buffer_rank, dest, source, *distribution=NULL;
    dim_t p, buffer_len, n, myNodes, *offsets=NULL, *loc_offsets=NULL, globalNumReducedNodes=0, myNewNodes;
#ifdef ESYS_MPI
    MPI_Status status;
#endif

    /* get the global range of node indices */
    Finley_NodeFile_setGlobalNodeIDIndexRange(&min_node, &max_node, in);

    distribution=TMPMEMALLOC(in->MPIInfo->size+1, index_t);
    offsets=TMPMEMALLOC(in->MPIInfo->size, dim_t);
    loc_offsets=TMPMEMALLOC(in->MPIInfo->size, dim_t);

    if (! (Finley_checkPtr(distribution) || Finley_checkPtr(offsets) || Finley_checkPtr(loc_offsets)) ) {
        /* distribute the range of node indices */
        buffer_len=Esys_MPIInfo_setDistribution(in->MPIInfo, min_node, max_node, distribution);
        myNodes=distribution[in->MPIInfo->rank+1]-distribution[in->MPIInfo->rank];
        /* allocate buffers */
        Nodes_buffer=TMPMEMALLOC(buffer_len, index_t);
        if (! Finley_checkPtr(Nodes_buffer)) {
            /* fill Nodes_buffer with the unset_node marker so it can be checked later which nodes are in use */
            #pragma omp parallel for private(n) schedule(static)
            for (n=0; n<buffer_len; n++) Nodes_buffer[n]=unset_node;

            /* fill the buffer by sending portions around in a circle */
            dest=Esys_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank + 1);
            source=Esys_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank - 1);
            buffer_rank=in->MPIInfo->rank;
            for (p=0; p<in->MPIInfo->size; ++p) {
                if (p>0) { /* the initial send can be skipped */
#ifdef ESYS_MPI
                    MPI_Sendrecv_replace(Nodes_buffer, buffer_len, MPI_INT,
                                         dest, in->MPIInfo->msg_tag_counter, source, in->MPIInfo->msg_tag_counter,
                                         in->MPIInfo->comm, &status);
#endif
                    in->MPIInfo->msg_tag_counter++;
                }
                buffer_rank=Esys_MPIInfo_mod(in->MPIInfo->size, buffer_rank-1);
                node_0=distribution[buffer_rank];
                node_1=distribution[buffer_rank+1];
                #pragma omp parallel for private(n,k) schedule(static)
                for (n=0; n<in->numNodes; n++) {
                    if (reducedNodeMask[n]>-1) {
                        k=in->globalNodesIndex[n];
                        if ((node_0<=k) && (k<node_1)) {
                            Nodes_buffer[k-node_0]=set_node;
                        }
                    }
                }
            }
            /* count the entries in the Nodes_buffer */
            /* TODO: OMP parallel */
            myNewNodes=0;
            for (n=0; n<myNodes; ++n) {
                if (Nodes_buffer[n] == set_node) {
                    Nodes_buffer[n]=myNewNodes;
                    myNewNodes++;
                }
            }
            memset(loc_offsets, 0, in->MPIInfo->size*sizeof(dim_t));
            loc_offsets[in->MPIInfo->rank]=myNewNodes;
#ifdef ESYS_MPI
            MPI_Allreduce(loc_offsets, offsets, in->MPIInfo->size, MPI_INT, MPI_SUM, in->MPIInfo->comm);
            globalNumReducedNodes=0;
            for (n=0; n<in->MPIInfo->size; ++n) {
                loc_offsets[n]=globalNumReducedNodes;
                globalNumReducedNodes+=offsets[n];
            }
#else
            globalNumReducedNodes=loc_offsets[0];
            loc_offsets[0]=0;
#endif
            #pragma omp parallel for private(n) schedule(static)
            for (n=0; n<myNodes; ++n) Nodes_buffer[n]+=loc_offsets[in->MPIInfo->rank];
            /* now entries are collected from the buffer again by sending the entries around in a circle */
            #pragma omp parallel for private(n) schedule(static)
            for (n=0; n<in->numNodes; ++n) in->globalReducedNodesIndex[n]=loc_offsets[0]-1;
            dest=Esys_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank + 1);
            source=Esys_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank - 1);
            buffer_rank=in->MPIInfo->rank;
            for (p=0; p<in->MPIInfo->size; ++p) {
                node_0=distribution[buffer_rank];
                node_1=distribution[buffer_rank+1];
                #pragma omp parallel for private(n,k) schedule(static)
                for (n=0; n<in->numNodes; n++) {
                    if (reducedNodeMask[n]>-1) {
                        k=in->globalNodesIndex[n];
                        if ((node_0<=k) && (k<node_1)) in->globalReducedNodesIndex[n]=Nodes_buffer[k-node_0];
                    }
                }
                if (p<in->MPIInfo->size-1) { /* the last send can be skipped */
#ifdef ESYS_MPI
                    MPI_Sendrecv_replace(Nodes_buffer, buffer_len, MPI_INT,
                                         dest, in->MPIInfo->msg_tag_counter, source, in->MPIInfo->msg_tag_counter,
                                         in->MPIInfo->comm, &status);
#endif
                    in->MPIInfo->msg_tag_counter+=1;
                }
                buffer_rank=Esys_MPIInfo_mod(in->MPIInfo->size, buffer_rank-1);
            }
        }
        TMPMEMFREE(Nodes_buffer);
    }
    TMPMEMFREE(distribution);
    TMPMEMFREE(loc_offsets);
    TMPMEMFREE(offsets);
    return globalNumReducedNodes;
}