/* $Id$ */

/*******************************************************
 *
 *  Copyright 2003-2007 by ACceSS MNRF
 *  Copyright 2007 by University of Queensland
 *
 *  http://esscc.uq.edu.au
 *  Primary Business: Queensland, Australia
 *  Licensed under the Open Software License version 3.0
 *  http://www.opensource.org/licenses/osl-3.0.php
 *
 *******************************************************/

/**************************************************************/

/*   Finley: Mesh: NodeFile */

/*   creates a dense labeling of the global degrees of freedom */
/*   and returns the new number of global degrees of freedom   */

/**************************************************************/

#include "NodeFile.h"

/**************************************************************/
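
/*  Finley_NodeFile_createDenseDOFLabeling:
 *
 *  The global DOF ids attached to the nodes may be sparse; this routine replaces them
 *  by the dense labels 0,...,newNumGlobalDOFs-1:
 *    (1) the id range [min_dof,max_dof] is split evenly over the MPI ranks (distribution),
 *    (2) a marker buffer for each rank's slice is sent around the processor ring so that
 *        every rank can flag the ids it actually uses,
 *    (3) the flagged entries of each slice are compacted into a local count, and an
 *        MPI_Allreduce turns the per-rank counts into global offsets,
 *    (4) the relabeled buffers are sent around the ring a second time so that every rank
 *        can overwrite its globalDegreesOfFreedom entries with the new dense labels.
 *  The new global number of degrees of freedom is returned.
 */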
dim_t Finley_NodeFile_createDenseDOFLabeling(Finley_NodeFile* in)
{
   index_t min_dof, max_dof, unset_dof=-1, set_dof=1, dof_0, dof_1, *DOF_buffer=NULL, k;
   Paso_MPI_rank buffer_rank, dest, source, *distribution=NULL;
   dim_t p, buffer_len, n, myDOFs, *offsets=NULL, *loc_offsets=NULL, new_numGlobalDOFs=0, myNewDOFs;
   bool_t *set_new_DOF=NULL;
   #ifdef PASO_MPI
   MPI_Status status;
   #endif

   /* get the global range of degree-of-freedom ids */
   Finley_NodeFile_setGlobalDOFRange(&min_dof,&max_dof,in);

   distribution=TMPMEMALLOC(in->MPIInfo->size+1, index_t);
   offsets=TMPMEMALLOC(in->MPIInfo->size, dim_t);
   loc_offsets=TMPMEMALLOC(in->MPIInfo->size, dim_t);
   set_new_DOF=TMPMEMALLOC(in->numNodes, bool_t);

   if ( !(Finley_checkPtr(distribution) || Finley_checkPtr(offsets) || Finley_checkPtr(loc_offsets) || Finley_checkPtr(set_new_DOF)) ) {
      /* distribute the range of DOF ids */
      buffer_len=Paso_MPIInfo_setDistribution(in->MPIInfo,min_dof,max_dof,distribution);
      myDOFs=distribution[in->MPIInfo->rank+1]-distribution[in->MPIInfo->rank];
      /* allocate buffers */
      DOF_buffer=TMPMEMALLOC(buffer_len,index_t);
      if (! Finley_checkPtr(DOF_buffer)) {
         /* fill DOF_buffer with the unset_dof marker so that used DOFs can be detected */
         #pragma omp parallel for private(n) schedule(static)
         for (n=0;n<buffer_len;n++) DOF_buffer[n]=unset_dof;

         /* fill the buffer by sending portions around in a circle */
         dest=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank + 1);
         source=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank - 1);
         buffer_rank=in->MPIInfo->rank;
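         /* each pass forwards the held buffer to rank+1 and receives a new one from rank-1;
            buffer_rank identifies the distribution slice the held buffer collects marks for,
            so after the final pass every rank holds the fully marked buffer of its own slice */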
         for (p=0; p<in->MPIInfo->size; ++p) {
            if (p>0) { /* the initial send can be skipped */
               #ifdef PASO_MPI
               MPI_Sendrecv_replace(DOF_buffer, buffer_len, MPI_INT,
                                    dest, in->MPIInfo->msg_tag_counter,
                                    source, in->MPIInfo->msg_tag_counter,
                                    in->MPIInfo->comm, &status);
               #endif
               in->MPIInfo->msg_tag_counter++;
            }
            buffer_rank=Paso_MPIInfo_mod(in->MPIInfo->size, buffer_rank-1);
            dof_0=distribution[buffer_rank];
            dof_1=distribution[buffer_rank+1];
            #pragma omp parallel for private(n,k) schedule(static)
            for (n=0;n<in->numNodes;n++) {
               k=in->globalDegreesOfFreedom[n];
               if ((dof_0<=k) && (k<dof_1)) {
                  DOF_buffer[k-dof_0]=set_dof;
               }
            }
         }
         /* count the entries in the DOF_buffer */
         /* TODO: OMP parallel */
         myNewDOFs=0;
         for (n=0; n<myDOFs; ++n) {
            if (DOF_buffer[n]==set_dof) {
               DOF_buffer[n]=myNewDOFs;
               myNewDOFs++;
            }
         }
         memset(loc_offsets,0,in->MPIInfo->size*sizeof(dim_t));
         loc_offsets[in->MPIInfo->rank]=myNewDOFs;
         #ifdef PASO_MPI
         MPI_Allreduce(loc_offsets,offsets,in->MPIInfo->size, MPI_INT, MPI_SUM, in->MPIInfo->comm);
         new_numGlobalDOFs=0;
         for (n=0; n<in->MPIInfo->size; ++n) {
            loc_offsets[n]=new_numGlobalDOFs;
            new_numGlobalDOFs+=offsets[n];
         }
         #else
         new_numGlobalDOFs=loc_offsets[0];
         loc_offsets[0]=0;
         #endif
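         /* loc_offsets[] now holds the exclusive prefix sum of the per-rank counts, i.e. the
            global offset of each rank's first new DOF; new_numGlobalDOFs is the global total */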
         #pragma omp parallel
         {
            #pragma omp for private(n) schedule(static)
            for (n=0; n<myDOFs; ++n) DOF_buffer[n]+=loc_offsets[in->MPIInfo->rank];
            #pragma omp for private(n) schedule(static)
            for (n=0; n<in->numNodes; ++n) set_new_DOF[n]=TRUE;
         }
         /* the new labels are now collected from the buffers by sending them around the ring a second time */
         dest=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank + 1);
         source=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank - 1);
         buffer_rank=in->MPIInfo->rank;
         for (p=0; p<in->MPIInfo->size; ++p) {
            dof_0=distribution[buffer_rank];
            dof_1=distribution[buffer_rank+1];
            #pragma omp parallel for private(n,k) schedule(static)
            for (n=0;n<in->numNodes;n++) {
               k=in->globalDegreesOfFreedom[n];
               if (set_new_DOF[n] && (dof_0<=k) && (k<dof_1)) {
                  in->globalDegreesOfFreedom[n]=DOF_buffer[k-dof_0];
                  set_new_DOF[n]=FALSE;
               }
            }
            if (p<in->MPIInfo->size-1) { /* the last send can be skipped */
               #ifdef PASO_MPI
               MPI_Sendrecv_replace(DOF_buffer, buffer_len, MPI_INT,
                                    dest, in->MPIInfo->msg_tag_counter,
                                    source, in->MPIInfo->msg_tag_counter,
                                    in->MPIInfo->comm, &status);
               #endif
               in->MPIInfo->msg_tag_counter+=1;
            }
            buffer_rank=Paso_MPIInfo_mod(in->MPIInfo->size, buffer_rank-1);
         }
      }
      TMPMEMFREE(DOF_buffer);
   }
   TMPMEMFREE(distribution);
   TMPMEMFREE(loc_offsets);
   TMPMEMFREE(offsets);
   TMPMEMFREE(set_new_DOF);
   return new_numGlobalDOFs;
}
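
/*  Finley_NodeFile_assignMPIRankToDOFs:
 *
 *  For each node n, mpiRankOfDOF[n] is set to the MPI rank that owns the node's global
 *  degree of freedom according to distribution, i.e. the rank p with
 *  distribution[p] <= globalDegreesOfFreedom[n] < distribution[p+1].
 */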
void Finley_NodeFile_assignMPIRankToDOFs(Finley_NodeFile* in, Paso_MPI_rank* mpiRankOfDOF, index_t* distribution)
{
   index_t min_DOF, max_DOF, k;
   dim_t n;
   Paso_MPI_rank p, p_min=in->MPIInfo->size, p_max=-1;
   /* first the minimum and maximum DOF on this processor are calculated to narrow the search range */
   Finley_NodeFile_setDOFRange(&min_DOF,&max_DOF,in);

   for (p=0; p<in->MPIInfo->size; ++p) {
      if (distribution[p]<=min_DOF) p_min=p;
      if (distribution[p]<=max_DOF) p_max=p;
   }
   #pragma omp parallel for private(n,k,p) schedule(static)
   for (n=0; n<in->numNodes; ++n) {
      k=in->globalDegreesOfFreedom[n];
      for (p=p_min; p<=p_max; ++p) {
         if (k<distribution[p+1]) {
            mpiRankOfDOF[n]=p;
            break;
         }
      }
   }
}
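
/*  Finley_NodeFile_createDenseReducedDOFLabeling:
 *
 *  Same ring-buffer scheme as Finley_NodeFile_createDenseDOFLabeling, but only degrees of
 *  freedom of nodes with reducedNodeMask[n]>-1 take part. The dense labels are written to
 *  in->globalReducedDOFIndex; nodes outside the reduced set are marked with -1. The global
 *  number of reduced degrees of freedom is returned.
 */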
dim_t Finley_NodeFile_createDenseReducedDOFLabeling(Finley_NodeFile* in, index_t* reducedNodeMask)
{
   index_t min_dof, max_dof, unset_dof=-1, set_dof=1, dof_0, dof_1, *DOF_buffer=NULL, k;
   Paso_MPI_rank buffer_rank, dest, source, *distribution=NULL;
   dim_t p, buffer_len, n, myDOFs, *offsets=NULL, *loc_offsets=NULL, globalNumReducedDOFs=0, myNewDOFs;
   #ifdef PASO_MPI
   MPI_Status status;
   #endif

   /* get the global range of degree-of-freedom ids */
   Finley_NodeFile_setGlobalDOFRange(&min_dof,&max_dof,in);

   distribution=TMPMEMALLOC(in->MPIInfo->size+1, index_t);
   offsets=TMPMEMALLOC(in->MPIInfo->size, dim_t);
   loc_offsets=TMPMEMALLOC(in->MPIInfo->size, dim_t);

   if ( !(Finley_checkPtr(distribution) || Finley_checkPtr(offsets) || Finley_checkPtr(loc_offsets)) ) {
      /* distribute the range of DOF ids */
      buffer_len=Paso_MPIInfo_setDistribution(in->MPIInfo,min_dof,max_dof,distribution);
      myDOFs=distribution[in->MPIInfo->rank+1]-distribution[in->MPIInfo->rank];
      /* allocate buffers */
      DOF_buffer=TMPMEMALLOC(buffer_len,index_t);
      if (! Finley_checkPtr(DOF_buffer)) {
         /* fill DOF_buffer with the unset_dof marker so that used DOFs can be detected */
         #pragma omp parallel for private(n) schedule(static)
         for (n=0;n<buffer_len;n++) DOF_buffer[n]=unset_dof;

         /* fill the buffer by sending portions around in a circle */
         dest=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank + 1);
         source=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank - 1);
         buffer_rank=in->MPIInfo->rank;
         for (p=0; p<in->MPIInfo->size; ++p) {
            if (p>0) { /* the initial send can be skipped */
               #ifdef PASO_MPI
               MPI_Sendrecv_replace(DOF_buffer, buffer_len, MPI_INT,
                                    dest, in->MPIInfo->msg_tag_counter,
                                    source, in->MPIInfo->msg_tag_counter,
                                    in->MPIInfo->comm, &status);
               #endif
               in->MPIInfo->msg_tag_counter++;
            }
            buffer_rank=Paso_MPIInfo_mod(in->MPIInfo->size, buffer_rank-1);
            dof_0=distribution[buffer_rank];
            dof_1=distribution[buffer_rank+1];
            #pragma omp parallel for private(n,k) schedule(static)
            for (n=0;n<in->numNodes;n++) {
               if (reducedNodeMask[n]>-1) {
                  k=in->globalDegreesOfFreedom[n];
                  if ((dof_0<=k) && (k<dof_1)) {
                     DOF_buffer[k-dof_0]=set_dof;
                  }
               }
            }
         }
         /* count the entries in the DOF_buffer */
         /* TODO: OMP parallel */
         myNewDOFs=0;
         for (n=0; n<myDOFs; ++n) {
            if (DOF_buffer[n]==set_dof) {
               DOF_buffer[n]=myNewDOFs;
               myNewDOFs++;
            }
         }
         memset(loc_offsets,0,in->MPIInfo->size*sizeof(dim_t));
         loc_offsets[in->MPIInfo->rank]=myNewDOFs;
         #ifdef PASO_MPI
         MPI_Allreduce(loc_offsets,offsets,in->MPIInfo->size, MPI_INT, MPI_SUM, in->MPIInfo->comm);
         globalNumReducedDOFs=0;
         for (n=0; n<in->MPIInfo->size; ++n) {
            loc_offsets[n]=globalNumReducedDOFs;
            globalNumReducedDOFs+=offsets[n];
         }
         #else
         globalNumReducedDOFs=loc_offsets[0];
         loc_offsets[0]=0;
         #endif
         #pragma omp parallel for private(n) schedule(static)
         for (n=0; n<myDOFs; ++n) DOF_buffer[n]+=loc_offsets[in->MPIInfo->rank];
         /* the new labels are now collected from the buffers by sending them around the ring a second time;
            nodes outside the reduced set keep the marker loc_offsets[0]-1 == -1 */
         #pragma omp parallel for private(n) schedule(static)
         for (n=0; n<in->numNodes; ++n) in->globalReducedDOFIndex[n]=loc_offsets[0]-1;
         dest=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank + 1);
         source=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank - 1);
         buffer_rank=in->MPIInfo->rank;
         for (p=0; p<in->MPIInfo->size; ++p) {
            dof_0=distribution[buffer_rank];
            dof_1=distribution[buffer_rank+1];
            #pragma omp parallel for private(n,k) schedule(static)
            for (n=0;n<in->numNodes;n++) {
               if (reducedNodeMask[n]>-1) {
                  k=in->globalDegreesOfFreedom[n];
                  if ((dof_0<=k) && (k<dof_1)) in->globalReducedDOFIndex[n]=DOF_buffer[k-dof_0];
               }
            }
            if (p<in->MPIInfo->size-1) { /* the last send can be skipped */
               #ifdef PASO_MPI
               MPI_Sendrecv_replace(DOF_buffer, buffer_len, MPI_INT,
                                    dest, in->MPIInfo->msg_tag_counter,
                                    source, in->MPIInfo->msg_tag_counter,
                                    in->MPIInfo->comm, &status);
               #endif
               in->MPIInfo->msg_tag_counter+=1;
            }
            buffer_rank=Paso_MPIInfo_mod(in->MPIInfo->size, buffer_rank-1);
         }
      }
      TMPMEMFREE(DOF_buffer);
   }
   TMPMEMFREE(distribution);
   TMPMEMFREE(loc_offsets);
   TMPMEMFREE(offsets);
   return globalNumReducedDOFs;
}
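
/*  Finley_NodeFile_createDenseNodeLabeling:
 *
 *  Creates a dense global labeling of the nodes whose degrees of freedom are owned by this
 *  rank (dof_distribution[rank] <= DOF < dof_distribution[rank+1]). A buffer indexed by
 *  node Id relative to the smallest owned Id, preceded by a two-entry header carrying that
 *  Id window, is labeled locally, shifted by the rank's global node offset and then sent
 *  around the processor ring so that every rank can fill in->globalNodesIndex for the nodes
 *  it references. The per-rank node offsets are returned in node_distribution and the
 *  global number of nodes is the return value.
 */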
dim_t Finley_NodeFile_createDenseNodeLabeling(Finley_NodeFile* in, index_t* node_distribution, const index_t* dof_distribution)
{
   index_t myFirstDOF, myLastDOF, max_id, min_id, loc_max_id, loc_min_id, dof, id, itmp, nodeID_0, nodeID_1, dof_0, dof_1, *Node_buffer=NULL;
   dim_t n, my_buffer_len, buffer_len, globalNumNodes, myNewNumNodes;
   Paso_MPI_rank p, dest, source, buffer_rank;
   const index_t unset_nodeID=-1, set_nodeID=1;
   const dim_t header_len=2;
   #ifdef PASO_MPI
   MPI_Status status;
   #endif
   Paso_MPI_rank myRank=in->MPIInfo->rank;

   /* find the range of node ids controlled by this rank */
   myFirstDOF=dof_distribution[in->MPIInfo->rank];
   myLastDOF=dof_distribution[in->MPIInfo->rank+1];
   max_id=-INDEX_T_MAX;
   min_id=INDEX_T_MAX;
   #pragma omp parallel private(loc_max_id,loc_min_id)
   {
      loc_max_id=max_id;
      loc_min_id=min_id;
      #pragma omp for private(n,dof,id) schedule(static)
      for (n=0;n<in->numNodes;n++) {
         dof=in->globalDegreesOfFreedom[n];
         id=in->Id[n];
         if ((myFirstDOF<=dof) && (dof<myLastDOF)) {
            loc_max_id=MAX(loc_max_id,id);
            loc_min_id=MIN(loc_min_id,id);
         }
      }
      #pragma omp critical
      {
         max_id=MAX(loc_max_id,max_id);
         min_id=MIN(loc_min_id,min_id);
      }
   }
   /* allocate a buffer covering the range of node ids owned by this rank */
   my_buffer_len = (max_id>=min_id) ? max_id-min_id+1 : 0;

   #ifdef PASO_MPI
   MPI_Allreduce(&my_buffer_len, &buffer_len, 1, MPI_INT, MPI_MAX, in->MPIInfo->comm);
   #else
   buffer_len=my_buffer_len;
   #endif

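   /* Node_buffer layout: two header entries carrying the Id window [min_id,max_id] of the
      rank that filled the buffer, followed by one slot per node Id in that window
      (at index Id-min_id+header_len); the header lets receiving ranks map their node Ids
      into the circulated buffer */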
   Node_buffer=TMPMEMALLOC(buffer_len+header_len,index_t);
   if (! Finley_checkPtr(Node_buffer)) {
      /* mark and count the nodes in use */
      #pragma omp parallel
      {
         #pragma omp for private(n) schedule(static)
         for (n=0;n<buffer_len+header_len;n++) Node_buffer[n]=unset_nodeID;
         #pragma omp for private(n) schedule(static)
         for (n=0;n<in->numNodes;n++) in->globalNodesIndex[n]=-1;
         #pragma omp for private(n,dof,id) schedule(static)
         for (n=0;n<in->numNodes;n++) {
            dof=in->globalDegreesOfFreedom[n];
            id=in->Id[n];
            if ((myFirstDOF<=dof) && (dof<myLastDOF)) Node_buffer[id-min_id+header_len]=set_nodeID;
         }
      }
      myNewNumNodes=0;
      for (n=0;n<my_buffer_len;n++) {
         if (Node_buffer[header_len+n]==set_nodeID) {
            Node_buffer[header_len+n]=myNewNumNodes;
            myNewNumNodes++;
         }
      }
      /* make the local number of nodes globally available */
      #ifdef PASO_MPI
      MPI_Allgather(&myNewNumNodes,1,MPI_INT,node_distribution,1,MPI_INT,in->MPIInfo->comm);
      #else
      node_distribution[0]=myNewNumNodes;
      #endif
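      /* turn the per-rank node counts into a cumulative distribution:
         node_distribution[p] becomes the global index of rank p's first node */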
      globalNumNodes=0;
      for (p=0; p<in->MPIInfo->size; ++p) {
         itmp=node_distribution[p];
         node_distribution[p]=globalNumNodes;
         globalNumNodes+=itmp;
      }
      node_distribution[in->MPIInfo->size]=globalNumNodes;

      /* shift the local node labels by this rank's global offset */
      itmp=node_distribution[in->MPIInfo->rank];
      #pragma omp for private(n) schedule(static)
      for (n=0;n<my_buffer_len;n++) Node_buffer[n+header_len]+=itmp;

      /* now the buffer is sent around the ring to assign the global node indices: */
      dest=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank + 1);
      source=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank - 1);
      Node_buffer[0]=min_id;
      Node_buffer[1]=max_id;
      buffer_rank=in->MPIInfo->rank;
      for (p=0; p<in->MPIInfo->size; ++p) {
         nodeID_0=Node_buffer[0];
         nodeID_1=Node_buffer[1];
         dof_0=dof_distribution[buffer_rank];
         dof_1=dof_distribution[buffer_rank+1];
         if (nodeID_0<=nodeID_1) {
            #pragma omp for private(n,dof,id) schedule(static)
            for (n=0;n<in->numNodes;n++) {
               dof=in->globalDegreesOfFreedom[n];
               id=in->Id[n]-nodeID_0;
               if ((dof_0<=dof) && (dof<dof_1) && (id>=0) && (id<=nodeID_1-nodeID_0)) in->globalNodesIndex[n]=Node_buffer[id+header_len];
            }
         }
         if (p<in->MPIInfo->size-1) { /* the last send can be skipped */
            #ifdef PASO_MPI
            MPI_Sendrecv_replace(Node_buffer, buffer_len+header_len, MPI_INT,
                                 dest, in->MPIInfo->msg_tag_counter,
                                 source, in->MPIInfo->msg_tag_counter,
                                 in->MPIInfo->comm, &status);
            #endif
            in->MPIInfo->msg_tag_counter+=1;
         }
         buffer_rank=Paso_MPIInfo_mod(in->MPIInfo->size, buffer_rank-1);
      }
   }
   TMPMEMFREE(Node_buffer);
   return globalNumNodes;
}
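
/*  Finley_NodeFile_createDenseReducedNodeLabeling:
 *
 *  Same ring-buffer scheme, applied to the global node indices instead of the degrees of
 *  freedom: only nodes with reducedNodeMask[n]>-1 take part, the dense labels are written
 *  to in->globalReducedNodesIndex (-1 for nodes outside the reduced set), and the global
 *  number of reduced nodes is returned.
 */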
dim_t Finley_NodeFile_createDenseReducedNodeLabeling(Finley_NodeFile* in, index_t* reducedNodeMask)
{
   index_t min_node, max_node, unset_node=-1, set_node=1, node_0, node_1, *Nodes_buffer=NULL, k;
   Paso_MPI_rank buffer_rank, dest, source, *distribution=NULL;
   dim_t p, buffer_len, n, myNodes, *offsets=NULL, *loc_offsets=NULL, globalNumReducedNodes=0, myNewNodes;
   #ifdef PASO_MPI
   MPI_Status status;
   #endif

   /* get the global range of node indices */
   Finley_NodeFile_setGlobalNodeIDIndexRange(&min_node,&max_node,in);

   distribution=TMPMEMALLOC(in->MPIInfo->size+1, index_t);
   offsets=TMPMEMALLOC(in->MPIInfo->size, dim_t);
   loc_offsets=TMPMEMALLOC(in->MPIInfo->size, dim_t);

   if ( !(Finley_checkPtr(distribution) || Finley_checkPtr(offsets) || Finley_checkPtr(loc_offsets)) ) {
      /* distribute the range of node indices */
      buffer_len=Paso_MPIInfo_setDistribution(in->MPIInfo,min_node,max_node,distribution);
      myNodes=distribution[in->MPIInfo->rank+1]-distribution[in->MPIInfo->rank];
      /* allocate buffers */
      Nodes_buffer=TMPMEMALLOC(buffer_len,index_t);
      if (! Finley_checkPtr(Nodes_buffer)) {
         /* fill Nodes_buffer with the unset_node marker so that used nodes can be detected */
         #pragma omp parallel for private(n) schedule(static)
         for (n=0;n<buffer_len;n++) Nodes_buffer[n]=unset_node;

         /* fill the buffer by sending portions around in a circle */
         dest=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank + 1);
         source=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank - 1);
         buffer_rank=in->MPIInfo->rank;
         for (p=0; p<in->MPIInfo->size; ++p) {
            if (p>0) { /* the initial send can be skipped */
               #ifdef PASO_MPI
               MPI_Sendrecv_replace(Nodes_buffer, buffer_len, MPI_INT,
                                    dest, in->MPIInfo->msg_tag_counter,
                                    source, in->MPIInfo->msg_tag_counter,
                                    in->MPIInfo->comm, &status);
               #endif
               in->MPIInfo->msg_tag_counter++;
            }
            buffer_rank=Paso_MPIInfo_mod(in->MPIInfo->size, buffer_rank-1);
            node_0=distribution[buffer_rank];
            node_1=distribution[buffer_rank+1];
            #pragma omp parallel for private(n,k) schedule(static)
            for (n=0;n<in->numNodes;n++) {
               if (reducedNodeMask[n]>-1) {
                  k=in->globalNodesIndex[n];
                  if ((node_0<=k) && (k<node_1)) {
                     Nodes_buffer[k-node_0]=set_node;
                  }
               }
            }
         }
         /* count the entries in the Nodes_buffer */
         /* TODO: OMP parallel */
         myNewNodes=0;
         for (n=0; n<myNodes; ++n) {
            if (Nodes_buffer[n]==set_node) {
               Nodes_buffer[n]=myNewNodes;
               myNewNodes++;
            }
         }
         memset(loc_offsets,0,in->MPIInfo->size*sizeof(dim_t));
         loc_offsets[in->MPIInfo->rank]=myNewNodes;
         #ifdef PASO_MPI
         MPI_Allreduce(loc_offsets,offsets,in->MPIInfo->size, MPI_INT, MPI_SUM, in->MPIInfo->comm);
         globalNumReducedNodes=0;
         for (n=0; n<in->MPIInfo->size; ++n) {
            loc_offsets[n]=globalNumReducedNodes;
            globalNumReducedNodes+=offsets[n];
         }
         #else
         globalNumReducedNodes=loc_offsets[0];
         loc_offsets[0]=0;
         #endif
         #pragma omp parallel for private(n) schedule(static)
         for (n=0; n<myNodes; ++n) Nodes_buffer[n]+=loc_offsets[in->MPIInfo->rank];
         /* the new labels are now collected from the buffers by sending them around the ring a second time;
            nodes outside the reduced set keep the marker loc_offsets[0]-1 == -1 */
         #pragma omp parallel for private(n) schedule(static)
         for (n=0; n<in->numNodes; ++n) in->globalReducedNodesIndex[n]=loc_offsets[0]-1;
         dest=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank + 1);
         source=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank - 1);
         buffer_rank=in->MPIInfo->rank;
         for (p=0; p<in->MPIInfo->size; ++p) {
            node_0=distribution[buffer_rank];
            node_1=distribution[buffer_rank+1];
            #pragma omp parallel for private(n,k) schedule(static)
            for (n=0;n<in->numNodes;n++) {
               if (reducedNodeMask[n]>-1) {
                  k=in->globalNodesIndex[n];
                  if ((node_0<=k) && (k<node_1)) in->globalReducedNodesIndex[n]=Nodes_buffer[k-node_0];
               }
            }
            if (p<in->MPIInfo->size-1) { /* the last send can be skipped */
               #ifdef PASO_MPI
               MPI_Sendrecv_replace(Nodes_buffer, buffer_len, MPI_INT,
                                    dest, in->MPIInfo->msg_tag_counter,
                                    source, in->MPIInfo->msg_tag_counter,
                                    in->MPIInfo->comm, &status);
               #endif
               in->MPIInfo->msg_tag_counter+=1;
            }
            buffer_rank=Paso_MPIInfo_mod(in->MPIInfo->size, buffer_rank-1);
         }
      }
      TMPMEMFREE(Nodes_buffer);
   }
   TMPMEMFREE(distribution);
   TMPMEMFREE(loc_offsets);
   TMPMEMFREE(offsets);
   return globalNumReducedNodes;
}