
/* $Id$ */

/*******************************************************
 *
 * Copyright 2003-2007 by ACceSS MNRF
 * Copyright 2007 by University of Queensland
 *
 * http://esscc.uq.edu.au
 * Primary Business: Queensland, Australia
 * Licensed under the Open Software License version 3.0
 * http://www.opensource.org/licenses/osl-3.0.php
 *
 *******************************************************/

/**************************************************************/

/* Finley: Mesh: NodeFile */

/* creates a dense labeling of the global degrees of freedom */
/* and returns the new number of global degrees of freedom */

/**************************************************************/

#include "NodeFile.h"

/**************************************************************/

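/*
 * All four labeling routines in this file follow the same pattern: the global
 * id range is split into one contiguous block per MPI rank (see
 * Paso_MPIInfo_setDistribution), a buffer covering one block is circulated
 * around the ranks with MPI_Sendrecv_replace so that every rank can mark the
 * ids it actually uses, the marked entries are numbered consecutively, and a
 * second circulation hands the new numbers back to the local nodes.
 *
 * Illustrative call (a sketch only: "nodes" stands for any previously
 * constructed and distributed Finley_NodeFile, and the usual Finley error
 * checking is assumed around the call):
 *
 *    dim_t numGlobalDOFs = Finley_NodeFile_createDenseDOFLabeling(nodes);
 *
 * On success nodes->globalDegreesOfFreedom carries the dense, gap-free
 * numbering 0,...,numGlobalDOFs-1.
 */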
dim_t Finley_NodeFile_createDenseDOFLabeling(Finley_NodeFile* in)
{
  index_t min_dof, max_dof, unset_dof=-1, set_dof=1, dof_0, dof_1, *DOF_buffer=NULL, k;
  Paso_MPI_rank buffer_rank, dest, source, *distribution=NULL;
  dim_t p, buffer_len, n, myDOFs, *offsets=NULL, *loc_offsets=NULL, new_numGlobalDOFs=0, myNewDOFs;
  bool_t *set_new_DOF=NULL;
  #ifdef PASO_MPI
  MPI_Status status;
  #endif

  /* get the global range of degree-of-freedom ids */
  Finley_NodeFile_setGlobalDOFRange(&min_dof,&max_dof,in);

  distribution=TMPMEMALLOC(in->MPIInfo->size+1, index_t);
  offsets=TMPMEMALLOC(in->MPIInfo->size, dim_t);
  loc_offsets=TMPMEMALLOC(in->MPIInfo->size, dim_t);
  set_new_DOF=TMPMEMALLOC(in->numNodes, bool_t);

  if ( !(Finley_checkPtr(distribution) || Finley_checkPtr(offsets) || Finley_checkPtr(loc_offsets) || Finley_checkPtr(set_new_DOF)) ) {
    /* distribute the range of degree-of-freedom ids */
    buffer_len=Paso_MPIInfo_setDistribution(in->MPIInfo,min_dof,max_dof,distribution);
    myDOFs=distribution[in->MPIInfo->rank+1]-distribution[in->MPIInfo->rank];
    /* allocate buffers */
    DOF_buffer=TMPMEMALLOC(buffer_len,index_t);
    if (!Finley_checkPtr(DOF_buffer)) {
      /* fill DOF_buffer with the unset_dof marker to check which degrees of freedom are in use */
      #pragma omp parallel for private(n) schedule(static)
      for (n=0;n<buffer_len;n++) DOF_buffer[n]=unset_dof;

      /* fill the buffer by sending portions around in a circle */
      dest=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank + 1);
      source=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank - 1);
      buffer_rank=in->MPIInfo->rank;
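      /* Ring exchange: buffer_rank names the rank whose id block the resident
         buffer currently represents; it is decremented once per pass, so every
         rank gets to mark the degrees of freedom it uses in every block, and
         after the last pass each rank is left holding the buffer for its own
         block [distribution[rank], distribution[rank+1]). */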
      for (p=0; p<in->MPIInfo->size; ++p) {
        if (p>0) { /* the initial send can be skipped */
          #ifdef PASO_MPI
          MPI_Sendrecv_replace(DOF_buffer, buffer_len, MPI_INT,
                               dest, in->MPIInfo->msg_tag_counter, source, in->MPIInfo->msg_tag_counter,
                               in->MPIInfo->comm, &status);
          #endif
          in->MPIInfo->msg_tag_counter++;
        }
        buffer_rank=Paso_MPIInfo_mod(in->MPIInfo->size, buffer_rank-1);
        dof_0=distribution[buffer_rank];
        dof_1=distribution[buffer_rank+1];
        #pragma omp parallel for private(n,k) schedule(static)
        for (n=0;n<in->numNodes;n++) {
          k=in->globalDegreesOfFreedom[n];
          if ((dof_0<=k) && (k<dof_1)) {
            DOF_buffer[k-dof_0]=set_dof;
          }
        }
      }
      /* count the used entries in DOF_buffer and number them consecutively */
      /* TODO: OMP parallel */
      myNewDOFs=0;
      for (n=0; n<myDOFs; ++n) {
        if (DOF_buffer[n]==set_dof) {
          DOF_buffer[n]=myNewDOFs;
          myNewDOFs++;
        }
      }
      memset(loc_offsets,0,in->MPIInfo->size*sizeof(dim_t));
      loc_offsets[in->MPIInfo->rank]=myNewDOFs;
      #ifdef PASO_MPI
      MPI_Allreduce(loc_offsets,offsets,in->MPIInfo->size, MPI_INT, MPI_SUM, in->MPIInfo->comm);
      new_numGlobalDOFs=0;
      for (n=0; n<in->MPIInfo->size; ++n) {
        loc_offsets[n]=new_numGlobalDOFs;
        new_numGlobalDOFs+=offsets[n];
      }
      #else
      new_numGlobalDOFs=loc_offsets[0];
      loc_offsets[0]=0;
      #endif
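      /* loc_offsets now holds the exclusive prefix sum of the per-rank counts,
         i.e. the first new global DOF number handed out by each rank, and
         new_numGlobalDOFs the total; e.g. per-rank counts {3,5,2} give
         offsets {0,3,8} and new_numGlobalDOFs=10. */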
      #pragma omp parallel for private(n) schedule(static)
      for (n=0; n<myDOFs; ++n) DOF_buffer[n]+=loc_offsets[in->MPIInfo->rank];
      /* now the new labels are collected from the buffer by sending it around the circle again */
      #pragma omp parallel for private(n) schedule(static)
      for (n=0; n<in->numNodes; ++n) set_new_DOF[n]=TRUE;
      dest=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank + 1);
      source=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank - 1);
      buffer_rank=in->MPIInfo->rank;
      for (p=0; p<in->MPIInfo->size; ++p) {
        dof_0=distribution[buffer_rank];
        dof_1=distribution[buffer_rank+1];
        #pragma omp parallel for private(n,k) schedule(static)
        for (n=0;n<in->numNodes;n++) {
          k=in->globalDegreesOfFreedom[n];
          if (set_new_DOF[n] && (dof_0<=k) && (k<dof_1)) {
            in->globalDegreesOfFreedom[n]=DOF_buffer[k-dof_0];
            set_new_DOF[n]=FALSE;
          }
        }
        if (p<in->MPIInfo->size-1) { /* the last send can be skipped */
          #ifdef PASO_MPI
          MPI_Sendrecv_replace(DOF_buffer, buffer_len, MPI_INT,
                               dest, in->MPIInfo->msg_tag_counter, source, in->MPIInfo->msg_tag_counter,
                               in->MPIInfo->comm, &status);
          #endif
          in->MPIInfo->msg_tag_counter+=1;
        }
        buffer_rank=Paso_MPIInfo_mod(in->MPIInfo->size, buffer_rank-1);
      }
    }
    TMPMEMFREE(DOF_buffer);
  }
  TMPMEMFREE(distribution);
  TMPMEMFREE(loc_offsets);
  TMPMEMFREE(offsets);
  TMPMEMFREE(set_new_DOF);
  return new_numGlobalDOFs;
}

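/* Finley_NodeFile_assignMPIRankToDOFs: for every node n the owning rank of
   its global degree of freedom is stored in mpiRankOfDOF[n], i.e. the rank p
   with distribution[p] <= in->globalDegreesOfFreedom[n] < distribution[p+1];
   p_min/p_max restrict the search to the ranks that can occur locally.

   A call might look like this (sketch only; mpiRankOfDOF is an array of
   length in->numNodes and dof_distribution a DOF distribution vector of
   length size+1 provided by the caller):

      Finley_NodeFile_assignMPIRankToDOFs(nodes, mpiRankOfDOF, dof_distribution);
*/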
void Finley_NodeFile_assignMPIRankToDOFs(Finley_NodeFile* in, Paso_MPI_rank* mpiRankOfDOF, index_t *distribution)
{
  index_t min_DOF, max_DOF, k;
  dim_t n;
  Paso_MPI_rank p, p_min=in->MPIInfo->size, p_max=-1;
  /* first calculate the minimum and maximum DOF on this processor to reduce the search costs */
  Finley_NodeFile_setDOFRange(&min_DOF,&max_DOF,in);

  for (p=0; p<in->MPIInfo->size; ++p) {
    if (distribution[p]<=min_DOF) p_min=p;
    if (distribution[p]<=max_DOF) p_max=p;
  }
  #pragma omp parallel for private(n,k,p) schedule(static)
  for (n=0; n<in->numNodes; ++n) {
    k=in->globalDegreesOfFreedom[n];
    for (p=p_min; p<=p_max; ++p) {
      if (k<distribution[p+1]) {
        mpiRankOfDOF[n]=p;
        break;
      }
    }
  }
}
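
/* Finley_NodeFile_createDenseReducedDOFLabeling: identical scheme to
   Finley_NodeFile_createDenseDOFLabeling above, but only nodes with
   reducedNodeMask[n]>-1 take part; the dense numbering is written to
   in->globalReducedDOFIndex (entries of excluded nodes are left at the
   initial marker value loc_offsets[0]-1, i.e. -1) and the number of
   global reduced degrees of freedom is returned. */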
dim_t Finley_NodeFile_createDenseReducedDOFLabeling(Finley_NodeFile* in, index_t* reducedNodeMask)
{
  index_t min_dof, max_dof, unset_dof=-1, set_dof=1, dof_0, dof_1, *DOF_buffer=NULL, k;
  Paso_MPI_rank buffer_rank, dest, source, *distribution=NULL;
  dim_t p, buffer_len, n, myDOFs, *offsets=NULL, *loc_offsets=NULL, globalNumReducedDOFs=0, myNewDOFs;
  #ifdef PASO_MPI
  MPI_Status status;
  #endif

  /* get the global range of degree-of-freedom ids */
  Finley_NodeFile_setGlobalDOFRange(&min_dof,&max_dof,in);

  distribution=TMPMEMALLOC(in->MPIInfo->size+1, index_t);
  offsets=TMPMEMALLOC(in->MPIInfo->size, dim_t);
  loc_offsets=TMPMEMALLOC(in->MPIInfo->size, dim_t);

  if ( !(Finley_checkPtr(distribution) || Finley_checkPtr(offsets) || Finley_checkPtr(loc_offsets)) ) {
    /* distribute the range of degree-of-freedom ids */
    buffer_len=Paso_MPIInfo_setDistribution(in->MPIInfo,min_dof,max_dof,distribution);
    myDOFs=distribution[in->MPIInfo->rank+1]-distribution[in->MPIInfo->rank];
    /* allocate buffers */
    DOF_buffer=TMPMEMALLOC(buffer_len,index_t);
    if (!Finley_checkPtr(DOF_buffer)) {
      /* fill DOF_buffer with the unset_dof marker to check which degrees of freedom are in use */
      #pragma omp parallel for private(n) schedule(static)
      for (n=0;n<buffer_len;n++) DOF_buffer[n]=unset_dof;

      /* fill the buffer by sending portions around in a circle */
      dest=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank + 1);
      source=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank - 1);
      buffer_rank=in->MPIInfo->rank;
      for (p=0; p<in->MPIInfo->size; ++p) {
        if (p>0) { /* the initial send can be skipped */
          #ifdef PASO_MPI
          MPI_Sendrecv_replace(DOF_buffer, buffer_len, MPI_INT,
                               dest, in->MPIInfo->msg_tag_counter, source, in->MPIInfo->msg_tag_counter,
                               in->MPIInfo->comm, &status);
          #endif
          in->MPIInfo->msg_tag_counter++;
        }
        buffer_rank=Paso_MPIInfo_mod(in->MPIInfo->size, buffer_rank-1);
        dof_0=distribution[buffer_rank];
        dof_1=distribution[buffer_rank+1];
        #pragma omp parallel for private(n,k) schedule(static)
        for (n=0;n<in->numNodes;n++) {
          if (reducedNodeMask[n]>-1) {
            k=in->globalDegreesOfFreedom[n];
            if ((dof_0<=k) && (k<dof_1)) {
              DOF_buffer[k-dof_0]=set_dof;
            }
          }
        }
      }
      /* count the used entries in DOF_buffer and number them consecutively */
      /* TODO: OMP parallel */
      myNewDOFs=0;
      for (n=0; n<myDOFs; ++n) {
        if (DOF_buffer[n]==set_dof) {
          DOF_buffer[n]=myNewDOFs;
          myNewDOFs++;
        }
      }
      memset(loc_offsets,0,in->MPIInfo->size*sizeof(dim_t));
      loc_offsets[in->MPIInfo->rank]=myNewDOFs;
      #ifdef PASO_MPI
      MPI_Allreduce(loc_offsets,offsets,in->MPIInfo->size, MPI_INT, MPI_SUM, in->MPIInfo->comm);
      globalNumReducedDOFs=0;
      for (n=0; n<in->MPIInfo->size; ++n) {
        loc_offsets[n]=globalNumReducedDOFs;
        globalNumReducedDOFs+=offsets[n];
      }
      #else
      globalNumReducedDOFs=loc_offsets[0];
      loc_offsets[0]=0;
      #endif
      #pragma omp parallel for private(n) schedule(static)
      for (n=0; n<myDOFs; ++n) DOF_buffer[n]+=loc_offsets[in->MPIInfo->rank];
      /* now the new labels are collected from the buffer by sending it around the circle again */
      #pragma omp parallel for private(n) schedule(static)
      for (n=0; n<in->numNodes; ++n) in->globalReducedDOFIndex[n]=loc_offsets[0]-1;
      dest=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank + 1);
      source=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank - 1);
      buffer_rank=in->MPIInfo->rank;
      for (p=0; p<in->MPIInfo->size; ++p) {
        dof_0=distribution[buffer_rank];
        dof_1=distribution[buffer_rank+1];
        #pragma omp parallel for private(n,k) schedule(static)
        for (n=0;n<in->numNodes;n++) {
          if (reducedNodeMask[n]>-1) {
            k=in->globalDegreesOfFreedom[n];
            if ((dof_0<=k) && (k<dof_1)) in->globalReducedDOFIndex[n]=DOF_buffer[k-dof_0];
          }
        }
        if (p<in->MPIInfo->size-1) { /* the last send can be skipped */
          #ifdef PASO_MPI
          MPI_Sendrecv_replace(DOF_buffer, buffer_len, MPI_INT,
                               dest, in->MPIInfo->msg_tag_counter, source, in->MPIInfo->msg_tag_counter,
                               in->MPIInfo->comm, &status);
          #endif
          in->MPIInfo->msg_tag_counter+=1;
        }
        buffer_rank=Paso_MPIInfo_mod(in->MPIInfo->size, buffer_rank-1);
      }
    }
    TMPMEMFREE(DOF_buffer);
  }
  TMPMEMFREE(distribution);
  TMPMEMFREE(loc_offsets);
  TMPMEMFREE(offsets);
  return globalNumReducedDOFs;
}
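
/* Finley_NodeFile_createDenseNodeLabeling: the same circulation scheme,
   keyed on the node ids in->Id rather than on degrees of freedom; the
   dense numbering is written to in->globalNodesIndex and the total number
   of global nodes is returned. */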
dim_t Finley_NodeFile_createDenseNodeLabeling(Finley_NodeFile* in)
{
  index_t min_nodeID, max_nodeID, unset_nodeID=-1, set_nodeID=1, nodeID_0, nodeID_1, *Node_buffer=NULL, k;
  Paso_MPI_rank buffer_rank, dest, source, *distribution=NULL;
  dim_t p, buffer_len, n, myNodes, *offsets=NULL, *loc_offsets=NULL, globalNumNodes=0, myNewNodes;
  #ifdef PASO_MPI
  MPI_Status status;
  #endif

  /* get the global range of node ids */
  Finley_NodeFile_setGlobalIdRange(&min_nodeID,&max_nodeID,in);

  distribution=TMPMEMALLOC(in->MPIInfo->size+1, index_t);
  offsets=TMPMEMALLOC(in->MPIInfo->size, dim_t);
  loc_offsets=TMPMEMALLOC(in->MPIInfo->size, dim_t);

  if ( !(Finley_checkPtr(distribution) || Finley_checkPtr(offsets) || Finley_checkPtr(loc_offsets)) ) {
    /* distribute the range of node ids */
    buffer_len=Paso_MPIInfo_setDistribution(in->MPIInfo,min_nodeID,max_nodeID,distribution);
    myNodes=distribution[in->MPIInfo->rank+1]-distribution[in->MPIInfo->rank];
    /* allocate buffers */
    Node_buffer=TMPMEMALLOC(buffer_len,index_t);
    if (!Finley_checkPtr(Node_buffer)) {
      /* fill Node_buffer with the unset_nodeID marker to check which node ids are in use */
      #pragma omp parallel for private(n) schedule(static)
      for (n=0;n<buffer_len;n++) Node_buffer[n]=unset_nodeID;

      /* fill the buffer by sending portions around in a circle */
      dest=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank + 1);
      source=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank - 1);
      buffer_rank=in->MPIInfo->rank;
      for (p=0; p<in->MPIInfo->size; ++p) {
        if (p>0) { /* the initial send can be skipped */
          #ifdef PASO_MPI
          MPI_Sendrecv_replace(Node_buffer, buffer_len, MPI_INT,
                               dest, in->MPIInfo->msg_tag_counter, source, in->MPIInfo->msg_tag_counter,
                               in->MPIInfo->comm, &status);
          #endif
          in->MPIInfo->msg_tag_counter++;
        }
        buffer_rank=Paso_MPIInfo_mod(in->MPIInfo->size, buffer_rank-1);
        nodeID_0=distribution[buffer_rank];
        nodeID_1=distribution[buffer_rank+1];
        #pragma omp parallel for private(n,k) schedule(static)
        for (n=0;n<in->numNodes;n++) {
          k=in->Id[n];
          if ((nodeID_0<=k) && (k<nodeID_1)) {
            Node_buffer[k-nodeID_0]=set_nodeID;
          }
        }
      }
      /* count the used entries in Node_buffer and number them consecutively */
      /* TODO: OMP parallel */
      myNewNodes=0;
      for (n=0; n<myNodes; ++n) {
        if (Node_buffer[n]==set_nodeID) {
          Node_buffer[n]=myNewNodes;
          myNewNodes++;
        }
      }
      memset(loc_offsets,0,in->MPIInfo->size*sizeof(dim_t));
      loc_offsets[in->MPIInfo->rank]=myNewNodes;
      #ifdef PASO_MPI
      MPI_Allreduce(loc_offsets,offsets,in->MPIInfo->size, MPI_INT, MPI_SUM, in->MPIInfo->comm);
      globalNumNodes=0;
      for (n=0; n<in->MPIInfo->size; ++n) {
        loc_offsets[n]=globalNumNodes;
        globalNumNodes+=offsets[n];
      }
      #else
      globalNumNodes=loc_offsets[0];
      loc_offsets[0]=0;
      #endif
      #pragma omp parallel for private(n) schedule(static)
      for (n=0; n<myNodes; ++n) Node_buffer[n]+=loc_offsets[in->MPIInfo->rank];
      /* now the new labels are collected from the buffer by sending it around the circle again */
      #pragma omp parallel for private(n) schedule(static)
      for (n=0; n<in->numNodes; ++n) in->globalNodesIndex[n]=loc_offsets[0]-1;
      dest=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank + 1);
      source=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank - 1);
      buffer_rank=in->MPIInfo->rank;
      for (p=0; p<in->MPIInfo->size; ++p) {
        nodeID_0=distribution[buffer_rank];
        nodeID_1=distribution[buffer_rank+1];
        #pragma omp parallel for private(n,k) schedule(static)
        for (n=0;n<in->numNodes;n++) {
          k=in->Id[n];
          if ((nodeID_0<=k) && (k<nodeID_1)) in->globalNodesIndex[n]=Node_buffer[k-nodeID_0];
        }
        if (p<in->MPIInfo->size-1) { /* the last send can be skipped */
          #ifdef PASO_MPI
          MPI_Sendrecv_replace(Node_buffer, buffer_len, MPI_INT,
                               dest, in->MPIInfo->msg_tag_counter, source, in->MPIInfo->msg_tag_counter,
                               in->MPIInfo->comm, &status);
          #endif
          in->MPIInfo->msg_tag_counter+=1;
        }
        buffer_rank=Paso_MPIInfo_mod(in->MPIInfo->size, buffer_rank-1);
      }
    }
    TMPMEMFREE(Node_buffer);
  }
  TMPMEMFREE(distribution);
  TMPMEMFREE(loc_offsets);
  TMPMEMFREE(offsets);
  return globalNumNodes;
}
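
/* Finley_NodeFile_createDenseReducedNodeLabeling: node-id variant of the
   reduced labeling; only nodes with reducedNodeMask[n]>-1 are numbered,
   the result is written to in->globalReducedNodesIndex (excluded nodes
   keep the marker value -1) and the number of global reduced nodes is
   returned. */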
dim_t Finley_NodeFile_createDenseReducedNodeLabeling(Finley_NodeFile* in, index_t* reducedNodeMask)
{
  index_t min_nodeID, max_nodeID, unset_nodeID=-1, set_nodeID=1, nodeID_0, nodeID_1, *Node_buffer=NULL, k;
  Paso_MPI_rank buffer_rank, dest, source, *distribution=NULL;
  dim_t p, buffer_len, n, myNodes, *offsets=NULL, *loc_offsets=NULL, globalNumReducedNodes=0, myNewNodes;
  #ifdef PASO_MPI
  MPI_Status status;
  #endif

  /* get the global range of node ids */
  Finley_NodeFile_setGlobalIdRange(&min_nodeID,&max_nodeID,in);

  distribution=TMPMEMALLOC(in->MPIInfo->size+1, index_t);
  offsets=TMPMEMALLOC(in->MPIInfo->size, dim_t);
  loc_offsets=TMPMEMALLOC(in->MPIInfo->size, dim_t);

  if ( !(Finley_checkPtr(distribution) || Finley_checkPtr(offsets) || Finley_checkPtr(loc_offsets)) ) {
    /* distribute the range of node ids */
    buffer_len=Paso_MPIInfo_setDistribution(in->MPIInfo,min_nodeID,max_nodeID,distribution);
    myNodes=distribution[in->MPIInfo->rank+1]-distribution[in->MPIInfo->rank];
    /* allocate buffers */
    Node_buffer=TMPMEMALLOC(buffer_len,index_t);
    if (!Finley_checkPtr(Node_buffer)) {
      /* fill Node_buffer with the unset_nodeID marker to check which node ids are in use */
      #pragma omp parallel for private(n) schedule(static)
      for (n=0;n<buffer_len;n++) Node_buffer[n]=unset_nodeID;

      /* fill the buffer by sending portions around in a circle */
      dest=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank + 1);
      source=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank - 1);
      buffer_rank=in->MPIInfo->rank;
      for (p=0; p<in->MPIInfo->size; ++p) {
        if (p>0) { /* the initial send can be skipped */
          #ifdef PASO_MPI
          MPI_Sendrecv_replace(Node_buffer, buffer_len, MPI_INT,
                               dest, in->MPIInfo->msg_tag_counter, source, in->MPIInfo->msg_tag_counter,
                               in->MPIInfo->comm, &status);
          #endif
          in->MPIInfo->msg_tag_counter++;
        }
        buffer_rank=Paso_MPIInfo_mod(in->MPIInfo->size, buffer_rank-1);
        nodeID_0=distribution[buffer_rank];
        nodeID_1=distribution[buffer_rank+1];
        #pragma omp parallel for private(n,k) schedule(static)
        for (n=0;n<in->numNodes;n++) {
          if (reducedNodeMask[n]>-1) {
            k=in->Id[n];
            if ((nodeID_0<=k) && (k<nodeID_1)) {
              Node_buffer[k-nodeID_0]=set_nodeID;
            }
          }
        }
      }
      /* count the used entries in Node_buffer and number them consecutively */
      /* TODO: OMP parallel */
      myNewNodes=0;
      for (n=0; n<myNodes; ++n) {
        if (Node_buffer[n]==set_nodeID) {
          Node_buffer[n]=myNewNodes;
          myNewNodes++;
        }
      }
      memset(loc_offsets,0,in->MPIInfo->size*sizeof(dim_t));
      loc_offsets[in->MPIInfo->rank]=myNewNodes;
      #ifdef PASO_MPI
      MPI_Allreduce(loc_offsets,offsets,in->MPIInfo->size, MPI_INT, MPI_SUM, in->MPIInfo->comm);
      globalNumReducedNodes=0;
      for (n=0; n<in->MPIInfo->size; ++n) {
        loc_offsets[n]=globalNumReducedNodes;
        globalNumReducedNodes+=offsets[n];
      }
      #else
      globalNumReducedNodes=loc_offsets[0];
      loc_offsets[0]=0;
      #endif
      #pragma omp parallel for private(n) schedule(static)
      for (n=0; n<myNodes; ++n) Node_buffer[n]+=loc_offsets[in->MPIInfo->rank];
      /* now the new labels are collected from the buffer by sending it around the circle again */
      #pragma omp parallel for private(n) schedule(static)
      for (n=0; n<in->numNodes; ++n) in->globalReducedNodesIndex[n]=loc_offsets[0]-1;
      dest=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank + 1);
      source=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank - 1);
      buffer_rank=in->MPIInfo->rank;
      for (p=0; p<in->MPIInfo->size; ++p) {
        nodeID_0=distribution[buffer_rank];
        nodeID_1=distribution[buffer_rank+1];
        #pragma omp parallel for private(n,k) schedule(static)
        for (n=0;n<in->numNodes;n++) {
          if (reducedNodeMask[n]>-1) {
            k=in->Id[n];
            if ((nodeID_0<=k) && (k<nodeID_1)) in->globalReducedNodesIndex[n]=Node_buffer[k-nodeID_0];
          }
        }
        if (p<in->MPIInfo->size-1) { /* the last send can be skipped */
          #ifdef PASO_MPI
          MPI_Sendrecv_replace(Node_buffer, buffer_len, MPI_INT,
                               dest, in->MPIInfo->msg_tag_counter, source, in->MPIInfo->msg_tag_counter,
                               in->MPIInfo->comm, &status);
          #endif
          in->MPIInfo->msg_tag_counter+=1;
        }
        buffer_rank=Paso_MPIInfo_mod(in->MPIInfo->size, buffer_rank-1);
      }
    }
    TMPMEMFREE(Node_buffer);
  }
  TMPMEMFREE(distribution);
  TMPMEMFREE(loc_offsets);
  TMPMEMFREE(offsets);
  return globalNumReducedNodes;
}