/* $Id$ */

/*******************************************************
 *
 * Copyright 2003-2007 by ACceSS MNRF
 * Copyright 2007 by University of Queensland
 *
 * http://esscc.uq.edu.au
 * Primary Business: Queensland, Australia
 * Licensed under the Open Software License version 3.0
 * http://www.opensource.org/licenses/osl-3.0.php
 *
 *******************************************************/

/**************************************************************/

/* Finley: Mesh: NodeFile */

/* creates a dense labeling of the global degrees of freedom */
/* and returns the new number of global degrees of freedom */

/**************************************************************/

#include "NodeFile.h"

/**************************************************************/

dim_t Finley_NodeFile_createDenseDOFLabeling(Finley_NodeFile* in)
{
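  /* The global DOF range [min_dof,max_dof] is split into one contiguous chunk
     per MPI rank (Paso_MPIInfo_setDistribution). Each chunk is passed around
     the process ring so that every rank can mark the DOFs it references; the
     marked entries are then numbered consecutively, shifted by a global
     offset, and a second ring pass copies the new numbers back into
     in->globalDegreesOfFreedom. The return value is the new total number of
     global DOFs. */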
  index_t min_dof, max_dof, unset_dof=-1,set_dof=1, dof_0, dof_1, *DOF_buffer=NULL, k;
  Paso_MPI_rank buffer_rank, dest, source, *distribution=NULL;
  dim_t p, buffer_len,n, myDOFs, *offsets=NULL, *loc_offsets=NULL, new_numGlobalDOFs=0, myNewDOFs;
  bool_t *set_new_DOF=NULL;
  #ifdef PASO_MPI
  MPI_Status status;
  #endif

  /* get the global range of node ids */
  Finley_NodeFile_setGlobalDOFRange(&min_dof,&max_dof,in);

  distribution=TMPMEMALLOC(in->MPIInfo->size+1, index_t);
  offsets=TMPMEMALLOC(in->MPIInfo->size, dim_t);
  loc_offsets=TMPMEMALLOC(in->MPIInfo->size, dim_t);
  set_new_DOF=TMPMEMALLOC(in->numNodes, bool_t);

  if ( ! (Finley_checkPtr(distribution) || Finley_checkPtr(offsets) || Finley_checkPtr(loc_offsets) || Finley_checkPtr(set_new_DOF)) ) {
    /* distribute the range of node ids */
    buffer_len=Paso_MPIInfo_setDistribution(in->MPIInfo,min_dof,max_dof,distribution);
    myDOFs=distribution[in->MPIInfo->rank+1]-distribution[in->MPIInfo->rank];
    /* allocate buffers */
    DOF_buffer=TMPMEMALLOC(buffer_len,index_t);
    if (! Finley_checkPtr(DOF_buffer)) {
      /* fill DOF_buffer by the unset_dof marker to check if nodes are defined */
      #pragma omp parallel for private(n) schedule(static)
      for (n=0;n<buffer_len;n++) DOF_buffer[n]=unset_dof;
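
      /* Ring pass 1: the local buffer always represents the chunk
         [dof_0,dof_1) owned by buffer_rank. Each rank marks the DOFs it
         references before the buffer moves on; after the last step every rank
         holds its own chunk again, carrying the marks contributed by all
         ranks. */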
      /* fill the buffer by sending portions around in a circle */
      dest=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank + 1);
      source=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank - 1);
      buffer_rank=in->MPIInfo->rank;
      for (p=0; p< in->MPIInfo->size; ++p) {
        if (p>0) { /* the initial send can be skipped */
          #ifdef PASO_MPI
          MPI_Sendrecv_replace(DOF_buffer, buffer_len, MPI_INT,
                               dest, in->MPIInfo->msg_tag_counter, source, in->MPIInfo->msg_tag_counter,
                               in->MPIInfo->comm,&status);
          #endif
          in->MPIInfo->msg_tag_counter++;
        }
        buffer_rank=Paso_MPIInfo_mod(in->MPIInfo->size, buffer_rank-1);
        dof_0=distribution[buffer_rank];
        dof_1=distribution[buffer_rank+1];
        #pragma omp parallel for private(n,k) schedule(static)
        for (n=0;n<in->numNodes;n++) {
          k=in->globalDegreesOfFreedom[n];
          if ((dof_0<=k) && (k<dof_1)) {
            DOF_buffer[k-dof_0] = set_dof;
          }
        }
      }
      /* count the entries in the DOF_buffer */
      /* TODO: OMP parallel */
      myNewDOFs=0;
      for (n=0; n<myDOFs; ++n) {
        if ( DOF_buffer[n] == set_dof) {
          DOF_buffer[n]=myNewDOFs;
          myNewDOFs++;
        }
      }
      memset(loc_offsets,0,in->MPIInfo->size*sizeof(dim_t));
      loc_offsets[in->MPIInfo->rank]=myNewDOFs;
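      /* loc_offsets holds this rank's count in its own slot; the Allreduce
         below sums the per-rank arrays, and the prefix sum that follows turns
         the counts into the global offset at which each rank's new DOF
         numbers start. */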
      #ifdef PASO_MPI
      MPI_Allreduce(loc_offsets,offsets,in->MPIInfo->size, MPI_INT, MPI_SUM, in->MPIInfo->comm );
      new_numGlobalDOFs=0;
      for (n=0; n< in->MPIInfo->size; ++n) {
        loc_offsets[n]=new_numGlobalDOFs;
        new_numGlobalDOFs+=offsets[n];
      }
      #else
      new_numGlobalDOFs=loc_offsets[0];
      loc_offsets[0]=0;
      #endif
      #pragma omp parallel
      {
        #pragma omp for private(n) schedule(static)
        for (n=0; n<myDOFs; ++n) DOF_buffer[n]+=loc_offsets[in->MPIInfo->rank];
        /* now entries are collected from the buffer again by sending the entries around in a circle */
        #pragma omp for private(n) schedule(static)
        for (n=0; n<in->numNodes; ++n) set_new_DOF[n]=TRUE;
      }
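      /* Ring pass 2: the buffer now carries the final global numbers for
         buffer_rank's chunk. Each node picks up its new number the first time
         its old DOF falls into the chunk at hand; set_new_DOF prevents the
         already updated value from being remapped a second time. */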
      dest=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank + 1);
      source=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank - 1);
      buffer_rank=in->MPIInfo->rank;
      for (p=0; p< in->MPIInfo->size; ++p) {
        dof_0=distribution[buffer_rank];
        dof_1=distribution[buffer_rank+1];
        #pragma omp parallel for private(n,k) schedule(static)
        for (n=0;n<in->numNodes;n++) {
          k=in->globalDegreesOfFreedom[n];
          if ( set_new_DOF[n] && (dof_0<=k) && (k<dof_1)) {
            in->globalDegreesOfFreedom[n]=DOF_buffer[k-dof_0];
            set_new_DOF[n]=FALSE;
          }
        }
        if (p<in->MPIInfo->size-1) { /* the last send can be skipped */
          #ifdef PASO_MPI
          MPI_Sendrecv_replace(DOF_buffer, buffer_len, MPI_INT,
                               dest, in->MPIInfo->msg_tag_counter, source, in->MPIInfo->msg_tag_counter,
                               in->MPIInfo->comm,&status);
          #endif
          in->MPIInfo->msg_tag_counter+=1;
        }
        buffer_rank=Paso_MPIInfo_mod(in->MPIInfo->size, buffer_rank-1);
      }
    }
    TMPMEMFREE(DOF_buffer);
  }
  TMPMEMFREE(distribution);
  TMPMEMFREE(loc_offsets);
  TMPMEMFREE(offsets);
  TMPMEMFREE(set_new_DOF);
  return new_numGlobalDOFs;
}

void Finley_NodeFile_assignMPIRankToDOFs(Finley_NodeFile* in,Paso_MPI_rank* mpiRankOfDOF, index_t *distribution){
  index_t min_DOF,max_DOF, k;
  dim_t n;
  Paso_MPI_rank p, p_min=in->MPIInfo->size, p_max=-1;
  /* first we calculate the min and max DOF on this processor to reduce the cost of searching */
  Finley_NodeFile_setDOFRange(&min_DOF,&max_DOF,in);

  for (p=0; p<in->MPIInfo->size; ++p) {
    if (distribution[p]<=min_DOF) p_min=p;
    if (distribution[p]<=max_DOF) p_max=p;
  }
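  /* distribution[p] is the first DOF owned by rank p, so p_min and p_max
     bound the ranks that can own any DOF of this NodeFile; each DOF is then
     assigned to the first rank p with k < distribution[p+1]. */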
  #pragma omp parallel for private(n,k,p) schedule(static)
  for (n=0; n<in->numNodes; ++n) {
    k=in->globalDegreesOfFreedom[n];
    for (p=p_min; p<=p_max; ++p) {
      if (k<distribution[p+1]) {
        mpiRankOfDOF[n]=p;
        break;
      }
    }
  }
}

dim_t Finley_NodeFile_createDenseReducedDOFLabeling(Finley_NodeFile* in,index_t* reducedNodeMask)
{
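  /* Same ring algorithm as Finley_NodeFile_createDenseDOFLabeling, restricted
     to the nodes with reducedNodeMask[n]>-1. The result is written to
     in->globalReducedDOFIndex; nodes outside the reduced set keep the value
     -1. The return value is the number of global reduced DOFs. */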
  index_t min_dof, max_dof, unset_dof=-1,set_dof=1, dof_0, dof_1, *DOF_buffer=NULL, k;
  Paso_MPI_rank buffer_rank, dest, source, *distribution=NULL;
  dim_t p, buffer_len,n, myDOFs, *offsets=NULL, *loc_offsets=NULL, globalNumReducedDOFs=0, myNewDOFs;
  #ifdef PASO_MPI
  MPI_Status status;
  #endif

  /* get the global range of node ids */
  Finley_NodeFile_setGlobalDOFRange(&min_dof,&max_dof,in);

  distribution=TMPMEMALLOC(in->MPIInfo->size+1, index_t);
  offsets=TMPMEMALLOC(in->MPIInfo->size, dim_t);
  loc_offsets=TMPMEMALLOC(in->MPIInfo->size, dim_t);

  if ( ! (Finley_checkPtr(distribution) || Finley_checkPtr(offsets) || Finley_checkPtr(loc_offsets) ) ) {
    /* distribute the range of node ids */
    buffer_len=Paso_MPIInfo_setDistribution(in->MPIInfo,min_dof,max_dof,distribution);
    myDOFs=distribution[in->MPIInfo->rank+1]-distribution[in->MPIInfo->rank];
    /* allocate buffers */
    DOF_buffer=TMPMEMALLOC(buffer_len,index_t);
    if (! Finley_checkPtr(DOF_buffer)) {
      /* fill DOF_buffer by the unset_dof marker to check if nodes are defined */
      #pragma omp parallel for private(n) schedule(static)
      for (n=0;n<buffer_len;n++) DOF_buffer[n]=unset_dof;

      /* fill the buffer by sending portions around in a circle */
      dest=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank + 1);
      source=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank - 1);
      buffer_rank=in->MPIInfo->rank;
      for (p=0; p< in->MPIInfo->size; ++p) {
        if (p>0) { /* the initial send can be skipped */
          #ifdef PASO_MPI
          MPI_Sendrecv_replace(DOF_buffer, buffer_len, MPI_INT,
                               dest, in->MPIInfo->msg_tag_counter, source, in->MPIInfo->msg_tag_counter,
                               in->MPIInfo->comm,&status);
          #endif
          in->MPIInfo->msg_tag_counter++;
        }
        buffer_rank=Paso_MPIInfo_mod(in->MPIInfo->size, buffer_rank-1);
        dof_0=distribution[buffer_rank];
        dof_1=distribution[buffer_rank+1];
        #pragma omp parallel for private(n,k) schedule(static)
        for (n=0;n<in->numNodes;n++) {
          if (reducedNodeMask[n] >-1) {
            k=in->globalDegreesOfFreedom[n];
            if ((dof_0<=k) && (k<dof_1)) {
              DOF_buffer[k-dof_0] = set_dof;
            }
          }
        }
      }
      /* count the entries in the DOF_buffer */
      /* TODO: OMP parallel */
      myNewDOFs=0;
      for (n=0; n<myDOFs; ++n) {
        if ( DOF_buffer[n] == set_dof) {
          DOF_buffer[n]=myNewDOFs;
          myNewDOFs++;
        }
      }
      memset(loc_offsets,0,in->MPIInfo->size*sizeof(dim_t));
      loc_offsets[in->MPIInfo->rank]=myNewDOFs;
      #ifdef PASO_MPI
      MPI_Allreduce(loc_offsets,offsets,in->MPIInfo->size, MPI_INT, MPI_SUM, in->MPIInfo->comm );
      globalNumReducedDOFs=0;
      for (n=0; n< in->MPIInfo->size; ++n) {
        loc_offsets[n]=globalNumReducedDOFs;
        globalNumReducedDOFs+=offsets[n];
      }
      #else
      globalNumReducedDOFs=loc_offsets[0];
      loc_offsets[0]=0;
      #endif
      #pragma omp parallel for private(n) schedule(static)
      for (n=0; n<myDOFs; ++n) DOF_buffer[n]+=loc_offsets[in->MPIInfo->rank];
      /* now entries are collected from the buffer again by sending the entries around in a circle */
      #pragma omp parallel for private(n) schedule(static)
      for (n=0; n<in->numNodes; ++n) in->globalReducedDOFIndex[n]=loc_offsets[0]-1;
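      /* loc_offsets[0] is 0 at this point, so every node starts out with the
         marker -1; only nodes in the reduced set receive a valid index in the
         ring pass below. */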
      dest=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank + 1);
      source=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank - 1);
      buffer_rank=in->MPIInfo->rank;
      for (p=0; p< in->MPIInfo->size; ++p) {
        dof_0=distribution[buffer_rank];
        dof_1=distribution[buffer_rank+1];
        #pragma omp parallel for private(n,k) schedule(static)
        for (n=0;n<in->numNodes;n++) {
          if (reducedNodeMask[n] >-1) {
            k=in->globalDegreesOfFreedom[n];
            if ( (dof_0<=k) && (k<dof_1)) in->globalReducedDOFIndex[n]=DOF_buffer[k-dof_0];
          }
        }
        if (p<in->MPIInfo->size-1) { /* the last send can be skipped */
          #ifdef PASO_MPI
          MPI_Sendrecv_replace(DOF_buffer, buffer_len, MPI_INT,
                               dest, in->MPIInfo->msg_tag_counter, source, in->MPIInfo->msg_tag_counter,
                               in->MPIInfo->comm,&status);
          #endif
          in->MPIInfo->msg_tag_counter+=1;
        }
        buffer_rank=Paso_MPIInfo_mod(in->MPIInfo->size, buffer_rank-1);
      }
    }
    TMPMEMFREE(DOF_buffer);
  }
  TMPMEMFREE(distribution);
  TMPMEMFREE(loc_offsets);
  TMPMEMFREE(offsets);
  return globalNumReducedDOFs;
}

dim_t Finley_NodeFile_createDenseNodeLabeling(Finley_NodeFile* in, index_t* node_distribution, const index_t* dof_distribution)
{
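  /* Creates a dense global numbering of the nodes. A node is numbered by the
     rank that owns its degree of freedom (dof_distribution). The circulating
     buffer is indexed by node Id relative to min_id and carries a two-entry
     header (header_len) holding min_id and max_id, so each receiving rank
     knows which Id range it covers. node_distribution is filled with the
     distribution of the new node numbers and the total number of global
     nodes is returned. */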
  index_t myFirstDOF, myLastDOF, max_id, min_id, loc_max_id, loc_min_id, dof, id, itmp, nodeID_0, nodeID_1, dof_0, dof_1, *Node_buffer=NULL;
  dim_t n, my_buffer_len, buffer_len, globalNumNodes=0, myNewNumNodes;
  Paso_MPI_rank p, dest, source, buffer_rank;
  const index_t unset_nodeID=-1, set_nodeID=1;
  const dim_t header_len=2;
  #ifdef PASO_MPI
  MPI_Status status;
  #endif
  Paso_MPI_rank myRank=in->MPIInfo->rank;

  /* find the range of node Ids controlled by this rank */
  myFirstDOF=dof_distribution[in->MPIInfo->rank];
  myLastDOF=dof_distribution[in->MPIInfo->rank+1];
  max_id=-INDEX_T_MAX;
  min_id=INDEX_T_MAX;
  #pragma omp parallel private(loc_max_id,loc_min_id)
  {
    loc_max_id=max_id;
    loc_min_id=min_id;
    #pragma omp for private(n,dof,id) schedule(static)
    for (n=0;n<in->numNodes;n++) {
      dof=in->globalDegreesOfFreedom[n];
      id=in->Id[n];
      if ((myFirstDOF<= dof) && (dof< myLastDOF)) {
        loc_max_id=MAX(loc_max_id,id);
        loc_min_id=MIN(loc_min_id,id);
      }
    }
    #pragma omp critical
    {
      max_id=MAX(loc_max_id,max_id);
      min_id=MIN(loc_min_id,min_id);
    }
  }
  /* allocate a buffer */
  /* use >= so that a rank owning exactly one node Id still gets a buffer of length 1 */
  my_buffer_len=max_id>=min_id ? max_id-min_id+1 : 0;

  #ifdef PASO_MPI
  MPI_Allreduce( &my_buffer_len, &buffer_len, 1, MPI_INT, MPI_MAX, in->MPIInfo->comm );
  #else
  buffer_len=my_buffer_len;
  #endif

  Node_buffer=TMPMEMALLOC(buffer_len+header_len,index_t);
  if (! Finley_checkPtr(Node_buffer)) {
    /* mark and count the nodes in use */
    #pragma omp parallel
    {
      #pragma omp for private(n) schedule(static)
      for (n=0;n<buffer_len+header_len;n++) Node_buffer[n]=unset_nodeID;
      #pragma omp for private(n) schedule(static)
      for (n=0;n<in->numNodes;n++) in->globalNodesIndex[n]=-1;
      #pragma omp for private(n,dof,id) schedule(static)
      for (n=0;n<in->numNodes;n++) {
        dof=in->globalDegreesOfFreedom[n];
        id=in->Id[n];
        if ((myFirstDOF<= dof) && (dof< myLastDOF)) Node_buffer[id-min_id+header_len]=set_nodeID;
      }
    }
    myNewNumNodes=0;
    for (n=0;n<my_buffer_len;n++) {
      if (Node_buffer[header_len+n]==set_nodeID) {
        Node_buffer[header_len+n]=myNewNumNodes;
        myNewNumNodes++;
      }
    }
    /* make the local number of nodes globally available */
    #ifdef PASO_MPI
    MPI_Allgather(&myNewNumNodes,1,MPI_INT,node_distribution,1,MPI_INT,in->MPIInfo->comm);
    #else
    node_distribution[0]=myNewNumNodes;
    #endif
    globalNumNodes=0;
    for (p=0; p< in->MPIInfo->size; ++p) {
      itmp=node_distribution[p];
      node_distribution[p]=globalNumNodes;
      globalNumNodes+=itmp;
    }
    node_distribution[in->MPIInfo->size]=globalNumNodes;
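    /* node_distribution now holds the exclusive prefix sum of the per-rank
       counts: node_distribution[p] is the first new node number assigned on
       rank p and node_distribution[size] the global total. */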

    /* offset Node_buffer by the first node number assigned to this rank */
    itmp=node_distribution[in->MPIInfo->rank];
    #pragma omp parallel for private(n) schedule(static)
    for (n=0;n<my_buffer_len;n++) Node_buffer[n+header_len]+=itmp;

    /* now we send this buffer around to assign global node index: */
    dest=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank + 1);
    source=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank - 1);
    Node_buffer[0]=min_id;
    Node_buffer[1]=max_id;
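    /* the two header entries travel with the buffer, so each receiving rank
       can reconstruct the Id range [nodeID_0,nodeID_1] the payload refers to. */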
    buffer_rank=in->MPIInfo->rank;
    for (p=0; p< in->MPIInfo->size; ++p) {
      nodeID_0=Node_buffer[0];
      nodeID_1=Node_buffer[1];
      dof_0=dof_distribution[buffer_rank];
      dof_1=dof_distribution[buffer_rank+1];
      if (nodeID_0<=nodeID_1) {
        #pragma omp parallel for private(n,dof,id) schedule(static)
        for (n=0;n<in->numNodes;n++) {
          dof=in->globalDegreesOfFreedom[n];
          id=in->Id[n]-nodeID_0;
          if ( (dof_0<= dof) && (dof< dof_1) && (id>=0) && (id<=nodeID_1-nodeID_0)) in->globalNodesIndex[n]=Node_buffer[id+header_len];
        }
      }
      if (p<in->MPIInfo->size-1) { /* the last send can be skipped */
        #ifdef PASO_MPI
        MPI_Sendrecv_replace(Node_buffer, buffer_len+header_len, MPI_INT,
                             dest, in->MPIInfo->msg_tag_counter, source, in->MPIInfo->msg_tag_counter,
                             in->MPIInfo->comm,&status);
        #endif
        in->MPIInfo->msg_tag_counter+=1;
      }
      buffer_rank=Paso_MPIInfo_mod(in->MPIInfo->size, buffer_rank-1);
    }
  }
  TMPMEMFREE(Node_buffer);
  return globalNumNodes;
}

dim_t Finley_NodeFile_createDenseReducedNodeLabeling(Finley_NodeFile* in,index_t* reducedNodeMask)
{
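  /* Same scheme as Finley_NodeFile_createDenseReducedDOFLabeling, but driven
     by in->globalNodesIndex instead of the DOFs: nodes with
     reducedNodeMask[n]>-1 receive a dense numbering in
     in->globalReducedNodesIndex, all other nodes keep -1, and the number of
     global reduced nodes is returned. */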
  index_t min_node, max_node, unset_node=-1,set_node=1, node_0, node_1, *Nodes_buffer=NULL, k;
  Paso_MPI_rank buffer_rank, dest, source, *distribution=NULL;
  dim_t p, buffer_len,n, myNodes, *offsets=NULL, *loc_offsets=NULL, globalNumReducedNodes=0, myNewNodes;
  #ifdef PASO_MPI
  MPI_Status status;
  #endif

  /* get the global range of node ids */
  Finley_NodeFile_setGlobalNodeIDIndexRange(&min_node,&max_node,in);

  distribution=TMPMEMALLOC(in->MPIInfo->size+1, index_t);
  offsets=TMPMEMALLOC(in->MPIInfo->size, dim_t);
  loc_offsets=TMPMEMALLOC(in->MPIInfo->size, dim_t);

  if ( ! (Finley_checkPtr(distribution) || Finley_checkPtr(offsets) || Finley_checkPtr(loc_offsets) ) ) {
    /* distribute the range of node ids */
    buffer_len=Paso_MPIInfo_setDistribution(in->MPIInfo,min_node,max_node,distribution);
    myNodes=distribution[in->MPIInfo->rank+1]-distribution[in->MPIInfo->rank];
    /* allocate buffers */
    Nodes_buffer=TMPMEMALLOC(buffer_len,index_t);
    if (! Finley_checkPtr(Nodes_buffer)) {
      /* fill Nodes_buffer by the unset_node marker to check if nodes are defined */
      #pragma omp parallel for private(n) schedule(static)
      for (n=0;n<buffer_len;n++) Nodes_buffer[n]=unset_node;

      /* fill the buffer by sending portions around in a circle */
      dest=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank + 1);
      source=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank - 1);
      buffer_rank=in->MPIInfo->rank;
      for (p=0; p< in->MPIInfo->size; ++p) {
        if (p>0) { /* the initial send can be skipped */
          #ifdef PASO_MPI
          MPI_Sendrecv_replace(Nodes_buffer, buffer_len, MPI_INT,
                               dest, in->MPIInfo->msg_tag_counter, source, in->MPIInfo->msg_tag_counter,
                               in->MPIInfo->comm,&status);
          #endif
          in->MPIInfo->msg_tag_counter++;
        }
        buffer_rank=Paso_MPIInfo_mod(in->MPIInfo->size, buffer_rank-1);
        node_0=distribution[buffer_rank];
        node_1=distribution[buffer_rank+1];
        #pragma omp parallel for private(n,k) schedule(static)
        for (n=0;n<in->numNodes;n++) {
          if (reducedNodeMask[n] >-1) {
            k=in->globalNodesIndex[n];
            if ((node_0<=k) && (k<node_1)) {
              Nodes_buffer[k-node_0] = set_node;
            }
          }
        }
      }
      /* count the entries in the Nodes_buffer */
      /* TODO: OMP parallel */
      myNewNodes=0;
      for (n=0; n<myNodes; ++n) {
        if ( Nodes_buffer[n] == set_node) {
          Nodes_buffer[n]=myNewNodes;
          myNewNodes++;
        }
      }
      memset(loc_offsets,0,in->MPIInfo->size*sizeof(dim_t));
      loc_offsets[in->MPIInfo->rank]=myNewNodes;
      #ifdef PASO_MPI
      MPI_Allreduce(loc_offsets,offsets,in->MPIInfo->size, MPI_INT, MPI_SUM, in->MPIInfo->comm );
      globalNumReducedNodes=0;
      for (n=0; n< in->MPIInfo->size; ++n) {
        loc_offsets[n]=globalNumReducedNodes;
        globalNumReducedNodes+=offsets[n];
      }
      #else
      globalNumReducedNodes=loc_offsets[0];
      loc_offsets[0]=0;
      #endif
      #pragma omp parallel for private(n) schedule(static)
      for (n=0; n<myNodes; ++n) Nodes_buffer[n]+=loc_offsets[in->MPIInfo->rank];
      /* now entries are collected from the buffer again by sending the entries around in a circle */
      #pragma omp parallel for private(n) schedule(static)
      for (n=0; n<in->numNodes; ++n) in->globalReducedNodesIndex[n]=loc_offsets[0]-1;
      dest=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank + 1);
      source=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank - 1);
      buffer_rank=in->MPIInfo->rank;
      for (p=0; p< in->MPIInfo->size; ++p) {
        node_0=distribution[buffer_rank];
        node_1=distribution[buffer_rank+1];
        #pragma omp parallel for private(n,k) schedule(static)
        for (n=0;n<in->numNodes;n++) {
          if (reducedNodeMask[n] >-1) {
            k=in->globalNodesIndex[n];
            if ( (node_0<=k) && (k<node_1)) in->globalReducedNodesIndex[n]=Nodes_buffer[k-node_0];
          }
        }
        if (p<in->MPIInfo->size-1) { /* the last send can be skipped */
          #ifdef PASO_MPI
          MPI_Sendrecv_replace(Nodes_buffer, buffer_len, MPI_INT,
                               dest, in->MPIInfo->msg_tag_counter, source, in->MPIInfo->msg_tag_counter,
                               in->MPIInfo->comm,&status);
          #endif
          in->MPIInfo->msg_tag_counter+=1;
        }
        buffer_rank=Paso_MPIInfo_mod(in->MPIInfo->size, buffer_rank-1);
      }
    }
    TMPMEMFREE(Nodes_buffer);
  }
  TMPMEMFREE(distribution);
  TMPMEMFREE(loc_offsets);
  TMPMEMFREE(offsets);
  return globalNumReducedNodes;
}