/trunk/finley/src/NodeFile_createDenseLabelings.c
Revision 3603 - Mon Sep 19 03:42:53 2011 UTC by caltinay
Fixed some unused var errors in non-mpi build.

/*******************************************************
 *
 * Copyright (c) 2003-2010 by University of Queensland
 * Earth Systems Science Computational Center (ESSCC)
 * http://www.uq.edu.au/esscc
 *
 * Primary Business: Queensland, Australia
 * Licensed under the Open Software License version 3.0
 * http://www.opensource.org/licenses/osl-3.0.php
 *
 *******************************************************/


/**************************************************************/

/* Finley: Mesh: NodeFile */

/* creates a dense labeling of the global degrees of freedom */
/* and returns the new number of global degrees of freedom   */

/**************************************************************/
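
/* Note on the scheme used by all four labeling routines below: the global
   range of the ids to be relabeled is split evenly across the MPI ranks
   (Esys_MPIInfo_setDistribution). A buffer covering one rank's slice of that
   range is passed around all ranks in a ring (MPI_Sendrecv_replace) so that
   every rank can mark the ids it actually uses; the slice's owner then
   numbers the marked ids consecutively, an MPI_Allreduce of the per-rank
   counts yields global offsets, and a second ring pass carries the resulting
   dense labels back to the nodes.

   Small worked example (single rank, for illustration only): if the used
   global DOF ids are {3, 7, 7, 12}, Finley_NodeFile_createDenseDOFLabeling
   relabels them as 3->0, 7->1, 12->2 and returns 3 as the new number of
   global degrees of freedom. */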

#include "NodeFile.h"

/**************************************************************/

dim_t Finley_NodeFile_createDenseDOFLabeling(Finley_NodeFile* in)
{
    index_t min_dof, max_dof, unset_dof=-1, set_dof=1, dof_0, dof_1, *DOF_buffer=NULL, k;
    Esys_MPI_rank buffer_rank, *distribution=NULL;
    dim_t p, buffer_len, n, myDOFs, *offsets=NULL, *loc_offsets=NULL, new_numGlobalDOFs=0, myNewDOFs;
    bool_t *set_new_DOF=NULL;
#ifdef ESYS_MPI
    Esys_MPI_rank dest, source;
    MPI_Status status;
#endif

    /* get the global range of degree-of-freedom ids */
    Finley_NodeFile_setGlobalDOFRange(&min_dof, &max_dof, in);

    distribution=TMPMEMALLOC(in->MPIInfo->size+1, index_t);
    offsets=TMPMEMALLOC(in->MPIInfo->size, dim_t);
    loc_offsets=TMPMEMALLOC(in->MPIInfo->size, dim_t);
    set_new_DOF=TMPMEMALLOC(in->numNodes, bool_t);

    if (! (Finley_checkPtr(distribution) || Finley_checkPtr(offsets) || Finley_checkPtr(loc_offsets) || Finley_checkPtr(set_new_DOF)) ) {
        /* distribute the range of degree-of-freedom ids */
        buffer_len=Esys_MPIInfo_setDistribution(in->MPIInfo, min_dof, max_dof, distribution);
        myDOFs=distribution[in->MPIInfo->rank+1]-distribution[in->MPIInfo->rank];
        /* allocate buffers */
        DOF_buffer=TMPMEMALLOC(buffer_len, index_t);
        if (! Finley_checkPtr(DOF_buffer)) {
            /* fill DOF_buffer with the unset_dof marker to check if DOFs are defined */
            #pragma omp parallel for private(n) schedule(static)
            for (n=0; n<buffer_len; n++) DOF_buffer[n]=unset_dof;

            /* fill the buffer by sending portions around in a circle */
#ifdef ESYS_MPI
            dest=Esys_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank + 1);
            source=Esys_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank - 1);
#endif
            buffer_rank=in->MPIInfo->rank;
            for (p=0; p<in->MPIInfo->size; ++p) {
                if (p>0) { /* the initial send can be skipped */
#ifdef ESYS_MPI
                    MPI_Sendrecv_replace(DOF_buffer, buffer_len, MPI_INT,
                                         dest, in->MPIInfo->msg_tag_counter, source, in->MPIInfo->msg_tag_counter,
                                         in->MPIInfo->comm, &status);
#endif
                    in->MPIInfo->msg_tag_counter++;
                }
                buffer_rank=Esys_MPIInfo_mod(in->MPIInfo->size, buffer_rank-1);
                dof_0=distribution[buffer_rank];
                dof_1=distribution[buffer_rank+1];
                #pragma omp parallel for private(n,k) schedule(static)
                for (n=0; n<in->numNodes; n++) {
                    k=in->globalDegreesOfFreedom[n];
                    if ((dof_0<=k) && (k<dof_1)) {
                        DOF_buffer[k-dof_0]=set_dof;
                    }
                }
            }
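            /* the buffer now held locally covers this rank's slice
               [distribution[rank], distribution[rank+1]); an entry equals
               set_dof exactly when that DOF id is used by some node on
               some rank */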
            /* count the entries in the DOF_buffer */
            /* TODO: OMP parallel */
            myNewDOFs=0;
            for (n=0; n<myDOFs; ++n) {
                if (DOF_buffer[n] == set_dof) {
                    DOF_buffer[n]=myNewDOFs;
                    myNewDOFs++;
                }
            }
            memset(loc_offsets, 0, in->MPIInfo->size*sizeof(dim_t));
            loc_offsets[in->MPIInfo->rank]=myNewDOFs;
#ifdef ESYS_MPI
            MPI_Allreduce(loc_offsets, offsets, in->MPIInfo->size, MPI_INT, MPI_SUM, in->MPIInfo->comm);
            new_numGlobalDOFs=0;
            for (n=0; n<in->MPIInfo->size; ++n) {
                loc_offsets[n]=new_numGlobalDOFs;
                new_numGlobalDOFs+=offsets[n];
            }
#else
            new_numGlobalDOFs=loc_offsets[0];
            loc_offsets[0]=0;
#endif
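            /* loc_offsets[rank] is now the exclusive prefix sum of the
               per-rank counts, i.e. the first new global DOF number handed
               out by this rank, and new_numGlobalDOFs is the global total */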
            #pragma omp parallel
            {
                #pragma omp for private(n) schedule(static)
                for (n=0; n<myDOFs; ++n) DOF_buffer[n]+=loc_offsets[in->MPIInfo->rank];
                /* now entries are collected from the buffer again by sending the entries around in a circle */
                #pragma omp for private(n) schedule(static)
                for (n=0; n<in->numNodes; ++n) set_new_DOF[n]=TRUE;
            }
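            /* DOF_buffer now carries the final dense global numbers for this
               rank's slice; the second ring pass below distributes them back
               into globalDegreesOfFreedom, and set_new_DOF makes sure every
               node is relabeled exactly once */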
#ifdef ESYS_MPI
            dest=Esys_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank + 1);
            source=Esys_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank - 1);
#endif
            buffer_rank=in->MPIInfo->rank;
            for (p=0; p<in->MPIInfo->size; ++p) {
                dof_0=distribution[buffer_rank];
                dof_1=distribution[buffer_rank+1];
                #pragma omp parallel for private(n,k) schedule(static)
                for (n=0; n<in->numNodes; n++) {
                    k=in->globalDegreesOfFreedom[n];
                    if (set_new_DOF[n] && (dof_0<=k) && (k<dof_1)) {
                        in->globalDegreesOfFreedom[n]=DOF_buffer[k-dof_0];
                        set_new_DOF[n]=FALSE;
                    }
                }
                if (p<in->MPIInfo->size-1) { /* the last send can be skipped */
#ifdef ESYS_MPI
                    MPI_Sendrecv_replace(DOF_buffer, buffer_len, MPI_INT,
                                         dest, in->MPIInfo->msg_tag_counter, source, in->MPIInfo->msg_tag_counter,
                                         in->MPIInfo->comm, &status);
#endif
                    in->MPIInfo->msg_tag_counter+=1;
                }
                buffer_rank=Esys_MPIInfo_mod(in->MPIInfo->size, buffer_rank-1);
            }
        }
        TMPMEMFREE(DOF_buffer);
    }
    TMPMEMFREE(distribution);
    TMPMEMFREE(loc_offsets);
    TMPMEMFREE(offsets);
    TMPMEMFREE(set_new_DOF);
    return new_numGlobalDOFs;
}

void Finley_NodeFile_assignMPIRankToDOFs(Finley_NodeFile* in, Esys_MPI_rank* mpiRankOfDOF, index_t *distribution)
{
    index_t min_DOF, max_DOF, k;
    dim_t n;
    Esys_MPI_rank p, p_min=in->MPIInfo->size, p_max=-1;
    /* first we calculate the min and max DOF on this processor to reduce the cost of searching */
    Finley_NodeFile_setDOFRange(&min_DOF, &max_DOF, in);

    for (p=0; p<in->MPIInfo->size; ++p) {
        if (distribution[p]<=min_DOF) p_min=p;
        if (distribution[p]<=max_DOF) p_max=p;
    }
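    /* p_min..p_max now bracket the ranks that can own any DOF on this
       processor, so the per-node search below only scans that window */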
    #pragma omp parallel for private(n,k,p) schedule(static)
    for (n=0; n<in->numNodes; ++n) {
        k=in->globalDegreesOfFreedom[n];
        for (p=p_min; p<=p_max; ++p) {
            if (k<distribution[p+1]) {
                mpiRankOfDOF[n]=p;
                break;
            }
        }
    }
}

dim_t Finley_NodeFile_createDenseReducedDOFLabeling(Finley_NodeFile* in, index_t* reducedNodeMask)
{
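    /* same ring-exchange scheme as Finley_NodeFile_createDenseDOFLabeling
       above, but only nodes with reducedNodeMask[n]>-1 take part; the dense
       numbers go into globalReducedDOFIndex and the return value is the
       number of global reduced DOFs */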
    index_t min_dof, max_dof, unset_dof=-1, set_dof=1, dof_0, dof_1, *DOF_buffer=NULL, k;
    Esys_MPI_rank buffer_rank, *distribution=NULL;
    dim_t p, buffer_len, n, myDOFs, *offsets=NULL, *loc_offsets=NULL, globalNumReducedDOFs=0, myNewDOFs;
#ifdef ESYS_MPI
    Esys_MPI_rank dest, source;
    MPI_Status status;
#endif

    /* get the global range of degree-of-freedom ids */
    Finley_NodeFile_setGlobalDOFRange(&min_dof, &max_dof, in);

    distribution=TMPMEMALLOC(in->MPIInfo->size+1, index_t);
    offsets=TMPMEMALLOC(in->MPIInfo->size, dim_t);
    loc_offsets=TMPMEMALLOC(in->MPIInfo->size, dim_t);

    if (! (Finley_checkPtr(distribution) || Finley_checkPtr(offsets) || Finley_checkPtr(loc_offsets)) ) {
        /* distribute the range of degree-of-freedom ids */
        buffer_len=Esys_MPIInfo_setDistribution(in->MPIInfo, min_dof, max_dof, distribution);
        myDOFs=distribution[in->MPIInfo->rank+1]-distribution[in->MPIInfo->rank];
        /* allocate buffers */
        DOF_buffer=TMPMEMALLOC(buffer_len, index_t);
        if (! Finley_checkPtr(DOF_buffer)) {
            /* fill DOF_buffer with the unset_dof marker to check if DOFs are defined */
            #pragma omp parallel for private(n) schedule(static)
            for (n=0; n<buffer_len; n++) DOF_buffer[n]=unset_dof;

            /* fill the buffer by sending portions around in a circle */
#ifdef ESYS_MPI
            dest=Esys_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank + 1);
            source=Esys_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank - 1);
#endif
            buffer_rank=in->MPIInfo->rank;
            for (p=0; p<in->MPIInfo->size; ++p) {
                if (p>0) { /* the initial send can be skipped */
#ifdef ESYS_MPI
                    MPI_Sendrecv_replace(DOF_buffer, buffer_len, MPI_INT,
                                         dest, in->MPIInfo->msg_tag_counter, source, in->MPIInfo->msg_tag_counter,
                                         in->MPIInfo->comm, &status);
#endif
                    in->MPIInfo->msg_tag_counter++;
                }
                buffer_rank=Esys_MPIInfo_mod(in->MPIInfo->size, buffer_rank-1);
                dof_0=distribution[buffer_rank];
                dof_1=distribution[buffer_rank+1];
                #pragma omp parallel for private(n,k) schedule(static)
                for (n=0; n<in->numNodes; n++) {
                    if (reducedNodeMask[n]>-1) {
                        k=in->globalDegreesOfFreedom[n];
                        if ((dof_0<=k) && (k<dof_1)) {
                            DOF_buffer[k-dof_0]=set_dof;
                        }
                    }
                }
            }
            /* count the entries in the DOF_buffer */
            /* TODO: OMP parallel */
            myNewDOFs=0;
            for (n=0; n<myDOFs; ++n) {
                if (DOF_buffer[n] == set_dof) {
                    DOF_buffer[n]=myNewDOFs;
                    myNewDOFs++;
                }
            }
            memset(loc_offsets, 0, in->MPIInfo->size*sizeof(dim_t));
            loc_offsets[in->MPIInfo->rank]=myNewDOFs;
#ifdef ESYS_MPI
            MPI_Allreduce(loc_offsets, offsets, in->MPIInfo->size, MPI_INT, MPI_SUM, in->MPIInfo->comm);
            globalNumReducedDOFs=0;
            for (n=0; n<in->MPIInfo->size; ++n) {
                loc_offsets[n]=globalNumReducedDOFs;
                globalNumReducedDOFs+=offsets[n];
            }
#else
            globalNumReducedDOFs=loc_offsets[0];
            loc_offsets[0]=0;
#endif
            #pragma omp parallel for private(n) schedule(static)
            for (n=0; n<myDOFs; ++n) DOF_buffer[n]+=loc_offsets[in->MPIInfo->rank];
            /* now entries are collected from the buffer again by sending the entries around in a circle */
            #pragma omp parallel for private(n) schedule(static)
            for (n=0; n<in->numNodes; ++n) in->globalReducedDOFIndex[n]=loc_offsets[0]-1;
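            /* loc_offsets[0] is 0 here (in both the MPI and the non-MPI
               branch), so nodes outside the reduced set keep the marker -1
               in globalReducedDOFIndex */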
#ifdef ESYS_MPI
            dest=Esys_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank + 1);
            source=Esys_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank - 1);
#endif
            buffer_rank=in->MPIInfo->rank;
            for (p=0; p<in->MPIInfo->size; ++p) {
                dof_0=distribution[buffer_rank];
                dof_1=distribution[buffer_rank+1];
                #pragma omp parallel for private(n,k) schedule(static)
                for (n=0; n<in->numNodes; n++) {
                    if (reducedNodeMask[n]>-1) {
                        k=in->globalDegreesOfFreedom[n];
                        if ((dof_0<=k) && (k<dof_1)) in->globalReducedDOFIndex[n]=DOF_buffer[k-dof_0];
                    }
                }
                if (p<in->MPIInfo->size-1) { /* the last send can be skipped */
#ifdef ESYS_MPI
                    MPI_Sendrecv_replace(DOF_buffer, buffer_len, MPI_INT,
                                         dest, in->MPIInfo->msg_tag_counter, source, in->MPIInfo->msg_tag_counter,
                                         in->MPIInfo->comm, &status);
#endif
                    in->MPIInfo->msg_tag_counter+=1;
                }
                buffer_rank=Esys_MPIInfo_mod(in->MPIInfo->size, buffer_rank-1);
            }
        }
        TMPMEMFREE(DOF_buffer);
    }
    TMPMEMFREE(distribution);
    TMPMEMFREE(loc_offsets);
    TMPMEMFREE(offsets);
    return globalNumReducedDOFs;
}

dim_t Finley_NodeFile_createDenseNodeLabeling(Finley_NodeFile* in, index_t* node_distribution, const index_t* dof_distribution)
{
    index_t myFirstDOF, myLastDOF, max_id, min_id, loc_max_id, loc_min_id, dof, id, itmp, nodeID_0, nodeID_1, dof_0, dof_1, *Node_buffer=NULL;
    dim_t n, my_buffer_len, buffer_len, globalNumNodes=0, myNewNumNodes;
    Esys_MPI_rank p, buffer_rank;
    const index_t unset_nodeID=-1, set_nodeID=1;
    const dim_t header_len=2;
#ifdef ESYS_MPI
    Esys_MPI_rank dest, source;
    MPI_Status status;
#endif
    Esys_MPI_rank myRank=in->MPIInfo->rank;

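    /* this routine labels nodes (via in->Id) rather than DOFs: a node is
       counted by the rank that owns its degree of freedom according to
       dof_distribution, and the circulated Node_buffer carries a two-entry
       header (min_id, max_id) describing the id window it covers */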
    /* find the range of node ids controlled by me */

    myFirstDOF=dof_distribution[myRank];
    myLastDOF=dof_distribution[myRank+1];
    max_id=-INDEX_T_MAX;
    min_id=INDEX_T_MAX;
    #pragma omp parallel private(loc_max_id,loc_min_id)
    {
        loc_max_id=max_id;
        loc_min_id=min_id;
        #pragma omp for private(n,dof,id) schedule(static)
        for (n=0; n<in->numNodes; n++) {
            dof=in->globalDegreesOfFreedom[n];
            id=in->Id[n];
            if ((myFirstDOF<=dof) && (dof<myLastDOF)) {
                loc_max_id=MAX(loc_max_id, id);
                loc_min_id=MIN(loc_min_id, id);
            }
        }
        #pragma omp critical
        {
            max_id=MAX(loc_max_id, max_id);
            min_id=MIN(loc_min_id, min_id);
        }
    }
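    /* min_id/max_id now span the Ids of the nodes whose DOF this rank owns;
       the critical section above combines the per-thread extrema */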
    /* allocate a buffer */
    my_buffer_len=max_id>=min_id ? max_id-min_id+1 : 0;

#ifdef ESYS_MPI
    MPI_Allreduce(&my_buffer_len, &buffer_len, 1, MPI_INT, MPI_MAX, in->MPIInfo->comm);
#else
    buffer_len=my_buffer_len;
#endif

    Node_buffer=TMPMEMALLOC(buffer_len+header_len, index_t);
    if (! Finley_checkPtr(Node_buffer)) {
        /* mark and count the nodes in use */
        #pragma omp parallel
        {
            #pragma omp for private(n) schedule(static)
            for (n=0; n<buffer_len+header_len; n++) Node_buffer[n]=unset_nodeID;
            #pragma omp for private(n) schedule(static)
            for (n=0; n<in->numNodes; n++) in->globalNodesIndex[n]=-1;
            #pragma omp for private(n,dof,id) schedule(static)
            for (n=0; n<in->numNodes; n++) {
                dof=in->globalDegreesOfFreedom[n];
                id=in->Id[n];
                if ((myFirstDOF<=dof) && (dof<myLastDOF)) Node_buffer[id-min_id+header_len]=set_nodeID;
            }
        }
        myNewNumNodes=0;
        for (n=0; n<my_buffer_len; n++) {
            if (Node_buffer[header_len+n]==set_nodeID) {
                Node_buffer[header_len+n]=myNewNumNodes;
                myNewNumNodes++;
            }
        }
        /* make the local number of nodes globally available */
#ifdef ESYS_MPI
        MPI_Allgather(&myNewNumNodes, 1, MPI_INT, node_distribution, 1, MPI_INT, in->MPIInfo->comm);
#else
        node_distribution[0]=myNewNumNodes;
#endif

        globalNumNodes=0;
        for (p=0; p<in->MPIInfo->size; ++p) {
            itmp=node_distribution[p];
            node_distribution[p]=globalNumNodes;
            globalNumNodes+=itmp;
        }
        node_distribution[in->MPIInfo->size]=globalNumNodes;
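        /* node_distribution has been turned into a standard distribution
           array: rank p owns the new node numbers
           node_distribution[p] .. node_distribution[p+1]-1 */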

        /* offset Node_buffer */
        itmp=node_distribution[in->MPIInfo->rank];
        #pragma omp for private(n) schedule(static)
        for (n=0; n<my_buffer_len; n++) Node_buffer[n+header_len]+=itmp;

        /* now we send this buffer around to assign the global node indices: */
#ifdef ESYS_MPI
        dest=Esys_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank + 1);
        source=Esys_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank - 1);
#endif
        Node_buffer[0]=min_id;
        Node_buffer[1]=max_id;
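        /* the two header entries tell every rank that receives the buffer in
           the ring below which node-id window [min_id, max_id] it covers */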
        buffer_rank=in->MPIInfo->rank;
        for (p=0; p<in->MPIInfo->size; ++p) {
            nodeID_0=Node_buffer[0];
            nodeID_1=Node_buffer[1];
            dof_0=dof_distribution[buffer_rank];
            dof_1=dof_distribution[buffer_rank+1];
            if (nodeID_0<=nodeID_1) {
                #pragma omp for private(n,dof,id) schedule(static)
                for (n=0; n<in->numNodes; n++) {
                    dof=in->globalDegreesOfFreedom[n];
                    id=in->Id[n]-nodeID_0;
                    if ((dof_0<=dof) && (dof<dof_1) && (id>=0) && (id<=nodeID_1-nodeID_0)) in->globalNodesIndex[n]=Node_buffer[id+header_len];
                }
            }
            if (p<in->MPIInfo->size-1) { /* the last send can be skipped */
#ifdef ESYS_MPI
                MPI_Sendrecv_replace(Node_buffer, buffer_len+header_len, MPI_INT,
                                     dest, in->MPIInfo->msg_tag_counter, source, in->MPIInfo->msg_tag_counter,
                                     in->MPIInfo->comm, &status);
#endif
                in->MPIInfo->msg_tag_counter+=1;
            }
            buffer_rank=Esys_MPIInfo_mod(in->MPIInfo->size, buffer_rank-1);
        }
    }
    TMPMEMFREE(Node_buffer);
    return globalNumNodes;
}

dim_t Finley_NodeFile_createDenseReducedNodeLabeling(Finley_NodeFile* in, index_t* reducedNodeMask)
{
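    /* same ring-exchange scheme as
       Finley_NodeFile_createDenseReducedDOFLabeling, but applied to the
       globalNodesIndex values of the nodes with reducedNodeMask[n]>-1; the
       dense numbers go into globalReducedNodesIndex and the return value is
       the number of global reduced nodes */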
    index_t min_node, max_node, unset_node=-1, set_node=1, node_0, node_1, *Nodes_buffer=NULL, k;
    Esys_MPI_rank buffer_rank, *distribution=NULL;
    dim_t p, buffer_len, n, myNodes, *offsets=NULL, *loc_offsets=NULL, globalNumReducedNodes=0, myNewNodes;
#ifdef ESYS_MPI
    Esys_MPI_rank dest, source;
    MPI_Status status;
#endif

    /* get the global range of node ids */
    Finley_NodeFile_setGlobalNodeIDIndexRange(&min_node, &max_node, in);

    distribution=TMPMEMALLOC(in->MPIInfo->size+1, index_t);
    offsets=TMPMEMALLOC(in->MPIInfo->size, dim_t);
    loc_offsets=TMPMEMALLOC(in->MPIInfo->size, dim_t);

    if (! (Finley_checkPtr(distribution) || Finley_checkPtr(offsets) || Finley_checkPtr(loc_offsets)) ) {
        /* distribute the range of node ids */
        buffer_len=Esys_MPIInfo_setDistribution(in->MPIInfo, min_node, max_node, distribution);
        myNodes=distribution[in->MPIInfo->rank+1]-distribution[in->MPIInfo->rank];
        /* allocate buffers */
        Nodes_buffer=TMPMEMALLOC(buffer_len, index_t);
        if (! Finley_checkPtr(Nodes_buffer)) {
            /* fill Nodes_buffer with the unset_node marker to check if nodes are defined */
            #pragma omp parallel for private(n) schedule(static)
            for (n=0; n<buffer_len; n++) Nodes_buffer[n]=unset_node;

            /* fill the buffer by sending portions around in a circle */
#ifdef ESYS_MPI
            dest=Esys_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank + 1);
            source=Esys_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank - 1);
#endif
            buffer_rank=in->MPIInfo->rank;
            for (p=0; p<in->MPIInfo->size; ++p) {
                if (p>0) { /* the initial send can be skipped */
#ifdef ESYS_MPI
                    MPI_Sendrecv_replace(Nodes_buffer, buffer_len, MPI_INT,
                                         dest, in->MPIInfo->msg_tag_counter, source, in->MPIInfo->msg_tag_counter,
                                         in->MPIInfo->comm, &status);
#endif
                    in->MPIInfo->msg_tag_counter++;
                }
                buffer_rank=Esys_MPIInfo_mod(in->MPIInfo->size, buffer_rank-1);
                node_0=distribution[buffer_rank];
                node_1=distribution[buffer_rank+1];
                #pragma omp parallel for private(n,k) schedule(static)
                for (n=0; n<in->numNodes; n++) {
                    if (reducedNodeMask[n]>-1) {
                        k=in->globalNodesIndex[n];
                        if ((node_0<=k) && (k<node_1)) {
                            Nodes_buffer[k-node_0]=set_node;
                        }
                    }
                }
            }
            /* count the entries in the Nodes_buffer */
            /* TODO: OMP parallel */
            myNewNodes=0;
            for (n=0; n<myNodes; ++n) {
                if (Nodes_buffer[n] == set_node) {
                    Nodes_buffer[n]=myNewNodes;
                    myNewNodes++;
                }
            }
            memset(loc_offsets, 0, in->MPIInfo->size*sizeof(dim_t));
            loc_offsets[in->MPIInfo->rank]=myNewNodes;
#ifdef ESYS_MPI
            MPI_Allreduce(loc_offsets, offsets, in->MPIInfo->size, MPI_INT, MPI_SUM, in->MPIInfo->comm);
            globalNumReducedNodes=0;
            for (n=0; n<in->MPIInfo->size; ++n) {
                loc_offsets[n]=globalNumReducedNodes;
                globalNumReducedNodes+=offsets[n];
            }
#else
            globalNumReducedNodes=loc_offsets[0];
            loc_offsets[0]=0;
#endif
            #pragma omp parallel for private(n) schedule(static)
            for (n=0; n<myNodes; ++n) Nodes_buffer[n]+=loc_offsets[in->MPIInfo->rank];
            /* now entries are collected from the buffer again by sending the entries around in a circle */
            #pragma omp parallel for private(n) schedule(static)
            for (n=0; n<in->numNodes; ++n) in->globalReducedNodesIndex[n]=loc_offsets[0]-1;
#ifdef ESYS_MPI
            dest=Esys_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank + 1);
            source=Esys_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank - 1);
#endif
            buffer_rank=in->MPIInfo->rank;
            for (p=0; p<in->MPIInfo->size; ++p) {
                node_0=distribution[buffer_rank];
                node_1=distribution[buffer_rank+1];
                #pragma omp parallel for private(n,k) schedule(static)
                for (n=0; n<in->numNodes; n++) {
                    if (reducedNodeMask[n]>-1) {
                        k=in->globalNodesIndex[n];
                        if ((node_0<=k) && (k<node_1)) in->globalReducedNodesIndex[n]=Nodes_buffer[k-node_0];
                    }
                }
                if (p<in->MPIInfo->size-1) { /* the last send can be skipped */
#ifdef ESYS_MPI
                    MPI_Sendrecv_replace(Nodes_buffer, buffer_len, MPI_INT,
                                         dest, in->MPIInfo->msg_tag_counter, source, in->MPIInfo->msg_tag_counter,
                                         in->MPIInfo->comm, &status);
#endif
                    in->MPIInfo->msg_tag_counter+=1;
                }
                buffer_rank=Esys_MPIInfo_mod(in->MPIInfo->size, buffer_rank-1);
            }
        }
        TMPMEMFREE(Nodes_buffer);
    }
    TMPMEMFREE(distribution);
    TMPMEMFREE(loc_offsets);
    TMPMEMFREE(offsets);
    return globalNumReducedNodes;
}
