/trunk/finley/src/NodeFile_createDenseLabelings.c
Revision 1749 - Wed Sep 3 07:25:01 2008 UTC, by gross
fixes a problem with the saveVTK writing under mpi

/* $Id$ */

/*******************************************************
 *
 * Copyright 2003-2007 by ACceSS MNRF
 * Copyright 2007 by University of Queensland
 *
 * http://esscc.uq.edu.au
 * Primary Business: Queensland, Australia
 * Licensed under the Open Software License version 3.0
 * http://www.opensource.org/licenses/osl-3.0.php
 *
 *******************************************************/

/**************************************************************/

/* Finley: Mesh: NodeFile */

/* creates a dense labeling of the global degrees of freedom */
/* and returns the new number of global degrees of freedom   */

/**************************************************************/

#include "NodeFile.h"

/**************************************************************/
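
/* All labeling routines below share the same two-pass ring-exchange scheme.
   A rough sketch (names as in the code; per-routine details vary):

     buffer_len = Paso_MPIInfo_setDistribution(mpi_info, min_id, max_id, distribution);
     for (p = 0; p < mpi_info->size; ++p) {
        if (p > 0) MPI_Sendrecv_replace(buffer, buffer_len, MPI_INT,
                                        dest, tag, source, tag, comm, &status);
        ...mark or read the slice of the buffer owned by the rank it currently
           represents, then pass it on to the next rank in the ring...
     }

   Pass 1 marks which global ids are actually in use and compresses them into
   a dense numbering; pass 2 sends the relabeled buffers around again so every
   rank can pick up the new indices for the ids it references. */
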
dim_t Finley_NodeFile_createDenseDOFLabeling(Finley_NodeFile* in)
{
  index_t min_dof, max_dof, unset_dof=-1, set_dof=1, dof_0, dof_1, *DOF_buffer=NULL, k;
  Paso_MPI_rank buffer_rank, dest, source, *distribution=NULL;
  dim_t p, buffer_len, n, myDOFs, *offsets=NULL, *loc_offsets=NULL, new_numGlobalDOFs=0, myNewDOFs;
  bool_t *set_new_DOF=NULL;
  #ifdef PASO_MPI
  MPI_Status status;
  #endif

  /* get the global range of degree-of-freedom ids */
  Finley_NodeFile_setGlobalDOFRange(&min_dof,&max_dof,in);

  distribution=TMPMEMALLOC(in->MPIInfo->size+1, index_t);
  offsets=TMPMEMALLOC(in->MPIInfo->size, dim_t);
  loc_offsets=TMPMEMALLOC(in->MPIInfo->size, dim_t);
  set_new_DOF=TMPMEMALLOC(in->numNodes, bool_t);

  if (! (Finley_checkPtr(distribution) || Finley_checkPtr(offsets) || Finley_checkPtr(loc_offsets) || Finley_checkPtr(set_new_DOF)) ) {
    /* distribute the range of DOF ids */
    buffer_len=Paso_MPIInfo_setDistribution(in->MPIInfo,min_dof,max_dof,distribution);
    myDOFs=distribution[in->MPIInfo->rank+1]-distribution[in->MPIInfo->rank];
    /* allocate buffers */
    DOF_buffer=TMPMEMALLOC(buffer_len,index_t);
    if (! Finley_checkPtr(DOF_buffer)) {
      /* fill DOF_buffer with the unset_dof marker so undefined entries can be detected */
      #pragma omp parallel for private(n) schedule(static)
      for (n=0;n<buffer_len;n++) DOF_buffer[n]=unset_dof;

      /* fill the buffer by sending portions around in a circle */
      dest=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank + 1);
      source=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank - 1);
      buffer_rank=in->MPIInfo->rank;
      for (p=0; p<in->MPIInfo->size; ++p) {
        if (p>0) { /* the initial send can be skipped */
          #ifdef PASO_MPI
          MPI_Sendrecv_replace(DOF_buffer, buffer_len, MPI_INT,
                               dest, in->MPIInfo->msg_tag_counter, source, in->MPIInfo->msg_tag_counter,
                               in->MPIInfo->comm, &status);
          #endif
          in->MPIInfo->msg_tag_counter++;
        }
        buffer_rank=Paso_MPIInfo_mod(in->MPIInfo->size, buffer_rank-1);
        dof_0=distribution[buffer_rank];
        dof_1=distribution[buffer_rank+1];
        #pragma omp parallel for private(n,k) schedule(static)
        for (n=0;n<in->numNodes;n++) {
          k=in->globalDegreesOfFreedom[n];
          if ((dof_0<=k) && (k<dof_1)) {
            DOF_buffer[k-dof_0]=set_dof;
          }
        }
      }
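      /* at this point DOF_buffer[i]==set_dof exactly when global DOF
         distribution[in->MPIInfo->rank]+i is referenced by some node on some
         rank; the loop below compresses these marks into the local indices
         0..myNewDOFs-1 */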
      /* count the entries in the DOF_buffer */
      /* TODO: OMP parallel */
      myNewDOFs=0;
      for (n=0; n<myDOFs; ++n) {
        if (DOF_buffer[n] == set_dof) {
          DOF_buffer[n]=myNewDOFs;
          myNewDOFs++;
        }
      }
      memset(loc_offsets,0,in->MPIInfo->size*sizeof(dim_t));
      loc_offsets[in->MPIInfo->rank]=myNewDOFs;
      #ifdef PASO_MPI
      MPI_Allreduce(loc_offsets,offsets,in->MPIInfo->size, MPI_INT, MPI_SUM, in->MPIInfo->comm);
      new_numGlobalDOFs=0;
      for (n=0; n<in->MPIInfo->size; ++n) {
        loc_offsets[n]=new_numGlobalDOFs;
        new_numGlobalDOFs+=offsets[n];
      }
      #else
      new_numGlobalDOFs=loc_offsets[0];
      loc_offsets[0]=0;
      #endif
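      /* loc_offsets[r] is now the exclusive prefix sum of the per-rank counts,
         i.e. the first dense global index owned by rank r, and new_numGlobalDOFs
         is the total number of dense degrees of freedom */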
      #pragma omp parallel for private(n) schedule(static)
      for (n=0; n<myDOFs; ++n) DOF_buffer[n]+=loc_offsets[in->MPIInfo->rank];
      /* now entries are collected from the buffer again by sending the entries around in a circle */
      #pragma omp parallel for private(n) schedule(static)
      for (n=0; n<in->numNodes; ++n) set_new_DOF[n]=TRUE;
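      /* set_new_DOF guards each node so its global DOF is rewritten at most
         once while the relabeled buffers travel around the ring */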
      dest=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank + 1);
      source=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank - 1);
      buffer_rank=in->MPIInfo->rank;
      for (p=0; p<in->MPIInfo->size; ++p) {
        dof_0=distribution[buffer_rank];
        dof_1=distribution[buffer_rank+1];
        #pragma omp parallel for private(n,k) schedule(static)
        for (n=0;n<in->numNodes;n++) {
          k=in->globalDegreesOfFreedom[n];
          if (set_new_DOF[n] && (dof_0<=k) && (k<dof_1)) {
            in->globalDegreesOfFreedom[n]=DOF_buffer[k-dof_0];
            set_new_DOF[n]=FALSE;
          }
        }
        if (p<in->MPIInfo->size-1) { /* the last send can be skipped */
          #ifdef PASO_MPI
          MPI_Sendrecv_replace(DOF_buffer, buffer_len, MPI_INT,
                               dest, in->MPIInfo->msg_tag_counter, source, in->MPIInfo->msg_tag_counter,
                               in->MPIInfo->comm, &status);
          #endif
          in->MPIInfo->msg_tag_counter+=1;
        }
        buffer_rank=Paso_MPIInfo_mod(in->MPIInfo->size, buffer_rank-1);
      }
    }
    TMPMEMFREE(DOF_buffer);
  }
  TMPMEMFREE(distribution);
  TMPMEMFREE(loc_offsets);
  TMPMEMFREE(offsets);
  TMPMEMFREE(set_new_DOF);
  return new_numGlobalDOFs;
}

void Finley_NodeFile_assignMPIRankToDOFs(Finley_NodeFile* in, Paso_MPI_rank* mpiRankOfDOF, index_t* distribution)
{
  index_t min_DOF, max_DOF, k;
  dim_t n;
  Paso_MPI_rank p, p_min=in->MPIInfo->size, p_max=-1;
  /* first calculate the min and max DOF on this processor to reduce the cost of searching */
  Finley_NodeFile_setDOFRange(&min_DOF,&max_DOF,in);

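  /* p_min/p_max bracket the ranks that can own any DOF present on this
     processor, which keeps the per-node search over ranks short */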
  for (p=0; p<in->MPIInfo->size; ++p) {
    if (distribution[p]<=min_DOF) p_min=p;
    if (distribution[p]<=max_DOF) p_max=p;
  }
  #pragma omp parallel for private(n,k,p) schedule(static)
  for (n=0; n<in->numNodes; ++n) {
    k=in->globalDegreesOfFreedom[n];
    for (p=p_min; p<=p_max; ++p) {
      if (k<distribution[p+1]) {
        mpiRankOfDOF[n]=p;
        break;
      }
    }
  }
}
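
/* same two-pass ring scheme as Finley_NodeFile_createDenseDOFLabeling above,
   but only nodes with reducedNodeMask[n]>-1 take part; the new labels are
   written to globalReducedDOFIndex and the number of reduced DOFs is returned */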
dim_t Finley_NodeFile_createDenseReducedDOFLabeling(Finley_NodeFile* in, index_t* reducedNodeMask)
{
  index_t min_dof, max_dof, unset_dof=-1, set_dof=1, dof_0, dof_1, *DOF_buffer=NULL, k;
  Paso_MPI_rank buffer_rank, dest, source, *distribution=NULL;
  dim_t p, buffer_len, n, myDOFs, *offsets=NULL, *loc_offsets=NULL, globalNumReducedDOFs=0, myNewDOFs;
  #ifdef PASO_MPI
  MPI_Status status;
  #endif

  /* get the global range of DOF ids */
  Finley_NodeFile_setGlobalDOFRange(&min_dof,&max_dof,in);

  distribution=TMPMEMALLOC(in->MPIInfo->size+1, index_t);
  offsets=TMPMEMALLOC(in->MPIInfo->size, dim_t);
  loc_offsets=TMPMEMALLOC(in->MPIInfo->size, dim_t);

  if (! (Finley_checkPtr(distribution) || Finley_checkPtr(offsets) || Finley_checkPtr(loc_offsets)) ) {
    /* distribute the range of DOF ids */
    buffer_len=Paso_MPIInfo_setDistribution(in->MPIInfo,min_dof,max_dof,distribution);
    myDOFs=distribution[in->MPIInfo->rank+1]-distribution[in->MPIInfo->rank];
    /* allocate buffers */
    DOF_buffer=TMPMEMALLOC(buffer_len,index_t);
    if (! Finley_checkPtr(DOF_buffer)) {
      /* fill DOF_buffer with the unset_dof marker so undefined entries can be detected */
      #pragma omp parallel for private(n) schedule(static)
      for (n=0;n<buffer_len;n++) DOF_buffer[n]=unset_dof;

      /* fill the buffer by sending portions around in a circle */
      dest=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank + 1);
      source=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank - 1);
      buffer_rank=in->MPIInfo->rank;
      for (p=0; p<in->MPIInfo->size; ++p) {
        if (p>0) { /* the initial send can be skipped */
          #ifdef PASO_MPI
          MPI_Sendrecv_replace(DOF_buffer, buffer_len, MPI_INT,
                               dest, in->MPIInfo->msg_tag_counter, source, in->MPIInfo->msg_tag_counter,
                               in->MPIInfo->comm, &status);
          #endif
          in->MPIInfo->msg_tag_counter++;
        }
        buffer_rank=Paso_MPIInfo_mod(in->MPIInfo->size, buffer_rank-1);
        dof_0=distribution[buffer_rank];
        dof_1=distribution[buffer_rank+1];
        #pragma omp parallel for private(n,k) schedule(static)
        for (n=0;n<in->numNodes;n++) {
          if (reducedNodeMask[n]>-1) {
            k=in->globalDegreesOfFreedom[n];
            if ((dof_0<=k) && (k<dof_1)) {
              DOF_buffer[k-dof_0]=set_dof;
            }
          }
        }
      }
      /* count the entries in the DOF_buffer */
      /* TODO: OMP parallel */
      myNewDOFs=0;
      for (n=0; n<myDOFs; ++n) {
        if (DOF_buffer[n] == set_dof) {
          DOF_buffer[n]=myNewDOFs;
          myNewDOFs++;
        }
      }
      memset(loc_offsets,0,in->MPIInfo->size*sizeof(dim_t));
      loc_offsets[in->MPIInfo->rank]=myNewDOFs;
      #ifdef PASO_MPI
      MPI_Allreduce(loc_offsets,offsets,in->MPIInfo->size, MPI_INT, MPI_SUM, in->MPIInfo->comm);
      globalNumReducedDOFs=0;
      for (n=0; n<in->MPIInfo->size; ++n) {
        loc_offsets[n]=globalNumReducedDOFs;
        globalNumReducedDOFs+=offsets[n];
      }
      #else
      globalNumReducedDOFs=loc_offsets[0];
      loc_offsets[0]=0;
      #endif
      #pragma omp parallel for private(n) schedule(static)
      for (n=0; n<myDOFs; ++n) DOF_buffer[n]+=loc_offsets[in->MPIInfo->rank];
      /* now entries are collected from the buffer again by sending the entries around in a circle */
      #pragma omp parallel for private(n) schedule(static)
      for (n=0; n<in->numNodes; ++n) in->globalReducedDOFIndex[n]=loc_offsets[0]-1;
      dest=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank + 1);
      source=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank - 1);
      buffer_rank=in->MPIInfo->rank;
      for (p=0; p<in->MPIInfo->size; ++p) {
        dof_0=distribution[buffer_rank];
        dof_1=distribution[buffer_rank+1];
        #pragma omp parallel for private(n,k) schedule(static)
        for (n=0;n<in->numNodes;n++) {
          if (reducedNodeMask[n]>-1) {
            k=in->globalDegreesOfFreedom[n];
            if ((dof_0<=k) && (k<dof_1)) in->globalReducedDOFIndex[n]=DOF_buffer[k-dof_0];
          }
        }
        if (p<in->MPIInfo->size-1) { /* the last send can be skipped */
          #ifdef PASO_MPI
          MPI_Sendrecv_replace(DOF_buffer, buffer_len, MPI_INT,
                               dest, in->MPIInfo->msg_tag_counter, source, in->MPIInfo->msg_tag_counter,
                               in->MPIInfo->comm, &status);
          #endif
          in->MPIInfo->msg_tag_counter+=1;
        }
        buffer_rank=Paso_MPIInfo_mod(in->MPIInfo->size, buffer_rank-1);
      }
    }
    TMPMEMFREE(DOF_buffer);
  }
  TMPMEMFREE(distribution);
  TMPMEMFREE(loc_offsets);
  TMPMEMFREE(offsets);
  return globalNumReducedDOFs;
}
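
/* Finley_NodeFile_createDenseNodeLabeling works on node Ids rather than DOFs:
   each rank numbers the Ids of the nodes whose DOFs it owns, then passes its
   buffer around the ring with a two-entry header [min_id, max_id] so the other
   ranks can look up the new global node index for the Ids they reference;
   node_distribution receives the first new node index of every rank */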
dim_t Finley_NodeFile_createDenseNodeLabeling(Finley_NodeFile* in, index_t* node_distribution, const index_t* dof_distribution)
{
  index_t myFirstDOF, myLastDOF, max_id, min_id, loc_max_id, loc_min_id, dof, id, itmp, nodeID_0, nodeID_1, dof_0, dof_1, *Node_buffer=NULL;
  dim_t n, my_buffer_len, buffer_len, globalNumNodes=0, myNewNumNodes;
  Paso_MPI_rank p, dest, source, buffer_rank;
  const index_t unset_nodeID=-1, set_nodeID=1;
  const dim_t header_len=2;
  #ifdef PASO_MPI
  MPI_Status status;
  #endif
  Paso_MPI_rank myRank=in->MPIInfo->rank;

  /* find the range of node ids controlled by me */

  myFirstDOF=dof_distribution[in->MPIInfo->rank];
  myLastDOF=dof_distribution[in->MPIInfo->rank+1];
  max_id=-INDEX_T_MAX;
  min_id=INDEX_T_MAX;
  #pragma omp parallel private(loc_max_id,loc_min_id)
  {
    loc_max_id=max_id;
    loc_min_id=min_id;
    #pragma omp for private(n,dof,id) schedule(static)
    for (n=0;n<in->numNodes;n++) {
      dof=in->globalDegreesOfFreedom[n];
      id=in->Id[n];
      if ((myFirstDOF<=dof) && (dof<myLastDOF)) {
        loc_max_id=MAX(loc_max_id,id);
        loc_min_id=MIN(loc_min_id,id);
      }
    }
    #pragma omp critical
    {
      max_id=MAX(loc_max_id,max_id);
      min_id=MIN(loc_min_id,min_id);
    }
  }
  /* allocate a buffer */
  my_buffer_len = (max_id>=min_id) ? max_id-min_id+1 : 0;

  #ifdef PASO_MPI
  MPI_Allreduce(&my_buffer_len, &buffer_len, 1, MPI_INT, MPI_MAX, in->MPIInfo->comm);
  #else
  buffer_len=my_buffer_len;
  #endif

  Node_buffer=TMPMEMALLOC(buffer_len+header_len,index_t);
  if (! Finley_checkPtr(Node_buffer)) {
    /* mark and count the nodes in use */
    #pragma omp parallel
    {
      #pragma omp for private(n) schedule(static)
      for (n=0;n<buffer_len;n++) Node_buffer[n]=unset_nodeID;
      #pragma omp for private(n) schedule(static)
      for (n=0;n<in->numNodes;n++) in->globalNodesIndex[n]=-1;
      #pragma omp for private(n,dof,id) schedule(static)
      for (n=0;n<in->numNodes;n++) {
        dof=in->globalDegreesOfFreedom[n];
        id=in->Id[n];
        if ((myFirstDOF<=dof) && (dof<myLastDOF)) Node_buffer[id-min_id+header_len]=set_nodeID;
      }
    }
    myNewNumNodes=0;
    for (n=0;n<my_buffer_len;n++) {
      if (Node_buffer[header_len+n]==set_nodeID) {
        Node_buffer[header_len+n]=myNewNumNodes;
        myNewNumNodes++;
      }
    }
    /* make the local number of nodes globally available */
    #ifdef PASO_MPI
    MPI_Allgather(&myNewNumNodes,1,MPI_INT,node_distribution,1,MPI_INT,in->MPIInfo->comm);
    #else
    node_distribution[0]=myNewNumNodes;
    #endif
    globalNumNodes=0;
    for (p=0; p<in->MPIInfo->size; ++p) {
      itmp=node_distribution[p];
      node_distribution[p]=globalNumNodes;
      globalNumNodes+=itmp;
    }
    node_distribution[in->MPIInfo->size]=globalNumNodes;

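    /* node_distribution now holds the exclusive prefix sum of the per-rank
       node counts, i.e. the first new global node index owned by each rank */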
    /* offset the node buffer */
    itmp=node_distribution[in->MPIInfo->rank];
    #pragma omp for private(n) schedule(static)
    for (n=0;n<my_buffer_len;n++) Node_buffer[n+header_len]+=itmp;

    /* now we send this buffer around to assign the global node index: */
    dest=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank + 1);
    source=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank - 1);
    Node_buffer[0]=min_id;
    Node_buffer[1]=max_id;
    buffer_rank=in->MPIInfo->rank;
    for (p=0; p<in->MPIInfo->size; ++p) {
      nodeID_0=Node_buffer[0];
      nodeID_1=Node_buffer[1];
      dof_0=dof_distribution[buffer_rank];
      dof_1=dof_distribution[buffer_rank+1];
      if (nodeID_0<=nodeID_1) {
        #pragma omp for private(n,dof,id) schedule(static)
        for (n=0;n<in->numNodes;n++) {
          dof=in->globalDegreesOfFreedom[n];
          id=in->Id[n]-nodeID_0;
          if ((dof_0<=dof) && (dof<dof_1) && (id>=0) && (id<=nodeID_1-nodeID_0)) in->globalNodesIndex[n]=Node_buffer[id+header_len];
        }
      }
      if (p<in->MPIInfo->size-1) { /* the last send can be skipped */
        #ifdef PASO_MPI
        MPI_Sendrecv_replace(Node_buffer, buffer_len+header_len, MPI_INT,
                             dest, in->MPIInfo->msg_tag_counter, source, in->MPIInfo->msg_tag_counter,
                             in->MPIInfo->comm, &status);
        #endif
        in->MPIInfo->msg_tag_counter+=1;
      }
      buffer_rank=Paso_MPIInfo_mod(in->MPIInfo->size, buffer_rank-1);
    }
  }
  TMPMEMFREE(Node_buffer);
  return globalNumNodes;
}

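/* same ring scheme as Finley_NodeFile_createDenseReducedDOFLabeling above, but
   applied to the global node indices: only nodes with reducedNodeMask[n]>-1
   are numbered, and the result is stored in globalReducedNodesIndex */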
dim_t Finley_NodeFile_createDenseReducedNodeLabeling(Finley_NodeFile* in, index_t* reducedNodeMask)
{
  index_t min_node, max_node, unset_node=-1, set_node=1, node_0, node_1, *Nodes_buffer=NULL, k;
  Paso_MPI_rank buffer_rank, dest, source, *distribution=NULL;
  dim_t p, buffer_len, n, myNodes, *offsets=NULL, *loc_offsets=NULL, globalNumReducedNodes=0, myNewNodes;
  #ifdef PASO_MPI
  MPI_Status status;
  #endif

  /* get the global range of node ids */
  Finley_NodeFile_setGlobalNodeIDIndexRange(&min_node,&max_node,in);

  distribution=TMPMEMALLOC(in->MPIInfo->size+1, index_t);
  offsets=TMPMEMALLOC(in->MPIInfo->size, dim_t);
  loc_offsets=TMPMEMALLOC(in->MPIInfo->size, dim_t);

  if (! (Finley_checkPtr(distribution) || Finley_checkPtr(offsets) || Finley_checkPtr(loc_offsets)) ) {
    /* distribute the range of node ids */
    buffer_len=Paso_MPIInfo_setDistribution(in->MPIInfo,min_node,max_node,distribution);
    myNodes=distribution[in->MPIInfo->rank+1]-distribution[in->MPIInfo->rank];
    /* allocate buffers */
    Nodes_buffer=TMPMEMALLOC(buffer_len,index_t);
    if (! Finley_checkPtr(Nodes_buffer)) {
      /* fill Nodes_buffer with the unset_node marker so undefined entries can be detected */
      #pragma omp parallel for private(n) schedule(static)
      for (n=0;n<buffer_len;n++) Nodes_buffer[n]=unset_node;

      /* fill the buffer by sending portions around in a circle */
      dest=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank + 1);
      source=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank - 1);
      buffer_rank=in->MPIInfo->rank;
      for (p=0; p<in->MPIInfo->size; ++p) {
        if (p>0) { /* the initial send can be skipped */
          #ifdef PASO_MPI
          MPI_Sendrecv_replace(Nodes_buffer, buffer_len, MPI_INT,
                               dest, in->MPIInfo->msg_tag_counter, source, in->MPIInfo->msg_tag_counter,
                               in->MPIInfo->comm, &status);
          #endif
          in->MPIInfo->msg_tag_counter++;
        }
        buffer_rank=Paso_MPIInfo_mod(in->MPIInfo->size, buffer_rank-1);
        node_0=distribution[buffer_rank];
        node_1=distribution[buffer_rank+1];
        #pragma omp parallel for private(n,k) schedule(static)
        for (n=0;n<in->numNodes;n++) {
          if (reducedNodeMask[n]>-1) {
            k=in->globalNodesIndex[n];
            if ((node_0<=k) && (k<node_1)) {
              Nodes_buffer[k-node_0]=set_node;
            }
          }
        }
      }
      /* count the entries in the Nodes_buffer */
      /* TODO: OMP parallel */
      myNewNodes=0;
      for (n=0; n<myNodes; ++n) {
        if (Nodes_buffer[n] == set_node) {
          Nodes_buffer[n]=myNewNodes;
          myNewNodes++;
        }
      }
      memset(loc_offsets,0,in->MPIInfo->size*sizeof(dim_t));
      loc_offsets[in->MPIInfo->rank]=myNewNodes;
      #ifdef PASO_MPI
      MPI_Allreduce(loc_offsets,offsets,in->MPIInfo->size, MPI_INT, MPI_SUM, in->MPIInfo->comm);
      globalNumReducedNodes=0;
      for (n=0; n<in->MPIInfo->size; ++n) {
        loc_offsets[n]=globalNumReducedNodes;
        globalNumReducedNodes+=offsets[n];
      }
      #else
      globalNumReducedNodes=loc_offsets[0];
      loc_offsets[0]=0;
      #endif
      #pragma omp parallel for private(n) schedule(static)
      for (n=0; n<myNodes; ++n) Nodes_buffer[n]+=loc_offsets[in->MPIInfo->rank];
      /* now entries are collected from the buffer again by sending the entries around in a circle */
      #pragma omp parallel for private(n) schedule(static)
      for (n=0; n<in->numNodes; ++n) in->globalReducedNodesIndex[n]=loc_offsets[0]-1;
      dest=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank + 1);
      source=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank - 1);
      buffer_rank=in->MPIInfo->rank;
      for (p=0; p<in->MPIInfo->size; ++p) {
        node_0=distribution[buffer_rank];
        node_1=distribution[buffer_rank+1];
        #pragma omp parallel for private(n,k) schedule(static)
        for (n=0;n<in->numNodes;n++) {
          if (reducedNodeMask[n]>-1) {
            k=in->globalNodesIndex[n];
            if ((node_0<=k) && (k<node_1)) in->globalReducedNodesIndex[n]=Nodes_buffer[k-node_0];
          }
        }
        if (p<in->MPIInfo->size-1) { /* the last send can be skipped */
          #ifdef PASO_MPI
          MPI_Sendrecv_replace(Nodes_buffer, buffer_len, MPI_INT,
                               dest, in->MPIInfo->msg_tag_counter, source, in->MPIInfo->msg_tag_counter,
                               in->MPIInfo->comm, &status);
          #endif
          in->MPIInfo->msg_tag_counter+=1;
        }
        buffer_rank=Paso_MPIInfo_mod(in->MPIInfo->size, buffer_rank-1);
      }
    }
    TMPMEMFREE(Nodes_buffer);
  }
  TMPMEMFREE(distribution);
  TMPMEMFREE(loc_offsets);
  TMPMEMFREE(offsets);
  return globalNumReducedNodes;
}
