/* $Id$ */

/*******************************************************
 *
 *        Copyright 2003-2007 by ACceSS MNRF
 *     Copyright 2007 by University of Queensland
 *
 *              http://esscc.uq.edu.au
 *       Primary Business: Queensland, Australia
 *  Licensed under the Open Software License version 3.0
 *     http://www.opensource.org/licenses/osl-3.0.php
 *
 *******************************************************/

/**************************************************************/

/*   Finley: Mesh: NodeFile                                    */

/*   creates a dense labeling of the global degrees of freedom */
/*   and returns the new number of global degrees of freedom   */

/**************************************************************/

#include "NodeFile.h"

/**************************************************************/
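/* The labeling routines below all follow the same pattern:
 *
 *   (1) determine the global range of the ids to be relabeled and split
 *       that range into contiguous blocks across the MPI ranks via
 *       Paso_MPIInfo_setDistribution;
 *   (2) pass a marker buffer around the ranks in a ring
 *       (MPI_Sendrecv_replace to rank+1, receiving from rank-1) so that
 *       each rank can flag the ids it actually uses;
 *   (3) compact the flags on the owning rank into consecutive new labels
 *       and turn the per-rank counts into global offsets
 *       (MPI_Allreduce followed by a prefix sum);
 *   (4) send the buffer around the ring a second time so that every rank
 *       can read back the new, dense labels for its own nodes.
 *
 * Illustrative sketch of the ring neighbours (assuming Paso_MPIInfo_mod(n,i)
 * wraps i into the range [0,n), which matches its use below):
 *
 *    dest   = Paso_MPIInfo_mod(size, rank + 1);    send to the right
 *    source = Paso_MPIInfo_mod(size, rank - 1);    receive from the left
 */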
dim_t Finley_NodeFile_createDenseDOFLabeling(Finley_NodeFile* in)
{
   index_t min_dof, max_dof, unset_dof=-1, set_dof=1, dof_0, dof_1, *DOF_buffer=NULL, k;
   Paso_MPI_rank buffer_rank, dest, source, *distribution=NULL;
   dim_t p, buffer_len, n, myDOFs, *offsets=NULL, *loc_offsets=NULL, new_numGlobalDOFs=0, myNewDOFs;
   bool_t *set_new_DOF=NULL;
   #ifdef PASO_MPI
   MPI_Status status;
   #endif

   /* get the global range of degree-of-freedom ids */
   Finley_NodeFile_setGlobalDOFRange(&min_dof,&max_dof,in);

   distribution=TMPMEMALLOC(in->MPIInfo->size+1, index_t);
   offsets=TMPMEMALLOC(in->MPIInfo->size, dim_t);
   loc_offsets=TMPMEMALLOC(in->MPIInfo->size, dim_t);
   set_new_DOF=TMPMEMALLOC(in->numNodes, bool_t);

   if (! (Finley_checkPtr(distribution) || Finley_checkPtr(offsets) || Finley_checkPtr(loc_offsets) || Finley_checkPtr(set_new_DOF)) ) {
       /* distribute the range of degree-of-freedom ids across the ranks */
       buffer_len=Paso_MPIInfo_setDistribution(in->MPIInfo,min_dof,max_dof,distribution);
       myDOFs=distribution[in->MPIInfo->rank+1]-distribution[in->MPIInfo->rank];
       /* allocate buffers */
       DOF_buffer=TMPMEMALLOC(buffer_len,index_t);
       if (! Finley_checkPtr(DOF_buffer)) {
           /* fill DOF_buffer with the unset_dof marker so that unused entries can be detected */
           #pragma omp parallel for private(n) schedule(static)
           for (n=0;n<buffer_len;n++) DOF_buffer[n]=unset_dof;

           /* fill the buffer by sending portions around in a circle */
           dest=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank + 1);
           source=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank - 1);
           buffer_rank=in->MPIInfo->rank;
           for (p=0; p<in->MPIInfo->size; ++p) {
               if (p>0) { /* the initial send can be skipped */
                   #ifdef PASO_MPI
                   MPI_Sendrecv_replace(DOF_buffer, buffer_len, MPI_INT,
                                        dest, in->MPIInfo->msg_tag_counter, source, in->MPIInfo->msg_tag_counter,
                                        in->MPIInfo->comm, &status);
                   #endif
                   in->MPIInfo->msg_tag_counter++;
               }
               buffer_rank=Paso_MPIInfo_mod(in->MPIInfo->size, buffer_rank-1);
               dof_0=distribution[buffer_rank];
               dof_1=distribution[buffer_rank+1];
               #pragma omp parallel for private(n,k) schedule(static)
               for (n=0;n<in->numNodes;n++) {
                   k=in->globalDegreesOfFreedom[n];
                   if ((dof_0<=k) && (k<dof_1)) {
                       DOF_buffer[k-dof_0]=set_dof;
                   }
               }
           }
           /* count the entries in the DOF_buffer */
           /* TODO: OMP parallel */
           myNewDOFs=0;
           for (n=0; n<myDOFs; ++n) {
               if (DOF_buffer[n]==set_dof) {
                   DOF_buffer[n]=myNewDOFs;
                   myNewDOFs++;
               }
           }
           memset(loc_offsets,0,in->MPIInfo->size*sizeof(dim_t));
           loc_offsets[in->MPIInfo->rank]=myNewDOFs;
           #ifdef PASO_MPI
           MPI_Allreduce(loc_offsets, offsets, in->MPIInfo->size, MPI_INT, MPI_SUM, in->MPIInfo->comm);
           new_numGlobalDOFs=0;
           for (n=0; n<in->MPIInfo->size; ++n) {
               loc_offsets[n]=new_numGlobalDOFs;
               new_numGlobalDOFs+=offsets[n];
           }
           #else
           new_numGlobalDOFs=loc_offsets[0];
           loc_offsets[0]=0;
           #endif
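           /* illustrative example of the offset computation above: with three
              ranks counting myNewDOFs = 4, 2 and 3, the Allreduce yields
              offsets = {4,2,3}; the prefix sum then gives loc_offsets = {0,4,6}
              and new_numGlobalDOFs = 9, so rank 0 hands out labels 0..3,
              rank 1 labels 4..5 and rank 2 labels 6..8 */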
           #pragma omp parallel
           {
               #pragma omp for private(n) schedule(static)
               for (n=0; n<myDOFs; ++n) DOF_buffer[n]+=loc_offsets[in->MPIInfo->rank];
               /* now entries are collected from the buffer again by sending the entries around in a circle */
               #pragma omp for private(n) schedule(static)
               for (n=0; n<in->numNodes; ++n) set_new_DOF[n]=TRUE;
           }
           dest=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank + 1);
           source=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank - 1);
           buffer_rank=in->MPIInfo->rank;
           for (p=0; p<in->MPIInfo->size; ++p) {
               dof_0=distribution[buffer_rank];
               dof_1=distribution[buffer_rank+1];
               #pragma omp parallel for private(n,k) schedule(static)
               for (n=0;n<in->numNodes;n++) {
                   k=in->globalDegreesOfFreedom[n];
                   if (set_new_DOF[n] && (dof_0<=k) && (k<dof_1)) {
                       in->globalDegreesOfFreedom[n]=DOF_buffer[k-dof_0];
                       set_new_DOF[n]=FALSE;
                   }
               }
               if (p<in->MPIInfo->size-1) { /* the last send can be skipped */
                   #ifdef PASO_MPI
                   MPI_Sendrecv_replace(DOF_buffer, buffer_len, MPI_INT,
                                        dest, in->MPIInfo->msg_tag_counter, source, in->MPIInfo->msg_tag_counter,
                                        in->MPIInfo->comm, &status);
                   #endif
                   in->MPIInfo->msg_tag_counter+=1;
               }
               buffer_rank=Paso_MPIInfo_mod(in->MPIInfo->size, buffer_rank-1);
           }
       }
       TMPMEMFREE(DOF_buffer);
   }
   TMPMEMFREE(distribution);
   TMPMEMFREE(loc_offsets);
   TMPMEMFREE(offsets);
   TMPMEMFREE(set_new_DOF);
   return new_numGlobalDOFs;
}
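/* Note: Finley_NodeFile_createDenseDOFLabeling relabels in place, i.e.
 * in->globalDegreesOfFreedom is overwritten with the new dense labels, and
 * the return value is the total number of distinct global degrees of freedom.
 * Illustrative call (the variable name 'nodes' is arbitrary):
 *
 *    dim_t numGlobalDOFs = Finley_NodeFile_createDenseDOFLabeling(nodes);
 */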

void Finley_NodeFile_assignMPIRankToDOFs(Finley_NodeFile* in, Paso_MPI_rank* mpiRankOfDOF, index_t *distribution)
{
   index_t min_DOF, max_DOF, k;
   dim_t n;
   Paso_MPI_rank p, p_min=in->MPIInfo->size, p_max=-1;
   /* first calculate the min and max DOF on this processor to reduce the cost of searching */
   Finley_NodeFile_setDOFRange(&min_DOF,&max_DOF,in);

   for (p=0; p<in->MPIInfo->size; ++p) {
       if (distribution[p]<=min_DOF) p_min=p;
       if (distribution[p]<=max_DOF) p_max=p;
   }
   #pragma omp parallel for private(n,k,p) schedule(static)
   for (n=0; n<in->numNodes; ++n) {
       k=in->globalDegreesOfFreedom[n];
       for (p=p_min; p<=p_max; ++p) {
           if (k<distribution[p+1]) {
               mpiRankOfDOF[n]=p;
               break;
           }
       }
   }
}
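/* Note: the distribution argument above is expected to hold size+1 monotone
 * entries, with distribution[p] the first global DOF handled by rank p, which
 * is why distribution[p+1] can be used as the exclusive upper bound of the
 * search. */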
dim_t Finley_NodeFile_createDenseReducedDOFLabeling(Finley_NodeFile* in, index_t* reducedNodeMask)
{
   index_t min_dof, max_dof, unset_dof=-1, set_dof=1, dof_0, dof_1, *DOF_buffer=NULL, k;
   Paso_MPI_rank buffer_rank, dest, source, *distribution=NULL;
   dim_t p, buffer_len, n, myDOFs, *offsets=NULL, *loc_offsets=NULL, globalNumReducedDOFs=0, myNewDOFs;
   #ifdef PASO_MPI
   MPI_Status status;
   #endif

   /* get the global range of degree-of-freedom ids */
   Finley_NodeFile_setGlobalDOFRange(&min_dof,&max_dof,in);

   distribution=TMPMEMALLOC(in->MPIInfo->size+1, index_t);
   offsets=TMPMEMALLOC(in->MPIInfo->size, dim_t);
   loc_offsets=TMPMEMALLOC(in->MPIInfo->size, dim_t);

   if (! (Finley_checkPtr(distribution) || Finley_checkPtr(offsets) || Finley_checkPtr(loc_offsets)) ) {
       /* distribute the range of degree-of-freedom ids across the ranks */
       buffer_len=Paso_MPIInfo_setDistribution(in->MPIInfo,min_dof,max_dof,distribution);
       myDOFs=distribution[in->MPIInfo->rank+1]-distribution[in->MPIInfo->rank];
       /* allocate buffers */
       DOF_buffer=TMPMEMALLOC(buffer_len,index_t);
       if (! Finley_checkPtr(DOF_buffer)) {
           /* fill DOF_buffer with the unset_dof marker so that unused entries can be detected */
           #pragma omp parallel for private(n) schedule(static)
           for (n=0;n<buffer_len;n++) DOF_buffer[n]=unset_dof;

           /* fill the buffer by sending portions around in a circle */
           dest=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank + 1);
           source=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank - 1);
           buffer_rank=in->MPIInfo->rank;
           for (p=0; p<in->MPIInfo->size; ++p) {
               if (p>0) { /* the initial send can be skipped */
                   #ifdef PASO_MPI
                   MPI_Sendrecv_replace(DOF_buffer, buffer_len, MPI_INT,
                                        dest, in->MPIInfo->msg_tag_counter, source, in->MPIInfo->msg_tag_counter,
                                        in->MPIInfo->comm, &status);
                   #endif
                   in->MPIInfo->msg_tag_counter++;
               }
               buffer_rank=Paso_MPIInfo_mod(in->MPIInfo->size, buffer_rank-1);
               dof_0=distribution[buffer_rank];
               dof_1=distribution[buffer_rank+1];
               #pragma omp parallel for private(n,k) schedule(static)
               for (n=0;n<in->numNodes;n++) {
                   if (reducedNodeMask[n]>-1) {
                       k=in->globalDegreesOfFreedom[n];
                       if ((dof_0<=k) && (k<dof_1)) {
                           DOF_buffer[k-dof_0]=set_dof;
                       }
                   }
               }
           }
           /* count the entries in the DOF_buffer */
           /* TODO: OMP parallel */
           myNewDOFs=0;
           for (n=0; n<myDOFs; ++n) {
               if (DOF_buffer[n]==set_dof) {
                   DOF_buffer[n]=myNewDOFs;
                   myNewDOFs++;
               }
           }
           memset(loc_offsets,0,in->MPIInfo->size*sizeof(dim_t));
           loc_offsets[in->MPIInfo->rank]=myNewDOFs;
           #ifdef PASO_MPI
           MPI_Allreduce(loc_offsets, offsets, in->MPIInfo->size, MPI_INT, MPI_SUM, in->MPIInfo->comm);
           globalNumReducedDOFs=0;
           for (n=0; n<in->MPIInfo->size; ++n) {
               loc_offsets[n]=globalNumReducedDOFs;
               globalNumReducedDOFs+=offsets[n];
           }
           #else
           globalNumReducedDOFs=loc_offsets[0];
           loc_offsets[0]=0;
           #endif
           #pragma omp parallel for private(n) schedule(static)
           for (n=0; n<myDOFs; ++n) DOF_buffer[n]+=loc_offsets[in->MPIInfo->rank];
           /* now entries are collected from the buffer again by sending the entries around in a circle */
           #pragma omp parallel for private(n) schedule(static)
           for (n=0; n<in->numNodes; ++n) in->globalReducedDOFIndex[n]=loc_offsets[0]-1;
           dest=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank + 1);
           source=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank - 1);
           buffer_rank=in->MPIInfo->rank;
           for (p=0; p<in->MPIInfo->size; ++p) {
               dof_0=distribution[buffer_rank];
               dof_1=distribution[buffer_rank+1];
               #pragma omp parallel for private(n,k) schedule(static)
               for (n=0;n<in->numNodes;n++) {
                   if (reducedNodeMask[n]>-1) {
                       k=in->globalDegreesOfFreedom[n];
                       if ((dof_0<=k) && (k<dof_1)) in->globalReducedDOFIndex[n]=DOF_buffer[k-dof_0];
                   }
               }
               if (p<in->MPIInfo->size-1) { /* the last send can be skipped */
                   #ifdef PASO_MPI
                   MPI_Sendrecv_replace(DOF_buffer, buffer_len, MPI_INT,
                                        dest, in->MPIInfo->msg_tag_counter, source, in->MPIInfo->msg_tag_counter,
                                        in->MPIInfo->comm, &status);
                   #endif
                   in->MPIInfo->msg_tag_counter+=1;
               }
               buffer_rank=Paso_MPIInfo_mod(in->MPIInfo->size, buffer_rank-1);
           }
       }
       TMPMEMFREE(DOF_buffer);
   }
   TMPMEMFREE(distribution);
   TMPMEMFREE(loc_offsets);
   TMPMEMFREE(offsets);
   return globalNumReducedDOFs;
}
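/* Note: the reduced labeling above only visits nodes with
 * reducedNodeMask[n] > -1; all other entries of in->globalReducedDOFIndex
 * keep their initial value loc_offsets[0]-1, i.e. -1, marking them as not
 * being part of the reduced system. */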
dim_t Finley_NodeFile_createDenseNodeLabeling(Finley_NodeFile* in, index_t* node_distribution, const index_t* dof_distribution)
{
   index_t myFirstDOF, myLastDOF, max_id, min_id, loc_max_id, loc_min_id, dof, id, itmp, nodeID_0, nodeID_1, dof_0, dof_1, *Node_buffer=NULL;
   dim_t n, my_buffer_len, buffer_len, globalNumNodes=0, myNewNumNodes;
   Paso_MPI_rank p, dest, source, buffer_rank;
   const index_t unset_nodeID=-1, set_nodeID=1;
   const dim_t header_len=2;
   #ifdef PASO_MPI
   MPI_Status status;
   #endif
   Paso_MPI_rank myRank=in->MPIInfo->rank;

   /* find the range of node ids controlled by this rank */

   myFirstDOF=dof_distribution[in->MPIInfo->rank];
   myLastDOF=dof_distribution[in->MPIInfo->rank+1];
   max_id=-INDEX_T_MAX;
   min_id=INDEX_T_MAX;
   #pragma omp parallel private(loc_max_id,loc_min_id)
   {
       loc_max_id=max_id;
       loc_min_id=min_id;
       #pragma omp for private(n,dof,id) schedule(static)
       for (n=0;n<in->numNodes;n++) {
           dof=in->globalDegreesOfFreedom[n];
           id=in->Id[n];
           if ((myFirstDOF<=dof) && (dof<myLastDOF)) {
               loc_max_id=MAX(loc_max_id,id);
               loc_min_id=MIN(loc_min_id,id);
           }
       }
       #pragma omp critical
       {
           max_id=MAX(loc_max_id,max_id);
           min_id=MIN(loc_min_id,min_id);
       }
   }
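   /* The block above is a manual min/max reduction: each thread works on
    * private copies which are merged in a critical section.  With OpenMP 3.1
    * or later the same result could be written with reduction clauses
    * (sketch only):
    *
    *    #pragma omp parallel for private(n,dof,id) schedule(static) \
    *            reduction(max:max_id) reduction(min:min_id)
    *    for (n=0;n<in->numNodes;n++) { ... }
    */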
   /* allocate a buffer */
   my_buffer_len = (max_id>=min_id) ? max_id-min_id+1 : 0;

   #ifdef PASO_MPI
   MPI_Allreduce(&my_buffer_len, &buffer_len, 1, MPI_INT, MPI_MAX, in->MPIInfo->comm);
   #else
   buffer_len=my_buffer_len;
   #endif

   Node_buffer=TMPMEMALLOC(buffer_len+header_len, index_t);
   if (! Finley_checkPtr(Node_buffer)) {
       /* mark and count the nodes in use */
       #pragma omp parallel
       {
           #pragma omp for private(n) schedule(static)
           for (n=0;n<buffer_len+header_len;n++) Node_buffer[n]=unset_nodeID;
           #pragma omp for private(n) schedule(static)
           for (n=0;n<in->numNodes;n++) in->globalNodesIndex[n]=-1;
           #pragma omp for private(n,dof,id) schedule(static)
           for (n=0;n<in->numNodes;n++) {
               dof=in->globalDegreesOfFreedom[n];
               id=in->Id[n];
               if ((myFirstDOF<=dof) && (dof<myLastDOF)) Node_buffer[id-min_id+header_len]=set_nodeID;
           }
       }
       myNewNumNodes=0;
       for (n=0;n<my_buffer_len;n++) {
           if (Node_buffer[header_len+n]==set_nodeID) {
               Node_buffer[header_len+n]=myNewNumNodes;
               myNewNumNodes++;
           }
       }
       /* make the local number of nodes globally available */
       #ifdef PASO_MPI
       MPI_Allgather(&myNewNumNodes,1,MPI_INT,node_distribution,1,MPI_INT,in->MPIInfo->comm);
       #else
       node_distribution[0]=myNewNumNodes;
       #endif
       globalNumNodes=0;
       for (p=0; p<in->MPIInfo->size; ++p) {
           itmp=node_distribution[p];
           node_distribution[p]=globalNumNodes;
           globalNumNodes+=itmp;
       }
       node_distribution[in->MPIInfo->size]=globalNumNodes;

       /* add the global offset of this rank to the new node labels in the buffer */
       itmp=node_distribution[in->MPIInfo->rank];
       #pragma omp parallel for private(n) schedule(static)
       for (n=0;n<my_buffer_len;n++) Node_buffer[n+header_len]+=itmp;

       /* now the buffer is sent around to assign the global node indices: */
       dest=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank + 1);
       source=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank - 1);
       Node_buffer[0]=min_id;
       Node_buffer[1]=max_id;
       buffer_rank=in->MPIInfo->rank;
       for (p=0; p<in->MPIInfo->size; ++p) {
           nodeID_0=Node_buffer[0];
           nodeID_1=Node_buffer[1];
           dof_0=dof_distribution[buffer_rank];
           dof_1=dof_distribution[buffer_rank+1];
           if (nodeID_0<=nodeID_1) {
               #pragma omp parallel for private(n,dof,id) schedule(static)
               for (n=0;n<in->numNodes;n++) {
                   dof=in->globalDegreesOfFreedom[n];
                   id=in->Id[n]-nodeID_0;
                   if ((dof_0<=dof) && (dof<dof_1) && (id>=0) && (id<=nodeID_1-nodeID_0)) in->globalNodesIndex[n]=Node_buffer[id+header_len];
               }
           }
           if (p<in->MPIInfo->size-1) { /* the last send can be skipped */
               #ifdef PASO_MPI
               MPI_Sendrecv_replace(Node_buffer, buffer_len+header_len, MPI_INT,
                                    dest, in->MPIInfo->msg_tag_counter, source, in->MPIInfo->msg_tag_counter,
                                    in->MPIInfo->comm, &status);
               #endif
               in->MPIInfo->msg_tag_counter+=1;
           }
           buffer_rank=Paso_MPIInfo_mod(in->MPIInfo->size, buffer_rank-1);
       }
   }
   TMPMEMFREE(Node_buffer);
   return globalNumNodes;
}
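/* Note: the first header_len (=2) entries of Node_buffer carry the min_id and
 * max_id of the rank that filled the buffer, so each receiver in the ring can
 * decode which node ids the payload refers to.  On return, node_distribution
 * holds size+1 entries: the first new global node index of each rank followed
 * by the total count globalNumNodes. */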

dim_t Finley_NodeFile_createDenseReducedNodeLabeling(Finley_NodeFile* in, index_t* reducedNodeMask)
{
   index_t min_node, max_node, unset_node=-1, set_node=1, node_0, node_1, *Nodes_buffer=NULL, k;
   Paso_MPI_rank buffer_rank, dest, source, *distribution=NULL;
   dim_t p, buffer_len, n, myNodes, *offsets=NULL, *loc_offsets=NULL, globalNumReducedNodes=0, myNewNodes;
   #ifdef PASO_MPI
   MPI_Status status;
   #endif

   /* get the global range of node indices */
   Finley_NodeFile_setGlobalNodeIDIndexRange(&min_node,&max_node,in);

   distribution=TMPMEMALLOC(in->MPIInfo->size+1, index_t);
   offsets=TMPMEMALLOC(in->MPIInfo->size, dim_t);
   loc_offsets=TMPMEMALLOC(in->MPIInfo->size, dim_t);

   if (! (Finley_checkPtr(distribution) || Finley_checkPtr(offsets) || Finley_checkPtr(loc_offsets)) ) {
       /* distribute the range of node indices across the ranks */
       buffer_len=Paso_MPIInfo_setDistribution(in->MPIInfo,min_node,max_node,distribution);
       myNodes=distribution[in->MPIInfo->rank+1]-distribution[in->MPIInfo->rank];
       /* allocate buffers */
       Nodes_buffer=TMPMEMALLOC(buffer_len,index_t);
       if (! Finley_checkPtr(Nodes_buffer)) {
           /* fill Nodes_buffer with the unset_node marker so that unused entries can be detected */
           #pragma omp parallel for private(n) schedule(static)
           for (n=0;n<buffer_len;n++) Nodes_buffer[n]=unset_node;

           /* fill the buffer by sending portions around in a circle */
           dest=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank + 1);
           source=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank - 1);
           buffer_rank=in->MPIInfo->rank;
           for (p=0; p<in->MPIInfo->size; ++p) {
               if (p>0) { /* the initial send can be skipped */
                   #ifdef PASO_MPI
                   MPI_Sendrecv_replace(Nodes_buffer, buffer_len, MPI_INT,
                                        dest, in->MPIInfo->msg_tag_counter, source, in->MPIInfo->msg_tag_counter,
                                        in->MPIInfo->comm, &status);
                   #endif
                   in->MPIInfo->msg_tag_counter++;
               }
               buffer_rank=Paso_MPIInfo_mod(in->MPIInfo->size, buffer_rank-1);
               node_0=distribution[buffer_rank];
               node_1=distribution[buffer_rank+1];
               #pragma omp parallel for private(n,k) schedule(static)
               for (n=0;n<in->numNodes;n++) {
                   if (reducedNodeMask[n]>-1) {
                       k=in->globalNodesIndex[n];
                       if ((node_0<=k) && (k<node_1)) {
                           Nodes_buffer[k-node_0]=set_node;
                       }
                   }
               }
           }
           /* count the entries in the Nodes_buffer */
           /* TODO: OMP parallel */
           myNewNodes=0;
           for (n=0; n<myNodes; ++n) {
               if (Nodes_buffer[n]==set_node) {
                   Nodes_buffer[n]=myNewNodes;
                   myNewNodes++;
               }
           }
           memset(loc_offsets,0,in->MPIInfo->size*sizeof(dim_t));
           loc_offsets[in->MPIInfo->rank]=myNewNodes;
           #ifdef PASO_MPI
           MPI_Allreduce(loc_offsets, offsets, in->MPIInfo->size, MPI_INT, MPI_SUM, in->MPIInfo->comm);
           globalNumReducedNodes=0;
           for (n=0; n<in->MPIInfo->size; ++n) {
               loc_offsets[n]=globalNumReducedNodes;
               globalNumReducedNodes+=offsets[n];
           }
           #else
           globalNumReducedNodes=loc_offsets[0];
           loc_offsets[0]=0;
           #endif
           #pragma omp parallel for private(n) schedule(static)
           for (n=0; n<myNodes; ++n) Nodes_buffer[n]+=loc_offsets[in->MPIInfo->rank];
           /* now entries are collected from the buffer again by sending the entries around in a circle */
           #pragma omp parallel for private(n) schedule(static)
           for (n=0; n<in->numNodes; ++n) in->globalReducedNodesIndex[n]=loc_offsets[0]-1;
           dest=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank + 1);
           source=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank - 1);
           buffer_rank=in->MPIInfo->rank;
           for (p=0; p<in->MPIInfo->size; ++p) {
               node_0=distribution[buffer_rank];
               node_1=distribution[buffer_rank+1];
               #pragma omp parallel for private(n,k) schedule(static)
               for (n=0;n<in->numNodes;n++) {
                   if (reducedNodeMask[n]>-1) {
                       k=in->globalNodesIndex[n];
                       if ((node_0<=k) && (k<node_1)) in->globalReducedNodesIndex[n]=Nodes_buffer[k-node_0];
                   }
               }
               if (p<in->MPIInfo->size-1) { /* the last send can be skipped */
                   #ifdef PASO_MPI
                   MPI_Sendrecv_replace(Nodes_buffer, buffer_len, MPI_INT,
                                        dest, in->MPIInfo->msg_tag_counter, source, in->MPIInfo->msg_tag_counter,
                                        in->MPIInfo->comm, &status);
                   #endif
                   in->MPIInfo->msg_tag_counter+=1;
               }
               buffer_rank=Paso_MPIInfo_mod(in->MPIInfo->size, buffer_rank-1);
           }
       }
       TMPMEMFREE(Nodes_buffer);
   }
   TMPMEMFREE(distribution);
   TMPMEMFREE(loc_offsets);
   TMPMEMFREE(offsets);
   return globalNumReducedNodes;
}
