/[escript]/trunk-mpi-branch/finley/src/NodeFile_createDenseLabelings.c
Revision 1272 - Fri Aug 24 00:40:43 2007 UTC - gross
some bugs in the node-node interpolation fixed

/*
 ************************************************************
 *          Copyright 2006 by ACcESS MNRF                   *
 *                                                          *
 *              http://www.access.edu.au                    *
 *       Primary Business: Queensland, Australia            *
 *  Licensed under the Open Software License version 3.0    *
 *     http://www.opensource.org/licenses/osl-3.0.php       *
 *                                                          *
 ************************************************************
*/

/**************************************************************/

/*   Finley: Mesh: NodeFile */

/*   creates a dense labeling of the global degrees of freedom */
/*   and returns the new number of global degrees of freedom   */

/**************************************************************/

/*  Author: gross@access.edu.au */
/*  Version: $Id:$ */

/**************************************************************/

#include "NodeFile.h"

/**************************************************************/

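/*  Overview (added note): all four labeling routines below follow the same two-phase
    ring scheme. Paso_MPIInfo_setDistribution splits the global id range into one
    contiguous slice [distribution[r], distribution[r+1]) per rank. In the first phase
    a slice-sized buffer is rotated around all ranks so each rank can mark which ids in
    the slice it actually uses; the marked entries are then compacted into a dense local
    numbering and the ranks agree on global offsets via MPI_Allreduce (when compiled
    with PASO_MPI). In the second phase the relabeled buffer is rotated around the ring
    once more so every rank can pick up the new dense label for each id it references. */
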
dim_t Finley_NodeFile_createDenseDOFLabeling(Finley_NodeFile* in)
{
  index_t min_dof, max_dof, unset_dof=-1, set_dof=1, dof_0, dof_1, *DOF_buffer=NULL, k;
  Paso_MPI_rank buffer_rank, dest, source, *distribution=NULL;
  dim_t p, buffer_len, n, myDOFs, *offsets=NULL, *loc_offsets=NULL, new_numGlobalDOFs=0, myNewDOFs;
  bool_t *set_new_DOF=NULL;
  #ifdef PASO_MPI
  MPI_Status status;
  #endif

  /* get the global range of degrees of freedom */
  Finley_NodeFile_setGlobalDOFRange(&min_dof,&max_dof,in);

  distribution=TMPMEMALLOC(in->MPIInfo->size+1, index_t);
  offsets=TMPMEMALLOC(in->MPIInfo->size, dim_t);
  loc_offsets=TMPMEMALLOC(in->MPIInfo->size, dim_t);
  set_new_DOF=TMPMEMALLOC(in->numNodes, bool_t);

  if ( ! (Finley_checkPtr(distribution) || Finley_checkPtr(offsets) || Finley_checkPtr(loc_offsets) || Finley_checkPtr(set_new_DOF)) ) {
    /* distribute the range of node ids */
    buffer_len=Paso_MPIInfo_setDistribution(in->MPIInfo,min_dof,max_dof,distribution);
    myDOFs=distribution[in->MPIInfo->rank+1]-distribution[in->MPIInfo->rank];
    /* allocate buffers */
    DOF_buffer=TMPMEMALLOC(buffer_len,index_t);
    if (! Finley_checkPtr(DOF_buffer)) {
      /* fill DOF_buffer by the unset_dof marker to check if nodes are defined */
      #pragma omp parallel for private(n) schedule(static)
      for (n=0;n<buffer_len;n++) DOF_buffer[n]=unset_dof;

      /* fill the buffer by sending portions around in a circle */
      dest=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank + 1);
      source=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank - 1);
      buffer_rank=in->MPIInfo->rank;
      for (p=0; p<in->MPIInfo->size; ++p) {
        if (p>0) { /* the initial send can be skipped */
          #ifdef PASO_MPI
          MPI_Sendrecv_replace(DOF_buffer, buffer_len, MPI_INT,
                               dest, in->MPIInfo->msg_tag_counter, source, in->MPIInfo->msg_tag_counter,
                               in->MPIInfo->comm, &status);
          #endif
          in->MPIInfo->msg_tag_counter++;
        }
        buffer_rank=Paso_MPIInfo_mod(in->MPIInfo->size, buffer_rank-1);
        dof_0=distribution[buffer_rank];
        dof_1=distribution[buffer_rank+1];
        #pragma omp parallel for private(n,k) schedule(static)
        for (n=0;n<in->numNodes;n++) {
          k=in->globalDegreesOfFreedom[n];
          if ((dof_0<=k) && (k<dof_1)) {
            DOF_buffer[k-dof_0]=set_dof;
          }
        }
      }
      /* count the entries in the DOF_buffer */
      /* TODO: OMP parallel */
      myNewDOFs=0;
      for (n=0; n<myDOFs; ++n) {
        if (DOF_buffer[n]==set_dof) {
          DOF_buffer[n]=myNewDOFs;
          myNewDOFs++;
        }
      }
      memset(loc_offsets,0,in->MPIInfo->size*sizeof(dim_t));
      loc_offsets[in->MPIInfo->rank]=myNewDOFs;
      #ifdef PASO_MPI
      MPI_Allreduce(loc_offsets,offsets,in->MPIInfo->size, MPI_INT, MPI_SUM, in->MPIInfo->comm);
      new_numGlobalDOFs=0;
      for (n=0; n<in->MPIInfo->size; ++n) {
        loc_offsets[n]=new_numGlobalDOFs;
        new_numGlobalDOFs+=offsets[n];
      }
      #else
      new_numGlobalDOFs=loc_offsets[0];
      loc_offsets[0]=0;
      #endif
      #pragma omp parallel for private(n) schedule(static)
      for (n=0; n<myDOFs; ++n) DOF_buffer[n]+=loc_offsets[in->MPIInfo->rank];
      /* now entries are collected from the buffer again by sending the entries around in a circle */
      #pragma omp parallel for private(n) schedule(static)
      for (n=0; n<in->numNodes; ++n) set_new_DOF[n]=TRUE;
      dest=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank + 1);
      source=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank - 1);
      buffer_rank=in->MPIInfo->rank;
      for (p=0; p<in->MPIInfo->size; ++p) {
        dof_0=distribution[buffer_rank];
        dof_1=distribution[buffer_rank+1];
        #pragma omp parallel for private(n,k) schedule(static)
        for (n=0;n<in->numNodes;n++) {
          k=in->globalDegreesOfFreedom[n];
          if (set_new_DOF[n] && (dof_0<=k) && (k<dof_1)) {
            in->globalDegreesOfFreedom[n]=DOF_buffer[k-dof_0];
            set_new_DOF[n]=FALSE;
          }
        }
        if (p<in->MPIInfo->size-1) { /* the last send can be skipped */
          #ifdef PASO_MPI
          MPI_Sendrecv_replace(DOF_buffer, buffer_len, MPI_INT,
                               dest, in->MPIInfo->msg_tag_counter, source, in->MPIInfo->msg_tag_counter,
                               in->MPIInfo->comm, &status);
          #endif
          in->MPIInfo->msg_tag_counter+=1;
        }
        buffer_rank=Paso_MPIInfo_mod(in->MPIInfo->size, buffer_rank-1);
      }
    }
    TMPMEMFREE(DOF_buffer);
  }
  TMPMEMFREE(distribution);
  TMPMEMFREE(loc_offsets);
  TMPMEMFREE(offsets);
  TMPMEMFREE(set_new_DOF);
  return new_numGlobalDOFs;
}
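
/* A minimal, stand-alone sketch (not compiled, assumes plain MPI without the Paso
   wrappers) of the ring pattern used above: the slice buffer is passed to rank+1 and
   received from rank-1, so after size-1 calls to MPI_Sendrecv_replace every rank has
   held every slice exactly once. The names ring_demo and SLICE_LEN are made up for
   this illustration only. */
#if 0
#include <mpi.h>
#include <stdio.h>

#define SLICE_LEN 4

static void ring_demo(MPI_Comm comm)
{
  int rank, size, p, n, tag=0;
  int slice[SLICE_LEN];
  MPI_Status status;
  MPI_Comm_rank(comm, &rank);
  MPI_Comm_size(comm, &size);
  /* start with a slice that identifies its original owner */
  for (n=0; n<SLICE_LEN; n++) slice[n]=rank*SLICE_LEN+n;
  for (p=0; p<size; p++) {
    if (p>0) { /* as in the routine above, the initial send is skipped */
      MPI_Sendrecv_replace(slice, SLICE_LEN, MPI_INT,
                           (rank+1)%size, tag, (rank-1+size)%size, tag,
                           comm, &status);
    }
    /* in round p the buffer holds the slice originally filled by rank (rank-p+size)%size */
    printf("rank %d, round %d: slice of rank %d\n", rank, p, (rank-p+size)%size);
  }
}
#endif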

void Finley_NodeFile_assignMPIRankToDOFs(Finley_NodeFile* in,Paso_MPI_rank* mpiRankOfDOF, index_t *distribution){
  index_t min_DOF, max_DOF, k;
  dim_t n;
  Paso_MPI_rank p, p_min=in->MPIInfo->size, p_max=-1;
  /* first we calculate the min and max dof on this processor to reduce costs for searching */
  Finley_NodeFile_setDOFRange(&min_DOF,&max_DOF,in);

  for (p=0; p<in->MPIInfo->size; ++p) {
    if (distribution[p]<=min_DOF) p_min=p;
    if (distribution[p]<=max_DOF) p_max=p;
  }
  #pragma omp parallel for private(n,k,p) schedule(static)
  for (n=0; n<in->numNodes; ++n) {
    k=in->globalDegreesOfFreedom[n];
    for (p=p_min; p<=p_max; ++p) {
      if (k<distribution[p+1]) {
        mpiRankOfDOF[n]=p;
        break;
      }
    }
  }
}
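
/* Worked example (made-up numbers) for Finley_NodeFile_assignMPIRankToDOFs above:
   with 4 ranks and distribution = {0,5,9,14,20}, a processor whose local DOFs span
   6..12 gets p_min=1 and p_max=2; a node with globalDegreesOfFreedom[n]=11 fails the
   test 11<distribution[2]=9 but passes 11<distribution[3]=14, so mpiRankOfDOF[n]=2,
   i.e. the DOF is assigned to the rank whose slice [9,14) contains it. */
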
dim_t Finley_NodeFile_createDenseReducedDOFLabeling(Finley_NodeFile* in,index_t* reducedNodeMask)
{
  index_t min_dof, max_dof, unset_dof=-1, set_dof=1, dof_0, dof_1, *DOF_buffer=NULL, k;
  Paso_MPI_rank buffer_rank, dest, source, *distribution=NULL;
  dim_t p, buffer_len, n, myDOFs, *offsets=NULL, *loc_offsets=NULL, globalNumReducedDOFs=0, myNewDOFs;
  #ifdef PASO_MPI
  MPI_Status status;
  #endif

  /* get the global range of degrees of freedom */
  Finley_NodeFile_setGlobalDOFRange(&min_dof,&max_dof,in);

  distribution=TMPMEMALLOC(in->MPIInfo->size+1, index_t);
  offsets=TMPMEMALLOC(in->MPIInfo->size, dim_t);
  loc_offsets=TMPMEMALLOC(in->MPIInfo->size, dim_t);

  if ( ! (Finley_checkPtr(distribution) || Finley_checkPtr(offsets) || Finley_checkPtr(loc_offsets)) ) {
    /* distribute the range of node ids */
    buffer_len=Paso_MPIInfo_setDistribution(in->MPIInfo,min_dof,max_dof,distribution);
    myDOFs=distribution[in->MPIInfo->rank+1]-distribution[in->MPIInfo->rank];
    /* allocate buffers */
    DOF_buffer=TMPMEMALLOC(buffer_len,index_t);
    if (! Finley_checkPtr(DOF_buffer)) {
      /* fill DOF_buffer by the unset_dof marker to check if nodes are defined */
      #pragma omp parallel for private(n) schedule(static)
      for (n=0;n<buffer_len;n++) DOF_buffer[n]=unset_dof;

      /* fill the buffer by sending portions around in a circle */
      dest=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank + 1);
      source=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank - 1);
      buffer_rank=in->MPIInfo->rank;
      for (p=0; p<in->MPIInfo->size; ++p) {
        if (p>0) { /* the initial send can be skipped */
          #ifdef PASO_MPI
          MPI_Sendrecv_replace(DOF_buffer, buffer_len, MPI_INT,
                               dest, in->MPIInfo->msg_tag_counter, source, in->MPIInfo->msg_tag_counter,
                               in->MPIInfo->comm, &status);
          #endif
          in->MPIInfo->msg_tag_counter++;
        }
        buffer_rank=Paso_MPIInfo_mod(in->MPIInfo->size, buffer_rank-1);
        dof_0=distribution[buffer_rank];
        dof_1=distribution[buffer_rank+1];
        #pragma omp parallel for private(n,k) schedule(static)
        for (n=0;n<in->numNodes;n++) {
          if (reducedNodeMask[n]>-1) {
            k=in->globalDegreesOfFreedom[n];
            if ((dof_0<=k) && (k<dof_1)) {
              DOF_buffer[k-dof_0]=set_dof;
            }
          }
        }
      }
      /* count the entries in the DOF_buffer */
      /* TODO: OMP parallel */
      myNewDOFs=0;
      for (n=0; n<myDOFs; ++n) {
        if (DOF_buffer[n]==set_dof) {
          DOF_buffer[n]=myNewDOFs;
          myNewDOFs++;
        }
      }
      memset(loc_offsets,0,in->MPIInfo->size*sizeof(dim_t));
      loc_offsets[in->MPIInfo->rank]=myNewDOFs;
      #ifdef PASO_MPI
      MPI_Allreduce(loc_offsets,offsets,in->MPIInfo->size, MPI_INT, MPI_SUM, in->MPIInfo->comm);
      globalNumReducedDOFs=0;
      for (n=0; n<in->MPIInfo->size; ++n) {
        loc_offsets[n]=globalNumReducedDOFs;
        globalNumReducedDOFs+=offsets[n];
      }
      #else
      globalNumReducedDOFs=loc_offsets[0];
      loc_offsets[0]=0;
      #endif
      #pragma omp parallel for private(n) schedule(static)
      for (n=0; n<myDOFs; ++n) DOF_buffer[n]+=loc_offsets[in->MPIInfo->rank];
      /* now entries are collected from the buffer again by sending the entries around in a circle */
      #pragma omp parallel for private(n) schedule(static)
      for (n=0; n<in->numNodes; ++n) in->globalReducedDOFIndex[n]=loc_offsets[0]-1;
      dest=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank + 1);
      source=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank - 1);
      buffer_rank=in->MPIInfo->rank;
      for (p=0; p<in->MPIInfo->size; ++p) {
        dof_0=distribution[buffer_rank];
        dof_1=distribution[buffer_rank+1];
        #pragma omp parallel for private(n,k) schedule(static)
        for (n=0;n<in->numNodes;n++) {
          if (reducedNodeMask[n]>-1) {
            k=in->globalDegreesOfFreedom[n];
            if ((dof_0<=k) && (k<dof_1)) in->globalReducedDOFIndex[n]=DOF_buffer[k-dof_0];
          }
        }
        if (p<in->MPIInfo->size-1) { /* the last send can be skipped */
          #ifdef PASO_MPI
          MPI_Sendrecv_replace(DOF_buffer, buffer_len, MPI_INT,
                               dest, in->MPIInfo->msg_tag_counter, source, in->MPIInfo->msg_tag_counter,
                               in->MPIInfo->comm, &status);
          #endif
          in->MPIInfo->msg_tag_counter+=1;
        }
        buffer_rank=Paso_MPIInfo_mod(in->MPIInfo->size, buffer_rank-1);
      }
    }
    TMPMEMFREE(DOF_buffer);
  }
  TMPMEMFREE(distribution);
  TMPMEMFREE(loc_offsets);
  TMPMEMFREE(offsets);
  return globalNumReducedDOFs;
}
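
/* Note on Finley_NodeFile_createDenseReducedDOFLabeling above: it follows the same
   scheme as Finley_NodeFile_createDenseDOFLabeling but only labels DOFs of nodes with
   reducedNodeMask[n]>-1, writes the result into in->globalReducedDOFIndex rather than
   overwriting globalDegreesOfFreedom, and initializes globalReducedDOFIndex to
   loc_offsets[0]-1 (i.e. -1) so nodes outside the reduced set stay marked as unset. */
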
dim_t Finley_NodeFile_createDenseNodeLabeling(Finley_NodeFile* in)
{
  index_t min_nodeID, max_nodeID, unset_nodeID=-1, set_nodeID=1, nodeID_0, nodeID_1, *Node_buffer=NULL, k;
  Paso_MPI_rank buffer_rank, dest, source, *distribution=NULL;
  dim_t p, buffer_len, n, myNodes, *offsets=NULL, *loc_offsets=NULL, globalNumNodes=0, myNewNodes;
  #ifdef PASO_MPI
  MPI_Status status;
  #endif

  /* get the global range of node ids */
  Finley_NodeFile_setGlobalIdRange(&min_nodeID,&max_nodeID,in);

  distribution=TMPMEMALLOC(in->MPIInfo->size+1, index_t);
  offsets=TMPMEMALLOC(in->MPIInfo->size, dim_t);
  loc_offsets=TMPMEMALLOC(in->MPIInfo->size, dim_t);

  if ( ! (Finley_checkPtr(distribution) || Finley_checkPtr(offsets) || Finley_checkPtr(loc_offsets)) ) {
    /* distribute the range of node ids */
    buffer_len=Paso_MPIInfo_setDistribution(in->MPIInfo,min_nodeID,max_nodeID,distribution);
    myNodes=distribution[in->MPIInfo->rank+1]-distribution[in->MPIInfo->rank];
    /* allocate buffers */
    Node_buffer=TMPMEMALLOC(buffer_len,index_t);
    if (! Finley_checkPtr(Node_buffer)) {
      /* fill Node_buffer by the unset_nodeID marker to check if nodes are defined */
      #pragma omp parallel for private(n) schedule(static)
      for (n=0;n<buffer_len;n++) Node_buffer[n]=unset_nodeID;

      /* fill the buffer by sending portions around in a circle */
      dest=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank + 1);
      source=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank - 1);
      buffer_rank=in->MPIInfo->rank;
      for (p=0; p<in->MPIInfo->size; ++p) {
        if (p>0) { /* the initial send can be skipped */
          #ifdef PASO_MPI
          MPI_Sendrecv_replace(Node_buffer, buffer_len, MPI_INT,
                               dest, in->MPIInfo->msg_tag_counter, source, in->MPIInfo->msg_tag_counter,
                               in->MPIInfo->comm, &status);
          #endif
          in->MPIInfo->msg_tag_counter++;
        }
        buffer_rank=Paso_MPIInfo_mod(in->MPIInfo->size, buffer_rank-1);
        nodeID_0=distribution[buffer_rank];
        nodeID_1=distribution[buffer_rank+1];
        #pragma omp parallel for private(n,k) schedule(static)
        for (n=0;n<in->numNodes;n++) {
          k=in->Id[n];
          if ((nodeID_0<=k) && (k<nodeID_1)) {
            Node_buffer[k-nodeID_0]=set_nodeID;
          }
        }
      }
      /* count the entries in the Node_buffer */
      /* TODO: OMP parallel */
      myNewNodes=0;
      for (n=0; n<myNodes; ++n) {
        if (Node_buffer[n]==set_nodeID) {
          Node_buffer[n]=myNewNodes;
          myNewNodes++;
        }
      }
      memset(loc_offsets,0,in->MPIInfo->size*sizeof(dim_t));
      loc_offsets[in->MPIInfo->rank]=myNewNodes;
      #ifdef PASO_MPI
      MPI_Allreduce(loc_offsets,offsets,in->MPIInfo->size, MPI_INT, MPI_SUM, in->MPIInfo->comm);
      globalNumNodes=0;
      for (n=0; n<in->MPIInfo->size; ++n) {
        loc_offsets[n]=globalNumNodes;
        globalNumNodes+=offsets[n];
      }
      #else
      globalNumNodes=loc_offsets[0];
      loc_offsets[0]=0;
      #endif
      #pragma omp parallel for private(n) schedule(static)
      for (n=0; n<myNodes; ++n) Node_buffer[n]+=loc_offsets[in->MPIInfo->rank];
      /* now entries are collected from the buffer again by sending the entries around in a circle */
      #pragma omp parallel for private(n) schedule(static)
      for (n=0; n<in->numNodes; ++n) in->globalNodesIndex[n]=loc_offsets[0]-1;
      dest=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank + 1);
      source=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank - 1);
      buffer_rank=in->MPIInfo->rank;
      for (p=0; p<in->MPIInfo->size; ++p) {
        nodeID_0=distribution[buffer_rank];
        nodeID_1=distribution[buffer_rank+1];
        #pragma omp parallel for private(n,k) schedule(static)
        for (n=0;n<in->numNodes;n++) {
          k=in->Id[n];
          if ((nodeID_0<=k) && (k<nodeID_1)) in->globalNodesIndex[n]=Node_buffer[k-nodeID_0];
        }
        if (p<in->MPIInfo->size-1) { /* the last send can be skipped */
          #ifdef PASO_MPI
          MPI_Sendrecv_replace(Node_buffer, buffer_len, MPI_INT,
                               dest, in->MPIInfo->msg_tag_counter, source, in->MPIInfo->msg_tag_counter,
                               in->MPIInfo->comm, &status);
          #endif
          in->MPIInfo->msg_tag_counter+=1;
        }
        buffer_rank=Paso_MPIInfo_mod(in->MPIInfo->size, buffer_rank-1);
      }
    }
    TMPMEMFREE(Node_buffer);
  }
  TMPMEMFREE(distribution);
  TMPMEMFREE(loc_offsets);
  TMPMEMFREE(offsets);
  return globalNumNodes;
}
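
/* Hypothetical usage sketch for the routine above (not compiled): the wrapper name
   example_relabel_nodes is made up, and Finley_noError() is assumed to be the
   library's usual error test. */
#if 0
static dim_t example_relabel_nodes(Finley_NodeFile* nodes)
{
  /* after the call, nodes->globalNodesIndex[n] holds a dense index in
     [0, numGlobalNodes) for every local node n */
  dim_t numGlobalNodes = Finley_NodeFile_createDenseNodeLabeling(nodes);
  return Finley_noError() ? numGlobalNodes : 0;
}
#endif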

dim_t Finley_NodeFile_createDenseReducedNodeLabeling(Finley_NodeFile* in, index_t* reducedNodeMask)
{
  index_t min_nodeID, max_nodeID, unset_nodeID=-1, set_nodeID=1, nodeID_0, nodeID_1, *Node_buffer=NULL, k;
  Paso_MPI_rank buffer_rank, dest, source, *distribution=NULL;
  dim_t p, buffer_len, n, myNodes, *offsets=NULL, *loc_offsets=NULL, globalNumReducedNodes=0, myNewNodes;
  #ifdef PASO_MPI
  MPI_Status status;
  #endif

  /* get the global range of node ids */
  Finley_NodeFile_setGlobalIdRange(&min_nodeID,&max_nodeID,in);

  distribution=TMPMEMALLOC(in->MPIInfo->size+1, index_t);
  offsets=TMPMEMALLOC(in->MPIInfo->size, dim_t);
  loc_offsets=TMPMEMALLOC(in->MPIInfo->size, dim_t);

  if ( ! (Finley_checkPtr(distribution) || Finley_checkPtr(offsets) || Finley_checkPtr(loc_offsets)) ) {
    /* distribute the range of node ids */
    buffer_len=Paso_MPIInfo_setDistribution(in->MPIInfo,min_nodeID,max_nodeID,distribution);
    myNodes=distribution[in->MPIInfo->rank+1]-distribution[in->MPIInfo->rank];
    /* allocate buffers */
    Node_buffer=TMPMEMALLOC(buffer_len,index_t);
    if (! Finley_checkPtr(Node_buffer)) {
      /* fill Node_buffer by the unset_nodeID marker to check if nodes are defined */
      #pragma omp parallel for private(n) schedule(static)
      for (n=0;n<buffer_len;n++) Node_buffer[n]=unset_nodeID;

      /* fill the buffer by sending portions around in a circle */
      dest=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank + 1);
      source=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank - 1);
      buffer_rank=in->MPIInfo->rank;
      for (p=0; p<in->MPIInfo->size; ++p) {
        if (p>0) { /* the initial send can be skipped */
          #ifdef PASO_MPI
          MPI_Sendrecv_replace(Node_buffer, buffer_len, MPI_INT,
                               dest, in->MPIInfo->msg_tag_counter, source, in->MPIInfo->msg_tag_counter,
                               in->MPIInfo->comm, &status);
          #endif
          in->MPIInfo->msg_tag_counter++;
        }
        buffer_rank=Paso_MPIInfo_mod(in->MPIInfo->size, buffer_rank-1);
        nodeID_0=distribution[buffer_rank];
        nodeID_1=distribution[buffer_rank+1];
        #pragma omp parallel for private(n,k) schedule(static)
        for (n=0;n<in->numNodes;n++) {
          if (reducedNodeMask[n]>-1) {
            k=in->Id[n];
            if ((nodeID_0<=k) && (k<nodeID_1)) {
              Node_buffer[k-nodeID_0]=set_nodeID;
            }
          }
        }
      }
      /* count the entries in the Node_buffer */
      /* TODO: OMP parallel */
      myNewNodes=0;
      for (n=0; n<myNodes; ++n) {
        if (Node_buffer[n]==set_nodeID) {
          Node_buffer[n]=myNewNodes;
          myNewNodes++;
        }
      }
      memset(loc_offsets,0,in->MPIInfo->size*sizeof(dim_t));
      loc_offsets[in->MPIInfo->rank]=myNewNodes;
      #ifdef PASO_MPI
      MPI_Allreduce(loc_offsets,offsets,in->MPIInfo->size, MPI_INT, MPI_SUM, in->MPIInfo->comm);
      globalNumReducedNodes=0;
      for (n=0; n<in->MPIInfo->size; ++n) {
        loc_offsets[n]=globalNumReducedNodes;
        globalNumReducedNodes+=offsets[n];
      }
      #else
      globalNumReducedNodes=loc_offsets[0];
      loc_offsets[0]=0;
      #endif
      #pragma omp parallel for private(n) schedule(static)
      for (n=0; n<myNodes; ++n) Node_buffer[n]+=loc_offsets[in->MPIInfo->rank];
      /* now entries are collected from the buffer again by sending the entries around in a circle */
      #pragma omp parallel for private(n) schedule(static)
      for (n=0; n<in->numNodes; ++n) in->globalReducedNodesIndex[n]=loc_offsets[0]-1;
      dest=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank + 1);
      source=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank - 1);
      buffer_rank=in->MPIInfo->rank;
      for (p=0; p<in->MPIInfo->size; ++p) {
        nodeID_0=distribution[buffer_rank];
        nodeID_1=distribution[buffer_rank+1];
        #pragma omp parallel for private(n,k) schedule(static)
        for (n=0;n<in->numNodes;n++) {
          if (reducedNodeMask[n]>-1) {
            k=in->Id[n];
            if ((nodeID_0<=k) && (k<nodeID_1)) in->globalReducedNodesIndex[n]=Node_buffer[k-nodeID_0];
          }
        }
        if (p<in->MPIInfo->size-1) { /* the last send can be skipped */
          #ifdef PASO_MPI
          MPI_Sendrecv_replace(Node_buffer, buffer_len, MPI_INT,
                               dest, in->MPIInfo->msg_tag_counter, source, in->MPIInfo->msg_tag_counter,
                               in->MPIInfo->comm, &status);
          #endif
          in->MPIInfo->msg_tag_counter+=1;
        }
        buffer_rank=Paso_MPIInfo_mod(in->MPIInfo->size, buffer_rank-1);
      }
    }
    TMPMEMFREE(Node_buffer);
  }
  TMPMEMFREE(distribution);
  TMPMEMFREE(loc_offsets);
  TMPMEMFREE(offsets);
  return globalNumReducedNodes;
}
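
/* Note on Finley_NodeFile_createDenseReducedNodeLabeling above: it mirrors
   Finley_NodeFile_createDenseNodeLabeling but only labels nodes with
   reducedNodeMask[n]>-1, writes the dense indices into in->globalReducedNodesIndex,
   and leaves nodes outside the reduced set at the marker value loc_offsets[0]-1
   (i.e. -1). */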
