/*******************************************************
*
* Copyright (c) 2003-2010 by University of Queensland
* Earth Systems Science Computational Center (ESSCC)
* http://www.uq.edu.au/esscc
*
* Primary Business: Queensland, Australia
* Licensed under the Open Software License version 3.0
* http://www.opensource.org/licenses/osl-3.0.php
*
*******************************************************/


/**************************************************************/

/* Finley: Mesh: NodeFile */

/* creates a dense labeling of the global degrees of freedom */
/* and returns the new number of global degrees of freedom */

/**************************************************************/
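/* The labeling routines in this file follow a similar pattern: the global id
   range is split into contiguous slices per rank (Esys_MPIInfo_setDistribution),
   each rank marks the ids it uses in a buffer that is rotated around all ranks
   with MPI_Sendrecv_replace, the marked entries are compacted into a dense
   numbering, and a second rotation writes the new labels back to the nodes.

   A minimal sketch of the ring rotation (illustrative only; the actual calls
   appear verbatim in the functions below):

       dest   = Esys_MPIInfo_mod(size, rank + 1);    next rank in the ring
       source = Esys_MPIInfo_mod(size, rank - 1);    previous rank in the ring
       MPI_Sendrecv_replace(buffer, buffer_len, MPI_INT,
                            dest, tag, source, tag, comm, &status);
*/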

#include "NodeFile.h"

/**************************************************************/

dim_t Finley_NodeFile_createDenseDOFLabeling(Finley_NodeFile* in)
{
    index_t min_dof, max_dof, unset_dof=-1, set_dof=1, dof_0, dof_1, *DOF_buffer=NULL, k;
    Esys_MPI_rank buffer_rank, dest, source, *distribution=NULL;
    dim_t p, buffer_len, n, myDOFs, *offsets=NULL, *loc_offsets=NULL, new_numGlobalDOFs=0, myNewDOFs;
    bool_t *set_new_DOF=NULL;
    #ifdef ESYS_MPI
    MPI_Status status;
    #endif

    /* get the global range of degree of freedom ids */
    Finley_NodeFile_setGlobalDOFRange(&min_dof, &max_dof, in);

    distribution=TMPMEMALLOC(in->MPIInfo->size+1, index_t);
    offsets=TMPMEMALLOC(in->MPIInfo->size, dim_t);
    loc_offsets=TMPMEMALLOC(in->MPIInfo->size, dim_t);
    set_new_DOF=TMPMEMALLOC(in->numNodes, bool_t);

    if (! (Finley_checkPtr(distribution) || Finley_checkPtr(offsets) || Finley_checkPtr(loc_offsets) || Finley_checkPtr(set_new_DOF)) ) {
        /* distribute the range of degree of freedom ids */
        buffer_len=Esys_MPIInfo_setDistribution(in->MPIInfo, min_dof, max_dof, distribution);
        myDOFs=distribution[in->MPIInfo->rank+1]-distribution[in->MPIInfo->rank];
        /* allocate buffers */
        DOF_buffer=TMPMEMALLOC(buffer_len, index_t);
        if (! Finley_checkPtr(DOF_buffer)) {
            /* fill DOF_buffer with the unset_dof marker to check if nodes are defined */
            #pragma omp parallel for private(n) schedule(static)
            for (n=0; n<buffer_len; n++) DOF_buffer[n]=unset_dof;

            /* fill the buffer by sending portions around in a circle */
            dest=Esys_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank + 1);
            source=Esys_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank - 1);
            buffer_rank=in->MPIInfo->rank;
            for (p=0; p<in->MPIInfo->size; ++p) {
                if (p>0) { /* the initial send can be skipped */
                    #ifdef ESYS_MPI
                    MPI_Sendrecv_replace(DOF_buffer, buffer_len, MPI_INT,
                                         dest, in->MPIInfo->msg_tag_counter, source, in->MPIInfo->msg_tag_counter,
                                         in->MPIInfo->comm, &status);
                    #endif
                    in->MPIInfo->msg_tag_counter++;
                }
                buffer_rank=Esys_MPIInfo_mod(in->MPIInfo->size, buffer_rank-1);
                dof_0=distribution[buffer_rank];
                dof_1=distribution[buffer_rank+1];
                #pragma omp parallel for private(n,k) schedule(static)
                for (n=0; n<in->numNodes; n++) {
                    k=in->globalDegreesOfFreedom[n];
                    if ((dof_0<=k) && (k<dof_1)) {
                        DOF_buffer[k-dof_0]=set_dof;
                    }
                }
            }
            /* count the entries in the DOF_buffer */
            /* TODO: OMP parallel */
            myNewDOFs=0;
            for (n=0; n<myDOFs; ++n) {
                if (DOF_buffer[n]==set_dof) {
                    DOF_buffer[n]=myNewDOFs;
                    myNewDOFs++;
                }
            }
            memset(loc_offsets, 0, in->MPIInfo->size*sizeof(dim_t));
            loc_offsets[in->MPIInfo->rank]=myNewDOFs;
            #ifdef ESYS_MPI
            MPI_Allreduce(loc_offsets, offsets, in->MPIInfo->size, MPI_INT, MPI_SUM, in->MPIInfo->comm);
            new_numGlobalDOFs=0;
            for (n=0; n<in->MPIInfo->size; ++n) {
                loc_offsets[n]=new_numGlobalDOFs;
                new_numGlobalDOFs+=offsets[n];
            }
            #else
            new_numGlobalDOFs=loc_offsets[0];
            loc_offsets[0]=0;
            #endif
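            /* loc_offsets now holds the exclusive prefix sum of the per-rank counts,
               i.e. loc_offsets[rank] is the first new global DOF index handed out by
               this rank, and new_numGlobalDOFs is the new global number of DOFs */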
            #pragma omp parallel
            {
                #pragma omp for private(n) schedule(static)
                for (n=0; n<myDOFs; ++n) DOF_buffer[n]+=loc_offsets[in->MPIInfo->rank];
                /* now entries are collected from the buffer again by sending the entries around in a circle */
                #pragma omp for private(n) schedule(static)
                for (n=0; n<in->numNodes; ++n) set_new_DOF[n]=TRUE;
            }
            dest=Esys_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank + 1);
            source=Esys_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank - 1);
            buffer_rank=in->MPIInfo->rank;
            for (p=0; p<in->MPIInfo->size; ++p) {
                dof_0=distribution[buffer_rank];
                dof_1=distribution[buffer_rank+1];
                #pragma omp parallel for private(n,k) schedule(static)
                for (n=0; n<in->numNodes; n++) {
                    k=in->globalDegreesOfFreedom[n];
                    if (set_new_DOF[n] && (dof_0<=k) && (k<dof_1)) {
                        in->globalDegreesOfFreedom[n]=DOF_buffer[k-dof_0];
                        set_new_DOF[n]=FALSE;
                    }
                }
                if (p<in->MPIInfo->size-1) { /* the last send can be skipped */
                    #ifdef ESYS_MPI
                    MPI_Sendrecv_replace(DOF_buffer, buffer_len, MPI_INT,
                                         dest, in->MPIInfo->msg_tag_counter, source, in->MPIInfo->msg_tag_counter,
                                         in->MPIInfo->comm, &status);
                    #endif
                    in->MPIInfo->msg_tag_counter+=1;
                }
                buffer_rank=Esys_MPIInfo_mod(in->MPIInfo->size, buffer_rank-1);
            }
        }
        TMPMEMFREE(DOF_buffer);
    }
    TMPMEMFREE(distribution);
    TMPMEMFREE(loc_offsets);
    TMPMEMFREE(offsets);
    TMPMEMFREE(set_new_DOF);
    return new_numGlobalDOFs;
}
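
/* assigns to each node the MPI rank that owns its degree of freedom:
   distribution[p] is the first global DOF owned by rank p, so node n with
   global DOF k gets the rank p with distribution[p] <= k < distribution[p+1].
   The result is written to mpiRankOfDOF. */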
void Finley_NodeFile_assignMPIRankToDOFs(Finley_NodeFile* in, Esys_MPI_rank* mpiRankOfDOF, index_t* distribution)
{
    index_t min_DOF, max_DOF, k;
    dim_t n;
    Esys_MPI_rank p, p_min=in->MPIInfo->size, p_max=-1;
    /* first we calculate the min and max DOF on this processor to reduce the cost of searching */
    Finley_NodeFile_setDOFRange(&min_DOF, &max_DOF, in);

    for (p=0; p<in->MPIInfo->size; ++p) {
        if (distribution[p]<=min_DOF) p_min=p;
        if (distribution[p]<=max_DOF) p_max=p;
    }
    #pragma omp parallel for private(n,k,p) schedule(static)
    for (n=0; n<in->numNodes; ++n) {
        k=in->globalDegreesOfFreedom[n];
        for (p=p_min; p<=p_max; ++p) {
            if (k<distribution[p+1]) {
                mpiRankOfDOF[n]=p;
                break;
            }
        }
    }
}
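
/* creates a dense labeling of the degrees of freedom used by the reduced mesh
   (nodes with reducedNodeMask[n]>-1), stores it in in->globalReducedDOFIndex
   (entries of nodes outside the mask are set to -1) and returns the new global
   number of reduced degrees of freedom */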
dim_t Finley_NodeFile_createDenseReducedDOFLabeling(Finley_NodeFile* in, index_t* reducedNodeMask)
{
    index_t min_dof, max_dof, unset_dof=-1, set_dof=1, dof_0, dof_1, *DOF_buffer=NULL, k;
    Esys_MPI_rank buffer_rank, dest, source, *distribution=NULL;
    dim_t p, buffer_len, n, myDOFs, *offsets=NULL, *loc_offsets=NULL, globalNumReducedDOFs=0, myNewDOFs;
    #ifdef ESYS_MPI
    MPI_Status status;
    #endif

    /* get the global range of degree of freedom ids */
    Finley_NodeFile_setGlobalDOFRange(&min_dof, &max_dof, in);

    distribution=TMPMEMALLOC(in->MPIInfo->size+1, index_t);
    offsets=TMPMEMALLOC(in->MPIInfo->size, dim_t);
    loc_offsets=TMPMEMALLOC(in->MPIInfo->size, dim_t);

    if (! (Finley_checkPtr(distribution) || Finley_checkPtr(offsets) || Finley_checkPtr(loc_offsets)) ) {
        /* distribute the range of degree of freedom ids */
        buffer_len=Esys_MPIInfo_setDistribution(in->MPIInfo, min_dof, max_dof, distribution);
        myDOFs=distribution[in->MPIInfo->rank+1]-distribution[in->MPIInfo->rank];
        /* allocate buffers */
        DOF_buffer=TMPMEMALLOC(buffer_len, index_t);
        if (! Finley_checkPtr(DOF_buffer)) {
            /* fill DOF_buffer with the unset_dof marker to check if nodes are defined */
            #pragma omp parallel for private(n) schedule(static)
            for (n=0; n<buffer_len; n++) DOF_buffer[n]=unset_dof;

            /* fill the buffer by sending portions around in a circle */
            dest=Esys_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank + 1);
            source=Esys_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank - 1);
            buffer_rank=in->MPIInfo->rank;
            for (p=0; p<in->MPIInfo->size; ++p) {
                if (p>0) { /* the initial send can be skipped */
                    #ifdef ESYS_MPI
                    MPI_Sendrecv_replace(DOF_buffer, buffer_len, MPI_INT,
                                         dest, in->MPIInfo->msg_tag_counter, source, in->MPIInfo->msg_tag_counter,
                                         in->MPIInfo->comm, &status);
                    #endif
                    in->MPIInfo->msg_tag_counter++;
                }
                buffer_rank=Esys_MPIInfo_mod(in->MPIInfo->size, buffer_rank-1);
                dof_0=distribution[buffer_rank];
                dof_1=distribution[buffer_rank+1];
                #pragma omp parallel for private(n,k) schedule(static)
                for (n=0; n<in->numNodes; n++) {
                    if (reducedNodeMask[n]>-1) {
                        k=in->globalDegreesOfFreedom[n];
                        if ((dof_0<=k) && (k<dof_1)) {
                            DOF_buffer[k-dof_0]=set_dof;
                        }
                    }
                }
            }
            /* count the entries in the DOF_buffer */
            /* TODO: OMP parallel */
            myNewDOFs=0;
            for (n=0; n<myDOFs; ++n) {
                if (DOF_buffer[n]==set_dof) {
                    DOF_buffer[n]=myNewDOFs;
                    myNewDOFs++;
                }
            }
            memset(loc_offsets, 0, in->MPIInfo->size*sizeof(dim_t));
            loc_offsets[in->MPIInfo->rank]=myNewDOFs;
            #ifdef ESYS_MPI
            MPI_Allreduce(loc_offsets, offsets, in->MPIInfo->size, MPI_INT, MPI_SUM, in->MPIInfo->comm);
            globalNumReducedDOFs=0;
            for (n=0; n<in->MPIInfo->size; ++n) {
                loc_offsets[n]=globalNumReducedDOFs;
                globalNumReducedDOFs+=offsets[n];
            }
            #else
            globalNumReducedDOFs=loc_offsets[0];
            loc_offsets[0]=0;
            #endif
            #pragma omp parallel for private(n) schedule(static)
            for (n=0; n<myDOFs; ++n) DOF_buffer[n]+=loc_offsets[in->MPIInfo->rank];
            /* now entries are collected from the buffer again by sending the entries around in a circle */
            #pragma omp parallel for private(n) schedule(static)
            for (n=0; n<in->numNodes; ++n) in->globalReducedDOFIndex[n]=loc_offsets[0]-1;
            dest=Esys_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank + 1);
            source=Esys_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank - 1);
            buffer_rank=in->MPIInfo->rank;
            for (p=0; p<in->MPIInfo->size; ++p) {
                dof_0=distribution[buffer_rank];
                dof_1=distribution[buffer_rank+1];
                #pragma omp parallel for private(n,k) schedule(static)
                for (n=0; n<in->numNodes; n++) {
                    if (reducedNodeMask[n]>-1) {
                        k=in->globalDegreesOfFreedom[n];
                        if ((dof_0<=k) && (k<dof_1)) in->globalReducedDOFIndex[n]=DOF_buffer[k-dof_0];
                    }
                }
                if (p<in->MPIInfo->size-1) { /* the last send can be skipped */
                    #ifdef ESYS_MPI
                    MPI_Sendrecv_replace(DOF_buffer, buffer_len, MPI_INT,
                                         dest, in->MPIInfo->msg_tag_counter, source, in->MPIInfo->msg_tag_counter,
                                         in->MPIInfo->comm, &status);
                    #endif
                    in->MPIInfo->msg_tag_counter+=1;
                }
                buffer_rank=Esys_MPIInfo_mod(in->MPIInfo->size, buffer_rank-1);
            }
        }
        TMPMEMFREE(DOF_buffer);
    }
    TMPMEMFREE(distribution);
    TMPMEMFREE(loc_offsets);
    TMPMEMFREE(offsets);
    return globalNumReducedDOFs;
}
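
/* creates a dense labeling of the global nodes and writes it to
   in->globalNodesIndex; new node numbers are handed out by the rank that owns
   the node's degree of freedom (as given by dof_distribution). node_distribution
   is filled with the resulting node distribution and the new global number of
   nodes is returned. */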
dim_t Finley_NodeFile_createDenseNodeLabeling(Finley_NodeFile* in, index_t* node_distribution, const index_t* dof_distribution)
{
    index_t myFirstDOF, myLastDOF, max_id, min_id, loc_max_id, loc_min_id, dof, id, itmp, nodeID_0, nodeID_1, dof_0, dof_1, *Node_buffer=NULL;
    dim_t n, my_buffer_len, buffer_len, globalNumNodes=0, myNewNumNodes;
    Esys_MPI_rank p, dest, source, buffer_rank;
    const index_t unset_nodeID=-1, set_nodeID=1;
    const dim_t header_len=2;
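    /* the first header_len entries of Node_buffer carry the node id range
       [min_id,max_id] of the rank that currently owns the buffer, so the
       receiving ranks can interpret the buffer while it travels around the ring */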
    #ifdef ESYS_MPI
    MPI_Status status;
    #endif
    Esys_MPI_rank myRank=in->MPIInfo->rank;

    /* find the range of node ids controlled by me */

    myFirstDOF=dof_distribution[myRank];
    myLastDOF=dof_distribution[myRank+1];
    max_id=-INDEX_T_MAX;
    min_id=INDEX_T_MAX;
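    /* each thread reduces into loc_min_id/loc_max_id over the nodes whose DOF
       is owned by this rank; the per-thread results are merged in the critical
       section below */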
    #pragma omp parallel private(loc_max_id,loc_min_id)
    {
        loc_max_id=max_id;
        loc_min_id=min_id;
        #pragma omp for private(n,dof,id) schedule(static)
        for (n=0; n<in->numNodes; n++) {
            dof=in->globalDegreesOfFreedom[n];
            id=in->Id[n];
            if ((myFirstDOF<=dof) && (dof<myLastDOF)) {
                loc_max_id=MAX(loc_max_id,id);
                loc_min_id=MIN(loc_min_id,id);
            }
        }
        #pragma omp critical
        {
            max_id=MAX(loc_max_id,max_id);
            min_id=MIN(loc_min_id,min_id);
        }
    }
    /* allocate a buffer */
    my_buffer_len=max_id>=min_id ? max_id-min_id+1 : 0;

    #ifdef ESYS_MPI
    MPI_Allreduce(&my_buffer_len, &buffer_len, 1, MPI_INT, MPI_MAX, in->MPIInfo->comm);
    #else
    buffer_len=my_buffer_len;
    #endif

    Node_buffer=TMPMEMALLOC(buffer_len+header_len, index_t);
    if (! Finley_checkPtr(Node_buffer)) {
        /* mark and count the nodes in use */
        #pragma omp parallel
        {
            #pragma omp for private(n) schedule(static)
            for (n=0; n<buffer_len+header_len; n++) Node_buffer[n]=unset_nodeID;
            #pragma omp for private(n) schedule(static)
            for (n=0; n<in->numNodes; n++) in->globalNodesIndex[n]=-1;
            #pragma omp for private(n,dof,id) schedule(static)
            for (n=0; n<in->numNodes; n++) {
                dof=in->globalDegreesOfFreedom[n];
                id=in->Id[n];
                if ((myFirstDOF<=dof) && (dof<myLastDOF)) Node_buffer[id-min_id+header_len]=set_nodeID;
            }
        }
        myNewNumNodes=0;
        for (n=0; n<my_buffer_len; n++) {
            if (Node_buffer[header_len+n]==set_nodeID) {
                Node_buffer[header_len+n]=myNewNumNodes;
                myNewNumNodes++;
            }
        }
        /* make the local number of nodes globally available */
        #ifdef ESYS_MPI
        MPI_Allgather(&myNewNumNodes, 1, MPI_INT, node_distribution, 1, MPI_INT, in->MPIInfo->comm);
        #else
        node_distribution[0]=myNewNumNodes;
        #endif

        globalNumNodes=0;
        for (p=0; p<in->MPIInfo->size; ++p) {
            itmp=node_distribution[p];
            node_distribution[p]=globalNumNodes;
            globalNumNodes+=itmp;
        }
        node_distribution[in->MPIInfo->size]=globalNumNodes;

        /* offset nodebuffer */
        itmp=node_distribution[in->MPIInfo->rank];
        #pragma omp parallel for private(n) schedule(static)
        for (n=0; n<my_buffer_len; n++) Node_buffer[n+header_len]+=itmp;

        /* now we send this buffer around to assign global node index: */
        dest=Esys_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank + 1);
        source=Esys_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank - 1);
        Node_buffer[0]=min_id;
        Node_buffer[1]=max_id;
        buffer_rank=in->MPIInfo->rank;
        for (p=0; p<in->MPIInfo->size; ++p) {
            nodeID_0=Node_buffer[0];
            nodeID_1=Node_buffer[1];
            dof_0=dof_distribution[buffer_rank];
            dof_1=dof_distribution[buffer_rank+1];
            if (nodeID_0<=nodeID_1) {
                #pragma omp parallel for private(n,dof,id) schedule(static)
                for (n=0; n<in->numNodes; n++) {
                    dof=in->globalDegreesOfFreedom[n];
                    id=in->Id[n]-nodeID_0;
                    if ((dof_0<=dof) && (dof<dof_1) && (id>=0) && (id<=nodeID_1-nodeID_0)) in->globalNodesIndex[n]=Node_buffer[id+header_len];
                }
            }
            if (p<in->MPIInfo->size-1) { /* the last send can be skipped */
                #ifdef ESYS_MPI
                MPI_Sendrecv_replace(Node_buffer, buffer_len+header_len, MPI_INT,
                                     dest, in->MPIInfo->msg_tag_counter, source, in->MPIInfo->msg_tag_counter,
                                     in->MPIInfo->comm, &status);
                #endif
                in->MPIInfo->msg_tag_counter+=1;
            }
            buffer_rank=Esys_MPIInfo_mod(in->MPIInfo->size, buffer_rank-1);
        }
    }
    TMPMEMFREE(Node_buffer);
    return globalNumNodes;
}
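
/* creates a dense labeling of the global nodes used by the reduced mesh
   (nodes with reducedNodeMask[n]>-1), writes it to in->globalReducedNodesIndex
   (entries of nodes outside the mask are set to -1) and returns the new global
   number of reduced nodes */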
dim_t Finley_NodeFile_createDenseReducedNodeLabeling(Finley_NodeFile* in, index_t* reducedNodeMask)
{
    index_t min_node, max_node, unset_node=-1, set_node=1, node_0, node_1, *Nodes_buffer=NULL, k;
    Esys_MPI_rank buffer_rank, dest, source, *distribution=NULL;
    dim_t p, buffer_len, n, myNodes, *offsets=NULL, *loc_offsets=NULL, globalNumReducedNodes=0, myNewNodes;
    #ifdef ESYS_MPI
    MPI_Status status;
    #endif

    /* get the global range of node ids */
    Finley_NodeFile_setGlobalNodeIDIndexRange(&min_node, &max_node, in);

    distribution=TMPMEMALLOC(in->MPIInfo->size+1, index_t);
    offsets=TMPMEMALLOC(in->MPIInfo->size, dim_t);
    loc_offsets=TMPMEMALLOC(in->MPIInfo->size, dim_t);

    if (! (Finley_checkPtr(distribution) || Finley_checkPtr(offsets) || Finley_checkPtr(loc_offsets)) ) {
        /* distribute the range of node ids */
        buffer_len=Esys_MPIInfo_setDistribution(in->MPIInfo, min_node, max_node, distribution);
        myNodes=distribution[in->MPIInfo->rank+1]-distribution[in->MPIInfo->rank];
        /* allocate buffers */
        Nodes_buffer=TMPMEMALLOC(buffer_len, index_t);
        if (! Finley_checkPtr(Nodes_buffer)) {
426     /* fill Nodes_buffer by the unset_node marker to check if nodes are defined */
            #pragma omp parallel for private(n) schedule(static)
            for (n=0; n<buffer_len; n++) Nodes_buffer[n]=unset_node;

            /* fill the buffer by sending portions around in a circle */
            dest=Esys_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank + 1);
            source=Esys_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank - 1);
            buffer_rank=in->MPIInfo->rank;
            for (p=0; p<in->MPIInfo->size; ++p) {
                if (p>0) { /* the initial send can be skipped */
                    #ifdef ESYS_MPI
                    MPI_Sendrecv_replace(Nodes_buffer, buffer_len, MPI_INT,
                                         dest, in->MPIInfo->msg_tag_counter, source, in->MPIInfo->msg_tag_counter,
                                         in->MPIInfo->comm, &status);
                    #endif
                    in->MPIInfo->msg_tag_counter++;
                }
                buffer_rank=Esys_MPIInfo_mod(in->MPIInfo->size, buffer_rank-1);
                node_0=distribution[buffer_rank];
                node_1=distribution[buffer_rank+1];
                #pragma omp parallel for private(n,k) schedule(static)
                for (n=0; n<in->numNodes; n++) {
                    if (reducedNodeMask[n]>-1) {
                        k=in->globalNodesIndex[n];
                        if ((node_0<=k) && (k<node_1)) {
                            Nodes_buffer[k-node_0]=set_node;
                        }
                    }
                }
            }
            /* count the entries in the Nodes_buffer */
            /* TODO: OMP parallel */
            myNewNodes=0;
            for (n=0; n<myNodes; ++n) {
                if (Nodes_buffer[n]==set_node) {
                    Nodes_buffer[n]=myNewNodes;
                    myNewNodes++;
                }
            }
            memset(loc_offsets, 0, in->MPIInfo->size*sizeof(dim_t));
            loc_offsets[in->MPIInfo->rank]=myNewNodes;
            #ifdef ESYS_MPI
            MPI_Allreduce(loc_offsets, offsets, in->MPIInfo->size, MPI_INT, MPI_SUM, in->MPIInfo->comm);
            globalNumReducedNodes=0;
            for (n=0; n<in->MPIInfo->size; ++n) {
                loc_offsets[n]=globalNumReducedNodes;
                globalNumReducedNodes+=offsets[n];
            }
            #else
            globalNumReducedNodes=loc_offsets[0];
            loc_offsets[0]=0;
            #endif
            #pragma omp parallel for private(n) schedule(static)
            for (n=0; n<myNodes; ++n) Nodes_buffer[n]+=loc_offsets[in->MPIInfo->rank];
            /* now entries are collected from the buffer again by sending the entries around in a circle */
            #pragma omp parallel for private(n) schedule(static)
            for (n=0; n<in->numNodes; ++n) in->globalReducedNodesIndex[n]=loc_offsets[0]-1;
            dest=Esys_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank + 1);
            source=Esys_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank - 1);
            buffer_rank=in->MPIInfo->rank;
            for (p=0; p<in->MPIInfo->size; ++p) {
                node_0=distribution[buffer_rank];
                node_1=distribution[buffer_rank+1];
                #pragma omp parallel for private(n,k) schedule(static)
                for (n=0; n<in->numNodes; n++) {
                    if (reducedNodeMask[n]>-1) {
                        k=in->globalNodesIndex[n];
                        if ((node_0<=k) && (k<node_1)) in->globalReducedNodesIndex[n]=Nodes_buffer[k-node_0];
                    }
                }
                if (p<in->MPIInfo->size-1) { /* the last send can be skipped */
                    #ifdef ESYS_MPI
                    MPI_Sendrecv_replace(Nodes_buffer, buffer_len, MPI_INT,
                                         dest, in->MPIInfo->msg_tag_counter, source, in->MPIInfo->msg_tag_counter,
                                         in->MPIInfo->comm, &status);
                    #endif
                    in->MPIInfo->msg_tag_counter+=1;
                }
                buffer_rank=Esys_MPIInfo_mod(in->MPIInfo->size, buffer_rank-1);
            }
        }
        TMPMEMFREE(Nodes_buffer);
    }
    TMPMEMFREE(distribution);
    TMPMEMFREE(loc_offsets);
    TMPMEMFREE(offsets);
    return globalNumReducedNodes;
}
