trunk-mpi-branch/finley/src/NodeFile_createDenseLabelings.c
Revision 1272, Fri Aug 24 00:40:43 2007 UTC, by gross: some bugs in the node-node interpolation fixed

/*
 ************************************************************
 * Copyright 2006 by ACcESS MNRF *
 * *
 * http://www.access.edu.au *
 * Primary Business: Queensland, Australia *
 * Licensed under the Open Software License version 3.0 *
 * http://www.opensource.org/licenses/osl-3.0.php *
 * *
 ************************************************************
 */

/**************************************************************/

/* Finley: Mesh: NodeFile */

/* creates a dense labeling of the global degrees of freedom */
/* and returns the new number of global degrees of freedom */

/**************************************************************/

/* Author: gross@access.edu.au */
/* Version: $Id:$ */

/**************************************************************/

#include "NodeFile.h"

/**************************************************************/

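/* All four labeling routines below follow the same pattern: the global id range is split into
   contiguous slices, one per MPI rank (see Paso_MPIInfo_setDistribution). A buffer large enough
   to hold one slice is passed around the ranks in a ring so that every rank can mark the ids it
   actually uses; each rank then numbers the marked entries of its own slice densely, the per-rank
   counts are combined via MPI_Allreduce and accumulated into global offsets, and a second ring
   pass delivers the new, dense labels back to the nodes. */
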
dim_t Finley_NodeFile_createDenseDOFLabeling(Finley_NodeFile* in)
{
  index_t min_dof, max_dof, unset_dof=-1, set_dof=1, dof_0, dof_1, *DOF_buffer=NULL, k;
  Paso_MPI_rank buffer_rank, dest, source, *distribution=NULL;
  dim_t p, buffer_len, n, myDOFs, *offsets=NULL, *loc_offsets=NULL, new_numGlobalDOFs=0, myNewDOFs;
  bool_t *set_new_DOF=NULL;
#ifdef PASO_MPI
  MPI_Status status;
#endif

  /* get the global range of degree-of-freedom ids */
  Finley_NodeFile_setGlobalDOFRange(&min_dof,&max_dof,in);

  distribution=TMPMEMALLOC(in->MPIInfo->size+1, index_t);
  offsets=TMPMEMALLOC(in->MPIInfo->size, dim_t);
  loc_offsets=TMPMEMALLOC(in->MPIInfo->size, dim_t);
  set_new_DOF=TMPMEMALLOC(in->numNodes, bool_t);

  if (! (Finley_checkPtr(distribution) || Finley_checkPtr(offsets) || Finley_checkPtr(loc_offsets) || Finley_checkPtr(set_new_DOF)) ) {
    /* distribute the range of degrees of freedom */
    buffer_len=Paso_MPIInfo_setDistribution(in->MPIInfo,min_dof,max_dof,distribution);
    myDOFs=distribution[in->MPIInfo->rank+1]-distribution[in->MPIInfo->rank];
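    /* myDOFs is the number of global DOF ids in the slice [distribution[rank], distribution[rank+1]) owned by this rank */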
    /* allocate buffers */
    DOF_buffer=TMPMEMALLOC(buffer_len,index_t);
    if (! Finley_checkPtr(DOF_buffer)) {
      /* fill DOF_buffer with the unset_dof marker to check if nodes are defined */
#pragma omp parallel for private(n) schedule(static)
      for (n=0;n<buffer_len;n++) DOF_buffer[n]=unset_dof;

      /* fill the buffer by sending portions around in a circle */
      dest=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank + 1);
      source=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank - 1);
      buffer_rank=in->MPIInfo->rank;
      for (p=0; p< in->MPIInfo->size; ++p) {
        if (p>0) { /* the initial send can be skipped */
#ifdef PASO_MPI
          MPI_Sendrecv_replace(DOF_buffer, buffer_len, MPI_INT,
                               dest, in->MPIInfo->msg_tag_counter, source, in->MPIInfo->msg_tag_counter,
                               in->MPIInfo->comm,&status);
#endif
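          /* advance the message tag so the next exchange in the ring uses a fresh tag */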
          in->MPIInfo->msg_tag_counter++;
        }
        buffer_rank=Paso_MPIInfo_mod(in->MPIInfo->size, buffer_rank-1);
        dof_0=distribution[buffer_rank];
        dof_1=distribution[buffer_rank+1];
#pragma omp parallel for private(n,k) schedule(static)
        for (n=0;n<in->numNodes;n++) {
          k=in->globalDegreesOfFreedom[n];
          if ((dof_0<=k) && (k<dof_1)) {
            DOF_buffer[k-dof_0] = set_dof;
          }
        }
      }
      /* count the entries in the DOF_buffer */
      /* TODO: OMP parallel */
      myNewDOFs=0;
      for (n=0; n<myDOFs; ++n) {
        if ( DOF_buffer[n] == set_dof) {
          DOF_buffer[n]=myNewDOFs;
          myNewDOFs++;
        }
      }
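      /* DOF_buffer now holds, for this rank's slice, a dense local numbering 0..myNewDOFs-1 of the DOFs that are actually in use */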
      memset(loc_offsets,0,in->MPIInfo->size*sizeof(dim_t));
      loc_offsets[in->MPIInfo->rank]=myNewDOFs;
#ifdef PASO_MPI
      MPI_Allreduce(loc_offsets,offsets,in->MPIInfo->size, MPI_INT, MPI_SUM, in->MPIInfo->comm );
      new_numGlobalDOFs=0;
      for (n=0; n< in->MPIInfo->size; ++n) {
        loc_offsets[n]=new_numGlobalDOFs;
        new_numGlobalDOFs+=offsets[n];
      }
#else
      new_numGlobalDOFs=loc_offsets[0];
      loc_offsets[0]=0;
#endif
#pragma omp parallel for private(n) schedule(static)
      for (n=0; n<myDOFs; ++n) DOF_buffer[n]+=loc_offsets[in->MPIInfo->rank];
      /* now entries are collected from the buffer again by sending the entries around in a circle */
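      /* set_new_DOF guards each node against being relabeled twice: once a node has picked up its new number it must not match a later slice of the (renumbered) id range */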
#pragma omp parallel for private(n) schedule(static)
      for (n=0; n<in->numNodes; ++n) set_new_DOF[n]=TRUE;
      dest=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank + 1);
      source=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank - 1);
      buffer_rank=in->MPIInfo->rank;
      for (p=0; p< in->MPIInfo->size; ++p) {
        dof_0=distribution[buffer_rank];
        dof_1=distribution[buffer_rank+1];
#pragma omp parallel for private(n,k) schedule(static)
        for (n=0;n<in->numNodes;n++) {
          k=in->globalDegreesOfFreedom[n];
          if ( set_new_DOF[n] && (dof_0<=k) && (k<dof_1)) {
            in->globalDegreesOfFreedom[n]=DOF_buffer[k-dof_0];
            set_new_DOF[n]=FALSE;
          }
        }
        if (p<in->MPIInfo->size-1) { /* the last send can be skipped */
#ifdef PASO_MPI
          MPI_Sendrecv_replace(DOF_buffer, buffer_len, MPI_INT,
                               dest, in->MPIInfo->msg_tag_counter, source, in->MPIInfo->msg_tag_counter,
                               in->MPIInfo->comm,&status);
#endif
          in->MPIInfo->msg_tag_counter+=1;
        }
        buffer_rank=Paso_MPIInfo_mod(in->MPIInfo->size, buffer_rank-1);
      }
    }
    TMPMEMFREE(DOF_buffer);
  }
  TMPMEMFREE(distribution);
  TMPMEMFREE(loc_offsets);
  TMPMEMFREE(offsets);
  TMPMEMFREE(set_new_DOF);
  return new_numGlobalDOFs;
}

void Finley_NodeFile_assignMPIRankToDOFs(Finley_NodeFile* in,Paso_MPI_rank* mpiRankOfDOF, index_t *distribution){
  index_t min_DOF, max_DOF, k;
  dim_t n;
  Paso_MPI_rank p, p_min=in->MPIInfo->size, p_max=-1;
  /* first we calculate the min and max DOF on this processor to reduce the search costs */
  Finley_NodeFile_setDOFRange(&min_DOF,&max_DOF,in);

  for (p=0; p<in->MPIInfo->size; ++p) {
    if (distribution[p]<=min_DOF) p_min=p;
    if (distribution[p]<=max_DOF) p_max=p;
  }
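  /* for each node a short linear search over the candidate ranks p_min..p_max finds the owner: the first rank p with k < distribution[p+1] */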
#pragma omp parallel for private(n,k,p) schedule(static)
  for (n=0; n<in->numNodes; ++n) {
    k=in->globalDegreesOfFreedom[n];
    for (p=p_min; p<=p_max; ++p) {
      if (k<distribution[p+1]) {
        mpiRankOfDOF[n]=p;
        break;
      }
    }
  }
}
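
/* same scheme as Finley_NodeFile_createDenseDOFLabeling, but restricted to the degrees of
   freedom attached to nodes with reducedNodeMask[n]>-1; the dense labels are written to
   in->globalReducedDOFIndex, all other nodes keep the marker -1 */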
dim_t Finley_NodeFile_createDenseReducedDOFLabeling(Finley_NodeFile* in, index_t* reducedNodeMask)
{
  index_t min_dof, max_dof, unset_dof=-1, set_dof=1, dof_0, dof_1, *DOF_buffer=NULL, k;
  Paso_MPI_rank buffer_rank, dest, source, *distribution=NULL;
  dim_t p, buffer_len, n, myDOFs, *offsets=NULL, *loc_offsets=NULL, globalNumReducedDOFs=0, myNewDOFs;
#ifdef PASO_MPI
  MPI_Status status;
#endif

  /* get the global range of degree-of-freedom ids */
  Finley_NodeFile_setGlobalDOFRange(&min_dof,&max_dof,in);

  distribution=TMPMEMALLOC(in->MPIInfo->size+1, index_t);
  offsets=TMPMEMALLOC(in->MPIInfo->size, dim_t);
  loc_offsets=TMPMEMALLOC(in->MPIInfo->size, dim_t);

  if (! (Finley_checkPtr(distribution) || Finley_checkPtr(offsets) || Finley_checkPtr(loc_offsets) ) ) {
    /* distribute the range of degrees of freedom */
    buffer_len=Paso_MPIInfo_setDistribution(in->MPIInfo,min_dof,max_dof,distribution);
    myDOFs=distribution[in->MPIInfo->rank+1]-distribution[in->MPIInfo->rank];
    /* allocate buffers */
    DOF_buffer=TMPMEMALLOC(buffer_len,index_t);
    if (! Finley_checkPtr(DOF_buffer)) {
      /* fill DOF_buffer with the unset_dof marker to check if nodes are defined */
#pragma omp parallel for private(n) schedule(static)
      for (n=0;n<buffer_len;n++) DOF_buffer[n]=unset_dof;

      /* fill the buffer by sending portions around in a circle */
      dest=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank + 1);
      source=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank - 1);
      buffer_rank=in->MPIInfo->rank;
      for (p=0; p< in->MPIInfo->size; ++p) {
        if (p>0) { /* the initial send can be skipped */
#ifdef PASO_MPI
          MPI_Sendrecv_replace(DOF_buffer, buffer_len, MPI_INT,
                               dest, in->MPIInfo->msg_tag_counter, source, in->MPIInfo->msg_tag_counter,
                               in->MPIInfo->comm,&status);
#endif
          in->MPIInfo->msg_tag_counter++;
        }
        buffer_rank=Paso_MPIInfo_mod(in->MPIInfo->size, buffer_rank-1);
        dof_0=distribution[buffer_rank];
        dof_1=distribution[buffer_rank+1];
#pragma omp parallel for private(n,k) schedule(static)
        for (n=0;n<in->numNodes;n++) {
          if (reducedNodeMask[n] >-1) {
            k=in->globalDegreesOfFreedom[n];
            if ((dof_0<=k) && (k<dof_1)) {
              DOF_buffer[k-dof_0] = set_dof;
            }
          }
        }
      }
      /* count the entries in the DOF_buffer */
      /* TODO: OMP parallel */
      myNewDOFs=0;
      for (n=0; n<myDOFs; ++n) {
        if ( DOF_buffer[n] == set_dof) {
          DOF_buffer[n]=myNewDOFs;
          myNewDOFs++;
        }
      }
      memset(loc_offsets,0,in->MPIInfo->size*sizeof(dim_t));
      loc_offsets[in->MPIInfo->rank]=myNewDOFs;
#ifdef PASO_MPI
      MPI_Allreduce(loc_offsets,offsets,in->MPIInfo->size, MPI_INT, MPI_SUM, in->MPIInfo->comm );
      globalNumReducedDOFs=0;
      for (n=0; n< in->MPIInfo->size; ++n) {
        loc_offsets[n]=globalNumReducedDOFs;
        globalNumReducedDOFs+=offsets[n];
      }
#else
      globalNumReducedDOFs=loc_offsets[0];
      loc_offsets[0]=0;
#endif
#pragma omp parallel for private(n) schedule(static)
      for (n=0; n<myDOFs; ++n) DOF_buffer[n]+=loc_offsets[in->MPIInfo->rank];
      /* now entries are collected from the buffer again by sending the entries around in a circle */
#pragma omp parallel for private(n) schedule(static)
      for (n=0; n<in->numNodes; ++n) in->globalReducedDOFIndex[n]=loc_offsets[0]-1;
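      /* loc_offsets[0] is 0 at this point, so nodes that are not part of the reduced set keep the value -1 as an "unused" marker */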
      dest=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank + 1);
      source=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank - 1);
      buffer_rank=in->MPIInfo->rank;
      for (p=0; p< in->MPIInfo->size; ++p) {
        dof_0=distribution[buffer_rank];
        dof_1=distribution[buffer_rank+1];
#pragma omp parallel for private(n,k) schedule(static)
        for (n=0;n<in->numNodes;n++) {
          if (reducedNodeMask[n] >-1) {
            k=in->globalDegreesOfFreedom[n];
            if ( (dof_0<=k) && (k<dof_1)) in->globalReducedDOFIndex[n]=DOF_buffer[k-dof_0];
          }
        }
        if (p<in->MPIInfo->size-1) { /* the last send can be skipped */
#ifdef PASO_MPI
          MPI_Sendrecv_replace(DOF_buffer, buffer_len, MPI_INT,
                               dest, in->MPIInfo->msg_tag_counter, source, in->MPIInfo->msg_tag_counter,
                               in->MPIInfo->comm,&status);
#endif
          in->MPIInfo->msg_tag_counter+=1;
        }
        buffer_rank=Paso_MPIInfo_mod(in->MPIInfo->size, buffer_rank-1);
      }
    }
    TMPMEMFREE(DOF_buffer);
  }
  TMPMEMFREE(distribution);
  TMPMEMFREE(loc_offsets);
  TMPMEMFREE(offsets);
  return globalNumReducedDOFs;
}
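
/* same scheme again, but the dense labeling runs over the global node ids in->Id rather
   than over the degrees of freedom; the result is written to in->globalNodesIndex */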
dim_t Finley_NodeFile_createDenseNodeLabeling(Finley_NodeFile* in)
{
  index_t min_nodeID, max_nodeID, unset_nodeID=-1, set_nodeID=1, nodeID_0, nodeID_1, *Node_buffer=NULL, k;
  Paso_MPI_rank buffer_rank, dest, source, *distribution=NULL;
  dim_t p, buffer_len, n, myNodes, *offsets=NULL, *loc_offsets=NULL, globalNumNodes=0, myNewNodes;
#ifdef PASO_MPI
  MPI_Status status;
#endif

  /* get the global range of node ids */
  Finley_NodeFile_setGlobalIdRange(&min_nodeID,&max_nodeID,in);

  distribution=TMPMEMALLOC(in->MPIInfo->size+1, index_t);
  offsets=TMPMEMALLOC(in->MPIInfo->size, dim_t);
  loc_offsets=TMPMEMALLOC(in->MPIInfo->size, dim_t);

  if (! (Finley_checkPtr(distribution) || Finley_checkPtr(offsets) || Finley_checkPtr(loc_offsets)) ) {
    /* distribute the range of node ids */
    buffer_len=Paso_MPIInfo_setDistribution(in->MPIInfo,min_nodeID,max_nodeID,distribution);
    myNodes=distribution[in->MPIInfo->rank+1]-distribution[in->MPIInfo->rank];
    /* allocate buffers */
    Node_buffer=TMPMEMALLOC(buffer_len,index_t);
    if (! Finley_checkPtr(Node_buffer)) {
      /* fill Node_buffer with the unset_nodeID marker to check if nodes are defined */
#pragma omp parallel for private(n) schedule(static)
      for (n=0;n<buffer_len;n++) Node_buffer[n]=unset_nodeID;

      /* fill the buffer by sending portions around in a circle */
      dest=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank + 1);
      source=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank - 1);
      buffer_rank=in->MPIInfo->rank;
      for (p=0; p< in->MPIInfo->size; ++p) {
        if (p>0) { /* the initial send can be skipped */
#ifdef PASO_MPI
          MPI_Sendrecv_replace(Node_buffer, buffer_len, MPI_INT,
                               dest, in->MPIInfo->msg_tag_counter, source, in->MPIInfo->msg_tag_counter,
                               in->MPIInfo->comm,&status);
#endif
          in->MPIInfo->msg_tag_counter++;
        }
        buffer_rank=Paso_MPIInfo_mod(in->MPIInfo->size, buffer_rank-1);
        nodeID_0=distribution[buffer_rank];
        nodeID_1=distribution[buffer_rank+1];
#pragma omp parallel for private(n,k) schedule(static)
        for (n=0;n<in->numNodes;n++) {
          k=in->Id[n];
          if ((nodeID_0<=k) && (k<nodeID_1)) {
            Node_buffer[k-nodeID_0] = set_nodeID;
          }
        }
      }
      /* count the entries in the Node_buffer */
      /* TODO: OMP parallel */
      myNewNodes=0;
      for (n=0; n<myNodes; ++n) {
        if ( Node_buffer[n] == set_nodeID) {
          Node_buffer[n]=myNewNodes;
          myNewNodes++;
        }
      }
      memset(loc_offsets,0,in->MPIInfo->size*sizeof(dim_t));
      loc_offsets[in->MPIInfo->rank]=myNewNodes;
#ifdef PASO_MPI
      MPI_Allreduce(loc_offsets,offsets,in->MPIInfo->size, MPI_INT, MPI_SUM, in->MPIInfo->comm );
      globalNumNodes=0;
      for (n=0; n< in->MPIInfo->size; ++n) {
        loc_offsets[n]=globalNumNodes;
        globalNumNodes+=offsets[n];
      }
#else
      globalNumNodes=loc_offsets[0];
      loc_offsets[0]=0;
#endif
#pragma omp parallel for private(n) schedule(static)
      for (n=0; n<myNodes; ++n) Node_buffer[n]+=loc_offsets[in->MPIInfo->rank];
      /* now entries are collected from the buffer again by sending the entries around in a circle */
#pragma omp parallel for private(n) schedule(static)
      for (n=0; n<in->numNodes; ++n) in->globalNodesIndex[n]=loc_offsets[0]-1;
      dest=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank + 1);
      source=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank - 1);
      buffer_rank=in->MPIInfo->rank;
      for (p=0; p< in->MPIInfo->size; ++p) {
        nodeID_0=distribution[buffer_rank];
        nodeID_1=distribution[buffer_rank+1];
#pragma omp parallel for private(n,k) schedule(static)
        for (n=0;n<in->numNodes;n++) {
          k=in->Id[n];
          if ( ( nodeID_0<=k) && (k<nodeID_1) ) in->globalNodesIndex[n]=Node_buffer[k-nodeID_0];
        }
        if (p<in->MPIInfo->size-1) { /* the last send can be skipped */
#ifdef PASO_MPI
          MPI_Sendrecv_replace(Node_buffer, buffer_len, MPI_INT,
                               dest, in->MPIInfo->msg_tag_counter, source, in->MPIInfo->msg_tag_counter,
                               in->MPIInfo->comm,&status);
#endif
          in->MPIInfo->msg_tag_counter+=1;
        }
        buffer_rank=Paso_MPIInfo_mod(in->MPIInfo->size, buffer_rank-1);
      }
    }
    TMPMEMFREE(Node_buffer);
  }
  TMPMEMFREE(distribution);
  TMPMEMFREE(loc_offsets);
  TMPMEMFREE(offsets);
  return globalNumNodes;
}

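/* reduced-node variant of Finley_NodeFile_createDenseNodeLabeling: only nodes with
   reducedNodeMask[n]>-1 take part in the labeling and the result is written to
   in->globalReducedNodesIndex, all other nodes keep the marker -1 */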
dim_t Finley_NodeFile_createDenseReducedNodeLabeling(Finley_NodeFile* in, index_t* reducedNodeMask)
{
  index_t min_nodeID, max_nodeID, unset_nodeID=-1, set_nodeID=1, nodeID_0, nodeID_1, *Node_buffer=NULL, k;
  Paso_MPI_rank buffer_rank, dest, source, *distribution=NULL;
  dim_t p, buffer_len, n, myNodes, *offsets=NULL, *loc_offsets=NULL, globalNumReducedNodes=0, myNewNodes;
#ifdef PASO_MPI
  MPI_Status status;
#endif

  /* get the global range of node ids */
  Finley_NodeFile_setGlobalIdRange(&min_nodeID,&max_nodeID,in);

  distribution=TMPMEMALLOC(in->MPIInfo->size+1, index_t);
  offsets=TMPMEMALLOC(in->MPIInfo->size, dim_t);
  loc_offsets=TMPMEMALLOC(in->MPIInfo->size, dim_t);

  if (! (Finley_checkPtr(distribution) || Finley_checkPtr(offsets) || Finley_checkPtr(loc_offsets) )) {
    /* distribute the range of node ids */
    buffer_len=Paso_MPIInfo_setDistribution(in->MPIInfo,min_nodeID,max_nodeID,distribution);
    myNodes=distribution[in->MPIInfo->rank+1]-distribution[in->MPIInfo->rank];
    /* allocate buffers */
    Node_buffer=TMPMEMALLOC(buffer_len,index_t);
    if (! Finley_checkPtr(Node_buffer)) {
      /* fill Node_buffer with the unset_nodeID marker to check if nodes are defined */
#pragma omp parallel for private(n) schedule(static)
      for (n=0;n<buffer_len;n++) Node_buffer[n]=unset_nodeID;

      /* fill the buffer by sending portions around in a circle */
      dest=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank + 1);
      source=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank - 1);
      buffer_rank=in->MPIInfo->rank;
      for (p=0; p< in->MPIInfo->size; ++p) {
        if (p>0) { /* the initial send can be skipped */
#ifdef PASO_MPI
          MPI_Sendrecv_replace(Node_buffer, buffer_len, MPI_INT,
                               dest, in->MPIInfo->msg_tag_counter, source, in->MPIInfo->msg_tag_counter,
                               in->MPIInfo->comm,&status);
#endif
          in->MPIInfo->msg_tag_counter++;
        }
        buffer_rank=Paso_MPIInfo_mod(in->MPIInfo->size, buffer_rank-1);
        nodeID_0=distribution[buffer_rank];
        nodeID_1=distribution[buffer_rank+1];
#pragma omp parallel for private(n,k) schedule(static)
        for (n=0;n<in->numNodes;n++) {
          if (reducedNodeMask[n] >-1) {
            k=in->Id[n];
            if ((nodeID_0<=k) && (k<nodeID_1)) {
              Node_buffer[k-nodeID_0] = set_nodeID;
            }
          }
        }
      }
      /* count the entries in the Node_buffer */
      /* TODO: OMP parallel */
      myNewNodes=0;
      for (n=0; n<myNodes; ++n) {
        if ( Node_buffer[n] == set_nodeID) {
          Node_buffer[n]=myNewNodes;
          myNewNodes++;
        }
      }
      memset(loc_offsets,0,in->MPIInfo->size*sizeof(dim_t));
      loc_offsets[in->MPIInfo->rank]=myNewNodes;
#ifdef PASO_MPI
      MPI_Allreduce(loc_offsets,offsets,in->MPIInfo->size, MPI_INT, MPI_SUM, in->MPIInfo->comm );
      globalNumReducedNodes=0;
      for (n=0; n< in->MPIInfo->size; ++n) {
        loc_offsets[n]=globalNumReducedNodes;
        globalNumReducedNodes+=offsets[n];
      }
#else
      globalNumReducedNodes=loc_offsets[0];
      loc_offsets[0]=0;
#endif
#pragma omp parallel for private(n) schedule(static)
      for (n=0; n<myNodes; ++n) Node_buffer[n]+=loc_offsets[in->MPIInfo->rank];
      /* now entries are collected from the buffer again by sending the entries around in a circle */
#pragma omp parallel for private(n) schedule(static)
      for (n=0; n<in->numNodes; ++n) in->globalReducedNodesIndex[n]=loc_offsets[0]-1;
      dest=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank + 1);
      source=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank - 1);
      buffer_rank=in->MPIInfo->rank;
      for (p=0; p< in->MPIInfo->size; ++p) {
        nodeID_0=distribution[buffer_rank];
        nodeID_1=distribution[buffer_rank+1];
#pragma omp parallel for private(n,k) schedule(static)
        for (n=0;n<in->numNodes;n++) {
          if (reducedNodeMask[n] >-1) {
            k=in->Id[n];
            if ( (nodeID_0<=k) && (k<nodeID_1)) in->globalReducedNodesIndex[n]=Node_buffer[k-nodeID_0];
          }
        }
        if (p<in->MPIInfo->size-1) { /* the last send can be skipped */
#ifdef PASO_MPI
          MPI_Sendrecv_replace(Node_buffer, buffer_len, MPI_INT,
                               dest, in->MPIInfo->msg_tag_counter, source, in->MPIInfo->msg_tag_counter,
                               in->MPIInfo->comm,&status);
#endif
          in->MPIInfo->msg_tag_counter+=1;
        }
        buffer_rank=Paso_MPIInfo_mod(in->MPIInfo->size, buffer_rank-1);
      }
    }
    TMPMEMFREE(Node_buffer);
  }
  TMPMEMFREE(distribution);
  TMPMEMFREE(loc_offsets);
  TMPMEMFREE(offsets);
  return globalNumReducedNodes;
}
