/trunk/finley/src/NodeFile_createDenseLabelings.c

Revision 1749 - Wed Sep 3 07:25:01 2008 UTC - by gross
File MIME type: text/plain
File size: 23126 byte(s)
fixes a problem with the saveVTK writing under mpi


/* $Id$ */

/*******************************************************
 *
 *  Copyright 2003-2007 by ACceSS MNRF
 *  Copyright 2007 by University of Queensland
 *
 *  http://esscc.uq.edu.au
 *  Primary Business: Queensland, Australia
 *  Licensed under the Open Software License version 3.0
 *  http://www.opensource.org/licenses/osl-3.0.php
 *
 *******************************************************/

/**************************************************************/

/*   Finley: Mesh: NodeFile */

/*   creates a dense labeling of the global degrees of freedom */
/*   and returns the new number of global degrees of freedom   */

/**************************************************************/

#include "NodeFile.h"

/**************************************************************/

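/* Implementation note (added commentary): all labeling routines in this
   file follow the same pattern. The global id range is split into
   contiguous slices, one per MPI rank (Paso_MPIInfo_setDistribution).
   Each rank marks the ids it references in a buffer, and the buffers are
   rotated through the ranks with MPI_Sendrecv_replace so that, after a
   full cycle, every rank holds the completely marked buffer for its own
   slice. Marked entries are then numbered consecutively, shifted by the
   rank's global offset (obtained via MPI_Allreduce), and a second
   rotation hands the new labels back to every rank that references them.

   Illustrative sketch (not part of the original source): with two ranks
   and global DOF ids {0,2,3,7} used on rank 0 and {3,7,8} on rank 1,
   Finley_NodeFile_createDenseDOFLabeling relabels 0->0, 2->1, 3->2,
   7->3, 8->4 and returns 5 as the new number of global degrees of
   freedom; the exact split of the id range between the ranks depends on
   Paso_MPIInfo_setDistribution. */
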
dim_t Finley_NodeFile_createDenseDOFLabeling(Finley_NodeFile* in)
{
   index_t min_dof, max_dof, unset_dof=-1, set_dof=1, dof_0, dof_1, *DOF_buffer=NULL, k;
   Paso_MPI_rank buffer_rank, dest, source, *distribution=NULL;
   dim_t p, buffer_len, n, myDOFs, *offsets=NULL, *loc_offsets=NULL, new_numGlobalDOFs=0, myNewDOFs;
   bool_t *set_new_DOF=NULL;
   #ifdef PASO_MPI
   MPI_Status status;
   #endif

   /* get the global range of node ids */
   Finley_NodeFile_setGlobalDOFRange(&min_dof, &max_dof, in);

   distribution=TMPMEMALLOC(in->MPIInfo->size+1, index_t);
   offsets=TMPMEMALLOC(in->MPIInfo->size, dim_t);
   loc_offsets=TMPMEMALLOC(in->MPIInfo->size, dim_t);
   set_new_DOF=TMPMEMALLOC(in->numNodes, bool_t);

   if (! (Finley_checkPtr(distribution) || Finley_checkPtr(offsets) || Finley_checkPtr(loc_offsets) || Finley_checkPtr(set_new_DOF))) {
      /* distribute the range of node ids */
      buffer_len=Paso_MPIInfo_setDistribution(in->MPIInfo, min_dof, max_dof, distribution);
      myDOFs=distribution[in->MPIInfo->rank+1]-distribution[in->MPIInfo->rank];
      /* allocate buffers */
      DOF_buffer=TMPMEMALLOC(buffer_len, index_t);
      if (! Finley_checkPtr(DOF_buffer)) {
         /* fill DOF_buffer by the unset_dof marker to check if nodes are defined */
         #pragma omp parallel for private(n) schedule(static)
         for (n=0; n<buffer_len; n++) DOF_buffer[n]=unset_dof;

         /* fill the buffer by sending portions around in a circle */
         dest=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank + 1);
         source=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank - 1);
         buffer_rank=in->MPIInfo->rank;
         for (p=0; p<in->MPIInfo->size; ++p) {
            if (p>0) { /* the initial send can be skipped */
               #ifdef PASO_MPI
               MPI_Sendrecv_replace(DOF_buffer, buffer_len, MPI_INT,
                                    dest, in->MPIInfo->msg_tag_counter, source, in->MPIInfo->msg_tag_counter,
                                    in->MPIInfo->comm, &status);
               #endif
               in->MPIInfo->msg_tag_counter++;
            }
            buffer_rank=Paso_MPIInfo_mod(in->MPIInfo->size, buffer_rank-1);
            dof_0=distribution[buffer_rank];
            dof_1=distribution[buffer_rank+1];
            #pragma omp parallel for private(n,k) schedule(static)
            for (n=0; n<in->numNodes; n++) {
               k=in->globalDegreesOfFreedom[n];
               if ((dof_0<=k) && (k<dof_1)) {
                  DOF_buffer[k-dof_0]=set_dof;
               }
            }
         }
         /* count the entries in the DOF_buffer */
         /* TODO: OMP parallel */
         myNewDOFs=0;
         for (n=0; n<myDOFs; ++n) {
            if (DOF_buffer[n] == set_dof) {
               DOF_buffer[n]=myNewDOFs;
               myNewDOFs++;
            }
         }
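         /* added commentary: gather the per-rank counts of newly labeled
            DOFs and turn them into exclusive prefix sums, so that rank r
            shifts its locally assigned numbers by loc_offsets[r] */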
         memset(loc_offsets, 0, in->MPIInfo->size*sizeof(dim_t));
         loc_offsets[in->MPIInfo->rank]=myNewDOFs;
         #ifdef PASO_MPI
         MPI_Allreduce(loc_offsets, offsets, in->MPIInfo->size, MPI_INT, MPI_SUM, in->MPIInfo->comm);
         new_numGlobalDOFs=0;
         for (n=0; n<in->MPIInfo->size; ++n) {
            loc_offsets[n]=new_numGlobalDOFs;
            new_numGlobalDOFs+=offsets[n];
         }
         #else
         new_numGlobalDOFs=loc_offsets[0];
         loc_offsets[0]=0;
         #endif
         #pragma omp parallel for private(n) schedule(static)
         for (n=0; n<myDOFs; ++n) DOF_buffer[n]+=loc_offsets[in->MPIInfo->rank];
         /* now entries are collected from the buffer again by sending the entries around in a circle */
         #pragma omp parallel for private(n) schedule(static)
         for (n=0; n<in->numNodes; ++n) set_new_DOF[n]=TRUE;
         dest=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank + 1);
         source=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank - 1);
         buffer_rank=in->MPIInfo->rank;
         for (p=0; p<in->MPIInfo->size; ++p) {
            dof_0=distribution[buffer_rank];
            dof_1=distribution[buffer_rank+1];
            #pragma omp parallel for private(n,k) schedule(static)
            for (n=0; n<in->numNodes; n++) {
               k=in->globalDegreesOfFreedom[n];
               if (set_new_DOF[n] && (dof_0<=k) && (k<dof_1)) {
                  in->globalDegreesOfFreedom[n]=DOF_buffer[k-dof_0];
                  set_new_DOF[n]=FALSE;
               }
            }
            if (p<in->MPIInfo->size-1) { /* the last send can be skipped */
               #ifdef PASO_MPI
               MPI_Sendrecv_replace(DOF_buffer, buffer_len, MPI_INT,
                                    dest, in->MPIInfo->msg_tag_counter, source, in->MPIInfo->msg_tag_counter,
                                    in->MPIInfo->comm, &status);
               #endif
               in->MPIInfo->msg_tag_counter+=1;
            }
            buffer_rank=Paso_MPIInfo_mod(in->MPIInfo->size, buffer_rank-1);
         }
      }
      TMPMEMFREE(DOF_buffer);
   }
   TMPMEMFREE(distribution);
   TMPMEMFREE(loc_offsets);
   TMPMEMFREE(offsets);
   TMPMEMFREE(set_new_DOF);
   return new_numGlobalDOFs;
}

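/* Added commentary: for every node, determine the MPI rank that owns its
   global degree of freedom under the given distribution (distribution[p]
   is the first DOF owned by rank p) and store it in mpiRankOfDOF[n]. */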
void Finley_NodeFile_assignMPIRankToDOFs(Finley_NodeFile* in, Paso_MPI_rank* mpiRankOfDOF, index_t *distribution)
{
   index_t min_DOF, max_DOF, k;
   dim_t n;
   Paso_MPI_rank p, p_min=in->MPIInfo->size, p_max=-1;
   /* first we calculate the min and max dof on this processor to reduce costs for searching */
   Finley_NodeFile_setDOFRange(&min_DOF, &max_DOF, in);

   for (p=0; p<in->MPIInfo->size; ++p) {
      if (distribution[p]<=min_DOF) p_min=p;
      if (distribution[p]<=max_DOF) p_max=p;
   }
   #pragma omp parallel for private(n,k,p) schedule(static)
   for (n=0; n<in->numNodes; ++n) {
      k=in->globalDegreesOfFreedom[n];
      for (p=p_min; p<=p_max; ++p) {
         if (k<distribution[p+1]) {
            mpiRankOfDOF[n]=p;
            break;
         }
      }
   }
}
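
/* Added commentary: same ring algorithm as
   Finley_NodeFile_createDenseDOFLabeling, but only nodes with
   reducedNodeMask[n]>-1 take part. All entries of in->globalReducedDOFIndex
   are first initialised to -1; the entries of participating nodes are then
   overwritten with their new dense label, and the total number of reduced
   degrees of freedom is returned. */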
dim_t Finley_NodeFile_createDenseReducedDOFLabeling(Finley_NodeFile* in, index_t* reducedNodeMask)
{
   index_t min_dof, max_dof, unset_dof=-1, set_dof=1, dof_0, dof_1, *DOF_buffer=NULL, k;
   Paso_MPI_rank buffer_rank, dest, source, *distribution=NULL;
   dim_t p, buffer_len, n, myDOFs, *offsets=NULL, *loc_offsets=NULL, globalNumReducedDOFs=0, myNewDOFs;
   #ifdef PASO_MPI
   MPI_Status status;
   #endif

   /* get the global range of node ids */
   Finley_NodeFile_setGlobalDOFRange(&min_dof, &max_dof, in);

   distribution=TMPMEMALLOC(in->MPIInfo->size+1, index_t);
   offsets=TMPMEMALLOC(in->MPIInfo->size, dim_t);
   loc_offsets=TMPMEMALLOC(in->MPIInfo->size, dim_t);

   if (! (Finley_checkPtr(distribution) || Finley_checkPtr(offsets) || Finley_checkPtr(loc_offsets))) {
      /* distribute the range of node ids */
      buffer_len=Paso_MPIInfo_setDistribution(in->MPIInfo, min_dof, max_dof, distribution);
      myDOFs=distribution[in->MPIInfo->rank+1]-distribution[in->MPIInfo->rank];
      /* allocate buffers */
      DOF_buffer=TMPMEMALLOC(buffer_len, index_t);
      if (! Finley_checkPtr(DOF_buffer)) {
         /* fill DOF_buffer by the unset_dof marker to check if nodes are defined */
         #pragma omp parallel for private(n) schedule(static)
         for (n=0; n<buffer_len; n++) DOF_buffer[n]=unset_dof;

         /* fill the buffer by sending portions around in a circle */
         dest=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank + 1);
         source=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank - 1);
         buffer_rank=in->MPIInfo->rank;
         for (p=0; p<in->MPIInfo->size; ++p) {
            if (p>0) { /* the initial send can be skipped */
               #ifdef PASO_MPI
               MPI_Sendrecv_replace(DOF_buffer, buffer_len, MPI_INT,
                                    dest, in->MPIInfo->msg_tag_counter, source, in->MPIInfo->msg_tag_counter,
                                    in->MPIInfo->comm, &status);
               #endif
               in->MPIInfo->msg_tag_counter++;
            }
            buffer_rank=Paso_MPIInfo_mod(in->MPIInfo->size, buffer_rank-1);
            dof_0=distribution[buffer_rank];
            dof_1=distribution[buffer_rank+1];
            #pragma omp parallel for private(n,k) schedule(static)
            for (n=0; n<in->numNodes; n++) {
               if (reducedNodeMask[n]>-1) {
                  k=in->globalDegreesOfFreedom[n];
                  if ((dof_0<=k) && (k<dof_1)) {
                     DOF_buffer[k-dof_0]=set_dof;
                  }
               }
            }
         }
         /* count the entries in the DOF_buffer */
         /* TODO: OMP parallel */
         myNewDOFs=0;
         for (n=0; n<myDOFs; ++n) {
            if (DOF_buffer[n] == set_dof) {
               DOF_buffer[n]=myNewDOFs;
               myNewDOFs++;
            }
         }
         memset(loc_offsets, 0, in->MPIInfo->size*sizeof(dim_t));
         loc_offsets[in->MPIInfo->rank]=myNewDOFs;
         #ifdef PASO_MPI
         MPI_Allreduce(loc_offsets, offsets, in->MPIInfo->size, MPI_INT, MPI_SUM, in->MPIInfo->comm);
         globalNumReducedDOFs=0;
         for (n=0; n<in->MPIInfo->size; ++n) {
            loc_offsets[n]=globalNumReducedDOFs;
            globalNumReducedDOFs+=offsets[n];
         }
         #else
         globalNumReducedDOFs=loc_offsets[0];
         loc_offsets[0]=0;
         #endif
         #pragma omp parallel for private(n) schedule(static)
         for (n=0; n<myDOFs; ++n) DOF_buffer[n]+=loc_offsets[in->MPIInfo->rank];
         /* now entries are collected from the buffer again by sending the entries around in a circle */
         #pragma omp parallel for private(n) schedule(static)
         for (n=0; n<in->numNodes; ++n) in->globalReducedDOFIndex[n]=loc_offsets[0]-1;
         dest=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank + 1);
         source=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank - 1);
         buffer_rank=in->MPIInfo->rank;
         for (p=0; p<in->MPIInfo->size; ++p) {
            dof_0=distribution[buffer_rank];
            dof_1=distribution[buffer_rank+1];
            #pragma omp parallel for private(n,k) schedule(static)
            for (n=0; n<in->numNodes; n++) {
               if (reducedNodeMask[n]>-1) {
                  k=in->globalDegreesOfFreedom[n];
                  if ((dof_0<=k) && (k<dof_1)) in->globalReducedDOFIndex[n]=DOF_buffer[k-dof_0];
               }
            }
            if (p<in->MPIInfo->size-1) { /* the last send can be skipped */
               #ifdef PASO_MPI
               MPI_Sendrecv_replace(DOF_buffer, buffer_len, MPI_INT,
                                    dest, in->MPIInfo->msg_tag_counter, source, in->MPIInfo->msg_tag_counter,
                                    in->MPIInfo->comm, &status);
               #endif
               in->MPIInfo->msg_tag_counter+=1;
            }
            buffer_rank=Paso_MPIInfo_mod(in->MPIInfo->size, buffer_rank-1);
         }
      }
      TMPMEMFREE(DOF_buffer);
   }
   TMPMEMFREE(distribution);
   TMPMEMFREE(loc_offsets);
   TMPMEMFREE(offsets);
   return globalNumReducedDOFs;
}
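
/* Added commentary: assigns a dense global numbering to the nodes and
   returns the new global number of nodes. Each node is numbered by the
   rank that owns its degree of freedom (given by dof_distribution); the
   node ids owned by a rank are packed into a buffer whose two-entry
   header carries the id range of the originating rank, numbered
   consecutively, shifted by the offsets gathered into node_distribution,
   and the buffer is then passed around the ranks so that every rank can
   fill in->globalNodesIndex for the nodes it references. */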
dim_t Finley_NodeFile_createDenseNodeLabeling(Finley_NodeFile* in, index_t* node_distribution, const index_t* dof_distribution)
{
   index_t myFirstDOF, myLastDOF, max_id, min_id, loc_max_id, loc_min_id, dof, id, itmp, nodeID_0, nodeID_1, dof_0, dof_1, *Node_buffer=NULL;
   dim_t n, my_buffer_len, buffer_len, globalNumNodes=0, myNewNumNodes;
   Paso_MPI_rank p, dest, source, buffer_rank;
   const index_t unset_nodeID=-1, set_nodeID=1;
   const dim_t header_len=2;
   #ifdef PASO_MPI
   MPI_Status status;
   #endif
   Paso_MPI_rank myRank=in->MPIInfo->rank;

   /* find the range of node ids controlled by me */

   myFirstDOF=dof_distribution[in->MPIInfo->rank];
   myLastDOF=dof_distribution[in->MPIInfo->rank+1];
   max_id=-INDEX_T_MAX;
   min_id=INDEX_T_MAX;
   #pragma omp parallel private(loc_max_id,loc_min_id)
   {
      loc_max_id=max_id;
      loc_min_id=min_id;
      #pragma omp for private(n,dof,id) schedule(static)
      for (n=0; n<in->numNodes; n++) {
         dof=in->globalDegreesOfFreedom[n];
         id=in->Id[n];
         if ((myFirstDOF<=dof) && (dof<myLastDOF)) {
            loc_max_id=MAX(loc_max_id, id);
            loc_min_id=MIN(loc_min_id, id);
         }
      }
      #pragma omp critical
      {
         max_id=MAX(loc_max_id, max_id);
         min_id=MIN(loc_min_id, min_id);
      }
   }
   /* allocate a buffer */
   my_buffer_len=max_id>=min_id ? max_id-min_id+1 : 0;

   #ifdef PASO_MPI
   MPI_Allreduce(&my_buffer_len, &buffer_len, 1, MPI_INT, MPI_MAX, in->MPIInfo->comm);
   #else
   buffer_len=my_buffer_len;
   #endif

   Node_buffer=TMPMEMALLOC(buffer_len+header_len, index_t);
   if (! Finley_checkPtr(Node_buffer)) {
      /* mark and count the nodes in use */
      #pragma omp parallel
      {
         #pragma omp for private(n) schedule(static)
         for (n=0; n<buffer_len; n++) Node_buffer[n]=unset_nodeID;
         #pragma omp for private(n) schedule(static)
         for (n=0; n<in->numNodes; n++) in->globalNodesIndex[n]=-1;
         #pragma omp for private(n,dof,id) schedule(static)
         for (n=0; n<in->numNodes; n++) {
            dof=in->globalDegreesOfFreedom[n];
            id=in->Id[n];
            if ((myFirstDOF<=dof) && (dof<myLastDOF)) Node_buffer[id-min_id+header_len]=set_nodeID;
         }
      }
      myNewNumNodes=0;
      for (n=0; n<my_buffer_len; n++) {
         if (Node_buffer[header_len+n]==set_nodeID) {
            Node_buffer[header_len+n]=myNewNumNodes;
            myNewNumNodes++;
         }
      }
      /* make the local number of nodes globally available */
      #ifdef PASO_MPI
      MPI_Allgather(&myNewNumNodes, 1, MPI_INT, node_distribution, 1, MPI_INT, in->MPIInfo->comm);
      #else
      node_distribution[0]=myNewNumNodes;
      #endif
      globalNumNodes=0;
      for (p=0; p<in->MPIInfo->size; ++p) {
         itmp=node_distribution[p];
         node_distribution[p]=globalNumNodes;
         globalNumNodes+=itmp;
      }
      node_distribution[in->MPIInfo->size]=globalNumNodes;

      /* offset Node_buffer */
      itmp=node_distribution[in->MPIInfo->rank];
      #pragma omp for private(n) schedule(static)
      for (n=0; n<my_buffer_len; n++) Node_buffer[n+header_len]+=itmp;

      /* now we send this buffer around to assign global node index: */
      dest=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank + 1);
      source=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank - 1);
      Node_buffer[0]=min_id;
      Node_buffer[1]=max_id;
      buffer_rank=in->MPIInfo->rank;
      for (p=0; p<in->MPIInfo->size; ++p) {
         nodeID_0=Node_buffer[0];
         nodeID_1=Node_buffer[1];
         dof_0=dof_distribution[buffer_rank];
         dof_1=dof_distribution[buffer_rank+1];
         if (nodeID_0<=nodeID_1) {
            #pragma omp for private(n,dof,id) schedule(static)
            for (n=0; n<in->numNodes; n++) {
               dof=in->globalDegreesOfFreedom[n];
               id=in->Id[n]-nodeID_0;
               if ((dof_0<=dof) && (dof<dof_1) && (id>=0) && (id<=nodeID_1-nodeID_0)) in->globalNodesIndex[n]=Node_buffer[id+header_len];
            }
         }
         if (p<in->MPIInfo->size-1) { /* the last send can be skipped */
            #ifdef PASO_MPI
            MPI_Sendrecv_replace(Node_buffer, buffer_len+header_len, MPI_INT,
                                 dest, in->MPIInfo->msg_tag_counter, source, in->MPIInfo->msg_tag_counter,
                                 in->MPIInfo->comm, &status);
            #endif
            in->MPIInfo->msg_tag_counter+=1;
         }
         buffer_rank=Paso_MPIInfo_mod(in->MPIInfo->size, buffer_rank-1);
      }
   }
   TMPMEMFREE(Node_buffer);
   return globalNumNodes;
}

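/* Added commentary: analogue of Finley_NodeFile_createDenseReducedDOFLabeling
   for the (already dense) global node numbering: nodes with
   reducedNodeMask[n]>-1 are relabeled densely into
   in->globalReducedNodesIndex, excluded nodes keep the marker value -1,
   and the total number of reduced nodes is returned. */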
dim_t Finley_NodeFile_createDenseReducedNodeLabeling(Finley_NodeFile* in, index_t* reducedNodeMask)
{
   index_t min_node, max_node, unset_node=-1, set_node=1, node_0, node_1, *Nodes_buffer=NULL, k;
   Paso_MPI_rank buffer_rank, dest, source, *distribution=NULL;
   dim_t p, buffer_len, n, myNodes, *offsets=NULL, *loc_offsets=NULL, globalNumReducedNodes=0, myNewNodes;
   #ifdef PASO_MPI
   MPI_Status status;
   #endif

   /* get the global range of node ids */
   Finley_NodeFile_setGlobalNodeIDIndexRange(&min_node, &max_node, in);

   distribution=TMPMEMALLOC(in->MPIInfo->size+1, index_t);
   offsets=TMPMEMALLOC(in->MPIInfo->size, dim_t);
   loc_offsets=TMPMEMALLOC(in->MPIInfo->size, dim_t);

   if (! (Finley_checkPtr(distribution) || Finley_checkPtr(offsets) || Finley_checkPtr(loc_offsets))) {
      /* distribute the range of node ids */
      buffer_len=Paso_MPIInfo_setDistribution(in->MPIInfo, min_node, max_node, distribution);
      myNodes=distribution[in->MPIInfo->rank+1]-distribution[in->MPIInfo->rank];
      /* allocate buffers */
      Nodes_buffer=TMPMEMALLOC(buffer_len, index_t);
      if (! Finley_checkPtr(Nodes_buffer)) {
         /* fill Nodes_buffer by the unset_node marker to check if nodes are defined */
         #pragma omp parallel for private(n) schedule(static)
         for (n=0; n<buffer_len; n++) Nodes_buffer[n]=unset_node;

         /* fill the buffer by sending portions around in a circle */
         dest=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank + 1);
         source=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank - 1);
         buffer_rank=in->MPIInfo->rank;
         for (p=0; p<in->MPIInfo->size; ++p) {
            if (p>0) { /* the initial send can be skipped */
               #ifdef PASO_MPI
               MPI_Sendrecv_replace(Nodes_buffer, buffer_len, MPI_INT,
                                    dest, in->MPIInfo->msg_tag_counter, source, in->MPIInfo->msg_tag_counter,
                                    in->MPIInfo->comm, &status);
               #endif
               in->MPIInfo->msg_tag_counter++;
            }
            buffer_rank=Paso_MPIInfo_mod(in->MPIInfo->size, buffer_rank-1);
            node_0=distribution[buffer_rank];
            node_1=distribution[buffer_rank+1];
            #pragma omp parallel for private(n,k) schedule(static)
            for (n=0; n<in->numNodes; n++) {
               if (reducedNodeMask[n]>-1) {
                  k=in->globalNodesIndex[n];
                  if ((node_0<=k) && (k<node_1)) {
                     Nodes_buffer[k-node_0]=set_node;
                  }
               }
            }
         }
         /* count the entries in the Nodes_buffer */
         /* TODO: OMP parallel */
         myNewNodes=0;
         for (n=0; n<myNodes; ++n) {
            if (Nodes_buffer[n] == set_node) {
               Nodes_buffer[n]=myNewNodes;
               myNewNodes++;
            }
         }
         memset(loc_offsets, 0, in->MPIInfo->size*sizeof(dim_t));
         loc_offsets[in->MPIInfo->rank]=myNewNodes;
         #ifdef PASO_MPI
         MPI_Allreduce(loc_offsets, offsets, in->MPIInfo->size, MPI_INT, MPI_SUM, in->MPIInfo->comm);
         globalNumReducedNodes=0;
         for (n=0; n<in->MPIInfo->size; ++n) {
            loc_offsets[n]=globalNumReducedNodes;
            globalNumReducedNodes+=offsets[n];
         }
         #else
         globalNumReducedNodes=loc_offsets[0];
         loc_offsets[0]=0;
         #endif
         #pragma omp parallel for private(n) schedule(static)
         for (n=0; n<myNodes; ++n) Nodes_buffer[n]+=loc_offsets[in->MPIInfo->rank];
         /* now entries are collected from the buffer again by sending the entries around in a circle */
         #pragma omp parallel for private(n) schedule(static)
         for (n=0; n<in->numNodes; ++n) in->globalReducedNodesIndex[n]=loc_offsets[0]-1;
         dest=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank + 1);
         source=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank - 1);
         buffer_rank=in->MPIInfo->rank;
         for (p=0; p<in->MPIInfo->size; ++p) {
            node_0=distribution[buffer_rank];
            node_1=distribution[buffer_rank+1];
            #pragma omp parallel for private(n,k) schedule(static)
            for (n=0; n<in->numNodes; n++) {
               if (reducedNodeMask[n]>-1) {
                  k=in->globalNodesIndex[n];
                  if ((node_0<=k) && (k<node_1)) in->globalReducedNodesIndex[n]=Nodes_buffer[k-node_0];
               }
            }
            if (p<in->MPIInfo->size-1) { /* the last send can be skipped */
               #ifdef PASO_MPI
               MPI_Sendrecv_replace(Nodes_buffer, buffer_len, MPI_INT,
                                    dest, in->MPIInfo->msg_tag_counter, source, in->MPIInfo->msg_tag_counter,
                                    in->MPIInfo->comm, &status);
               #endif
               in->MPIInfo->msg_tag_counter+=1;
            }
            buffer_rank=Paso_MPIInfo_mod(in->MPIInfo->size, buffer_rank-1);
         }
      }
      TMPMEMFREE(Nodes_buffer);
   }
   TMPMEMFREE(distribution);
   TMPMEMFREE(loc_offsets);
   TMPMEMFREE(offsets);
   return globalNumReducedNodes;
}
