/trunk/finley/src/NodeFile_createDenseLabelings.c
Revision 1981 - Thu Nov 6 05:27:33 2008 UTC by jfenwick
More warning removal.

/*******************************************************
 *
 * Copyright (c) 2003-2008 by University of Queensland
 * Earth Systems Science Computational Center (ESSCC)
 * http://www.uq.edu.au/esscc
 *
 * Primary Business: Queensland, Australia
 * Licensed under the Open Software License version 3.0
 * http://www.opensource.org/licenses/osl-3.0.php
 *
 *******************************************************/


/**************************************************************/

/*   Finley: Mesh: NodeFile                                     */

/*   creates a dense labeling of the global degrees of freedom  */
/*   and returns the new number of global degrees of freedom    */

/**************************************************************/

#include "NodeFile.h"

/**************************************************************/
dim_t Finley_NodeFile_createDenseDOFLabeling(Finley_NodeFile* in)
{
    index_t min_dof, max_dof, unset_dof=-1, set_dof=1, dof_0, dof_1, *DOF_buffer=NULL, k;
    Paso_MPI_rank buffer_rank, dest, source, *distribution=NULL;
    dim_t p, buffer_len, n, myDOFs, *offsets=NULL, *loc_offsets=NULL, new_numGlobalDOFs=0, myNewDOFs;
    bool_t *set_new_DOF=NULL;
    #ifdef PASO_MPI
    MPI_Status status;
    #endif

    /* get the global range of degree-of-freedom ids */
    Finley_NodeFile_setGlobalDOFRange(&min_dof, &max_dof, in);

    distribution=TMPMEMALLOC(in->MPIInfo->size+1, index_t);
    offsets=TMPMEMALLOC(in->MPIInfo->size, dim_t);
    loc_offsets=TMPMEMALLOC(in->MPIInfo->size, dim_t);
    set_new_DOF=TMPMEMALLOC(in->numNodes, bool_t);

    if (! (Finley_checkPtr(distribution) || Finley_checkPtr(offsets) || Finley_checkPtr(loc_offsets) || Finley_checkPtr(set_new_DOF)) ) {
        /* distribute the range of DOF ids */
        buffer_len=Paso_MPIInfo_setDistribution(in->MPIInfo, min_dof, max_dof, distribution);
        myDOFs=distribution[in->MPIInfo->rank+1]-distribution[in->MPIInfo->rank];
        /* allocate buffers */
        DOF_buffer=TMPMEMALLOC(buffer_len, index_t);
        if (! Finley_checkPtr(DOF_buffer)) {
            /* fill DOF_buffer with the unset_dof marker so that unused DOFs can be detected */
            #pragma omp parallel for private(n) schedule(static)
            for (n=0; n<buffer_len; n++) DOF_buffer[n]=unset_dof;

            /* fill the buffer by sending portions around in a circle */
            dest=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank + 1);
            source=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank - 1);
            buffer_rank=in->MPIInfo->rank;
            for (p=0; p<in->MPIInfo->size; ++p) {
                if (p>0) { /* the initial send can be skipped */
                    #ifdef PASO_MPI
                    MPI_Sendrecv_replace(DOF_buffer, buffer_len, MPI_INT,
                                         dest, in->MPIInfo->msg_tag_counter, source, in->MPIInfo->msg_tag_counter,
                                         in->MPIInfo->comm, &status);
                    #endif
                    in->MPIInfo->msg_tag_counter++;
                }
                buffer_rank=Paso_MPIInfo_mod(in->MPIInfo->size, buffer_rank-1);
                dof_0=distribution[buffer_rank];
                dof_1=distribution[buffer_rank+1];
                #pragma omp parallel for private(n,k) schedule(static)
                for (n=0; n<in->numNodes; n++) {
                    k=in->globalDegreesOfFreedom[n];
                    if ((dof_0<=k) && (k<dof_1)) {
                        DOF_buffer[k-dof_0]=set_dof;
                    }
                }
            }
            /* count the entries in the DOF_buffer */
            /* TODO: OMP parallel */
            myNewDOFs=0;
            for (n=0; n<myDOFs; ++n) {
                if (DOF_buffer[n]==set_dof) {
                    DOF_buffer[n]=myNewDOFs;
                    myNewDOFs++;
                }
            }
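            /* Note (editorial, illustrative only): loc_offsets holds this
               rank's count in its own slot and zeros elsewhere, so the MPI_SUM
               all-reduce below yields the full vector of per-rank counts on
               every rank. With counts {4,2,3} on three ranks, for example, the
               exclusive prefix sum turns loc_offsets into {0,4,6} and
               new_numGlobalDOFs becomes 9. */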
            memset(loc_offsets, 0, in->MPIInfo->size*sizeof(dim_t));
            loc_offsets[in->MPIInfo->rank]=myNewDOFs;
            #ifdef PASO_MPI
            MPI_Allreduce(loc_offsets, offsets, in->MPIInfo->size, MPI_INT, MPI_SUM, in->MPIInfo->comm);
            new_numGlobalDOFs=0;
            for (n=0; n<in->MPIInfo->size; ++n) {
                loc_offsets[n]=new_numGlobalDOFs;
                new_numGlobalDOFs+=offsets[n];
            }
            #else
            new_numGlobalDOFs=loc_offsets[0];
            loc_offsets[0]=0;
            #endif
            #pragma omp parallel
            {
                #pragma omp for private(n) schedule(static)
                for (n=0; n<myDOFs; ++n) DOF_buffer[n]+=loc_offsets[in->MPIInfo->rank];
                /* now the new labels are collected from the buffer by sending it around in a circle again */
                #pragma omp for private(n) schedule(static)
                for (n=0; n<in->numNodes; ++n) set_new_DOF[n]=TRUE;
            }
            dest=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank + 1);
            source=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank - 1);
            buffer_rank=in->MPIInfo->rank;
            for (p=0; p<in->MPIInfo->size; ++p) {
                dof_0=distribution[buffer_rank];
                dof_1=distribution[buffer_rank+1];
                #pragma omp parallel for private(n,k) schedule(static)
                for (n=0; n<in->numNodes; n++) {
                    k=in->globalDegreesOfFreedom[n];
                    if (set_new_DOF[n] && (dof_0<=k) && (k<dof_1)) {
                        in->globalDegreesOfFreedom[n]=DOF_buffer[k-dof_0];
                        set_new_DOF[n]=FALSE;
                    }
                }
                if (p<in->MPIInfo->size-1) { /* the last send can be skipped */
                    #ifdef PASO_MPI
                    MPI_Sendrecv_replace(DOF_buffer, buffer_len, MPI_INT,
                                         dest, in->MPIInfo->msg_tag_counter, source, in->MPIInfo->msg_tag_counter,
                                         in->MPIInfo->comm, &status);
                    #endif
                    in->MPIInfo->msg_tag_counter+=1;
                }
                buffer_rank=Paso_MPIInfo_mod(in->MPIInfo->size, buffer_rank-1);
            }
        }
        TMPMEMFREE(DOF_buffer);
    }
    TMPMEMFREE(distribution);
    TMPMEMFREE(loc_offsets);
    TMPMEMFREE(offsets);
    TMPMEMFREE(set_new_DOF);
    return new_numGlobalDOFs;
}

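/* Summary (editorial, inferred from the code below): for every node the
   routine determines which slice of the given distribution contains the
   node's global degree of freedom and stores the owning MPI rank in
   mpiRankOfDOF[n]. The search is restricted to the ranks [p_min, p_max]
   whose slices can overlap the local DOF range.

   Illustrative call (variable names are hypothetical):

       Finley_NodeFile_assignMPIRankToDOFs(nodes, rankOfDOF, dofDistribution);
*/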
void Finley_NodeFile_assignMPIRankToDOFs(Finley_NodeFile* in, Paso_MPI_rank* mpiRankOfDOF, index_t *distribution)
{
    index_t min_DOF, max_DOF, k;
    dim_t n;
    Paso_MPI_rank p, p_min=in->MPIInfo->size, p_max=-1;
    /* first we calculate the min and max DOF on this processor to reduce the search costs */
    Finley_NodeFile_setDOFRange(&min_DOF, &max_DOF, in);

    for (p=0; p<in->MPIInfo->size; ++p) {
        if (distribution[p]<=min_DOF) p_min=p;
        if (distribution[p]<=max_DOF) p_max=p;
    }
    #pragma omp parallel for private(n,k,p) schedule(static)
    for (n=0; n<in->numNodes; ++n) {
        k=in->globalDegreesOfFreedom[n];
        for (p=p_min; p<=p_max; ++p) {
            if (k<distribution[p+1]) {
                mpiRankOfDOF[n]=p;
                break;
            }
        }
    }
}

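/* Summary (editorial, inferred from the code below): the reduced variant of
   the dense DOF labeling; only nodes with reducedNodeMask[n] > -1 take part.
   The new labels are written to in->globalReducedDOFIndex (entries for nodes
   outside the mask are left at -1) and the number of reduced degrees of
   freedom is returned. */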
dim_t Finley_NodeFile_createDenseReducedDOFLabeling(Finley_NodeFile* in, index_t* reducedNodeMask)
{
    index_t min_dof, max_dof, unset_dof=-1, set_dof=1, dof_0, dof_1, *DOF_buffer=NULL, k;
    Paso_MPI_rank buffer_rank, dest, source, *distribution=NULL;
    dim_t p, buffer_len, n, myDOFs, *offsets=NULL, *loc_offsets=NULL, globalNumReducedDOFs=0, myNewDOFs;
    #ifdef PASO_MPI
    MPI_Status status;
    #endif

    /* get the global range of degree-of-freedom ids */
    Finley_NodeFile_setGlobalDOFRange(&min_dof, &max_dof, in);

    distribution=TMPMEMALLOC(in->MPIInfo->size+1, index_t);
    offsets=TMPMEMALLOC(in->MPIInfo->size, dim_t);
    loc_offsets=TMPMEMALLOC(in->MPIInfo->size, dim_t);

    if (! (Finley_checkPtr(distribution) || Finley_checkPtr(offsets) || Finley_checkPtr(loc_offsets)) ) {
        /* distribute the range of DOF ids */
        buffer_len=Paso_MPIInfo_setDistribution(in->MPIInfo, min_dof, max_dof, distribution);
        myDOFs=distribution[in->MPIInfo->rank+1]-distribution[in->MPIInfo->rank];
        /* allocate buffers */
        DOF_buffer=TMPMEMALLOC(buffer_len, index_t);
        if (! Finley_checkPtr(DOF_buffer)) {
            /* fill DOF_buffer with the unset_dof marker so that unused reduced DOFs can be detected */
            #pragma omp parallel for private(n) schedule(static)
            for (n=0; n<buffer_len; n++) DOF_buffer[n]=unset_dof;

            /* fill the buffer by sending portions around in a circle */
            dest=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank + 1);
            source=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank - 1);
            buffer_rank=in->MPIInfo->rank;
            for (p=0; p<in->MPIInfo->size; ++p) {
                if (p>0) { /* the initial send can be skipped */
                    #ifdef PASO_MPI
                    MPI_Sendrecv_replace(DOF_buffer, buffer_len, MPI_INT,
                                         dest, in->MPIInfo->msg_tag_counter, source, in->MPIInfo->msg_tag_counter,
                                         in->MPIInfo->comm, &status);
                    #endif
                    in->MPIInfo->msg_tag_counter++;
                }
                buffer_rank=Paso_MPIInfo_mod(in->MPIInfo->size, buffer_rank-1);
                dof_0=distribution[buffer_rank];
                dof_1=distribution[buffer_rank+1];
                #pragma omp parallel for private(n,k) schedule(static)
                for (n=0; n<in->numNodes; n++) {
                    if (reducedNodeMask[n]>-1) {
                        k=in->globalDegreesOfFreedom[n];
                        if ((dof_0<=k) && (k<dof_1)) {
                            DOF_buffer[k-dof_0]=set_dof;
                        }
                    }
                }
            }
            /* count the entries in the DOF_buffer */
            /* TODO: OMP parallel */
            myNewDOFs=0;
            for (n=0; n<myDOFs; ++n) {
                if (DOF_buffer[n]==set_dof) {
                    DOF_buffer[n]=myNewDOFs;
                    myNewDOFs++;
                }
            }
            memset(loc_offsets, 0, in->MPIInfo->size*sizeof(dim_t));
            loc_offsets[in->MPIInfo->rank]=myNewDOFs;
            #ifdef PASO_MPI
            MPI_Allreduce(loc_offsets, offsets, in->MPIInfo->size, MPI_INT, MPI_SUM, in->MPIInfo->comm);
            globalNumReducedDOFs=0;
            for (n=0; n<in->MPIInfo->size; ++n) {
                loc_offsets[n]=globalNumReducedDOFs;
                globalNumReducedDOFs+=offsets[n];
            }
            #else
            globalNumReducedDOFs=loc_offsets[0];
            loc_offsets[0]=0;
            #endif
            #pragma omp parallel for private(n) schedule(static)
            for (n=0; n<myDOFs; ++n) DOF_buffer[n]+=loc_offsets[in->MPIInfo->rank];
            /* now the new labels are collected from the buffer by sending it around in a circle again */
            #pragma omp parallel for private(n) schedule(static)
            for (n=0; n<in->numNodes; ++n) in->globalReducedDOFIndex[n]=loc_offsets[0]-1;
            dest=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank + 1);
            source=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank - 1);
            buffer_rank=in->MPIInfo->rank;
            for (p=0; p<in->MPIInfo->size; ++p) {
                dof_0=distribution[buffer_rank];
                dof_1=distribution[buffer_rank+1];
                #pragma omp parallel for private(n,k) schedule(static)
                for (n=0; n<in->numNodes; n++) {
                    if (reducedNodeMask[n]>-1) {
                        k=in->globalDegreesOfFreedom[n];
                        if ((dof_0<=k) && (k<dof_1)) in->globalReducedDOFIndex[n]=DOF_buffer[k-dof_0];
                    }
                }
                if (p<in->MPIInfo->size-1) { /* the last send can be skipped */
                    #ifdef PASO_MPI
                    MPI_Sendrecv_replace(DOF_buffer, buffer_len, MPI_INT,
                                         dest, in->MPIInfo->msg_tag_counter, source, in->MPIInfo->msg_tag_counter,
                                         in->MPIInfo->comm, &status);
                    #endif
                    in->MPIInfo->msg_tag_counter+=1;
                }
                buffer_rank=Paso_MPIInfo_mod(in->MPIInfo->size, buffer_rank-1);
            }
        }
        TMPMEMFREE(DOF_buffer);
    }
    TMPMEMFREE(distribution);
    TMPMEMFREE(loc_offsets);
    TMPMEMFREE(offsets);
    return globalNumReducedDOFs;
}

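/* Summary (editorial, inferred from the code below): dense node labeling
   differs slightly from the DOF case. Each rank labels the node Ids attached
   to the degrees of freedom it owns, publishes its count via MPI_Allgather to
   build node_distribution, and then passes its buffer, prefixed by a
   two-entry header carrying min_id and max_id, around the ring so the other
   ranks can pick up the global indices of the nodes they share. */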
dim_t Finley_NodeFile_createDenseNodeLabeling(Finley_NodeFile* in, index_t* node_distribution, const index_t* dof_distribution)
{
    index_t myFirstDOF, myLastDOF, max_id, min_id, loc_max_id, loc_min_id, dof, id, itmp, nodeID_0, nodeID_1, dof_0, dof_1, *Node_buffer=NULL;
    dim_t n, my_buffer_len, buffer_len, globalNumNodes=0, myNewNumNodes;
    Paso_MPI_rank p, dest, source, buffer_rank;
    const index_t unset_nodeID=-1, set_nodeID=1;
    const dim_t header_len=2;
    #ifdef PASO_MPI
    MPI_Status status;
    #endif
    Paso_MPI_rank myRank=in->MPIInfo->rank;

    /* find the range of node ids controlled by me */
    myFirstDOF=dof_distribution[myRank];
    myLastDOF=dof_distribution[myRank+1];
    max_id=-INDEX_T_MAX;
    min_id=INDEX_T_MAX;
    #pragma omp parallel private(loc_max_id,loc_min_id)
    {
        loc_max_id=max_id;
        loc_min_id=min_id;
        #pragma omp for private(n,dof,id) schedule(static)
        for (n=0; n<in->numNodes; n++) {
            dof=in->globalDegreesOfFreedom[n];
            id=in->Id[n];
            if ((myFirstDOF<=dof) && (dof<myLastDOF)) {
                loc_max_id=MAX(loc_max_id, id);
                loc_min_id=MIN(loc_min_id, id);
            }
        }
        #pragma omp critical
        {
            max_id=MAX(loc_max_id, max_id);
            min_id=MIN(loc_min_id, min_id);
        }
    }
    /* allocate a buffer */
    my_buffer_len = (max_id>=min_id) ? max_id-min_id+1 : 0;

    #ifdef PASO_MPI
    MPI_Allreduce(&my_buffer_len, &buffer_len, 1, MPI_INT, MPI_MAX, in->MPIInfo->comm);
    #else
    buffer_len=my_buffer_len;
    #endif

    Node_buffer=TMPMEMALLOC(buffer_len+header_len, index_t);
    if (! Finley_checkPtr(Node_buffer)) {
        /* mark and count the nodes in use */
        #pragma omp parallel
        {
            #pragma omp for private(n) schedule(static)
            for (n=0; n<buffer_len+header_len; n++) Node_buffer[n]=unset_nodeID;
            #pragma omp for private(n) schedule(static)
            for (n=0; n<in->numNodes; n++) in->globalNodesIndex[n]=-1;
            #pragma omp for private(n,dof,id) schedule(static)
            for (n=0; n<in->numNodes; n++) {
                dof=in->globalDegreesOfFreedom[n];
                id=in->Id[n];
                if ((myFirstDOF<=dof) && (dof<myLastDOF)) Node_buffer[id-min_id+header_len]=set_nodeID;
            }
        }
        myNewNumNodes=0;
        for (n=0; n<my_buffer_len; n++) {
            if (Node_buffer[header_len+n]==set_nodeID) {
                Node_buffer[header_len+n]=myNewNumNodes;
                myNewNumNodes++;
            }
        }
        /* make the local number of nodes globally available */
        #ifdef PASO_MPI
        MPI_Allgather(&myNewNumNodes, 1, MPI_INT, node_distribution, 1, MPI_INT, in->MPIInfo->comm);
        #else
        node_distribution[0]=myNewNumNodes;
        #endif
        globalNumNodes=0;
        for (p=0; p<in->MPIInfo->size; ++p) {
            itmp=node_distribution[p];
            node_distribution[p]=globalNumNodes;
            globalNumNodes+=itmp;
        }
        node_distribution[in->MPIInfo->size]=globalNumNodes;

        /* offset Node_buffer */
        itmp=node_distribution[in->MPIInfo->rank];
        #pragma omp parallel for private(n) schedule(static)
        for (n=0; n<my_buffer_len; n++) Node_buffer[n+header_len]+=itmp;

        /* now we send this buffer around to assign the global node indices: */
        dest=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank + 1);
        source=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank - 1);
        Node_buffer[0]=min_id;
        Node_buffer[1]=max_id;
        buffer_rank=in->MPIInfo->rank;
        for (p=0; p<in->MPIInfo->size; ++p) {
            nodeID_0=Node_buffer[0];
            nodeID_1=Node_buffer[1];
            dof_0=dof_distribution[buffer_rank];
            dof_1=dof_distribution[buffer_rank+1];
            if (nodeID_0<=nodeID_1) {
                #pragma omp parallel for private(n,dof,id) schedule(static)
                for (n=0; n<in->numNodes; n++) {
                    dof=in->globalDegreesOfFreedom[n];
                    id=in->Id[n]-nodeID_0;
                    if ((dof_0<=dof) && (dof<dof_1) && (id>=0) && (id<=nodeID_1-nodeID_0)) in->globalNodesIndex[n]=Node_buffer[id+header_len];
                }
            }
            if (p<in->MPIInfo->size-1) { /* the last send can be skipped */
                #ifdef PASO_MPI
                MPI_Sendrecv_replace(Node_buffer, buffer_len+header_len, MPI_INT,
                                     dest, in->MPIInfo->msg_tag_counter, source, in->MPIInfo->msg_tag_counter,
                                     in->MPIInfo->comm, &status);
                #endif
                in->MPIInfo->msg_tag_counter+=1;
            }
            buffer_rank=Paso_MPIInfo_mod(in->MPIInfo->size, buffer_rank-1);
        }
    }
    TMPMEMFREE(Node_buffer);
    return globalNumNodes;
}

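/* Summary (editorial, inferred from the code below): the reduced node
   labeling mirrors the reduced DOF labeling, but it operates on
   in->globalNodesIndex and writes the new labels to
   in->globalReducedNodesIndex; again only nodes with reducedNodeMask[n] > -1
   participate. */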
dim_t Finley_NodeFile_createDenseReducedNodeLabeling(Finley_NodeFile* in, index_t* reducedNodeMask)
{
    index_t min_node, max_node, unset_node=-1, set_node=1, node_0, node_1, *Nodes_buffer=NULL, k;
    Paso_MPI_rank buffer_rank, dest, source, *distribution=NULL;
    dim_t p, buffer_len, n, myNodes, *offsets=NULL, *loc_offsets=NULL, globalNumReducedNodes=0, myNewNodes;
    #ifdef PASO_MPI
    MPI_Status status;
    #endif

    /* get the global range of node ids */
    Finley_NodeFile_setGlobalNodeIDIndexRange(&min_node, &max_node, in);

    distribution=TMPMEMALLOC(in->MPIInfo->size+1, index_t);
    offsets=TMPMEMALLOC(in->MPIInfo->size, dim_t);
    loc_offsets=TMPMEMALLOC(in->MPIInfo->size, dim_t);

    if (! (Finley_checkPtr(distribution) || Finley_checkPtr(offsets) || Finley_checkPtr(loc_offsets)) ) {
        /* distribute the range of node ids */
        buffer_len=Paso_MPIInfo_setDistribution(in->MPIInfo, min_node, max_node, distribution);
        myNodes=distribution[in->MPIInfo->rank+1]-distribution[in->MPIInfo->rank];
        /* allocate buffers */
        Nodes_buffer=TMPMEMALLOC(buffer_len, index_t);
        if (! Finley_checkPtr(Nodes_buffer)) {
            /* fill Nodes_buffer with the unset_node marker so that unused nodes can be detected */
            #pragma omp parallel for private(n) schedule(static)
            for (n=0; n<buffer_len; n++) Nodes_buffer[n]=unset_node;

            /* fill the buffer by sending portions around in a circle */
            dest=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank + 1);
            source=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank - 1);
            buffer_rank=in->MPIInfo->rank;
            for (p=0; p<in->MPIInfo->size; ++p) {
                if (p>0) { /* the initial send can be skipped */
                    #ifdef PASO_MPI
                    MPI_Sendrecv_replace(Nodes_buffer, buffer_len, MPI_INT,
                                         dest, in->MPIInfo->msg_tag_counter, source, in->MPIInfo->msg_tag_counter,
                                         in->MPIInfo->comm, &status);
                    #endif
                    in->MPIInfo->msg_tag_counter++;
                }
                buffer_rank=Paso_MPIInfo_mod(in->MPIInfo->size, buffer_rank-1);
                node_0=distribution[buffer_rank];
                node_1=distribution[buffer_rank+1];
                #pragma omp parallel for private(n,k) schedule(static)
                for (n=0; n<in->numNodes; n++) {
                    if (reducedNodeMask[n]>-1) {
                        k=in->globalNodesIndex[n];
                        if ((node_0<=k) && (k<node_1)) {
                            Nodes_buffer[k-node_0]=set_node;
                        }
                    }
                }
            }
            /* count the entries in the Nodes_buffer */
            /* TODO: OMP parallel */
            myNewNodes=0;
            for (n=0; n<myNodes; ++n) {
                if (Nodes_buffer[n]==set_node) {
                    Nodes_buffer[n]=myNewNodes;
                    myNewNodes++;
                }
            }
            memset(loc_offsets, 0, in->MPIInfo->size*sizeof(dim_t));
            loc_offsets[in->MPIInfo->rank]=myNewNodes;
            #ifdef PASO_MPI
            MPI_Allreduce(loc_offsets, offsets, in->MPIInfo->size, MPI_INT, MPI_SUM, in->MPIInfo->comm);
            globalNumReducedNodes=0;
            for (n=0; n<in->MPIInfo->size; ++n) {
                loc_offsets[n]=globalNumReducedNodes;
                globalNumReducedNodes+=offsets[n];
            }
            #else
            globalNumReducedNodes=loc_offsets[0];
            loc_offsets[0]=0;
            #endif
            #pragma omp parallel for private(n) schedule(static)
            for (n=0; n<myNodes; ++n) Nodes_buffer[n]+=loc_offsets[in->MPIInfo->rank];
            /* now the new labels are collected from the buffer by sending it around in a circle again */
            #pragma omp parallel for private(n) schedule(static)
            for (n=0; n<in->numNodes; ++n) in->globalReducedNodesIndex[n]=loc_offsets[0]-1;
            dest=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank + 1);
            source=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank - 1);
            buffer_rank=in->MPIInfo->rank;
            for (p=0; p<in->MPIInfo->size; ++p) {
                node_0=distribution[buffer_rank];
                node_1=distribution[buffer_rank+1];
                #pragma omp parallel for private(n,k) schedule(static)
                for (n=0; n<in->numNodes; n++) {
                    if (reducedNodeMask[n]>-1) {
                        k=in->globalNodesIndex[n];
                        if ((node_0<=k) && (k<node_1)) in->globalReducedNodesIndex[n]=Nodes_buffer[k-node_0];
                    }
                }
                if (p<in->MPIInfo->size-1) { /* the last send can be skipped */
                    #ifdef PASO_MPI
                    MPI_Sendrecv_replace(Nodes_buffer, buffer_len, MPI_INT,
                                         dest, in->MPIInfo->msg_tag_counter, source, in->MPIInfo->msg_tag_counter,
                                         in->MPIInfo->comm, &status);
                    #endif
                    in->MPIInfo->msg_tag_counter+=1;
                }
                buffer_rank=Paso_MPIInfo_mod(in->MPIInfo->size, buffer_rank-1);
            }
        }
        TMPMEMFREE(Nodes_buffer);
    }
    TMPMEMFREE(distribution);
    TMPMEMFREE(loc_offsets);
    TMPMEMFREE(offsets);
    return globalNumReducedNodes;
}
