/[escript]/trunk/finley/src/NodeFile_createDenseLabelings.c

Revision 2548 - Mon Jul 20 06:20:06 2009 UTC by jfenwick
Log message: Updating copyright notices

/*******************************************************
*
* Copyright (c) 2003-2009 by University of Queensland
* Earth Systems Science Computational Center (ESSCC)
* http://www.uq.edu.au/esscc
*
* Primary Business: Queensland, Australia
* Licensed under the Open Software License version 3.0
* http://www.opensource.org/licenses/osl-3.0.php
*
*******************************************************/


/**************************************************************/

/* Finley: Mesh: NodeFile */

/* creates dense labelings of the global degrees of freedom and nodes */
/* (full and reduced) and returns the new number of global labels     */

/**************************************************************/

#include "NodeFile.h"

/**************************************************************/

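/* Finley_NodeFile_createDenseDOFLabeling:                                       */
/* replaces the global degree-of-freedom numbers by a dense labeling             */
/* 0,...,newNumGlobalDOFs-1. The range [min_dof,max_dof] is split across the     */
/* MPI ranks by Paso_MPIInfo_setDistribution; each rank marks the DOFs it        */
/* references in a buffer that is passed around the rank ring with               */
/* MPI_Sendrecv_replace, compacts the marked entries into a local numbering,     */
/* shifts it by its rank's global offset, and finally circulates the buffer once */
/* more so that every rank can pick up the new numbers for its nodes.            */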
dim_t Finley_NodeFile_createDenseDOFLabeling(Finley_NodeFile* in)
{
    index_t min_dof, max_dof, unset_dof=-1,set_dof=1, dof_0, dof_1, *DOF_buffer=NULL, k;
    Paso_MPI_rank buffer_rank, dest, source, *distribution=NULL;
    dim_t p, buffer_len,n, myDOFs, *offsets=NULL, *loc_offsets=NULL, new_numGlobalDOFs=0, myNewDOFs;
    bool_t *set_new_DOF=NULL;
    #ifdef PASO_MPI
    MPI_Status status;
    #endif

    /* get the global range of degree-of-freedom ids */
    Finley_NodeFile_setGlobalDOFRange(&min_dof,&max_dof,in);

    distribution=TMPMEMALLOC(in->MPIInfo->size+1, index_t);
    offsets=TMPMEMALLOC(in->MPIInfo->size, dim_t);
    loc_offsets=TMPMEMALLOC(in->MPIInfo->size, dim_t);
    set_new_DOF=TMPMEMALLOC(in->numNodes, bool_t);

    if ( ! (Finley_checkPtr(distribution) || Finley_checkPtr(offsets) || Finley_checkPtr(loc_offsets) || Finley_checkPtr(set_new_DOF)) ) {
        /* distribute the range of degree-of-freedom ids */
        buffer_len=Paso_MPIInfo_setDistribution(in->MPIInfo,min_dof,max_dof,distribution);
        myDOFs=distribution[in->MPIInfo->rank+1]-distribution[in->MPIInfo->rank];
        /* allocate buffers */
        DOF_buffer=TMPMEMALLOC(buffer_len,index_t);
        if (! Finley_checkPtr(DOF_buffer)) {
            /* fill DOF_buffer with the unset_dof marker to check if DOFs are defined */
            #pragma omp parallel for private(n) schedule(static)
            for (n=0;n<buffer_len;n++) DOF_buffer[n]=unset_dof;

            /* fill the buffer by sending portions around in a circle */
            dest=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank + 1);
            source=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank - 1);
            buffer_rank=in->MPIInfo->rank;
            for (p=0; p< in->MPIInfo->size; ++p) {
                if (p>0) { /* the initial send can be skipped */
                    #ifdef PASO_MPI
                    MPI_Sendrecv_replace(DOF_buffer, buffer_len, MPI_INT,
                                         dest, in->MPIInfo->msg_tag_counter, source, in->MPIInfo->msg_tag_counter,
                                         in->MPIInfo->comm,&status);
                    #endif
                    in->MPIInfo->msg_tag_counter++;
                }
                buffer_rank=Paso_MPIInfo_mod(in->MPIInfo->size, buffer_rank-1);
                dof_0=distribution[buffer_rank];
                dof_1=distribution[buffer_rank+1];
                #pragma omp parallel for private(n,k) schedule(static)
                for (n=0;n<in->numNodes;n++) {
                    k=in->globalDegreesOfFreedom[n];
                    if ((dof_0<=k) && (k<dof_1)) {
                        DOF_buffer[k-dof_0] = set_dof;
                    }
                }
            }
            /* count the entries in the DOF_buffer */
            /* TODO: OMP parallel */
            myNewDOFs=0;
            for (n=0; n<myDOFs; ++n) {
                if ( DOF_buffer[n] == set_dof) {
                    DOF_buffer[n]=myNewDOFs;
                    myNewDOFs++;
                }
            }
            memset(loc_offsets,0,in->MPIInfo->size*sizeof(dim_t));
            loc_offsets[in->MPIInfo->rank]=myNewDOFs;
            #ifdef PASO_MPI
            MPI_Allreduce(loc_offsets,offsets,in->MPIInfo->size, MPI_INT, MPI_SUM, in->MPIInfo->comm );
            new_numGlobalDOFs=0;
            for (n=0; n< in->MPIInfo->size; ++n) {
                loc_offsets[n]=new_numGlobalDOFs;
                new_numGlobalDOFs+=offsets[n];
            }
            #else
            new_numGlobalDOFs=loc_offsets[0];
            loc_offsets[0]=0;
            #endif
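            /* loc_offsets[r] now holds the first new global DOF number assigned to */
            /* rank r and new_numGlobalDOFs the total number of new DOFs.           */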
            #pragma omp parallel
            {
                #pragma omp for private(n) schedule(static)
                for (n=0; n<myDOFs; ++n) DOF_buffer[n]+=loc_offsets[in->MPIInfo->rank];
                /* now entries are collected from the buffer again by sending the entries around in a circle */
                #pragma omp for private(n) schedule(static)
                for (n=0; n<in->numNodes; ++n) set_new_DOF[n]=TRUE;
            }
            dest=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank + 1);
            source=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank - 1);
            buffer_rank=in->MPIInfo->rank;
            for (p=0; p< in->MPIInfo->size; ++p) {
                dof_0=distribution[buffer_rank];
                dof_1=distribution[buffer_rank+1];
                #pragma omp parallel for private(n,k) schedule(static)
                for (n=0;n<in->numNodes;n++) {
                    k=in->globalDegreesOfFreedom[n];
                    if ( set_new_DOF[n] && (dof_0<=k) && (k<dof_1)) {
                        in->globalDegreesOfFreedom[n]=DOF_buffer[k-dof_0];
                        set_new_DOF[n]=FALSE;
                    }
                }
                if (p<in->MPIInfo->size-1) { /* the last send can be skipped */
                    #ifdef PASO_MPI
                    MPI_Sendrecv_replace(DOF_buffer, buffer_len, MPI_INT,
                                         dest, in->MPIInfo->msg_tag_counter, source, in->MPIInfo->msg_tag_counter,
                                         in->MPIInfo->comm,&status);
                    #endif
                    in->MPIInfo->msg_tag_counter+=1;
                }
                buffer_rank=Paso_MPIInfo_mod(in->MPIInfo->size, buffer_rank-1);
            }
        }
        TMPMEMFREE(DOF_buffer);
    }
    TMPMEMFREE(distribution);
    TMPMEMFREE(loc_offsets);
    TMPMEMFREE(offsets);
    TMPMEMFREE(set_new_DOF);
    return new_numGlobalDOFs;
}

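/* Finley_NodeFile_assignMPIRankToDOFs:                                      */
/* assigns to each node the MPI rank that owns its global degree of freedom  */
/* according to the given distribution (rank p owns the DOFs in the range    */
/* distribution[p] <= dof < distribution[p+1]).                              */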
void Finley_NodeFile_assignMPIRankToDOFs(Finley_NodeFile* in,Paso_MPI_rank* mpiRankOfDOF, index_t *distribution){
    index_t min_DOF,max_DOF, k;
    dim_t n;
    Paso_MPI_rank p, p_min=in->MPIInfo->size, p_max=-1;
    /* first we calculate the min and max dof on this processor to reduce costs for searching */
    Finley_NodeFile_setDOFRange(&min_DOF,&max_DOF,in);

    for (p=0; p<in->MPIInfo->size; ++p) {
        if (distribution[p]<=min_DOF) p_min=p;
        if (distribution[p]<=max_DOF) p_max=p;
    }
    #pragma omp parallel for private(n,k,p) schedule(static)
    for (n=0; n<in->numNodes; ++n) {
        k=in->globalDegreesOfFreedom[n];
        for (p=p_min; p<=p_max; ++p) {
            if (k<distribution[p+1]) {
                mpiRankOfDOF[n]=p;
                break;
            }
        }
    }
}
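
/* Finley_NodeFile_createDenseReducedDOFLabeling:                                 */
/* same ring-buffer scheme as Finley_NodeFile_createDenseDOFLabeling, but only    */
/* DOFs of nodes with reducedNodeMask[n]>-1 take part in the labeling. The result */
/* is written to globalReducedDOFIndex; nodes outside the reduced set keep an     */
/* invalid index (-1). Returns the global number of reduced DOFs.                 */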
dim_t Finley_NodeFile_createDenseReducedDOFLabeling(Finley_NodeFile* in,index_t* reducedNodeMask)
{
    index_t min_dof, max_dof, unset_dof=-1,set_dof=1, dof_0, dof_1, *DOF_buffer=NULL, k;
    Paso_MPI_rank buffer_rank, dest, source, *distribution=NULL;
    dim_t p, buffer_len,n, myDOFs, *offsets=NULL, *loc_offsets=NULL, globalNumReducedDOFs=0, myNewDOFs;
    #ifdef PASO_MPI
    MPI_Status status;
    #endif

    /* get the global range of degree-of-freedom ids */
    Finley_NodeFile_setGlobalDOFRange(&min_dof,&max_dof,in);

    distribution=TMPMEMALLOC(in->MPIInfo->size+1, index_t);
    offsets=TMPMEMALLOC(in->MPIInfo->size, dim_t);
    loc_offsets=TMPMEMALLOC(in->MPIInfo->size, dim_t);

    if ( ! (Finley_checkPtr(distribution) || Finley_checkPtr(offsets) || Finley_checkPtr(loc_offsets) ) ) {
        /* distribute the range of degree-of-freedom ids */
        buffer_len=Paso_MPIInfo_setDistribution(in->MPIInfo,min_dof,max_dof,distribution);
        myDOFs=distribution[in->MPIInfo->rank+1]-distribution[in->MPIInfo->rank];
        /* allocate buffers */
        DOF_buffer=TMPMEMALLOC(buffer_len,index_t);
        if (! Finley_checkPtr(DOF_buffer)) {
            /* fill DOF_buffer with the unset_dof marker to check if DOFs are defined */
            #pragma omp parallel for private(n) schedule(static)
            for (n=0;n<buffer_len;n++) DOF_buffer[n]=unset_dof;

            /* fill the buffer by sending portions around in a circle */
            dest=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank + 1);
            source=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank - 1);
            buffer_rank=in->MPIInfo->rank;
            for (p=0; p< in->MPIInfo->size; ++p) {
                if (p>0) { /* the initial send can be skipped */
                    #ifdef PASO_MPI
                    MPI_Sendrecv_replace(DOF_buffer, buffer_len, MPI_INT,
                                         dest, in->MPIInfo->msg_tag_counter, source, in->MPIInfo->msg_tag_counter,
                                         in->MPIInfo->comm,&status);
                    #endif
                    in->MPIInfo->msg_tag_counter++;
                }
                buffer_rank=Paso_MPIInfo_mod(in->MPIInfo->size, buffer_rank-1);
                dof_0=distribution[buffer_rank];
                dof_1=distribution[buffer_rank+1];
                #pragma omp parallel for private(n,k) schedule(static)
                for (n=0;n<in->numNodes;n++) {
                    if (reducedNodeMask[n] >-1) {
                        k=in->globalDegreesOfFreedom[n];
                        if ((dof_0<=k) && (k<dof_1)) {
                            DOF_buffer[k-dof_0] = set_dof;
                        }
                    }
                }
            }
            /* count the entries in the DOF_buffer */
            /* TODO: OMP parallel */
            myNewDOFs=0;
            for (n=0; n<myDOFs; ++n) {
                if ( DOF_buffer[n] == set_dof) {
                    DOF_buffer[n]=myNewDOFs;
                    myNewDOFs++;
                }
            }
            memset(loc_offsets,0,in->MPIInfo->size*sizeof(dim_t));
            loc_offsets[in->MPIInfo->rank]=myNewDOFs;
            #ifdef PASO_MPI
            MPI_Allreduce(loc_offsets,offsets,in->MPIInfo->size, MPI_INT, MPI_SUM, in->MPIInfo->comm );
            globalNumReducedDOFs=0;
            for (n=0; n< in->MPIInfo->size; ++n) {
                loc_offsets[n]=globalNumReducedDOFs;
                globalNumReducedDOFs+=offsets[n];
            }
            #else
            globalNumReducedDOFs=loc_offsets[0];
            loc_offsets[0]=0;
            #endif
            #pragma omp parallel for private(n) schedule(static)
            for (n=0; n<myDOFs; ++n) DOF_buffer[n]+=loc_offsets[in->MPIInfo->rank];
            /* now entries are collected from the buffer again by sending the entries around in a circle */
            #pragma omp parallel for private(n) schedule(static)
            for (n=0; n<in->numNodes; ++n) in->globalReducedDOFIndex[n]=loc_offsets[0]-1;
            dest=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank + 1);
            source=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank - 1);
            buffer_rank=in->MPIInfo->rank;
            for (p=0; p< in->MPIInfo->size; ++p) {
                dof_0=distribution[buffer_rank];
                dof_1=distribution[buffer_rank+1];
                #pragma omp parallel for private(n,k) schedule(static)
                for (n=0;n<in->numNodes;n++) {
                    if (reducedNodeMask[n] >-1) {
                        k=in->globalDegreesOfFreedom[n];
                        if ( (dof_0<=k) && (k<dof_1)) in->globalReducedDOFIndex[n]=DOF_buffer[k-dof_0];
                    }
                }
                if (p<in->MPIInfo->size-1) { /* the last send can be skipped */
                    #ifdef PASO_MPI
                    MPI_Sendrecv_replace(DOF_buffer, buffer_len, MPI_INT,
                                         dest, in->MPIInfo->msg_tag_counter, source, in->MPIInfo->msg_tag_counter,
                                         in->MPIInfo->comm,&status);
                    #endif
                    in->MPIInfo->msg_tag_counter+=1;
                }
                buffer_rank=Paso_MPIInfo_mod(in->MPIInfo->size, buffer_rank-1);
            }
        }
        TMPMEMFREE(DOF_buffer);
    }
    TMPMEMFREE(distribution);
    TMPMEMFREE(loc_offsets);
    TMPMEMFREE(offsets);
    return globalNumReducedDOFs;
}
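
/* Finley_NodeFile_createDenseNodeLabeling:                                      */
/* assigns a dense global node numbering. Each rank numbers the node Ids         */
/* attached to the DOFs it owns (given by dof_distribution), publishes its local */
/* count via MPI_Allgather to build node_distribution, and then sends its buffer */
/* around the rank ring so that every rank can look up the global index of its   */
/* nodes. Returns the global number of nodes.                                    */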
dim_t Finley_NodeFile_createDenseNodeLabeling(Finley_NodeFile* in, index_t* node_distribution, const index_t* dof_distribution)
{
    index_t myFirstDOF, myLastDOF, max_id, min_id, loc_max_id, loc_min_id, dof, id, itmp, nodeID_0, nodeID_1, dof_0, dof_1, *Node_buffer=NULL;
    dim_t n, my_buffer_len, buffer_len, globalNumNodes=0, myNewNumNodes;
    Paso_MPI_rank p, dest, source, buffer_rank;
    const index_t unset_nodeID=-1, set_nodeID=1;
    const dim_t header_len=2;
    #ifdef PASO_MPI
    MPI_Status status;
    #endif
    Paso_MPI_rank myRank=in->MPIInfo->rank;

    /* find the range of node ids controlled by me */

    myFirstDOF=dof_distribution[myRank];
    myLastDOF=dof_distribution[myRank+1];
    max_id=-INDEX_T_MAX;
    min_id=INDEX_T_MAX;
    #pragma omp parallel private(loc_max_id,loc_min_id)
    {
        loc_max_id=max_id;
        loc_min_id=min_id;
        #pragma omp for private(n,dof,id) schedule(static)
        for (n=0;n<in->numNodes;n++) {
            dof=in->globalDegreesOfFreedom[n];
            id=in->Id[n];
            if ((myFirstDOF<= dof) && (dof< myLastDOF)) {
                loc_max_id=MAX(loc_max_id,id);
                loc_min_id=MIN(loc_min_id,id);
            }
        }
        #pragma omp critical
        {
            max_id=MAX(loc_max_id,max_id);
            min_id=MIN(loc_min_id,min_id);
        }
    }
    /* allocate a buffer */
    my_buffer_len=max_id>=min_id ? max_id-min_id+1 :0;

    #ifdef PASO_MPI
    MPI_Allreduce( &my_buffer_len, &buffer_len, 1, MPI_INT, MPI_MAX, in->MPIInfo->comm );
    #else
    buffer_len=my_buffer_len;
    #endif

    Node_buffer=TMPMEMALLOC(buffer_len+header_len,index_t);
    if (! Finley_checkPtr(Node_buffer)) {
        /* mark and count the nodes in use */
        #pragma omp parallel
        {
            #pragma omp for private(n) schedule(static)
            for (n=0;n<buffer_len+header_len;n++) Node_buffer[n]=unset_nodeID;
            #pragma omp for private(n) schedule(static)
            for (n=0;n<in->numNodes;n++) in->globalNodesIndex[n]=-1;
            #pragma omp for private(n,dof,id) schedule(static)
            for (n=0;n<in->numNodes;n++) {
                dof=in->globalDegreesOfFreedom[n];
                id=in->Id[n];
                if ((myFirstDOF<= dof) && (dof< myLastDOF)) Node_buffer[id-min_id+header_len]=set_nodeID;
            }
        }
        myNewNumNodes=0;
        for (n=0;n<my_buffer_len;n++) {
            if (Node_buffer[header_len+n]==set_nodeID) {
                Node_buffer[header_len+n]=myNewNumNodes;
                myNewNumNodes++;
            }
        }
        /* make the local number of nodes globally available */
        #ifdef PASO_MPI
        MPI_Allgather(&myNewNumNodes,1,MPI_INT,node_distribution,1,MPI_INT,in->MPIInfo->comm);
        #else
        node_distribution[0]=myNewNumNodes;
        #endif

        globalNumNodes=0;
        for (p=0; p< in->MPIInfo->size; ++p) {
            itmp=node_distribution[p];
            node_distribution[p]=globalNumNodes;
            globalNumNodes+=itmp;
        }
        node_distribution[in->MPIInfo->size]=globalNumNodes;

        /* offset Node_buffer by this rank's global node offset */
        itmp=node_distribution[in->MPIInfo->rank];
        #pragma omp for private(n) schedule(static)
        for (n=0;n<my_buffer_len;n++) Node_buffer[n+header_len]+=itmp;

        /* now we send this buffer around to assign global node index: */
        dest=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank + 1);
        source=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank - 1);
        Node_buffer[0]=min_id;
        Node_buffer[1]=max_id;
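        /* the two header entries carry the Id range [min_id,max_id] of the rank */
        /* that filled the buffer, so receiving ranks can translate their node   */
        /* Ids into positions within the circulating buffer.                     */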
        buffer_rank=in->MPIInfo->rank;
        for (p=0; p< in->MPIInfo->size; ++p) {
            nodeID_0=Node_buffer[0];
            nodeID_1=Node_buffer[1];
            dof_0=dof_distribution[buffer_rank];
            dof_1=dof_distribution[buffer_rank+1];
            if (nodeID_0<=nodeID_1) {
                #pragma omp for private(n,dof,id) schedule(static)
                for (n=0;n<in->numNodes;n++) {
                    dof=in->globalDegreesOfFreedom[n];
                    id=in->Id[n]-nodeID_0;
                    if ( (dof_0<= dof) && (dof< dof_1) && (id>=0) && (id<=nodeID_1-nodeID_0)) in->globalNodesIndex[n]=Node_buffer[id+header_len];
                }
            }
            if (p<in->MPIInfo->size-1) { /* the last send can be skipped */
                #ifdef PASO_MPI
                MPI_Sendrecv_replace(Node_buffer, buffer_len+header_len, MPI_INT,
                                     dest, in->MPIInfo->msg_tag_counter, source, in->MPIInfo->msg_tag_counter,
                                     in->MPIInfo->comm,&status);
                #endif
                in->MPIInfo->msg_tag_counter+=1;
            }
            buffer_rank=Paso_MPIInfo_mod(in->MPIInfo->size, buffer_rank-1);
        }
    }
    TMPMEMFREE(Node_buffer);
    return globalNumNodes;
}

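/* Finley_NodeFile_createDenseReducedNodeLabeling:                             */
/* dense labeling of the nodes selected by reducedNodeMask[n]>-1, based on     */
/* their global node indices and using the same ring-buffer scheme as above.   */
/* The result is written to globalReducedNodesIndex; unselected nodes keep the */
/* invalid index -1. Returns the global number of reduced nodes.               */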
dim_t Finley_NodeFile_createDenseReducedNodeLabeling(Finley_NodeFile* in,index_t* reducedNodeMask)
{
    index_t min_node, max_node, unset_node=-1,set_node=1, node_0, node_1, *Nodes_buffer=NULL, k;
    Paso_MPI_rank buffer_rank, dest, source, *distribution=NULL;
    dim_t p, buffer_len,n, myNodes, *offsets=NULL, *loc_offsets=NULL, globalNumReducedNodes=0, myNewNodes;
    #ifdef PASO_MPI
    MPI_Status status;
    #endif

    /* get the global range of node indices */
    Finley_NodeFile_setGlobalNodeIDIndexRange(&min_node,&max_node,in);

    distribution=TMPMEMALLOC(in->MPIInfo->size+1, index_t);
    offsets=TMPMEMALLOC(in->MPIInfo->size, dim_t);
    loc_offsets=TMPMEMALLOC(in->MPIInfo->size, dim_t);

    if ( ! (Finley_checkPtr(distribution) || Finley_checkPtr(offsets) || Finley_checkPtr(loc_offsets) ) ) {
        /* distribute the range of node indices */
        buffer_len=Paso_MPIInfo_setDistribution(in->MPIInfo,min_node,max_node,distribution);
        myNodes=distribution[in->MPIInfo->rank+1]-distribution[in->MPIInfo->rank];
        /* allocate buffers */
        Nodes_buffer=TMPMEMALLOC(buffer_len,index_t);
        if (! Finley_checkPtr(Nodes_buffer)) {
            /* fill Nodes_buffer with the unset_node marker to check if nodes are defined */
            #pragma omp parallel for private(n) schedule(static)
            for (n=0;n<buffer_len;n++) Nodes_buffer[n]=unset_node;

            /* fill the buffer by sending portions around in a circle */
            dest=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank + 1);
            source=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank - 1);
            buffer_rank=in->MPIInfo->rank;
            for (p=0; p< in->MPIInfo->size; ++p) {
                if (p>0) { /* the initial send can be skipped */
                    #ifdef PASO_MPI
                    MPI_Sendrecv_replace(Nodes_buffer, buffer_len, MPI_INT,
                                         dest, in->MPIInfo->msg_tag_counter, source, in->MPIInfo->msg_tag_counter,
                                         in->MPIInfo->comm,&status);
                    #endif
                    in->MPIInfo->msg_tag_counter++;
                }
                buffer_rank=Paso_MPIInfo_mod(in->MPIInfo->size, buffer_rank-1);
                node_0=distribution[buffer_rank];
                node_1=distribution[buffer_rank+1];
                #pragma omp parallel for private(n,k) schedule(static)
                for (n=0;n<in->numNodes;n++) {
                    if (reducedNodeMask[n] >-1) {
                        k=in->globalNodesIndex[n];
                        if ((node_0<=k) && (k<node_1)) {
                            Nodes_buffer[k-node_0] = set_node;
                        }
                    }
                }
            }
            /* count the entries in the Nodes_buffer */
            /* TODO: OMP parallel */
            myNewNodes=0;
            for (n=0; n<myNodes; ++n) {
                if ( Nodes_buffer[n] == set_node) {
                    Nodes_buffer[n]=myNewNodes;
                    myNewNodes++;
                }
            }
            memset(loc_offsets,0,in->MPIInfo->size*sizeof(dim_t));
            loc_offsets[in->MPIInfo->rank]=myNewNodes;
            #ifdef PASO_MPI
            MPI_Allreduce(loc_offsets,offsets,in->MPIInfo->size, MPI_INT, MPI_SUM, in->MPIInfo->comm );
            globalNumReducedNodes=0;
            for (n=0; n< in->MPIInfo->size; ++n) {
                loc_offsets[n]=globalNumReducedNodes;
                globalNumReducedNodes+=offsets[n];
            }
            #else
            globalNumReducedNodes=loc_offsets[0];
            loc_offsets[0]=0;
            #endif
            #pragma omp parallel for private(n) schedule(static)
            for (n=0; n<myNodes; ++n) Nodes_buffer[n]+=loc_offsets[in->MPIInfo->rank];
            /* now entries are collected from the buffer again by sending the entries around in a circle */
            #pragma omp parallel for private(n) schedule(static)
            for (n=0; n<in->numNodes; ++n) in->globalReducedNodesIndex[n]=loc_offsets[0]-1;
            dest=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank + 1);
            source=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank - 1);
            buffer_rank=in->MPIInfo->rank;
            for (p=0; p< in->MPIInfo->size; ++p) {
                node_0=distribution[buffer_rank];
                node_1=distribution[buffer_rank+1];
                #pragma omp parallel for private(n,k) schedule(static)
                for (n=0;n<in->numNodes;n++) {
                    if (reducedNodeMask[n] >-1) {
                        k=in->globalNodesIndex[n];
                        if ( (node_0<=k) && (k<node_1)) in->globalReducedNodesIndex[n]=Nodes_buffer[k-node_0];
                    }
                }
                if (p<in->MPIInfo->size-1) { /* the last send can be skipped */
                    #ifdef PASO_MPI
                    MPI_Sendrecv_replace(Nodes_buffer, buffer_len, MPI_INT,
                                         dest, in->MPIInfo->msg_tag_counter, source, in->MPIInfo->msg_tag_counter,
                                         in->MPIInfo->comm,&status);
                    #endif
                    in->MPIInfo->msg_tag_counter+=1;
                }
                buffer_rank=Paso_MPIInfo_mod(in->MPIInfo->size, buffer_rank-1);
            }
        }
        TMPMEMFREE(Nodes_buffer);
    }
    TMPMEMFREE(distribution);
    TMPMEMFREE(loc_offsets);
    TMPMEMFREE(offsets);
    return globalNumReducedNodes;
}
