Contents of /branches/arrayview_from_1695_trunk/finley/src/NodeFile_createDenseLabelings.c
Revision 1781 - Thu Sep 11 05:03:14 2008 UTC by jfenwick
File MIME type: text/plain
File size: 23227 byte(s)
Branch commit: merged changes from trunk version 1695 up to and including version 1779.


/* $Id$ */

/*******************************************************
 *
 *       Copyright 2003-2007 by ACceSS MNRF
 *       Copyright 2007 by University of Queensland
 *
 *       http://esscc.uq.edu.au
 *       Primary Business: Queensland, Australia
 *       Licensed under the Open Software License version 3.0
 *       http://www.opensource.org/licenses/osl-3.0.php
 *
 *******************************************************/

/**************************************************************/

/*   Finley: Mesh: NodeFile */

/*   Creates dense labelings of the global degrees of freedom   */
/*   (full and reduced) and of the global nodes (full and       */
/*   reduced), and returns the new number of global labels.     */

/**************************************************************/

#include "NodeFile.h"

/**************************************************************/
dim_t Finley_NodeFile_createDenseDOFLabeling(Finley_NodeFile* in)
{
  index_t min_dof, max_dof, unset_dof=-1, set_dof=1, dof_0, dof_1, *DOF_buffer=NULL, k;
  Paso_MPI_rank buffer_rank, dest, source, *distribution=NULL;
  dim_t p, buffer_len, n, myDOFs, *offsets=NULL, *loc_offsets=NULL, new_numGlobalDOFs=0, myNewDOFs;
  bool_t *set_new_DOF=NULL;
#ifdef PASO_MPI
  MPI_Status status;
#endif

  /* get the global range of the degree of freedom ids */
  Finley_NodeFile_setGlobalDOFRange(&min_dof,&max_dof,in);

  distribution=TMPMEMALLOC(in->MPIInfo->size+1, index_t);
  offsets=TMPMEMALLOC(in->MPIInfo->size, dim_t);
  loc_offsets=TMPMEMALLOC(in->MPIInfo->size, dim_t);
  set_new_DOF=TMPMEMALLOC(in->numNodes, bool_t);

  if ( ! (Finley_checkPtr(distribution) || Finley_checkPtr(offsets) || Finley_checkPtr(loc_offsets) || Finley_checkPtr(set_new_DOF)) ) {
    /* distribute the range of degree of freedom ids */
    buffer_len=Paso_MPIInfo_setDistribution(in->MPIInfo,min_dof,max_dof,distribution);
    myDOFs=distribution[in->MPIInfo->rank+1]-distribution[in->MPIInfo->rank];
    /* allocate buffers */
    DOF_buffer=TMPMEMALLOC(buffer_len,index_t);
    if (! Finley_checkPtr(DOF_buffer)) {
      /* fill DOF_buffer with the unset_dof marker so that unused entries can be detected */
      #pragma omp parallel for private(n) schedule(static)
      for (n=0;n<buffer_len;n++) DOF_buffer[n]=unset_dof;

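      /* Ring pass 1: each rank sends its buffer to rank+1 and receives the buffer of
         rank-1 (MPI_Sendrecv_replace). buffer_rank tracks which rank's block of the
         distribution the currently held buffer is being marked for, so after a full
         cycle every rank ends up holding the usage marks for its own block
         [distribution[rank], distribution[rank+1]). */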
      /* fill the buffer by sending portions around in a circle */
      dest=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank + 1);
      source=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank - 1);
      buffer_rank=in->MPIInfo->rank;
      for (p=0; p< in->MPIInfo->size; ++p) {
        if (p>0) { /* the initial send can be skipped */
#ifdef PASO_MPI
          MPI_Sendrecv_replace(DOF_buffer, buffer_len, MPI_INT,
                               dest, in->MPIInfo->msg_tag_counter, source, in->MPIInfo->msg_tag_counter,
                               in->MPIInfo->comm, &status);
#endif
          in->MPIInfo->msg_tag_counter++;
        }
        buffer_rank=Paso_MPIInfo_mod(in->MPIInfo->size, buffer_rank-1);
        dof_0=distribution[buffer_rank];
        dof_1=distribution[buffer_rank+1];
        #pragma omp parallel for private(n,k) schedule(static)
        for (n=0;n<in->numNodes;n++) {
          k=in->globalDegreesOfFreedom[n];
          if ((dof_0<=k) && (k<dof_1)) {
            DOF_buffer[k-dof_0] = set_dof;
          }
        }
      }
      /* count the entries in the DOF_buffer */
      /* TODO: OMP parallel */
      myNewDOFs=0;
      for (n=0; n<myDOFs; ++n) {
        if ( DOF_buffer[n] == set_dof) {
          DOF_buffer[n]=myNewDOFs;
          myNewDOFs++;
        }
      }
      memset(loc_offsets,0,in->MPIInfo->size*sizeof(dim_t));
      loc_offsets[in->MPIInfo->rank]=myNewDOFs;
#ifdef PASO_MPI
      MPI_Allreduce(loc_offsets,offsets,in->MPIInfo->size, MPI_INT, MPI_SUM, in->MPIInfo->comm );
      new_numGlobalDOFs=0;
      for (n=0; n< in->MPIInfo->size; ++n) {
        loc_offsets[n]=new_numGlobalDOFs;
        new_numGlobalDOFs+=offsets[n];
      }
#else
      new_numGlobalDOFs=loc_offsets[0];
      loc_offsets[0]=0;
#endif
      #pragma omp parallel
      {
        #pragma omp for private(n) schedule(static)
        for (n=0; n<myDOFs; ++n) DOF_buffer[n]+=loc_offsets[in->MPIInfo->rank];
        #pragma omp for private(n) schedule(static)
        for (n=0; n<in->numNodes; ++n) set_new_DOF[n]=TRUE;
      }
      /* now the new labels are distributed back to the nodes by sending the buffer around in a circle again */
      dest=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank + 1);
      source=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank - 1);
      buffer_rank=in->MPIInfo->rank;
      for (p=0; p< in->MPIInfo->size; ++p) {
        dof_0=distribution[buffer_rank];
        dof_1=distribution[buffer_rank+1];
        #pragma omp parallel for private(n,k) schedule(static)
        for (n=0;n<in->numNodes;n++) {
          k=in->globalDegreesOfFreedom[n];
          if ( set_new_DOF[n] && (dof_0<=k) && (k<dof_1)) {
            in->globalDegreesOfFreedom[n]=DOF_buffer[k-dof_0];
            set_new_DOF[n]=FALSE;
          }
        }
        if (p<in->MPIInfo->size-1) { /* the last send can be skipped */
#ifdef PASO_MPI
          MPI_Sendrecv_replace(DOF_buffer, buffer_len, MPI_INT,
                               dest, in->MPIInfo->msg_tag_counter, source, in->MPIInfo->msg_tag_counter,
                               in->MPIInfo->comm, &status);
#endif
          in->MPIInfo->msg_tag_counter+=1;
        }
        buffer_rank=Paso_MPIInfo_mod(in->MPIInfo->size, buffer_rank-1);
      }
    }
    TMPMEMFREE(DOF_buffer);
  }
  TMPMEMFREE(distribution);
  TMPMEMFREE(loc_offsets);
  TMPMEMFREE(offsets);
  TMPMEMFREE(set_new_DOF);
  return new_numGlobalDOFs;
}

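/* Finley_NodeFile_assignMPIRankToDOFs: */
/* for each node n, writes into mpiRankOfDOF[n] the MPI rank p that owns the node's */
/* degree of freedom, i.e. the rank p with */
/* distribution[p] <= in->globalDegreesOfFreedom[n] < distribution[p+1]. */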
void Finley_NodeFile_assignMPIRankToDOFs(Finley_NodeFile* in,Paso_MPI_rank* mpiRankOfDOF, index_t *distribution){
  index_t min_DOF,max_DOF, k;
  dim_t n;
  Paso_MPI_rank p, p_min=in->MPIInfo->size, p_max=-1;
  /* first we calculate the min and max DOF on this processor to reduce the cost of searching */
  Finley_NodeFile_setDOFRange(&min_DOF,&max_DOF,in);

  for (p=0; p<in->MPIInfo->size; ++p) {
    if (distribution[p]<=min_DOF) p_min=p;
    if (distribution[p]<=max_DOF) p_max=p;
  }
  #pragma omp parallel for private(n,k,p) schedule(static)
  for (n=0; n<in->numNodes; ++n) {
    k=in->globalDegreesOfFreedom[n];
    for (p=p_min; p<=p_max; ++p) {
      if (k<distribution[p+1]) {
        mpiRankOfDOF[n]=p;
        break;
      }
    }
  }
}
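
/* Finley_NodeFile_createDenseReducedDOFLabeling: */
/* creates a dense labeling 0,...,M-1 of the degrees of freedom attached to the nodes */
/* selected by reducedNodeMask (reducedNodeMask[n]>-1) and stores it in */
/* in->globalReducedDOFIndex; entries of nodes outside the reduced set are set to -1. */
/* Returns M, the global number of reduced degrees of freedom. The ring exchange */
/* scheme is the same as in Finley_NodeFile_createDenseDOFLabeling above. */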
dim_t Finley_NodeFile_createDenseReducedDOFLabeling(Finley_NodeFile* in,index_t* reducedNodeMask)
{
  index_t min_dof, max_dof, unset_dof=-1, set_dof=1, dof_0, dof_1, *DOF_buffer=NULL, k;
  Paso_MPI_rank buffer_rank, dest, source, *distribution=NULL;
  dim_t p, buffer_len, n, myDOFs, *offsets=NULL, *loc_offsets=NULL, globalNumReducedDOFs=0, myNewDOFs;
#ifdef PASO_MPI
  MPI_Status status;
#endif

  /* get the global range of the degree of freedom ids */
  Finley_NodeFile_setGlobalDOFRange(&min_dof,&max_dof,in);

  distribution=TMPMEMALLOC(in->MPIInfo->size+1, index_t);
  offsets=TMPMEMALLOC(in->MPIInfo->size, dim_t);
  loc_offsets=TMPMEMALLOC(in->MPIInfo->size, dim_t);

  if ( ! (Finley_checkPtr(distribution) || Finley_checkPtr(offsets) || Finley_checkPtr(loc_offsets) ) ) {
    /* distribute the range of degree of freedom ids */
    buffer_len=Paso_MPIInfo_setDistribution(in->MPIInfo,min_dof,max_dof,distribution);
    myDOFs=distribution[in->MPIInfo->rank+1]-distribution[in->MPIInfo->rank];
    /* allocate buffers */
    DOF_buffer=TMPMEMALLOC(buffer_len,index_t);
    if (! Finley_checkPtr(DOF_buffer)) {
      /* fill DOF_buffer with the unset_dof marker so that unused entries can be detected */
      #pragma omp parallel for private(n) schedule(static)
      for (n=0;n<buffer_len;n++) DOF_buffer[n]=unset_dof;

      /* fill the buffer by sending portions around in a circle */
      dest=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank + 1);
      source=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank - 1);
      buffer_rank=in->MPIInfo->rank;
      for (p=0; p< in->MPIInfo->size; ++p) {
        if (p>0) { /* the initial send can be skipped */
#ifdef PASO_MPI
          MPI_Sendrecv_replace(DOF_buffer, buffer_len, MPI_INT,
                               dest, in->MPIInfo->msg_tag_counter, source, in->MPIInfo->msg_tag_counter,
                               in->MPIInfo->comm, &status);
#endif
          in->MPIInfo->msg_tag_counter++;
        }
        buffer_rank=Paso_MPIInfo_mod(in->MPIInfo->size, buffer_rank-1);
        dof_0=distribution[buffer_rank];
        dof_1=distribution[buffer_rank+1];
        #pragma omp parallel for private(n,k) schedule(static)
        for (n=0;n<in->numNodes;n++) {
          if (reducedNodeMask[n] >-1) {
            k=in->globalDegreesOfFreedom[n];
            if ((dof_0<=k) && (k<dof_1)) {
              DOF_buffer[k-dof_0] = set_dof;
            }
          }
        }
      }
      /* count the entries in the DOF_buffer */
      /* TODO: OMP parallel */
      myNewDOFs=0;
      for (n=0; n<myDOFs; ++n) {
        if ( DOF_buffer[n] == set_dof) {
          DOF_buffer[n]=myNewDOFs;
          myNewDOFs++;
        }
      }
      memset(loc_offsets,0,in->MPIInfo->size*sizeof(dim_t));
      loc_offsets[in->MPIInfo->rank]=myNewDOFs;
#ifdef PASO_MPI
      MPI_Allreduce(loc_offsets,offsets,in->MPIInfo->size, MPI_INT, MPI_SUM, in->MPIInfo->comm );
      globalNumReducedDOFs=0;
      for (n=0; n< in->MPIInfo->size; ++n) {
        loc_offsets[n]=globalNumReducedDOFs;
        globalNumReducedDOFs+=offsets[n];
      }
#else
      globalNumReducedDOFs=loc_offsets[0];
      loc_offsets[0]=0;
#endif
      #pragma omp parallel for private(n) schedule(static)
      for (n=0; n<myDOFs; ++n) DOF_buffer[n]+=loc_offsets[in->MPIInfo->rank];
      /* now entries are collected from the buffer again by sending the entries around in a circle */
      #pragma omp parallel for private(n) schedule(static)
      for (n=0; n<in->numNodes; ++n) in->globalReducedDOFIndex[n]=loc_offsets[0]-1;
      dest=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank + 1);
      source=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank - 1);
      buffer_rank=in->MPIInfo->rank;
      for (p=0; p< in->MPIInfo->size; ++p) {
        dof_0=distribution[buffer_rank];
        dof_1=distribution[buffer_rank+1];
        #pragma omp parallel for private(n,k) schedule(static)
        for (n=0;n<in->numNodes;n++) {
          if (reducedNodeMask[n] >-1) {
            k=in->globalDegreesOfFreedom[n];
            if ( (dof_0<=k) && (k<dof_1)) in->globalReducedDOFIndex[n]=DOF_buffer[k-dof_0];
          }
        }
        if (p<in->MPIInfo->size-1) { /* the last send can be skipped */
#ifdef PASO_MPI
          MPI_Sendrecv_replace(DOF_buffer, buffer_len, MPI_INT,
                               dest, in->MPIInfo->msg_tag_counter, source, in->MPIInfo->msg_tag_counter,
                               in->MPIInfo->comm, &status);
#endif
          in->MPIInfo->msg_tag_counter+=1;
        }
        buffer_rank=Paso_MPIInfo_mod(in->MPIInfo->size, buffer_rank-1);
      }
    }
    TMPMEMFREE(DOF_buffer);
  }
  TMPMEMFREE(distribution);
  TMPMEMFREE(loc_offsets);
  TMPMEMFREE(offsets);
  return globalNumReducedDOFs;
}
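
/* Finley_NodeFile_createDenseNodeLabeling: */
/* creates a dense global labeling of the nodes and stores it in in->globalNodesIndex. */
/* Each node is numbered by the rank that owns its degree of freedom according to */
/* dof_distribution. On return, node_distribution[p] holds the first global node number */
/* owned by rank p (node_distribution has length size+1). Returns the global number of nodes. */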
dim_t Finley_NodeFile_createDenseNodeLabeling(Finley_NodeFile* in, index_t* node_distribution, const index_t* dof_distribution)
{
  index_t myFirstDOF, myLastDOF, max_id, min_id, loc_max_id, loc_min_id, dof, id, itmp, nodeID_0, nodeID_1, dof_0, dof_1, *Node_buffer=NULL;
  dim_t n, my_buffer_len, buffer_len, globalNumNodes, myNewNumNodes;
  Paso_MPI_rank p, dest, source, buffer_rank;
  const index_t unset_nodeID=-1, set_nodeID=1;
  const dim_t header_len=2;
#ifdef PASO_MPI
  MPI_Status status;
#endif
  Paso_MPI_rank myRank=in->MPIInfo->rank;

  /* find the range of node ids controlled by this rank */

  myFirstDOF=dof_distribution[in->MPIInfo->rank];
  myLastDOF=dof_distribution[in->MPIInfo->rank+1];
  max_id=-INDEX_T_MAX;
  min_id=INDEX_T_MAX;
  #pragma omp parallel private(loc_max_id,loc_min_id)
  {
    loc_max_id=max_id;
    loc_min_id=min_id;
    #pragma omp for private(n,dof,id) schedule(static)
    for (n=0;n<in->numNodes;n++) {
      dof=in->globalDegreesOfFreedom[n];
      id=in->Id[n];
      if ((myFirstDOF<= dof) && (dof< myLastDOF)) {
        loc_max_id=MAX(loc_max_id,id);
        loc_min_id=MIN(loc_min_id,id);
      }
    }
    #pragma omp critical
    {
      max_id=MAX(loc_max_id,max_id);
      min_id=MIN(loc_min_id,min_id);
    }
  }
  /* allocate a buffer */
  my_buffer_len=max_id>=min_id ? max_id-min_id+1 : 0;

#ifdef PASO_MPI
  MPI_Allreduce( &my_buffer_len, &buffer_len, 1, MPI_INT, MPI_MAX, in->MPIInfo->comm );
#else
  buffer_len=my_buffer_len;
#endif

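  /* Node_buffer layout: Node_buffer[0] and Node_buffer[1] carry the min and max node id
     of the rank that filled the buffer (header_len=2); they are followed by one entry per
     node id in that range which will hold the new global node number. */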
  Node_buffer=TMPMEMALLOC(buffer_len+header_len,index_t);
  if (! Finley_checkPtr(Node_buffer)) {
    /* mark and count the nodes in use */
    #pragma omp parallel
    {
      #pragma omp for private(n) schedule(static)
      for (n=0;n<buffer_len+header_len;n++) Node_buffer[n]=unset_nodeID;
      #pragma omp for private(n) schedule(static)
      for (n=0;n<in->numNodes;n++) in->globalNodesIndex[n]=-1;
      #pragma omp for private(n,dof,id) schedule(static)
      for (n=0;n<in->numNodes;n++) {
        dof=in->globalDegreesOfFreedom[n];
        id=in->Id[n];
        if ((myFirstDOF<= dof) && (dof< myLastDOF)) Node_buffer[id-min_id+header_len]=set_nodeID;
      }
    }
    myNewNumNodes=0;
    for (n=0;n<my_buffer_len;n++) {
      if (Node_buffer[header_len+n]==set_nodeID) {
        Node_buffer[header_len+n]=myNewNumNodes;
        myNewNumNodes++;
      }
    }
    /* make the local number of nodes globally available */
#ifdef PASO_MPI
    MPI_Allgather(&myNewNumNodes,1,MPI_INT,node_distribution,1,MPI_INT,in->MPIInfo->comm);
#else
    node_distribution[0]=myNewNumNodes;
#endif
    globalNumNodes=0;
    for (p=0; p< in->MPIInfo->size; ++p) {
      itmp=node_distribution[p];
      node_distribution[p]=globalNumNodes;
      globalNumNodes+=itmp;
    }
    node_distribution[in->MPIInfo->size]=globalNumNodes;

    /* offset nodebuffer */
    itmp=node_distribution[in->MPIInfo->rank];
    #pragma omp for private(n) schedule(static)
    for (n=0;n<my_buffer_len;n++) Node_buffer[n+header_len]+=itmp;

    /* now we send this buffer around to assign global node index: */
    dest=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank + 1);
    source=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank - 1);
    Node_buffer[0]=min_id;
    Node_buffer[1]=max_id;
    buffer_rank=in->MPIInfo->rank;
    for (p=0; p< in->MPIInfo->size; ++p) {
      nodeID_0=Node_buffer[0];
      nodeID_1=Node_buffer[1];
      dof_0=dof_distribution[buffer_rank];
      dof_1=dof_distribution[buffer_rank+1];
      if (nodeID_0<=nodeID_1) {
        #pragma omp for private(n,dof,id) schedule(static)
        for (n=0;n<in->numNodes;n++) {
          dof=in->globalDegreesOfFreedom[n];
          id=in->Id[n]-nodeID_0;
          if ( (dof_0<= dof) && (dof< dof_1) && (id>=0) && (id<=nodeID_1-nodeID_0)) in->globalNodesIndex[n]=Node_buffer[id+header_len];
        }
      }
      if (p<in->MPIInfo->size-1) { /* the last send can be skipped */
#ifdef PASO_MPI
        MPI_Sendrecv_replace(Node_buffer, buffer_len+header_len, MPI_INT,
                             dest, in->MPIInfo->msg_tag_counter, source, in->MPIInfo->msg_tag_counter,
                             in->MPIInfo->comm, &status);
#endif
        in->MPIInfo->msg_tag_counter+=1;
      }
      buffer_rank=Paso_MPIInfo_mod(in->MPIInfo->size, buffer_rank-1);
    }
  }
  TMPMEMFREE(Node_buffer);
  return globalNumNodes;
}

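/* Finley_NodeFile_createDenseReducedNodeLabeling: */
/* creates a dense labeling 0,...,M-1 of the nodes selected by reducedNodeMask */
/* (reducedNodeMask[n]>-1), based on in->globalNodesIndex, and stores it in */
/* in->globalReducedNodesIndex; entries of nodes outside the reduced set are set to -1. */
/* Returns M, the global number of reduced nodes. */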
dim_t Finley_NodeFile_createDenseReducedNodeLabeling(Finley_NodeFile* in,index_t* reducedNodeMask)
{
  index_t min_node, max_node, unset_node=-1, set_node=1, node_0, node_1, *Nodes_buffer=NULL, k;
  Paso_MPI_rank buffer_rank, dest, source, *distribution=NULL;
  dim_t p, buffer_len, n, myNodes, *offsets=NULL, *loc_offsets=NULL, globalNumReducedNodes=0, myNewNodes;
#ifdef PASO_MPI
  MPI_Status status;
#endif

  /* get the global range of node ids */
  Finley_NodeFile_setGlobalNodeIDIndexRange(&min_node,&max_node,in);

  distribution=TMPMEMALLOC(in->MPIInfo->size+1, index_t);
  offsets=TMPMEMALLOC(in->MPIInfo->size, dim_t);
  loc_offsets=TMPMEMALLOC(in->MPIInfo->size, dim_t);

  if ( ! (Finley_checkPtr(distribution) || Finley_checkPtr(offsets) || Finley_checkPtr(loc_offsets) ) ) {
    /* distribute the range of node ids */
    buffer_len=Paso_MPIInfo_setDistribution(in->MPIInfo,min_node,max_node,distribution);
    myNodes=distribution[in->MPIInfo->rank+1]-distribution[in->MPIInfo->rank];
    /* allocate buffers */
    Nodes_buffer=TMPMEMALLOC(buffer_len,index_t);
    if (! Finley_checkPtr(Nodes_buffer)) {
      /* fill Nodes_buffer with the unset_node marker so that unused entries can be detected */
      #pragma omp parallel for private(n) schedule(static)
      for (n=0;n<buffer_len;n++) Nodes_buffer[n]=unset_node;

      /* fill the buffer by sending portions around in a circle */
      dest=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank + 1);
      source=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank - 1);
      buffer_rank=in->MPIInfo->rank;
      for (p=0; p< in->MPIInfo->size; ++p) {
        if (p>0) { /* the initial send can be skipped */
#ifdef PASO_MPI
          MPI_Sendrecv_replace(Nodes_buffer, buffer_len, MPI_INT,
                               dest, in->MPIInfo->msg_tag_counter, source, in->MPIInfo->msg_tag_counter,
                               in->MPIInfo->comm, &status);
#endif
          in->MPIInfo->msg_tag_counter++;
        }
        buffer_rank=Paso_MPIInfo_mod(in->MPIInfo->size, buffer_rank-1);
        node_0=distribution[buffer_rank];
        node_1=distribution[buffer_rank+1];
        #pragma omp parallel for private(n,k) schedule(static)
        for (n=0;n<in->numNodes;n++) {
          if (reducedNodeMask[n] >-1) {
            k=in->globalNodesIndex[n];
            if ((node_0<=k) && (k<node_1)) {
              Nodes_buffer[k-node_0] = set_node;
            }
          }
        }
      }
      /* count the entries in the Nodes_buffer */
      /* TODO: OMP parallel */
      myNewNodes=0;
      for (n=0; n<myNodes; ++n) {
        if ( Nodes_buffer[n] == set_node) {
          Nodes_buffer[n]=myNewNodes;
          myNewNodes++;
        }
      }
      memset(loc_offsets,0,in->MPIInfo->size*sizeof(dim_t));
      loc_offsets[in->MPIInfo->rank]=myNewNodes;
#ifdef PASO_MPI
      MPI_Allreduce(loc_offsets,offsets,in->MPIInfo->size, MPI_INT, MPI_SUM, in->MPIInfo->comm );
      globalNumReducedNodes=0;
      for (n=0; n< in->MPIInfo->size; ++n) {
        loc_offsets[n]=globalNumReducedNodes;
        globalNumReducedNodes+=offsets[n];
      }
#else
      globalNumReducedNodes=loc_offsets[0];
      loc_offsets[0]=0;
#endif
      #pragma omp parallel for private(n) schedule(static)
      for (n=0; n<myNodes; ++n) Nodes_buffer[n]+=loc_offsets[in->MPIInfo->rank];
      /* now entries are collected from the buffer again by sending the entries around in a circle */
      #pragma omp parallel for private(n) schedule(static)
      for (n=0; n<in->numNodes; ++n) in->globalReducedNodesIndex[n]=loc_offsets[0]-1;
      dest=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank + 1);
      source=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank - 1);
      buffer_rank=in->MPIInfo->rank;
      for (p=0; p< in->MPIInfo->size; ++p) {
        node_0=distribution[buffer_rank];
        node_1=distribution[buffer_rank+1];
        #pragma omp parallel for private(n,k) schedule(static)
        for (n=0;n<in->numNodes;n++) {
          if (reducedNodeMask[n] >-1) {
            k=in->globalNodesIndex[n];
            if ( (node_0<=k) && (k<node_1)) in->globalReducedNodesIndex[n]=Nodes_buffer[k-node_0];
          }
        }
        if (p<in->MPIInfo->size-1) { /* the last send can be skipped */
#ifdef PASO_MPI
          MPI_Sendrecv_replace(Nodes_buffer, buffer_len, MPI_INT,
                               dest, in->MPIInfo->msg_tag_counter, source, in->MPIInfo->msg_tag_counter,
                               in->MPIInfo->comm, &status);
#endif
          in->MPIInfo->msg_tag_counter+=1;
        }
        buffer_rank=Paso_MPIInfo_mod(in->MPIInfo->size, buffer_rank-1);
      }
    }
    TMPMEMFREE(Nodes_buffer);
  }
  TMPMEMFREE(distribution);
  TMPMEMFREE(loc_offsets);
  TMPMEMFREE(offsets);
  return globalNumReducedNodes;
}
