/trunk/finley/src/NodeFile_createDenseLabelings.c
Revision 1388 - Fri Jan 11 07:45:58 2008 UTC by trankine
/* $Id$ */

/*******************************************************
 *
 *       Copyright 2003-2007 by ACceSS MNRF
 *       Copyright 2007 by University of Queensland
 *
 *                http://esscc.uq.edu.au
 *        Primary Business: Queensland, Australia
 *  Licensed under the Open Software License version 3.0
 *     http://www.opensource.org/licenses/osl-3.0.php
 *
 *******************************************************/

/**************************************************************/

/*   Finley: Mesh: NodeFile */

/*   creates a dense labeling of the global degrees of freedom */
/*   and returns the new number of global degrees of freedom */
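/*                                                              */
/*   All labeling functions in this file use the same two-pass  */
/*   ring algorithm (summarized here; the code below is the     */
/*   authoritative reference):                                  */
/*     1. the global id range is split evenly over the MPI      */
/*        ranks and a buffer covering one rank's section is     */
/*        passed around the ranks in a circle; each rank marks  */
/*        the entries of the current section that are used by   */
/*        one of its local nodes.                               */
/*     2. each rank renumbers the marked entries of its own     */
/*        section consecutively, the per-rank counts are        */
/*        combined into global offsets, and the buffer is       */
/*        passed around a second time so every rank can read    */
/*        off the new dense labels for its local nodes.         */
/*                                                              */
/*   Illustrative call from a hypothetical caller:              */
/*      dim_t nDOF = Finley_NodeFile_createDenseDOFLabeling(nodes); */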

/**************************************************************/

#include "NodeFile.h"

/**************************************************************/

dim_t Finley_NodeFile_createDenseDOFLabeling(Finley_NodeFile* in)
{
   index_t min_dof, max_dof, unset_dof=-1, set_dof=1, dof_0, dof_1, *DOF_buffer=NULL, k;
   Paso_MPI_rank buffer_rank, dest, source, *distribution=NULL;
   dim_t p, buffer_len, n, myDOFs, *offsets=NULL, *loc_offsets=NULL, new_numGlobalDOFs=0, myNewDOFs;
   bool_t *set_new_DOF=NULL;
   #ifdef PASO_MPI
   MPI_Status status;
   #endif

   /* get the global range of degrees of freedom */
   Finley_NodeFile_setGlobalDOFRange(&min_dof,&max_dof,in);

   distribution=TMPMEMALLOC(in->MPIInfo->size+1, index_t);
   offsets=TMPMEMALLOC(in->MPIInfo->size, dim_t);
   loc_offsets=TMPMEMALLOC(in->MPIInfo->size, dim_t);
   set_new_DOF=TMPMEMALLOC(in->numNodes, bool_t);

   if ( ! (Finley_checkPtr(distribution) || Finley_checkPtr(offsets) || Finley_checkPtr(loc_offsets) || Finley_checkPtr(set_new_DOF)) ) {
      /* distribute the range of degrees of freedom */
      buffer_len=Paso_MPIInfo_setDistribution(in->MPIInfo,min_dof,max_dof,distribution);
      myDOFs=distribution[in->MPIInfo->rank+1]-distribution[in->MPIInfo->rank];
      /* allocate buffers */
      DOF_buffer=TMPMEMALLOC(buffer_len,index_t);
      if (! Finley_checkPtr(DOF_buffer)) {
         /* fill DOF_buffer with the unset_dof marker so that undefined entries can be detected */
         #pragma omp parallel for private(n) schedule(static)
         for (n=0;n<buffer_len;n++) DOF_buffer[n]=unset_dof;

         /* fill the buffer by sending portions around in a circle */
         dest=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank + 1);
         source=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank - 1);
         buffer_rank=in->MPIInfo->rank;
         for (p=0; p<in->MPIInfo->size; ++p) {
            if (p>0) { /* the initial send can be skipped */
               #ifdef PASO_MPI
               MPI_Sendrecv_replace(DOF_buffer, buffer_len, MPI_INT,
                                    dest, in->MPIInfo->msg_tag_counter, source, in->MPIInfo->msg_tag_counter,
                                    in->MPIInfo->comm, &status);
               #endif
               in->MPIInfo->msg_tag_counter++;
            }
            buffer_rank=Paso_MPIInfo_mod(in->MPIInfo->size, buffer_rank-1);
            dof_0=distribution[buffer_rank];
            dof_1=distribution[buffer_rank+1];
            #pragma omp parallel for private(n,k) schedule(static)
            for (n=0;n<in->numNodes;n++) {
               k=in->globalDegreesOfFreedom[n];
               if ((dof_0<=k) && (k<dof_1)) {
                  DOF_buffer[k-dof_0] = set_dof;
               }
            }
         }
         /* count the entries in the DOF_buffer */
         /* TODO: OMP parallel */
         myNewDOFs=0;
         for (n=0; n<myDOFs; ++n) {
            if ( DOF_buffer[n] == set_dof) {
               DOF_buffer[n]=myNewDOFs;
               myNewDOFs++;
            }
         }
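         /* combine the per-rank counts into exclusive prefix sums: after this   */
         /* block loc_offsets[rank] holds the first global DOF number assigned   */
         /* to this rank's section of the buffer                                 */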
         memset(loc_offsets,0,in->MPIInfo->size*sizeof(dim_t));
         loc_offsets[in->MPIInfo->rank]=myNewDOFs;
         #ifdef PASO_MPI
         MPI_Allreduce(loc_offsets,offsets,in->MPIInfo->size, MPI_INT, MPI_SUM, in->MPIInfo->comm );
         new_numGlobalDOFs=0;
         for (n=0; n<in->MPIInfo->size; ++n) {
            loc_offsets[n]=new_numGlobalDOFs;
            new_numGlobalDOFs+=offsets[n];
         }
         #else
         new_numGlobalDOFs=loc_offsets[0];
         loc_offsets[0]=0;
         #endif
         #pragma omp parallel for private(n) schedule(static)
         for (n=0; n<myDOFs; ++n) DOF_buffer[n]+=loc_offsets[in->MPIInfo->rank];
         /* now entries are collected from the buffer again by sending the entries around in a circle */
         #pragma omp parallel for private(n) schedule(static)
         for (n=0; n<in->numNodes; ++n) set_new_DOF[n]=TRUE;
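         /* set_new_DOF ensures each node is relabeled only once: once a node's     */
         /* entry in globalDegreesOfFreedom holds its new number, that value could  */
         /* otherwise fall into a later rank's section and be replaced again        */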
         dest=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank + 1);
         source=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank - 1);
         buffer_rank=in->MPIInfo->rank;
         for (p=0; p<in->MPIInfo->size; ++p) {
            dof_0=distribution[buffer_rank];
            dof_1=distribution[buffer_rank+1];
            #pragma omp parallel for private(n,k) schedule(static)
            for (n=0;n<in->numNodes;n++) {
               k=in->globalDegreesOfFreedom[n];
               if ( set_new_DOF[n] && (dof_0<=k) && (k<dof_1)) {
                  in->globalDegreesOfFreedom[n]=DOF_buffer[k-dof_0];
                  set_new_DOF[n]=FALSE;
               }
            }
            if (p<in->MPIInfo->size-1) { /* the last send can be skipped */
               #ifdef PASO_MPI
               MPI_Sendrecv_replace(DOF_buffer, buffer_len, MPI_INT,
                                    dest, in->MPIInfo->msg_tag_counter, source, in->MPIInfo->msg_tag_counter,
                                    in->MPIInfo->comm, &status);
               #endif
               in->MPIInfo->msg_tag_counter+=1;
            }
            buffer_rank=Paso_MPIInfo_mod(in->MPIInfo->size, buffer_rank-1);
         }
      }
      TMPMEMFREE(DOF_buffer);
   }
   TMPMEMFREE(distribution);
   TMPMEMFREE(loc_offsets);
   TMPMEMFREE(offsets);
   TMPMEMFREE(set_new_DOF);
   return new_numGlobalDOFs;
}

void Finley_NodeFile_assignMPIRankToDOFs(Finley_NodeFile* in, Paso_MPI_rank* mpiRankOfDOF, index_t *distribution){
   index_t min_DOF, max_DOF, k;
   dim_t n;
   Paso_MPI_rank p, p_min=in->MPIInfo->size, p_max=-1;
   /* first we calculate the min and max DOF on this processor to reduce the cost of searching */
   Finley_NodeFile_setDOFRange(&min_DOF,&max_DOF,in);

   for (p=0; p<in->MPIInfo->size; ++p) {
      if (distribution[p]<=min_DOF) p_min=p;
      if (distribution[p]<=max_DOF) p_max=p;
   }
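   /* this assumes distribution[] is non-decreasing; then only ranks in [p_min,p_max] can own a local DOF */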
   #pragma omp parallel for private(n,k,p) schedule(static)
   for (n=0; n<in->numNodes; ++n) {
      k=in->globalDegreesOfFreedom[n];
      for (p=p_min; p<=p_max; ++p) {
         if (k<distribution[p+1]) {
            mpiRankOfDOF[n]=p;
            break;
         }
      }
   }
}
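
/* same algorithm as Finley_NodeFile_createDenseDOFLabeling, but restricted to the nodes   */
/* with reducedNodeMask[n]>-1: the dense numbering is written to in->globalReducedDOFIndex */
/* (nodes outside the reduced set keep the marker -1) while in->globalDegreesOfFreedom     */
/* itself is left unchanged                                                                */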
dim_t Finley_NodeFile_createDenseReducedDOFLabeling(Finley_NodeFile* in, index_t* reducedNodeMask)
{
   index_t min_dof, max_dof, unset_dof=-1, set_dof=1, dof_0, dof_1, *DOF_buffer=NULL, k;
   Paso_MPI_rank buffer_rank, dest, source, *distribution=NULL;
   dim_t p, buffer_len, n, myDOFs, *offsets=NULL, *loc_offsets=NULL, globalNumReducedDOFs=0, myNewDOFs;
   #ifdef PASO_MPI
   MPI_Status status;
   #endif

   /* get the global range of degrees of freedom */
   Finley_NodeFile_setGlobalDOFRange(&min_dof,&max_dof,in);

   distribution=TMPMEMALLOC(in->MPIInfo->size+1, index_t);
   offsets=TMPMEMALLOC(in->MPIInfo->size, dim_t);
   loc_offsets=TMPMEMALLOC(in->MPIInfo->size, dim_t);

   if ( ! (Finley_checkPtr(distribution) || Finley_checkPtr(offsets) || Finley_checkPtr(loc_offsets) ) ) {
      /* distribute the range of degrees of freedom */
      buffer_len=Paso_MPIInfo_setDistribution(in->MPIInfo,min_dof,max_dof,distribution);
      myDOFs=distribution[in->MPIInfo->rank+1]-distribution[in->MPIInfo->rank];
      /* allocate buffers */
      DOF_buffer=TMPMEMALLOC(buffer_len,index_t);
      if (! Finley_checkPtr(DOF_buffer)) {
         /* fill DOF_buffer with the unset_dof marker so that undefined entries can be detected */
         #pragma omp parallel for private(n) schedule(static)
         for (n=0;n<buffer_len;n++) DOF_buffer[n]=unset_dof;

         /* fill the buffer by sending portions around in a circle */
         dest=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank + 1);
         source=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank - 1);
         buffer_rank=in->MPIInfo->rank;
         for (p=0; p<in->MPIInfo->size; ++p) {
            if (p>0) { /* the initial send can be skipped */
               #ifdef PASO_MPI
               MPI_Sendrecv_replace(DOF_buffer, buffer_len, MPI_INT,
                                    dest, in->MPIInfo->msg_tag_counter, source, in->MPIInfo->msg_tag_counter,
                                    in->MPIInfo->comm, &status);
               #endif
               in->MPIInfo->msg_tag_counter++;
            }
            buffer_rank=Paso_MPIInfo_mod(in->MPIInfo->size, buffer_rank-1);
            dof_0=distribution[buffer_rank];
            dof_1=distribution[buffer_rank+1];
            #pragma omp parallel for private(n,k) schedule(static)
            for (n=0;n<in->numNodes;n++) {
               if (reducedNodeMask[n] >-1) {
                  k=in->globalDegreesOfFreedom[n];
                  if ((dof_0<=k) && (k<dof_1)) {
                     DOF_buffer[k-dof_0] = set_dof;
                  }
               }
            }
         }
         /* count the entries in the DOF_buffer */
         /* TODO: OMP parallel */
         myNewDOFs=0;
         for (n=0; n<myDOFs; ++n) {
            if ( DOF_buffer[n] == set_dof) {
               DOF_buffer[n]=myNewDOFs;
               myNewDOFs++;
            }
         }
         memset(loc_offsets,0,in->MPIInfo->size*sizeof(dim_t));
         loc_offsets[in->MPIInfo->rank]=myNewDOFs;
         #ifdef PASO_MPI
         MPI_Allreduce(loc_offsets,offsets,in->MPIInfo->size, MPI_INT, MPI_SUM, in->MPIInfo->comm );
         globalNumReducedDOFs=0;
         for (n=0; n<in->MPIInfo->size; ++n) {
            loc_offsets[n]=globalNumReducedDOFs;
            globalNumReducedDOFs+=offsets[n];
         }
         #else
         globalNumReducedDOFs=loc_offsets[0];
         loc_offsets[0]=0;
         #endif
         #pragma omp parallel for private(n) schedule(static)
         for (n=0; n<myDOFs; ++n) DOF_buffer[n]+=loc_offsets[in->MPIInfo->rank];
         /* now entries are collected from the buffer again by sending the entries around in a circle */
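         /* loc_offsets[0] is 0 at this point, so every node starts with the marker -1;  */
         /* only nodes in the reduced set receive a new index in the loop below          */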
         #pragma omp parallel for private(n) schedule(static)
         for (n=0; n<in->numNodes; ++n) in->globalReducedDOFIndex[n]=loc_offsets[0]-1;
         dest=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank + 1);
         source=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank - 1);
         buffer_rank=in->MPIInfo->rank;
         for (p=0; p<in->MPIInfo->size; ++p) {
            dof_0=distribution[buffer_rank];
            dof_1=distribution[buffer_rank+1];
            #pragma omp parallel for private(n,k) schedule(static)
            for (n=0;n<in->numNodes;n++) {
               if (reducedNodeMask[n] >-1) {
                  k=in->globalDegreesOfFreedom[n];
                  if ( (dof_0<=k) && (k<dof_1)) in->globalReducedDOFIndex[n]=DOF_buffer[k-dof_0];
               }
            }
            if (p<in->MPIInfo->size-1) { /* the last send can be skipped */
               #ifdef PASO_MPI
               MPI_Sendrecv_replace(DOF_buffer, buffer_len, MPI_INT,
                                    dest, in->MPIInfo->msg_tag_counter, source, in->MPIInfo->msg_tag_counter,
                                    in->MPIInfo->comm, &status);
               #endif
               in->MPIInfo->msg_tag_counter+=1;
            }
            buffer_rank=Paso_MPIInfo_mod(in->MPIInfo->size, buffer_rank-1);
         }
      }
      TMPMEMFREE(DOF_buffer);
   }
   TMPMEMFREE(distribution);
   TMPMEMFREE(loc_offsets);
   TMPMEMFREE(offsets);
   return globalNumReducedDOFs;
}
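
/* same ring algorithm again, but driven by the node ids in in->Id rather than by the */
/* degrees of freedom; the resulting dense node numbering is written to               */
/* in->globalNodesIndex and the total number of global nodes is returned              */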
dim_t Finley_NodeFile_createDenseNodeLabeling(Finley_NodeFile* in)
{
   index_t min_nodeID, max_nodeID, unset_nodeID=-1, set_nodeID=1, nodeID_0, nodeID_1, *Node_buffer=NULL, k;
   Paso_MPI_rank buffer_rank, dest, source, *distribution=NULL;
   dim_t p, buffer_len, n, myNodes, *offsets=NULL, *loc_offsets=NULL, globalNumNodes=0, myNewNodes;
   #ifdef PASO_MPI
   MPI_Status status;
   #endif

   /* get the global range of node ids */
   Finley_NodeFile_setGlobalIdRange(&min_nodeID,&max_nodeID,in);

   distribution=TMPMEMALLOC(in->MPIInfo->size+1, index_t);
   offsets=TMPMEMALLOC(in->MPIInfo->size, dim_t);
   loc_offsets=TMPMEMALLOC(in->MPIInfo->size, dim_t);

   if ( ! (Finley_checkPtr(distribution) || Finley_checkPtr(offsets) || Finley_checkPtr(loc_offsets)) ) {
      /* distribute the range of node ids */
      buffer_len=Paso_MPIInfo_setDistribution(in->MPIInfo,min_nodeID,max_nodeID,distribution);
      myNodes=distribution[in->MPIInfo->rank+1]-distribution[in->MPIInfo->rank];
      /* allocate buffers */
      Node_buffer=TMPMEMALLOC(buffer_len,index_t);
      if (! Finley_checkPtr(Node_buffer)) {
         /* fill Node_buffer with the unset_nodeID marker so that undefined entries can be detected */
         #pragma omp parallel for private(n) schedule(static)
         for (n=0;n<buffer_len;n++) Node_buffer[n]=unset_nodeID;

         /* fill the buffer by sending portions around in a circle */
         dest=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank + 1);
         source=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank - 1);
         buffer_rank=in->MPIInfo->rank;
         for (p=0; p<in->MPIInfo->size; ++p) {
            if (p>0) { /* the initial send can be skipped */
               #ifdef PASO_MPI
               MPI_Sendrecv_replace(Node_buffer, buffer_len, MPI_INT,
                                    dest, in->MPIInfo->msg_tag_counter, source, in->MPIInfo->msg_tag_counter,
                                    in->MPIInfo->comm, &status);
               #endif
               in->MPIInfo->msg_tag_counter++;
            }
            buffer_rank=Paso_MPIInfo_mod(in->MPIInfo->size, buffer_rank-1);
            nodeID_0=distribution[buffer_rank];
            nodeID_1=distribution[buffer_rank+1];
            #pragma omp parallel for private(n,k) schedule(static)
            for (n=0;n<in->numNodes;n++) {
               k=in->Id[n];
               if ((nodeID_0<=k) && (k<nodeID_1)) {
                  Node_buffer[k-nodeID_0] = set_nodeID;
               }
            }
         }
         /* count the entries in the Node_buffer */
         /* TODO: OMP parallel */
         myNewNodes=0;
         for (n=0; n<myNodes; ++n) {
            if ( Node_buffer[n] == set_nodeID) {
               Node_buffer[n]=myNewNodes;
               myNewNodes++;
            }
         }
         memset(loc_offsets,0,in->MPIInfo->size*sizeof(dim_t));
         loc_offsets[in->MPIInfo->rank]=myNewNodes;
         #ifdef PASO_MPI
         MPI_Allreduce(loc_offsets,offsets,in->MPIInfo->size, MPI_INT, MPI_SUM, in->MPIInfo->comm );
         globalNumNodes=0;
         for (n=0; n<in->MPIInfo->size; ++n) {
            loc_offsets[n]=globalNumNodes;
            globalNumNodes+=offsets[n];
         }
         #else
         globalNumNodes=loc_offsets[0];
         loc_offsets[0]=0;
         #endif
         #pragma omp parallel for private(n) schedule(static)
         for (n=0; n<myNodes; ++n) Node_buffer[n]+=loc_offsets[in->MPIInfo->rank];
         /* now entries are collected from the buffer again by sending the entries around in a circle */
         #pragma omp parallel for private(n) schedule(static)
         for (n=0; n<in->numNodes; ++n) in->globalNodesIndex[n]=loc_offsets[0]-1;
         dest=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank + 1);
         source=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank - 1);
         buffer_rank=in->MPIInfo->rank;
         for (p=0; p<in->MPIInfo->size; ++p) {
            nodeID_0=distribution[buffer_rank];
            nodeID_1=distribution[buffer_rank+1];
            #pragma omp parallel for private(n,k) schedule(static)
            for (n=0;n<in->numNodes;n++) {
               k=in->Id[n];
               if ( (nodeID_0<=k) && (k<nodeID_1) ) in->globalNodesIndex[n]=Node_buffer[k-nodeID_0];
            }
            if (p<in->MPIInfo->size-1) { /* the last send can be skipped */
               #ifdef PASO_MPI
               MPI_Sendrecv_replace(Node_buffer, buffer_len, MPI_INT,
                                    dest, in->MPIInfo->msg_tag_counter, source, in->MPIInfo->msg_tag_counter,
                                    in->MPIInfo->comm, &status);
               #endif
               in->MPIInfo->msg_tag_counter+=1;
            }
            buffer_rank=Paso_MPIInfo_mod(in->MPIInfo->size, buffer_rank-1);
         }
      }
      TMPMEMFREE(Node_buffer);
   }
   TMPMEMFREE(distribution);
   TMPMEMFREE(loc_offsets);
   TMPMEMFREE(offsets);
   return globalNumNodes;
}

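/* the reduced-node variant: identical to Finley_NodeFile_createDenseNodeLabeling, but */
/* only nodes with reducedNodeMask[n]>-1 take part; the dense numbering is written to  */
/* in->globalReducedNodesIndex and nodes outside the reduced set keep the marker -1    */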
dim_t Finley_NodeFile_createDenseReducedNodeLabeling(Finley_NodeFile* in, index_t* reducedNodeMask)
{
   index_t min_nodeID, max_nodeID, unset_nodeID=-1, set_nodeID=1, nodeID_0, nodeID_1, *Node_buffer=NULL, k;
   Paso_MPI_rank buffer_rank, dest, source, *distribution=NULL;
   dim_t p, buffer_len, n, myNodes, *offsets=NULL, *loc_offsets=NULL, globalNumReducedNodes=0, myNewNodes;
   #ifdef PASO_MPI
   MPI_Status status;
   #endif

   /* get the global range of node ids */
   Finley_NodeFile_setGlobalIdRange(&min_nodeID,&max_nodeID,in);

   distribution=TMPMEMALLOC(in->MPIInfo->size+1, index_t);
   offsets=TMPMEMALLOC(in->MPIInfo->size, dim_t);
   loc_offsets=TMPMEMALLOC(in->MPIInfo->size, dim_t);

   if ( ! (Finley_checkPtr(distribution) || Finley_checkPtr(offsets) || Finley_checkPtr(loc_offsets) )) {
      /* distribute the range of node ids */
      buffer_len=Paso_MPIInfo_setDistribution(in->MPIInfo,min_nodeID,max_nodeID,distribution);
      myNodes=distribution[in->MPIInfo->rank+1]-distribution[in->MPIInfo->rank];
      /* allocate buffers */
      Node_buffer=TMPMEMALLOC(buffer_len,index_t);
      if (! Finley_checkPtr(Node_buffer)) {
         /* fill Node_buffer with the unset_nodeID marker so that undefined entries can be detected */
         #pragma omp parallel for private(n) schedule(static)
         for (n=0;n<buffer_len;n++) Node_buffer[n]=unset_nodeID;

         /* fill the buffer by sending portions around in a circle */
         dest=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank + 1);
         source=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank - 1);
         buffer_rank=in->MPIInfo->rank;
         for (p=0; p<in->MPIInfo->size; ++p) {
            if (p>0) { /* the initial send can be skipped */
               #ifdef PASO_MPI
               MPI_Sendrecv_replace(Node_buffer, buffer_len, MPI_INT,
                                    dest, in->MPIInfo->msg_tag_counter, source, in->MPIInfo->msg_tag_counter,
                                    in->MPIInfo->comm, &status);
               #endif
               in->MPIInfo->msg_tag_counter++;
            }
            buffer_rank=Paso_MPIInfo_mod(in->MPIInfo->size, buffer_rank-1);
            nodeID_0=distribution[buffer_rank];
            nodeID_1=distribution[buffer_rank+1];
            #pragma omp parallel for private(n,k) schedule(static)
            for (n=0;n<in->numNodes;n++) {
               if (reducedNodeMask[n] >-1) {
                  k=in->Id[n];
                  if ((nodeID_0<=k) && (k<nodeID_1)) {
                     Node_buffer[k-nodeID_0] = set_nodeID;
                  }
               }
            }
         }
         /* count the entries in the Node_buffer */
         /* TODO: OMP parallel */
         myNewNodes=0;
         for (n=0; n<myNodes; ++n) {
            if ( Node_buffer[n] == set_nodeID) {
               Node_buffer[n]=myNewNodes;
               myNewNodes++;
            }
         }
         memset(loc_offsets,0,in->MPIInfo->size*sizeof(dim_t));
         loc_offsets[in->MPIInfo->rank]=myNewNodes;
         #ifdef PASO_MPI
         MPI_Allreduce(loc_offsets,offsets,in->MPIInfo->size, MPI_INT, MPI_SUM, in->MPIInfo->comm );
         globalNumReducedNodes=0;
         for (n=0; n<in->MPIInfo->size; ++n) {
            loc_offsets[n]=globalNumReducedNodes;
            globalNumReducedNodes+=offsets[n];
         }
         #else
         globalNumReducedNodes=loc_offsets[0];
         loc_offsets[0]=0;
         #endif
         #pragma omp parallel for private(n) schedule(static)
         for (n=0; n<myNodes; ++n) Node_buffer[n]+=loc_offsets[in->MPIInfo->rank];
         /* now entries are collected from the buffer again by sending the entries around in a circle */
         #pragma omp parallel for private(n) schedule(static)
         for (n=0; n<in->numNodes; ++n) in->globalReducedNodesIndex[n]=loc_offsets[0]-1;
         dest=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank + 1);
         source=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank - 1);
         buffer_rank=in->MPIInfo->rank;
         for (p=0; p<in->MPIInfo->size; ++p) {
            nodeID_0=distribution[buffer_rank];
            nodeID_1=distribution[buffer_rank+1];
            #pragma omp parallel for private(n,k) schedule(static)
            for (n=0;n<in->numNodes;n++) {
               if (reducedNodeMask[n] >-1) {
                  k=in->Id[n];
                  if ( (nodeID_0<=k) && (k<nodeID_1)) in->globalReducedNodesIndex[n]=Node_buffer[k-nodeID_0];
               }
            }
            if (p<in->MPIInfo->size-1) { /* the last send can be skipped */
               #ifdef PASO_MPI
               MPI_Sendrecv_replace(Node_buffer, buffer_len, MPI_INT,
                                    dest, in->MPIInfo->msg_tag_counter, source, in->MPIInfo->msg_tag_counter,
                                    in->MPIInfo->comm, &status);
               #endif
               in->MPIInfo->msg_tag_counter+=1;
            }
            buffer_rank=Paso_MPIInfo_mod(in->MPIInfo->size, buffer_rank-1);
         }
      }
      TMPMEMFREE(Node_buffer);
   }
   TMPMEMFREE(distribution);
   TMPMEMFREE(loc_offsets);
   TMPMEMFREE(offsets);
   return globalNumReducedNodes;
}
