Contents of /trunk-mpi-branch/finley/src/NodeFile_createDenseLabelings.c

Revision 1260
Mon Aug 20 07:39:44 2007 UTC by gross
File MIME type: text/plain
File size: 23971 byte(s)
Log message: fix for no mpi version

/*
 ************************************************************
 *          Copyright 2006 by ACcESS MNRF                   *
 *                                                          *
 *              http://www.access.edu.au                    *
 *       Primary Business: Queensland, Australia            *
 *  Licensed under the Open Software License version 3.0    *
 *     http://www.opensource.org/licenses/osl-3.0.php       *
 *                                                          *
 ************************************************************
 */

/**************************************************************/

/*   Finley: Mesh: NodeFile */

/*   creates a dense labeling of the global degrees of freedom */
/*   and returns the new number of global degrees of freedom   */

/**************************************************************/

/*  Author: gross@access.edu.au */
/*  Version: $Id:$ */

/**************************************************************/

#include "NodeFile.h"

/**************************************************************/

dim_t Finley_NodeFile_createDenseDOFLabeling(Finley_NodeFile* in)
{
    index_t min_dof, max_dof, unset_dof=-1, set_dof=1, dof_0, dof_1, *DOF_buffer=NULL, k;
    Paso_MPI_rank buffer_rank, dest, source, *distribution=NULL;
    dim_t p, buffer_len, n, myDOFs, *offsets=NULL, *loc_offsets=NULL, new_numGlobalDOFs=0, myNewDOFs;
    bool_t *set_new_DOF=NULL;
    #ifdef PASO_MPI
    MPI_Status status;
    #endif

    /* get the global range of node ids */
    Finley_NodeFile_setGlobalDOFRange(&min_dof, &max_dof, in);

    distribution=TMPMEMALLOC(in->MPIInfo->size+1, index_t);
    offsets=TMPMEMALLOC(in->MPIInfo->size, dim_t);
    loc_offsets=TMPMEMALLOC(in->MPIInfo->size, dim_t);
    set_new_DOF=TMPMEMALLOC(in->numNodes, bool_t);

    if ( ! (Finley_checkPtr(distribution) || Finley_checkPtr(offsets) || Finley_checkPtr(loc_offsets) || Finley_checkPtr(set_new_DOF)) ) {
        /* distribute the range of node ids */
        buffer_len=Paso_MPIInfo_setDistribution(in->MPIInfo, min_dof, max_dof, distribution);
        myDOFs=distribution[in->MPIInfo->rank+1]-distribution[in->MPIInfo->rank];
        /* allocate buffers */
        DOF_buffer=TMPMEMALLOC(buffer_len, index_t);
        if (! Finley_checkPtr(DOF_buffer)) {
            /* fill DOF_buffer by the unset_dof marker to check if nodes are defined */
            #pragma omp parallel for private(n) schedule(static)
            for (n=0; n<buffer_len; n++) DOF_buffer[n]=unset_dof;

            /* fill the buffer by sending portions around in a circle */
            dest=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank + 1);
            source=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank - 1);
            buffer_rank=in->MPIInfo->rank;
            for (p=0; p<in->MPIInfo->size; ++p) {
                if (p>0) { /* the initial send can be skipped */
                    #ifdef PASO_MPI
                    MPI_Sendrecv_replace(DOF_buffer, buffer_len, MPI_INT,
                                         dest, in->MPIInfo->msg_tag_counter, source, in->MPIInfo->msg_tag_counter,
                                         in->MPIInfo->comm, &status);
                    #endif
                    in->MPIInfo->msg_tag_counter++;
                }
                buffer_rank=Paso_MPIInfo_mod(in->MPIInfo->size, buffer_rank-1);
                dof_0=distribution[buffer_rank];
                dof_1=distribution[buffer_rank+1];
                #pragma omp parallel for private(n,k) schedule(static)
                for (n=0; n<in->numNodes; n++) {
                    k=in->globalDegreesOfFreedom[n];
                    if ((dof_0<=k) && (k<dof_1)) {
                        DOF_buffer[k-dof_0] = set_dof;
                    }
                }
            }
            /* count the entries in the DOF_buffer */
            /* TODO: OMP parallel */
            myNewDOFs=0;
            for (n=0; n<myDOFs; ++n) {
                if (DOF_buffer[n] == set_dof) {
                    DOF_buffer[n]=myNewDOFs;
                    myNewDOFs++;
                }
            }
            memset(loc_offsets, 0, in->MPIInfo->size*sizeof(dim_t));
            loc_offsets[in->MPIInfo->rank]=myNewDOFs;
            #ifdef PASO_MPI
            MPI_Allreduce(loc_offsets, offsets, in->MPIInfo->size, MPI_INT, MPI_SUM, in->MPIInfo->comm);
            new_numGlobalDOFs=0;
            for (n=0; n<in->MPIInfo->size; ++n) {
                loc_offsets[n]=new_numGlobalDOFs;
                new_numGlobalDOFs+=offsets[n];
            }
            #else
            new_numGlobalDOFs=loc_offsets[0];
            loc_offsets[0]=0;
            #endif
            #pragma omp parallel for private(n) schedule(static)
            for (n=0; n<myDOFs; ++n) DOF_buffer[n]+=loc_offsets[in->MPIInfo->rank];
            /* now entries are collected from the buffer again by sending the entries around in a circle */
            #pragma omp parallel for private(n) schedule(static)
            for (n=0; n<in->numNodes; ++n) set_new_DOF[n]=TRUE;
            dest=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank + 1);
            source=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank - 1);
            buffer_rank=in->MPIInfo->rank;
            for (p=0; p<in->MPIInfo->size; ++p) {
                dof_0=distribution[buffer_rank];
                dof_1=distribution[buffer_rank+1];
                #pragma omp parallel for private(n,k) schedule(static)
                for (n=0; n<in->numNodes; n++) {
                    k=in->globalDegreesOfFreedom[n];
                    if (set_new_DOF[n] && (dof_0<=k) && (k<dof_1)) {
                        in->globalDegreesOfFreedom[n]=DOF_buffer[k-dof_0];
                        set_new_DOF[n]=FALSE;
                    }
                }
                if (p<in->MPIInfo->size-1) { /* the last send can be skipped */
                    #ifdef PASO_MPI
                    MPI_Sendrecv_replace(DOF_buffer, buffer_len, MPI_INT,
                                         dest, in->MPIInfo->msg_tag_counter, source, in->MPIInfo->msg_tag_counter,
                                         in->MPIInfo->comm, &status);
                    #endif
                    in->MPIInfo->msg_tag_counter+=1;
                }
                buffer_rank=Paso_MPIInfo_mod(in->MPIInfo->size, buffer_rank-1);
            }
        }
        TMPMEMFREE(DOF_buffer);
    }
    TMPMEMFREE(distribution);
    TMPMEMFREE(loc_offsets);
    TMPMEMFREE(offsets);
    TMPMEMFREE(set_new_DOF);
    in->isPrepared=FINLEY_UNPREPARED;
    return new_numGlobalDOFs;
}

void Finley_NodeFile_assignMPIRankToDOFs(Finley_NodeFile* in, Paso_MPI_rank* mpiRankOfDOF, index_t *distribution) {
    index_t min_DOF, max_DOF, k;
    dim_t n;
    Paso_MPI_rank p, p_min=in->MPIInfo->size, p_max=-1;
    /* first we calculate the min and max dof on this processor to reduce the cost of searching */
    Finley_NodeFile_setDOFRange(&min_DOF, &max_DOF, in);

    for (p=0; p<in->MPIInfo->size; ++p) {
        if (distribution[p]<=min_DOF) p_min=p;
        if (distribution[p]<=max_DOF) p_max=p;
    }
    #pragma omp parallel for private(n,k,p) schedule(static)
    for (n=0; n<in->numNodes; ++n) {
        k=in->globalDegreesOfFreedom[n];
        for (p=p_min; p<=p_max; ++p) {
            if (k<distribution[p+1]) {
                mpiRankOfDOF[n]=p;
                break;
            }
        }
    }
}

dim_t Finley_NodeFile_createDenseReducedDOFLabeling(Finley_NodeFile* in, index_t* reducedNodeMask)
{
    index_t min_dof, max_dof, unset_dof=-1, set_dof=1, dof_0, dof_1, *DOF_buffer=NULL, k;
    Paso_MPI_rank buffer_rank, dest, source, *distribution=NULL;
    dim_t p, buffer_len, n, myDOFs, *offsets=NULL, *loc_offsets=NULL, globalNumReducedDOFs=0, myNewDOFs;
    #ifdef PASO_MPI
    MPI_Status status;
    #endif

    /* get the global range of node ids */
    Finley_NodeFile_setGlobalDOFRange(&min_dof, &max_dof, in);

    distribution=TMPMEMALLOC(in->MPIInfo->size+1, index_t);
    offsets=TMPMEMALLOC(in->MPIInfo->size, dim_t);
    loc_offsets=TMPMEMALLOC(in->MPIInfo->size, dim_t);

    if ( ! (Finley_checkPtr(distribution) || Finley_checkPtr(offsets) || Finley_checkPtr(loc_offsets)) ) {
        /* distribute the range of node ids */
        buffer_len=Paso_MPIInfo_setDistribution(in->MPIInfo, min_dof, max_dof, distribution);
        myDOFs=distribution[in->MPIInfo->rank+1]-distribution[in->MPIInfo->rank];
        /* allocate buffers */
        DOF_buffer=TMPMEMALLOC(buffer_len, index_t);
        if (! Finley_checkPtr(DOF_buffer)) {
            /* fill DOF_buffer by the unset_dof marker to check if nodes are defined */
            #pragma omp parallel for private(n) schedule(static)
            for (n=0; n<buffer_len; n++) DOF_buffer[n]=unset_dof;

            /* fill the buffer by sending portions around in a circle */
            dest=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank + 1);
            source=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank - 1);
            buffer_rank=in->MPIInfo->rank;
            for (p=0; p<in->MPIInfo->size; ++p) {
                if (p>0) { /* the initial send can be skipped */
                    #ifdef PASO_MPI
                    MPI_Sendrecv_replace(DOF_buffer, buffer_len, MPI_INT,
                                         dest, in->MPIInfo->msg_tag_counter, source, in->MPIInfo->msg_tag_counter,
                                         in->MPIInfo->comm, &status);
                    #endif
                    in->MPIInfo->msg_tag_counter++;
                }
                buffer_rank=Paso_MPIInfo_mod(in->MPIInfo->size, buffer_rank-1);
                dof_0=distribution[buffer_rank];
                dof_1=distribution[buffer_rank+1];
                #pragma omp parallel for private(n,k) schedule(static)
                for (n=0; n<in->numNodes; n++) {
                    if (reducedNodeMask[n]>-1) {
                        k=in->globalDegreesOfFreedom[n];
                        if ((dof_0<=k) && (k<dof_1)) {
                            DOF_buffer[k-dof_0] = set_dof;
                        }
                    }
                }
            }
            /* count the entries in the DOF_buffer */
            /* TODO: OMP parallel */
            myNewDOFs=0;
            for (n=0; n<myDOFs; ++n) {
                if (DOF_buffer[n] == set_dof) {
                    DOF_buffer[n]=myNewDOFs;
                    myNewDOFs++;
                }
            }
            memset(loc_offsets, 0, in->MPIInfo->size*sizeof(dim_t));
            loc_offsets[in->MPIInfo->rank]=myNewDOFs;
            #ifdef PASO_MPI
            MPI_Allreduce(loc_offsets, offsets, in->MPIInfo->size, MPI_INT, MPI_SUM, in->MPIInfo->comm);
            globalNumReducedDOFs=0;
            for (n=0; n<in->MPIInfo->size; ++n) {
                loc_offsets[n]=globalNumReducedDOFs;
                globalNumReducedDOFs+=offsets[n];
            }
            #else
            globalNumReducedDOFs=loc_offsets[0];
            loc_offsets[0]=0;
            #endif
            #pragma omp parallel for private(n) schedule(static)
            for (n=0; n<myDOFs; ++n) DOF_buffer[n]+=loc_offsets[in->MPIInfo->rank];
            /* now entries are collected from the buffer again by sending the entries around in a circle */
            #pragma omp parallel for private(n) schedule(static)
            for (n=0; n<in->numNodes; ++n) in->globalReducedDOFIndex[n]=-1;
            dest=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank + 1);
            source=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank - 1);
            buffer_rank=in->MPIInfo->rank;
            for (p=0; p<in->MPIInfo->size; ++p) {
                dof_0=distribution[buffer_rank];
                dof_1=distribution[buffer_rank+1];
                #pragma omp parallel for private(n,k) schedule(static)
                for (n=0; n<in->numNodes; n++) {
                    if (reducedNodeMask[n]>-1) {
                        k=in->globalDegreesOfFreedom[n];
                        if ((dof_0<=k) && (k<dof_1)) in->globalReducedDOFIndex[n]=DOF_buffer[k-dof_0];
                    }
                }
                if (p<in->MPIInfo->size-1) { /* the last send can be skipped */
                    #ifdef PASO_MPI
                    MPI_Sendrecv_replace(DOF_buffer, buffer_len, MPI_INT,
                                         dest, in->MPIInfo->msg_tag_counter, source, in->MPIInfo->msg_tag_counter,
                                         in->MPIInfo->comm, &status);
                    #endif
                    in->MPIInfo->msg_tag_counter+=1;
                }
                buffer_rank=Paso_MPIInfo_mod(in->MPIInfo->size, buffer_rank-1);
            }
        }
        TMPMEMFREE(DOF_buffer);
    }
    TMPMEMFREE(distribution);
    TMPMEMFREE(loc_offsets);
    TMPMEMFREE(offsets);
    in->isPrepared=FINLEY_UNPREPARED;
    return globalNumReducedDOFs;
}

dim_t Finley_NodeFile_createDenseNodeLabeling(Finley_NodeFile* in)
{
    index_t min_nodeID, max_nodeID, unset_nodeID=-1, set_nodeID=1, nodeID_0, nodeID_1, *Node_buffer=NULL, k;
    Paso_MPI_rank buffer_rank, dest, source, *distribution=NULL;
    dim_t p, buffer_len, n, myNodes, *offsets=NULL, *loc_offsets=NULL, globalNumNodes=0, myNewNodes;
    #ifdef PASO_MPI
    MPI_Status status;
    #endif

    /* get the global range of node ids */
    Finley_NodeFile_setGlobalIdRange(&min_nodeID, &max_nodeID, in);

    distribution=TMPMEMALLOC(in->MPIInfo->size+1, index_t);
    offsets=TMPMEMALLOC(in->MPIInfo->size, dim_t);
    loc_offsets=TMPMEMALLOC(in->MPIInfo->size, dim_t);

    if ( ! (Finley_checkPtr(distribution) || Finley_checkPtr(offsets) || Finley_checkPtr(loc_offsets)) ) {
        /* distribute the range of node ids */
        buffer_len=Paso_MPIInfo_setDistribution(in->MPIInfo, min_nodeID, max_nodeID, distribution);
        myNodes=distribution[in->MPIInfo->rank+1]-distribution[in->MPIInfo->rank];
        /* allocate buffers */
        Node_buffer=TMPMEMALLOC(buffer_len, index_t);
        if (! Finley_checkPtr(Node_buffer)) {
            /* fill Node_buffer by the unset_nodeID marker to check if nodes are defined */
            #pragma omp parallel for private(n) schedule(static)
            for (n=0; n<buffer_len; n++) Node_buffer[n]=unset_nodeID;

            /* fill the buffer by sending portions around in a circle */
            dest=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank + 1);
            source=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank - 1);
            buffer_rank=in->MPIInfo->rank;
            for (p=0; p<in->MPIInfo->size; ++p) {
                if (p>0) { /* the initial send can be skipped */
                    #ifdef PASO_MPI
                    MPI_Sendrecv_replace(Node_buffer, buffer_len, MPI_INT,
                                         dest, in->MPIInfo->msg_tag_counter, source, in->MPIInfo->msg_tag_counter,
                                         in->MPIInfo->comm, &status);
                    #endif
                    in->MPIInfo->msg_tag_counter++;
                }
                buffer_rank=Paso_MPIInfo_mod(in->MPIInfo->size, buffer_rank-1);
                nodeID_0=distribution[buffer_rank];
                nodeID_1=distribution[buffer_rank+1];
                #pragma omp parallel for private(n,k) schedule(static)
                for (n=0; n<in->numNodes; n++) {
                    k=in->Id[n];
                    if ((nodeID_0<=k) && (k<nodeID_1)) {
                        Node_buffer[k-nodeID_0] = set_nodeID;
                    }
                }
            }
            /* count the entries in the Node_buffer */
            /* TODO: OMP parallel */
            myNewNodes=0;
            for (n=0; n<myNodes; ++n) {
                if (Node_buffer[n] == set_nodeID) {
                    Node_buffer[n]=myNewNodes;
                    myNewNodes++;
                }
            }
            memset(loc_offsets, 0, in->MPIInfo->size*sizeof(dim_t));
            loc_offsets[in->MPIInfo->rank]=myNewNodes;
            #ifdef PASO_MPI
            MPI_Allreduce(loc_offsets, offsets, in->MPIInfo->size, MPI_INT, MPI_SUM, in->MPIInfo->comm);
            globalNumNodes=0;
            for (n=0; n<in->MPIInfo->size; ++n) {
                loc_offsets[n]=globalNumNodes;
                globalNumNodes+=offsets[n];
            }
            #else
            globalNumNodes=loc_offsets[0];
            loc_offsets[0]=0;
            #endif
            #pragma omp parallel for private(n) schedule(static)
            for (n=0; n<myNodes; ++n) Node_buffer[n]+=loc_offsets[in->MPIInfo->rank];
            /* now entries are collected from the buffer again by sending the entries around in a circle */
            #pragma omp parallel for private(n) schedule(static)
            for (n=0; n<in->numNodes; ++n) in->globalNodesIndex[n]=-1;
            dest=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank + 1);
            source=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank - 1);
            buffer_rank=in->MPIInfo->rank;
            for (p=0; p<in->MPIInfo->size; ++p) {
                nodeID_0=distribution[buffer_rank];
                nodeID_1=distribution[buffer_rank+1];
                #pragma omp parallel for private(n,k) schedule(static)
                for (n=0; n<in->numNodes; n++) {
                    k=in->Id[n];
                    if ((nodeID_0<=k) && (k<nodeID_1)) in->globalNodesIndex[n]=Node_buffer[k-nodeID_0];
                }
                if (p<in->MPIInfo->size-1) { /* the last send can be skipped */
                    #ifdef PASO_MPI
                    MPI_Sendrecv_replace(Node_buffer, buffer_len, MPI_INT,
                                         dest, in->MPIInfo->msg_tag_counter, source, in->MPIInfo->msg_tag_counter,
                                         in->MPIInfo->comm, &status);
                    #endif
                    in->MPIInfo->msg_tag_counter+=1;
                }
                buffer_rank=Paso_MPIInfo_mod(in->MPIInfo->size, buffer_rank-1);
            }
        }
        TMPMEMFREE(Node_buffer);
    }
    TMPMEMFREE(distribution);
    TMPMEMFREE(loc_offsets);
    TMPMEMFREE(offsets);
    in->isPrepared=FINLEY_UNPREPARED;
    return globalNumNodes;
}

dim_t Finley_NodeFile_createDenseReducedNodeLabeling(Finley_NodeFile* in, index_t* reducedNodeMask)
{
    index_t min_nodeID, max_nodeID, unset_nodeID=-1, set_nodeID=1, nodeID_0, nodeID_1, *Node_buffer=NULL, k;
    Paso_MPI_rank buffer_rank, dest, source, *distribution=NULL;
    dim_t p, buffer_len, n, myNodes, *offsets=NULL, *loc_offsets=NULL, globalNumReducedNodes=0, myNewNodes;
    #ifdef PASO_MPI
    MPI_Status status;
    #endif

    /* get the global range of node ids */
    Finley_NodeFile_setGlobalIdRange(&min_nodeID, &max_nodeID, in);

    distribution=TMPMEMALLOC(in->MPIInfo->size+1, index_t);
    offsets=TMPMEMALLOC(in->MPIInfo->size, dim_t);
    loc_offsets=TMPMEMALLOC(in->MPIInfo->size, dim_t);

    if ( ! (Finley_checkPtr(distribution) || Finley_checkPtr(offsets) || Finley_checkPtr(loc_offsets)) ) {
        /* distribute the range of node ids */
        buffer_len=Paso_MPIInfo_setDistribution(in->MPIInfo, min_nodeID, max_nodeID, distribution);
        myNodes=distribution[in->MPIInfo->rank+1]-distribution[in->MPIInfo->rank];
        /* allocate buffers */
        Node_buffer=TMPMEMALLOC(buffer_len, index_t);
        if (! Finley_checkPtr(Node_buffer)) {
            /* fill Node_buffer by the unset_nodeID marker to check if nodes are defined */
            #pragma omp parallel for private(n) schedule(static)
            for (n=0; n<buffer_len; n++) Node_buffer[n]=unset_nodeID;

            /* fill the buffer by sending portions around in a circle */
            dest=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank + 1);
            source=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank - 1);
            buffer_rank=in->MPIInfo->rank;
            for (p=0; p<in->MPIInfo->size; ++p) {
                if (p>0) { /* the initial send can be skipped */
                    #ifdef PASO_MPI
                    MPI_Sendrecv_replace(Node_buffer, buffer_len, MPI_INT,
                                         dest, in->MPIInfo->msg_tag_counter, source, in->MPIInfo->msg_tag_counter,
                                         in->MPIInfo->comm, &status);
                    #endif
                    in->MPIInfo->msg_tag_counter++;
                }
                buffer_rank=Paso_MPIInfo_mod(in->MPIInfo->size, buffer_rank-1);
                nodeID_0=distribution[buffer_rank];
                nodeID_1=distribution[buffer_rank+1];
                #pragma omp parallel for private(n,k) schedule(static)
                for (n=0; n<in->numNodes; n++) {
                    if (reducedNodeMask[n]>-1) {
                        k=in->Id[n];
                        if ((nodeID_0<=k) && (k<nodeID_1)) {
                            Node_buffer[k-nodeID_0] = set_nodeID;
                        }
                    }
                }
            }
            /* count the entries in the Node_buffer */
            /* TODO: OMP parallel */
            myNewNodes=0;
            for (n=0; n<myNodes; ++n) {
                if (Node_buffer[n] == set_nodeID) {
                    Node_buffer[n]=myNewNodes;
                    myNewNodes++;
                }
            }
            memset(loc_offsets, 0, in->MPIInfo->size*sizeof(dim_t));
            loc_offsets[in->MPIInfo->rank]=myNewNodes;
            #ifdef PASO_MPI
            MPI_Allreduce(loc_offsets, offsets, in->MPIInfo->size, MPI_INT, MPI_SUM, in->MPIInfo->comm);
            globalNumReducedNodes=0;
            for (n=0; n<in->MPIInfo->size; ++n) {
                loc_offsets[n]=globalNumReducedNodes;
                globalNumReducedNodes+=offsets[n];
            }
            #else
            globalNumReducedNodes=loc_offsets[0];
            loc_offsets[0]=0;
            #endif
            #pragma omp parallel for private(n) schedule(static)
            for (n=0; n<myNodes; ++n) Node_buffer[n]+=loc_offsets[in->MPIInfo->rank];
            /* now entries are collected from the buffer again by sending the entries around in a circle */
            #pragma omp parallel for private(n) schedule(static)
            for (n=0; n<in->numNodes; ++n) in->globalReducedNodesIndex[n]=-1;
            dest=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank + 1);
            source=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank - 1);
            buffer_rank=in->MPIInfo->rank;
            for (p=0; p<in->MPIInfo->size; ++p) {
                nodeID_0=distribution[buffer_rank];
                nodeID_1=distribution[buffer_rank+1];
                #pragma omp parallel for private(n,k) schedule(static)
                for (n=0; n<in->numNodes; n++) {
                    if (reducedNodeMask[n]>-1) {
                        k=in->Id[n];
                        if ((nodeID_0<=k) && (k<nodeID_1)) in->globalReducedNodesIndex[n]=Node_buffer[k-nodeID_0];
                    }
                }
                if (p<in->MPIInfo->size-1) { /* the last send can be skipped */
                    #ifdef PASO_MPI
                    MPI_Sendrecv_replace(Node_buffer, buffer_len, MPI_INT,
                                         dest, in->MPIInfo->msg_tag_counter, source, in->MPIInfo->msg_tag_counter,
                                         in->MPIInfo->comm, &status);
                    #endif
                    in->MPIInfo->msg_tag_counter+=1;
                }
                buffer_rank=Paso_MPIInfo_mod(in->MPIInfo->size, buffer_rank-1);
            }
        }
        TMPMEMFREE(Node_buffer);
    }
    TMPMEMFREE(distribution);
    TMPMEMFREE(loc_offsets);
    TMPMEMFREE(offsets);
    in->isPrepared=FINLEY_UNPREPARED;
    return globalNumReducedNodes;
}
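
A minimal usage sketch, not part of the revision above: it assumes a fully constructed Finley_NodeFile* and a reduced-node mask of length numNodes. The wrapper and variable names are hypothetical and only illustrate the calling sequence of the labeling routines defined in this file.

#include "NodeFile.h"

/* hypothetical helper: relabel the DOFs and nodes of "nodes" with dense, zero-based
   global numberings; entries of reducedNodeMask that are >= 0 select the reduced set */
void example_createDenseLabelings(Finley_NodeFile* nodes, index_t* reducedNodeMask)
{
    dim_t numGlobalDOFs         = Finley_NodeFile_createDenseDOFLabeling(nodes);
    dim_t numGlobalReducedDOFs  = Finley_NodeFile_createDenseReducedDOFLabeling(nodes, reducedNodeMask);
    dim_t numGlobalNodes        = Finley_NodeFile_createDenseNodeLabeling(nodes);
    dim_t numGlobalReducedNodes = Finley_NodeFile_createDenseReducedNodeLabeling(nodes, reducedNodeMask);
    /* the return values are the new global counts; a caller would typically
       store them in the enclosing mesh/distribution structures */
    (void)numGlobalDOFs; (void)numGlobalReducedDOFs;
    (void)numGlobalNodes; (void)numGlobalReducedNodes;
}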
