   TMPMEMFREE(offsets);
   return globalNumReducedDOFs;
}
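
/* Creates a dense global labeling of the nodes. Every node whose degree of
   freedom is owned by this rank according to dof_distribution receives a
   contiguous global node index, written to in->globalNodesIndex. The resulting
   distribution of node indices over the ranks is returned in node_distribution;
   the return value is the global number of nodes. */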
dim_t Finley_NodeFile_createDenseNodeLabeling(Finley_NodeFile* in, index_t* node_distribution, const index_t* dof_distribution)
{
   index_t myFirstDOF, myLastDOF, max_id, min_id, loc_max_id, loc_min_id, dof, id, itmp, nodeID_0, nodeID_1, dof_0, dof_1, *Node_buffer=NULL;
   dim_t n, my_buffer_len, buffer_len, globalNumNodes, myNewNumNodes;
   Paso_MPI_rank p, dest, source, buffer_rank;
   const index_t unset_nodeID=-1, set_nodeID=1;
   const dim_t header_len=2;
#ifdef PASO_MPI
   MPI_Status status;
#endif
   Paso_MPI_rank myRank=in->MPIInfo->rank;

   /* find the range of node ids controlled by me */
   myFirstDOF=dof_distribution[in->MPIInfo->rank];
   myLastDOF=dof_distribution[in->MPIInfo->rank+1];
   max_id=-INDEX_T_MAX;
   min_id=INDEX_T_MAX;
   #pragma omp parallel private(loc_max_id,loc_min_id)
   {
      loc_max_id=max_id;
      loc_min_id=min_id;
      #pragma omp for private(n,dof,id) schedule(static)
      for (n=0;n<in->numNodes;n++) {
         dof=in->globalDegreesOfFreedom[n];
         id=in->Id[n];
         if ((myFirstDOF<=dof) && (dof<myLastDOF)) {
            loc_max_id=MAX(loc_max_id,id);
            loc_min_id=MIN(loc_min_id,id);
         }
      }
      #pragma omp critical
      {
         max_id=MAX(loc_max_id,max_id);
         min_id=MIN(loc_min_id,min_id);
      }
   }
   /* allocate a buffer */
   my_buffer_len=max_id>min_id ? max_id-min_id+1 : 0;

#ifdef PASO_MPI
   MPI_Allreduce(&my_buffer_len, &buffer_len, 1, MPI_INT, MPI_MAX, in->MPIInfo->comm);
#else
   buffer_len=my_buffer_len;
#endif
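
   /* Node_buffer carries a header of header_len entries (the id range of the
      originating rank) followed by one slot per node id in that range. */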
   Node_buffer=TMPMEMALLOC(buffer_len+header_len,index_t);
   if (! Finley_checkPtr(Node_buffer)) {
      /* mark and count the nodes in use */
      #pragma omp parallel
      {
         #pragma omp for private(n) schedule(static)
         for (n=0;n<buffer_len;n++) Node_buffer[n]=unset_nodeID;
         #pragma omp for private(n) schedule(static)
         for (n=0;n<in->numNodes;n++) in->globalNodesIndex[n]=-1;
         #pragma omp for private(n,dof,id) schedule(static)
         for (n=0;n<in->numNodes;n++) {
            dof=in->globalDegreesOfFreedom[n];
            id=in->Id[n];
            if ((myFirstDOF<=dof) && (dof<myLastDOF)) Node_buffer[id-min_id+header_len]=set_nodeID;
         }
      }
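
      /* turn the markers into a rank-local numbering: Node_buffer[header_len+i]
         becomes the local index of node id min_id+i, and myNewNumNodes counts
         the nodes labeled by this rank */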
      myNewNumNodes=0;
      for (n=0;n<my_buffer_len;n++) {
         if (Node_buffer[header_len+n]==set_nodeID) {
            Node_buffer[header_len+n]=myNewNumNodes;
            myNewNumNodes++;
         }
      }
      /* make the local number of nodes globally available */
#ifdef PASO_MPI
      MPI_Allgather(&myNewNumNodes,1,MPI_INT,node_distribution,1,MPI_INT,in->MPIInfo->comm);
#else
      node_distribution[0]=myNewNumNodes;
#endif
      globalNumNodes=0;
      for (p=0; p<in->MPIInfo->size; ++p) {
         itmp=node_distribution[p];
         node_distribution[p]=globalNumNodes;
         globalNumNodes+=itmp;
      }
      node_distribution[in->MPIInfo->size]=globalNumNodes;
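
      /* node_distribution now holds the cumulative distribution: rank p labels
         the global node indices [node_distribution[p],node_distribution[p+1]) */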

      /* offset nodebuffer */
      itmp=node_distribution[in->MPIInfo->rank];
      #pragma omp for private(n) schedule(static)
      for (n=0;n<my_buffer_len;n++) Node_buffer[n+header_len]+=itmp;

      /* now we send this buffer around to assign global node index: */
      dest=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank + 1);
      source=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank - 1);
      Node_buffer[0]=min_id;
      Node_buffer[1]=max_id;
      buffer_rank=in->MPIInfo->rank;
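
      /* ring pass: in step p this rank holds the buffer that originated on
         buffer_rank; nodes whose degrees of freedom are owned by buffer_rank
         read their global node index from that buffer before it is forwarded */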
      for (p=0; p<in->MPIInfo->size; ++p) {
         nodeID_0=Node_buffer[0];
         nodeID_1=Node_buffer[1];
         dof_0=dof_distribution[buffer_rank];
         dof_1=dof_distribution[buffer_rank+1];
         if (nodeID_0<=nodeID_1) {
            #pragma omp for private(n,dof,id) schedule(static)
            for (n=0;n<in->numNodes;n++) {
               dof=in->globalDegreesOfFreedom[n];
               id=in->Id[n]-nodeID_0;
               if ((dof_0<=dof) && (dof<dof_1) && (id>=0) && (id<=nodeID_1-nodeID_0)) in->globalNodesIndex[n]=Node_buffer[id+header_len];
            }
         }
         if (p<in->MPIInfo->size-1) { /* the last send can be skipped */
#ifdef PASO_MPI
            MPI_Sendrecv_replace(Node_buffer, buffer_len+header_len, MPI_INT,
                                 dest, in->MPIInfo->msg_tag_counter, source, in->MPIInfo->msg_tag_counter,
                                 in->MPIInfo->comm, &status);
#endif
            in->MPIInfo->msg_tag_counter+=1;
         }
         buffer_rank=Paso_MPIInfo_mod(in->MPIInfo->size, buffer_rank-1);
      }
   }
   TMPMEMFREE(Node_buffer);
   return globalNumNodes;
}
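
/* Creates a dense global labeling of the reduced nodes: nodes flagged by
   reducedNodeMask[n]>-1 are numbered contiguously across all ranks, based on
   the global node index assigned above. The result is written to
   in->globalReducedNodesIndex and globalNumReducedNodes accumulates the global
   count of reduced nodes. */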

dim_t Finley_NodeFile_createDenseReducedNodeLabeling(Finley_NodeFile* in, index_t* reducedNodeMask)
{
   index_t min_node, max_node, unset_node=-1, set_node=1, node_0, node_1, *Nodes_buffer=NULL, k;
   Paso_MPI_rank buffer_rank, dest, source, *distribution=NULL;
   dim_t p, buffer_len, n, myNodes, *offsets=NULL, *loc_offsets=NULL, globalNumReducedNodes=0, myNewNodes;
#ifdef PASO_MPI
   MPI_Status status;
#endif

   /* get the global range of the node indices */
   Finley_NodeFile_setGlobalNodeIDIndexRange(&min_node,&max_node,in);

   distribution=TMPMEMALLOC(in->MPIInfo->size+1, index_t);
   offsets=TMPMEMALLOC(in->MPIInfo->size, dim_t);
   loc_offsets=TMPMEMALLOC(in->MPIInfo->size, dim_t);

   if ( !(Finley_checkPtr(distribution) || Finley_checkPtr(offsets) || Finley_checkPtr(loc_offsets)) ) {
      /* distribute the range of node indices */
      buffer_len=Paso_MPIInfo_setDistribution(in->MPIInfo,min_node,max_node,distribution);
      myNodes=distribution[in->MPIInfo->rank+1]-distribution[in->MPIInfo->rank];
      /* allocate buffers */
      Nodes_buffer=TMPMEMALLOC(buffer_len,index_t);
      if (! Finley_checkPtr(Nodes_buffer)) {
         /* fill Nodes_buffer with the unset_node marker so that unused entries can be detected */
         #pragma omp parallel for private(n) schedule(static)
         for (n=0;n<buffer_len;n++) Nodes_buffer[n]=unset_node;

         /* fill the buffer by sending portions around in a circle */
         dest=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank + 1);
         source=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank - 1);
         buffer_rank=in->MPIInfo->rank;
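
         /* ring pass 1: mark, in the buffer segment owned by buffer_rank, the
            global node indices that carry a reduced node */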
         for (p=0; p<in->MPIInfo->size; ++p) {
            if (p>0) { /* the initial send can be skipped */
#ifdef PASO_MPI
               MPI_Sendrecv_replace(Nodes_buffer, buffer_len, MPI_INT,
                                    dest, in->MPIInfo->msg_tag_counter, source, in->MPIInfo->msg_tag_counter,
                                    in->MPIInfo->comm, &status);
#endif
               in->MPIInfo->msg_tag_counter++;
            }
            buffer_rank=Paso_MPIInfo_mod(in->MPIInfo->size, buffer_rank-1);
            node_0=distribution[buffer_rank];
            node_1=distribution[buffer_rank+1];
            #pragma omp parallel for private(n,k) schedule(static)
            for (n=0;n<in->numNodes;n++) {
               if (reducedNodeMask[n]>-1) {
                  k=in->globalNodesIndex[n];
                  if ((node_0<=k) && (k<node_1)) {
                     Nodes_buffer[k-node_0]=set_node;
                  }
               }
            }
         }
         /* count the entries in the Nodes_buffer */
         /* TODO: OMP parallel */
         myNewNodes=0;
         for (n=0; n<myNodes; ++n) {
            if (Nodes_buffer[n]==set_node) {
               Nodes_buffer[n]=myNewNodes;
               myNewNodes++;
            }
         }
         memset(loc_offsets,0,in->MPIInfo->size*sizeof(dim_t));
         loc_offsets[in->MPIInfo->rank]=myNewNodes;
#ifdef PASO_MPI
         MPI_Allreduce(loc_offsets,offsets,in->MPIInfo->size, MPI_INT, MPI_SUM, in->MPIInfo->comm);
         globalNumReducedNodes=0;
         for (n=0; n<in->MPIInfo->size; ++n) {
            loc_offsets[n]=globalNumReducedNodes;
            globalNumReducedNodes+=offsets[n];
         }
#else
         globalNumReducedNodes=loc_offsets[0];
         loc_offsets[0]=0;
#endif
         #pragma omp parallel for private(n) schedule(static)
         for (n=0; n<myNodes; ++n) Nodes_buffer[n]+=loc_offsets[in->MPIInfo->rank];

         /* now entries are collected from the buffer again by sending the entries around in a circle */
         #pragma omp parallel for private(n) schedule(static)
         for (n=0; n<in->numNodes; ++n) in->globalReducedNodesIndex[n]=loc_offsets[0]-1;
         dest=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank + 1);
         source=Paso_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank - 1);
         buffer_rank=in->MPIInfo->rank;
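
         /* ring pass 2: collect the final reduced-node numbers for this rank's
            nodes as each buffer segment travels past */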
         for (p=0; p<in->MPIInfo->size; ++p) {
            node_0=distribution[buffer_rank];
            node_1=distribution[buffer_rank+1];
            #pragma omp parallel for private(n,k) schedule(static)
            for (n=0;n<in->numNodes;n++) {
               if (reducedNodeMask[n]>-1) {
                  k=in->globalNodesIndex[n];
                  if ((node_0<=k) && (k<node_1)) in->globalReducedNodesIndex[n]=Nodes_buffer[k-node_0];
               }
            }
            if (p<in->MPIInfo->size-1) { /* the last send can be skipped */
#ifdef PASO_MPI
               MPI_Sendrecv_replace(Nodes_buffer, buffer_len, MPI_INT,
                                    dest, in->MPIInfo->msg_tag_counter, source, in->MPIInfo->msg_tag_counter,
                                    in->MPIInfo->comm, &status);
#endif
               in->MPIInfo->msg_tag_counter+=1;
            }
            buffer_rank=Paso_MPIInfo_mod(in->MPIInfo->size, buffer_rank-1);
         }
      }
      TMPMEMFREE(Nodes_buffer);
   }
   TMPMEMFREE(distribution);
   TMPMEMFREE(loc_offsets);