230 |
return mesh_p; |
return mesh_p; |
231 |
} |
} |
232 |
|
|
233 |
Finley_Mesh* Finley_Mesh_read_MPI(char* fname,index_t order, index_t reduced_order, bool_t optimize) |
Finley_Mesh* Finley_Mesh_read_MPI(char* fname,index_t order, index_t reduced_order, bool_t optimize) |
234 |
|
|
235 |
{ |
{ |
236 |
|
|
243 |
double time0=Finley_timer(); |
double time0=Finley_timer(); |
244 |
FILE *fileHandle_p = NULL; |
FILE *fileHandle_p = NULL; |
245 |
ElementTypeId typeID, faceTypeID, contactTypeID, pointTypeID; |
ElementTypeId typeID, faceTypeID, contactTypeID, pointTypeID; |
246 |
|
|
247 |
Finley_resetError(); |
Finley_resetError(); |
248 |
|
|
249 |
if (mpi_info->rank == 0) { |
if (mpi_info->rank == 0) { |
255 |
Paso_MPIInfo_free( mpi_info ); |
Paso_MPIInfo_free( mpi_info ); |
256 |
return NULL; |
return NULL; |
257 |
} |
} |
258 |
|
|
259 |
/* read header */ |
/* read header */ |
260 |
sprintf(frm,"%%%d[^\n]",LenString_MAX-1); |
sprintf(frm,"%%%d[^\n]",LenString_MAX-1); |
261 |
fscanf(fileHandle_p, frm, name); |
fscanf(fileHandle_p, frm, name); |
262 |
|
|
263 |
/* get the nodes */ |
/* get the number of nodes */ |
|
|
|
264 |
fscanf(fileHandle_p, "%1d%*s %d\n", &numDim,&numNodes); |
fscanf(fileHandle_p, "%1d%*s %d\n", &numDim,&numNodes); |
265 |
} |
} |
266 |
|
|
285 |
} |
} |
286 |
} |
} |
287 |
#endif |
#endif |
|
printf("ksteube CPU=%d/%d name='%s' numDim=%d numNodes=%d\n", mpi_info->rank, mpi_info->size, name, numDim, numNodes); |
|
288 |
|
|
289 |
/* allocate mesh */ |
/* allocate mesh */ |
290 |
mesh_p = Finley_Mesh_alloc(name,numDim,order,reduced_order,mpi_info); |
mesh_p = Finley_Mesh_alloc(name,numDim,order,reduced_order,mpi_info); |
291 |
if (Finley_noError()) { |
if (Finley_noError()) { |
292 |
int chunkSize = numNodes / mpi_info->size + 1, totalNodes=0, chunkNodes=0, nextCPU=1, mpi_error; |
int chunkSize = numNodes / mpi_info->size + 1, totalNodes=0, chunkNodes=0, chunkEle=0, nextCPU=1, mpi_error; |
293 |
int *tempInts = TMPMEMALLOC(numNodes*3+1, index_t); |
int *tempInts = TMPMEMALLOC(numNodes*3+1, index_t); |
294 |
double *tempCoords = TMPMEMALLOC(numNodes*numDim, double); |
double *tempCoords = TMPMEMALLOC(numNodes*numDim, double); |
295 |
|
|
296 |
/* |
/* |
297 |
Read a chunk of nodes, send to worker CPU if necessary, copy chunk into local mesh_p |
Read a chunk of nodes, send to worker CPU if available, copy chunk into local mesh_p |
298 |
It doesn't matter that a CPU has the wrong nodes, this is sorted out later |
It doesn't matter that a CPU has the wrong nodes for its elements, this is sorted out later |
299 |
First chunk sent to CPU 1, second to CPU 2, ... |
First chunk sent to CPU 1, second to CPU 2, ... |
300 |
Last chunk stays on CPU 0 (the master) |
Last chunk stays on CPU 0 (the master) |
301 |
The three columns of integers (Id, gDOF, Tag) are gathered into a single array tempInts and sent together in a single MPI message |
The three columns of integers (Id, gDOF, Tag) are gathered into a single array tempInts and sent together in a single MPI message |
304 |
if (mpi_info->rank == 0) { /* Master */ |
if (mpi_info->rank == 0) { /* Master */ |
305 |
for (;;) { /* Infinite loop */ |
for (;;) { /* Infinite loop */ |
306 |
chunkNodes = 0; |
chunkNodes = 0; |
307 |
for (i0=0; i0<numNodes*3; i0++) tempInts[i0] = -1; |
for (i0=0; i0<numNodes*3+1; i0++) tempInts[i0] = -1; |
308 |
for (i0=0; i0<numNodes*numDim; i0++) tempCoords[i0] = -1.0; |
for (i0=0; i0<numNodes*numDim; i0++) tempCoords[i0] = -1.0; |
309 |
for (i1=0; i1<chunkSize; i1++) { |
for (i1=0; i1<chunkSize; i1++) { |
310 |
if (totalNodes >= numNodes) break; |
if (totalNodes >= numNodes) break; |
327 |
if (nextCPU < mpi_info->size) { |
if (nextCPU < mpi_info->size) { |
328 |
#ifdef PASO_MPI |
#ifdef PASO_MPI |
329 |
tempInts[numNodes*3] = chunkNodes; |
tempInts[numNodes*3] = chunkNodes; |
330 |
mpi_error = MPI_Send(tempInts, numNodes*3+1, MPI_INT, nextCPU, 8727, mpi_info->comm); |
/* ksteube The size of this message can and should be brought down to chunkNodes*3+1, must re-org tempInts */ |
331 |
|
mpi_error = MPI_Send(tempInts, numNodes*3+1, MPI_INT, nextCPU, 81720, mpi_info->comm); |
332 |
if ( mpi_error != MPI_SUCCESS ) { |
if ( mpi_error != MPI_SUCCESS ) { |
333 |
Finley_setError(PASO_MPI_ERROR, "Finley_Mesh_read: send of tempInts failed"); |
Finley_setError(PASO_MPI_ERROR, "Finley_Mesh_read: send of tempInts failed"); |
334 |
return NULL; |
return NULL; |
335 |
} |
} |
336 |
mpi_error = MPI_Send(tempCoords, numNodes*numDim, MPI_DOUBLE, nextCPU, 8728, mpi_info->comm); |
mpi_error = MPI_Send(tempCoords, numNodes*numDim, MPI_DOUBLE, nextCPU, 81721, mpi_info->comm); |
337 |
if ( mpi_error != MPI_SUCCESS ) { |
if ( mpi_error != MPI_SUCCESS ) { |
338 |
Finley_setError(PASO_MPI_ERROR, "Finley_Mesh_read: send of tempCoords failed"); |
Finley_setError(PASO_MPI_ERROR, "Finley_Mesh_read: send of tempCoords failed"); |
339 |
return NULL; |
return NULL; |
348 |
#ifdef PASO_MPI |
#ifdef PASO_MPI |
349 |
/* Each worker receives two messages */ |
/* Each worker receives two messages */ |
350 |
MPI_Status status; |
MPI_Status status; |
351 |
mpi_error = MPI_Recv(tempInts, numNodes*3+1, MPI_INT, 0, 8727, mpi_info->comm, &status); |
mpi_error = MPI_Recv(tempInts, numNodes*3+1, MPI_INT, 0, 81720, mpi_info->comm, &status); |
352 |
if ( mpi_error != MPI_SUCCESS ) { |
if ( mpi_error != MPI_SUCCESS ) { |
353 |
Finley_setError(PASO_MPI_ERROR, "Finley_Mesh_read: receive of tempInts failed"); |
Finley_setError(PASO_MPI_ERROR, "Finley_Mesh_read: receive of tempInts failed"); |
354 |
return NULL; |
return NULL; |
355 |
} |
} |
356 |
mpi_error = MPI_Recv(tempCoords, numNodes*numDim, MPI_DOUBLE, 0, 8728, mpi_info->comm, &status); |
mpi_error = MPI_Recv(tempCoords, numNodes*numDim, MPI_DOUBLE, 0, 81721, mpi_info->comm, &status); |
357 |
if ( mpi_error != MPI_SUCCESS ) { |
if ( mpi_error != MPI_SUCCESS ) { |
358 |
Finley_setError(PASO_MPI_ERROR, "Finley_Mesh_read: receive of tempCoords failed"); |
Finley_setError(PASO_MPI_ERROR, "Finley_Mesh_read: receive of tempCoords failed"); |
359 |
return NULL; |
return NULL; |
361 |
chunkNodes = tempInts[numNodes*3]; |
chunkNodes = tempInts[numNodes*3]; |
362 |
#endif |
#endif |
363 |
} /* Worker */ |
} /* Worker */ |
364 |
printf("ksteube CPU=%d/%d name='%s' numDim=%d numNodes=%d chunkNodes=%d\n", mpi_info->rank, mpi_info->size, name, numDim, numNodes, chunkNodes); |
|
365 |
#if 0 |
#if 0 |
366 |
|
/* Display the temp mem for debugging */ |
367 |
printf("ksteube tempInts totalNodes=%d:\n", totalNodes); |
printf("ksteube tempInts totalNodes=%d:\n", totalNodes); |
368 |
for (i0=0; i0<numNodes*3; i0++) { |
for (i0=0; i0<numNodes*3; i0++) { |
369 |
printf(" %2d", tempInts[i0]); |
printf(" %2d", tempInts[i0]); |
376 |
} |
} |
377 |
#endif |
#endif |
378 |
|
|
379 |
/* Copy tempMem into mesh_p */ |
printf("ksteube chunkNodes=%d numNodes=%d\n", chunkNodes, numNodes); |
380 |
Finley_NodeFile_allocTable(mesh_p->Nodes, numNodes); |
/* Copy node data from tempMem to mesh_p */ |
381 |
for (i0=0; i0<numNodes; i0++) { |
Finley_NodeFile_allocTable(mesh_p->Nodes, chunkNodes); |
382 |
mesh_p->Nodes->Id[i0] = tempInts[0+i0]; |
if (Finley_noError()) { |
383 |
mesh_p->Nodes->globalDegreesOfFreedom[i0] = tempInts[numNodes+i0]; |
for (i0=0; i0<chunkNodes; i0++) { |
384 |
mesh_p->Nodes->Tag[i0] = tempInts[numNodes*2+i0]; |
mesh_p->Nodes->Id[i0] = tempInts[0+i0]; |
385 |
for (i1=0; i1<numDim; i1++) { |
mesh_p->Nodes->globalDegreesOfFreedom[i0] = tempInts[numNodes+i0]; |
386 |
mesh_p->Nodes->Coordinates[INDEX2(i1,i0,numDim)] = tempCoords[i0*numDim+i1]; |
mesh_p->Nodes->Tag[i0] = tempInts[numNodes*2+i0]; |
387 |
} |
for (i1=0; i1<numDim; i1++) { |
388 |
|
mesh_p->Nodes->Coordinates[INDEX2(i1,i0,numDim)] = tempCoords[i0*numDim+i1]; |
389 |
|
} |
390 |
|
} |
391 |
} |
} |
392 |
|
|
393 |
TMPMEMFREE(tempInts); |
TMPMEMFREE(tempInts); |
394 |
TMPMEMFREE(tempCoords); |
TMPMEMFREE(tempCoords); |
395 |
|
|
396 |
return mesh_p; /* ksteube temp for debugging */ |
/* read elements */ |
397 |
|
|
398 |
|
/* Read the element typeID */ |
399 |
|
if (Finley_noError()) { |
400 |
|
if (mpi_info->rank == 0) { |
401 |
|
fscanf(fileHandle_p, "%s %d\n", element_type, &numEle); |
402 |
|
typeID=Finley_RefElement_getTypeId(element_type); |
403 |
|
} |
404 |
|
#ifdef PASO_MPI |
405 |
|
if (mpi_info->size > 0) { |
406 |
|
int temp1[3]; |
407 |
|
temp1[0] = typeID; |
408 |
|
temp1[1] = numEle; |
409 |
|
mpi_error = MPI_Bcast (temp1, 2, MPI_INT, 0, mpi_info->comm); |
410 |
|
if (mpi_error != MPI_SUCCESS) { |
411 |
|
Finley_setError(PASO_MPI_ERROR, "Finley_Mesh_read: broadcast of Element typeID failed"); |
412 |
|
return NULL; |
413 |
|
} |
414 |
|
typeID = temp1[0]; |
415 |
|
numEle = temp1[1]; |
416 |
|
} |
417 |
|
#endif |
418 |
|
if (typeID==NoType) { |
419 |
|
sprintf(error_msg, "Finley_Mesh_read: Unidentified element type %s", element_type); |
420 |
|
Finley_setError(VALUE_ERROR, error_msg); |
421 |
|
} |
422 |
|
} |
423 |
|
|
424 |
|
/* Read the element data */ |
425 |
|
mesh_p->Elements=Finley_ElementFile_alloc(typeID,mesh_p->order, mesh_p->reduced_order, mpi_info); |
426 |
|
numNodes = mesh_p->Elements->ReferenceElement->Type->numNodes; /* New meaning for numNodes: num nodes per element */ |
427 |
|
|
428 |
|
if (Finley_noError()) { |
429 |
|
int *tempInts = TMPMEMALLOC(numEle*(2+numNodes)+1, index_t); /* Store Id + Tag + node list (+ one int at end for chunkEle) */ |
430 |
|
int chunkSize = numEle / mpi_info->size, totalEle=0, nextCPU=1, mpi_error; |
431 |
|
if (numEle % mpi_info->size != 0) chunkSize++; /* Remainder from numEle / mpi_info->size will be spread out one-per-CPU */ |
432 |
|
if (mpi_info->rank == 0) { /* Master */ |
433 |
|
for (;;) { /* Infinite loop */ |
434 |
|
chunkEle = 0; |
435 |
|
for (i0=0; i0<numEle*(2+numNodes)+1; i0++) tempInts[i0] = -1; |
436 |
|
for (i0=0; i0<chunkSize; i0++) { |
437 |
|
if (totalEle >= numEle) break; /* End infinite loop */ |
438 |
|
fscanf(fileHandle_p, "%d %d", &tempInts[i0*(2+numNodes)+0], &tempInts[i0*(2+numNodes)+1]); |
439 |
|
for (i1 = 0; i1 < numNodes; i1++) fscanf(fileHandle_p, " %d", &tempInts[i0*(2+numNodes)+2+i1]); |
440 |
|
fscanf(fileHandle_p, "\n"); |
441 |
|
totalEle++; |
442 |
|
chunkEle++; |
443 |
|
} |
444 |
|
/* Eventually we'll send chunk of nodes to each CPU except 0 itself, here goes one of them */ |
445 |
|
if (nextCPU < mpi_info->size) { |
446 |
|
#ifdef PASO_MPI |
447 |
|
tempInts[numEle*(2+numNodes)] = chunkEle; |
448 |
|
printf("ksteube CPU=%d/%d send to %d\n", mpi_info->rank, mpi_info->size, nextCPU); |
449 |
|
mpi_error = MPI_Send(tempInts, numEle*(2+numNodes)+1, MPI_INT, nextCPU, 81722, mpi_info->comm); |
450 |
|
if ( mpi_error != MPI_SUCCESS ) { |
451 |
|
Finley_setError(PASO_MPI_ERROR, "Finley_Mesh_read: send of tempInts for Elements failed"); |
452 |
|
return NULL; |
453 |
|
} |
454 |
|
#endif |
455 |
|
nextCPU++; |
456 |
|
} |
457 |
|
if (totalEle >= numEle) break; /* End infinite loop */ |
458 |
|
} /* Infinite loop */ |
459 |
|
} /* End master */ |
460 |
|
else { /* Worker */ |
461 |
|
#ifdef PASO_MPI |
462 |
|
/* Each worker receives two messages */ |
463 |
|
MPI_Status status; |
464 |
|
printf("ksteube CPU=%d/%d recv on %d\n", mpi_info->rank, mpi_info->size, mpi_info->rank); |
465 |
|
mpi_error = MPI_Recv(tempInts, numEle*(2+numNodes)+1, MPI_INT, 0, 81722, mpi_info->comm, &status); |
466 |
|
if ( mpi_error != MPI_SUCCESS ) { |
467 |
|
Finley_setError(PASO_MPI_ERROR, "Finley_Mesh_read: receive of tempInts for Elements failed"); |
468 |
|
return NULL; |
469 |
|
} |
470 |
|
chunkEle = tempInts[numEle*(2+numNodes)]; |
471 |
|
#endif |
472 |
|
} /* Worker */ |
473 |
|
#if 1 |
474 |
|
/* Display the temp mem for debugging */ |
475 |
|
printf("ksteube tempInts numEle=%d chunkEle=%d AAA:\n", numEle, chunkEle); |
476 |
|
for (i0=0; i0<numEle*(numNodes+2); i0++) { |
477 |
|
printf(" %2d", tempInts[i0]); |
478 |
|
if (i0%(numNodes+2)==numNodes+2-1) printf("\n"); |
479 |
|
} |
480 |
|
#endif |
481 |
|
|
482 |
|
/* Copy Element data from tempInts to mesh_p */ |
483 |
|
Finley_ElementFile_allocTable(mesh_p->Elements, chunkEle); |
484 |
|
mesh_p->Elements->minColor=0; |
485 |
|
mesh_p->Elements->maxColor=chunkEle-1; |
486 |
|
if (Finley_noError()) { |
487 |
|
#pragma omp parallel for private (i0, i1) |
488 |
|
for (i0=0; i0<chunkEle; i0++) { |
489 |
|
mesh_p->Elements->Id[i0] = tempInts[i0*(2+numNodes)+0]; |
490 |
|
mesh_p->Elements->Tag[i0] = tempInts[i0*(2+numNodes)+1]; |
491 |
|
for (i1 = 0; i1 < numNodes; i1++) { |
492 |
|
mesh_p->Elements->Nodes[INDEX2(i1, i0, numNodes)] = tempInts[i0*(2+numNodes)+2+i1]; |
493 |
|
} |
494 |
|
} |
495 |
|
} |
496 |
|
|
497 |
|
TMPMEMFREE(tempInts); |
498 |
|
} |
499 |
|
|
500 |
|
|
501 |
|
#if 0 /* this is the original code for reading elements */ |
502 |
/* read elements */ |
/* read elements */ |
503 |
if (Finley_noError()) { |
if (Finley_noError()) { |
504 |
|
|
505 |
fscanf(fileHandle_p, "%s %d\n", element_type, &numEle); |
fscanf(fileHandle_p, "%s %d\n", element_type, &numEle); |
506 |
typeID=Finley_RefElement_getTypeId(element_type); |
typeID=Finley_RefElement_getTypeId(element_type); |
507 |
if (typeID==NoType) { |
if (typeID==NoType) { |
528 |
} |
} |
529 |
} |
} |
530 |
} |
} |
531 |
|
#endif |
532 |
|
|
533 |
|
printf("ksteube CPU=%d/%d Element typeID=%d\n", mpi_info->rank, mpi_info->size, typeID); |
534 |
|
|
535 |
|
#if 1 |
536 |
|
|
537 |
|
/* Define other structures to keep mesh_write from crashing */ |
538 |
|
|
539 |
|
mesh_p->FaceElements=Finley_ElementFile_alloc(0, mesh_p->order, mesh_p->reduced_order, mpi_info); |
540 |
|
Finley_ElementFile_allocTable(mesh_p->FaceElements, 0); |
541 |
|
|
542 |
|
mesh_p->ContactElements=Finley_ElementFile_alloc(0, mesh_p->order, mesh_p->reduced_order, mpi_info); |
543 |
|
Finley_ElementFile_allocTable(mesh_p->ContactElements, 0); |
544 |
|
|
545 |
|
mesh_p->Points=Finley_ElementFile_alloc(0, mesh_p->order, mesh_p->reduced_order, mpi_info); |
546 |
|
Finley_ElementFile_allocTable(mesh_p->Points, 0); |
547 |
|
|
548 |
|
#endif |
549 |
|
|
550 |
|
#if 0 /* comment out the rest of the un-implemented crap for now */ |
551 |
/* get the face elements */ |
/* get the face elements */ |
552 |
if (Finley_noError()) { |
if (Finley_noError()) { |
553 |
fscanf(fileHandle_p, "%s %d\n", element_type, &numEle); |
fscanf(fileHandle_p, "%s %d\n", element_type, &numEle); |
601 |
} |
} |
602 |
} |
} |
603 |
} |
} |
604 |
} |
} |
605 |
/* get the nodal element */ |
/* get the nodal element */ |
606 |
if (Finley_noError()) { |
if (Finley_noError()) { |
607 |
fscanf(fileHandle_p, "%s %d\n", element_type, &numEle); |
fscanf(fileHandle_p, "%s %d\n", element_type, &numEle); |
638 |
} |
} |
639 |
} |
} |
640 |
} |
} |
641 |
|
#endif /* comment out the rest of the un-implemented crap for now */ |
642 |
} |
} |
643 |
/* close file */ |
/* close file */ |
644 |
fclose(fileHandle_p); |
if (mpi_info->rank == 0) fclose(fileHandle_p); |
645 |
|
|
646 |
/* resolve id's : */ |
/* resolve id's : */ |
647 |
/* rearrange elements: */ |
/* rearrange elements: */ |
648 |
|
|
649 |
if (Finley_noError()) Finley_Mesh_resolveNodeIds(mesh_p); |
if (Finley_noError()) Finley_Mesh_resolveNodeIds(mesh_p); |
650 |
if (Finley_noError()) Finley_Mesh_prepare(mesh_p, optimize); |
if (Finley_noError()) Finley_Mesh_prepare(mesh_p, optimize); |
651 |
|
return mesh_p; /* ksteube temp return for debugging */ |
652 |
|
|
653 |
/* that's it */ |
/* that's it */ |
654 |
#ifdef Finley_TRACE |
#ifdef Finley_TRACE |
655 |
printf("timing: reading mesh: %.4e sec\n",Finley_timer()-time0); |
printf("timing: reading mesh: %.4e sec\n",Finley_timer()-time0); |