Changeset excerpt for Finley_Mesh_read: lines marked '-' were removed and '+' added by the change; unmarked lines are unchanged context. The numbers are the changeset viewer's line numbers; '...' marks skipped lines.

  242  double time0=Finley_timer();
  243  FILE *fileHandle_p = NULL;
  244  ElementTypeId typeID;
-
-      #if 0 /* comment out the rest of the un-implemented crap for now */
-      /* See below */
  245  ElementTypeId faceTypeID, contactTypeID, pointTypeID;
  246  index_t tag_key;
-      #endif
  247
  248  Finley_resetError();
  249
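Throughout this routine the changeset relies on Finley's global error flag: Finley_resetError() clears it on entry, Finley_setError() records a failure, and Finley_noError() gates each following phase so a failed read aborts early. Below is a minimal self-contained sketch of that convention; the sketch_* names are simplified stand-ins, not the real Finley declarations.

    /* Sketch of the error-flag convention; sketch_* names are simplified
     * stand-ins, not the real Finley declarations. */
    #include <stdio.h>
    #include <string.h>

    static int  sketch_error_flag = 0;          /* 0 means "no error" */
    static char sketch_error_msg[256] = "";

    static void sketch_resetError(void) { sketch_error_flag = 0; sketch_error_msg[0] = '\0'; }
    static void sketch_setError(const char *msg) {
        sketch_error_flag = 1;
        strncpy(sketch_error_msg, msg, sizeof(sketch_error_msg) - 1);
    }
    static int sketch_noError(void) { return sketch_error_flag == 0; }

    int main(void) {
        sketch_resetError();                    /* like Finley_resetError() at the top */
        /* ... phase 1: open the file, read the header ... */
        if (sketch_noError()) {
            /* ... phase 2: allocate the mesh, read the nodes ... */
            sketch_setError("Mesh_read sketch: simulated read failure");
        }
        if (!sketch_noError()) printf("aborting: %s\n", sketch_error_msg);
        return 0;
    }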
  ...
  266  }
  267
  268  #ifdef PASO_MPI
- 269  /* MPI Broadcast numDim, numNodes, name */
+ 269  /* MPI Broadcast numDim, numNodes, name if there are multiple MPI procs*/
- 270  if (mpi_info->size > 0) {
+ 270  if (mpi_info->size > 1) {
  271  int temp1[3], error_code;
  272  temp1[0] = numDim;
  273  temp1[1] = numNodes;
  ...
  290  /* allocate mesh */
  291  mesh_p = Finley_Mesh_alloc(name,numDim,order,reduced_order,mpi_info);
  292  if (Finley_noError()) {
+ 293  /* Each CPU will get at most chunkSize nodes so the message has to be sufficiently large for that */
  294  int chunkSize = numNodes / mpi_info->size + 1, totalNodes=0, chunkNodes=0, chunkEle=0, nextCPU=1;
- 295  int *tempInts = TMPMEMALLOC(numNodes*3+1, index_t);
+ 295  int *tempInts = TMPMEMALLOC(chunkSize*3+1, index_t); /* Stores the integer message data */
- 296  double *tempCoords = TMPMEMALLOC(numNodes*numDim, double);
+ 296  double *tempCoords = TMPMEMALLOC(chunkSize*numDim, double); /* Stores the double message data */
  297
  298  /*
- 299  Read a chunk of nodes, send to worker CPU if available, copy chunk into local mesh_p
+ 299  Read chunkSize nodes, send it in a chunk to worker CPU which copies chunk into its local mesh_p
  300  It doesn't matter that a CPU has the wrong nodes for its elements, this is sorted out later
  301  First chunk sent to CPU 1, second to CPU 2, ...
  302  Last chunk stays on CPU 0 (the master)
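The comment block above is the heart of the change: nodes are read in chunks of chunkSize = numNodes / mpi_info->size + 1 and shipped round-robin to ranks 1..size-1, with the last chunk kept by rank 0. The buffer sizes follow from a fixed message layout: three parallel runs of chunkSize ints (Id, global degree of freedom, Tag) plus one trailing slot at index chunkSize*3 that carries chunkNodes. A minimal sketch of that arithmetic and layout, with made-up numbers and no MPI involved:

    /* Chunk arithmetic and integer-message layout assumed from the indices below:
     *   tempInts[0 .. chunkSize-1]              node Id
     *   tempInts[chunkSize .. 2*chunkSize-1]    global degree of freedom
     *   tempInts[2*chunkSize .. 3*chunkSize-1]  node Tag
     *   tempInts[3*chunkSize]                   chunkNodes actually filled
     * All numbers are made up for illustration. */
    #include <stdio.h>
    #include <stdlib.h>

    int main(void) {
        int numNodes = 10, mpiSize = 4;             /* hypothetical mesh and communicator */
        int chunkSize = numNodes / mpiSize + 1;     /* 3: each rank gets at most 3 nodes */
        int chunkNodes = 2;                         /* the last chunk may be smaller */
        int i;
        int *tempInts = malloc((chunkSize*3 + 1) * sizeof(int));

        for (i = 0; i < chunkSize*3 + 1; i++) tempInts[i] = -1;   /* same -1 fill as the diff */
        for (i = 0; i < chunkNodes; i++) {
            tempInts[0*chunkSize + i] = 100 + i;    /* Id  */
            tempInts[1*chunkSize + i] = 200 + i;    /* DOF */
            tempInts[2*chunkSize + i] = 0;          /* Tag */
        }
        tempInts[chunkSize*3] = chunkNodes;         /* trailing count read back by the receiver */

        printf("chunkSize=%d, message=%d ints, chunkNodes=%d\n",
               chunkSize, chunkSize*3 + 1, tempInts[chunkSize*3]);
        free(tempInts);
        return 0;
    }

Sizing by chunkSize rather than numNodes is exactly what the hunks below change in the TMPMEMALLOC calls, the fscanf targets and the MPI message lengths.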
  ...
  305
  306  if (mpi_info->rank == 0) {  /* Master */
  307  for (;;) {            /* Infinite loop */
+ 308  for (i0=0; i0<chunkSize*3+1; i0++) tempInts[i0] = -1;
+ 309  for (i0=0; i0<chunkSize*numDim; i0++) tempCoords[i0] = -1.0;
  310  chunkNodes = 0;
-      for (i0=0; i0<numNodes*3+1; i0++) tempInts[i0] = -1;
-      for (i0=0; i0<numNodes*numDim; i0++) tempCoords[i0] = -1.0;
  311  for (i1=0; i1<chunkSize; i1++) {
- 312  if (totalNodes >= numNodes) break;
+ 312  if (totalNodes >= numNodes) break;  /* Maybe end the infinite loop */
  313  if (1 == numDim)
  314  fscanf(fileHandle_p, "%d %d %d %le\n",
- 315  &tempInts[0+i1], &tempInts[numNodes+i1], &tempInts[numNodes*2+i1],
+ 315  &tempInts[0+i1], &tempInts[chunkSize+i1], &tempInts[chunkSize*2+i1],
  316  &tempCoords[i1*numDim+0]);
  317  if (2 == numDim)
  318  fscanf(fileHandle_p, "%d %d %d %le %le\n",
- 319  &tempInts[0+i1], &tempInts[numNodes+i1], &tempInts[numNodes*2+i1],
+ 319  &tempInts[0+i1], &tempInts[chunkSize+i1], &tempInts[chunkSize*2+i1],
  320  &tempCoords[i1*numDim+0], &tempCoords[i1*numDim+1]);
  321  if (3 == numDim)
  322  fscanf(fileHandle_p, "%d %d %d %le %le %le\n",
- 323  &tempInts[0+i1], &tempInts[numNodes+i1], &tempInts[numNodes*2+i1],
+ 323  &tempInts[0+i1], &tempInts[chunkSize+i1], &tempInts[chunkSize*2+i1],
  324  &tempCoords[i1*numDim+0], &tempCoords[i1*numDim+1], &tempCoords[i1*numDim+2]);
- 325  totalNodes++;
+ 325  totalNodes++;  /* When do we quit the infinite loop? */
- 326  chunkNodes++;
+ 326  chunkNodes++;  /* How many nodes do we actually have in this chunk? It may be smaller than chunkSize. */
+ 327  }
+ 328  if (chunkNodes > chunkSize) {
+ 329  Finley_setError(PASO_MPI_ERROR, "Finley_Mesh_read: error reading chunks of mesh, data too large for message size");
+ 330  return NULL;
  331  }
-      /* Eventually we'll send chunk of nodes to each CPU numbered 1 ... mpi_info->size-1, here goes one of them */
-      if (nextCPU < mpi_info->size) {
  332  #ifdef PASO_MPI
+ 333  /* Eventually we'll send chunkSize nodes to each CPU numbered 1 ... mpi_info->size-1, here goes one of them */
+ 334  if (nextCPU < mpi_info->size) {
  335  int mpi_error;
+ 336  tempInts[chunkSize*3] = chunkNodes;  /* The message has one more int to send chunkNodes */
- 337  tempInts[numNodes*3] = chunkNodes;
+ 337  mpi_error = MPI_Send(tempInts, chunkSize*3+1, MPI_INT, nextCPU, 81720, mpi_info->comm);
-      /* ksteube The size of this message can and should be brought down to chunkNodes*3+1, must re-org tempInts */
-      mpi_error = MPI_Send(tempInts, numNodes*3+1, MPI_INT, nextCPU, 81720, mpi_info->comm);
  338  if ( mpi_error != MPI_SUCCESS ) {
  339  Finley_setError(PASO_MPI_ERROR, "Finley_Mesh_read: send of tempInts failed");
  340  return NULL;
  341  }
- 342  mpi_error = MPI_Send(tempCoords, numNodes*numDim, MPI_DOUBLE, nextCPU, 81721, mpi_info->comm);
+ 342  mpi_error = MPI_Send(tempCoords, chunkSize*numDim, MPI_DOUBLE, nextCPU, 81721, mpi_info->comm);
  343  if ( mpi_error != MPI_SUCCESS ) {
  344  Finley_setError(PASO_MPI_ERROR, "Finley_Mesh_read: send of tempCoords failed");
  345  return NULL;
  346  }
-      #endif
  347  nextCPU++;
  348  }
- 349  if (totalNodes >= numNodes) break;
+ 349  #endif
+ 350  if (totalNodes >= numNodes) break;  /* Maybe end the infinite loop */
  351  }  /* Infinite loop */
  352  }  /* End master */
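The master block above has two exits commented "Maybe end the infinite loop": the inner for-loop stops reading once totalNodes reaches numNodes, and after the (optional) send the outer for(;;) breaks on the same condition, which is how the final chunk ends up staying on rank 0. A condensed control-flow skeleton, with the fscanf and MPI_Send steps replaced by hypothetical stand-ins:

    /* Control-flow skeleton of the master loop: the fscanf and MPI_Send steps
     * are replaced by prints, only the two break points are kept. */
    #include <stdio.h>

    int main(void) {
        int numNodes = 10, mpiSize = 4;             /* hypothetical */
        int chunkSize = numNodes / mpiSize + 1;
        int totalNodes = 0, nextCPU = 1;

        for (;;) {                                  /* "Infinite loop" */
            int chunkNodes = 0, i1;
            for (i1 = 0; i1 < chunkSize; i1++) {    /* read at most chunkSize node lines */
                if (totalNodes >= numNodes) break;  /* file exhausted: stop reading */
                totalNodes++;                       /* stands in for one fscanf */
                chunkNodes++;
            }
            if (nextCPU < mpiSize) {                /* a worker is still waiting for data */
                printf("send %d nodes to rank %d\n", chunkNodes, nextCPU);
                nextCPU++;
            } else {
                printf("keep %d nodes on rank 0\n", chunkNodes);
            }
            if (totalNodes >= numNodes) break;      /* last chunk stays with the master */
        }
        return 0;
    }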
  353  else {  /* Worker */
  ...
  355  /* Each worker receives two messages */
  356  MPI_Status status;
  357  int mpi_error;
- 358  mpi_error = MPI_Recv(tempInts, numNodes*3+1, MPI_INT, 0, 81720, mpi_info->comm, &status);
+ 358  mpi_error = MPI_Recv(tempInts, chunkSize*3+1, MPI_INT, 0, 81720, mpi_info->comm, &status);
  359  if ( mpi_error != MPI_SUCCESS ) {
  360  Finley_setError(PASO_MPI_ERROR, "Finley_Mesh_read: receive of tempInts failed");
  361  return NULL;
  362  }
- 363  mpi_error = MPI_Recv(tempCoords, numNodes*numDim, MPI_DOUBLE, 0, 81721, mpi_info->comm, &status);
+ 363  mpi_error = MPI_Recv(tempCoords, chunkSize*numDim, MPI_DOUBLE, 0, 81721, mpi_info->comm, &status);
  364  if ( mpi_error != MPI_SUCCESS ) {
  365  Finley_setError(PASO_MPI_ERROR, "Finley_Mesh_read: receive of tempCoords failed");
  366  return NULL;
  367  }
- 368  chunkNodes = tempInts[numNodes*3];
+ 368  chunkNodes = tempInts[chunkSize*3];  /* How many nodes are in this workers chunk? */
  369  #endif
  370  }  /* Worker */
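Each chunk travels as a pair of messages with fixed tags: the integer block on tag 81720 and the coordinate block on tag 81721, received by the worker in the same order, which then reads chunkNodes back out of the trailing slot. A minimal standalone MPI program showing that pairing; the chunkSize, numDim and buffer contents are made up, and rank 0 sends the same buffer to every worker rather than a fresh chunk per rank as the real code does:

    /* Two-message handshake sketch: tag 81720 carries the ints, tag 81721 the
     * coordinates.  chunkSize, numDim and the buffer contents are made up. */
    #include <mpi.h>
    #include <stdio.h>

    enum { chunkSize = 3, numDim = 2 };

    int main(int argc, char **argv) {
        int rank, size, i;
        int tempInts[chunkSize*3+1];
        double tempCoords[chunkSize*numDim];

        MPI_Init(&argc, &argv);
        MPI_Comm_rank(MPI_COMM_WORLD, &rank);
        MPI_Comm_size(MPI_COMM_WORLD, &size);

        if (rank == 0) {                       /* master: fill and ship the chunk */
            for (i = 0; i < chunkSize*3; i++) tempInts[i] = i;
            tempInts[chunkSize*3] = chunkSize; /* trailing count slot */
            for (i = 0; i < chunkSize*numDim; i++) tempCoords[i] = 0.5*i;
            for (i = 1; i < size; i++) {
                MPI_Send(tempInts, chunkSize*3+1, MPI_INT, i, 81720, MPI_COMM_WORLD);
                MPI_Send(tempCoords, chunkSize*numDim, MPI_DOUBLE, i, 81721, MPI_COMM_WORLD);
            }
        } else {                               /* worker: receive both blocks in order */
            MPI_Status status;
            MPI_Recv(tempInts, chunkSize*3+1, MPI_INT, 0, 81720, MPI_COMM_WORLD, &status);
            MPI_Recv(tempCoords, chunkSize*numDim, MPI_DOUBLE, 0, 81721, MPI_COMM_WORLD, &status);
            printf("rank %d: chunkNodes slot = %d\n", rank, tempInts[chunkSize*3]);
        }
        MPI_Finalize();
        return 0;
    }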
  371
- 372  #if 0
+ 372  #if 1
  373  /* Display the temp mem for debugging */
- 374  printf("ksteube tempInts totalNodes=%d:\n", totalNodes);
+ 374  printf("ksteube Nodes tempInts\n");
- 375  for (i0=0; i0<numNodes*3; i0++) {
+ 375  for (i0=0; i0<chunkSize*3; i0++) {
  376  printf(" %2d", tempInts[i0]);
- 377  if (i0%numNodes==numNodes-1) printf("\n");
+ 377  if (i0%chunkSize==chunkSize-1) printf("\n");
  378  }
  379  printf("ksteube tempCoords:\n");
- 380  for (i0=0; i0<chunkNodes*numDim; i0++) {
+ 380  for (i0=0; i0<chunkSize*numDim; i0++) {
  381  printf(" %20.15e", tempCoords[i0]);
  382  if (i0%numDim==numDim-1) printf("\n");
  383  }
  384  #endif
  385
- 386  printf("ksteube chunkNodes=%d numNodes=%d\n", chunkNodes, numNodes);
+ 386  printf("ksteube numDim=%d numNodes=%d mesh name='%s' chunkNodes=%d numNodes=%d\n", numDim, numNodes, name, chunkNodes, numNodes);
  387
  388  /* Copy node data from tempMem to mesh_p */
  389  Finley_NodeFile_allocTable(mesh_p->Nodes, chunkNodes);
  390  if (Finley_noError()) {
  391  for (i0=0; i0<chunkNodes; i0++) {
  392  mesh_p->Nodes->Id[i0] = tempInts[0+i0];
- 393  mesh_p->Nodes->globalDegreesOfFreedom[i0] = tempInts[numNodes+i0];
+ 393  mesh_p->Nodes->globalDegreesOfFreedom[i0] = tempInts[chunkSize+i0];
- 394  mesh_p->Nodes->Tag[i0] = tempInts[numNodes*2+i0];
+ 394  mesh_p->Nodes->Tag[i0] = tempInts[chunkSize*2+i0];
  395  for (i1=0; i1<numDim; i1++) {
  396  mesh_p->Nodes->Coordinates[INDEX2(i1,i0,numDim)] = tempCoords[i0*numDim+i1];
  397  }
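The copy loop above writes coordinates through the INDEX2 macro while the message buffer packs them node by node. Assuming Finley's usual definition INDEX2(i,j,dim) = (i) + (dim)*(j) (an assumption; verify against the project's headers), both sides address the same flat element, so the transfer is a straight per-component copy. A small sketch under that assumption:

    /* Coordinate copy under the ASSUMPTION that INDEX2(i,j,dim) expands to
     * (i) + (dim)*(j); verify against the real Finley headers. */
    #include <stdio.h>

    #define INDEX2(i, j, dim) ((i) + (dim) * (j))

    int main(void) {
        enum { numDim = 3, chunkNodes = 2 };
        double tempCoords[chunkNodes*numDim] = { 0.0, 0.1, 0.2,  1.0, 1.1, 1.2 };
        double Coordinates[chunkNodes*numDim];
        int i0, i1, k;

        for (i0 = 0; i0 < chunkNodes; i0++)         /* node index       */
            for (i1 = 0; i1 < numDim; i1++)         /* component index  */
                Coordinates[INDEX2(i1, i0, numDim)] = tempCoords[i0*numDim + i1];

        /* With this INDEX2 the destination layout matches the message layout. */
        for (k = 0; k < chunkNodes*numDim; k++) printf("%g ", Coordinates[k]);
        printf("\n");
        return 0;
    }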
  ...
  410  typeID=Finley_RefElement_getTypeId(element_type);
  411  }
  412  #ifdef PASO_MPI
- 413  if (mpi_info->size > 0) {
+ 413  if (mpi_info->size > 1) {
  414  int temp1[3], mpi_error;
  415  temp1[0] = (int) typeID;
  416  temp1[1] = numEle;
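The branch above packs the element type id and numEle into a small int array on rank 0; the excerpt cuts off before temp1[2] is filled and before the array is shared with the other ranks. As a purely illustrative sketch (not the omitted code), this is how such a packed header is typically distributed with MPI_Bcast:

    /* Illustrative broadcast of a packed 3-int header from rank 0; the third
     * slot is a placeholder, and this is not the code the excerpt omits. */
    #include <mpi.h>
    #include <stdio.h>

    int main(int argc, char **argv) {
        int rank, temp1[3] = { 0, 0, 0 };

        MPI_Init(&argc, &argv);
        MPI_Comm_rank(MPI_COMM_WORLD, &rank);

        if (rank == 0) {
            temp1[0] = 7;       /* e.g. an element-type id */
            temp1[1] = 1234;    /* e.g. numEle             */
            temp1[2] = -1;      /* placeholder             */
        }
        /* After the call every rank holds rank 0's values. */
        MPI_Bcast(temp1, 3, MPI_INT, 0, MPI_COMM_WORLD);
        printf("rank %d sees typeID=%d numEle=%d\n", rank, temp1[0], temp1[1]);

        MPI_Finalize();
        return 0;
    }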
  ...
  481  chunkEle = tempInts[numEle*(2+numNodes)];
  482  #endif
  483  }  /* Worker */
- 484  #if 1
+ 484  #if 0
  485  /* Display the temp mem for debugging */
- 486  printf("ksteube tempInts numEle=%d chunkEle=%d AAA:\n", numEle, chunkEle);
+ 486  printf("ksteube Elements tempInts numEle=%d chunkEle=%d AAA:\n", numEle, chunkEle);
  487  for (i0=0; i0<numEle*(numNodes+2); i0++) {
  488  printf(" %2d", tempInts[i0]);
  489  if (i0%(numNodes+2)==numNodes+2-1) printf("\n");
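The element debug block implies the layout of the element message: each of up to numEle elements occupies 2 + numNodes ints (the stride of the printout, with numNodes apparently the per-element node count at this point in the file), and the trailing slot at index numEle*(2+numNodes) carries chunkEle, the number of elements actually present. An indexing sketch under those assumptions; taking the two leading ints to be element Id and Tag is a guess not confirmed by this excerpt:

    /* Element-message indexing sketch.  ASSUMPTIONS: each element row is
     * (Id, Tag, node indices...), numNodes is the per-element node count, and
     * the trailing slot holds chunkEle; none of this is confirmed by the excerpt. */
    #include <stdio.h>
    #include <stdlib.h>

    int main(void) {
        int numEle = 4, numNodes = 3;                 /* hypothetical: 4 triangles */
        int rowLen = numNodes + 2;
        int chunkEle = 2;                             /* only 2 rows actually filled */
        int e, n, i0;
        int *tempInts = malloc((numEle*rowLen + 1) * sizeof(int));

        for (i0 = 0; i0 < numEle*rowLen + 1; i0++) tempInts[i0] = -1;
        for (e = 0; e < chunkEle; e++) {
            tempInts[e*rowLen + 0] = 500 + e;         /* assumed: element Id  */
            tempInts[e*rowLen + 1] = 0;               /* assumed: element Tag */
            for (n = 0; n < numNodes; n++)
                tempInts[e*rowLen + 2 + n] = 10*e + n; /* node indices */
        }
        tempInts[numEle*rowLen] = chunkEle;           /* trailing count, as read back above */

        for (i0 = 0; i0 < numEle*rowLen; i0++) {      /* mirrors the debug printout */
            printf(" %2d", tempInts[i0]);
            if (i0 % rowLen == rowLen - 1) printf("\n");
        }
        printf("chunkEle=%d\n", tempInts[numEle*rowLen]);
        free(tempInts);
        return 0;
    }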