
Contents of /trunk/finley/src/NodeFile.cpp



Revision 4428
Thu May 30 06:39:10 2013 UTC by caltinay
File size: 28296 byte(s)
finley's NodeFile is now a class.
Associated changes:
- use of some C++ constructs/functions/types (1st pass)
- removal of obsolete pointer check
- merging of some duplicated code
- ...

/*****************************************************************************
*
* Copyright (c) 2003-2013 by University of Queensland
* http://www.uq.edu.au
*
* Primary Business: Queensland, Australia
* Licensed under the Open Software License version 3.0
* http://www.opensource.org/licenses/osl-3.0.php
*
* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
* Development since 2012 by School of Earth Sciences
*
*****************************************************************************/


/****************************************************************************

  Finley: NodeFile

*****************************************************************************/

#include "NodeFile.h"
#include "Util.h"
#include <escript/Data.h>

#include <limits>
#include <sstream>

namespace finley {

// helper function: scatters entry i of the input arrays to position
// index[i]-min_index in the output arrays
static void scatterEntries(int n, int* index, int min_index, int max_index,
                           int* Id_out, int* Id_in, int* Tag_out, int* Tag_in,
                           int* globalDegreesOfFreedom_out,
                           int* globalDegreesOfFreedom_in,
                           int numDim, double* Coordinates_out,
                           double* Coordinates_in)
{
    const int range = max_index-min_index;
    const size_t numDim_size = numDim*sizeof(double);

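    // Coordinates are stored node by node: the numDim components of node k
    // start at Coordinates[INDEX2(0,k,numDim)], so one node can be copied
    // with a single memcpy of numDim_size bytes.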
#pragma omp parallel for
    for (int i=0; i<n; i++) {
        const int k=index[i]-min_index;
        if ((k>=0) && (k<range)) {
            Id_out[k]=Id_in[i];
            Tag_out[k]=Tag_in[i];
            globalDegreesOfFreedom_out[k]=globalDegreesOfFreedom_in[i];
            memcpy(&(Coordinates_out[INDEX2(0,k,numDim)]),
                   &(Coordinates_in[INDEX2(0,i,numDim)]), numDim_size);
        }
    }
}

// helper function: gathers entry index[i]-min_index of the input arrays
// into position i of the output arrays
static void gatherEntries(int n, int* index, int min_index, int max_index,
                          int* Id_out, int* Id_in, int* Tag_out, int* Tag_in,
                          int* globalDegreesOfFreedom_out,
                          int* globalDegreesOfFreedom_in,
                          int numDim, double* Coordinates_out,
                          double* Coordinates_in)
{
    const int range = max_index-min_index;
    const size_t numDim_size = numDim*sizeof(double);

#pragma omp parallel for
    for (int i=0; i<n; i++) {
        const int k=index[i]-min_index;
        if ((k>=0) && (k<range)) {
            Id_out[i]=Id_in[k];
            Tag_out[i]=Tag_in[k];
            globalDegreesOfFreedom_out[i]=globalDegreesOfFreedom_in[k];
            memcpy(&(Coordinates_out[INDEX2(0,i,numDim)]),
                   &(Coordinates_in[INDEX2(0,k,numDim)]), numDim_size);
        }
    }
}

/// constructor
/// use NodeFile::allocTable to allocate the node table (Id,Coordinates)
NodeFile::NodeFile(int nDim, Esys_MPIInfo *mpiInfo) :
    numNodes(0),
    numDim(nDim),
    Id(NULL),
    Tag(NULL),
    globalDegreesOfFreedom(NULL),
    Coordinates(NULL),
    globalReducedDOFIndex(NULL),
    globalReducedNodesIndex(NULL),
    globalNodesIndex(NULL),
    nodesMapping(NULL),
    reducedNodesMapping(NULL),
    degreesOfFreedomMapping(NULL),
    reducedDegreesOfFreedomMapping(NULL),
    nodesDistribution(NULL),
    reducedNodesDistribution(NULL),
    degreesOfFreedomDistribution(NULL),
    reducedDegreesOfFreedomDistribution(NULL),
    degreesOfFreedomConnector(NULL),
    reducedDegreesOfFreedomConnector(NULL),
    reducedNodesId(NULL),
    degreesOfFreedomId(NULL),
    reducedDegreesOfFreedomId(NULL),
    status(FINLEY_INITIAL_STATUS)
{
    MPIInfo = Esys_MPIInfo_getReference(mpiInfo);
}

/// destructor
NodeFile::~NodeFile()
{
    freeTable();
    Esys_MPIInfo_free(MPIInfo);
}

/// allocates the node table within a node file to hold NN nodes.
void NodeFile::allocTable(int NN)
{
    if (numNodes>0)
        freeTable();

    Id=new int[NN];
    Coordinates=new double[NN*numDim];
    Tag=new int[NN];
    globalDegreesOfFreedom=new int[NN];
    globalReducedDOFIndex=new int[NN];
    globalReducedNodesIndex=new int[NN];
    globalNodesIndex=new int[NN];
    reducedNodesId=new int[NN];
    degreesOfFreedomId=new int[NN];
    reducedDegreesOfFreedomId=new int[NN];
    numNodes=NN;

    // this initialization makes sure that data are located on the right
    // processor
#pragma omp parallel for
    for (int n=0; n<numNodes; n++) {
        Id[n]=-1;
        for (int i=0; i<numDim; i++)
            Coordinates[INDEX2(i,n,numDim)]=0.;
        Tag[n]=-1;
        globalDegreesOfFreedom[n]=-1;
        globalReducedDOFIndex[n]=-1;
        globalReducedNodesIndex[n]=-1;
        globalNodesIndex[n]=-1;
        reducedNodesId[n]=-1;
        degreesOfFreedomId[n]=-1;
        reducedDegreesOfFreedomId[n]=-1;
    }
}

/// frees the node table within a node file
void NodeFile::freeTable()
{
    delete[] Id;
    delete[] Coordinates;
    delete[] globalDegreesOfFreedom;
    delete[] globalReducedDOFIndex;
    delete[] globalReducedNodesIndex;
    delete[] globalNodesIndex;
    delete[] Tag;
    delete[] reducedNodesId;
    delete[] degreesOfFreedomId;
    delete[] reducedDegreesOfFreedomId;
    tagsInUse.clear();
    Finley_NodeMapping_free(nodesMapping);
    nodesMapping=NULL;
    Finley_NodeMapping_free(reducedNodesMapping);
    reducedNodesMapping=NULL;
    Finley_NodeMapping_free(degreesOfFreedomMapping);
    degreesOfFreedomMapping=NULL;
    Finley_NodeMapping_free(reducedDegreesOfFreedomMapping);
    reducedDegreesOfFreedomMapping=NULL;
    Paso_Distribution_free(nodesDistribution);
    nodesDistribution=NULL;
    Paso_Distribution_free(reducedNodesDistribution);
    reducedNodesDistribution=NULL;
    Paso_Distribution_free(degreesOfFreedomDistribution);
    degreesOfFreedomDistribution=NULL;
    Paso_Distribution_free(reducedDegreesOfFreedomDistribution);
    reducedDegreesOfFreedomDistribution=NULL;
    Paso_Connector_free(degreesOfFreedomConnector);
    degreesOfFreedomConnector=NULL;
    Paso_Connector_free(reducedDegreesOfFreedomConnector);
    reducedDegreesOfFreedomConnector=NULL;

    numNodes=0;
}

void NodeFile::updateTagList()
{
    Finley_Util_setValuesInUse(Tag, numNodes, tagsInUse, MPIInfo);
}

/// copies the array newX into this->coordinates
void NodeFile::setCoordinates(const escript::Data& cNewX)
{
    if (cNewX.getDataPointSize() != numDim) {
        std::stringstream ss;
        ss << "NodeFile::setCoordinates: number of dimensions of new "
              "coordinates has to be " << numDim;
        const std::string errorMsg(ss.str());
        Finley_setError(VALUE_ERROR, errorMsg.c_str());
    } else if (cNewX.getNumDataPointsPerSample() != 1 ||
            cNewX.getNumSamples() != numNodes) {
        std::stringstream ss;
        ss << "NodeFile::setCoordinates: number of given nodes must be "
           << numNodes;
        const std::string errorMsg(ss.str());
        Finley_setError(VALUE_ERROR, errorMsg.c_str());
    } else {
        const size_t numDim_size=numDim*sizeof(double);
        Finley_increaseStatus(this);
        escript::Data& newX = *const_cast<escript::Data*>(&cNewX);
#pragma omp parallel for
        for (int n=0; n<numNodes; n++) {
            memcpy(&(Coordinates[INDEX2(0,n,numDim)]), newX.getSampleDataRO(n), numDim_size);
        }
    }
}

/// sets tags to newTag where mask>0
void NodeFile::setTags(const int newTag, const escript::Data& cMask)
{
    Finley_resetError();

    if (1 != cMask.getDataPointSize()) {
        Finley_setError(TYPE_ERROR, "NodeFile::setTags: number of components of mask must be 1.");
        return;
    } else if (cMask.getNumDataPointsPerSample() != 1 ||
            cMask.getNumSamples() != numNodes) {
        Finley_setError(TYPE_ERROR, "NodeFile::setTags: illegal number of samples of mask Data object");
        return;
    }

    escript::Data& mask = *const_cast<escript::Data*>(&cMask);
#pragma omp parallel for
    for (int n=0; n<numNodes; n++) {
        if (mask.getSampleDataRO(n)[0] > 0)
            Tag[n]=newTag;
    }
    updateTagList();
}

std::pair<int,int> NodeFile::getDOFRange() const
{
    std::pair<int,int> result(
            Finley_Util_getMinMaxInt(1, numNodes, globalDegreesOfFreedom));
    if (result.second < result.first) {
        result.first = -1;
        result.second = 0;
    }
    return result;
}

std::pair<int,int> NodeFile::getGlobalIdRange() const
{
    std::pair<int,int> result(Finley_Util_getMinMaxInt(1, numNodes, Id));

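    // Both the global minimum and maximum are obtained with a single
    // MPI_MAX reduction: the local minimum is negated before the reduce
    // and the result is negated back afterwards.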
#ifdef ESYS_MPI
    int global_id_range[2];
    int id_range[2] = { -result.first, result.second };
    MPI_Allreduce(id_range, global_id_range, 2, MPI_INT, MPI_MAX, MPIInfo->comm);
    result.first = -global_id_range[0];
    result.second = global_id_range[1];
#endif
    if (result.second < result.first) {
        result.first = -1;
        result.second = 0;
    }
    return result;
}

std::pair<int,int> NodeFile::getGlobalDOFRange() const
{
    std::pair<int,int> result(
            Finley_Util_getMinMaxInt(1, numNodes, globalDegreesOfFreedom));

#ifdef ESYS_MPI
    int global_id_range[2];
    int id_range[2] = { -result.first, result.second };
    MPI_Allreduce(id_range, global_id_range, 2, MPI_INT, MPI_MAX, MPIInfo->comm);
    result.first = -global_id_range[0];
    result.second = global_id_range[1];
#endif
    if (result.second < result.first) {
        result.first = -1;
        result.second = 0;
    }
    return result;
}

std::pair<int,int> NodeFile::getGlobalNodeIDIndexRange() const
{
    std::pair<int,int> result(
            Finley_Util_getMinMaxInt(1, numNodes, globalNodesIndex));

#ifdef ESYS_MPI
    int global_id_range[2];
    int id_range[2] = { -result.first, result.second };
    MPI_Allreduce(id_range, global_id_range, 2, MPI_INT, MPI_MAX, MPIInfo->comm);
    result.first = -global_id_range[0];
    result.second = global_id_range[1];
#endif
    if (result.second < result.first) {
        result.first = -1;
        result.second = 0;
    }
    return result;
}

void NodeFile::copyTable(int offset, int idOffset, int dofOffset,
                         const NodeFile* in)
{
    // check number of dimensions and table size
    if (numDim != in->numDim) {
        Finley_setError(TYPE_ERROR, "NodeFile::copyTable: dimensions of node files don't match");
        return;
    }
    if (numNodes < in->numNodes+offset) {
        Finley_setError(MEMORY_ERROR, "NodeFile::copyTable: node table is too small.");
        return;
    }

#pragma omp parallel for
    for (int n=0; n<in->numNodes; n++) {
        Id[offset+n]=in->Id[n]+idOffset;
        Tag[offset+n]=in->Tag[n];
        globalDegreesOfFreedom[offset+n]=in->globalDegreesOfFreedom[n]+dofOffset;
        for (int i=0; i<numDim; i++)
            Coordinates[INDEX2(i, offset+n, numDim)] =
                                    in->Coordinates[INDEX2(i, n, in->numDim)];
    }
}

/// scatters the NodeFile 'in' into this NodeFile using index[0:in->numNodes-1].
/// index has to be between 0 and numNodes-1.
/// colouring is chosen for the worst case
void NodeFile::scatter(int* index, const NodeFile* in)
{
    scatterEntries(numNodes, index, 0, in->numNodes, Id, in->Id, Tag, in->Tag,
                   globalDegreesOfFreedom, in->globalDegreesOfFreedom,
                   numDim, Coordinates, in->Coordinates);
}

/// gathers this NodeFile from the NodeFile 'in' using the entries in
/// index[0:out->numNodes-1] which are between min_index and max_index
/// (exclusive)
void NodeFile::gather(int* index, const NodeFile* in)
{
    const std::pair<int,int> id_range(in->getGlobalIdRange());
    gatherEntries(numNodes, index, id_range.first, id_range.second, Id, in->Id,
                  Tag, in->Tag, globalDegreesOfFreedom, in->globalDegreesOfFreedom,
                  numDim, Coordinates, in->Coordinates);
}

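/// gathers this NodeFile from the potentially distributed NodeFile 'in'
/// using the global id references in index[0:numNodes-1]. The node data of
/// 'in' is first scattered into a buffer that is split across the ranks by
/// id range; the buffer is then passed around the ranks in a ring so every
/// rank can pick up the entries it references.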
void NodeFile::gather_global(int* index, const NodeFile* in)
{
    // get the global range of node ids
    const std::pair<int,int> id_range(in->getGlobalIdRange());
    const int undefined_node=id_range.first-1;
    std::vector<int> distribution(in->MPIInfo->size+1);

    // distribute the range of node ids
    int buffer_len=Esys_MPIInfo_setDistribution(in->MPIInfo,
            id_range.first, id_range.second, &distribution[0]);

    // allocate buffers
    int *Id_buffer=new int[buffer_len];
    int *Tag_buffer=new int[buffer_len];
    int *globalDegreesOfFreedom_buffer=new int[buffer_len];
    double *Coordinates_buffer=new double[buffer_len*numDim];

    // fill Id_buffer by the undefined_node marker to check if nodes
    // are defined
#pragma omp parallel for
    for (int n=0; n<buffer_len; n++)
        Id_buffer[n]=undefined_node;

    // fill the buffer by sending portions around in a circle
#ifdef ESYS_MPI
    MPI_Status status;
    int dest=Esys_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank+1);
    int source=Esys_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank-1);
#endif
    int buffer_rank=in->MPIInfo->rank;
    for (int p=0; p<in->MPIInfo->size; ++p) {
        if (p>0) { // the initial send can be skipped
#ifdef ESYS_MPI
            MPI_Sendrecv_replace(Id_buffer, buffer_len, MPI_INT, dest,
                    in->MPIInfo->msg_tag_counter, source,
                    in->MPIInfo->msg_tag_counter, in->MPIInfo->comm, &status);
            MPI_Sendrecv_replace(Tag_buffer, buffer_len, MPI_INT, dest,
                    in->MPIInfo->msg_tag_counter+1, source,
                    in->MPIInfo->msg_tag_counter+1, in->MPIInfo->comm, &status);
            MPI_Sendrecv_replace(globalDegreesOfFreedom_buffer, buffer_len,
                    MPI_INT, dest, in->MPIInfo->msg_tag_counter+2, source,
                    in->MPIInfo->msg_tag_counter+2, in->MPIInfo->comm, &status);
            MPI_Sendrecv_replace(Coordinates_buffer, buffer_len*numDim,
                    MPI_DOUBLE, dest, in->MPIInfo->msg_tag_counter+3, source,
                    in->MPIInfo->msg_tag_counter+3, in->MPIInfo->comm, &status);
#endif
            in->MPIInfo->msg_tag_counter+=4;
        }
        buffer_rank=Esys_MPIInfo_mod(in->MPIInfo->size, buffer_rank-1);
        scatterEntries(in->numNodes, in->Id, distribution[buffer_rank],
                distribution[buffer_rank+1], Id_buffer, in->Id,
                Tag_buffer, in->Tag, globalDegreesOfFreedom_buffer,
                in->globalDegreesOfFreedom, numDim, Coordinates_buffer,
                in->Coordinates);
    }
    // now entries are collected from the buffer again by sending the
    // entries around in a circle
#ifdef ESYS_MPI
    dest=Esys_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank+1);
    source=Esys_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank-1);
#endif
    buffer_rank=in->MPIInfo->rank;
    for (int p=0; p<in->MPIInfo->size; ++p) {
        gatherEntries(numNodes, index, distribution[buffer_rank],
                distribution[buffer_rank+1], Id, Id_buffer, Tag, Tag_buffer,
                globalDegreesOfFreedom, globalDegreesOfFreedom_buffer, numDim,
                Coordinates, Coordinates_buffer);
        if (p < in->MPIInfo->size-1) { // the last send can be skipped
#ifdef ESYS_MPI
            MPI_Sendrecv_replace(Id_buffer, buffer_len, MPI_INT, dest,
                    in->MPIInfo->msg_tag_counter, source,
                    in->MPIInfo->msg_tag_counter, in->MPIInfo->comm, &status);
            MPI_Sendrecv_replace(Tag_buffer, buffer_len, MPI_INT, dest,
                    in->MPIInfo->msg_tag_counter+1, source,
                    in->MPIInfo->msg_tag_counter+1, in->MPIInfo->comm, &status);
            MPI_Sendrecv_replace(globalDegreesOfFreedom_buffer, buffer_len,
                    MPI_INT, dest, in->MPIInfo->msg_tag_counter+2, source,
                    in->MPIInfo->msg_tag_counter+2, in->MPIInfo->comm, &status);
            MPI_Sendrecv_replace(Coordinates_buffer, buffer_len*numDim,
                    MPI_DOUBLE, dest, in->MPIInfo->msg_tag_counter+3, source,
                    in->MPIInfo->msg_tag_counter+3, in->MPIInfo->comm, &status);
#endif
            in->MPIInfo->msg_tag_counter+=4;
        }
        buffer_rank=Esys_MPIInfo_mod(in->MPIInfo->size, buffer_rank-1);
    }
    // check if all nodes are set:
#pragma omp parallel for
    for (int n=0; n<numNodes; ++n) {
        if (Id[n] == undefined_node) {
            std::stringstream ss;
            ss << "NodeFile::gather_global: Node id " << Id[n]
               << " at position " << n << " is referenced but not defined.";
            const std::string errorMsg(ss.str());
            Finley_setError(VALUE_ERROR, errorMsg.c_str());
        }
    }
    delete[] Id_buffer;
    delete[] Tag_buffer;
    delete[] globalDegreesOfFreedom_buffer;
    delete[] Coordinates_buffer;
    // make sure that the error is global
    Esys_MPIInfo_noError(in->MPIInfo);
}

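/// for each node, writes the MPI rank that owns its degree of freedom
/// (according to the given DOF distribution) into mpiRankOfDOF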
void NodeFile::assignMPIRankToDOFs(Esys_MPI_rank* mpiRankOfDOF,
                                   int *distribution)
{
    Esys_MPI_rank p_min=MPIInfo->size, p_max=-1;
    // first we retrieve the min and max DOF on this processor to reduce
    // costs for searching
    const std::pair<int,int> dof_range(getDOFRange());

    for (int p=0; p<MPIInfo->size; ++p) {
        if (distribution[p]<=dof_range.first) p_min=p;
        if (distribution[p]<=dof_range.second) p_max=p;
    }
#pragma omp parallel for
    for (int n=0; n<numNodes; ++n) {
        const int k=globalDegreesOfFreedom[n];
        for (int p=p_min; p<=p_max; ++p) {
            if (k < distribution[p+1]) {
                mpiRankOfDOF[n]=p;
                break;
            }
        }
    }
}

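/// marks the locally used DOF ids (or node ids if useNodes is true) in a
/// buffer that is split across the ranks by id range, then relabels the
/// marked ids in this rank's portion with consecutive numbers starting at 0.
/// 'distribution' receives the id-range distribution and 'buffer' the new
/// labels; entries with mask[n]<0 are ignored (pass NULL to consider all
/// nodes). Returns the number of marked ids in this rank's portion.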
int NodeFile::prepareLabeling(int* mask, std::vector<int>& buffer,
                              std::vector<int>& distribution, bool useNodes)
{
    const int UNSET_ID=-1, SET_ID=1;

    // get the global range of DOF/node ids
    std::pair<int,int> idRange(useNodes ?
            getGlobalNodeIDIndexRange() : getGlobalDOFRange());
    const int* indexArray = (useNodes ? globalNodesIndex : globalDegreesOfFreedom);
    // distribute the range of node ids
    distribution.assign(MPIInfo->size+1, 0);
    int buffer_len=Esys_MPIInfo_setDistribution(MPIInfo, idRange.first,
            idRange.second, &distribution[0]);
    const int myCount=distribution[MPIInfo->rank+1]-distribution[MPIInfo->rank];

    // fill buffer by the UNSET_ID marker to check if nodes are defined
    buffer.assign(buffer_len, UNSET_ID);

    // fill the buffer by sending portions around in a circle
#ifdef ESYS_MPI
    MPI_Status status;
    int dest=Esys_MPIInfo_mod(MPIInfo->size, MPIInfo->rank + 1);
    int source=Esys_MPIInfo_mod(MPIInfo->size, MPIInfo->rank - 1);
#endif
    int buffer_rank=MPIInfo->rank;
    for (int p=0; p<MPIInfo->size; ++p) {
        if (p>0) { // the initial send can be skipped
#ifdef ESYS_MPI
            MPI_Sendrecv_replace(&buffer[0], buffer.size(), MPI_INT, dest,
                    MPIInfo->msg_tag_counter, source, MPIInfo->msg_tag_counter,
                    MPIInfo->comm, &status);
#endif
            MPIInfo->msg_tag_counter++;
        }
        buffer_rank=Esys_MPIInfo_mod(MPIInfo->size, buffer_rank-1);
        const int id0=distribution[buffer_rank];
        const int id1=distribution[buffer_rank+1];
#pragma omp parallel for
        for (int n=0; n<numNodes; n++) {
            if (!mask || mask[n]>-1) {
                const int k=indexArray[n];
                if (id0<=k && k<id1) {
                    buffer[k-id0] = SET_ID;
                }
            }
        }
    }
    // count the entries in the buffer
    // TODO: OMP parallel
    int myNewCount=0;
    for (int n=0; n<myCount; ++n) {
        if (buffer[n] == SET_ID) {
            buffer[n]=myNewCount;
            myNewCount++;
        }
    }
    return myNewCount;
}

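/// renumbers the global degrees of freedom to a dense labeling, i.e. to
/// consecutive integers starting at 0 across all ranks.
/// Returns the new global number of degrees of freedom.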
int NodeFile::createDenseDOFLabeling()
{
    std::vector<int> DOF_buffer;
    std::vector<int> distribution;
    std::vector<int> loc_offsets(MPIInfo->size);
    std::vector<int> offsets(MPIInfo->size);
    int new_numGlobalDOFs=0;

    // retrieve the number of own DOFs and fill buffer
    loc_offsets[MPIInfo->rank]=prepareLabeling(NULL, DOF_buffer, distribution,
                                               false);
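    // loc_offsets has a single non-zero entry (this rank's count), so the
    // MPI_SUM reduction below effectively gathers all counts; the prefix
    // sum afterwards turns them into global offsets.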
#ifdef ESYS_MPI
    MPI_Allreduce(&loc_offsets[0], &offsets[0], MPIInfo->size, MPI_INT,
                  MPI_SUM, MPIInfo->comm);
    for (int n=0; n<MPIInfo->size; ++n) {
        loc_offsets[n]=new_numGlobalDOFs;
        new_numGlobalDOFs+=offsets[n];
    }
#else
    new_numGlobalDOFs=loc_offsets[0];
    loc_offsets[0]=0;
#endif

    const int myDOFs=distribution[MPIInfo->rank+1]-distribution[MPIInfo->rank];
#pragma omp parallel for
    for (int n=0; n<myDOFs; ++n)
        DOF_buffer[n]+=loc_offsets[MPIInfo->rank];

    // char instead of bool: avoids the bit-packing of std::vector<bool>,
    // which is not safe to write from multiple OpenMP threads
    std::vector<char> set_new_DOF(numNodes, true);

    // now entries are collected from the buffer again by sending them around
    // in a circle
#ifdef ESYS_MPI
    int dest=Esys_MPIInfo_mod(MPIInfo->size, MPIInfo->rank + 1);
    int source=Esys_MPIInfo_mod(MPIInfo->size, MPIInfo->rank - 1);
#endif
    int buffer_rank=MPIInfo->rank;
    for (int p=0; p<MPIInfo->size; ++p) {
        const int dof0=distribution[buffer_rank];
        const int dof1=distribution[buffer_rank+1];
#pragma omp parallel for
        for (int n=0; n<numNodes; n++) {
            const int k=globalDegreesOfFreedom[n];
            if (set_new_DOF[n] && dof0<=k && k<dof1) {
                globalDegreesOfFreedom[n]=DOF_buffer[k-dof0];
                set_new_DOF[n]=false;
            }
        }
        if (p<MPIInfo->size-1) { // the last send can be skipped
#ifdef ESYS_MPI
            MPI_Status status;
            MPI_Sendrecv_replace(&DOF_buffer[0], DOF_buffer.size(), MPI_INT,
                    dest, MPIInfo->msg_tag_counter, source,
                    MPIInfo->msg_tag_counter, MPIInfo->comm, &status);
#endif
            MPIInfo->msg_tag_counter+=1;
        }
        buffer_rank=Esys_MPIInfo_mod(MPIInfo->size, buffer_rank-1);
    }

    return new_numGlobalDOFs;
}

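/// assigns a dense global node labeling to the nodes whose degree of
/// freedom is owned by this rank and circulates it so all ranks can fill
/// globalNodesIndex. On return node_distribution holds the resulting node
/// distribution; the function returns the global number of nodes.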
int NodeFile::createDenseNodeLabeling(int* node_distribution,
                                      const int* dof_distribution)
{
    const int UNSET_ID=-1, SET_ID=1;
    const int myFirstDOF=dof_distribution[MPIInfo->rank];
    const int myLastDOF=dof_distribution[MPIInfo->rank+1];

    // find the range of node ids controlled by me
    int min_id=std::numeric_limits<int>::max();
    int max_id=std::numeric_limits<int>::min();
#pragma omp parallel
    {
        int loc_max_id=max_id;
        int loc_min_id=min_id;
#pragma omp for
        for (int n=0; n<numNodes; n++) {
            const int dof=globalDegreesOfFreedom[n];
            if (myFirstDOF<=dof && dof<myLastDOF) {
                loc_max_id=std::max(loc_max_id, Id[n]);
                loc_min_id=std::min(loc_min_id, Id[n]);
            }
        }
#pragma omp critical
        {
            max_id=std::max(loc_max_id, max_id);
            min_id=std::min(loc_min_id, min_id);
        }
    }
    int my_buffer_len = (max_id>=min_id ? max_id-min_id+1 : 0);
    int buffer_len;

#ifdef ESYS_MPI
    MPI_Allreduce(&my_buffer_len, &buffer_len, 1, MPI_INT, MPI_MAX,
                  MPIInfo->comm);
#else
    buffer_len=my_buffer_len;
#endif

    const int header_len=2;
    std::vector<int> Node_buffer(buffer_len+header_len, UNSET_ID);
    // the first two entries of the buffer carry this rank's node id range
    // around the ring
    Node_buffer[0]=min_id;
    Node_buffer[1]=max_id;

    // mark and count the nodes in use
#pragma omp parallel
    {
#pragma omp for nowait
        for (int n=0; n<numNodes; n++)
            globalNodesIndex[n]=-1;
#pragma omp for
        for (int n=0; n<numNodes; n++) {
            const int dof=globalDegreesOfFreedom[n];
            if (myFirstDOF<=dof && dof<myLastDOF)
                Node_buffer[Id[n]-min_id+header_len]=SET_ID;
        }
    }
    int myNewNumNodes=0;
    for (int n=0; n<my_buffer_len; n++) {
        if (Node_buffer[header_len+n]==SET_ID) {
            Node_buffer[header_len+n]=myNewNumNodes;
            myNewNumNodes++;
        }
    }
    // make the local number of nodes globally available
#ifdef ESYS_MPI
    MPI_Allgather(&myNewNumNodes, 1, MPI_INT, node_distribution, 1, MPI_INT,
                  MPIInfo->comm);
#else
    node_distribution[0]=myNewNumNodes;
#endif

    int globalNumNodes=0;
    for (int p=0; p<MPIInfo->size; ++p) {
        const int itmp=node_distribution[p];
        node_distribution[p]=globalNumNodes;
        globalNumNodes+=itmp;
    }
    node_distribution[MPIInfo->size]=globalNumNodes;

    // offset node buffer
#pragma omp parallel for
    for (int n=0; n<my_buffer_len; n++)
        Node_buffer[n+header_len]+=node_distribution[MPIInfo->rank];

    // now we send this buffer around to assign global node index
#ifdef ESYS_MPI
    int dest=Esys_MPIInfo_mod(MPIInfo->size, MPIInfo->rank + 1);
    int source=Esys_MPIInfo_mod(MPIInfo->size, MPIInfo->rank - 1);
#endif
    int buffer_rank=MPIInfo->rank;
    for (int p=0; p<MPIInfo->size; ++p) {
        const int nodeID_0=Node_buffer[0];
        const int nodeID_1=Node_buffer[1];
        const int dof0=dof_distribution[buffer_rank];
        const int dof1=dof_distribution[buffer_rank+1];
        if (nodeID_0 <= nodeID_1) {
#pragma omp parallel for
            for (int n=0; n<numNodes; n++) {
                const int dof=globalDegreesOfFreedom[n];
                const int id=Id[n]-nodeID_0;
                if (dof0<=dof && dof<dof1 && id>=0 && id<=nodeID_1-nodeID_0)
                    globalNodesIndex[n]=Node_buffer[id+header_len];
            }
        }
        if (p<MPIInfo->size-1) { // the last send can be skipped
#ifdef ESYS_MPI
            MPI_Status status;
            MPI_Sendrecv_replace(&Node_buffer[0], Node_buffer.size(), MPI_INT,
                    dest, MPIInfo->msg_tag_counter, source,
                    MPIInfo->msg_tag_counter, MPIInfo->comm, &status);
#endif
            MPIInfo->msg_tag_counter+=1;
        }
        buffer_rank=Esys_MPIInfo_mod(MPIInfo->size, buffer_rank-1);
    }
    return globalNumNodes;
}

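/// creates a dense labeling of the reduced DOFs (or reduced nodes if
/// useNodes is true) for the entries with reducedMask[n]>-1 and writes it
/// to globalReducedDOFIndex (or globalReducedNodesIndex); all other entries
/// are set to -1. Returns the global number of reduced DOFs/nodes.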
int NodeFile::createDenseReducedLabeling(int* reducedMask, bool useNodes)
{
    std::vector<int> buffer;
    std::vector<int> distribution;
    std::vector<int> loc_offsets(MPIInfo->size);
    std::vector<int> offsets(MPIInfo->size);
    int new_numGlobalReduced=0;

    // retrieve the number of own DOFs/nodes and fill buffer
    loc_offsets[MPIInfo->rank]=prepareLabeling(reducedMask, buffer,
                                               distribution, useNodes);
#ifdef ESYS_MPI
    MPI_Allreduce(&loc_offsets[0], &offsets[0], MPIInfo->size, MPI_INT,
                  MPI_SUM, MPIInfo->comm);
    for (int n=0; n<MPIInfo->size; ++n) {
        loc_offsets[n]=new_numGlobalReduced;
        new_numGlobalReduced+=offsets[n];
    }
#else
    new_numGlobalReduced=loc_offsets[0];
    loc_offsets[0]=0;
#endif

    const int myCount=distribution[MPIInfo->rank+1]-distribution[MPIInfo->rank];
#pragma omp parallel for
    for (int n=0; n<myCount; ++n)
        buffer[n]+=loc_offsets[MPIInfo->rank];

    const int* denseArray =
        (useNodes ? globalNodesIndex : globalDegreesOfFreedom);
    int* reducedArray =
        (useNodes ? globalReducedNodesIndex : globalReducedDOFIndex);

#pragma omp parallel for
    for (int n=0; n<numNodes; ++n)
        reducedArray[n]=loc_offsets[0]-1;

    // now entries are collected from the buffer by sending them around
    // in a circle
#ifdef ESYS_MPI
    int dest=Esys_MPIInfo_mod(MPIInfo->size, MPIInfo->rank + 1);
    int source=Esys_MPIInfo_mod(MPIInfo->size, MPIInfo->rank - 1);
#endif
    int buffer_rank=MPIInfo->rank;
    for (int p=0; p<MPIInfo->size; ++p) {
        const int id0=distribution[buffer_rank];
        const int id1=distribution[buffer_rank+1];
#pragma omp parallel for
        for (int n=0; n<numNodes; n++) {
            if (reducedMask[n] > -1) {
                const int k=denseArray[n];
                if (id0<=k && k<id1)
                    reducedArray[n]=buffer[k-id0];
            }
        }
        if (p<MPIInfo->size-1) { // the last send can be skipped
#ifdef ESYS_MPI
            MPI_Status status;
            MPI_Sendrecv_replace(&buffer[0], buffer.size(), MPI_INT, dest,
                    MPIInfo->msg_tag_counter, source,
                    MPIInfo->msg_tag_counter, MPIInfo->comm, &status);
#endif
            MPIInfo->msg_tag_counter+=1;
        }
        buffer_rank=Esys_MPIInfo_mod(MPIInfo->size, buffer_rank-1);
    }
    return new_numGlobalReduced;
}

} // namespace finley
