Contents of /trunk/finley/src/NodeFile.cpp

Revision 4441 - Fri Jun 7 02:23:49 2013 UTC by caltinay
File size: 28094 byte(s)

finley now uses Data objects directly instead of going through the C wrapper.
1
2 /*****************************************************************************
3 *
4 * Copyright (c) 2003-2013 by University of Queensland
5 * http://www.uq.edu.au
6 *
7 * Primary Business: Queensland, Australia
8 * Licensed under the Open Software License version 3.0
9 * http://www.opensource.org/licenses/osl-3.0.php
10 *
11 * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12 * Development since 2012 by School of Earth Sciences
13 *
14 *****************************************************************************/
15
16
17 /****************************************************************************
18
19 Finley: NodeFile
20
21 *****************************************************************************/
22
23 #include "NodeFile.h"
24 #include <escript/Data.h>
25
26 #include <limits>
27 #include <sstream>
28
29 namespace finley {
30
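// The two helpers below copy node records (Id, Tag, global DOF number and
// coordinates) between tables. Coordinates are stored with the numDim
// components of each node contiguous: INDEX2(i,n,numDim) effectively expands
// to i + numDim*n, which is why a whole coordinate tuple can be moved with a
// single memcpy of numDim*sizeof(double) bytes.
// Worked example (illustrative values only): with numDim=3, component 1 of
// node 2 sits at slot 1 + 3*2 = 7 of the Coordinates array.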
31 // helper function
32 static void scatterEntries(int n, int* index, int min_index, int max_index,
33 int* Id_out, int* Id_in, int* Tag_out, int* Tag_in,
34 int* globalDegreesOfFreedom_out,
35 int* globalDegreesOfFreedom_in,
36 int numDim, double* Coordinates_out,
37 double* Coordinates_in)
38 {
39 const int range = max_index-min_index;
40 const size_t numDim_size = numDim*sizeof(double);
41
42 #pragma omp parallel for
43 for (int i=0; i<n; i++) {
44 const int k=index[i]-min_index;
45 if ((k>=0) && (k<range)) {
46 Id_out[k]=Id_in[i];
47 Tag_out[k]=Tag_in[i];
48 globalDegreesOfFreedom_out[k]=globalDegreesOfFreedom_in[i];
49 memcpy(&(Coordinates_out[INDEX2(0,k,numDim)]),
50 &(Coordinates_in[INDEX2(0,i,numDim)]), numDim_size);
51 }
52 }
53 }
54
55 // helper function
56 static void gatherEntries(int n, int* index, int min_index, int max_index,
57 int* Id_out, int* Id_in, int* Tag_out, int* Tag_in,
58 int* globalDegreesOfFreedom_out,
59 int* globalDegreesOfFreedom_in,
60 int numDim, double* Coordinates_out,
61 double* Coordinates_in)
62 {
63 const int range = max_index-min_index;
64 const size_t numDim_size = numDim*sizeof(double);
65
66 #pragma omp parallel for
67 for (int i=0; i<n; i++) {
68 const int k=index[i]-min_index;
69 if ((k>=0) && (k<range)) {
70 Id_out[i]=Id_in[k];
71 Tag_out[i]=Tag_in[k];
72 globalDegreesOfFreedom_out[i]=globalDegreesOfFreedom_in[k];
73 memcpy(&(Coordinates_out[INDEX2(0,i,numDim)]),
74 &(Coordinates_in[INDEX2(0,k,numDim)]), numDim_size);
75 }
76 }
77 }
78
79 /// constructor
80 /// use NodeFile::allocTable to allocate the node table (Id,Coordinates)
81 NodeFile::NodeFile(int nDim, Esys_MPIInfo *mpiInfo) :
82 numNodes(0),
83 numDim(nDim),
84 Id(NULL),
85 Tag(NULL),
86 globalDegreesOfFreedom(NULL),
87 Coordinates(NULL),
88 globalReducedDOFIndex(NULL),
89 globalReducedNodesIndex(NULL),
90 globalNodesIndex(NULL),
91 nodesMapping(NULL),
92 reducedNodesMapping(NULL),
93 degreesOfFreedomMapping(NULL),
94 reducedDegreesOfFreedomMapping(NULL),
95 nodesDistribution(NULL),
96 reducedNodesDistribution(NULL),
97 degreesOfFreedomDistribution(NULL),
98 reducedDegreesOfFreedomDistribution(NULL),
99 degreesOfFreedomConnector(NULL),
100 reducedDegreesOfFreedomConnector(NULL),
101 reducedNodesId(NULL),
102 degreesOfFreedomId(NULL),
103 reducedDegreesOfFreedomId(NULL),
104 status(FINLEY_INITIAL_STATUS)
105 {
106 MPIInfo = Esys_MPIInfo_getReference(mpiInfo);
107 }
108
109 /// destructor
110 NodeFile::~NodeFile()
111 {
112 freeTable();
113 Esys_MPIInfo_free(MPIInfo);
114 }
115
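// allocTable() below initialises all arrays inside an OpenMP parallel loop
// rather than serially: on NUMA machines the first write ("first touch")
// determines which processor's memory holds a page, so initialising with the
// same parallel pattern that later loops use keeps each node's data close to
// the thread that will work on it.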
116 /// allocates the node table within this node file to hold NN nodes.
117 void NodeFile::allocTable(int NN)
118 {
119 if (numNodes>0)
120 freeTable();
121
122 Id=new int[NN];
123 Coordinates=new double[NN*numDim];
124 Tag=new int[NN];
125 globalDegreesOfFreedom=new int[NN];
126 globalReducedDOFIndex=new int[NN];
127 globalReducedNodesIndex=new int[NN];
128 globalNodesIndex=new int[NN];
129 reducedNodesId=new int[NN];
130 degreesOfFreedomId=new int[NN];
131 reducedDegreesOfFreedomId=new int[NN];
132 numNodes=NN;
133
134 // this initialization makes sure that data are located on the right
135 // processor
136 #pragma omp parallel for
137 for (int n=0; n<numNodes; n++) {
138 Id[n]=-1;
139 for (int i=0; i<numDim; i++)
140 Coordinates[INDEX2(i,n,numDim)]=0.;
141 Tag[n]=-1;
142 globalDegreesOfFreedom[n]=-1;
143 globalReducedDOFIndex[n]=-1;
144 globalReducedNodesIndex[n]=-1;
145 globalNodesIndex[n]=-1;
146 reducedNodesId[n]=-1;
147 degreesOfFreedomId[n]=-1;
148 reducedDegreesOfFreedomId[n]=-1;
149 }
150 }
151
152 /// frees the node table within this node file
153 void NodeFile::freeTable()
154 {
155 delete[] Id;
156 delete[] Coordinates;
157 delete[] globalDegreesOfFreedom;
158 delete[] globalReducedDOFIndex;
159 delete[] globalReducedNodesIndex;
160 delete[] globalNodesIndex;
161 delete[] Tag;
162 delete[] reducedNodesId;
163 delete[] degreesOfFreedomId;
164 delete[] reducedDegreesOfFreedomId;
165 tagsInUse.clear();
166 Finley_NodeMapping_free(nodesMapping);
167 nodesMapping=NULL;
168 Finley_NodeMapping_free(reducedNodesMapping);
169 reducedNodesMapping=NULL;
170 Finley_NodeMapping_free(degreesOfFreedomMapping);
171 degreesOfFreedomMapping=NULL;
172 Finley_NodeMapping_free(reducedDegreesOfFreedomMapping);
173 reducedDegreesOfFreedomMapping=NULL;
174 Paso_Distribution_free(nodesDistribution);
175 nodesDistribution=NULL;
176 Paso_Distribution_free(reducedNodesDistribution);
177 reducedNodesDistribution=NULL;
178 Paso_Distribution_free(degreesOfFreedomDistribution);
179 degreesOfFreedomDistribution=NULL;
180 Paso_Distribution_free(reducedDegreesOfFreedomDistribution);
181 reducedDegreesOfFreedomDistribution=NULL;
182 Paso_Connector_free(degreesOfFreedomConnector);
183 degreesOfFreedomConnector=NULL;
184 Paso_Connector_free(reducedDegreesOfFreedomConnector);
185 reducedDegreesOfFreedomConnector=NULL;
186
187 numNodes=0;
188 }
189
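// setCoordinates() and setTags() below access escript::Data objects sample by
// sample through getSampleDataRO(). The const_cast does not imply any write
// access; it is presumably only needed because getSampleDataRO() cannot be
// called through a const reference in this version of escript.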
190 /// copies the array newX into this->coordinates
191 void NodeFile::setCoordinates(const escript::Data& cNewX)
192 {
193 if (cNewX.getDataPointSize() != numDim) {
194 std::stringstream ss;
195 ss << "NodeFile::setCoordinates: number of dimensions of new "
196 "coordinates has to be " << numDim;
197 const std::string errorMsg(ss.str());
198 Finley_setError(VALUE_ERROR, errorMsg.c_str());
199 } else if (cNewX.getNumDataPointsPerSample() != 1 ||
200 cNewX.getNumSamples() != numNodes) {
201 std::stringstream ss;
202 ss << "NodeFile::setCoordinates: number of given nodes must be "
203 << numNodes;
204 const std::string errorMsg(ss.str());
205 Finley_setError(VALUE_ERROR, errorMsg.c_str());
206 } else {
207 const size_t numDim_size=numDim*sizeof(double);
208 Finley_increaseStatus(this);
209 escript::Data& newX = *const_cast<escript::Data*>(&cNewX);
210 #pragma omp parallel for
211 for (int n=0; n<numNodes; n++) {
212 memcpy(&(Coordinates[INDEX2(0,n,numDim)]), newX.getSampleDataRO(n), numDim_size);
213 }
214 }
215 }
216
217 /// sets tags to newTag where mask>0
218 void NodeFile::setTags(const int newTag, const escript::Data& cMask)
219 {
220 Finley_resetError();
221
222 if (1 != cMask.getDataPointSize()) {
223 Finley_setError(TYPE_ERROR, "NodeFile::setTags: number of components of mask must be 1.");
224 return;
225 } else if (cMask.getNumDataPointsPerSample() != 1 ||
226 cMask.getNumSamples() != numNodes) {
227 Finley_setError(TYPE_ERROR, "NodeFile::setTags: illegal number of samples of mask Data object");
228 return;
229 }
230
231 escript::Data& mask = *const_cast<escript::Data*>(&cMask);
232 #pragma omp parallel for
233 for (int n=0; n<numNodes; n++) {
234 if (mask.getSampleDataRO(n)[0] > 0)
235 Tag[n]=newTag;
236 }
237 updateTagList();
238 }
239
240 std::pair<int,int> NodeFile::getDOFRange() const
241 {
242 std::pair<int,int> result(util::getMinMaxInt(
243 1, numNodes, globalDegreesOfFreedom));
244 if (result.second < result.first) {
245 result.first = -1;
246 result.second = 0;
247 }
248 return result;
249 }
250
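// The global range queries below use a common MPI idiom: the local minimum is
// negated so that one MPI_Allreduce with MPI_MAX returns the global maximum
// and, after negating again, the global minimum in a single collective call.
// Sketch with hypothetical values: local ranges [5,9] and [2,7] are sent as
// {-5,9} and {-2,7}; the element-wise MPI_MAX is {-2,9}, i.e. the global
// range [2,9].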
251 std::pair<int,int> NodeFile::getGlobalIdRange() const
252 {
253 std::pair<int,int> result(util::getMinMaxInt(1, numNodes, Id));
254
255 #ifdef ESYS_MPI
256 int global_id_range[2];
257 int id_range[2] = { -result.first, result.second };
258 MPI_Allreduce(id_range, global_id_range, 2, MPI_INT, MPI_MAX, MPIInfo->comm);
259 result.first = -global_id_range[0];
260 result.second = global_id_range[1];
261 #endif
262 if (result.second < result.first) {
263 result.first = -1;
264 result.second = 0;
265 }
266 return result;
267 }
268
269 std::pair<int,int> NodeFile::getGlobalDOFRange() const
270 {
271 std::pair<int,int> result(util::getMinMaxInt(
272 1, numNodes, globalDegreesOfFreedom));
273
274 #ifdef ESYS_MPI
275 int global_id_range[2];
276 int id_range[2] = { -result.first, result.second };
277 MPI_Allreduce(id_range, global_id_range, 2, MPI_INT, MPI_MAX, MPIInfo->comm);
278 result.first = -global_id_range[0];
279 result.second = global_id_range[1];
280 #endif
281 if (result.second < result.first) {
282 result.first = -1;
283 result.second = 0;
284 }
285 return result;
286 }
287
288 std::pair<int,int> NodeFile::getGlobalNodeIDIndexRange() const
289 {
290 std::pair<int,int> result(util::getMinMaxInt(1, numNodes, globalNodesIndex));
291
292 #ifdef ESYS_MPI
293 int global_id_range[2];
294 int id_range[2] = { -result.first, result.second };
295 MPI_Allreduce(id_range, global_id_range, 2, MPI_INT, MPI_MAX, MPIInfo->comm);
296 result.first = -global_id_range[0];
297 result.second = global_id_range[1];
298 #endif
299 if (result.second < result.first) {
300 result.first = -1;
301 result.second = 0;
302 }
303 return result;
304 }
305
306 void NodeFile::copyTable(int offset, int idOffset, int dofOffset,
307 const NodeFile* in)
308 {
309 // check number of dimensions and table size
310 if (numDim != in->numDim) {
311 Finley_setError(TYPE_ERROR, "NodeFile::copyTable: dimensions of node files don't match");
312 return;
313 }
314 if (numNodes < in->numNodes+offset) {
315 Finley_setError(MEMORY_ERROR, "NodeFile::copyTable: node table is too small.");
316 return;
317 }
318
319 #pragma omp parallel for
320 for (int n=0; n<in->numNodes; n++) {
321 Id[offset+n]=in->Id[n]+idOffset;
322 Tag[offset+n]=in->Tag[n];
323 globalDegreesOfFreedom[offset+n]=in->globalDegreesOfFreedom[n]+dofOffset;
324 for(int i=0; i<numDim; i++)
325 Coordinates[INDEX2(i, offset+n, numDim)] =
326 in->Coordinates[INDEX2(i, n, in->numDim)];
327 }
328 }
329
330 /// scatters the NodeFile 'in' into this NodeFile using index[0:in->numNodes-1];
331 /// each index value has to be between 0 and numNodes-1.
332 /// colouring is chosen for the worst case
333 void NodeFile::scatter(int* index, const NodeFile* in)
334 {
335 scatterEntries(numNodes, index, 0, in->numNodes, Id, in->Id, Tag, in->Tag,
336 globalDegreesOfFreedom, in->globalDegreesOfFreedom,
337 numDim, Coordinates, in->Coordinates);
338 }
339
340 /// gathers this NodeFile from the NodeFile 'in' using the entries in
341 /// index[0:numNodes-1], which have to lie within the global id range of
342 /// 'in' (upper bound exclusive)
343 void NodeFile::gather(int* index, const NodeFile* in)
344 {
345 const std::pair<int,int> id_range(in->getGlobalIdRange());
346 gatherEntries(numNodes, index, id_range.first, id_range.second, Id, in->Id,
347 Tag, in->Tag, globalDegreesOfFreedom, in->globalDegreesOfFreedom,
348 numDim, Coordinates, in->Coordinates);
349 }
350
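// gather_global() first scatters the locally known node records into a
// distributed buffer and then gathers the requested entries back. Both phases
// pass the buffer around all ranks in a ring: each rank sends to rank+1 and
// receives from rank-1 (both presumably taken modulo the communicator size by
// Esys_MPIInfo_mod) via MPI_Sendrecv_replace, while buffer_rank is decremented
// modulo the size each pass so that every rank's portion of the id range is
// visited exactly once after MPIInfo->size steps.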
351 void NodeFile::gather_global(int* index, const NodeFile* in)
352 {
353 // get the global range of node ids
354 const std::pair<int,int> id_range(in->getGlobalIdRange());
355 const int undefined_node=id_range.first-1;
356 std::vector<int> distribution(in->MPIInfo->size+1);
357
358 // distribute the range of node ids
359 int buffer_len=Esys_MPIInfo_setDistribution(in->MPIInfo,
360 id_range.first, id_range.second, &distribution[0]);
361
362 // allocate buffers
363 int *Id_buffer=new int[buffer_len];
364 int *Tag_buffer=new int[buffer_len];
365 int *globalDegreesOfFreedom_buffer=new int[buffer_len];
366 double *Coordinates_buffer=new double[buffer_len*numDim];
367
368 // fill Id_buffer with the undefined_node marker so that it can later be
369 // checked whether all requested nodes have been defined
370 #pragma omp parallel for
371 for (int n=0; n<buffer_len; n++)
372 Id_buffer[n]=undefined_node;
373
374 // fill the buffer by sending portions around in a circle
375 #ifdef ESYS_MPI
376 MPI_Status status;
377 int dest=Esys_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank+1);
378 int source=Esys_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank-1);
379 #endif
380 int buffer_rank=in->MPIInfo->rank;
381 for (int p=0; p<in->MPIInfo->size; ++p) {
382 if (p>0) { // the initial send can be skipped
383 #ifdef ESYS_MPI
384 MPI_Sendrecv_replace(Id_buffer, buffer_len, MPI_INT, dest,
385 in->MPIInfo->msg_tag_counter, source,
386 in->MPIInfo->msg_tag_counter, in->MPIInfo->comm, &status);
387 MPI_Sendrecv_replace(Tag_buffer, buffer_len, MPI_INT, dest,
388 in->MPIInfo->msg_tag_counter+1, source,
389 in->MPIInfo->msg_tag_counter+1, in->MPIInfo->comm, &status);
390 MPI_Sendrecv_replace(globalDegreesOfFreedom_buffer, buffer_len,
391 MPI_INT, dest, in->MPIInfo->msg_tag_counter+2, source,
392 in->MPIInfo->msg_tag_counter+2, in->MPIInfo->comm, &status);
393 MPI_Sendrecv_replace(Coordinates_buffer, buffer_len*numDim,
394 MPI_DOUBLE, dest, in->MPIInfo->msg_tag_counter+3, source,
395 in->MPIInfo->msg_tag_counter+3, in->MPIInfo->comm, &status);
396 #endif
397 in->MPIInfo->msg_tag_counter+=4;
398 }
399 buffer_rank=Esys_MPIInfo_mod(in->MPIInfo->size, buffer_rank-1);
400 scatterEntries(in->numNodes, in->Id, distribution[buffer_rank],
401 distribution[buffer_rank+1], Id_buffer, in->Id,
402 Tag_buffer, in->Tag, globalDegreesOfFreedom_buffer,
403 in->globalDegreesOfFreedom, numDim, Coordinates_buffer,
404 in->Coordinates);
405 }
406 // now entries are collected from the buffer again by sending the
407 // entries around in a circle
408 #ifdef ESYS_MPI
409 dest=Esys_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank+1);
410 source=Esys_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank-1);
411 #endif
412 buffer_rank=in->MPIInfo->rank;
413 for (int p=0; p<in->MPIInfo->size; ++p) {
414 gatherEntries(numNodes, index, distribution[buffer_rank],
415 distribution[buffer_rank+1], Id, Id_buffer, Tag, Tag_buffer,
416 globalDegreesOfFreedom, globalDegreesOfFreedom_buffer, numDim,
417 Coordinates, Coordinates_buffer);
418 if (p < in->MPIInfo->size-1) { // the last send can be skipped
419 #ifdef ESYS_MPI
420 MPI_Sendrecv_replace(Id_buffer, buffer_len, MPI_INT, dest,
421 in->MPIInfo->msg_tag_counter, source,
422 in->MPIInfo->msg_tag_counter, in->MPIInfo->comm, &status);
423 MPI_Sendrecv_replace(Tag_buffer, buffer_len, MPI_INT, dest,
424 in->MPIInfo->msg_tag_counter+1, source,
425 in->MPIInfo->msg_tag_counter+1, in->MPIInfo->comm, &status);
426 MPI_Sendrecv_replace(globalDegreesOfFreedom_buffer, buffer_len,
427 MPI_INT, dest, in->MPIInfo->msg_tag_counter+2, source,
428 in->MPIInfo->msg_tag_counter+2, in->MPIInfo->comm, &status);
429 MPI_Sendrecv_replace(Coordinates_buffer, buffer_len*numDim,
430 MPI_DOUBLE, dest, in->MPIInfo->msg_tag_counter+3, source,
431 in->MPIInfo->msg_tag_counter+3, in->MPIInfo->comm, &status);
432 #endif
433 in->MPIInfo->msg_tag_counter+=4;
434 }
435 buffer_rank=Esys_MPIInfo_mod(in->MPIInfo->size, buffer_rank-1);
436 }
437 // check if all nodes are set:
438 #pragma omp parallel for
439 for (int n=0; n<numNodes; ++n) {
440 if (Id[n] == undefined_node) {
441 std::stringstream ss;
442 ss << "NodeFile::gather_global: Node id " << Id[n]
443 << " at position " << n << " is referenced but not defined.";
444 const std::string errorMsg(ss.str());
445 Finley_setError(VALUE_ERROR, errorMsg.c_str());
446 }
447 }
448 delete[] Id_buffer;
449 delete[] Tag_buffer;
450 delete[] globalDegreesOfFreedom_buffer;
451 delete[] Coordinates_buffer;
452 // make sure that the error is global
453 Esys_MPIInfo_noError(in->MPIInfo);
454 }
455
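// assignMPIRankToDOFs() maps every node's global DOF number to the rank that
// owns it: distribution[p]..distribution[p+1]-1 is the block of DOFs owned by
// rank p, so the owner of DOF k is the unique p with
// distribution[p] <= k < distribution[p+1]. The p_min/p_max pre-scan only
// narrows the linear search to ranks that can occur for the local DOF range.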
456 void NodeFile::assignMPIRankToDOFs(Esys_MPI_rank* mpiRankOfDOF,
457 int *distribution)
458 {
459 Esys_MPI_rank p_min=MPIInfo->size, p_max=-1;
460 // first we retrieve the min and max DOF on this processor to reduce
461 // costs for searching
462 const std::pair<int,int> dof_range(getDOFRange());
463
464 for (int p=0; p<MPIInfo->size; ++p) {
465 if (distribution[p]<=dof_range.first) p_min=p;
466 if (distribution[p]<=dof_range.second) p_max=p;
467 }
468 #pragma omp parallel for
469 for (int n=0; n<numNodes; ++n) {
470 const int k=globalDegreesOfFreedom[n];
471 for (int p=p_min; p<=p_max; ++p) {
472 if (k < distribution[p+1]) {
473 mpiRankOfDOF[n]=p;
474 break;
475 }
476 }
477 }
478 }
479
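// prepareLabeling() is the shared first phase of the dense labelings below:
// the global DOF/node id range is split across ranks via
// Esys_MPIInfo_setDistribution, a marker buffer travels around the ring and
// every rank flags the ids it references, and the flags in the locally owned
// slice are finally replaced by consecutive local numbers. The return value
// is the number of ids in use that this rank owns.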
480 int NodeFile::prepareLabeling(int* mask, std::vector<int>& buffer,
481 std::vector<int>& distribution, bool useNodes)
482 {
483 const int UNSET_ID=-1,SET_ID=1;
484
485 // get the global range of DOF/node ids
486 std::pair<int,int> idRange(useNodes ?
487 getGlobalNodeIDIndexRange() : getGlobalDOFRange());
488 const int* indexArray = (useNodes ? globalNodesIndex : globalDegreesOfFreedom);
489 // distribute the range of node ids
490 distribution.assign(MPIInfo->size+1, 0);
491 int buffer_len=Esys_MPIInfo_setDistribution(MPIInfo, idRange.first,
492 idRange.second, &distribution[0]);
493 const int myCount=distribution[MPIInfo->rank+1]-distribution[MPIInfo->rank];
494
495 // fill buffer with the UNSET_ID marker so the ids actually in use can be identified
496 buffer.assign(buffer_len, UNSET_ID);
497
498 // fill the buffer by sending portions around in a circle
499 #ifdef ESYS_MPI
500 MPI_Status status;
501 int dest=Esys_MPIInfo_mod(MPIInfo->size, MPIInfo->rank + 1);
502 int source=Esys_MPIInfo_mod(MPIInfo->size, MPIInfo->rank - 1);
503 #endif
504 int buffer_rank=MPIInfo->rank;
505 for (int p=0; p<MPIInfo->size; ++p) {
506 if (p>0) { // the initial send can be skipped
507 #ifdef ESYS_MPI
508 MPI_Sendrecv_replace(&buffer[0], buffer.size(), MPI_INT, dest,
509 MPIInfo->msg_tag_counter, source, MPIInfo->msg_tag_counter,
510 MPIInfo->comm, &status);
511 #endif
512 MPIInfo->msg_tag_counter++;
513 }
514 buffer_rank=Esys_MPIInfo_mod(MPIInfo->size, buffer_rank-1);
515 const int id0=distribution[buffer_rank];
516 const int id1=distribution[buffer_rank+1];
517 #pragma omp parallel for
518 for (int n=0; n<numNodes; n++) {
519 if (!mask || mask[n]>-1) {
520 const int k=indexArray[n];
521 if (id0<=k && k<id1) {
522 buffer[k-id0] = SET_ID;
523 }
524 }
525 }
526 }
527 // count the entries in the buffer
528 // TODO: OMP parallel
529 int myNewCount=0;
530 for (int n=0; n<myCount; ++n) {
531 if (buffer[n] == SET_ID) {
532 buffer[n]=myNewCount;
533 myNewCount++;
534 }
535 }
536 return myNewCount;
537 }
538
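// createDenseDOFLabeling() replaces the (possibly sparse) DOF numbers by a
// dense, gap-free global numbering: every rank labels its own DOFs from 0, an
// MPI_Allreduce makes the per-rank counts known everywhere, an exclusive
// prefix sum over those counts yields each rank's offset, and one more ring
// pass writes the shifted labels back into globalDegreesOfFreedom.
// Offset sketch (hypothetical counts): per-rank counts {3,5,2} give offsets
// {0,3,8} and a new global total of 10 DOFs.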
539 int NodeFile::createDenseDOFLabeling()
540 {
541 std::vector<int> DOF_buffer;
542 std::vector<int> distribution;
543 std::vector<int> loc_offsets(MPIInfo->size);
544 std::vector<int> offsets(MPIInfo->size);
545 int new_numGlobalDOFs=0;
546
547 // retrieve the number of own DOFs and fill buffer
548 loc_offsets[MPIInfo->rank]=prepareLabeling(NULL, DOF_buffer, distribution,
549 false);
550 #ifdef ESYS_MPI
551 MPI_Allreduce(&loc_offsets[0], &offsets[0], MPIInfo->size, MPI_INT,
552 MPI_SUM, MPIInfo->comm);
553 for (int n=0; n<MPIInfo->size; ++n) {
554 loc_offsets[n]=new_numGlobalDOFs;
555 new_numGlobalDOFs+=offsets[n];
556 }
557 #else
558 new_numGlobalDOFs=loc_offsets[0];
559 loc_offsets[0]=0;
560 #endif
561
562 const int myDOFs=distribution[MPIInfo->rank+1]-distribution[MPIInfo->rank];
563 #pragma omp parallel for
564 for (int n=0; n<myDOFs; ++n)
565 DOF_buffer[n]+=loc_offsets[MPIInfo->rank];
566
567 std::vector<bool_t> set_new_DOF(numNodes, TRUE);
568
569 // now entries are collected from the buffer again by sending them around
570 // in a circle
571 #ifdef ESYS_MPI
572 int dest=Esys_MPIInfo_mod(MPIInfo->size, MPIInfo->rank + 1);
573 int source=Esys_MPIInfo_mod(MPIInfo->size, MPIInfo->rank - 1);
574 #endif
575 int buffer_rank=MPIInfo->rank;
576 for (int p=0; p<MPIInfo->size; ++p) {
577 const int dof0=distribution[buffer_rank];
578 const int dof1=distribution[buffer_rank+1];
579 #pragma omp parallel for
580 for (int n=0; n<numNodes; n++) {
581 const int k=globalDegreesOfFreedom[n];
582 if (set_new_DOF[n] && dof0<=k && k<dof1) {
583 globalDegreesOfFreedom[n]=DOF_buffer[k-dof0];
584 set_new_DOF[n]=FALSE;
585 }
586 }
587 if (p<MPIInfo->size-1) { // the last send can be skipped
588 #ifdef ESYS_MPI
589 MPI_Status status;
590 MPI_Sendrecv_replace(&DOF_buffer[0], DOF_buffer.size(), MPI_INT,
591 dest, MPIInfo->msg_tag_counter, source,
592 MPIInfo->msg_tag_counter, MPIInfo->comm, &status);
593 #endif
594 MPIInfo->msg_tag_counter+=1;
595 }
596 buffer_rank=Esys_MPIInfo_mod(MPIInfo->size, buffer_rank-1);
597 }
598
599 return new_numGlobalDOFs;
600 }
601
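// createDenseNodeLabeling() performs the same densification for node ids. The
// first header_len (=2) entries of Node_buffer carry the originating rank's
// local min/max node id, so a receiving rank knows which id window the
// circulating buffer describes. As a side effect node_distribution is
// converted into the cumulative distribution (prefix sums) of the new dense
// node numbering.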
602 int NodeFile::createDenseNodeLabeling(int* node_distribution,
603 const int* dof_distribution)
604 {
605 const int UNSET_ID=-1, SET_ID=1;
606 const int myFirstDOF=dof_distribution[MPIInfo->rank];
607 const int myLastDOF=dof_distribution[MPIInfo->rank+1];
608
609 // find the range of node ids controlled by me
610 int min_id=std::numeric_limits<int>::max();
611 int max_id=std::numeric_limits<int>::min();
612 #pragma omp parallel
613 {
614 int loc_max_id=max_id;
615 int loc_min_id=min_id;
616 #pragma omp for
617 for (int n=0; n<numNodes; n++) {
618 const int dof=globalDegreesOfFreedom[n];
619 if (myFirstDOF<=dof && dof<myLastDOF) {
620 loc_max_id=std::max(loc_max_id, Id[n]);
621 loc_min_id=std::min(loc_min_id, Id[n]);
622 }
623 }
624 #pragma omp critical
625 {
626 max_id=std::max(loc_max_id, max_id);
627 min_id=std::min(loc_min_id, min_id);
628 }
629 }
630 int my_buffer_len = (max_id>=min_id ? max_id-min_id+1 : 0);
631 int buffer_len;
632
633 #ifdef ESYS_MPI
634 MPI_Allreduce(&my_buffer_len, &buffer_len, 1, MPI_INT, MPI_MAX,
635 MPIInfo->comm);
636 #else
637 buffer_len=my_buffer_len;
638 #endif
639
640 const int header_len=2;
641 std::vector<int> Node_buffer(buffer_len+header_len, UNSET_ID);
642 // the two header entries store the local node id range
643 Node_buffer[0]=min_id;
644 Node_buffer[1]=max_id;
645
646 // mark and count the nodes in use
647 #pragma omp parallel for
648 for (int n=0; n<numNodes; n++) {
649 globalNodesIndex[n]=-1;
650 const int dof=globalDegreesOfFreedom[n];
651 if (myFirstDOF<=dof && dof<myLastDOF)
652 Node_buffer[Id[n]-min_id+header_len]=SET_ID;
653 }
654 int myNewNumNodes=0;
655 for (int n=0; n<my_buffer_len; n++) {
656 if (Node_buffer[header_len+n]==SET_ID) {
657 Node_buffer[header_len+n]=myNewNumNodes;
658 myNewNumNodes++;
659 }
660 }
661 // make the local number of nodes globally available
662 #ifdef ESYS_MPI
663 MPI_Allgather(&myNewNumNodes, 1, MPI_INT, node_distribution, 1, MPI_INT,
664 MPIInfo->comm);
665 #else
666 node_distribution[0]=myNewNumNodes;
667 #endif
668
669 int globalNumNodes=0;
670 for (int p=0; p<MPIInfo->size; ++p) {
671 const int itmp=node_distribution[p];
672 node_distribution[p]=globalNumNodes;
673 globalNumNodes+=itmp;
674 }
675 node_distribution[MPIInfo->size]=globalNumNodes;
676
677 // offset node buffer
678 #pragma omp parallel for
679 for (int n=0; n<my_buffer_len; n++)
680 Node_buffer[n+header_len]+=node_distribution[MPIInfo->rank];
681
682 // now we send this buffer around to assign global node index
683 #ifdef ESYS_MPI
684 int dest=Esys_MPIInfo_mod(MPIInfo->size, MPIInfo->rank + 1);
685 int source=Esys_MPIInfo_mod(MPIInfo->size, MPIInfo->rank - 1);
686 #endif
687 int buffer_rank=MPIInfo->rank;
688 for (int p=0; p<MPIInfo->size; ++p) {
689 const int nodeID_0=Node_buffer[0];
690 const int nodeID_1=Node_buffer[1];
691 const int dof0=dof_distribution[buffer_rank];
692 const int dof1=dof_distribution[buffer_rank+1];
693 if (nodeID_0 <= nodeID_1) {
694 #pragma omp parallel for
695 for (int n=0; n<numNodes; n++) {
696 const int dof=globalDegreesOfFreedom[n];
697 const int id=Id[n]-nodeID_0;
698 if (dof0<=dof && dof<dof1 && id>=0 && id<=nodeID_1-nodeID_0)
699 globalNodesIndex[n]=Node_buffer[id+header_len];
700 }
701 }
702 if (p<MPIInfo->size-1) { // the last send can be skipped
703 #ifdef ESYS_MPI
704 MPI_Status status;
705 MPI_Sendrecv_replace(&Node_buffer[0], Node_buffer.size(), MPI_INT,
706 dest, MPIInfo->msg_tag_counter, source,
707 MPIInfo->msg_tag_counter, MPIInfo->comm, &status);
708 #endif
709 MPIInfo->msg_tag_counter+=1;
710 }
711 buffer_rank=Esys_MPIInfo_mod(MPIInfo->size, buffer_rank-1);
712 }
713 return globalNumNodes;
714 }
715
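// createDenseReducedLabeling() repeats the dense-labeling pattern for the
// reduced-order DOFs/nodes: only entries with reducedMask[n] > -1 take part,
// useNodes selects whether globalNodesIndex or globalDegreesOfFreedom drives
// the labeling, and masked-out entries keep the sentinel loc_offsets[0]-1
// (i.e. -1, one below the first valid reduced index).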
716 int NodeFile::createDenseReducedLabeling(int* reducedMask, bool useNodes)
717 {
718 std::vector<int> buffer;
719 std::vector<int> distribution;
720 std::vector<int> loc_offsets(MPIInfo->size);
721 std::vector<int> offsets(MPIInfo->size);
722 int new_numGlobalReduced=0;
723
724 // retrieve the number of own DOFs/nodes and fill buffer
725 loc_offsets[MPIInfo->rank]=prepareLabeling(reducedMask, buffer,
726 distribution, useNodes);
727 #ifdef ESYS_MPI
728 MPI_Allreduce(&loc_offsets[0], &offsets[0], MPIInfo->size, MPI_INT,
729 MPI_SUM, MPIInfo->comm);
730 for (int n=0; n<MPIInfo->size; ++n) {
731 loc_offsets[n]=new_numGlobalReduced;
732 new_numGlobalReduced+=offsets[n];
733 }
734 #else
735 new_numGlobalReduced=loc_offsets[0];
736 loc_offsets[0]=0;
737 #endif
738
739 const int myCount=distribution[MPIInfo->rank+1]-distribution[MPIInfo->rank];
740 #pragma omp parallel for
741 for (int n=0; n<myCount; ++n)
742 buffer[n]+=loc_offsets[MPIInfo->rank];
743
744 const int* denseArray =
745 (useNodes ? globalNodesIndex : globalDegreesOfFreedom);
746 int* reducedArray =
747 (useNodes ? globalReducedNodesIndex : globalReducedDOFIndex);
748
749 #pragma omp parallel for
750 for (int n=0; n<numNodes; ++n)
751 reducedArray[n]=loc_offsets[0]-1;
752
753 // now entries are collected from the buffer by sending them around
754 // in a circle
755 #ifdef ESYS_MPI
756 int dest=Esys_MPIInfo_mod(MPIInfo->size, MPIInfo->rank + 1);
757 int source=Esys_MPIInfo_mod(MPIInfo->size, MPIInfo->rank - 1);
758 #endif
759 int buffer_rank=MPIInfo->rank;
760 for (int p=0; p<MPIInfo->size; ++p) {
761 const int id0=distribution[buffer_rank];
762 const int id1=distribution[buffer_rank+1];
763 #pragma omp parallel for
764 for (int n=0; n<numNodes; n++) {
765 if (reducedMask[n] > -1) {
766 const int k=denseArray[n];
767 if (id0<=k && k<id1)
768 reducedArray[n]=buffer[k-id0];
769 }
770 }
771 if (p<MPIInfo->size-1) { // the last send can be skipped
772 #ifdef ESYS_MPI
773 MPI_Status status;
774 MPI_Sendrecv_replace(&buffer[0], buffer.size(), MPI_INT, dest,
775 MPIInfo->msg_tag_counter, source,
776 MPIInfo->msg_tag_counter, MPIInfo->comm, &status);
777 #endif
778 MPIInfo->msg_tag_counter+=1;
779 }
780 buffer_rank=Esys_MPIInfo_mod(MPIInfo->size, buffer_rank-1);
781 }
782 return new_numGlobalReduced;
783 }
784
785 } // namespace finley
786

Properties

svn:eol-style  native
svn:keywords   Author Date Id Revision
svn:mergeinfo  /branches/lapack2681/finley/src/NodeFile.cpp:2682-2741 /branches/pasowrap/finley/src/NodeFile.cpp:3661-3674 /branches/py3_attempt2/finley/src/NodeFile.cpp:3871-3891 /branches/restext/finley/src/NodeFile.cpp:2610-2624 /branches/ripleygmg_from_3668/finley/src/NodeFile.cpp:3669-3791 /branches/stage3.0/finley/src/NodeFile.cpp:2569-2590 /branches/symbolic_from_3470/finley/src/NodeFile.cpp:3471-3974 /release/3.0/finley/src/NodeFile.cpp:2591-2601 /trunk/finley/src/NodeFile.cpp:4257-4344
