/trunk/finley/src/NodeFile.cpp
Revision 4431 - Fri May 31 07:09:03 2013 UTC by caltinay
File size: 28196 byte(s)
finley ElementFile is now a class...

/*****************************************************************************
*
* Copyright (c) 2003-2013 by University of Queensland
* http://www.uq.edu.au
*
* Primary Business: Queensland, Australia
* Licensed under the Open Software License version 3.0
* http://www.opensource.org/licenses/osl-3.0.php
*
* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
* Development since 2012 by School of Earth Sciences
*
*****************************************************************************/


/****************************************************************************

  Finley: NodeFile

*****************************************************************************/

#include "NodeFile.h"
#include "Util.h"
#include <escript/Data.h>

#include <limits>
#include <sstream>

namespace finley {

// helper function
static void scatterEntries(int n, int* index, int min_index, int max_index,
                           int* Id_out, int* Id_in, int* Tag_out, int* Tag_in,
                           int* globalDegreesOfFreedom_out,
                           int* globalDegreesOfFreedom_in,
                           int numDim, double* Coordinates_out,
                           double* Coordinates_in)
{
    const int range = max_index-min_index;
    const size_t numDim_size = numDim*sizeof(double);

    #pragma omp parallel for
    for (int i=0; i<n; i++) {
        const int k=index[i]-min_index;
        if ((k>=0) && (k<range)) {
            Id_out[k]=Id_in[i];
            Tag_out[k]=Tag_in[i];
            globalDegreesOfFreedom_out[k]=globalDegreesOfFreedom_in[i];
            memcpy(&(Coordinates_out[INDEX2(0,k,numDim)]),
                   &(Coordinates_in[INDEX2(0,i,numDim)]), numDim_size);
        }
    }
}

// helper function
static void gatherEntries(int n, int* index, int min_index, int max_index,
                          int* Id_out, int* Id_in, int* Tag_out, int* Tag_in,
                          int* globalDegreesOfFreedom_out,
                          int* globalDegreesOfFreedom_in,
                          int numDim, double* Coordinates_out,
                          double* Coordinates_in)
{
    const int range = max_index-min_index;
    const size_t numDim_size = numDim*sizeof(double);

    #pragma omp parallel for
    for (int i=0; i<n; i++) {
        const int k=index[i]-min_index;
        if ((k>=0) && (k<range)) {
            Id_out[i]=Id_in[k];
            Tag_out[i]=Tag_in[k];
            globalDegreesOfFreedom_out[i]=globalDegreesOfFreedom_in[k];
            memcpy(&(Coordinates_out[INDEX2(0,i,numDim)]),
                   &(Coordinates_in[INDEX2(0,k,numDim)]), numDim_size);
        }
    }
}
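
// Illustrative note (not part of the original source): scatterEntries and
// gatherEntries are inverse access patterns over the same index array. For
// every i with min_index <= index[i] < max_index they move the Id, Tag,
// globalDegreesOfFreedom and coordinate data of one node, roughly:
//
//     scatterEntries:  out[index[i]-min_index] = in[i];   // push by index
//     gatherEntries:   out[i] = in[index[i]-min_index];   // pull by index
//
// Entries whose index falls outside [min_index, max_index) are skipped, which
// is what allows the ring exchange further below to process one id slice at
// a time.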

/// constructor
/// use NodeFile::allocTable to allocate the node table (Id,Coordinates)
NodeFile::NodeFile(int nDim, Esys_MPIInfo *mpiInfo) :
    numNodes(0),
    numDim(nDim),
    Id(NULL),
    Tag(NULL),
    globalDegreesOfFreedom(NULL),
    Coordinates(NULL),
    globalReducedDOFIndex(NULL),
    globalReducedNodesIndex(NULL),
    globalNodesIndex(NULL),
    nodesMapping(NULL),
    reducedNodesMapping(NULL),
    degreesOfFreedomMapping(NULL),
    reducedDegreesOfFreedomMapping(NULL),
    nodesDistribution(NULL),
    reducedNodesDistribution(NULL),
    degreesOfFreedomDistribution(NULL),
    reducedDegreesOfFreedomDistribution(NULL),
    degreesOfFreedomConnector(NULL),
    reducedDegreesOfFreedomConnector(NULL),
    reducedNodesId(NULL),
    degreesOfFreedomId(NULL),
    reducedDegreesOfFreedomId(NULL),
    status(FINLEY_INITIAL_STATUS)
{
    MPIInfo = Esys_MPIInfo_getReference(mpiInfo);
}

/// destructor
NodeFile::~NodeFile()
{
    freeTable();
    Esys_MPIInfo_free(MPIInfo);
}

/// allocates the node table within this node file to hold NN nodes.
void NodeFile::allocTable(int NN)
{
    if (numNodes>0)
        freeTable();

    Id=new int[NN];
    Coordinates=new double[NN*numDim];
    Tag=new int[NN];
    globalDegreesOfFreedom=new int[NN];
    globalReducedDOFIndex=new int[NN];
    globalReducedNodesIndex=new int[NN];
    globalNodesIndex=new int[NN];
    reducedNodesId=new int[NN];
    degreesOfFreedomId=new int[NN];
    reducedDegreesOfFreedomId=new int[NN];
    numNodes=NN;

    // this initialization makes sure that data are located on the right
    // processor
    #pragma omp parallel for
    for (int n=0; n<numNodes; n++) {
        Id[n]=-1;
        for (int i=0; i<numDim; i++)
            Coordinates[INDEX2(i,n,numDim)]=0.;
        Tag[n]=-1;
        globalDegreesOfFreedom[n]=-1;
        globalReducedDOFIndex[n]=-1;
        globalReducedNodesIndex[n]=-1;
        globalNodesIndex[n]=-1;
        reducedNodesId[n]=-1;
        degreesOfFreedomId[n]=-1;
        reducedDegreesOfFreedomId[n]=-1;
    }
}

/// frees the node table within this node file
void NodeFile::freeTable()
{
    delete[] Id;
    delete[] Coordinates;
    delete[] globalDegreesOfFreedom;
    delete[] globalReducedDOFIndex;
    delete[] globalReducedNodesIndex;
    delete[] globalNodesIndex;
    delete[] Tag;
    delete[] reducedNodesId;
    delete[] degreesOfFreedomId;
    delete[] reducedDegreesOfFreedomId;
    tagsInUse.clear();
    Finley_NodeMapping_free(nodesMapping);
    nodesMapping=NULL;
    Finley_NodeMapping_free(reducedNodesMapping);
    reducedNodesMapping=NULL;
    Finley_NodeMapping_free(degreesOfFreedomMapping);
    degreesOfFreedomMapping=NULL;
    Finley_NodeMapping_free(reducedDegreesOfFreedomMapping);
    reducedDegreesOfFreedomMapping=NULL;
    Paso_Distribution_free(nodesDistribution);
    nodesDistribution=NULL;
    Paso_Distribution_free(reducedNodesDistribution);
    reducedNodesDistribution=NULL;
    Paso_Distribution_free(degreesOfFreedomDistribution);
    degreesOfFreedomDistribution=NULL;
    Paso_Distribution_free(reducedDegreesOfFreedomDistribution);
    reducedDegreesOfFreedomDistribution=NULL;
    Paso_Connector_free(degreesOfFreedomConnector);
    degreesOfFreedomConnector=NULL;
    Paso_Connector_free(reducedDegreesOfFreedomConnector);
    reducedDegreesOfFreedomConnector=NULL;

    numNodes=0;
}

void NodeFile::updateTagList()
{
    Finley_Util_setValuesInUse(Tag, numNodes, tagsInUse, MPIInfo);
}

/// copies the array newX into this->coordinates
void NodeFile::setCoordinates(const escript::Data& cNewX)
{
    if (cNewX.getDataPointSize() != numDim) {
        std::stringstream ss;
        ss << "NodeFile::setCoordinates: number of dimensions of new "
              "coordinates has to be " << numDim;
        const std::string errorMsg(ss.str());
        Finley_setError(VALUE_ERROR, errorMsg.c_str());
    } else if (cNewX.getNumDataPointsPerSample() != 1 ||
            cNewX.getNumSamples() != numNodes) {
        std::stringstream ss;
        ss << "NodeFile::setCoordinates: number of given nodes must be "
           << numNodes;
        const std::string errorMsg(ss.str());
        Finley_setError(VALUE_ERROR, errorMsg.c_str());
    } else {
        const size_t numDim_size=numDim*sizeof(double);
        Finley_increaseStatus(this);
        escript::Data& newX = *const_cast<escript::Data*>(&cNewX);
        #pragma omp parallel for
        for (int n=0; n<numNodes; n++) {
            memcpy(&(Coordinates[INDEX2(0,n,numDim)]), newX.getSampleDataRO(n), numDim_size);
        }
    }
}

/// sets tags to newTag where mask>0
void NodeFile::setTags(const int newTag, const escript::Data& cMask)
{
    Finley_resetError();

    if (1 != cMask.getDataPointSize()) {
        Finley_setError(TYPE_ERROR, "NodeFile::setTags: number of components of mask must be 1.");
        return;
    } else if (cMask.getNumDataPointsPerSample() != 1 ||
            cMask.getNumSamples() != numNodes) {
        Finley_setError(TYPE_ERROR, "NodeFile::setTags: illegal number of samples of mask Data object");
        return;
    }

    escript::Data& mask = *const_cast<escript::Data*>(&cMask);
    #pragma omp parallel for
    for (int n=0; n<numNodes; n++) {
        if (mask.getSampleDataRO(n)[0] > 0)
            Tag[n]=newTag;
    }
    updateTagList();
}

std::pair<int,int> NodeFile::getDOFRange() const
{
    std::pair<int,int> result(
            Finley_Util_getMinMaxInt(1, numNodes, globalDegreesOfFreedom));
    if (result.second < result.first) {
        result.first = -1;
        result.second = 0;
    }
    return result;
}

std::pair<int,int> NodeFile::getGlobalIdRange() const
{
    std::pair<int,int> result(Finley_Util_getMinMaxInt(1, numNodes, Id));

#ifdef ESYS_MPI
    int global_id_range[2];
    int id_range[2] = { -result.first, result.second };
    MPI_Allreduce(id_range, global_id_range, 2, MPI_INT, MPI_MAX, MPIInfo->comm);
    result.first = -global_id_range[0];
    result.second = global_id_range[1];
#endif
    if (result.second < result.first) {
        result.first = -1;
        result.second = 0;
    }
    return result;
}

std::pair<int,int> NodeFile::getGlobalDOFRange() const
{
    std::pair<int,int> result(
            Finley_Util_getMinMaxInt(1, numNodes, globalDegreesOfFreedom));

#ifdef ESYS_MPI
    int global_id_range[2];
    int id_range[2] = { -result.first, result.second };
    MPI_Allreduce(id_range, global_id_range, 2, MPI_INT, MPI_MAX, MPIInfo->comm);
    result.first = -global_id_range[0];
    result.second = global_id_range[1];
#endif
    if (result.second < result.first) {
        result.first = -1;
        result.second = 0;
    }
    return result;
}

std::pair<int,int> NodeFile::getGlobalNodeIDIndexRange() const
{
    std::pair<int,int> result(
            Finley_Util_getMinMaxInt(1, numNodes, globalNodesIndex));

#ifdef ESYS_MPI
    int global_id_range[2];
    int id_range[2] = { -result.first, result.second };
    MPI_Allreduce(id_range, global_id_range, 2, MPI_INT, MPI_MAX, MPIInfo->comm);
    result.first = -global_id_range[0];
    result.second = global_id_range[1];
#endif
    if (result.second < result.first) {
        result.first = -1;
        result.second = 0;
    }
    return result;
}

void NodeFile::copyTable(int offset, int idOffset, int dofOffset,
                         const NodeFile* in)
{
    // check number of dimensions and table size
    if (numDim != in->numDim) {
        Finley_setError(TYPE_ERROR, "NodeFile::copyTable: dimensions of node files don't match");
        return;
    }
    if (numNodes < in->numNodes+offset) {
        Finley_setError(MEMORY_ERROR, "NodeFile::copyTable: node table is too small.");
        return;
    }

    #pragma omp parallel for
    for (int n=0; n<in->numNodes; n++) {
        Id[offset+n]=in->Id[n]+idOffset;
        Tag[offset+n]=in->Tag[n];
        globalDegreesOfFreedom[offset+n]=in->globalDegreesOfFreedom[n]+dofOffset;
        for (int i=0; i<numDim; i++)
            Coordinates[INDEX2(i, offset+n, numDim)] =
                in->Coordinates[INDEX2(i, n, in->numDim)];
    }
}

/// scatters the NodeFile in into this NodeFile using index[0:in->numNodes-1].
/// index has to be between 0 and numNodes-1.
/// colouring is chosen for the worst case
void NodeFile::scatter(int* index, const NodeFile* in)
{
    scatterEntries(numNodes, index, 0, in->numNodes, Id, in->Id, Tag, in->Tag,
                   globalDegreesOfFreedom, in->globalDegreesOfFreedom,
                   numDim, Coordinates, in->Coordinates);
}

/// gathers this NodeFile from the NodeFile 'in' using the entries in
/// index[0:out->numNodes-1] which are between min_index and max_index
/// (exclusive)
void NodeFile::gather(int* index, const NodeFile* in)
{
    const std::pair<int,int> id_range(in->getGlobalIdRange());
    gatherEntries(numNodes, index, id_range.first, id_range.second, Id, in->Id,
                  Tag, in->Tag, globalDegreesOfFreedom,
                  in->globalDegreesOfFreedom, numDim, Coordinates,
                  in->Coordinates);
}

void NodeFile::gather_global(int* index, const NodeFile* in)
{
    // get the global range of node ids
    const std::pair<int,int> id_range(in->getGlobalIdRange());
    const int undefined_node=id_range.first-1;
    std::vector<int> distribution(in->MPIInfo->size+1);

    // distribute the range of node ids
    int buffer_len=Esys_MPIInfo_setDistribution(in->MPIInfo,
            id_range.first, id_range.second, &distribution[0]);

    // allocate buffers
    int *Id_buffer=new int[buffer_len];
    int *Tag_buffer=new int[buffer_len];
    int *globalDegreesOfFreedom_buffer=new int[buffer_len];
    double *Coordinates_buffer=new double[buffer_len*numDim];

    // fill Id_buffer by the undefined_node marker to check if nodes
    // are defined
    #pragma omp parallel for
    for (int n=0; n<buffer_len; n++)
        Id_buffer[n]=undefined_node;

    // fill the buffer by sending portions around in a circle
#ifdef ESYS_MPI
    MPI_Status status;
    int dest=Esys_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank+1);
    int source=Esys_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank-1);
#endif
    int buffer_rank=in->MPIInfo->rank;
    for (int p=0; p<in->MPIInfo->size; ++p) {
        if (p>0) { // the initial send can be skipped
#ifdef ESYS_MPI
            MPI_Sendrecv_replace(Id_buffer, buffer_len, MPI_INT, dest,
                    in->MPIInfo->msg_tag_counter, source,
                    in->MPIInfo->msg_tag_counter, in->MPIInfo->comm, &status);
            MPI_Sendrecv_replace(Tag_buffer, buffer_len, MPI_INT, dest,
                    in->MPIInfo->msg_tag_counter+1, source,
                    in->MPIInfo->msg_tag_counter+1, in->MPIInfo->comm, &status);
            MPI_Sendrecv_replace(globalDegreesOfFreedom_buffer, buffer_len,
                    MPI_INT, dest, in->MPIInfo->msg_tag_counter+2, source,
                    in->MPIInfo->msg_tag_counter+2, in->MPIInfo->comm, &status);
            MPI_Sendrecv_replace(Coordinates_buffer, buffer_len*numDim,
                    MPI_DOUBLE, dest, in->MPIInfo->msg_tag_counter+3, source,
                    in->MPIInfo->msg_tag_counter+3, in->MPIInfo->comm, &status);
#endif
            in->MPIInfo->msg_tag_counter+=4;
        }
        buffer_rank=Esys_MPIInfo_mod(in->MPIInfo->size, buffer_rank-1);
        scatterEntries(in->numNodes, in->Id, distribution[buffer_rank],
                distribution[buffer_rank+1], Id_buffer, in->Id,
                Tag_buffer, in->Tag, globalDegreesOfFreedom_buffer,
                in->globalDegreesOfFreedom, numDim, Coordinates_buffer,
                in->Coordinates);
    }
    // now entries are collected from the buffer again by sending the
    // entries around in a circle
#ifdef ESYS_MPI
    dest=Esys_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank+1);
    source=Esys_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank-1);
#endif
    buffer_rank=in->MPIInfo->rank;
    for (int p=0; p<in->MPIInfo->size; ++p) {
        gatherEntries(numNodes, index, distribution[buffer_rank],
                distribution[buffer_rank+1], Id, Id_buffer, Tag, Tag_buffer,
                globalDegreesOfFreedom, globalDegreesOfFreedom_buffer, numDim,
                Coordinates, Coordinates_buffer);
        if (p < in->MPIInfo->size-1) { // the last send can be skipped
#ifdef ESYS_MPI
            MPI_Sendrecv_replace(Id_buffer, buffer_len, MPI_INT, dest,
                    in->MPIInfo->msg_tag_counter, source,
                    in->MPIInfo->msg_tag_counter, in->MPIInfo->comm, &status);
            MPI_Sendrecv_replace(Tag_buffer, buffer_len, MPI_INT, dest,
                    in->MPIInfo->msg_tag_counter+1, source,
                    in->MPIInfo->msg_tag_counter+1, in->MPIInfo->comm, &status);
            MPI_Sendrecv_replace(globalDegreesOfFreedom_buffer, buffer_len,
                    MPI_INT, dest, in->MPIInfo->msg_tag_counter+2, source,
                    in->MPIInfo->msg_tag_counter+2, in->MPIInfo->comm, &status);
            MPI_Sendrecv_replace(Coordinates_buffer, buffer_len*numDim,
                    MPI_DOUBLE, dest, in->MPIInfo->msg_tag_counter+3, source,
                    in->MPIInfo->msg_tag_counter+3, in->MPIInfo->comm, &status);
#endif
            in->MPIInfo->msg_tag_counter+=4;
        }
        buffer_rank=Esys_MPIInfo_mod(in->MPIInfo->size, buffer_rank-1);
    }
    // check if all nodes are set:
    #pragma omp parallel for
    for (int n=0; n<numNodes; ++n) {
        if (Id[n] == undefined_node) {
            std::stringstream ss;
            ss << "NodeFile::gather_global: Node id " << Id[n]
               << " at position " << n << " is referenced but not defined.";
            const std::string errorMsg(ss.str());
            Finley_setError(VALUE_ERROR, errorMsg.c_str());
        }
    }
    delete[] Id_buffer;
    delete[] Tag_buffer;
    delete[] globalDegreesOfFreedom_buffer;
    delete[] Coordinates_buffer;
    // make sure that the error is global
    Esys_MPIInfo_noError(in->MPIInfo);
}
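
// Illustrative sketch (not part of the original source): gather_global uses a
// round-robin ("ring") exchange. Rank r owns the contiguous id slice
// [distribution[r], distribution[r+1]) and a buffer of that size; the buffers
// travel around the ring via MPI_Sendrecv_replace (send to rank+1, receive
// from rank-1), so after MPIInfo->size steps each buffer has visited every
// rank once:
//
//     int buffer_rank = MPIInfo->rank;
//     for (int p = 0; p < MPIInfo->size; ++p) {
//         // step p>0: pass the held buffer to rank+1, receive from rank-1
//         buffer_rank = (buffer_rank - 1 + size) % size;
//         // the buffer now held is the one destined for rank buffer_rank,
//         // so only ids in [distribution[buffer_rank],
//         // distribution[buffer_rank+1]) are touched
//     }
//
// The first pass scatters local node data into the travelling buffers, the
// second pass gathers the requested entries back out of them; four message
// tags per step keep the Id, Tag, DOF and coordinate messages apart.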

void NodeFile::assignMPIRankToDOFs(Esys_MPI_rank* mpiRankOfDOF,
                                   int *distribution)
{
    Esys_MPI_rank p_min=MPIInfo->size, p_max=-1;
    // first we retrieve the min and max DOF on this processor to reduce
    // costs for searching
    const std::pair<int,int> dof_range(getDOFRange());

    for (int p=0; p<MPIInfo->size; ++p) {
        if (distribution[p]<=dof_range.first) p_min=p;
        if (distribution[p]<=dof_range.second) p_max=p;
    }
    #pragma omp parallel for
    for (int n=0; n<numNodes; ++n) {
        const int k=globalDegreesOfFreedom[n];
        for (int p=p_min; p<=p_max; ++p) {
            if (k < distribution[p+1]) {
                mpiRankOfDOF[n]=p;
                break;
            }
        }
    }
}
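
// Illustrative note (not part of the original source): 'distribution' is the
// usual cumulative ownership array, i.e. rank p owns the global DOFs
// [distribution[p], distribution[p+1]). For example, with 10 DOFs on 3 ranks
// and distribution = {0, 4, 7, 10}, DOF 5 fails 5 < distribution[1]=4 but
// satisfies 5 < distribution[2]=7, so it is assigned to rank 1. The
// p_min/p_max bounds merely restrict the search to ranks that can own DOFs
// present on this processor.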

int NodeFile::prepareLabeling(int* mask, std::vector<int>& buffer,
                              std::vector<int>& distribution, bool useNodes)
{
    const int UNSET_ID=-1, SET_ID=1;

    // get the global range of DOF/node ids
    std::pair<int,int> idRange(useNodes ?
            getGlobalNodeIDIndexRange() : getGlobalDOFRange());
    const int* indexArray = (useNodes ? globalNodesIndex : globalDegreesOfFreedom);
    // distribute the range of node ids
    distribution.assign(MPIInfo->size+1, 0);
    int buffer_len=Esys_MPIInfo_setDistribution(MPIInfo, idRange.first,
            idRange.second, &distribution[0]);
    const int myCount=distribution[MPIInfo->rank+1]-distribution[MPIInfo->rank];

    // fill buffer by the UNSET_ID marker to check if nodes are defined
    buffer.assign(buffer_len, UNSET_ID);

    // fill the buffer by sending portions around in a circle
#ifdef ESYS_MPI
    MPI_Status status;
    int dest=Esys_MPIInfo_mod(MPIInfo->size, MPIInfo->rank + 1);
    int source=Esys_MPIInfo_mod(MPIInfo->size, MPIInfo->rank - 1);
#endif
    int buffer_rank=MPIInfo->rank;
    for (int p=0; p<MPIInfo->size; ++p) {
        if (p>0) { // the initial send can be skipped
#ifdef ESYS_MPI
            MPI_Sendrecv_replace(&buffer[0], buffer.size(), MPI_INT, dest,
                    MPIInfo->msg_tag_counter, source, MPIInfo->msg_tag_counter,
                    MPIInfo->comm, &status);
#endif
            MPIInfo->msg_tag_counter++;
        }
        buffer_rank=Esys_MPIInfo_mod(MPIInfo->size, buffer_rank-1);
        const int id0=distribution[buffer_rank];
        const int id1=distribution[buffer_rank+1];
        #pragma omp parallel for
        for (int n=0; n<numNodes; n++) {
            if (!mask || mask[n]>-1) {
                const int k=indexArray[n];
                if (id0<=k && k<id1) {
                    buffer[k-id0] = SET_ID;
                }
            }
        }
    }
    // count the entries in the buffer
    // TODO: OMP parallel
    int myNewCount=0;
    for (int n=0; n<myCount; ++n) {
        if (buffer[n] == SET_ID) {
            buffer[n]=myNewCount;
            myNewCount++;
        }
    }
    return myNewCount;
}
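
// Illustrative note (not part of the original source): prepareLabeling marks,
// inside a buffer spanning this rank's slice of the global id range, which
// DOF/node ids are actually in use (and not masked out) and then renumbers
// the used entries consecutively. If, say, the local slice covers ids
// 100..104 and only 100, 102 and 103 are referenced, the buffer ends up as
// {0, -1, 1, 2, -1} and the function returns 3. The callers turn these local
// labels into a dense global numbering by adding a per-rank offset.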

int NodeFile::createDenseDOFLabeling()
{
    std::vector<int> DOF_buffer;
    std::vector<int> distribution;
    std::vector<int> loc_offsets(MPIInfo->size);
    std::vector<int> offsets(MPIInfo->size);
    int new_numGlobalDOFs=0;

    // retrieve the number of own DOFs and fill buffer
    loc_offsets[MPIInfo->rank]=prepareLabeling(NULL, DOF_buffer, distribution,
                                               false);
#ifdef ESYS_MPI
    MPI_Allreduce(&loc_offsets[0], &offsets[0], MPIInfo->size, MPI_INT,
                  MPI_SUM, MPIInfo->comm);
    for (int n=0; n<MPIInfo->size; ++n) {
        loc_offsets[n]=new_numGlobalDOFs;
        new_numGlobalDOFs+=offsets[n];
    }
#else
    new_numGlobalDOFs=loc_offsets[0];
    loc_offsets[0]=0;
#endif

    const int myDOFs=distribution[MPIInfo->rank+1]-distribution[MPIInfo->rank];
    #pragma omp parallel for
    for (int n=0; n<myDOFs; ++n)
        DOF_buffer[n]+=loc_offsets[MPIInfo->rank];

    std::vector<bool_t> set_new_DOF(numNodes, TRUE);

    // now entries are collected from the buffer again by sending them around
    // in a circle
#ifdef ESYS_MPI
    int dest=Esys_MPIInfo_mod(MPIInfo->size, MPIInfo->rank + 1);
    int source=Esys_MPIInfo_mod(MPIInfo->size, MPIInfo->rank - 1);
#endif
    int buffer_rank=MPIInfo->rank;
    for (int p=0; p<MPIInfo->size; ++p) {
        const int dof0=distribution[buffer_rank];
        const int dof1=distribution[buffer_rank+1];
        #pragma omp parallel for
        for (int n=0; n<numNodes; n++) {
            const int k=globalDegreesOfFreedom[n];
            if (set_new_DOF[n] && dof0<=k && k<dof1) {
                globalDegreesOfFreedom[n]=DOF_buffer[k-dof0];
                set_new_DOF[n]=FALSE;
            }
        }
        if (p<MPIInfo->size-1) { // the last send can be skipped
#ifdef ESYS_MPI
            MPI_Status status;
            MPI_Sendrecv_replace(&DOF_buffer[0], DOF_buffer.size(), MPI_INT,
                    dest, MPIInfo->msg_tag_counter, source,
                    MPIInfo->msg_tag_counter, MPIInfo->comm, &status);
#endif
            MPIInfo->msg_tag_counter+=1;
        }
        buffer_rank=Esys_MPIInfo_mod(MPIInfo->size, buffer_rank-1);
    }

    return new_numGlobalDOFs;
}
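
// Illustrative note (not part of the original source): after the Allreduce,
// offsets[p] holds the number of DOFs newly labelled by rank p and
// loc_offsets[p] the exclusive prefix sum over the preceding ranks, so adding
// loc_offsets[p] makes rank p's labels globally unique. With three ranks
// labelling 4, 2 and 5 DOFs, offsets = {4, 2, 5}, loc_offsets = {0, 4, 6} and
// the new global DOF count returned is 11.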

int NodeFile::createDenseNodeLabeling(int* node_distribution,
                                      const int* dof_distribution)
{
    const int UNSET_ID=-1, SET_ID=1;
    const int myFirstDOF=dof_distribution[MPIInfo->rank];
    const int myLastDOF=dof_distribution[MPIInfo->rank+1];

    // find the range of node ids controlled by me
    int min_id=std::numeric_limits<int>::max();
    int max_id=std::numeric_limits<int>::min();
    #pragma omp parallel
    {
        int loc_max_id=max_id;
        int loc_min_id=min_id;
        #pragma omp for
        for (int n=0; n<numNodes; n++) {
            const int dof=globalDegreesOfFreedom[n];
            if (myFirstDOF<=dof && dof<myLastDOF) {
                loc_max_id=std::max(loc_max_id, Id[n]);
                loc_min_id=std::min(loc_min_id, Id[n]);
            }
        }
        #pragma omp critical
        {
            max_id=std::max(loc_max_id, max_id);
            min_id=std::min(loc_min_id, min_id);
        }
    }
    int my_buffer_len = (max_id>=min_id ? max_id-min_id+1 : 0);
    int buffer_len;

#ifdef ESYS_MPI
    MPI_Allreduce(&my_buffer_len, &buffer_len, 1, MPI_INT, MPI_MAX,
                  MPIInfo->comm);
#else
    buffer_len=my_buffer_len;
#endif

    const int header_len=2;
    std::vector<int> Node_buffer(buffer_len+header_len, UNSET_ID);
    // extra storage for these IDs
    Node_buffer[0]=min_id;
    Node_buffer[1]=max_id;

    // mark and count the nodes in use
    #pragma omp parallel for
    for (int n=0; n<numNodes; n++) {
        globalNodesIndex[n]=-1;
        const int dof=globalDegreesOfFreedom[n];
        if (myFirstDOF<=dof && dof<myLastDOF)
            Node_buffer[Id[n]-min_id+header_len]=SET_ID;
    }
    int myNewNumNodes=0;
    for (int n=0; n<my_buffer_len; n++) {
        if (Node_buffer[header_len+n]==SET_ID) {
            Node_buffer[header_len+n]=myNewNumNodes;
            myNewNumNodes++;
        }
    }
    // make the local number of nodes globally available
#ifdef ESYS_MPI
    MPI_Allgather(&myNewNumNodes, 1, MPI_INT, node_distribution, 1, MPI_INT,
                  MPIInfo->comm);
#else
    node_distribution[0]=myNewNumNodes;
#endif

    int globalNumNodes=0;
    for (int p=0; p<MPIInfo->size; ++p) {
        const int itmp=node_distribution[p];
        node_distribution[p]=globalNumNodes;
        globalNumNodes+=itmp;
    }
    node_distribution[MPIInfo->size]=globalNumNodes;

    // offset node buffer
    #pragma omp parallel for
    for (int n=0; n<my_buffer_len; n++)
        Node_buffer[n+header_len]+=node_distribution[MPIInfo->rank];

    // now we send this buffer around to assign global node index
#ifdef ESYS_MPI
    int dest=Esys_MPIInfo_mod(MPIInfo->size, MPIInfo->rank + 1);
    int source=Esys_MPIInfo_mod(MPIInfo->size, MPIInfo->rank - 1);
#endif
    int buffer_rank=MPIInfo->rank;
    for (int p=0; p<MPIInfo->size; ++p) {
        const int nodeID_0=Node_buffer[0];
        const int nodeID_1=Node_buffer[1];
        const int dof0=dof_distribution[buffer_rank];
        const int dof1=dof_distribution[buffer_rank+1];
        if (nodeID_0 <= nodeID_1) {
            #pragma omp parallel for
            for (int n=0; n<numNodes; n++) {
                const int dof=globalDegreesOfFreedom[n];
                const int id=Id[n]-nodeID_0;
                if (dof0<=dof && dof<dof1 && id>=0 && id<=nodeID_1-nodeID_0)
                    globalNodesIndex[n]=Node_buffer[id+header_len];
            }
        }
        if (p<MPIInfo->size-1) { // the last send can be skipped
#ifdef ESYS_MPI
            MPI_Status status;
            MPI_Sendrecv_replace(&Node_buffer[0], Node_buffer.size(), MPI_INT,
                    dest, MPIInfo->msg_tag_counter, source,
                    MPIInfo->msg_tag_counter, MPIInfo->comm, &status);
#endif
            MPIInfo->msg_tag_counter+=1;
        }
        buffer_rank=Esys_MPIInfo_mod(MPIInfo->size, buffer_rank-1);
    }
    return globalNumNodes;
}
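
// Illustrative note (not part of the original source): the two header entries
// of Node_buffer travel with the buffer so that every receiving rank knows
// which id window [Node_buffer[0], Node_buffer[1]] the payload refers to; the
// new global number of node Id[n] sits at position Id[n]-Node_buffer[0]+
// header_len. node_distribution is returned in the same cumulative form as
// the DOF distribution, i.e. rank p owns the new node numbers
// [node_distribution[p], node_distribution[p+1]).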

int NodeFile::createDenseReducedLabeling(int* reducedMask, bool useNodes)
{
    std::vector<int> buffer;
    std::vector<int> distribution;
    std::vector<int> loc_offsets(MPIInfo->size);
    std::vector<int> offsets(MPIInfo->size);
    int new_numGlobalReduced=0;

    // retrieve the number of own DOFs/nodes and fill buffer
    loc_offsets[MPIInfo->rank]=prepareLabeling(reducedMask, buffer,
                                               distribution, useNodes);
#ifdef ESYS_MPI
    MPI_Allreduce(&loc_offsets[0], &offsets[0], MPIInfo->size, MPI_INT,
                  MPI_SUM, MPIInfo->comm);
    for (int n=0; n<MPIInfo->size; ++n) {
        loc_offsets[n]=new_numGlobalReduced;
        new_numGlobalReduced+=offsets[n];
    }
#else
    new_numGlobalReduced=loc_offsets[0];
    loc_offsets[0]=0;
#endif

    const int myCount=distribution[MPIInfo->rank+1]-distribution[MPIInfo->rank];
    #pragma omp parallel for
    for (int n=0; n<myCount; ++n)
        buffer[n]+=loc_offsets[MPIInfo->rank];

    const int* denseArray =
        (useNodes ? globalNodesIndex : globalDegreesOfFreedom);
    int* reducedArray =
        (useNodes ? globalReducedNodesIndex : globalReducedDOFIndex);

    #pragma omp parallel for
    for (int n=0; n<numNodes; ++n)
        reducedArray[n]=loc_offsets[0]-1;

    // now entries are collected from the buffer by sending them around
    // in a circle
#ifdef ESYS_MPI
    int dest=Esys_MPIInfo_mod(MPIInfo->size, MPIInfo->rank + 1);
    int source=Esys_MPIInfo_mod(MPIInfo->size, MPIInfo->rank - 1);
#endif
    int buffer_rank=MPIInfo->rank;
    for (int p=0; p<MPIInfo->size; ++p) {
        const int id0=distribution[buffer_rank];
        const int id1=distribution[buffer_rank+1];
        #pragma omp parallel for
        for (int n=0; n<numNodes; n++) {
            if (reducedMask[n] > -1) {
                const int k=denseArray[n];
                if (id0<=k && k<id1)
                    reducedArray[n]=buffer[k-id0];
            }
        }
        if (p<MPIInfo->size-1) { // the last send can be skipped
#ifdef ESYS_MPI
            MPI_Status status;
            MPI_Sendrecv_replace(&buffer[0], buffer.size(), MPI_INT, dest,
                    MPIInfo->msg_tag_counter, source,
                    MPIInfo->msg_tag_counter, MPIInfo->comm, &status);
#endif
            MPIInfo->msg_tag_counter+=1;
        }
        buffer_rank=Esys_MPIInfo_mod(MPIInfo->size, buffer_rank-1);
    }
    return new_numGlobalReduced;
}

} // namespace finley
