/trunk/finley/src/NodeFile.cpp
Revision 4441 - Fri Jun 7 02:23:49 2013 UTC by caltinay
File size: 28094 byte(s)
finley now uses Data objects directly instead of going through the C wrapper.

/*****************************************************************************
*
* Copyright (c) 2003-2013 by University of Queensland
* http://www.uq.edu.au
*
* Primary Business: Queensland, Australia
* Licensed under the Open Software License version 3.0
* http://www.opensource.org/licenses/osl-3.0.php
*
* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
* Development since 2012 by School of Earth Sciences
*
*****************************************************************************/


/****************************************************************************

  Finley: NodeFile

*****************************************************************************/

#include "NodeFile.h"
#include <escript/Data.h>

#include <cstring>   // for memcpy
#include <limits>
#include <sstream>

namespace finley {

// helper function
static void scatterEntries(int n, int* index, int min_index, int max_index,
                           int* Id_out, int* Id_in, int* Tag_out, int* Tag_in,
                           int* globalDegreesOfFreedom_out,
                           int* globalDegreesOfFreedom_in,
                           int numDim, double* Coordinates_out,
                           double* Coordinates_in)
{
    const int range = max_index-min_index;
    const size_t numDim_size = numDim*sizeof(double);

#pragma omp parallel for
    for (int i=0; i<n; i++) {
        const int k=index[i]-min_index;
        if ((k>=0) && (k<range)) {
            Id_out[k]=Id_in[i];
            Tag_out[k]=Tag_in[i];
            globalDegreesOfFreedom_out[k]=globalDegreesOfFreedom_in[i];
            memcpy(&(Coordinates_out[INDEX2(0,k,numDim)]),
                   &(Coordinates_in[INDEX2(0,i,numDim)]), numDim_size);
        }
    }
}

// helper function
static void gatherEntries(int n, int* index, int min_index, int max_index,
                          int* Id_out, int* Id_in, int* Tag_out, int* Tag_in,
                          int* globalDegreesOfFreedom_out,
                          int* globalDegreesOfFreedom_in,
                          int numDim, double* Coordinates_out,
                          double* Coordinates_in)
{
    const int range = max_index-min_index;
    const size_t numDim_size = numDim*sizeof(double);

#pragma omp parallel for
    for (int i=0; i<n; i++) {
        const int k=index[i]-min_index;
        if ((k>=0) && (k<range)) {
            Id_out[i]=Id_in[k];
            Tag_out[i]=Tag_in[k];
            globalDegreesOfFreedom_out[i]=globalDegreesOfFreedom_in[k];
            memcpy(&(Coordinates_out[INDEX2(0,i,numDim)]),
                   &(Coordinates_in[INDEX2(0,k,numDim)]), numDim_size);
        }
    }
}

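// Illustrative note (not part of the original sources): scatterEntries and
// gatherEntries are mirror operations on the same index array. Assuming a
// hypothetical single-component case with index = {2,0,1} and min_index = 0:
//
//   // scatter: out[index[i]] = in[i]   ->  out = { in[1], in[2], in[0] }
//   // gather:  out[i] = in[index[i]]   ->  out = { in[2], in[0], in[1] }
//
// Entries whose shifted index k = index[i]-min_index falls outside
// [0, max_index-min_index) are skipped by both helpers.
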
/// constructor
/// use NodeFile::allocTable to allocate the node table (Id,Coordinates)
NodeFile::NodeFile(int nDim, Esys_MPIInfo *mpiInfo) :
    numNodes(0),
    numDim(nDim),
    Id(NULL),
    Tag(NULL),
    globalDegreesOfFreedom(NULL),
    Coordinates(NULL),
    globalReducedDOFIndex(NULL),
    globalReducedNodesIndex(NULL),
    globalNodesIndex(NULL),
    nodesMapping(NULL),
    reducedNodesMapping(NULL),
    degreesOfFreedomMapping(NULL),
    reducedDegreesOfFreedomMapping(NULL),
    nodesDistribution(NULL),
    reducedNodesDistribution(NULL),
    degreesOfFreedomDistribution(NULL),
    reducedDegreesOfFreedomDistribution(NULL),
    degreesOfFreedomConnector(NULL),
    reducedDegreesOfFreedomConnector(NULL),
    reducedNodesId(NULL),
    degreesOfFreedomId(NULL),
    reducedDegreesOfFreedomId(NULL),
    status(FINLEY_INITIAL_STATUS)
{
    MPIInfo = Esys_MPIInfo_getReference(mpiInfo);
}

/// destructor
NodeFile::~NodeFile()
{
    freeTable();
    Esys_MPIInfo_free(MPIInfo);
}

/// allocates the node table within this node file to hold NN nodes.
void NodeFile::allocTable(int NN)
{
    if (numNodes>0)
        freeTable();

    Id=new int[NN];
    Coordinates=new double[NN*numDim];
    Tag=new int[NN];
    globalDegreesOfFreedom=new int[NN];
    globalReducedDOFIndex=new int[NN];
    globalReducedNodesIndex=new int[NN];
    globalNodesIndex=new int[NN];
    reducedNodesId=new int[NN];
    degreesOfFreedomId=new int[NN];
    reducedDegreesOfFreedomId=new int[NN];
    numNodes=NN;

    // this initialization makes sure that data are located on the right
    // processor
#pragma omp parallel for
    for (int n=0; n<numNodes; n++) {
        Id[n]=-1;
        for (int i=0; i<numDim; i++)
            Coordinates[INDEX2(i,n,numDim)]=0.;
        Tag[n]=-1;
        globalDegreesOfFreedom[n]=-1;
        globalReducedDOFIndex[n]=-1;
        globalReducedNodesIndex[n]=-1;
        globalNodesIndex[n]=-1;
        reducedNodesId[n]=-1;
        degreesOfFreedomId[n]=-1;
        reducedDegreesOfFreedomId[n]=-1;
    }
}

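// Illustrative usage sketch (hypothetical caller code, not part of this file;
// assumes the esysUtils helper Esys_MPIInfo_alloc):
//
//   Esys_MPIInfo* mpiInfo = Esys_MPIInfo_alloc(MPI_COMM_WORLD);
//   finley::NodeFile nodes(3, mpiInfo);   // 3 spatial dimensions
//   nodes.allocTable(numLocalNodes);      // allocates and initialises to -1/0.
//   // ... fill Id, Tag, globalDegreesOfFreedom and Coordinates ...
//   // freeTable() is invoked by the destructor; an explicit call is only
//   // needed when the table is to be re-allocated.
//   Esys_MPIInfo_free(mpiInfo);
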
/// frees the node table within this node file
void NodeFile::freeTable()
{
    delete[] Id;
    delete[] Coordinates;
    delete[] globalDegreesOfFreedom;
    delete[] globalReducedDOFIndex;
    delete[] globalReducedNodesIndex;
    delete[] globalNodesIndex;
    delete[] Tag;
    delete[] reducedNodesId;
    delete[] degreesOfFreedomId;
    delete[] reducedDegreesOfFreedomId;
    tagsInUse.clear();
    Finley_NodeMapping_free(nodesMapping);
    nodesMapping=NULL;
    Finley_NodeMapping_free(reducedNodesMapping);
    reducedNodesMapping=NULL;
    Finley_NodeMapping_free(degreesOfFreedomMapping);
    degreesOfFreedomMapping=NULL;
    Finley_NodeMapping_free(reducedDegreesOfFreedomMapping);
    reducedDegreesOfFreedomMapping=NULL;
    Paso_Distribution_free(nodesDistribution);
    nodesDistribution=NULL;
    Paso_Distribution_free(reducedNodesDistribution);
    reducedNodesDistribution=NULL;
    Paso_Distribution_free(degreesOfFreedomDistribution);
    degreesOfFreedomDistribution=NULL;
    Paso_Distribution_free(reducedDegreesOfFreedomDistribution);
    reducedDegreesOfFreedomDistribution=NULL;
    Paso_Connector_free(degreesOfFreedomConnector);
    degreesOfFreedomConnector=NULL;
    Paso_Connector_free(reducedDegreesOfFreedomConnector);
    reducedDegreesOfFreedomConnector=NULL;

    numNodes=0;
}

/// copies the array newX into this->coordinates
void NodeFile::setCoordinates(const escript::Data& cNewX)
{
    if (cNewX.getDataPointSize() != numDim) {
        std::stringstream ss;
        ss << "NodeFile::setCoordinates: number of dimensions of new "
              "coordinates has to be " << numDim;
        const std::string errorMsg(ss.str());
        Finley_setError(VALUE_ERROR, errorMsg.c_str());
    } else if (cNewX.getNumDataPointsPerSample() != 1 ||
            cNewX.getNumSamples() != numNodes) {
        std::stringstream ss;
        ss << "NodeFile::setCoordinates: number of given nodes must be "
            << numNodes;
        const std::string errorMsg(ss.str());
        Finley_setError(VALUE_ERROR, errorMsg.c_str());
    } else {
        const size_t numDim_size=numDim*sizeof(double);
        Finley_increaseStatus(this);
        escript::Data& newX = *const_cast<escript::Data*>(&cNewX);
#pragma omp parallel for
        for (int n=0; n<numNodes; n++) {
            memcpy(&(Coordinates[INDEX2(0,n,numDim)]), newX.getSampleDataRO(n), numDim_size);
        }
    }
}

/// sets tags to newTag where mask>0
void NodeFile::setTags(const int newTag, const escript::Data& cMask)
{
    Finley_resetError();

    if (1 != cMask.getDataPointSize()) {
        Finley_setError(TYPE_ERROR, "NodeFile::setTags: number of components of mask must be 1.");
        return;
    } else if (cMask.getNumDataPointsPerSample() != 1 ||
            cMask.getNumSamples() != numNodes) {
        Finley_setError(TYPE_ERROR, "NodeFile::setTags: illegal number of samples of mask Data object");
        return;
    }

    escript::Data& mask = *const_cast<escript::Data*>(&cMask);
#pragma omp parallel for
    for (int n=0; n<numNodes; n++) {
        if (mask.getSampleDataRO(n)[0] > 0)
            Tag[n]=newTag;
    }
    updateTagList();
}

std::pair<int,int> NodeFile::getDOFRange() const
{
    std::pair<int,int> result(util::getMinMaxInt(
                1, numNodes, globalDegreesOfFreedom));
    if (result.second < result.first) {
        result.first = -1;
        result.second = 0;
    }
    return result;
}

std::pair<int,int> NodeFile::getGlobalIdRange() const
{
    std::pair<int,int> result(util::getMinMaxInt(1, numNodes, Id));

#ifdef ESYS_MPI
    int global_id_range[2];
    int id_range[2] = { -result.first, result.second };
    MPI_Allreduce(id_range, global_id_range, 2, MPI_INT, MPI_MAX, MPIInfo->comm);
    result.first = -global_id_range[0];
    result.second = global_id_range[1];
#endif
    if (result.second < result.first) {
        result.first = -1;
        result.second = 0;
    }
    return result;
}

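// Illustrative note (hypothetical values): the global min/max pair is obtained
// with a single MPI_Allreduce by negating the local minimum and reducing with
// MPI_MAX. For example, with local ranges [3,9] on rank 0 and [1,7] on rank 1:
//
//   rank 0 contributes { -3, 9 }, rank 1 contributes { -1, 7 }
//   MPI_MAX yields { -1, 9 }  ->  global range [1, 9]
//
// The same pattern is used by getGlobalDOFRange() and
// getGlobalNodeIDIndexRange() below.
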
std::pair<int,int> NodeFile::getGlobalDOFRange() const
{
    std::pair<int,int> result(util::getMinMaxInt(
                1, numNodes, globalDegreesOfFreedom));

#ifdef ESYS_MPI
    int global_id_range[2];
    int id_range[2] = { -result.first, result.second };
    MPI_Allreduce(id_range, global_id_range, 2, MPI_INT, MPI_MAX, MPIInfo->comm);
    result.first = -global_id_range[0];
    result.second = global_id_range[1];
#endif
    if (result.second < result.first) {
        result.first = -1;
        result.second = 0;
    }
    return result;
}

std::pair<int,int> NodeFile::getGlobalNodeIDIndexRange() const
{
    std::pair<int,int> result(util::getMinMaxInt(1, numNodes, globalNodesIndex));

#ifdef ESYS_MPI
    int global_id_range[2];
    int id_range[2] = { -result.first, result.second };
    MPI_Allreduce(id_range, global_id_range, 2, MPI_INT, MPI_MAX, MPIInfo->comm);
    result.first = -global_id_range[0];
    result.second = global_id_range[1];
#endif
    if (result.second < result.first) {
        result.first = -1;
        result.second = 0;
    }
    return result;
}

void NodeFile::copyTable(int offset, int idOffset, int dofOffset,
                         const NodeFile* in)
{
    // check number of dimensions and table size
    if (numDim != in->numDim) {
        Finley_setError(TYPE_ERROR, "NodeFile::copyTable: dimensions of node files don't match");
        return;
    }
    if (numNodes < in->numNodes+offset) {
        Finley_setError(MEMORY_ERROR, "NodeFile::copyTable: node table is too small.");
        return;
    }

#pragma omp parallel for
    for (int n=0; n<in->numNodes; n++) {
        Id[offset+n]=in->Id[n]+idOffset;
        Tag[offset+n]=in->Tag[n];
        globalDegreesOfFreedom[offset+n]=in->globalDegreesOfFreedom[n]+dofOffset;
        for (int i=0; i<numDim; i++)
            Coordinates[INDEX2(i, offset+n, numDim)] =
                                    in->Coordinates[INDEX2(i, n, in->numDim)];
    }
}

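// Illustrative note (hypothetical values): copyTable() appends the table of
// 'in' starting at slot 'offset' while shifting ids and degrees of freedom.
// With offset=4, idOffset=100 and dofOffset=50, node n of 'in' lands at slot
// 4+n, its Id becomes in->Id[n]+100 and its globalDegreesOfFreedom entry
// becomes in->globalDegreesOfFreedom[n]+50, which keeps the merged tables free
// of id clashes provided the offsets are chosen large enough.
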
/// scatters the NodeFile in into this NodeFile using index[0:in->numNodes-1].
/// index has to be between 0 and numNodes-1.
/// colouring is chosen for the worst case
void NodeFile::scatter(int* index, const NodeFile* in)
{
    scatterEntries(numNodes, index, 0, in->numNodes, Id, in->Id, Tag, in->Tag,
                   globalDegreesOfFreedom, in->globalDegreesOfFreedom,
                   numDim, Coordinates, in->Coordinates);
}

/// gathers this NodeFile from the NodeFile 'in' using the entries in
/// index[0:out->numNodes-1] which are between min_index and max_index
/// (exclusive)
void NodeFile::gather(int* index, const NodeFile* in)
{
    const std::pair<int,int> id_range(in->getGlobalIdRange());
    gatherEntries(numNodes, index, id_range.first, id_range.second, Id, in->Id,
                  Tag, in->Tag, globalDegreesOfFreedom, in->globalDegreesOfFreedom,
                  numDim, Coordinates, in->Coordinates);
}

void NodeFile::gather_global(int* index, const NodeFile* in)
{
    // get the global range of node ids
    const std::pair<int,int> id_range(in->getGlobalIdRange());
    const int undefined_node=id_range.first-1;
    std::vector<int> distribution(in->MPIInfo->size+1);

    // distribute the range of node ids
    int buffer_len=Esys_MPIInfo_setDistribution(in->MPIInfo,
            id_range.first, id_range.second, &distribution[0]);

    // allocate buffers
    int *Id_buffer=new int[buffer_len];
    int *Tag_buffer=new int[buffer_len];
    int *globalDegreesOfFreedom_buffer=new int[buffer_len];
    double *Coordinates_buffer=new double[buffer_len*numDim];

    // fill Id_buffer by the undefined_node marker to check if nodes
    // are defined
#pragma omp parallel for
    for (int n=0; n<buffer_len; n++)
        Id_buffer[n]=undefined_node;

    // fill the buffer by sending portions around in a circle
#ifdef ESYS_MPI
    MPI_Status status;
    int dest=Esys_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank+1);
    int source=Esys_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank-1);
#endif
    int buffer_rank=in->MPIInfo->rank;
    for (int p=0; p<in->MPIInfo->size; ++p) {
        if (p>0) { // the initial send can be skipped
#ifdef ESYS_MPI
            MPI_Sendrecv_replace(Id_buffer, buffer_len, MPI_INT, dest,
                    in->MPIInfo->msg_tag_counter, source,
                    in->MPIInfo->msg_tag_counter, in->MPIInfo->comm, &status);
            MPI_Sendrecv_replace(Tag_buffer, buffer_len, MPI_INT, dest,
                    in->MPIInfo->msg_tag_counter+1, source,
                    in->MPIInfo->msg_tag_counter+1, in->MPIInfo->comm, &status);
            MPI_Sendrecv_replace(globalDegreesOfFreedom_buffer, buffer_len,
                    MPI_INT, dest, in->MPIInfo->msg_tag_counter+2, source,
                    in->MPIInfo->msg_tag_counter+2, in->MPIInfo->comm, &status);
            MPI_Sendrecv_replace(Coordinates_buffer, buffer_len*numDim,
                    MPI_DOUBLE, dest, in->MPIInfo->msg_tag_counter+3, source,
                    in->MPIInfo->msg_tag_counter+3, in->MPIInfo->comm, &status);
#endif
            in->MPIInfo->msg_tag_counter+=4;
        }
        buffer_rank=Esys_MPIInfo_mod(in->MPIInfo->size, buffer_rank-1);
        scatterEntries(in->numNodes, in->Id, distribution[buffer_rank],
                distribution[buffer_rank+1], Id_buffer, in->Id,
                Tag_buffer, in->Tag, globalDegreesOfFreedom_buffer,
                in->globalDegreesOfFreedom, numDim, Coordinates_buffer,
                in->Coordinates);
    }
    // now entries are collected from the buffer again by sending the
    // entries around in a circle
#ifdef ESYS_MPI
    dest=Esys_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank+1);
    source=Esys_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank-1);
#endif
    buffer_rank=in->MPIInfo->rank;
    for (int p=0; p<in->MPIInfo->size; ++p) {
        gatherEntries(numNodes, index, distribution[buffer_rank],
                distribution[buffer_rank+1], Id, Id_buffer, Tag, Tag_buffer,
                globalDegreesOfFreedom, globalDegreesOfFreedom_buffer, numDim,
                Coordinates, Coordinates_buffer);
        if (p < in->MPIInfo->size-1) { // the last send can be skipped
#ifdef ESYS_MPI
            MPI_Sendrecv_replace(Id_buffer, buffer_len, MPI_INT, dest,
                    in->MPIInfo->msg_tag_counter, source,
                    in->MPIInfo->msg_tag_counter, in->MPIInfo->comm, &status);
            MPI_Sendrecv_replace(Tag_buffer, buffer_len, MPI_INT, dest,
                    in->MPIInfo->msg_tag_counter+1, source,
                    in->MPIInfo->msg_tag_counter+1, in->MPIInfo->comm, &status);
            MPI_Sendrecv_replace(globalDegreesOfFreedom_buffer, buffer_len,
                    MPI_INT, dest, in->MPIInfo->msg_tag_counter+2, source,
                    in->MPIInfo->msg_tag_counter+2, in->MPIInfo->comm, &status);
            MPI_Sendrecv_replace(Coordinates_buffer, buffer_len*numDim,
                    MPI_DOUBLE, dest, in->MPIInfo->msg_tag_counter+3, source,
                    in->MPIInfo->msg_tag_counter+3, in->MPIInfo->comm, &status);
#endif
            in->MPIInfo->msg_tag_counter+=4;
        }
        buffer_rank=Esys_MPIInfo_mod(in->MPIInfo->size, buffer_rank-1);
    }
    // check if all nodes are set:
#pragma omp parallel for
    for (int n=0; n<numNodes; ++n) {
        if (Id[n] == undefined_node) {
            std::stringstream ss;
            ss << "NodeFile::gather_global: Node id " << Id[n]
                << " at position " << n << " is referenced but not defined.";
            const std::string errorMsg(ss.str());
            Finley_setError(VALUE_ERROR, errorMsg.c_str());
        }
    }
    delete[] Id_buffer;
    delete[] Tag_buffer;
    delete[] globalDegreesOfFreedom_buffer;
    delete[] Coordinates_buffer;
    // make sure that the error is global
    Esys_MPIInfo_noError(in->MPIInfo);
}

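// Illustrative note: gather_global() and the labeling routines below all use
// the same ring pattern. Each rank owns one slice of a global buffer; the
// buffer is rotated size-1 times with MPI_Sendrecv_replace so every rank sees
// every slice exactly once. A minimal sketch of the pattern (hypothetical
// 'processSlice' callback, same helpers as above):
//
//   int buffer_rank = MPIInfo->rank;
//   for (int p = 0; p < MPIInfo->size; ++p) {
//       processSlice(distribution[buffer_rank], distribution[buffer_rank+1]);
//       if (p < MPIInfo->size-1)
//           MPI_Sendrecv_replace(&buffer[0], buffer.size(), MPI_INT, dest,
//                                tag, source, tag, MPIInfo->comm, &status);
//       buffer_rank = Esys_MPIInfo_mod(MPIInfo->size, buffer_rank-1);
//   }
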
void NodeFile::assignMPIRankToDOFs(Esys_MPI_rank* mpiRankOfDOF,
                                   int *distribution)
{
    Esys_MPI_rank p_min=MPIInfo->size, p_max=-1;
    // first we retrieve the min and max DOF on this processor to reduce
    // costs for searching
    const std::pair<int,int> dof_range(getDOFRange());

    for (int p=0; p<MPIInfo->size; ++p) {
        if (distribution[p]<=dof_range.first) p_min=p;
        if (distribution[p]<=dof_range.second) p_max=p;
    }
#pragma omp parallel for
    for (int n=0; n<numNodes; ++n) {
        const int k=globalDegreesOfFreedom[n];
        for (int p=p_min; p<=p_max; ++p) {
            if (k < distribution[p+1]) {
                mpiRankOfDOF[n]=p;
                break;
            }
        }
    }
}

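// Illustrative note (hypothetical values): 'distribution' holds the first DOF
// owned by each rank plus a final sentinel, e.g. {0, 4, 8, 12} for 3 ranks.
// A DOF k belongs to the rank p with distribution[p] <= k < distribution[p+1],
// so k=5 maps to rank 1 because 4 <= 5 < 8. Restricting the search to
// [p_min, p_max] merely skips ranks that cannot own any local DOF.
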
int NodeFile::prepareLabeling(int* mask, std::vector<int>& buffer,
                              std::vector<int>& distribution, bool useNodes)
{
    const int UNSET_ID=-1, SET_ID=1;

    // get the global range of DOF/node ids
    std::pair<int,int> idRange(useNodes ?
            getGlobalNodeIDIndexRange() : getGlobalDOFRange());
    const int* indexArray = (useNodes ? globalNodesIndex : globalDegreesOfFreedom);
    // distribute the range of node ids
    distribution.assign(MPIInfo->size+1, 0);
    int buffer_len=Esys_MPIInfo_setDistribution(MPIInfo, idRange.first,
            idRange.second, &distribution[0]);
    const int myCount=distribution[MPIInfo->rank+1]-distribution[MPIInfo->rank];

    // fill buffer by the UNSET_ID marker to check if nodes are defined
    buffer.assign(buffer_len, UNSET_ID);

    // fill the buffer by sending portions around in a circle
#ifdef ESYS_MPI
    MPI_Status status;
    int dest=Esys_MPIInfo_mod(MPIInfo->size, MPIInfo->rank + 1);
    int source=Esys_MPIInfo_mod(MPIInfo->size, MPIInfo->rank - 1);
#endif
    int buffer_rank=MPIInfo->rank;
    for (int p=0; p<MPIInfo->size; ++p) {
        if (p>0) { // the initial send can be skipped
#ifdef ESYS_MPI
            MPI_Sendrecv_replace(&buffer[0], buffer.size(), MPI_INT, dest,
                    MPIInfo->msg_tag_counter, source, MPIInfo->msg_tag_counter,
                    MPIInfo->comm, &status);
#endif
            MPIInfo->msg_tag_counter++;
        }
        buffer_rank=Esys_MPIInfo_mod(MPIInfo->size, buffer_rank-1);
        const int id0=distribution[buffer_rank];
        const int id1=distribution[buffer_rank+1];
#pragma omp parallel for
        for (int n=0; n<numNodes; n++) {
            if (!mask || mask[n]>-1) {
                const int k=indexArray[n];
                if (id0<=k && k<id1) {
                    buffer[k-id0] = SET_ID;
                }
            }
        }
    }
    // count the entries in the buffer
    // TODO: OMP parallel
    int myNewCount=0;
    for (int n=0; n<myCount; ++n) {
        if (buffer[n] == SET_ID) {
            buffer[n]=myNewCount;
            myNewCount++;
        }
    }
    return myNewCount;
}

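// Illustrative note (hypothetical values): after the marking phase the local
// slice of 'buffer' holds SET_ID for every id actually in use and UNSET_ID
// otherwise. The final loop turns the marks into a dense local numbering:
//
//   before:  { SET_ID, UNSET_ID, SET_ID, SET_ID }
//   after:   {      0, UNSET_ID,      1,      2 }   with myNewCount == 3
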
int NodeFile::createDenseDOFLabeling()
{
    std::vector<int> DOF_buffer;
    std::vector<int> distribution;
    std::vector<int> loc_offsets(MPIInfo->size);
    std::vector<int> offsets(MPIInfo->size);
    int new_numGlobalDOFs=0;

    // retrieve the number of own DOFs and fill buffer
    loc_offsets[MPIInfo->rank]=prepareLabeling(NULL, DOF_buffer, distribution,
                                               false);
#ifdef ESYS_MPI
    MPI_Allreduce(&loc_offsets[0], &offsets[0], MPIInfo->size, MPI_INT,
                  MPI_SUM, MPIInfo->comm);
    for (int n=0; n<MPIInfo->size; ++n) {
        loc_offsets[n]=new_numGlobalDOFs;
        new_numGlobalDOFs+=offsets[n];
    }
#else
    new_numGlobalDOFs=loc_offsets[0];
    loc_offsets[0]=0;
#endif

    const int myDOFs=distribution[MPIInfo->rank+1]-distribution[MPIInfo->rank];
#pragma omp parallel for
    for (int n=0; n<myDOFs; ++n)
        DOF_buffer[n]+=loc_offsets[MPIInfo->rank];

    std::vector<bool_t> set_new_DOF(numNodes, TRUE);

    // now entries are collected from the buffer again by sending them around
    // in a circle
#ifdef ESYS_MPI
    int dest=Esys_MPIInfo_mod(MPIInfo->size, MPIInfo->rank + 1);
    int source=Esys_MPIInfo_mod(MPIInfo->size, MPIInfo->rank - 1);
#endif
    int buffer_rank=MPIInfo->rank;
    for (int p=0; p<MPIInfo->size; ++p) {
        const int dof0=distribution[buffer_rank];
        const int dof1=distribution[buffer_rank+1];
#pragma omp parallel for
        for (int n=0; n<numNodes; n++) {
            const int k=globalDegreesOfFreedom[n];
            if (set_new_DOF[n] && dof0<=k && k<dof1) {
                globalDegreesOfFreedom[n]=DOF_buffer[k-dof0];
                set_new_DOF[n]=FALSE;
            }
        }
        if (p<MPIInfo->size-1) { // the last send can be skipped
#ifdef ESYS_MPI
            MPI_Status status;
            MPI_Sendrecv_replace(&DOF_buffer[0], DOF_buffer.size(), MPI_INT,
                    dest, MPIInfo->msg_tag_counter, source,
                    MPIInfo->msg_tag_counter, MPIInfo->comm, &status);
#endif
            MPIInfo->msg_tag_counter+=1;
        }
        buffer_rank=Esys_MPIInfo_mod(MPIInfo->size, buffer_rank-1);
    }

    return new_numGlobalDOFs;
}

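// Illustrative note (hypothetical values): the Allreduce/prefix-sum step turns
// per-rank counts into global offsets. With local counts {3, 5, 2} the loop
// produces loc_offsets = {0, 3, 8} and new_numGlobalDOFs = 10, so rank 1's
// locally dense labels 0..4 become the global labels 3..7.
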
int NodeFile::createDenseNodeLabeling(int* node_distribution,
                                      const int* dof_distribution)
{
    const int UNSET_ID=-1, SET_ID=1;
    const int myFirstDOF=dof_distribution[MPIInfo->rank];
    const int myLastDOF=dof_distribution[MPIInfo->rank+1];

    // find the range of node ids controlled by me
    int min_id=std::numeric_limits<int>::max();
    int max_id=std::numeric_limits<int>::min();
#pragma omp parallel
    {
        int loc_max_id=max_id;
        int loc_min_id=min_id;
#pragma omp for
        for (int n=0; n<numNodes; n++) {
            const int dof=globalDegreesOfFreedom[n];
            if (myFirstDOF<=dof && dof<myLastDOF) {
                loc_max_id=std::max(loc_max_id, Id[n]);
                loc_min_id=std::min(loc_min_id, Id[n]);
            }
        }
#pragma omp critical
        {
            max_id=std::max(loc_max_id, max_id);
            min_id=std::min(loc_min_id, min_id);
        }
    }
    int my_buffer_len = (max_id>=min_id ? max_id-min_id+1 : 0);
    int buffer_len;

#ifdef ESYS_MPI
    MPI_Allreduce(&my_buffer_len, &buffer_len, 1, MPI_INT, MPI_MAX,
                  MPIInfo->comm);
#else
    buffer_len=my_buffer_len;
#endif

    const int header_len=2;
    std::vector<int> Node_buffer(buffer_len+header_len, UNSET_ID);
    // extra storage for these IDs
    Node_buffer[0]=min_id;
    Node_buffer[1]=max_id;

    // mark and count the nodes in use
#pragma omp parallel for
    for (int n=0; n<numNodes; n++) {
        globalNodesIndex[n]=-1;
        const int dof=globalDegreesOfFreedom[n];
        if (myFirstDOF<=dof && dof<myLastDOF)
            Node_buffer[Id[n]-min_id+header_len]=SET_ID;
    }
    int myNewNumNodes=0;
    for (int n=0; n<my_buffer_len; n++) {
        if (Node_buffer[header_len+n]==SET_ID) {
            Node_buffer[header_len+n]=myNewNumNodes;
            myNewNumNodes++;
        }
    }
    // make the local number of nodes globally available
#ifdef ESYS_MPI
    MPI_Allgather(&myNewNumNodes, 1, MPI_INT, node_distribution, 1, MPI_INT,
                  MPIInfo->comm);
#else
    node_distribution[0]=myNewNumNodes;
#endif

    int globalNumNodes=0;
    for (int p=0; p<MPIInfo->size; ++p) {
        const int itmp=node_distribution[p];
        node_distribution[p]=globalNumNodes;
        globalNumNodes+=itmp;
    }
    node_distribution[MPIInfo->size]=globalNumNodes;

    // offset node buffer
#pragma omp parallel for
    for (int n=0; n<my_buffer_len; n++)
        Node_buffer[n+header_len]+=node_distribution[MPIInfo->rank];

    // now we send this buffer around to assign global node index
#ifdef ESYS_MPI
    int dest=Esys_MPIInfo_mod(MPIInfo->size, MPIInfo->rank + 1);
    int source=Esys_MPIInfo_mod(MPIInfo->size, MPIInfo->rank - 1);
#endif
    int buffer_rank=MPIInfo->rank;
    for (int p=0; p<MPIInfo->size; ++p) {
        const int nodeID_0=Node_buffer[0];
        const int nodeID_1=Node_buffer[1];
        const int dof0=dof_distribution[buffer_rank];
        const int dof1=dof_distribution[buffer_rank+1];
        if (nodeID_0 <= nodeID_1) {
#pragma omp parallel for
            for (int n=0; n<numNodes; n++) {
                const int dof=globalDegreesOfFreedom[n];
                const int id=Id[n]-nodeID_0;
                if (dof0<=dof && dof<dof1 && id>=0 && id<=nodeID_1-nodeID_0)
                    globalNodesIndex[n]=Node_buffer[id+header_len];
            }
        }
        if (p<MPIInfo->size-1) { // the last send can be skipped
#ifdef ESYS_MPI
            MPI_Status status;
            MPI_Sendrecv_replace(&Node_buffer[0], Node_buffer.size(), MPI_INT,
                    dest, MPIInfo->msg_tag_counter, source,
                    MPIInfo->msg_tag_counter, MPIInfo->comm, &status);
#endif
            MPIInfo->msg_tag_counter+=1;
        }
        buffer_rank=Esys_MPIInfo_mod(MPIInfo->size, buffer_rank-1);
    }
    return globalNumNodes;
}

int NodeFile::createDenseReducedLabeling(int* reducedMask, bool useNodes)
{
    std::vector<int> buffer;
    std::vector<int> distribution;
    std::vector<int> loc_offsets(MPIInfo->size);
    std::vector<int> offsets(MPIInfo->size);
    int new_numGlobalReduced=0;

    // retrieve the number of own DOFs/nodes and fill buffer
    loc_offsets[MPIInfo->rank]=prepareLabeling(reducedMask, buffer,
                                               distribution, useNodes);
#ifdef ESYS_MPI
    MPI_Allreduce(&loc_offsets[0], &offsets[0], MPIInfo->size, MPI_INT,
                  MPI_SUM, MPIInfo->comm);
    for (int n=0; n<MPIInfo->size; ++n) {
        loc_offsets[n]=new_numGlobalReduced;
        new_numGlobalReduced+=offsets[n];
    }
#else
    new_numGlobalReduced=loc_offsets[0];
    loc_offsets[0]=0;
#endif

    const int myCount=distribution[MPIInfo->rank+1]-distribution[MPIInfo->rank];
#pragma omp parallel for
    for (int n=0; n<myCount; ++n)
        buffer[n]+=loc_offsets[MPIInfo->rank];

    const int* denseArray =
        (useNodes ? globalNodesIndex : globalDegreesOfFreedom);
    int* reducedArray =
        (useNodes ? globalReducedNodesIndex : globalReducedDOFIndex);

#pragma omp parallel for
    for (int n=0; n<numNodes; ++n)
        reducedArray[n]=loc_offsets[0]-1;

    // now entries are collected from the buffer by sending them around
    // in a circle
#ifdef ESYS_MPI
    int dest=Esys_MPIInfo_mod(MPIInfo->size, MPIInfo->rank + 1);
    int source=Esys_MPIInfo_mod(MPIInfo->size, MPIInfo->rank - 1);
#endif
    int buffer_rank=MPIInfo->rank;
    for (int p=0; p<MPIInfo->size; ++p) {
        const int id0=distribution[buffer_rank];
        const int id1=distribution[buffer_rank+1];
#pragma omp parallel for
        for (int n=0; n<numNodes; n++) {
            if (reducedMask[n] > -1) {
                const int k=denseArray[n];
                if (id0<=k && k<id1)
                    reducedArray[n]=buffer[k-id0];
            }
        }
        if (p<MPIInfo->size-1) { // the last send can be skipped
#ifdef ESYS_MPI
            MPI_Status status;
            MPI_Sendrecv_replace(&buffer[0], buffer.size(), MPI_INT, dest,
                    MPIInfo->msg_tag_counter, source,
                    MPIInfo->msg_tag_counter, MPIInfo->comm, &status);
#endif
            MPIInfo->msg_tag_counter+=1;
        }
        buffer_rank=Esys_MPIInfo_mod(MPIInfo->size, buffer_rank-1);
    }
    return new_numGlobalReduced;
}

} // namespace finley