trunk/finley/src/NodeFile.cpp
/*****************************************************************************
*
* Copyright (c) 2003-2014 by University of Queensland
* http://www.uq.edu.au
*
* Primary Business: Queensland, Australia
* Licensed under the Open Software License version 3.0
* http://www.opensource.org/licenses/osl-3.0.php
*
* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
* Development 2012-2013 by School of Earth Sciences
* Development from 2014 by Centre for Geoscience Computing (GeoComp)
*
*****************************************************************************/


/****************************************************************************

  Finley: NodeFile

*****************************************************************************/

#include "NodeFile.h"
#include <escript/Data.h>

#include <limits>
#include <sstream>

namespace finley {

// helper function
static void scatterEntries(int n, int* index, int min_index, int max_index,
                           int* Id_out, int* Id_in, int* Tag_out, int* Tag_in,
                           int* globalDegreesOfFreedom_out,
                           int* globalDegreesOfFreedom_in,
                           int numDim, double* Coordinates_out,
                           double* Coordinates_in)
{
    const int range = max_index-min_index;
    const size_t numDim_size = numDim*sizeof(double);

#pragma omp parallel for
    for (int i=0; i<n; i++) {
        const int k=index[i]-min_index;
        if ((k>=0) && (k<range)) {
            Id_out[k]=Id_in[i];
            Tag_out[k]=Tag_in[i];
            globalDegreesOfFreedom_out[k]=globalDegreesOfFreedom_in[i];
            memcpy(&(Coordinates_out[INDEX2(0,k,numDim)]),
                   &(Coordinates_in[INDEX2(0,i,numDim)]), numDim_size);
        }
    }
}

// helper function
static void gatherEntries(int n, const int* index, int min_index, int max_index,
                          int* Id_out, int* Id_in, int* Tag_out, int* Tag_in,
                          int* globalDegreesOfFreedom_out,
                          int* globalDegreesOfFreedom_in,
                          int numDim, double* Coordinates_out,
                          double* Coordinates_in)
{
    const int range = max_index-min_index;
    const size_t numDim_size = numDim*sizeof(double);

#pragma omp parallel for
    for (int i=0; i<n; i++) {
        const int k=index[i]-min_index;
        if ((k>=0) && (k<range)) {
            Id_out[i]=Id_in[k];
            Tag_out[i]=Tag_in[k];
            globalDegreesOfFreedom_out[i]=globalDegreesOfFreedom_in[k];
            memcpy(&(Coordinates_out[INDEX2(0,i,numDim)]),
                   &(Coordinates_in[INDEX2(0,k,numDim)]), numDim_size);
        }
    }
}

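// Note on the two helpers above: scatterEntries copies entry i of the input
// arrays into slot index[i]-min_index of the output arrays, while
// gatherEntries does the inverse and copies slot index[i]-min_index of the
// input arrays into entry i of the output arrays; entries whose shifted
// index falls outside [0, max_index-min_index) are skipped in both cases.
// For example, with index={2,0} and min_index=0, scatterEntries yields
// Id_out[2]=Id_in[0] and Id_out[0]=Id_in[1], whereas gatherEntries yields
// Id_out[0]=Id_in[2] and Id_out[1]=Id_in[0].
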
/// constructor
/// use NodeFile::allocTable to allocate the node table (Id,Coordinates)
NodeFile::NodeFile(int nDim, esysUtils::JMPI& mpiInfo) :
    numNodes(0),
    numDim(nDim),
    Id(NULL),
    Tag(NULL),
    globalDegreesOfFreedom(NULL),
    Coordinates(NULL),
    globalReducedDOFIndex(NULL),
    globalReducedNodesIndex(NULL),
    globalNodesIndex(NULL),
    reducedNodesId(NULL),
    degreesOfFreedomId(NULL),
    reducedDegreesOfFreedomId(NULL),
    status(FINLEY_INITIAL_STATUS)
{
    MPIInfo = mpiInfo;
}

/// destructor
NodeFile::~NodeFile()
{
    freeTable();
}

/// allocates the node table within this node file to hold NN nodes.
void NodeFile::allocTable(int NN)
{
    if (numNodes>0)
        freeTable();

    Id=new int[NN];
    Coordinates=new double[NN*numDim];
    Tag=new int[NN];
    globalDegreesOfFreedom=new int[NN];
    globalReducedDOFIndex=new int[NN];
    globalReducedNodesIndex=new int[NN];
    globalNodesIndex=new int[NN];
    reducedNodesId=new int[NN];
    degreesOfFreedomId=new int[NN];
    reducedDegreesOfFreedomId=new int[NN];
    numNodes=NN;

    // this initialization makes sure that data are located on the right
    // processor
#pragma omp parallel for
    for (int n=0; n<numNodes; n++) {
        Id[n]=-1;
        for (int i=0; i<numDim; i++)
            Coordinates[INDEX2(i,n,numDim)]=0.;
        Tag[n]=-1;
        globalDegreesOfFreedom[n]=-1;
        globalReducedDOFIndex[n]=-1;
        globalReducedNodesIndex[n]=-1;
        globalNodesIndex[n]=-1;
        reducedNodesId[n]=-1;
        degreesOfFreedomId[n]=-1;
        reducedDegreesOfFreedomId[n]=-1;
    }
}

/// frees the node table within this node file
void NodeFile::freeTable()
{
    delete[] Id;
    delete[] Coordinates;
    delete[] globalDegreesOfFreedom;
    delete[] globalReducedDOFIndex;
    delete[] globalReducedNodesIndex;
    delete[] globalNodesIndex;
    delete[] Tag;
    delete[] reducedNodesId;
    delete[] degreesOfFreedomId;
    delete[] reducedDegreesOfFreedomId;
    tagsInUse.clear();
    nodesMapping.clear();
    reducedNodesMapping.clear();
    degreesOfFreedomMapping.clear();
    reducedDegreesOfFreedomMapping.clear();
    nodesDistribution.reset();
    reducedNodesDistribution.reset();
    degreesOfFreedomDistribution.reset();
    reducedDegreesOfFreedomDistribution.reset();
    degreesOfFreedomConnector.reset();
    reducedDegreesOfFreedomConnector.reset();

    numNodes=0;
}

void NodeFile::print() const
{
    std::cout << "=== " << numDim << "D-Nodes:\nnumber of nodes=" << numNodes
        << std::endl;
    std::cout << "Id,Tag,globalDegreesOfFreedom,degreesOfFreedom,reducedDegreesOfFreedom,node,reducedNode,Coordinates" << std::endl;
    for (int i=0; i<numNodes; i++) {
        std::cout << Id[i] << "," << Tag[i] << "," << globalDegreesOfFreedom[i]
            << "," << degreesOfFreedomMapping.target[i]
            << "," << reducedDegreesOfFreedomMapping.target[i]
            << "," << nodesMapping.target[i]
            << "," << reducedNodesMapping.target[i]
            << " ";
        std::cout.precision(15);
        std::cout.setf(std::ios::scientific, std::ios::floatfield);
        for (int j=0; j<numDim; j++)
            std::cout << Coordinates[INDEX2(j,i,numDim)] << " ";
        std::cout << std::endl;
    }
}

/// copies the array newX into this->coordinates
void NodeFile::setCoordinates(const escript::Data& newX)
{
    if (newX.getDataPointSize() != numDim) {
        std::stringstream ss;
        ss << "NodeFile::setCoordinates: number of dimensions of new "
              "coordinates has to be " << numDim;
        const std::string errorMsg(ss.str());
        setError(VALUE_ERROR, errorMsg.c_str());
    } else if (newX.getNumDataPointsPerSample() != 1 ||
            newX.getNumSamples() != numNodes) {
        std::stringstream ss;
        ss << "NodeFile::setCoordinates: number of given nodes must be "
           << numNodes;
        const std::string errorMsg(ss.str());
        setError(VALUE_ERROR, errorMsg.c_str());
    } else {
        const size_t numDim_size=numDim*sizeof(double);
        ++status;
#pragma omp parallel for
        for (int n=0; n<numNodes; n++) {
            memcpy(&(Coordinates[INDEX2(0,n,numDim)]), newX.getSampleDataRO(n), numDim_size);
        }
    }
}

/// sets tags to newTag where mask>0
void NodeFile::setTags(const int newTag, const escript::Data& mask)
{
    resetError();

    if (1 != mask.getDataPointSize()) {
        setError(TYPE_ERROR, "NodeFile::setTags: number of components of mask must be 1.");
        return;
    } else if (mask.getNumDataPointsPerSample() != 1 ||
            mask.getNumSamples() != numNodes) {
        setError(TYPE_ERROR, "NodeFile::setTags: illegal number of samples of mask Data object");
        return;
    }

#pragma omp parallel for
    for (int n=0; n<numNodes; n++) {
        if (mask.getSampleDataRO(n)[0] > 0)
            Tag[n]=newTag;
    }
    updateTagList();
}

std::pair<int,int> NodeFile::getDOFRange() const
{
    std::pair<int,int> result(util::getMinMaxInt(
                                        1, numNodes, globalDegreesOfFreedom));
    if (result.second < result.first) {
        result.first = -1;
        result.second = 0;
    }
    return result;
}

std::pair<int,int> NodeFile::getGlobalIdRange() const
{
    std::pair<int,int> result(util::getMinMaxInt(1, numNodes, Id));

#ifdef ESYS_MPI
    int global_id_range[2];
    int id_range[2] = { -result.first, result.second };
    MPI_Allreduce(id_range, global_id_range, 2, MPI_INT, MPI_MAX, MPIInfo->comm);
    result.first = -global_id_range[0];
    result.second = global_id_range[1];
#endif
    if (result.second < result.first) {
        result.first = -1;
        result.second = 0;
    }
    return result;
}

std::pair<int,int> NodeFile::getGlobalDOFRange() const
{
    std::pair<int,int> result(util::getMinMaxInt(
                                        1, numNodes, globalDegreesOfFreedom));

#ifdef ESYS_MPI
    int global_id_range[2];
    int id_range[2] = { -result.first, result.second };
    MPI_Allreduce(id_range, global_id_range, 2, MPI_INT, MPI_MAX, MPIInfo->comm);
    result.first = -global_id_range[0];
    result.second = global_id_range[1];
#endif
    if (result.second < result.first) {
        result.first = -1;
        result.second = 0;
    }
    return result;
}

std::pair<int,int> NodeFile::getGlobalNodeIDIndexRange() const
{
    std::pair<int,int> result(util::getMinMaxInt(1, numNodes, globalNodesIndex));

#ifdef ESYS_MPI
    int global_id_range[2];
    int id_range[2] = { -result.first, result.second };
    MPI_Allreduce(id_range, global_id_range, 2, MPI_INT, MPI_MAX, MPIInfo->comm);
    result.first = -global_id_range[0];
    result.second = global_id_range[1];
#endif
    if (result.second < result.first) {
        result.first = -1;
        result.second = 0;
    }
    return result;
}

void NodeFile::copyTable(int offset, int idOffset, int dofOffset,
                         const NodeFile* in)
{
    // check number of dimensions and table size
    if (numDim != in->numDim) {
        setError(TYPE_ERROR, "NodeFile::copyTable: dimensions of node files don't match");
        return;
    }
    if (numNodes < in->numNodes+offset) {
        setError(MEMORY_ERROR, "NodeFile::copyTable: node table is too small.");
        return;
    }

#pragma omp parallel for
    for (int n=0; n<in->numNodes; n++) {
        Id[offset+n]=in->Id[n]+idOffset;
        Tag[offset+n]=in->Tag[n];
        globalDegreesOfFreedom[offset+n]=in->globalDegreesOfFreedom[n]+dofOffset;
        for (int i=0; i<numDim; i++)
            Coordinates[INDEX2(i, offset+n, numDim)] =
                                    in->Coordinates[INDEX2(i, n, in->numDim)];
    }
}

/// scatters the NodeFile 'in' into this NodeFile using index[0:in->numNodes-1].
/// index has to be between 0 and numNodes-1.
/// colouring is chosen for the worst case
void NodeFile::scatter(int* index, const NodeFile* in)
{
    scatterEntries(numNodes, index, 0, in->numNodes, Id, in->Id, Tag, in->Tag,
                   globalDegreesOfFreedom, in->globalDegreesOfFreedom,
                   numDim, Coordinates, in->Coordinates);
}

/// gathers this NodeFile from the NodeFile 'in' using the entries in
/// index[0:numNodes-1] which are between min_index and max_index (exclusive)
void NodeFile::gather(int* index, const NodeFile* in)
{
    const std::pair<int,int> id_range(in->getGlobalIdRange());
    gatherEntries(numNodes, index, id_range.first, id_range.second, Id, in->Id,
                  Tag, in->Tag, globalDegreesOfFreedom, in->globalDegreesOfFreedom,
                  numDim, Coordinates, in->Coordinates);
}

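/// gathers nodes across MPI ranks from the NodeFile 'in' using the global
/// node ids in 'index': the global id range is partitioned over the ranks,
/// every rank scatters its local entries into a buffer that is passed around
/// all ranks in a ring (MPI_Sendrecv_replace), and on a second pass each rank
/// gathers the entries requested in 'index' from the circulating buffer.
/// Any id still carrying the undefined_node marker afterwards is reported as
/// referenced but not defined.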
void NodeFile::gather_global(const std::vector<int>& index, const NodeFile* in)
{
    // get the global range of node ids
    const std::pair<int,int> id_range(in->getGlobalIdRange());
    const int undefined_node=id_range.first-1;
    std::vector<int> distribution(in->MPIInfo->size+1);

    // distribute the range of node ids
    int buffer_len=in->MPIInfo->setDistribution(id_range.first, id_range.second, &distribution[0]);

    // allocate buffers
    int *Id_buffer=new int[buffer_len];
    int *Tag_buffer=new int[buffer_len];
    int *globalDegreesOfFreedom_buffer=new int[buffer_len];
    double *Coordinates_buffer=new double[buffer_len*numDim];

    // fill Id_buffer by the undefined_node marker to check if nodes
    // are defined
#pragma omp parallel for
    for (int n=0; n<buffer_len; n++)
        Id_buffer[n]=undefined_node;

    // fill the buffer by sending portions around in a circle
#ifdef ESYS_MPI
    MPI_Status status;
    int dest=esysUtils::mod_rank(in->MPIInfo->size, in->MPIInfo->rank+1);
    int source=esysUtils::mod_rank(in->MPIInfo->size, in->MPIInfo->rank-1);
#endif
    int buffer_rank=in->MPIInfo->rank;
    for (int p=0; p<in->MPIInfo->size; ++p) {
        if (p>0) { // the initial send can be skipped
#ifdef ESYS_MPI
            MPI_Sendrecv_replace(Id_buffer, buffer_len, MPI_INT, dest,
                    in->MPIInfo->msg_tag_counter, source,
                    in->MPIInfo->msg_tag_counter, in->MPIInfo->comm, &status);
            MPI_Sendrecv_replace(Tag_buffer, buffer_len, MPI_INT, dest,
                    in->MPIInfo->msg_tag_counter+1, source,
                    in->MPIInfo->msg_tag_counter+1, in->MPIInfo->comm, &status);
            MPI_Sendrecv_replace(globalDegreesOfFreedom_buffer, buffer_len,
                    MPI_INT, dest, in->MPIInfo->msg_tag_counter+2, source,
                    in->MPIInfo->msg_tag_counter+2, in->MPIInfo->comm, &status);
            MPI_Sendrecv_replace(Coordinates_buffer, buffer_len*numDim,
                    MPI_DOUBLE, dest, in->MPIInfo->msg_tag_counter+3, source,
                    in->MPIInfo->msg_tag_counter+3, in->MPIInfo->comm, &status);
#endif
            ESYS_MPI_INC_COUNTER(*(in->MPIInfo), 4)
        }
        buffer_rank=esysUtils::mod_rank(in->MPIInfo->size, buffer_rank-1);
        scatterEntries(in->numNodes, in->Id, distribution[buffer_rank],
                       distribution[buffer_rank+1], Id_buffer, in->Id,
                       Tag_buffer, in->Tag, globalDegreesOfFreedom_buffer,
                       in->globalDegreesOfFreedom, numDim, Coordinates_buffer,
                       in->Coordinates);
    }
    // now entries are collected from the buffer again by sending the
    // entries around in a circle
#ifdef ESYS_MPI
    dest=esysUtils::mod_rank(in->MPIInfo->size, in->MPIInfo->rank+1);
    source=esysUtils::mod_rank(in->MPIInfo->size, in->MPIInfo->rank-1);
#endif
    buffer_rank=in->MPIInfo->rank;
    for (int p=0; p<in->MPIInfo->size; ++p) {
        gatherEntries(numNodes, &index[0], distribution[buffer_rank],
                      distribution[buffer_rank+1], Id, Id_buffer, Tag, Tag_buffer,
                      globalDegreesOfFreedom, globalDegreesOfFreedom_buffer, numDim,
                      Coordinates, Coordinates_buffer);
        if (p < in->MPIInfo->size-1) { // the last send can be skipped
#ifdef ESYS_MPI
            MPI_Sendrecv_replace(Id_buffer, buffer_len, MPI_INT, dest,
                    in->MPIInfo->msg_tag_counter, source,
                    in->MPIInfo->msg_tag_counter, in->MPIInfo->comm, &status);
            MPI_Sendrecv_replace(Tag_buffer, buffer_len, MPI_INT, dest,
                    in->MPIInfo->msg_tag_counter+1, source,
                    in->MPIInfo->msg_tag_counter+1, in->MPIInfo->comm, &status);
            MPI_Sendrecv_replace(globalDegreesOfFreedom_buffer, buffer_len,
                    MPI_INT, dest, in->MPIInfo->msg_tag_counter+2, source,
                    in->MPIInfo->msg_tag_counter+2, in->MPIInfo->comm, &status);
            MPI_Sendrecv_replace(Coordinates_buffer, buffer_len*numDim,
                    MPI_DOUBLE, dest, in->MPIInfo->msg_tag_counter+3, source,
                    in->MPIInfo->msg_tag_counter+3, in->MPIInfo->comm, &status);
#endif
            ESYS_MPI_INC_COUNTER(*(in->MPIInfo), 4)
        }
        buffer_rank=esysUtils::mod_rank(in->MPIInfo->size, buffer_rank-1);
    }
    // check if all nodes are set:
#pragma omp parallel for
    for (int n=0; n<numNodes; ++n) {
        if (Id[n] == undefined_node) {
            std::stringstream ss;
            ss << "NodeFile::gather_global: Node id " << Id[n]
               << " at position " << n << " is referenced but not defined.";
            const std::string errorMsg(ss.str());
            setError(VALUE_ERROR, errorMsg.c_str());
        }
    }
    delete[] Id_buffer;
    delete[] Tag_buffer;
    delete[] globalDegreesOfFreedom_buffer;
    delete[] Coordinates_buffer;
    // make sure that the error is global
    esysUtils::Esys_MPIInfo_noError(in->MPIInfo);
}

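/// writes into mpiRankOfDOF, for every node, the MPI rank that owns the
/// node's degree of freedom according to the given DOF distribution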
void NodeFile::assignMPIRankToDOFs(std::vector<int>& mpiRankOfDOF,
                                   const std::vector<int>& distribution)
{
    Esys_MPI_rank p_min=MPIInfo->size, p_max=-1;
    // first we retrieve the min and max DOF on this processor to reduce
    // costs for searching
    const std::pair<int,int> dof_range(getDOFRange());

    for (int p=0; p<MPIInfo->size; ++p) {
        if (distribution[p]<=dof_range.first) p_min=p;
        if (distribution[p]<=dof_range.second) p_max=p;
    }
#pragma omp parallel for
    for (int n=0; n<numNodes; ++n) {
        const int k=globalDegreesOfFreedom[n];
        for (int p=p_min; p<=p_max; ++p) {
            if (k < distribution[p+1]) {
                mpiRankOfDOF[n]=p;
                break;
            }
        }
    }
}

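/// helper for the dense labeling methods below: partitions the global
/// DOF/node id range over the ranks, marks (in the circulated buffer) the
/// ids used on this rank and not excluded by 'mask', and finally replaces
/// the markers in the locally owned part of the buffer by a consecutive
/// numbering 0,1,... Returns the number of locally owned ids in use.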
int NodeFile::prepareLabeling(const std::vector<short>& mask,
                              std::vector<int>& buffer,
                              std::vector<int>& distribution, bool useNodes)
{
    const int UNSET_ID=-1, SET_ID=1;

    // get the global range of DOF/node ids
    std::pair<int,int> idRange(useNodes ?
            getGlobalNodeIDIndexRange() : getGlobalDOFRange());
    const int* indexArray = (useNodes ? globalNodesIndex : globalDegreesOfFreedom);
    // distribute the range of node ids
    distribution.assign(MPIInfo->size+1, 0);
    int buffer_len=MPIInfo->setDistribution(idRange.first,
                                            idRange.second, &distribution[0]);
    const int myCount=distribution[MPIInfo->rank+1]-distribution[MPIInfo->rank];

    // fill buffer by the UNSET_ID marker to check if nodes are defined
    buffer.assign(buffer_len, UNSET_ID);

    // fill the buffer by sending portions around in a circle
#ifdef ESYS_MPI
    MPI_Status status;
    int dest=esysUtils::mod_rank(MPIInfo->size, MPIInfo->rank + 1);
    int source=esysUtils::mod_rank(MPIInfo->size, MPIInfo->rank - 1);
#endif
    int buffer_rank=MPIInfo->rank;
    for (int p=0; p<MPIInfo->size; ++p) {
        if (p>0) { // the initial send can be skipped
#ifdef ESYS_MPI
            MPI_Sendrecv_replace(&buffer[0], buffer.size(), MPI_INT, dest,
                    MPIInfo->msg_tag_counter, source, MPIInfo->msg_tag_counter,
                    MPIInfo->comm, &status);
#endif
            MPIInfo->msg_tag_counter++;
        }
        buffer_rank=esysUtils::mod_rank(MPIInfo->size, buffer_rank-1);
        const int id0=distribution[buffer_rank];
        const int id1=distribution[buffer_rank+1];
#pragma omp parallel for
        for (int n=0; n<numNodes; n++) {
            if (mask.size()<numNodes || mask[n]>-1) {
                const int k=indexArray[n];
                if (id0<=k && k<id1) {
                    buffer[k-id0] = SET_ID;
                }
            }
        }
    }
    // count the entries in the buffer
    // TODO: OMP parallel
    int myNewCount=0;
    for (int n=0; n<myCount; ++n) {
        if (buffer[n] == SET_ID) {
            buffer[n]=myNewCount;
            myNewCount++;
        }
    }
    return myNewCount;
}

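/// replaces the (possibly sparse) values in globalDegreesOfFreedom by a
/// dense labeling 0..newNumGlobalDOFs-1: each rank labels the DOFs it owns,
/// the per-rank counts are converted to global offsets, and the relabeled
/// buffer is circulated in a ring so every rank can translate the DOFs it
/// references. Returns the new global number of degrees of freedom.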
int NodeFile::createDenseDOFLabeling()
{
    std::vector<int> DOF_buffer;
    std::vector<int> distribution;
    std::vector<int> loc_offsets(MPIInfo->size);
    std::vector<int> offsets(MPIInfo->size);
    int new_numGlobalDOFs=0;

    // retrieve the number of own DOFs and fill buffer
    loc_offsets[MPIInfo->rank]=prepareLabeling(std::vector<short>(),
                                               DOF_buffer, distribution, false);
#ifdef ESYS_MPI
    MPI_Allreduce(&loc_offsets[0], &offsets[0], MPIInfo->size, MPI_INT,
                  MPI_SUM, MPIInfo->comm);
    for (int n=0; n<MPIInfo->size; ++n) {
        loc_offsets[n]=new_numGlobalDOFs;
        new_numGlobalDOFs+=offsets[n];
    }
#else
    new_numGlobalDOFs=loc_offsets[0];
    loc_offsets[0]=0;
#endif

    const int myDOFs=distribution[MPIInfo->rank+1]-distribution[MPIInfo->rank];
#pragma omp parallel for
    for (int n=0; n<myDOFs; ++n)
        DOF_buffer[n]+=loc_offsets[MPIInfo->rank];

    std::vector<unsigned char> set_new_DOF(numNodes, true);

    // now entries are collected from the buffer again by sending them around
    // in a circle
#ifdef ESYS_MPI
    int dest=esysUtils::mod_rank(MPIInfo->size, MPIInfo->rank + 1);
    int source=esysUtils::mod_rank(MPIInfo->size, MPIInfo->rank - 1);
#endif
    int buffer_rank=MPIInfo->rank;
    for (int p=0; p<MPIInfo->size; ++p) {
        const int dof0=distribution[buffer_rank];
        const int dof1=distribution[buffer_rank+1];
#pragma omp parallel for
        for (int n=0; n<numNodes; n++) {
            const int k=globalDegreesOfFreedom[n];
            if (set_new_DOF[n] && dof0<=k && k<dof1) {
                globalDegreesOfFreedom[n]=DOF_buffer[k-dof0];
                set_new_DOF[n]=false;
            }
        }
        if (p<MPIInfo->size-1) { // the last send can be skipped
#ifdef ESYS_MPI
            MPI_Status status;
            MPI_Sendrecv_replace(&DOF_buffer[0], DOF_buffer.size(), MPI_INT,
                    dest, MPIInfo->msg_tag_counter, source,
                    MPIInfo->msg_tag_counter, MPIInfo->comm, &status);
#endif
            ESYS_MPI_INC_COUNTER(*MPIInfo, 1)
        }
        buffer_rank=esysUtils::mod_rank(MPIInfo->size, buffer_rank-1);
    }

    return new_numGlobalDOFs;
}

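/// assigns a dense global numbering to globalNodesIndex: each rank labels
/// the nodes whose degree of freedom it owns, the local counts are
/// allgathered into nodeDistribution, and the offset buffer is circulated in
/// a ring so every rank picks up the new index of each of its nodes from the
/// rank owning the corresponding DOF. Returns the global number of nodes.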
int NodeFile::createDenseNodeLabeling(std::vector<int>& nodeDistribution,
                                      const std::vector<int>& dofDistribution)
{
    const int UNSET_ID=-1, SET_ID=1;
    const int myFirstDOF=dofDistribution[MPIInfo->rank];
    const int myLastDOF=dofDistribution[MPIInfo->rank+1];

    // find the range of node ids controlled by me
    int min_id=std::numeric_limits<int>::max();
    int max_id=std::numeric_limits<int>::min();
#pragma omp parallel
    {
        int loc_max_id=max_id;
        int loc_min_id=min_id;
#pragma omp for
        for (int n=0; n<numNodes; n++) {
            const int dof=globalDegreesOfFreedom[n];
            if (myFirstDOF<=dof && dof<myLastDOF) {
                loc_max_id=std::max(loc_max_id, Id[n]);
                loc_min_id=std::min(loc_min_id, Id[n]);
            }
        }
#pragma omp critical
        {
            max_id=std::max(loc_max_id, max_id);
            min_id=std::min(loc_min_id, min_id);
        }
    }
    int my_buffer_len = (max_id>=min_id ? max_id-min_id+1 : 0);
    int buffer_len;

#ifdef ESYS_MPI
    MPI_Allreduce(&my_buffer_len, &buffer_len, 1, MPI_INT, MPI_MAX,
                  MPIInfo->comm);
#else
    buffer_len=my_buffer_len;
#endif

    const int header_len=2;
    std::vector<int> Node_buffer(buffer_len+header_len, UNSET_ID);
    // extra storage for these IDs
    Node_buffer[0]=min_id;
    Node_buffer[1]=max_id;

    // mark and count the nodes in use
#pragma omp parallel for
    for (int n=0; n<numNodes; n++) {
        globalNodesIndex[n]=-1;
        const int dof=globalDegreesOfFreedom[n];
        if (myFirstDOF<=dof && dof<myLastDOF)
            Node_buffer[Id[n]-min_id+header_len]=SET_ID;
    }
    int myNewNumNodes=0;
    for (int n=0; n<my_buffer_len; n++) {
        if (Node_buffer[header_len+n]==SET_ID) {
            Node_buffer[header_len+n]=myNewNumNodes;
            myNewNumNodes++;
        }
    }
    // make the local number of nodes globally available
#ifdef ESYS_MPI
    MPI_Allgather(&myNewNumNodes, 1, MPI_INT, &nodeDistribution[0], 1, MPI_INT,
                  MPIInfo->comm);
#else
    nodeDistribution[0]=myNewNumNodes;
#endif

    int globalNumNodes=0;
    for (int p=0; p<MPIInfo->size; ++p) {
        const int itmp=nodeDistribution[p];
        nodeDistribution[p]=globalNumNodes;
        globalNumNodes+=itmp;
    }
    nodeDistribution[MPIInfo->size]=globalNumNodes;

    // offset node buffer
#pragma omp parallel for
    for (int n=0; n<my_buffer_len; n++)
        Node_buffer[n+header_len]+=nodeDistribution[MPIInfo->rank];

    // now we send this buffer around to assign global node index
#ifdef ESYS_MPI
    int dest=esysUtils::mod_rank(MPIInfo->size, MPIInfo->rank + 1);
    int source=esysUtils::mod_rank(MPIInfo->size, MPIInfo->rank - 1);
#endif
    int buffer_rank=MPIInfo->rank;
    for (int p=0; p<MPIInfo->size; ++p) {
        const int nodeID_0=Node_buffer[0];
        const int nodeID_1=Node_buffer[1];
        const int dof0=dofDistribution[buffer_rank];
        const int dof1=dofDistribution[buffer_rank+1];
        if (nodeID_0 <= nodeID_1) {
#pragma omp parallel for
            for (int n=0; n<numNodes; n++) {
                const int dof=globalDegreesOfFreedom[n];
                const int id=Id[n]-nodeID_0;
                if (dof0<=dof && dof<dof1 && id>=0 && id<=nodeID_1-nodeID_0)
                    globalNodesIndex[n]=Node_buffer[id+header_len];
            }
        }
        if (p<MPIInfo->size-1) { // the last send can be skipped
#ifdef ESYS_MPI
            MPI_Status status;
            MPI_Sendrecv_replace(&Node_buffer[0], Node_buffer.size(), MPI_INT,
                    dest, MPIInfo->msg_tag_counter, source,
                    MPIInfo->msg_tag_counter, MPIInfo->comm, &status);
#endif
            ESYS_MPI_INC_COUNTER(*MPIInfo, 1)
        }
        buffer_rank=esysUtils::mod_rank(MPIInfo->size, buffer_rank-1);
    }
    return globalNumNodes;
}

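/// the reduced counterpart of the two methods above: computes a dense
/// labeling of the entries flagged in reducedMask and writes it to
/// globalReducedNodesIndex (useNodes=true) or globalReducedDOFIndex
/// (useNodes=false). Returns the global number of reduced nodes/DOFs.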
int NodeFile::createDenseReducedLabeling(const std::vector<short>& reducedMask,
                                         bool useNodes)
{
    std::vector<int> buffer;
    std::vector<int> distribution;
    std::vector<int> loc_offsets(MPIInfo->size);
    std::vector<int> offsets(MPIInfo->size);
    int new_numGlobalReduced=0;

    // retrieve the number of own DOFs/nodes and fill buffer
    loc_offsets[MPIInfo->rank]=prepareLabeling(reducedMask, buffer,
                                               distribution, useNodes);
#ifdef ESYS_MPI
    MPI_Allreduce(&loc_offsets[0], &offsets[0], MPIInfo->size, MPI_INT,
                  MPI_SUM, MPIInfo->comm);
    for (int n=0; n<MPIInfo->size; ++n) {
        loc_offsets[n]=new_numGlobalReduced;
        new_numGlobalReduced+=offsets[n];
    }
#else
    new_numGlobalReduced=loc_offsets[0];
    loc_offsets[0]=0;
#endif

    const int myCount=distribution[MPIInfo->rank+1]-distribution[MPIInfo->rank];
#pragma omp parallel for
    for (int n=0; n<myCount; ++n)
        buffer[n]+=loc_offsets[MPIInfo->rank];

    const int* denseArray =
        (useNodes ? globalNodesIndex : globalDegreesOfFreedom);
    int* reducedArray =
        (useNodes ? globalReducedNodesIndex : globalReducedDOFIndex);

#pragma omp parallel for
    for (int n=0; n<numNodes; ++n)
        reducedArray[n]=loc_offsets[0]-1;

    // now entries are collected from the buffer by sending them around
    // in a circle
#ifdef ESYS_MPI
    int dest=esysUtils::mod_rank(MPIInfo->size, MPIInfo->rank + 1);
    int source=esysUtils::mod_rank(MPIInfo->size, MPIInfo->rank - 1);
#endif
    int buffer_rank=MPIInfo->rank;
    for (int p=0; p<MPIInfo->size; ++p) {
        const int id0=distribution[buffer_rank];
        const int id1=distribution[buffer_rank+1];
#pragma omp parallel for
        for (int n=0; n<numNodes; n++) {
            if (reducedMask[n] > -1) {
                const int k=denseArray[n];
                if (id0<=k && k<id1)
                    reducedArray[n]=buffer[k-id0];
            }
        }
        if (p<MPIInfo->size-1) { // the last send can be skipped
#ifdef ESYS_MPI
            MPI_Status status;
            MPI_Sendrecv_replace(&buffer[0], buffer.size(), MPI_INT, dest,
                    MPIInfo->msg_tag_counter, source,
                    MPIInfo->msg_tag_counter, MPIInfo->comm, &status);
#endif
            ESYS_MPI_INC_COUNTER(*MPIInfo, 1)
        }
        buffer_rank=esysUtils::mod_rank(MPIInfo->size, buffer_rank-1);
    }
    return new_numGlobalReduced;
}

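/// builds the mapping from nodes to local degrees of freedom (full or
/// reduced, depending on use_reduced_elements) together with the
/// paso::Connector used to exchange DOF values with neighbouring ranks:
/// locally owned DOFs keep their local numbers, DOFs owned by other ranks
/// are appended after them, and the ranks exchange their lists of wanted
/// DOFs to set up the send and receive shared components.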
void NodeFile::createDOFMappingAndCoupling(bool use_reduced_elements)
{
    paso::Distribution_ptr dof_distribution;
    const int* globalDOFIndex;
    if (use_reduced_elements) {
        dof_distribution=reducedDegreesOfFreedomDistribution;
        globalDOFIndex=globalReducedDOFIndex;
    } else {
        dof_distribution=degreesOfFreedomDistribution;
        globalDOFIndex=globalDegreesOfFreedom;
    }
    const int myFirstDOF=dof_distribution->getFirstComponent();
    const int myLastDOF=dof_distribution->getLastComponent();
    const int mpiSize=MPIInfo->size;
    const int myRank=MPIInfo->rank;

    int min_DOF, max_DOF;
    std::pair<int,int> DOF_range(util::getFlaggedMinMaxInt(
                                            numNodes, globalDOFIndex, -1));

    if (DOF_range.second < DOF_range.first) {
        min_DOF=myFirstDOF;
        max_DOF=myLastDOF-1;
    } else {
        min_DOF=DOF_range.first;
        max_DOF=DOF_range.second;
    }

    int p_min=mpiSize;
    int p_max=-1;
    if (max_DOF >= min_DOF) {
        for (int p=0; p<mpiSize; ++p) {
            if (dof_distribution->first_component[p]<=min_DOF) p_min=p;
            if (dof_distribution->first_component[p]<=max_DOF) p_max=p;
        }
    }

    if (!((min_DOF<=myFirstDOF) && (myLastDOF-1<=max_DOF))) {
        setError(SYSTEM_ERROR, "Local elements do not span local degrees of freedom.");
        return;
    }
    const int UNUSED = -1;
    const int len_loc_dof=max_DOF-min_DOF+1;
    std::vector<int> shared(numNodes*(p_max-p_min+1));
    std::vector<int> offsetInShared(mpiSize+1);
    std::vector<int> locDOFMask(len_loc_dof, UNUSED);

#pragma omp parallel
    {
#pragma omp for
        for (int i=0; i<numNodes; ++i) {
            const int k=globalDOFIndex[i];
            if (k > -1) {
#ifdef BOUNDS_CHECK
                if ((k-min_DOF)>=len_loc_dof) {
                    printf("BOUNDS_CHECK %s %d i=%d k=%d min_DOF=%d\n", __FILE__, __LINE__, i, k, min_DOF);
                    exit(1);
                }
#endif
                locDOFMask[k-min_DOF]=UNUSED-1;
            }
        }
#ifdef BOUNDS_CHECK
        if (myLastDOF-min_DOF > len_loc_dof) {
            printf("BOUNDS_CHECK %s %d\n", __FILE__, __LINE__);
            exit(1);
        }
#endif
#pragma omp for
        for (int i=myFirstDOF-min_DOF; i<myLastDOF-min_DOF; ++i) {
            locDOFMask[i]=i-myFirstDOF+min_DOF;
        }
    }

    std::vector<int> wanted_DOFs(numNodes);
    std::vector<int> rcv_len(mpiSize);
    std::vector<int> snd_len(mpiSize);
    std::vector<int> neighbor(mpiSize);
    int numNeighbors=0;
    int n=0;
    int lastn=n;
    for (int p=p_min; p<=p_max; ++p) {
        if (p != myRank) {
            const int firstDOF=std::max(min_DOF, dof_distribution->first_component[p]);
            const int lastDOF=std::min(max_DOF+1, dof_distribution->first_component[p+1]);
#ifdef BOUNDS_CHECK
            if (firstDOF-min_DOF<0 || lastDOF-min_DOF>len_loc_dof) {
                printf("BOUNDS_CHECK %s %d p=%d\n", __FILE__, __LINE__, p);
                exit(1);
            }
#endif
            for (int i=firstDOF-min_DOF; i<lastDOF-min_DOF; ++i) {
                if (locDOFMask[i] == UNUSED-1) {
                    locDOFMask[i]=myLastDOF-myFirstDOF+n;
                    wanted_DOFs[n]=i+min_DOF;
                    ++n;
                }
            }
            if (n > lastn) {
                rcv_len[p]=n-lastn;
#ifdef BOUNDS_CHECK
                if (numNeighbors >= mpiSize+1) {
                    printf("BOUNDS_CHECK %s %d p=%d numNeighbors=%d n=%d\n", __FILE__, __LINE__, p, numNeighbors, n);
                    exit(1);
                }
#endif
                neighbor[numNeighbors]=p;
                offsetInShared[numNeighbors]=lastn;
                numNeighbors++;
                lastn=n;
            }
        } // if p!=myRank
    } // for p

#ifdef BOUNDS_CHECK
    if (numNeighbors >= mpiSize+1) {
        printf("BOUNDS_CHECK %s %d numNeighbors=%d\n", __FILE__, __LINE__, numNeighbors);
        exit(1);
    }
#endif
    offsetInShared[numNeighbors]=lastn;

    // assign new DOF labels to nodes
    std::vector<int> nodeMask(numNodes, UNUSED);
#pragma omp parallel for
    for (int i=0; i<numNodes; ++i) {
        const int k=globalDOFIndex[i];
        if (k > -1)
            nodeMask[i]=locDOFMask[k-min_DOF];
    }

    // now we can set the mapping from nodes to local DOFs
    if (use_reduced_elements) {
        reducedDegreesOfFreedomMapping.assign(nodeMask, UNUSED);
    } else {
        degreesOfFreedomMapping.assign(nodeMask, UNUSED);
    }

    // define how to get DOF values that are controlled by other processors
#ifdef BOUNDS_CHECK
    if (offsetInShared[numNeighbors] >= numNodes*(p_max-p_min+1)) {
        printf("BOUNDS_CHECK %s %d\n", __FILE__, __LINE__);
        exit(1);
    }
#endif
#pragma omp parallel for
    for (int i=0; i<offsetInShared[numNeighbors]; ++i)
        shared[i]=myLastDOF-myFirstDOF+i;

    paso::SharedComponents_ptr rcv_shcomp(new paso::SharedComponents(
                myLastDOF-myFirstDOF, numNeighbors, &neighbor[0], &shared[0],
                &offsetInShared[0], 1, 0, MPIInfo));

    /////////////////////////////////
    //   now we build the sender   //
    /////////////////////////////////
#ifdef ESYS_MPI
    std::vector<MPI_Request> mpi_requests(mpiSize*2);
    std::vector<MPI_Status> mpi_stati(mpiSize*2);
    MPI_Alltoall(&rcv_len[0], 1, MPI_INT, &snd_len[0], 1, MPI_INT, MPIInfo->comm);
    int count=0;
#else
    snd_len[0]=rcv_len[0];
#endif

    for (int p=0; p<rcv_shcomp->numNeighbors; p++) {
#ifdef ESYS_MPI
        MPI_Isend(&(wanted_DOFs[rcv_shcomp->offsetInShared[p]]),
                rcv_shcomp->offsetInShared[p+1]-rcv_shcomp->offsetInShared[p],
                MPI_INT, rcv_shcomp->neighbor[p],
                MPIInfo->msg_tag_counter+myRank, MPIInfo->comm,
                &mpi_requests[count]);
        count++;
#endif
    }
    n=0;
    numNeighbors=0;
    for (int p=0; p<mpiSize; p++) {
        if (snd_len[p] > 0) {
#ifdef ESYS_MPI
            MPI_Irecv(&shared[n], snd_len[p], MPI_INT, p,
                    MPIInfo->msg_tag_counter+p, MPIInfo->comm,
                    &mpi_requests[count]);
            count++;
#endif
            neighbor[numNeighbors]=p;
            offsetInShared[numNeighbors]=n;
            numNeighbors++;
            n+=snd_len[p];
        }
    }
    ESYS_MPI_INC_COUNTER(*MPIInfo, MPIInfo->size)
    offsetInShared[numNeighbors]=n;
#ifdef ESYS_MPI
    MPI_Waitall(count, &mpi_requests[0], &mpi_stati[0]);
#endif
    // map global ids to local ids
#pragma omp parallel for
    for (int i=0; i<offsetInShared[numNeighbors]; ++i) {
        shared[i]=locDOFMask[shared[i]-min_DOF];
    }

    paso::SharedComponents_ptr snd_shcomp(new paso::SharedComponents(
                myLastDOF-myFirstDOF, numNeighbors, &neighbor[0], &shared[0],
                &offsetInShared[0], 1, 0, MPIInfo));

    if (noError()) {
        if (use_reduced_elements) {
            reducedDegreesOfFreedomConnector.reset(new paso::Connector(snd_shcomp, rcv_shcomp));
        } else {
            degreesOfFreedomConnector.reset(new paso::Connector(snd_shcomp, rcv_shcomp));
        }
    }
}

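/// creates the distributions, mappings and connectors for nodes, DOFs and
/// their reduced counterparts from the list of reduced node indices and the
/// given node/DOF distributions: the reduced distributions are derived by
/// counting the locally owned reduced nodes/DOFs, the node and reduced node
/// mappings are assigned, createDOFMappingAndCoupling() is called for the
/// full and the reduced case, and the Id arrays for reduced nodes and
/// (reduced) degrees of freedom are filled.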
void NodeFile::createNodeMappings(const std::vector<int>& indexReducedNodes,
                                  const std::vector<int>& dofDist,
                                  const std::vector<int>& nodeDist)
{
    const int mpiSize=MPIInfo->size;
    const int myRank=MPIInfo->rank;

    const int myFirstDOF=dofDist[myRank];
    const int myLastDOF=dofDist[myRank+1];
    const int myNumDOF=myLastDOF-myFirstDOF;

    const int myFirstNode=nodeDist[myRank];
    const int myLastNode=nodeDist[myRank+1];
    const int myNumNodes=myLastNode-myFirstNode;

    std::vector<short> maskMyReducedDOF(myNumDOF, -1);
    std::vector<short> maskMyReducedNodes(myNumNodes, -1);

    // mark the nodes used by the reduced mesh
#pragma omp parallel for
    for (int i=0; i<indexReducedNodes.size(); ++i) {
        int k=globalNodesIndex[indexReducedNodes[i]];
        if (k>=myFirstNode && myLastNode>k)
            maskMyReducedNodes[k-myFirstNode]=1;
        k=globalDegreesOfFreedom[indexReducedNodes[i]];
        if (k>=myFirstDOF && myLastDOF>k) {
            maskMyReducedDOF[k-myFirstDOF]=1;
        }
    }
    std::vector<int> indexMyReducedDOF = util::packMask(maskMyReducedDOF);
    int myNumReducedDOF=indexMyReducedDOF.size();
    std::vector<int> indexMyReducedNodes = util::packMask(maskMyReducedNodes);
    int myNumReducedNodes=indexMyReducedNodes.size();

    std::vector<int> rdofDist(mpiSize+1);
    std::vector<int> rnodeDist(mpiSize+1);
#ifdef ESYS_MPI
    MPI_Allgather(&myNumReducedNodes, 1, MPI_INT, &rnodeDist[0], 1, MPI_INT, MPIInfo->comm);
    MPI_Allgather(&myNumReducedDOF, 1, MPI_INT, &rdofDist[0], 1, MPI_INT, MPIInfo->comm);
#else
    rnodeDist[0]=myNumReducedNodes;
    rdofDist[0]=myNumReducedDOF;
#endif
    int globalNumReducedNodes=0;
    int globalNumReducedDOF=0;
    for (int i=0; i<mpiSize; ++i) {
        int k=rnodeDist[i];
        rnodeDist[i]=globalNumReducedNodes;
        globalNumReducedNodes+=k;

        k=rdofDist[i];
        rdofDist[i]=globalNumReducedDOF;
        globalNumReducedDOF+=k;
    }
    rnodeDist[mpiSize]=globalNumReducedNodes;
    rdofDist[mpiSize]=globalNumReducedDOF;

    // ==== distribution of Nodes ===============================
    nodesDistribution.reset(new paso::Distribution(MPIInfo, &nodeDist[0], 1, 0));
    // ==== distribution of DOFs ================================
    degreesOfFreedomDistribution.reset(new paso::Distribution(MPIInfo, &dofDist[0], 1, 0));
    // ==== distribution of reduced Nodes =======================
    reducedNodesDistribution.reset(new paso::Distribution(MPIInfo, &rnodeDist[0], 1, 0));
    // ==== distribution of reduced DOF =========================
    reducedDegreesOfFreedomDistribution.reset(new paso::Distribution(
                                            MPIInfo, &rdofDist[0], 1, 0));

    std::vector<int> nodeMask(numNodes);

    if (noError()) {
        const int UNUSED = -1;
        // ==== nodes mapping which is a dummy structure ========
#pragma omp parallel for
        for (int i=0; i<numNodes; ++i)
            nodeMask[i]=i;
        nodesMapping.assign(nodeMask, UNUSED);

        // ==== mapping between nodes and reduced nodes ==========
#pragma omp parallel for
        for (int i=0; i<numNodes; ++i)
            nodeMask[i]=UNUSED;
#pragma omp parallel for
        for (int i=0; i<indexReducedNodes.size(); ++i)
            nodeMask[indexReducedNodes[i]]=i;
        reducedNodesMapping.assign(nodeMask, UNUSED);
    }
    // ==== mapping between nodes and DOFs + DOF connector
    if (noError())
        createDOFMappingAndCoupling(false);
    // ==== mapping between nodes and reduced DOFs + reduced DOF connector
    if (noError())
        createDOFMappingAndCoupling(true);

    // get the Ids for DOFs and reduced nodes
    if (noError()) {
        const int rnTargets = reducedNodesMapping.getNumTargets();
        const int dofTargets = degreesOfFreedomMapping.getNumTargets();
        const int rdofTargets = reducedDegreesOfFreedomMapping.getNumTargets();
#pragma omp parallel
        {
#pragma omp for
            for (int i=0; i<rnTargets; ++i)
                reducedNodesId[i]=Id[reducedNodesMapping.map[i]];
#pragma omp for
            for (int i=0; i<dofTargets; ++i)
                degreesOfFreedomId[i]=Id[degreesOfFreedomMapping.map[i]];
#pragma omp for
            for (int i=0; i<rdofTargets; ++i)
                reducedDegreesOfFreedomId[i]=Id[reducedDegreesOfFreedomMapping.map[i]];
        }
    } else {
        nodesDistribution.reset();
        reducedNodesDistribution.reset();
        degreesOfFreedomDistribution.reset();
        reducedDegreesOfFreedomDistribution.reset();
        degreesOfFreedomConnector.reset();
        reducedDegreesOfFreedomConnector.reset();
    }
}

} // namespace finley

