/[escript]/trunk/finley/src/NodeFile.cpp

Annotation of /trunk/finley/src/NodeFile.cpp



Revision 4816
Fri Mar 28 06:16:02 2014 UTC by caltinay
File size: 40186 byte(s)
paso::SharedComponents now header-only and shared ptr managed.

1 jgs 150
2 jfenwick 3981 /*****************************************************************************
3 ksteube 1811 *
4 jfenwick 4657 * Copyright (c) 2003-2014 by University of Queensland
5 jfenwick 3981 * http://www.uq.edu.au
6 ksteube 1811 *
7     * Primary Business: Queensland, Australia
8     * Licensed under the Open Software License version 3.0
9     * http://www.opensource.org/licenses/osl-3.0.php
10     *
11 jfenwick 3981 * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12 jfenwick 4657 * Development 2012-2013 by School of Earth Sciences
13     * Development from 2014 by Centre for Geoscience Computing (GeoComp)
14 jfenwick 3981 *
15     *****************************************************************************/
16 ksteube 1312
17 ksteube 1811
18 caltinay 4428 /****************************************************************************
19 jgs 82
20 caltinay 4428 Finley: NodeFile
21    
22     *****************************************************************************/
23    
24 jgs 82 #include "NodeFile.h"
25 caltinay 4428 #include <escript/Data.h>
26 caltinay 4492 #include <paso/Coupler.h>
27 jgs 82
28 caltinay 4428 #include <limits>
29     #include <sstream>
30 jgs 82
31 caltinay 4428 namespace finley {
32 ksteube 1312
33 caltinay 4428 // helper function
34     static void scatterEntries(int n, int* index, int min_index, int max_index,
35     int* Id_out, int* Id_in, int* Tag_out, int* Tag_in,
36     int* globalDegreesOfFreedom_out,
37     int* globalDegreesOfFreedom_in,
38     int numDim, double* Coordinates_out,
39     double* Coordinates_in)
40 ksteube 1312 {
41 caltinay 4428 const int range = max_index-min_index;
42     const size_t numDim_size = numDim*sizeof(double);
43 ksteube 1312
44 caltinay 4428 #pragma omp parallel for
45     for (int i=0; i<n; i++) {
46     const int k=index[i]-min_index;
47     if ((k>=0) && (k<range)) {
48     Id_out[k]=Id_in[i];
49     Tag_out[k]=Tag_in[i];
50     globalDegreesOfFreedom_out[k]=globalDegreesOfFreedom_in[i];
51     memcpy(&(Coordinates_out[INDEX2(0,k,numDim)]),
52     &(Coordinates_in[INDEX2(0,i,numDim)]), numDim_size);
53     }
54     }
55     }
56 ksteube 1312
57 caltinay 4428 // helper function
58 caltinay 4496 static void gatherEntries(int n, const int* index, int min_index, int max_index,
59 caltinay 4492 int* Id_out, int* Id_in, int* Tag_out, int* Tag_in,
60 caltinay 4428 int* globalDegreesOfFreedom_out,
61 caltinay 4492 int* globalDegreesOfFreedom_in,
62 caltinay 4428 int numDim, double* Coordinates_out,
63     double* Coordinates_in)
64     {
65     const int range = max_index-min_index;
66     const size_t numDim_size = numDim*sizeof(double);
67 ksteube 1312
68 caltinay 4428 #pragma omp parallel for
69     for (int i=0; i<n; i++) {
70     const int k=index[i]-min_index;
71     if ((k>=0) && (k<range)) {
72     Id_out[i]=Id_in[k];
73     Tag_out[i]=Tag_in[k];
74     globalDegreesOfFreedom_out[i]=globalDegreesOfFreedom_in[k];
75     memcpy(&(Coordinates_out[INDEX2(0,i,numDim)]),
76     &(Coordinates_in[INDEX2(0,k,numDim)]), numDim_size);
77     }
78     }
79 jgs 82 }
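// Note on the two helpers above: scatterEntries pushes entry i of the input
// arrays into slot index[i]-min_index of the output arrays, while gatherEntries
// pulls slot index[i]-min_index of the input arrays into entry i of the output
// arrays; both silently skip indices outside [min_index, max_index).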
80    
81 caltinay 4428 /// constructor
82     /// use NodeFile::allocTable to allocate the node table (Id,Coordinates)
83     NodeFile::NodeFile(int nDim, Esys_MPIInfo *mpiInfo) :
84     numNodes(0),
85     numDim(nDim),
86     Id(NULL),
87     Tag(NULL),
88     globalDegreesOfFreedom(NULL),
89     Coordinates(NULL),
90     globalReducedDOFIndex(NULL),
91     globalReducedNodesIndex(NULL),
92     globalNodesIndex(NULL),
93     degreesOfFreedomConnector(NULL),
94     reducedDegreesOfFreedomConnector(NULL),
95     reducedNodesId(NULL),
96     degreesOfFreedomId(NULL),
97     reducedDegreesOfFreedomId(NULL),
98     status(FINLEY_INITIAL_STATUS)
99     {
100     MPIInfo = Esys_MPIInfo_getReference(mpiInfo);
101 jgs 82 }
102 ksteube 1312
103 caltinay 4428 /// destructor
104     NodeFile::~NodeFile()
105     {
106     freeTable();
107     Esys_MPIInfo_free(MPIInfo);
108 ksteube 1312 }
109    
110 caltinay 4431 /// allocates the node table within this node file to hold NN nodes.
111 caltinay 4492 void NodeFile::allocTable(int NN)
112 caltinay 4428 {
113     if (numNodes>0)
114     freeTable();
115    
116     Id=new int[NN];
117     Coordinates=new double[NN*numDim];
118     Tag=new int[NN];
119     globalDegreesOfFreedom=new int[NN];
120     globalReducedDOFIndex=new int[NN];
121     globalReducedNodesIndex=new int[NN];
122     globalNodesIndex=new int[NN];
123     reducedNodesId=new int[NN];
124     degreesOfFreedomId=new int[NN];
125     reducedDegreesOfFreedomId=new int[NN];
126     numNodes=NN;
127    
128     // this initialization makes sure that data are located on the right
129     // processor
130     #pragma omp parallel for
131     for (int n=0; n<numNodes; n++) {
132     Id[n]=-1;
133     for (int i=0; i<numDim; i++)
134     Coordinates[INDEX2(i,n,numDim)]=0.;
135     Tag[n]=-1;
136     globalDegreesOfFreedom[n]=-1;
137     globalReducedDOFIndex[n]=-1;
138     globalReducedNodesIndex[n]=-1;
139     globalNodesIndex[n]=-1;
140     reducedNodesId[n]=-1;
141     degreesOfFreedomId[n]=-1;
142 caltinay 4492 reducedDegreesOfFreedomId[n]=-1;
143 caltinay 4428 }
144 ksteube 1312 }
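// A minimal usage sketch (illustrative only; 'mpiInfo' and 'numLocalNodes' are
// placeholder names, not identifiers from this file):
//
//     NodeFile nodes(3, mpiInfo);        // 3D node file
//     nodes.allocTable(numLocalNodes);   // size and initialise all arrays
//     // ... fill nodes.Id, nodes.Tag, nodes.Coordinates,
//     //     nodes.globalDegreesOfFreedom ...
//     // the destructor releases everything via freeTable()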
145    
146 caltinay 4431 /// frees the node table within this node file
147 caltinay 4428 void NodeFile::freeTable()
148     {
149     delete[] Id;
150     delete[] Coordinates;
151     delete[] globalDegreesOfFreedom;
152     delete[] globalReducedDOFIndex;
153     delete[] globalReducedNodesIndex;
154     delete[] globalNodesIndex;
155     delete[] Tag;
156     delete[] reducedNodesId;
157     delete[] degreesOfFreedomId;
158     delete[] reducedDegreesOfFreedomId;
159     tagsInUse.clear();
160 caltinay 4492 nodesMapping.clear();
161     reducedNodesMapping.clear();
162     degreesOfFreedomMapping.clear();
163     reducedDegreesOfFreedomMapping.clear();
164 caltinay 4814 nodesDistribution.reset();
165     reducedNodesDistribution.reset();
166     degreesOfFreedomDistribution.reset();
167     reducedDegreesOfFreedomDistribution.reset();
168 caltinay 4801 paso::Connector_free(degreesOfFreedomConnector);
169 caltinay 4428 degreesOfFreedomConnector=NULL;
170 caltinay 4801 paso::Connector_free(reducedDegreesOfFreedomConnector);
171 caltinay 4428 reducedDegreesOfFreedomConnector=NULL;
172    
173     numNodes=0;
174 ksteube 1312 }
175 caltinay 4428
176 caltinay 4492 void NodeFile::print() const
177     {
178     std::cout << "=== " << numDim << "D-Nodes:\nnumber of nodes=" << numNodes
179     << std::endl;
180     std::cout << "Id,Tag,globalDegreesOfFreedom,degreesOfFreedom,reducedDegreesOfFreedom,node,reducedNode,Coordinates" << std::endl;
181     for (int i=0; i<numNodes; i++) {
182     std::cout << Id[i] << "," << Tag[i] << "," << globalDegreesOfFreedom[i]
183     << "," << degreesOfFreedomMapping.target[i]
184     << "," << reducedDegreesOfFreedomMapping.target[i]
185     << "," << nodesMapping.target[i] << reducedNodesMapping.target[i]
186     << " ";
187     std::cout.precision(15);
188     std::cout.setf(std::ios::scientific, std::ios::floatfield);
189     for (int j=0; j<numDim; j++)
190     std::cout << Coordinates[INDEX2(j,i,numDim)] << " ";
191     std::cout << std::endl;
192     }
193     }
194    
195 caltinay 4428 /// copies the array newX into this->coordinates
196 caltinay 4626 void NodeFile::setCoordinates(const escript::Data& newX)
197 caltinay 4428 {
198 caltinay 4626 if (newX.getDataPointSize() != numDim) {
199 caltinay 4428 std::stringstream ss;
200     ss << "NodeFile::setCoordinates: number of dimensions of new "
201     "coordinates has to be " << numDim;
202     const std::string errorMsg(ss.str());
203 caltinay 4496 setError(VALUE_ERROR, errorMsg.c_str());
204 caltinay 4626 } else if (newX.getNumDataPointsPerSample() != 1 ||
205     newX.getNumSamples() != numNodes) {
206 caltinay 4428 std::stringstream ss;
207     ss << "NodeFile::setCoordinates: number of given nodes must be "
208     << numNodes;
209     const std::string errorMsg(ss.str());
210 caltinay 4496 setError(VALUE_ERROR, errorMsg.c_str());
211 caltinay 4428 } else {
212     const size_t numDim_size=numDim*sizeof(double);
213 caltinay 4496 ++status;
214 caltinay 4428 #pragma omp parallel for
215     for (int n=0; n<numNodes; n++) {
216     memcpy(&(Coordinates[INDEX2(0,n,numDim)]), newX.getSampleDataRO(n), numDim_size);
217     }
218     }
219 ksteube 1312 }
220    
221 caltinay 4428 /// sets tags to newTag where mask>0
222 caltinay 4626 void NodeFile::setTags(const int newTag, const escript::Data& mask)
223 caltinay 4428 {
224 caltinay 4496 resetError();
225 caltinay 4428
226 caltinay 4626 if (1 != mask.getDataPointSize()) {
227 caltinay 4496 setError(TYPE_ERROR, "NodeFile::setTags: number of components of mask must be 1.");
228 caltinay 4428 return;
229 caltinay 4626 } else if (mask.getNumDataPointsPerSample() != 1 ||
230     mask.getNumSamples() != numNodes) {
231 caltinay 4496 setError(TYPE_ERROR, "NodeFile::setTags: illegal number of samples of mask Data object");
232 caltinay 4428 return;
233     }
234    
235     #pragma omp parallel for
236     for (int n=0; n<numNodes; n++) {
237     if (mask.getSampleDataRO(n)[0] > 0)
238     Tag[n]=newTag;
239     }
240     updateTagList();
241 ksteube 1312 }
242    
243 caltinay 4428 std::pair<int,int> NodeFile::getDOFRange() const
244     {
245 caltinay 4441 std::pair<int,int> result(util::getMinMaxInt(
246     1, numNodes, globalDegreesOfFreedom));
247 caltinay 4428 if (result.second < result.first) {
248     result.first = -1;
249     result.second = 0;
250     }
251     return result;
252 ksteube 1312 }
253 caltinay 4428
254     std::pair<int,int> NodeFile::getGlobalIdRange() const
255     {
256 caltinay 4441 std::pair<int,int> result(util::getMinMaxInt(1, numNodes, Id));
257 caltinay 4428
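// The two-element trick below computes the global minimum and maximum with a
// single MPI_MAX reduction: the local minimum is negated, so taking the
// maximum of -min over all ranks yields -(global minimum), which is negated
// back afterwards. The same pattern is used in getGlobalDOFRange() and
// getGlobalNodeIDIndexRange() below.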
258     #ifdef ESYS_MPI
259     int global_id_range[2];
260     int id_range[2] = { -result.first, result.second };
261     MPI_Allreduce(id_range, global_id_range, 2, MPI_INT, MPI_MAX, MPIInfo->comm);
262     result.first = -global_id_range[0];
263     result.second = global_id_range[1];
264     #endif
265     if (result.second < result.first) {
266     result.first = -1;
267     result.second = 0;
268     }
269     return result;
270 ksteube 1312 }
271    
272 caltinay 4428 std::pair<int,int> NodeFile::getGlobalDOFRange() const
273     {
274 caltinay 4441 std::pair<int,int> result(util::getMinMaxInt(
275     1, numNodes, globalDegreesOfFreedom));
276 ksteube 1312
277 caltinay 4428 #ifdef ESYS_MPI
278     int global_id_range[2];
279     int id_range[2] = { -result.first, result.second };
280     MPI_Allreduce(id_range, global_id_range, 2, MPI_INT, MPI_MAX, MPIInfo->comm);
281     result.first = -global_id_range[0];
282     result.second = global_id_range[1];
283     #endif
284     if (result.second < result.first) {
285     result.first = -1;
286     result.second = 0;
287     }
288     return result;
289 ksteube 1312 }
290 caltinay 4428
291     std::pair<int,int> NodeFile::getGlobalNodeIDIndexRange() const
292     {
293 caltinay 4441 std::pair<int,int> result(util::getMinMaxInt(1, numNodes, globalNodesIndex));
294 caltinay 4428
295     #ifdef ESYS_MPI
296     int global_id_range[2];
297     int id_range[2] = { -result.first, result.second };
298     MPI_Allreduce(id_range, global_id_range, 2, MPI_INT, MPI_MAX, MPIInfo->comm);
299     result.first = -global_id_range[0];
300     result.second = global_id_range[1];
301     #endif
302     if (result.second < result.first) {
303     result.first = -1;
304     result.second = 0;
305     }
306     return result;
307 ksteube 1312 }
308    
309 caltinay 4428 void NodeFile::copyTable(int offset, int idOffset, int dofOffset,
310     const NodeFile* in)
311     {
312     // check number of dimensions and table size
313     if (numDim != in->numDim) {
314 caltinay 4496 setError(TYPE_ERROR, "NodeFile::copyTable: dimensions of node files don't match");
315 caltinay 4428 return;
316     }
317     if (numNodes < in->numNodes+offset) {
318 caltinay 4496 setError(MEMORY_ERROR, "NodeFile::copyTable: node table is too small.");
319 caltinay 4428 return;
320     }
321 ksteube 1312
322 caltinay 4428 #pragma omp parallel for
323     for (int n=0; n<in->numNodes; n++) {
324     Id[offset+n]=in->Id[n]+idOffset;
325     Tag[offset+n]=in->Tag[n];
326     globalDegreesOfFreedom[offset+n]=in->globalDegreesOfFreedom[n]+dofOffset;
327     for(int i=0; i<numDim; i++)
328     Coordinates[INDEX2(i, offset+n, numDim)] =
329     in->Coordinates[INDEX2(i, n, in->numDim)];
330     }
331 ksteube 1312 }
332    
333 caltinay 4428 /// scatters the NodeFile 'in' into this NodeFile using index[0:in->numNodes-1].
334     /// index has to be between 0 and numNodes-1.
335     /// colouring is chosen for the worst case
336     void NodeFile::scatter(int* index, const NodeFile* in)
337     {
338     scatterEntries(numNodes, index, 0, in->numNodes, Id, in->Id, Tag, in->Tag,
339     globalDegreesOfFreedom, in->globalDegreesOfFreedom,
340     numDim, Coordinates, in->Coordinates);
341 ksteube 1312 }
342    
343 caltinay 4428 /// gathers this NodeFile from the NodeFile 'in' using the entries in
344     /// index[0:numNodes-1] which are between min_index and max_index
345     /// (exclusive)
346 caltinay 4492 void NodeFile::gather(int* index, const NodeFile* in)
347 caltinay 4428 {
348     const std::pair<int,int> id_range(in->getGlobalIdRange());
349     gatherEntries(numNodes, index, id_range.first, id_range.second, Id, in->Id,
350     Tag, in->Tag, globalDegreesOfFreedom, in->globalDegreesOfFreedom,
351     numDim, Coordinates, in->Coordinates);
352 ksteube 1312 }
353    
354 caltinay 4496 void NodeFile::gather_global(const std::vector<int>& index, const NodeFile* in)
355 caltinay 4428 {
356     // get the global range of node ids
357     const std::pair<int,int> id_range(in->getGlobalIdRange());
358     const int undefined_node=id_range.first-1;
359     std::vector<int> distribution(in->MPIInfo->size+1);
360    
361     // distribute the range of node ids
362     int buffer_len=Esys_MPIInfo_setDistribution(in->MPIInfo,
363     id_range.first, id_range.second, &distribution[0]);
364    
365     // allocate buffers
366     int *Id_buffer=new int[buffer_len];
367     int *Tag_buffer=new int[buffer_len];
368     int *globalDegreesOfFreedom_buffer=new int[buffer_len];
369     double *Coordinates_buffer=new double[buffer_len*numDim];
370    
371     // fill Id_buffer with the undefined_node marker so that undefined
372     // nodes can be detected later
373     #pragma omp parallel for
374     for (int n=0; n<buffer_len; n++)
375     Id_buffer[n]=undefined_node;
376 caltinay 4492
377 caltinay 4428 // fill the buffer by sending portions around in a circle
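// (The "circle" is a ring exchange: at every step each rank forwards its
// current buffer to rank+1 and receives a new one from rank-1, then deposits
// any of its own nodes whose IDs fall into the ID range that buffer covers,
// tracked by buffer_rank. After MPIInfo->size steps every buffer has visited
// every rank. Four message tags are consumed per step because four arrays
// travel together.)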
378     #ifdef ESYS_MPI
379     MPI_Status status;
380     int dest=Esys_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank+1);
381     int source=Esys_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank-1);
382     #endif
383     int buffer_rank=in->MPIInfo->rank;
384     for (int p=0; p<in->MPIInfo->size; ++p) {
385     if (p>0) { // the initial send can be skipped
386     #ifdef ESYS_MPI
387     MPI_Sendrecv_replace(Id_buffer, buffer_len, MPI_INT, dest,
388     in->MPIInfo->msg_tag_counter, source,
389     in->MPIInfo->msg_tag_counter, in->MPIInfo->comm, &status);
390     MPI_Sendrecv_replace(Tag_buffer, buffer_len, MPI_INT, dest,
391     in->MPIInfo->msg_tag_counter+1, source,
392     in->MPIInfo->msg_tag_counter+1, in->MPIInfo->comm, &status);
393     MPI_Sendrecv_replace(globalDegreesOfFreedom_buffer, buffer_len,
394     MPI_INT, dest, in->MPIInfo->msg_tag_counter+2, source,
395     in->MPIInfo->msg_tag_counter+2, in->MPIInfo->comm, &status);
396     MPI_Sendrecv_replace(Coordinates_buffer, buffer_len*numDim,
397     MPI_DOUBLE, dest, in->MPIInfo->msg_tag_counter+3, source,
398     in->MPIInfo->msg_tag_counter+3, in->MPIInfo->comm, &status);
399     #endif
400 jfenwick 4505 ESYS_MPI_INC_COUNTER(*(in->MPIInfo), 4)
401 caltinay 4428 }
402     buffer_rank=Esys_MPIInfo_mod(in->MPIInfo->size, buffer_rank-1);
403     scatterEntries(in->numNodes, in->Id, distribution[buffer_rank],
404     distribution[buffer_rank+1], Id_buffer, in->Id,
405     Tag_buffer, in->Tag, globalDegreesOfFreedom_buffer,
406     in->globalDegreesOfFreedom, numDim, Coordinates_buffer,
407     in->Coordinates);
408     }
409     // now entries are collected from the buffer again by sending the
410     // entries around in a circle
411     #ifdef ESYS_MPI
412     dest=Esys_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank+1);
413     source=Esys_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank-1);
414     #endif
415     buffer_rank=in->MPIInfo->rank;
416     for (int p=0; p<in->MPIInfo->size; ++p) {
417 caltinay 4496 gatherEntries(numNodes, &index[0], distribution[buffer_rank],
418 caltinay 4428 distribution[buffer_rank+1], Id, Id_buffer, Tag, Tag_buffer,
419     globalDegreesOfFreedom, globalDegreesOfFreedom_buffer, numDim,
420     Coordinates, Coordinates_buffer);
421     if (p < in->MPIInfo->size-1) { // the last send can be skipped
422     #ifdef ESYS_MPI
423     MPI_Sendrecv_replace(Id_buffer, buffer_len, MPI_INT, dest,
424     in->MPIInfo->msg_tag_counter, source,
425     in->MPIInfo->msg_tag_counter, in->MPIInfo->comm, &status);
426     MPI_Sendrecv_replace(Tag_buffer, buffer_len, MPI_INT, dest,
427     in->MPIInfo->msg_tag_counter+1, source,
428     in->MPIInfo->msg_tag_counter+1, in->MPIInfo->comm, &status);
429     MPI_Sendrecv_replace(globalDegreesOfFreedom_buffer, buffer_len,
430     MPI_INT, dest, in->MPIInfo->msg_tag_counter+2, source,
431     in->MPIInfo->msg_tag_counter+2, in->MPIInfo->comm, &status);
432     MPI_Sendrecv_replace(Coordinates_buffer, buffer_len*numDim,
433     MPI_DOUBLE, dest, in->MPIInfo->msg_tag_counter+3, source,
434     in->MPIInfo->msg_tag_counter+3, in->MPIInfo->comm, &status);
435     #endif
436 jfenwick 4505 ESYS_MPI_INC_COUNTER(*(in->MPIInfo), 4)
437 caltinay 4428 }
438     buffer_rank=Esys_MPIInfo_mod(in->MPIInfo->size, buffer_rank-1);
439     }
440     // check if all nodes are set:
441     #pragma omp parallel for
442     for (int n=0; n<numNodes; ++n) {
443     if (Id[n] == undefined_node) {
444     std::stringstream ss;
445     ss << "NodeFile::gather_global: Node id " << Id[n]
446     << " at position " << n << " is referenced but not defined.";
447     const std::string errorMsg(ss.str());
448 caltinay 4496 setError(VALUE_ERROR, errorMsg.c_str());
449 caltinay 4428 }
450     }
451     delete[] Id_buffer;
452     delete[] Tag_buffer;
453     delete[] globalDegreesOfFreedom_buffer;
454     delete[] Coordinates_buffer;
455     // make sure that the error is global
456     Esys_MPIInfo_noError(in->MPIInfo);
457 ksteube 1312 }
458    
459 caltinay 4496 void NodeFile::assignMPIRankToDOFs(std::vector<int>& mpiRankOfDOF,
460     const std::vector<int>& distribution)
461 caltinay 4428 {
462     Esys_MPI_rank p_min=MPIInfo->size, p_max=-1;
463     // first we retrieve the min and max DOF on this processor to reduce
464     // costs for searching
465     const std::pair<int,int> dof_range(getDOFRange());
466    
467     for (int p=0; p<MPIInfo->size; ++p) {
468     if (distribution[p]<=dof_range.first) p_min=p;
469     if (distribution[p]<=dof_range.second) p_max=p;
470     }
471     #pragma omp parallel for
472     for (int n=0; n<numNodes; ++n) {
473     const int k=globalDegreesOfFreedom[n];
474     for (int p=p_min; p<=p_max; ++p) {
475     if (k < distribution[p+1]) {
476     mpiRankOfDOF[n]=p;
477     break;
478     }
479     }
480     }
481 caltinay 4492 }
482 caltinay 4428
483 caltinay 4496 int NodeFile::prepareLabeling(const std::vector<short>& mask,
484     std::vector<int>& buffer,
485 caltinay 4428 std::vector<int>& distribution, bool useNodes)
486     {
487     const int UNSET_ID=-1,SET_ID=1;
488    
489     // get the global range of DOF/node ids
490     std::pair<int,int> idRange(useNodes ?
491     getGlobalNodeIDIndexRange() : getGlobalDOFRange());
492     const int* indexArray = (useNodes ? globalNodesIndex : globalDegreesOfFreedom);
493     // distribute the range of node ids
494     distribution.assign(MPIInfo->size+1, 0);
495     int buffer_len=Esys_MPIInfo_setDistribution(MPIInfo, idRange.first,
496     idRange.second, &distribution[0]);
497     const int myCount=distribution[MPIInfo->rank+1]-distribution[MPIInfo->rank];
498    
499     // fill buffer with the UNSET_ID marker so that unset entries can be detected later
500     buffer.assign(buffer_len, UNSET_ID);
501    
502     // fill the buffer by sending portions around in a circle
503     #ifdef ESYS_MPI
504     MPI_Status status;
505     int dest=Esys_MPIInfo_mod(MPIInfo->size, MPIInfo->rank + 1);
506     int source=Esys_MPIInfo_mod(MPIInfo->size, MPIInfo->rank - 1);
507     #endif
508     int buffer_rank=MPIInfo->rank;
509     for (int p=0; p<MPIInfo->size; ++p) {
510     if (p>0) { // the initial send can be skipped
511     #ifdef ESYS_MPI
512     MPI_Sendrecv_replace(&buffer[0], buffer.size(), MPI_INT, dest,
513     MPIInfo->msg_tag_counter, source, MPIInfo->msg_tag_counter,
514     MPIInfo->comm, &status);
515     #endif
516     MPIInfo->msg_tag_counter++;
517     }
518     buffer_rank=Esys_MPIInfo_mod(MPIInfo->size, buffer_rank-1);
519     const int id0=distribution[buffer_rank];
520     const int id1=distribution[buffer_rank+1];
521     #pragma omp parallel for
522     for (int n=0; n<numNodes; n++) {
523 caltinay 4496 if (mask.size()<numNodes || mask[n]>-1) {
524 caltinay 4428 const int k=indexArray[n];
525     if (id0<=k && k<id1) {
526     buffer[k-id0] = SET_ID;
527     }
528     }
529     }
530     }
531     // count the entries in the buffer
532     // TODO: OMP parallel
533     int myNewCount=0;
534     for (int n=0; n<myCount; ++n) {
535     if (buffer[n] == SET_ID) {
536     buffer[n]=myNewCount;
537     myNewCount++;
538     }
539     }
540     return myNewCount;
541 ksteube 1312 }
542    
543 caltinay 4428 int NodeFile::createDenseDOFLabeling()
544     {
545     std::vector<int> DOF_buffer;
546     std::vector<int> distribution;
547     std::vector<int> loc_offsets(MPIInfo->size);
548     std::vector<int> offsets(MPIInfo->size);
549     int new_numGlobalDOFs=0;
550    
551     // retrieve the number of own DOFs and fill buffer
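// Each rank writes its own DOF count into its slot of loc_offsets (all other
// slots stay zero), so the MPI_SUM reduction below collects every rank's
// count into offsets; the subsequent loop turns these counts into an
// exclusive prefix sum, making loc_offsets[r] the first global DOF number
// handed out by rank r.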
552 caltinay 4496 loc_offsets[MPIInfo->rank]=prepareLabeling(std::vector<short>(),
553     DOF_buffer, distribution, false);
554 caltinay 4428 #ifdef ESYS_MPI
555     MPI_Allreduce(&loc_offsets[0], &offsets[0], MPIInfo->size, MPI_INT,
556     MPI_SUM, MPIInfo->comm);
557     for (int n=0; n<MPIInfo->size; ++n) {
558     loc_offsets[n]=new_numGlobalDOFs;
559     new_numGlobalDOFs+=offsets[n];
560     }
561     #else
562     new_numGlobalDOFs=loc_offsets[0];
563     loc_offsets[0]=0;
564     #endif
565    
566     const int myDOFs=distribution[MPIInfo->rank+1]-distribution[MPIInfo->rank];
567     #pragma omp parallel for
568     for (int n=0; n<myDOFs; ++n)
569     DOF_buffer[n]+=loc_offsets[MPIInfo->rank];
570    
571 jfenwick 4524 std::vector<unsigned char> set_new_DOF(numNodes, true);
572 caltinay 4428
573     // now entries are collected from the buffer again by sending them around
574     // in a circle
575     #ifdef ESYS_MPI
576     int dest=Esys_MPIInfo_mod(MPIInfo->size, MPIInfo->rank + 1);
577     int source=Esys_MPIInfo_mod(MPIInfo->size, MPIInfo->rank - 1);
578     #endif
579     int buffer_rank=MPIInfo->rank;
580     for (int p=0; p<MPIInfo->size; ++p) {
581     const int dof0=distribution[buffer_rank];
582     const int dof1=distribution[buffer_rank+1];
583     #pragma omp parallel for
584     for (int n=0; n<numNodes; n++) {
585 caltinay 4429 const int k=globalDegreesOfFreedom[n];
586     if (set_new_DOF[n] && dof0<=k && k<dof1) {
587     globalDegreesOfFreedom[n]=DOF_buffer[k-dof0];
588 jfenwick 4521 set_new_DOF[n]=false;
589 caltinay 4429 }
590 caltinay 4428 }
591     if (p<MPIInfo->size-1) { // the last send can be skipped
592     #ifdef ESYS_MPI
593     MPI_Status status;
594     MPI_Sendrecv_replace(&DOF_buffer[0], DOF_buffer.size(), MPI_INT,
595     dest, MPIInfo->msg_tag_counter, source,
596     MPIInfo->msg_tag_counter, MPIInfo->comm, &status);
597     #endif
598 jfenwick 4505 ESYS_MPI_INC_COUNTER(*MPIInfo, 1)
599 caltinay 4428 }
600     buffer_rank=Esys_MPIInfo_mod(MPIInfo->size, buffer_rank-1);
601     }
602    
603     return new_numGlobalDOFs;
604 ksteube 1312 }
605    
606 caltinay 4496 int NodeFile::createDenseNodeLabeling(std::vector<int>& nodeDistribution,
607     const std::vector<int>& dofDistribution)
608 caltinay 4428 {
609     const int UNSET_ID=-1, SET_ID=1;
610 caltinay 4496 const int myFirstDOF=dofDistribution[MPIInfo->rank];
611     const int myLastDOF=dofDistribution[MPIInfo->rank+1];
612 caltinay 4428
613     // find the range of node ids controlled by me
614     int min_id=std::numeric_limits<int>::max();
615     int max_id=std::numeric_limits<int>::min();
616     #pragma omp parallel
617     {
618     int loc_max_id=max_id;
619     int loc_min_id=min_id;
620     #pragma omp for
621     for (int n=0; n<numNodes; n++) {
622     const int dof=globalDegreesOfFreedom[n];
623     if (myFirstDOF<=dof && dof<myLastDOF) {
624     loc_max_id=std::max(loc_max_id, Id[n]);
625     loc_min_id=std::min(loc_min_id, Id[n]);
626     }
627     }
628     #pragma omp critical
629     {
630     max_id=std::max(loc_max_id, max_id);
631     min_id=std::min(loc_min_id, min_id);
632     }
633     }
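// (Each thread accumulates a private min/max and merges it under 'omp
// critical' -- a manual reduction that does not rely on OpenMP min/max
// reduction clause support.)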
634     int my_buffer_len = (max_id>=min_id ? max_id-min_id+1 : 0);
635     int buffer_len;
636    
637     #ifdef ESYS_MPI
638     MPI_Allreduce(&my_buffer_len, &buffer_len, 1, MPI_INT, MPI_MAX,
639     MPIInfo->comm);
640     #else
641     buffer_len=my_buffer_len;
642     #endif
643    
644     const int header_len=2;
645     std::vector<int> Node_buffer(buffer_len+header_len, UNSET_ID);
646     // extra storage for these IDs
647     Node_buffer[0]=min_id;
648     Node_buffer[1]=max_id;
649    
650     // mark and count the nodes in use
651 caltinay 4429 #pragma omp parallel for
652     for (int n=0; n<numNodes; n++) {
653     globalNodesIndex[n]=-1;
654     const int dof=globalDegreesOfFreedom[n];
655     if (myFirstDOF<=dof && dof<myLastDOF)
656     Node_buffer[Id[n]-min_id+header_len]=SET_ID;
657 caltinay 4428 }
658     int myNewNumNodes=0;
659     for (int n=0; n<my_buffer_len; n++) {
660     if (Node_buffer[header_len+n]==SET_ID) {
661     Node_buffer[header_len+n]=myNewNumNodes;
662     myNewNumNodes++;
663     }
664     }
665     // make the local number of nodes globally available
666     #ifdef ESYS_MPI
667 caltinay 4496 MPI_Allgather(&myNewNumNodes, 1, MPI_INT, &nodeDistribution[0], 1, MPI_INT,
668 caltinay 4428 MPIInfo->comm);
669     #else
670 caltinay 4496 nodeDistribution[0]=myNewNumNodes;
671 caltinay 4428 #endif
672    
673     int globalNumNodes=0;
674     for (int p=0; p<MPIInfo->size; ++p) {
675 caltinay 4496 const int itmp=nodeDistribution[p];
676     nodeDistribution[p]=globalNumNodes;
677 caltinay 4428 globalNumNodes+=itmp;
678     }
679 caltinay 4496 nodeDistribution[MPIInfo->size]=globalNumNodes;
680 caltinay 4428
681     // offset node buffer
682 caltinay 4429 #pragma omp parallel for
683 caltinay 4428 for (int n=0; n<my_buffer_len; n++)
684 caltinay 4496 Node_buffer[n+header_len]+=nodeDistribution[MPIInfo->rank];
685 caltinay 4428
686     // now we send this buffer around to assign global node index
687     #ifdef ESYS_MPI
688     int dest=Esys_MPIInfo_mod(MPIInfo->size, MPIInfo->rank + 1);
689     int source=Esys_MPIInfo_mod(MPIInfo->size, MPIInfo->rank - 1);
690     #endif
691     int buffer_rank=MPIInfo->rank;
692     for (int p=0; p<MPIInfo->size; ++p) {
693     const int nodeID_0=Node_buffer[0];
694     const int nodeID_1=Node_buffer[1];
695 caltinay 4496 const int dof0=dofDistribution[buffer_rank];
696     const int dof1=dofDistribution[buffer_rank+1];
697 caltinay 4428 if (nodeID_0 <= nodeID_1) {
698 caltinay 4429 #pragma omp parallel for
699 caltinay 4428 for (int n=0; n<numNodes; n++) {
700     const int dof=globalDegreesOfFreedom[n];
701     const int id=Id[n]-nodeID_0;
702     if (dof0<=dof && dof<dof1 && id>=0 && id<=nodeID_1-nodeID_0)
703     globalNodesIndex[n]=Node_buffer[id+header_len];
704     }
705     }
706     if (p<MPIInfo->size-1) { // the last send can be skipped
707     #ifdef ESYS_MPI
708     MPI_Status status;
709     MPI_Sendrecv_replace(&Node_buffer[0], Node_buffer.size(), MPI_INT,
710     dest, MPIInfo->msg_tag_counter, source,
711     MPIInfo->msg_tag_counter, MPIInfo->comm, &status);
712     #endif
713 jfenwick 4505 ESYS_MPI_INC_COUNTER(*MPIInfo, 1)
714 caltinay 4428 }
715     buffer_rank=Esys_MPIInfo_mod(MPIInfo->size, buffer_rank-1);
716     }
717     return globalNumNodes;
718 ksteube 1312 }
719    
720 caltinay 4496 int NodeFile::createDenseReducedLabeling(const std::vector<short>& reducedMask,
721     bool useNodes)
722 caltinay 4428 {
723     std::vector<int> buffer;
724     std::vector<int> distribution;
725     std::vector<int> loc_offsets(MPIInfo->size);
726     std::vector<int> offsets(MPIInfo->size);
727     int new_numGlobalReduced=0;
728    
729     // retrieve the number of own DOFs/nodes and fill buffer
730     loc_offsets[MPIInfo->rank]=prepareLabeling(reducedMask, buffer,
731     distribution, useNodes);
732     #ifdef ESYS_MPI
733     MPI_Allreduce(&loc_offsets[0], &offsets[0], MPIInfo->size, MPI_INT,
734     MPI_SUM, MPIInfo->comm);
735     for (int n=0; n<MPIInfo->size; ++n) {
736     loc_offsets[n]=new_numGlobalReduced;
737     new_numGlobalReduced+=offsets[n];
738     }
739     #else
740     new_numGlobalReduced=loc_offsets[0];
741     loc_offsets[0]=0;
742     #endif
743    
744     const int myCount=distribution[MPIInfo->rank+1]-distribution[MPIInfo->rank];
745     #pragma omp parallel for
746     for (int n=0; n<myCount; ++n)
747     buffer[n]+=loc_offsets[MPIInfo->rank];
748    
749     const int* denseArray =
750     (useNodes ? globalNodesIndex : globalDegreesOfFreedom);
751     int* reducedArray =
752     (useNodes ? globalReducedNodesIndex : globalReducedDOFIndex);
753    
754     #pragma omp parallel for
755     for (int n=0; n<numNodes; ++n)
756     reducedArray[n]=loc_offsets[0]-1;
757    
758     // now entries are collected from the buffer by sending them around
759     // in a circle
760     #ifdef ESYS_MPI
761     int dest=Esys_MPIInfo_mod(MPIInfo->size, MPIInfo->rank + 1);
762     int source=Esys_MPIInfo_mod(MPIInfo->size, MPIInfo->rank - 1);
763     #endif
764     int buffer_rank=MPIInfo->rank;
765     for (int p=0; p<MPIInfo->size; ++p) {
766     const int id0=distribution[buffer_rank];
767     const int id1=distribution[buffer_rank+1];
768     #pragma omp parallel for
769     for (int n=0; n<numNodes; n++) {
770     if (reducedMask[n] > -1) {
771     const int k=denseArray[n];
772     if (id0<=k && k<id1)
773     reducedArray[n]=buffer[k-id0];
774     }
775     }
776     if (p<MPIInfo->size-1) { // the last send can be skipped
777     #ifdef ESYS_MPI
778     MPI_Status status;
779     MPI_Sendrecv_replace(&buffer[0], buffer.size(), MPI_INT, dest,
780     MPIInfo->msg_tag_counter, source,
781     MPIInfo->msg_tag_counter, MPIInfo->comm, &status);
782     #endif
783 jfenwick 4505 ESYS_MPI_INC_COUNTER(*MPIInfo, 1)
784 caltinay 4428 }
785     buffer_rank=Esys_MPIInfo_mod(MPIInfo->size, buffer_rank-1);
786     }
787     return new_numGlobalReduced;
788 ksteube 1312 }
789    
790 caltinay 4492 void NodeFile::createDOFMappingAndCoupling(bool use_reduced_elements)
791     {
792 caltinay 4814 paso::Distribution_ptr dof_distribution;
793 caltinay 4492 const int* globalDOFIndex;
794     if (use_reduced_elements) {
795     dof_distribution=reducedDegreesOfFreedomDistribution;
796     globalDOFIndex=globalReducedDOFIndex;
797     } else {
798     dof_distribution=degreesOfFreedomDistribution;
799     globalDOFIndex=globalDegreesOfFreedom;
800     }
801 caltinay 4814 const int myFirstDOF=dof_distribution->getFirstComponent();
802     const int myLastDOF=dof_distribution->getLastComponent();
803 caltinay 4492 const int mpiSize=MPIInfo->size;
804     const int myRank=MPIInfo->rank;
805    
806     int min_DOF, max_DOF;
807     std::pair<int,int> DOF_range(util::getFlaggedMinMaxInt(
808     numNodes, globalDOFIndex, -1));
809    
810     if (DOF_range.second < DOF_range.first) {
811     min_DOF=myFirstDOF;
812     max_DOF=myLastDOF-1;
813     } else {
814     min_DOF=DOF_range.first;
815     max_DOF=DOF_range.second;
816     }
817    
818     int p_min=mpiSize;
819     int p_max=-1;
820     if (max_DOF >= min_DOF) {
821     for (int p=0; p<mpiSize; ++p) {
822     if (dof_distribution->first_component[p]<=min_DOF) p_min=p;
823     if (dof_distribution->first_component[p]<=max_DOF) p_max=p;
824     }
825     }
826    
827     if (!((min_DOF<=myFirstDOF) && (myLastDOF-1<=max_DOF))) {
828 caltinay 4496 setError(SYSTEM_ERROR, "Local elements do not span local degrees of freedom.");
829 caltinay 4492 return;
830     }
831     const int UNUSED = -1;
832     const int len_loc_dof=max_DOF-min_DOF+1;
833     std::vector<int> shared(numNodes*(p_max-p_min+1));
834     std::vector<int> offsetInShared(mpiSize+1);
835     std::vector<int> locDOFMask(len_loc_dof, UNUSED);
836    
837     #pragma omp parallel
838     {
839     #pragma omp for
840     for (int i=0;i<numNodes;++i) {
841     const int k=globalDOFIndex[i];
842     if (k > -1) {
843     #ifdef BOUNDS_CHECK
844     if ((k-min_DOF)>=len_loc_dof) {
845     printf("BOUNDS_CHECK %s %d i=%d k=%d min_DOF=%d\n", __FILE__, __LINE__, i, k, min_DOF);
846     exit(1);
847     }
848     #endif
849     locDOFMask[k-min_DOF]=UNUSED-1;
850     }
851     }
852     #ifdef BOUNDS_CHECK
853     if (myLastDOF-min_DOF > len_loc_dof) {
854     printf("BOUNDS_CHECK %s %d\n", __FILE__, __LINE__);
855     exit(1);
856     }
857     #endif
858     #pragma omp for
859     for (int i=myFirstDOF-min_DOF; i<myLastDOF-min_DOF; ++i) {
860     locDOFMask[i]=i-myFirstDOF+min_DOF;
861     }
862     }
863    
864     std::vector<int> wanted_DOFs(numNodes);
865     std::vector<int> rcv_len(mpiSize);
866     std::vector<int> snd_len(mpiSize);
867     std::vector<int> neighbor(mpiSize);
868     int numNeighbors=0;
869     int n=0;
870     int lastn=n;
871     for (int p=p_min; p<=p_max; ++p) {
872     if (p != myRank) {
873     const int firstDOF=std::max(min_DOF, dof_distribution->first_component[p]);
874     const int lastDOF=std::min(max_DOF+1, dof_distribution->first_component[p+1]);
875     #ifdef BOUNDS_CHECK
876     if (firstDOF-min_DOF<0 || lastDOF-min_DOF>len_loc_dof) {
877     printf("BOUNDS_CHECK %s %d p=%d\n", __FILE__, __LINE__, p);
878     exit(1);
879     }
880     #endif
881     for (int i=firstDOF-min_DOF; i<lastDOF-min_DOF; ++i) {
882     if (locDOFMask[i] == UNUSED-1) {
883     locDOFMask[i]=myLastDOF-myFirstDOF+n;
884     wanted_DOFs[n]=i+min_DOF;
885     ++n;
886     }
887     }
888     if (n > lastn) {
889     rcv_len[p]=n-lastn;
890     #ifdef BOUNDS_CHECK
891     if (numNeighbors >= mpiSize+1) {
892     printf("BOUNDS_CHECK %s %d p=%d numNeighbors=%d n=%d\n", __FILE__, __LINE__, p, numNeighbors, n);
893     exit(1);
894     }
895     #endif
896     neighbor[numNeighbors]=p;
897     offsetInShared[numNeighbors]=lastn;
898     numNeighbors++;
899     lastn=n;
900     }
901     } // if p!=myRank
902     } // for p
903    
904     #ifdef BOUNDS_CHECK
905     if (numNeighbors >= mpiSize+1) {
906     printf("BOUNDS_CHECK %s %d numNeighbors=%d\n", __FILE__, __LINE__, numNeighbors);
907     exit(1);
908     }
909     #endif
910     offsetInShared[numNeighbors]=lastn;
911    
912     // assign new DOF labels to nodes
913     std::vector<int> nodeMask(numNodes, UNUSED);
914     #pragma omp parallel for
915     for (int i=0; i<numNodes; ++i) {
916     const int k=globalDOFIndex[i];
917     if (k > -1)
918     nodeMask[i]=locDOFMask[k-min_DOF];
919     }
920    
921     // now we can set the mapping from nodes to local DOFs
922     if (use_reduced_elements) {
923     reducedDegreesOfFreedomMapping.assign(nodeMask, UNUSED);
924     } else {
925     degreesOfFreedomMapping.assign(nodeMask, UNUSED);
926     }
927    
928     // define how to get DOF values that are controlled by other processors
929     #ifdef BOUNDS_CHECK
930 caltinay 4496 if (offsetInShared[numNeighbors] >= numNodes*(p_max-p_min+1)) {
931 caltinay 4492 printf("BOUNDS_CHECK %s %d\n", __FILE__, __LINE__);
932     exit(1);
933     }
934     #endif
935     #pragma omp parallel for
936     for (int i=0; i<offsetInShared[numNeighbors]; ++i)
937     shared[i]=myLastDOF-myFirstDOF+i;
938    
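// rcv_shcomp describes where values arriving from each neighbour are stored
// locally (in the slots appended after the owned DOFs), while snd_shcomp,
// built further down from the requests exchanged via MPI_Alltoall and
// Isend/Irecv, lists which locally owned DOFs must be sent to each neighbour;
// the two together form the paso::Connector.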
939 caltinay 4816 paso::SharedComponents_ptr rcv_shcomp(new paso::SharedComponents(
940 caltinay 4492 myLastDOF-myFirstDOF, numNeighbors, &neighbor[0], &shared[0],
941 caltinay 4816 &offsetInShared[0], 1, 0, MPIInfo));
942 caltinay 4492
943     /////////////////////////////////
944     // now we build the sender //
945     /////////////////////////////////
946     #ifdef ESYS_MPI
947     std::vector<MPI_Request> mpi_requests(mpiSize*2);
948     std::vector<MPI_Status> mpi_stati(mpiSize*2);
949     MPI_Alltoall(&rcv_len[0], 1, MPI_INT, &snd_len[0], 1, MPI_INT, MPIInfo->comm);
950     int count=0;
951     #else
952     snd_len[0]=rcv_len[0];
953     #endif
954    
955     for (int p=0; p<rcv_shcomp->numNeighbors; p++) {
956     #ifdef ESYS_MPI
957     MPI_Isend(&(wanted_DOFs[rcv_shcomp->offsetInShared[p]]),
958     rcv_shcomp->offsetInShared[p+1]-rcv_shcomp->offsetInShared[p],
959     MPI_INT, rcv_shcomp->neighbor[p],
960     MPIInfo->msg_tag_counter+myRank, MPIInfo->comm,
961     &mpi_requests[count]);
962     count++;
963     #endif
964     }
965     n=0;
966     numNeighbors=0;
967     for (int p=0; p<mpiSize; p++) {
968     if (snd_len[p] > 0) {
969     #ifdef ESYS_MPI
970     MPI_Irecv(&shared[n], snd_len[p], MPI_INT, p,
971     MPIInfo->msg_tag_counter+p, MPIInfo->comm,
972     &mpi_requests[count]);
973     count++;
974     #endif
975     neighbor[numNeighbors]=p;
976     offsetInShared[numNeighbors]=n;
977     numNeighbors++;
978     n+=snd_len[p];
979     }
980     }
981 jfenwick 4505 ESYS_MPI_INC_COUNTER(*MPIInfo, MPIInfo->size)
982 caltinay 4492 offsetInShared[numNeighbors]=n;
983     #ifdef ESYS_MPI
984     MPI_Waitall(count, &mpi_requests[0], &mpi_stati[0]);
985     #endif
986     // map global ids to local ids
987     #pragma omp parallel for
988     for (int i=0; i<offsetInShared[numNeighbors]; ++i) {
989     shared[i]=locDOFMask[shared[i]-min_DOF];
990     }
991    
992 caltinay 4816 paso::SharedComponents_ptr snd_shcomp(new paso::SharedComponents(
993 caltinay 4492 myLastDOF-myFirstDOF, numNeighbors, &neighbor[0], &shared[0],
994 caltinay 4816 &offsetInShared[0], 1, 0, MPIInfo));
995 caltinay 4492
996 caltinay 4496 if (noError()) {
997 caltinay 4492 if (use_reduced_elements) {
998 caltinay 4801 reducedDegreesOfFreedomConnector=paso::Connector_alloc(snd_shcomp, rcv_shcomp);
999 caltinay 4492 } else {
1000 caltinay 4801 degreesOfFreedomConnector=paso::Connector_alloc(snd_shcomp, rcv_shcomp);
1001 caltinay 4492 }
1002     }
1003     }
1004    
1005 caltinay 4496 void NodeFile::createNodeMappings(const std::vector<int>& indexReducedNodes,
1006     const std::vector<int>& dofDist,
1007     const std::vector<int>& nodeDist)
1008 caltinay 4492 {
1009     const int mpiSize=MPIInfo->size;
1010     const int myRank=MPIInfo->rank;
1011    
1012     const int myFirstDOF=dofDist[myRank];
1013     const int myLastDOF=dofDist[myRank+1];
1014     const int myNumDOF=myLastDOF-myFirstDOF;
1015    
1016     const int myFirstNode=nodeDist[myRank];
1017     const int myLastNode=nodeDist[myRank+1];
1018     const int myNumNodes=myLastNode-myFirstNode;
1019    
1020 caltinay 4496 std::vector<short> maskMyReducedDOF(myNumDOF, -1);
1021     std::vector<short> maskMyReducedNodes(myNumNodes, -1);
1022 caltinay 4492
1023     // mark the nodes used by the reduced mesh
1024     #pragma omp parallel for
1025 caltinay 4496 for (int i=0; i<indexReducedNodes.size(); ++i) {
1026 caltinay 4492 int k=globalNodesIndex[indexReducedNodes[i]];
1027     if (k>=myFirstNode && myLastNode>k)
1028 caltinay 4496 maskMyReducedNodes[k-myFirstNode]=1;
1029 caltinay 4492 k=globalDegreesOfFreedom[indexReducedNodes[i]];
1030     if (k>=myFirstDOF && myLastDOF>k) {
1031 caltinay 4496 maskMyReducedDOF[k-myFirstDOF]=1;
1032 caltinay 4492 }
1033     }
1034 caltinay 4496 std::vector<int> indexMyReducedDOF = util::packMask(maskMyReducedDOF);
1035     int myNumReducedDOF=indexMyReducedDOF.size();
1036     std::vector<int> indexMyReducedNodes = util::packMask(maskMyReducedNodes);
1037     int myNumReducedNodes=indexMyReducedNodes.size();
1038 caltinay 4492
1039     std::vector<int> rdofDist(mpiSize+1);
1040     std::vector<int> rnodeDist(mpiSize+1);
1041     #ifdef ESYS_MPI
1042     MPI_Allgather(&myNumReducedNodes, 1, MPI_INT, &rnodeDist[0], 1, MPI_INT, MPIInfo->comm);
1043     MPI_Allgather(&myNumReducedDOF, 1, MPI_INT, &rdofDist[0], 1, MPI_INT, MPIInfo->comm);
1044     #else
1045     rnodeDist[0]=myNumReducedNodes;
1046     rdofDist[0]=myNumReducedDOF;
1047     #endif
1048     int globalNumReducedNodes=0;
1049     int globalNumReducedDOF=0;
1050     for (int i=0; i<mpiSize;++i) {
1051     int k=rnodeDist[i];
1052     rnodeDist[i]=globalNumReducedNodes;
1053     globalNumReducedNodes+=k;
1054    
1055     k=rdofDist[i];
1056     rdofDist[i]=globalNumReducedDOF;
1057     globalNumReducedDOF+=k;
1058     }
1059     rnodeDist[mpiSize]=globalNumReducedNodes;
1060     rdofDist[mpiSize]=globalNumReducedDOF;
1061    
1062     // ==== distribution of Nodes ===============================
1063 caltinay 4814 nodesDistribution.reset(new paso::Distribution(MPIInfo, &nodeDist[0], 1, 0));
1064 caltinay 4492 // ==== distribution of DOFs ================================
1065 caltinay 4814 degreesOfFreedomDistribution.reset(new paso::Distribution(MPIInfo, &dofDist[0], 1, 0));
1066 caltinay 4492 // ==== distribution of reduced Nodes =======================
1067 caltinay 4814 reducedNodesDistribution.reset(new paso::Distribution(MPIInfo, &rnodeDist[0], 1, 0));
1068 caltinay 4492 // ==== distribution of reduced DOF =========================
1069 caltinay 4814 reducedDegreesOfFreedomDistribution.reset(new paso::Distribution(
1070     MPIInfo, &rdofDist[0], 1, 0));
1071 caltinay 4492
1072     std::vector<int> nodeMask(numNodes);
1073    
1074 caltinay 4496 if (noError()) {
1075 caltinay 4492 const int UNUSED = -1;
1076     // ==== nodes mapping which is a dummy structure ========
1077     #pragma omp parallel for
1078     for (int i=0; i<numNodes; ++i)
1079     nodeMask[i]=i;
1080     nodesMapping.assign(nodeMask, UNUSED);
1081    
1082     // ==== mapping between nodes and reduced nodes ==========
1083     #pragma omp parallel for
1084     for (int i=0; i<numNodes; ++i)
1085     nodeMask[i]=UNUSED;
1086     #pragma omp parallel for
1087 caltinay 4496 for (int i=0; i<indexReducedNodes.size(); ++i)
1088 caltinay 4492 nodeMask[indexReducedNodes[i]]=i;
1089     reducedNodesMapping.assign(nodeMask, UNUSED);
1090     }
1091     // ==== mapping between nodes and DOFs + DOF connector
1092 caltinay 4496 if (noError())
1093 caltinay 4492 createDOFMappingAndCoupling(false);
1094     // ==== mapping between nodes and reduced DOFs + reduced DOF connector
1095 caltinay 4496 if (noError())
1096 caltinay 4492 createDOFMappingAndCoupling(true);
1097    
1098     // get the Ids for DOFs and reduced nodes
1099 caltinay 4496 if (noError()) {
1100 caltinay 4493 #pragma omp parallel
1101 caltinay 4492 {
1102     #pragma omp for
1103     for (int i=0; i<reducedNodesMapping.getNumTargets(); ++i)
1104     reducedNodesId[i]=Id[reducedNodesMapping.map[i]];
1105     #pragma omp for
1106     for (int i=0; i<degreesOfFreedomMapping.getNumTargets(); ++i)
1107     degreesOfFreedomId[i]=Id[degreesOfFreedomMapping.map[i]];
1108     #pragma omp for
1109     for (int i=0; i<reducedDegreesOfFreedomMapping.getNumTargets(); ++i)
1110     reducedDegreesOfFreedomId[i]=Id[reducedDegreesOfFreedomMapping.map[i]];
1111     }
1112     } else {
1113 caltinay 4814 nodesDistribution.reset();
1114     reducedNodesDistribution.reset();
1115     degreesOfFreedomDistribution.reset();
1116     reducedDegreesOfFreedomDistribution.reset();
1117 caltinay 4801 paso::Connector_free(degreesOfFreedomConnector);
1118     paso::Connector_free(reducedDegreesOfFreedomConnector);
1119 caltinay 4492 degreesOfFreedomConnector=NULL;
1120     reducedDegreesOfFreedomConnector=NULL;
1121     }
1122     }
1123    
1124 caltinay 4428 } // namespace finley
1125 ksteube 1312
