/[escript]/trunk/finley/src/NodeFile.cpp

Annotation of /trunk/finley/src/NodeFile.cpp



Revision 4657
Thu Feb 6 06:12:20 2014 UTC by jfenwick
File size: 40800 bytes
I changed some files.
Updated copyright notices, added GeoComp.



1 jgs 150
2 jfenwick 3981 /*****************************************************************************
3 ksteube 1811 *
4 jfenwick 4657 * Copyright (c) 2003-2014 by University of Queensland
5 jfenwick 3981 * http://www.uq.edu.au
6 ksteube 1811 *
7     * Primary Business: Queensland, Australia
8     * Licensed under the Open Software License version 3.0
9     * http://www.opensource.org/licenses/osl-3.0.php
10     *
11 jfenwick 3981 * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12 jfenwick 4657 * Development 2012-2013 by School of Earth Sciences
13     * Development from 2014 by Centre for Geoscience Computing (GeoComp)
14 jfenwick 3981 *
15     *****************************************************************************/
16 ksteube 1312
17 ksteube 1811
18 caltinay 4428 /****************************************************************************
19 jgs 82
20 caltinay 4428 Finley: NodeFile
21    
22     *****************************************************************************/
23    
24 jgs 82 #include "NodeFile.h"
25 caltinay 4428 #include <escript/Data.h>
26 caltinay 4492 #include <paso/Coupler.h>
27 jgs 82
28 caltinay 4428 #include <limits>
29     #include <sstream>
30 jgs 82
31 caltinay 4428 namespace finley {
32 ksteube 1312
33 caltinay 4428 // helper function: writes entry i of the input arrays to position index[i]-min_index of the output arrays (indices outside [min_index,max_index) are skipped)
34     static void scatterEntries(int n, int* index, int min_index, int max_index,
35     int* Id_out, int* Id_in, int* Tag_out, int* Tag_in,
36     int* globalDegreesOfFreedom_out,
37     int* globalDegreesOfFreedom_in,
38     int numDim, double* Coordinates_out,
39     double* Coordinates_in)
40 ksteube 1312 {
41 caltinay 4428 const int range = max_index-min_index;
42     const size_t numDim_size = numDim*sizeof(double);
43 ksteube 1312
44 caltinay 4428 #pragma omp parallel for
45     for (int i=0; i<n; i++) {
46     const int k=index[i]-min_index;
47     if ((k>=0) && (k<range)) {
48     Id_out[k]=Id_in[i];
49     Tag_out[k]=Tag_in[i];
50     globalDegreesOfFreedom_out[k]=globalDegreesOfFreedom_in[i];
51     memcpy(&(Coordinates_out[INDEX2(0,k,numDim)]),
52     &(Coordinates_in[INDEX2(0,i,numDim)]), numDim_size);
53     }
54     }
55     }
56 ksteube 1312
57 caltinay 4428 // helper function: reads position index[i]-min_index of the input arrays back into entry i of the output arrays (indices outside [min_index,max_index) are skipped)
58 caltinay 4496 static void gatherEntries(int n, const int* index, int min_index, int max_index,
59 caltinay 4492 int* Id_out, int* Id_in, int* Tag_out, int* Tag_in,
60 caltinay 4428 int* globalDegreesOfFreedom_out,
61 caltinay 4492 int* globalDegreesOfFreedom_in,
62 caltinay 4428 int numDim, double* Coordinates_out,
63     double* Coordinates_in)
64     {
65     const int range = max_index-min_index;
66     const size_t numDim_size = numDim*sizeof(double);
67 ksteube 1312
68 caltinay 4428 #pragma omp parallel for
69     for (int i=0; i<n; i++) {
70     const int k=index[i]-min_index;
71     if ((k>=0) && (k<range)) {
72     Id_out[i]=Id_in[k];
73     Tag_out[i]=Tag_in[k];
74     globalDegreesOfFreedom_out[i]=globalDegreesOfFreedom_in[k];
75     memcpy(&(Coordinates_out[INDEX2(0,i,numDim)]),
76     &(Coordinates_in[INDEX2(0,k,numDim)]), numDim_size);
77     }
78     }
79 jgs 82 }
80    
81 caltinay 4428 /// constructor
82     /// use NodeFile::allocTable to allocate the node table (Id,Coordinates)
83     NodeFile::NodeFile(int nDim, Esys_MPIInfo *mpiInfo) :
84     numNodes(0),
85     numDim(nDim),
86     Id(NULL),
87     Tag(NULL),
88     globalDegreesOfFreedom(NULL),
89     Coordinates(NULL),
90     globalReducedDOFIndex(NULL),
91     globalReducedNodesIndex(NULL),
92     globalNodesIndex(NULL),
93     nodesDistribution(NULL),
94     reducedNodesDistribution(NULL),
95     degreesOfFreedomDistribution(NULL),
96     reducedDegreesOfFreedomDistribution(NULL),
97     degreesOfFreedomConnector(NULL),
98     reducedDegreesOfFreedomConnector(NULL),
99     reducedNodesId(NULL),
100     degreesOfFreedomId(NULL),
101     reducedDegreesOfFreedomId(NULL),
102     status(FINLEY_INITIAL_STATUS)
103     {
104     MPIInfo = Esys_MPIInfo_getReference(mpiInfo);
105 jgs 82 }
106 ksteube 1312
107 caltinay 4428 /// destructor
108     NodeFile::~NodeFile()
109     {
110     freeTable();
111     Esys_MPIInfo_free(MPIInfo);
112 ksteube 1312 }
113    
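// allocTable() and freeTable() below manage the per-node arrays as a unit:
// allocTable (re)allocates all id/tag/index/coordinate arrays for NN nodes
// and initialises them to -1 (0.0 for coordinates) in a parallel loop so
// that, under OpenMP, each array section is first touched by the thread that
// will later work on it; freeTable releases the arrays together with the
// distributions, connectors and mappings and resets numNodes to 0.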
114 caltinay 4431 /// allocates the node table within this node file to hold NN nodes.
115 caltinay 4492 void NodeFile::allocTable(int NN)
116 caltinay 4428 {
117     if (numNodes>0)
118     freeTable();
119    
120     Id=new int[NN];
121     Coordinates=new double[NN*numDim];
122     Tag=new int[NN];
123     globalDegreesOfFreedom=new int[NN];
124     globalReducedDOFIndex=new int[NN];
125     globalReducedNodesIndex=new int[NN];
126     globalNodesIndex=new int[NN];
127     reducedNodesId=new int[NN];
128     degreesOfFreedomId=new int[NN];
129     reducedDegreesOfFreedomId=new int[NN];
130     numNodes=NN;
131    
132     // this initialization makes sure that data are located on the right
133     // processor
134     #pragma omp parallel for
135     for (int n=0; n<numNodes; n++) {
136     Id[n]=-1;
137     for (int i=0; i<numDim; i++)
138     Coordinates[INDEX2(i,n,numDim)]=0.;
139     Tag[n]=-1;
140     globalDegreesOfFreedom[n]=-1;
141     globalReducedDOFIndex[n]=-1;
142     globalReducedNodesIndex[n]=-1;
143     globalNodesIndex[n]=-1;
144     reducedNodesId[n]=-1;
145     degreesOfFreedomId[n]=-1;
146 caltinay 4492 reducedDegreesOfFreedomId[n]=-1;
147 caltinay 4428 }
148 ksteube 1312 }
149    
150 caltinay 4431 /// frees the node table within this node file
151 caltinay 4428 void NodeFile::freeTable()
152     {
153     delete[] Id;
154     delete[] Coordinates;
155     delete[] globalDegreesOfFreedom;
156     delete[] globalReducedDOFIndex;
157     delete[] globalReducedNodesIndex;
158     delete[] globalNodesIndex;
159     delete[] Tag;
160     delete[] reducedNodesId;
161     delete[] degreesOfFreedomId;
162     delete[] reducedDegreesOfFreedomId;
163     tagsInUse.clear();
164 caltinay 4492 nodesMapping.clear();
165     reducedNodesMapping.clear();
166     degreesOfFreedomMapping.clear();
167     reducedDegreesOfFreedomMapping.clear();
168 caltinay 4428 Paso_Distribution_free(nodesDistribution);
169     nodesDistribution=NULL;
170     Paso_Distribution_free(reducedNodesDistribution);
171     reducedNodesDistribution=NULL;
172     Paso_Distribution_free(degreesOfFreedomDistribution);
173     degreesOfFreedomDistribution=NULL;
174     Paso_Distribution_free(reducedDegreesOfFreedomDistribution);
175     reducedDegreesOfFreedomDistribution=NULL;
176     Paso_Connector_free(degreesOfFreedomConnector);
177     degreesOfFreedomConnector=NULL;
178     Paso_Connector_free(reducedDegreesOfFreedomConnector);
179     reducedDegreesOfFreedomConnector=NULL;
180    
181     numNodes=0;
182 ksteube 1312 }
183 caltinay 4428
184 caltinay 4492 void NodeFile::print() const
185     {
186     std::cout << "=== " << numDim << "D-Nodes:\nnumber of nodes=" << numNodes
187     << std::endl;
188     std::cout << "Id,Tag,globalDegreesOfFreedom,degreesOfFreedom,reducedDegreesOfFreedom,node,reducedNode,Coordinates" << std::endl;
189     for (int i=0; i<numNodes; i++) {
190     std::cout << Id[i] << "," << Tag[i] << "," << globalDegreesOfFreedom[i]
191     << "," << degreesOfFreedomMapping.target[i]
192     << "," << reducedDegreesOfFreedomMapping.target[i]
193     << "," << nodesMapping.target[i] << "," << reducedNodesMapping.target[i]
194     << " ";
195     std::cout.precision(15);
196     std::cout.setf(std::ios::scientific, std::ios::floatfield);
197     for (int j=0; j<numDim; j++)
198     std::cout << " " << Coordinates[INDEX2(j,i,numDim)];
199     std::cout << std::endl;
200     }
201     }
202    
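// Both setCoordinates() and setTags() below validate their escript::Data
// argument before touching the node table: the data point size must match
// the expected number of components (numDim resp. 1) and the object must
// hold exactly one data point per sample with numNodes samples; otherwise
// an error is set and the table is left unchanged. A hypothetical caller
// (names assumed, not part of this file) might guard a call like this:
//
//   if (newX.getDataPointSize() == nodes->numDim &&
//       newX.getNumSamples() == nodes->numNodes)
//       nodes->setCoordinates(newX);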
203 caltinay 4428 /// copies the array newX into this->coordinates
204 caltinay 4626 void NodeFile::setCoordinates(const escript::Data& newX)
205 caltinay 4428 {
206 caltinay 4626 if (newX.getDataPointSize() != numDim) {
207 caltinay 4428 std::stringstream ss;
208     ss << "NodeFile::setCoordinates: number of dimensions of new "
209     "coordinates has to be " << numDim;
210     const std::string errorMsg(ss.str());
211 caltinay 4496 setError(VALUE_ERROR, errorMsg.c_str());
212 caltinay 4626 } else if (newX.getNumDataPointsPerSample() != 1 ||
213     newX.getNumSamples() != numNodes) {
214 caltinay 4428 std::stringstream ss;
215     ss << "NodeFile::setCoordinates: number of given nodes must be "
216     << numNodes;
217     const std::string errorMsg(ss.str());
218 caltinay 4496 setError(VALUE_ERROR, errorMsg.c_str());
219 caltinay 4428 } else {
220     const size_t numDim_size=numDim*sizeof(double);
221 caltinay 4496 ++status;
222 caltinay 4428 #pragma omp parallel for
223     for (int n=0; n<numNodes; n++) {
224     memcpy(&(Coordinates[INDEX2(0,n,numDim)]), newX.getSampleDataRO(n), numDim_size);
225     }
226     }
227 ksteube 1312 }
228    
229 caltinay 4428 /// sets tags to newTag where mask>0
230 caltinay 4626 void NodeFile::setTags(const int newTag, const escript::Data& mask)
231 caltinay 4428 {
232 caltinay 4496 resetError();
233 caltinay 4428
234 caltinay 4626 if (1 != mask.getDataPointSize()) {
235 caltinay 4496 setError(TYPE_ERROR, "NodeFile::setTags: number of components of mask must be 1.");
236 caltinay 4428 return;
237 caltinay 4626 } else if (mask.getNumDataPointsPerSample() != 1 ||
238     mask.getNumSamples() != numNodes) {
239 caltinay 4496 setError(TYPE_ERROR, "NodeFile::setTags: illegal number of samples of mask Data object");
240 caltinay 4428 return;
241     }
242    
243     #pragma omp parallel for
244     for (int n=0; n<numNodes; n++) {
245     if (mask.getSampleDataRO(n)[0] > 0)
246     Tag[n]=newTag;
247     }
248     updateTagList();
249 ksteube 1312 }
250    
251 caltinay 4428 std::pair<int,int> NodeFile::getDOFRange() const
252     {
253 caltinay 4441 std::pair<int,int> result(util::getMinMaxInt(
254     1, numNodes, globalDegreesOfFreedom));
255 caltinay 4428 if (result.second < result.first) {
256     result.first = -1;
257     result.second = 0;
258     }
259     return result;
260 ksteube 1312 }
261 caltinay 4428
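// The getGlobal*Range() methods below compute a global minimum and maximum
// with a single MPI_Allreduce: the local minimum is negated, MPI_MAX is
// applied to the pair {-min, max}, and the first component is negated again
// afterwards. An equivalent, but twice as expensive, formulation would be
// two separate reductions:
//
//   MPI_Allreduce(&local_min, &global_min, 1, MPI_INT, MPI_MIN, comm);
//   MPI_Allreduce(&local_max, &global_max, 1, MPI_INT, MPI_MAX, comm);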
262     std::pair<int,int> NodeFile::getGlobalIdRange() const
263     {
264 caltinay 4441 std::pair<int,int> result(util::getMinMaxInt(1, numNodes, Id));
265 caltinay 4428
266     #ifdef ESYS_MPI
267     int global_id_range[2];
268     int id_range[2] = { -result.first, result.second };
269     MPI_Allreduce(id_range, global_id_range, 2, MPI_INT, MPI_MAX, MPIInfo->comm);
270     result.first = -global_id_range[0];
271     result.second = global_id_range[1];
272     #endif
273     if (result.second < result.first) {
274     result.first = -1;
275     result.second = 0;
276     }
277     return result;
278 ksteube 1312 }
279    
280 caltinay 4428 std::pair<int,int> NodeFile::getGlobalDOFRange() const
281     {
282 caltinay 4441 std::pair<int,int> result(util::getMinMaxInt(
283     1, numNodes, globalDegreesOfFreedom));
284 ksteube 1312
285 caltinay 4428 #ifdef ESYS_MPI
286     int global_id_range[2];
287     int id_range[2] = { -result.first, result.second };
288     MPI_Allreduce(id_range, global_id_range, 2, MPI_INT, MPI_MAX, MPIInfo->comm);
289     result.first = -global_id_range[0];
290     result.second = global_id_range[1];
291     #endif
292     if (result.second < result.first) {
293     result.first = -1;
294     result.second = 0;
295     }
296     return result;
297 ksteube 1312 }
298 caltinay 4428
299     std::pair<int,int> NodeFile::getGlobalNodeIDIndexRange() const
300     {
301 caltinay 4441 std::pair<int,int> result(util::getMinMaxInt(1, numNodes, globalNodesIndex));
302 caltinay 4428
303     #ifdef ESYS_MPI
304     int global_id_range[2];
305     int id_range[2] = { -result.first, result.second };
306     MPI_Allreduce(id_range, global_id_range, 2, MPI_INT, MPI_MAX, MPIInfo->comm);
307     result.first = -global_id_range[0];
308     result.second = global_id_range[1];
309     #endif
310     if (result.second < result.first) {
311     result.first = -1;
312     result.second = 0;
313     }
314     return result;
315 ksteube 1312 }
316    
317 caltinay 4428 void NodeFile::copyTable(int offset, int idOffset, int dofOffset,
318     const NodeFile* in)
319     {
320     // check number of dimensions and table size
321     if (numDim != in->numDim) {
322 caltinay 4496 setError(TYPE_ERROR, "NodeFile::copyTable: dimensions of node files don't match");
323 caltinay 4428 return;
324     }
325     if (numNodes < in->numNodes+offset) {
326 caltinay 4496 setError(MEMORY_ERROR, "NodeFile::copyTable: node table is too small.");
327 caltinay 4428 return;
328     }
329 ksteube 1312
330 caltinay 4428 #pragma omp parallel for
331     for (int n=0; n<in->numNodes; n++) {
332     Id[offset+n]=in->Id[n]+idOffset;
333     Tag[offset+n]=in->Tag[n];
334     globalDegreesOfFreedom[offset+n]=in->globalDegreesOfFreedom[n]+dofOffset;
335     for(int i=0; i<numDim; i++)
336     Coordinates[INDEX2(i, offset+n, numDim)] =
337     in->Coordinates[INDEX2(i, n, in->numDim)];
338     }
339 ksteube 1312 }
340    
341 caltinay 4428 /// scatters the NodeFile in into this NodeFile using index[0:in->numNodes-1].
342     /// index has to be between 0 and numNodes-1.
343     /// colouring is chosen for the worst case
344     void NodeFile::scatter(int* index, const NodeFile* in)
345     {
346     scatterEntries(numNodes, index, 0, in->numNodes, Id, in->Id, Tag, in->Tag,
347     globalDegreesOfFreedom, in->globalDegreesOfFreedom,
348     numDim, Coordinates, in->Coordinates);
349 ksteube 1312 }
350    
351 caltinay 4428 /// gathers this NodeFile from the NodeFile 'in' using the entries in
352     /// index[0:numNodes-1] which are between min_index and max_index
353     /// (exclusive)
354 caltinay 4492 void NodeFile::gather(int* index, const NodeFile* in)
355 caltinay 4428 {
356     const std::pair<int,int> id_range(in->getGlobalIdRange());
357     gatherEntries(numNodes, index, id_range.first, id_range.second, Id, in->Id,
358     Tag, in->Tag, globalDegreesOfFreedom, in->globalDegreesOfFreedom,
359     numDim, Coordinates, in->Coordinates);
360 ksteube 1312 }
361    
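// gather_global() collects node data that may be stored on any rank. It
// works in two ring phases over MPI_Sendrecv_replace: first, buffers sized
// to one section of the global id distribution circulate through all ranks
// and every rank deposits its own nodes into the section they belong to;
// then the filled buffers circulate once more and every rank gathers the
// entries requested through 'index'. Four messages (Id, Tag, DOF,
// Coordinates) are exchanged per hop, hence the tag counter is advanced by 4.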
362 caltinay 4496 void NodeFile::gather_global(const std::vector<int>& index, const NodeFile* in)
363 caltinay 4428 {
364     // get the global range of node ids
365     const std::pair<int,int> id_range(in->getGlobalIdRange());
366     const int undefined_node=id_range.first-1;
367     std::vector<int> distribution(in->MPIInfo->size+1);
368    
369     // distribute the range of node ids
370     int buffer_len=Esys_MPIInfo_setDistribution(in->MPIInfo,
371     id_range.first, id_range.second, &distribution[0]);
372    
373     // allocate buffers
374     int *Id_buffer=new int[buffer_len];
375     int *Tag_buffer=new int[buffer_len];
376     int *globalDegreesOfFreedom_buffer=new int[buffer_len];
377     double *Coordinates_buffer=new double[buffer_len*numDim];
378    
379     // fill Id_buffer with the undefined_node marker to check if nodes
380     // are defined
381     #pragma omp parallel for
382     for (int n=0; n<buffer_len; n++)
383     Id_buffer[n]=undefined_node;
384 caltinay 4492
385 caltinay 4428 // fill the buffer by sending portions around in a circle
386     #ifdef ESYS_MPI
387     MPI_Status status;
388     int dest=Esys_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank+1);
389     int source=Esys_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank-1);
390     #endif
391     int buffer_rank=in->MPIInfo->rank;
392     for (int p=0; p<in->MPIInfo->size; ++p) {
393     if (p>0) { // the initial send can be skipped
394     #ifdef ESYS_MPI
395     MPI_Sendrecv_replace(Id_buffer, buffer_len, MPI_INT, dest,
396     in->MPIInfo->msg_tag_counter, source,
397     in->MPIInfo->msg_tag_counter, in->MPIInfo->comm, &status);
398     MPI_Sendrecv_replace(Tag_buffer, buffer_len, MPI_INT, dest,
399     in->MPIInfo->msg_tag_counter+1, source,
400     in->MPIInfo->msg_tag_counter+1, in->MPIInfo->comm, &status);
401     MPI_Sendrecv_replace(globalDegreesOfFreedom_buffer, buffer_len,
402     MPI_INT, dest, in->MPIInfo->msg_tag_counter+2, source,
403     in->MPIInfo->msg_tag_counter+2, in->MPIInfo->comm, &status);
404     MPI_Sendrecv_replace(Coordinates_buffer, buffer_len*numDim,
405     MPI_DOUBLE, dest, in->MPIInfo->msg_tag_counter+3, source,
406     in->MPIInfo->msg_tag_counter+3, in->MPIInfo->comm, &status);
407     #endif
408 jfenwick 4505 ESYS_MPI_INC_COUNTER(*(in->MPIInfo), 4)
409 caltinay 4428 }
410     buffer_rank=Esys_MPIInfo_mod(in->MPIInfo->size, buffer_rank-1);
411     scatterEntries(in->numNodes, in->Id, distribution[buffer_rank],
412     distribution[buffer_rank+1], Id_buffer, in->Id,
413     Tag_buffer, in->Tag, globalDegreesOfFreedom_buffer,
414     in->globalDegreesOfFreedom, numDim, Coordinates_buffer,
415     in->Coordinates);
416     }
417     // now entries are collected from the buffer again by sending the
418     // entries around in a circle
419     #ifdef ESYS_MPI
420     dest=Esys_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank+1);
421     source=Esys_MPIInfo_mod(in->MPIInfo->size, in->MPIInfo->rank-1);
422     #endif
423     buffer_rank=in->MPIInfo->rank;
424     for (int p=0; p<in->MPIInfo->size; ++p) {
425 caltinay 4496 gatherEntries(numNodes, &index[0], distribution[buffer_rank],
426 caltinay 4428 distribution[buffer_rank+1], Id, Id_buffer, Tag, Tag_buffer,
427     globalDegreesOfFreedom, globalDegreesOfFreedom_buffer, numDim,
428     Coordinates, Coordinates_buffer);
429     if (p < in->MPIInfo->size-1) { // the last send can be skipped
430     #ifdef ESYS_MPI
431     MPI_Sendrecv_replace(Id_buffer, buffer_len, MPI_INT, dest,
432     in->MPIInfo->msg_tag_counter, source,
433     in->MPIInfo->msg_tag_counter, in->MPIInfo->comm, &status);
434     MPI_Sendrecv_replace(Tag_buffer, buffer_len, MPI_INT, dest,
435     in->MPIInfo->msg_tag_counter+1, source,
436     in->MPIInfo->msg_tag_counter+1, in->MPIInfo->comm, &status);
437     MPI_Sendrecv_replace(globalDegreesOfFreedom_buffer, buffer_len,
438     MPI_INT, dest, in->MPIInfo->msg_tag_counter+2, source,
439     in->MPIInfo->msg_tag_counter+2, in->MPIInfo->comm, &status);
440     MPI_Sendrecv_replace(Coordinates_buffer, buffer_len*numDim,
441     MPI_DOUBLE, dest, in->MPIInfo->msg_tag_counter+3, source,
442     in->MPIInfo->msg_tag_counter+3, in->MPIInfo->comm, &status);
443     #endif
444 jfenwick 4505 ESYS_MPI_INC_COUNTER(*(in->MPIInfo), 4)
445 caltinay 4428 }
446     buffer_rank=Esys_MPIInfo_mod(in->MPIInfo->size, buffer_rank-1);
447     }
448     // check if all nodes are set:
449     #pragma omp parallel for
450     for (int n=0; n<numNodes; ++n) {
451     if (Id[n] == undefined_node) {
452     std::stringstream ss;
453     ss << "NodeFile::gather_global: Node id " << Id[n]
454     << " at position " << n << " is referenced but not defined.";
455     const std::string errorMsg(ss.str());
456 caltinay 4496 setError(VALUE_ERROR, errorMsg.c_str());
457 caltinay 4428 }
458     }
459     delete[] Id_buffer;
460     delete[] Tag_buffer;
461     delete[] globalDegreesOfFreedom_buffer;
462     delete[] Coordinates_buffer;
463     // make sure that the error is global
464     Esys_MPIInfo_noError(in->MPIInfo);
465 ksteube 1312 }
466    
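// assignMPIRankToDOFs() records, for every node, the rank that owns its
// degree of freedom according to 'distribution'. The local DOF range is used
// to restrict the search to the ranks p_min..p_max that can possibly own one
// of this rank's DOFs.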
467 caltinay 4496 void NodeFile::assignMPIRankToDOFs(std::vector<int>& mpiRankOfDOF,
468     const std::vector<int>& distribution)
469 caltinay 4428 {
470     Esys_MPI_rank p_min=MPIInfo->size, p_max=-1;
471     // first we retrieve the min and max DOF on this processor to reduce
472     // costs for searching
473     const std::pair<int,int> dof_range(getDOFRange());
474    
475     for (int p=0; p<MPIInfo->size; ++p) {
476     if (distribution[p]<=dof_range.first) p_min=p;
477     if (distribution[p]<=dof_range.second) p_max=p;
478     }
479     #pragma omp parallel for
480     for (int n=0; n<numNodes; ++n) {
481     const int k=globalDegreesOfFreedom[n];
482     for (int p=p_min; p<=p_max; ++p) {
483     if (k < distribution[p+1]) {
484     mpiRankOfDOF[n]=p;
485     break;
486     }
487     }
488     }
489 caltinay 4492 }
490 caltinay 4428
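// prepareLabeling() is the common first step of the dense labeling routines:
// it splits the global DOF/node id range across the ranks, circulates a
// buffer covering the local section and marks every id that is used by an
// entry passing 'mask' (an empty mask selects everything), and finally
// replaces the markers by consecutive numbers 0..myNewCount-1. It returns the
// number of ids this rank will label; the buffer and the distribution are
// passed back through the reference arguments.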
491 caltinay 4496 int NodeFile::prepareLabeling(const std::vector<short>& mask,
492     std::vector<int>& buffer,
493 caltinay 4428 std::vector<int>& distribution, bool useNodes)
494     {
495     const int UNSET_ID=-1,SET_ID=1;
496    
497     // get the global range of DOF/node ids
498     std::pair<int,int> idRange(useNodes ?
499     getGlobalNodeIDIndexRange() : getGlobalDOFRange());
500     const int* indexArray = (useNodes ? globalNodesIndex : globalDegreesOfFreedom);
501     // distribute the range of node ids
502     distribution.assign(MPIInfo->size+1, 0);
503     int buffer_len=Esys_MPIInfo_setDistribution(MPIInfo, idRange.first,
504     idRange.second, &distribution[0]);
505     const int myCount=distribution[MPIInfo->rank+1]-distribution[MPIInfo->rank];
506    
507     // fill buffer with the UNSET_ID marker to check if nodes are defined
508     buffer.assign(buffer_len, UNSET_ID);
509    
510     // fill the buffer by sending portions around in a circle
511     #ifdef ESYS_MPI
512     MPI_Status status;
513     int dest=Esys_MPIInfo_mod(MPIInfo->size, MPIInfo->rank + 1);
514     int source=Esys_MPIInfo_mod(MPIInfo->size, MPIInfo->rank - 1);
515     #endif
516     int buffer_rank=MPIInfo->rank;
517     for (int p=0; p<MPIInfo->size; ++p) {
518     if (p>0) { // the initial send can be skipped
519     #ifdef ESYS_MPI
520     MPI_Sendrecv_replace(&buffer[0], buffer.size(), MPI_INT, dest,
521     MPIInfo->msg_tag_counter, source, MPIInfo->msg_tag_counter,
522     MPIInfo->comm, &status);
523     #endif
524     MPIInfo->msg_tag_counter++;
525     }
526     buffer_rank=Esys_MPIInfo_mod(MPIInfo->size, buffer_rank-1);
527     const int id0=distribution[buffer_rank];
528     const int id1=distribution[buffer_rank+1];
529     #pragma omp parallel for
530     for (int n=0; n<numNodes; n++) {
531 caltinay 4496 if (mask.size()<numNodes || mask[n]>-1) {
532 caltinay 4428 const int k=indexArray[n];
533     if (id0<=k && k<id1) {
534     buffer[k-id0] = SET_ID;
535     }
536     }
537     }
538     }
539     // count the entries in the buffer
540     // TODO: OMP parallel
541     int myNewCount=0;
542     for (int n=0; n<myCount; ++n) {
543     if (buffer[n] == SET_ID) {
544     buffer[n]=myNewCount;
545     myNewCount++;
546     }
547     }
548     return myNewCount;
549 ksteube 1312 }
550    
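// createDenseDOFLabeling() replaces the possibly sparse globalDegreesOfFreedom
// values by a dense, gap-free global numbering: each rank labels the DOFs it
// owns (via prepareLabeling), the per-rank counts are summed and turned into
// offsets, and the relabelled buffer is then sent around the ring so that
// every rank can translate all of its local entries exactly once. Returns the
// new total number of global DOFs.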
551 caltinay 4428 int NodeFile::createDenseDOFLabeling()
552     {
553     std::vector<int> DOF_buffer;
554     std::vector<int> distribution;
555     std::vector<int> loc_offsets(MPIInfo->size);
556     std::vector<int> offsets(MPIInfo->size);
557     int new_numGlobalDOFs=0;
558    
559     // retrieve the number of own DOFs and fill buffer
560 caltinay 4496 loc_offsets[MPIInfo->rank]=prepareLabeling(std::vector<short>(),
561     DOF_buffer, distribution, false);
562 caltinay 4428 #ifdef ESYS_MPI
563     MPI_Allreduce(&loc_offsets[0], &offsets[0], MPIInfo->size, MPI_INT,
564     MPI_SUM, MPIInfo->comm);
565     for (int n=0; n<MPIInfo->size; ++n) {
566     loc_offsets[n]=new_numGlobalDOFs;
567     new_numGlobalDOFs+=offsets[n];
568     }
569     #else
570     new_numGlobalDOFs=loc_offsets[0];
571     loc_offsets[0]=0;
572     #endif
573    
574     const int myDOFs=distribution[MPIInfo->rank+1]-distribution[MPIInfo->rank];
575     #pragma omp parallel for
576     for (int n=0; n<myDOFs; ++n)
577     DOF_buffer[n]+=loc_offsets[MPIInfo->rank];
578    
579 jfenwick 4524 std::vector<unsigned char> set_new_DOF(numNodes, true);
580 caltinay 4428
581     // now entries are collected from the buffer again by sending them around
582     // in a circle
583     #ifdef ESYS_MPI
584     int dest=Esys_MPIInfo_mod(MPIInfo->size, MPIInfo->rank + 1);
585     int source=Esys_MPIInfo_mod(MPIInfo->size, MPIInfo->rank - 1);
586     #endif
587     int buffer_rank=MPIInfo->rank;
588     for (int p=0; p<MPIInfo->size; ++p) {
589     const int dof0=distribution[buffer_rank];
590     const int dof1=distribution[buffer_rank+1];
591     #pragma omp parallel for
592     for (int n=0; n<numNodes; n++) {
593 caltinay 4429 const int k=globalDegreesOfFreedom[n];
594     if (set_new_DOF[n] && dof0<=k && k<dof1) {
595     globalDegreesOfFreedom[n]=DOF_buffer[k-dof0];
596 jfenwick 4521 set_new_DOF[n]=false;
597 caltinay 4429 }
598 caltinay 4428 }
599     if (p<MPIInfo->size-1) { // the last send can be skipped
600     #ifdef ESYS_MPI
601     MPI_Status status;
602     MPI_Sendrecv_replace(&DOF_buffer[0], DOF_buffer.size(), MPI_INT,
603     dest, MPIInfo->msg_tag_counter, source,
604     MPIInfo->msg_tag_counter, MPIInfo->comm, &status);
605     #endif
606 jfenwick 4505 ESYS_MPI_INC_COUNTER(*MPIInfo, 1)
607 caltinay 4428 }
608     buffer_rank=Esys_MPIInfo_mod(MPIInfo->size, buffer_rank-1);
609     }
610    
611     return new_numGlobalDOFs;
612 ksteube 1312 }
613    
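// createDenseNodeLabeling() produces the analogous dense numbering for nodes.
// A rank controls a node if it owns the node's DOF, so it first determines
// the id range of the nodes it controls, marks and counts them in a buffer
// whose two header entries carry that id range, derives the node distribution
// from the per-rank counts via MPI_Allgather, and finally circulates the
// buffer so that every rank can fill in globalNodesIndex. Returns the global
// number of nodes.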
614 caltinay 4496 int NodeFile::createDenseNodeLabeling(std::vector<int>& nodeDistribution,
615     const std::vector<int>& dofDistribution)
616 caltinay 4428 {
617     const int UNSET_ID=-1, SET_ID=1;
618 caltinay 4496 const int myFirstDOF=dofDistribution[MPIInfo->rank];
619     const int myLastDOF=dofDistribution[MPIInfo->rank+1];
620 caltinay 4428
621     // find the range of node ids controlled by me
622     int min_id=std::numeric_limits<int>::max();
623     int max_id=std::numeric_limits<int>::min();
624     #pragma omp parallel
625     {
626     int loc_max_id=max_id;
627     int loc_min_id=min_id;
628     #pragma omp for
629     for (int n=0; n<numNodes; n++) {
630     const int dof=globalDegreesOfFreedom[n];
631     if (myFirstDOF<=dof && dof<myLastDOF) {
632     loc_max_id=std::max(loc_max_id, Id[n]);
633     loc_min_id=std::min(loc_min_id, Id[n]);
634     }
635     }
636     #pragma omp critical
637     {
638     max_id=std::max(loc_max_id, max_id);
639     min_id=std::min(loc_min_id, min_id);
640     }
641     }
642     int my_buffer_len = (max_id>=min_id ? max_id-min_id+1 : 0);
643     int buffer_len;
644    
645     #ifdef ESYS_MPI
646     MPI_Allreduce(&my_buffer_len, &buffer_len, 1, MPI_INT, MPI_MAX,
647     MPIInfo->comm);
648     #else
649     buffer_len=my_buffer_len;
650     #endif
651    
652     const int header_len=2;
653     std::vector<int> Node_buffer(buffer_len+header_len, UNSET_ID);
654     // extra storage for these IDs
655     Node_buffer[0]=min_id;
656     Node_buffer[1]=max_id;
657    
658     // mark and count the nodes in use
659 caltinay 4429 #pragma omp parallel for
660     for (int n=0; n<numNodes; n++) {
661     globalNodesIndex[n]=-1;
662     const int dof=globalDegreesOfFreedom[n];
663     if (myFirstDOF<=dof && dof<myLastDOF)
664     Node_buffer[Id[n]-min_id+header_len]=SET_ID;
665 caltinay 4428 }
666     int myNewNumNodes=0;
667     for (int n=0; n<my_buffer_len; n++) {
668     if (Node_buffer[header_len+n]==SET_ID) {
669     Node_buffer[header_len+n]=myNewNumNodes;
670     myNewNumNodes++;
671     }
672     }
673     // make the local number of nodes globally available
674     #ifdef ESYS_MPI
675 caltinay 4496 MPI_Allgather(&myNewNumNodes, 1, MPI_INT, &nodeDistribution[0], 1, MPI_INT,
676 caltinay 4428 MPIInfo->comm);
677     #else
678 caltinay 4496 nodeDistribution[0]=myNewNumNodes;
679 caltinay 4428 #endif
680    
681     int globalNumNodes=0;
682     for (int p=0; p<MPIInfo->size; ++p) {
683 caltinay 4496 const int itmp=nodeDistribution[p];
684     nodeDistribution[p]=globalNumNodes;
685 caltinay 4428 globalNumNodes+=itmp;
686     }
687 caltinay 4496 nodeDistribution[MPIInfo->size]=globalNumNodes;
688 caltinay 4428
689     // offset node buffer
690 caltinay 4429 #pragma omp parallel for
691 caltinay 4428 for (int n=0; n<my_buffer_len; n++)
692 caltinay 4496 Node_buffer[n+header_len]+=nodeDistribution[MPIInfo->rank];
693 caltinay 4428
694     // now we send this buffer around to assign global node index
695     #ifdef ESYS_MPI
696     int dest=Esys_MPIInfo_mod(MPIInfo->size, MPIInfo->rank + 1);
697     int source=Esys_MPIInfo_mod(MPIInfo->size, MPIInfo->rank - 1);
698     #endif
699     int buffer_rank=MPIInfo->rank;
700     for (int p=0; p<MPIInfo->size; ++p) {
701     const int nodeID_0=Node_buffer[0];
702     const int nodeID_1=Node_buffer[1];
703 caltinay 4496 const int dof0=dofDistribution[buffer_rank];
704     const int dof1=dofDistribution[buffer_rank+1];
705 caltinay 4428 if (nodeID_0 <= nodeID_1) {
706 caltinay 4429 #pragma omp parallel for
707 caltinay 4428 for (int n=0; n<numNodes; n++) {
708     const int dof=globalDegreesOfFreedom[n];
709     const int id=Id[n]-nodeID_0;
710     if (dof0<=dof && dof<dof1 && id>=0 && id<=nodeID_1-nodeID_0)
711     globalNodesIndex[n]=Node_buffer[id+header_len];
712     }
713     }
714     if (p<MPIInfo->size-1) { // the last send can be skipped
715     #ifdef ESYS_MPI
716     MPI_Status status;
717     MPI_Sendrecv_replace(&Node_buffer[0], Node_buffer.size(), MPI_INT,
718     dest, MPIInfo->msg_tag_counter, source,
719     MPIInfo->msg_tag_counter, MPIInfo->comm, &status);
720     #endif
721 jfenwick 4505 ESYS_MPI_INC_COUNTER(*MPIInfo, 1)
722 caltinay 4428 }
723     buffer_rank=Esys_MPIInfo_mod(MPIInfo->size, buffer_rank-1);
724     }
725     return globalNumNodes;
726 ksteube 1312 }
727    
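// createDenseReducedLabeling() is the reduced-order variant: only entries
// with reducedMask[n]>-1 take part, and depending on 'useNodes' either
// globalReducedNodesIndex or globalReducedDOFIndex receives the new dense
// numbering. The communication follows the same buffer ring as above.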
728 caltinay 4496 int NodeFile::createDenseReducedLabeling(const std::vector<short>& reducedMask,
729     bool useNodes)
730 caltinay 4428 {
731     std::vector<int> buffer;
732     std::vector<int> distribution;
733     std::vector<int> loc_offsets(MPIInfo->size);
734     std::vector<int> offsets(MPIInfo->size);
735     int new_numGlobalReduced=0;
736    
737     // retrieve the number of own DOFs/nodes and fill buffer
738     loc_offsets[MPIInfo->rank]=prepareLabeling(reducedMask, buffer,
739     distribution, useNodes);
740     #ifdef ESYS_MPI
741     MPI_Allreduce(&loc_offsets[0], &offsets[0], MPIInfo->size, MPI_INT,
742     MPI_SUM, MPIInfo->comm);
743     for (int n=0; n<MPIInfo->size; ++n) {
744     loc_offsets[n]=new_numGlobalReduced;
745     new_numGlobalReduced+=offsets[n];
746     }
747     #else
748     new_numGlobalReduced=loc_offsets[0];
749     loc_offsets[0]=0;
750     #endif
751    
752     const int myCount=distribution[MPIInfo->rank+1]-distribution[MPIInfo->rank];
753     #pragma omp parallel for
754     for (int n=0; n<myCount; ++n)
755     buffer[n]+=loc_offsets[MPIInfo->rank];
756    
757     const int* denseArray =
758     (useNodes ? globalNodesIndex : globalDegreesOfFreedom);
759     int* reducedArray =
760     (useNodes ? globalReducedNodesIndex : globalReducedDOFIndex);
761    
762     #pragma omp parallel for
763     for (int n=0; n<numNodes; ++n)
764     reducedArray[n]=loc_offsets[0]-1;
765    
766     // now entries are collected from the buffer by sending them around
767     // in a circle
768     #ifdef ESYS_MPI
769     int dest=Esys_MPIInfo_mod(MPIInfo->size, MPIInfo->rank + 1);
770     int source=Esys_MPIInfo_mod(MPIInfo->size, MPIInfo->rank - 1);
771     #endif
772     int buffer_rank=MPIInfo->rank;
773     for (int p=0; p<MPIInfo->size; ++p) {
774     const int id0=distribution[buffer_rank];
775     const int id1=distribution[buffer_rank+1];
776     #pragma omp parallel for
777     for (int n=0; n<numNodes; n++) {
778     if (reducedMask[n] > -1) {
779     const int k=denseArray[n];
780     if (id0<=k && k<id1)
781     reducedArray[n]=buffer[k-id0];
782     }
783     }
784     if (p<MPIInfo->size-1) { // the last send can be skipped
785     #ifdef ESYS_MPI
786     MPI_Status status;
787     MPI_Sendrecv_replace(&buffer[0], buffer.size(), MPI_INT, dest,
788     MPIInfo->msg_tag_counter, source,
789     MPIInfo->msg_tag_counter, MPIInfo->comm, &status);
790     #endif
791 jfenwick 4505 ESYS_MPI_INC_COUNTER(*MPIInfo, 1)
792 caltinay 4428 }
793     buffer_rank=Esys_MPIInfo_mod(MPIInfo->size, buffer_rank-1);
794     }
795     return new_numGlobalReduced;
796 ksteube 1312 }
797    
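// createDOFMappingAndCoupling() builds, for the full or the reduced DOF
// distribution, the node-to-local-DOF mapping and the Paso connector that
// describes which DOF values have to be exchanged with which neighbour
// ranks: DOFs that are referenced locally but owned elsewhere get local
// indices appended after the owned ones, the list of wanted remote DOFs is
// sent to their owners with MPI_Isend/MPI_Irecv, and the resulting receive
// and send shared components are combined into a Paso_Connector.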
798 caltinay 4492 void NodeFile::createDOFMappingAndCoupling(bool use_reduced_elements)
799     {
800     Paso_Distribution* dof_distribution;
801     const int* globalDOFIndex;
802     if (use_reduced_elements) {
803     dof_distribution=reducedDegreesOfFreedomDistribution;
804     globalDOFIndex=globalReducedDOFIndex;
805     } else {
806     dof_distribution=degreesOfFreedomDistribution;
807     globalDOFIndex=globalDegreesOfFreedom;
808     }
809     const int myFirstDOF=Paso_Distribution_getFirstComponent(dof_distribution);
810     const int myLastDOF=Paso_Distribution_getLastComponent(dof_distribution);
811     const int mpiSize=MPIInfo->size;
812     const int myRank=MPIInfo->rank;
813    
814     int min_DOF, max_DOF;
815     std::pair<int,int> DOF_range(util::getFlaggedMinMaxInt(
816     numNodes, globalDOFIndex, -1));
817    
818     if (DOF_range.second < DOF_range.first) {
819     min_DOF=myFirstDOF;
820     max_DOF=myLastDOF-1;
821     } else {
822     min_DOF=DOF_range.first;
823     max_DOF=DOF_range.second;
824     }
825    
826     int p_min=mpiSize;
827     int p_max=-1;
828     if (max_DOF >= min_DOF) {
829     for (int p=0; p<mpiSize; ++p) {
830     if (dof_distribution->first_component[p]<=min_DOF) p_min=p;
831     if (dof_distribution->first_component[p]<=max_DOF) p_max=p;
832     }
833     }
834    
835     if (!((min_DOF<=myFirstDOF) && (myLastDOF-1<=max_DOF))) {
836 caltinay 4496 setError(SYSTEM_ERROR, "Local elements do not span local degrees of freedom.");
837 caltinay 4492 return;
838     }
839     const int UNUSED = -1;
840     const int len_loc_dof=max_DOF-min_DOF+1;
841     std::vector<int> shared(numNodes*(p_max-p_min+1));
842     std::vector<int> offsetInShared(mpiSize+1);
843     std::vector<int> locDOFMask(len_loc_dof, UNUSED);
844    
845     #pragma omp parallel
846     {
847     #pragma omp for
848     for (int i=0;i<numNodes;++i) {
849     const int k=globalDOFIndex[i];
850     if (k > -1) {
851     #ifdef BOUNDS_CHECK
852     if ((k-min_DOF)>=len_loc_dof) {
853     printf("BOUNDS_CHECK %s %d i=%d k=%d min_DOF=%d\n", __FILE__, __LINE__, i, k, min_DOF);
854     exit(1);
855     }
856     #endif
857     locDOFMask[k-min_DOF]=UNUSED-1;
858     }
859     }
860     #ifdef BOUNDS_CHECK
861     if (myLastDOF-min_DOF > len_loc_dof) {
862     printf("BOUNDS_CHECK %s %d\n", __FILE__, __LINE__);
863     exit(1);
864     }
865     #endif
866     #pragma omp for
867     for (int i=myFirstDOF-min_DOF; i<myLastDOF-min_DOF; ++i) {
868     locDOFMask[i]=i-myFirstDOF+min_DOF;
869     }
870     }
871    
872     std::vector<int> wanted_DOFs(numNodes);
873     std::vector<int> rcv_len(mpiSize);
874     std::vector<int> snd_len(mpiSize);
875     std::vector<int> neighbor(mpiSize);
876     int numNeighbors=0;
877     int n=0;
878     int lastn=n;
879     for (int p=p_min; p<=p_max; ++p) {
880     if (p != myRank) {
881     const int firstDOF=std::max(min_DOF, dof_distribution->first_component[p]);
882     const int lastDOF=std::min(max_DOF+1, dof_distribution->first_component[p+1]);
883     #ifdef BOUNDS_CHECK
884     if (firstDOF-min_DOF<0 || lastDOF-min_DOF>len_loc_dof) {
885     printf("BOUNDS_CHECK %s %d p=%d\n", __FILE__, __LINE__, p);
886     exit(1);
887     }
888     #endif
889     for (int i=firstDOF-min_DOF; i<lastDOF-min_DOF; ++i) {
890     if (locDOFMask[i] == UNUSED-1) {
891     locDOFMask[i]=myLastDOF-myFirstDOF+n;
892     wanted_DOFs[n]=i+min_DOF;
893     ++n;
894     }
895     }
896     if (n > lastn) {
897     rcv_len[p]=n-lastn;
898     #ifdef BOUNDS_CHECK
899     if (numNeighbors >= mpiSize+1) {
900     printf("BOUNDS_CHECK %s %d p=%d numNeighbors=%d n=%d\n", __FILE__, __LINE__, p, numNeighbors, n);
901     exit(1);
902     }
903     #endif
904     neighbor[numNeighbors]=p;
905     offsetInShared[numNeighbors]=lastn;
906     numNeighbors++;
907     lastn=n;
908     }
909     } // if p!=myRank
910     } // for p
911    
912     #ifdef BOUNDS_CHECK
913     if (numNeighbors >= mpiSize+1) {
914     printf("BOUNDS_CHECK %s %d numNeighbors=%d\n", __FILE__, __LINE__, numNeighbors);
915     exit(1);
916     }
917     #endif
918     offsetInShared[numNeighbors]=lastn;
919    
920     // assign new DOF labels to nodes
921     std::vector<int> nodeMask(numNodes, UNUSED);
922     #pragma omp parallel for
923     for (int i=0; i<numNodes; ++i) {
924     const int k=globalDOFIndex[i];
925     if (k > -1)
926     nodeMask[i]=locDOFMask[k-min_DOF];
927     }
928    
929     // now we can set the mapping from nodes to local DOFs
930     if (use_reduced_elements) {
931     reducedDegreesOfFreedomMapping.assign(nodeMask, UNUSED);
932     } else {
933     degreesOfFreedomMapping.assign(nodeMask, UNUSED);
934     }
935    
936     // define how to get DOF values for nodes controlled by other processors
937     #ifdef BOUNDS_CHECK
938 caltinay 4496 if (offsetInShared[numNeighbors] >= numNodes*(p_max-p_min+1)) {
939 caltinay 4492 printf("BOUNDS_CHECK %s %d\n", __FILE__, __LINE__);
940     exit(1);
941     }
942     #endif
943     #pragma omp parallel for
944     for (int i=0; i<offsetInShared[numNeighbors]; ++i)
945     shared[i]=myLastDOF-myFirstDOF+i;
946    
947     Paso_SharedComponents *rcv_shcomp=Paso_SharedComponents_alloc(
948     myLastDOF-myFirstDOF, numNeighbors, &neighbor[0], &shared[0],
949     &offsetInShared[0], 1, 0, MPIInfo);
950    
951     /////////////////////////////////
952     // now we build the sender //
953     /////////////////////////////////
954     #ifdef ESYS_MPI
955     std::vector<MPI_Request> mpi_requests(mpiSize*2);
956     std::vector<MPI_Status> mpi_stati(mpiSize*2);
957     MPI_Alltoall(&rcv_len[0], 1, MPI_INT, &snd_len[0], 1, MPI_INT, MPIInfo->comm);
958     int count=0;
959     #else
960     snd_len[0]=rcv_len[0];
961     #endif
962    
963     for (int p=0; p<rcv_shcomp->numNeighbors; p++) {
964     #ifdef ESYS_MPI
965     MPI_Isend(&(wanted_DOFs[rcv_shcomp->offsetInShared[p]]),
966     rcv_shcomp->offsetInShared[p+1]-rcv_shcomp->offsetInShared[p],
967     MPI_INT, rcv_shcomp->neighbor[p],
968     MPIInfo->msg_tag_counter+myRank, MPIInfo->comm,
969     &mpi_requests[count]);
970     count++;
971     #endif
972     }
973     n=0;
974     numNeighbors=0;
975     for (int p=0; p<mpiSize; p++) {
976     if (snd_len[p] > 0) {
977     #ifdef ESYS_MPI
978     MPI_Irecv(&shared[n], snd_len[p], MPI_INT, p,
979     MPIInfo->msg_tag_counter+p, MPIInfo->comm,
980     &mpi_requests[count]);
981     count++;
982     #endif
983     neighbor[numNeighbors]=p;
984     offsetInShared[numNeighbors]=n;
985     numNeighbors++;
986     n+=snd_len[p];
987     }
988     }
989 jfenwick 4505 ESYS_MPI_INC_COUNTER(*MPIInfo, MPIInfo->size)
990 caltinay 4492 offsetInShared[numNeighbors]=n;
991     #ifdef ESYS_MPI
992     MPI_Waitall(count, &mpi_requests[0], &mpi_stati[0]);
993     #endif
994     // map global ids to local ids
995     #pragma omp parallel for
996     for (int i=0; i<offsetInShared[numNeighbors]; ++i) {
997     shared[i]=locDOFMask[shared[i]-min_DOF];
998     }
999    
1000     Paso_SharedComponents* snd_shcomp=Paso_SharedComponents_alloc(
1001     myLastDOF-myFirstDOF, numNeighbors, &neighbor[0], &shared[0],
1002     &offsetInShared[0], 1, 0, MPIInfo);
1003    
1004 caltinay 4496 if (noError()) {
1005 caltinay 4492 if (use_reduced_elements) {
1006     reducedDegreesOfFreedomConnector=Paso_Connector_alloc(snd_shcomp, rcv_shcomp);
1007     } else {
1008     degreesOfFreedomConnector=Paso_Connector_alloc(snd_shcomp, rcv_shcomp);
1009     }
1010     }
1011    
1012     Paso_SharedComponents_free(rcv_shcomp);
1013     Paso_SharedComponents_free(snd_shcomp);
1014     }
1015    
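// createNodeMappings() ties the pieces together once the mesh is distributed:
// it derives the reduced DOF and node distributions from indexReducedNodes,
// allocates the four Paso_Distributions, sets up the (trivial) nodes mapping
// and the reduced-nodes mapping, builds the DOF mappings and connectors for
// the full and the reduced case, and finally records the node Ids behind each
// mapping target. If an error occurred, all partially built distributions and
// connectors are freed again.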
1016 caltinay 4496 void NodeFile::createNodeMappings(const std::vector<int>& indexReducedNodes,
1017     const std::vector<int>& dofDist,
1018     const std::vector<int>& nodeDist)
1019 caltinay 4492 {
1020     const int mpiSize=MPIInfo->size;
1021     const int myRank=MPIInfo->rank;
1022    
1023     const int myFirstDOF=dofDist[myRank];
1024     const int myLastDOF=dofDist[myRank+1];
1025     const int myNumDOF=myLastDOF-myFirstDOF;
1026    
1027     const int myFirstNode=nodeDist[myRank];
1028     const int myLastNode=nodeDist[myRank+1];
1029     const int myNumNodes=myLastNode-myFirstNode;
1030    
1031 caltinay 4496 std::vector<short> maskMyReducedDOF(myNumDOF, -1);
1032     std::vector<short> maskMyReducedNodes(myNumNodes, -1);
1033 caltinay 4492
1034     // mark the nodes used by the reduced mesh
1035     #pragma omp parallel for
1036 caltinay 4496 for (int i=0; i<indexReducedNodes.size(); ++i) {
1037 caltinay 4492 int k=globalNodesIndex[indexReducedNodes[i]];
1038     if (k>=myFirstNode && myLastNode>k)
1039 caltinay 4496 maskMyReducedNodes[k-myFirstNode]=1;
1040 caltinay 4492 k=globalDegreesOfFreedom[indexReducedNodes[i]];
1041     if (k>=myFirstDOF && myLastDOF>k) {
1042 caltinay 4496 maskMyReducedDOF[k-myFirstDOF]=1;
1043 caltinay 4492 }
1044     }
1045 caltinay 4496 std::vector<int> indexMyReducedDOF = util::packMask(maskMyReducedDOF);
1046     int myNumReducedDOF=indexMyReducedDOF.size();
1047     std::vector<int> indexMyReducedNodes = util::packMask(maskMyReducedNodes);
1048     int myNumReducedNodes=indexMyReducedNodes.size();
1049 caltinay 4492
1050     std::vector<int> rdofDist(mpiSize+1);
1051     std::vector<int> rnodeDist(mpiSize+1);
1052     #ifdef ESYS_MPI
1053     MPI_Allgather(&myNumReducedNodes, 1, MPI_INT, &rnodeDist[0], 1, MPI_INT, MPIInfo->comm);
1054     MPI_Allgather(&myNumReducedDOF, 1, MPI_INT, &rdofDist[0], 1, MPI_INT, MPIInfo->comm);
1055     #else
1056     rnodeDist[0]=myNumReducedNodes;
1057     rdofDist[0]=myNumReducedDOF;
1058     #endif
1059     int globalNumReducedNodes=0;
1060     int globalNumReducedDOF=0;
1061     for (int i=0; i<mpiSize;++i) {
1062     int k=rnodeDist[i];
1063     rnodeDist[i]=globalNumReducedNodes;
1064     globalNumReducedNodes+=k;
1065    
1066     k=rdofDist[i];
1067     rdofDist[i]=globalNumReducedDOF;
1068     globalNumReducedDOF+=k;
1069     }
1070     rnodeDist[mpiSize]=globalNumReducedNodes;
1071     rdofDist[mpiSize]=globalNumReducedDOF;
1072    
1073     // ==== distribution of Nodes ===============================
1074 caltinay 4496 nodesDistribution=Paso_Distribution_alloc(MPIInfo, &nodeDist[0], 1, 0);
1075 caltinay 4492 // ==== distribution of DOFs ================================
1076     degreesOfFreedomDistribution=Paso_Distribution_alloc(MPIInfo, &dofDist[0], 1,0);
1077     // ==== distribution of reduced Nodes =======================
1078     reducedNodesDistribution=Paso_Distribution_alloc(MPIInfo, &rnodeDist[0], 1, 0);
1079     // ==== distribution of reduced DOF =========================
1080     reducedDegreesOfFreedomDistribution=Paso_Distribution_alloc(MPIInfo, &rdofDist[0], 1, 0);
1081    
1082     std::vector<int> nodeMask(numNodes);
1083    
1084 caltinay 4496 if (noError()) {
1085 caltinay 4492 const int UNUSED = -1;
1086     // ==== nodes mapping which is a dummy structure ========
1087     #pragma omp parallel for
1088     for (int i=0; i<numNodes; ++i)
1089     nodeMask[i]=i;
1090     nodesMapping.assign(nodeMask, UNUSED);
1091    
1092     // ==== mapping between nodes and reduced nodes ==========
1093     #pragma omp parallel for
1094     for (int i=0; i<numNodes; ++i)
1095     nodeMask[i]=UNUSED;
1096     #pragma omp parallel for
1097 caltinay 4496 for (int i=0; i<indexReducedNodes.size(); ++i)
1098 caltinay 4492 nodeMask[indexReducedNodes[i]]=i;
1099     reducedNodesMapping.assign(nodeMask, UNUSED);
1100     }
1101     // ==== mapping between nodes and DOFs + DOF connector
1102 caltinay 4496 if (noError())
1103 caltinay 4492 createDOFMappingAndCoupling(false);
1104     // ==== mapping between nodes and reduced DOFs + reduced DOF connector
1105 caltinay 4496 if (noError())
1106 caltinay 4492 createDOFMappingAndCoupling(true);
1107    
1108     // get the Ids for DOFs and reduced nodes
1109 caltinay 4496 if (noError()) {
1110 caltinay 4493 #pragma omp parallel
1111 caltinay 4492 {
1112     #pragma omp for
1113     for (int i=0; i<reducedNodesMapping.getNumTargets(); ++i)
1114     reducedNodesId[i]=Id[reducedNodesMapping.map[i]];
1115     #pragma omp for
1116     for (int i=0; i<degreesOfFreedomMapping.getNumTargets(); ++i)
1117     degreesOfFreedomId[i]=Id[degreesOfFreedomMapping.map[i]];
1118     #pragma omp for
1119     for (int i=0; i<reducedDegreesOfFreedomMapping.getNumTargets(); ++i)
1120     reducedDegreesOfFreedomId[i]=Id[reducedDegreesOfFreedomMapping.map[i]];
1121     }
1122     } else {
1123     Paso_Distribution_free(nodesDistribution);
1124     Paso_Distribution_free(reducedNodesDistribution);
1125     Paso_Distribution_free(degreesOfFreedomDistribution);
1126     Paso_Distribution_free(reducedDegreesOfFreedomDistribution);
1127     Paso_Connector_free(degreesOfFreedomConnector);
1128     Paso_Connector_free(reducedDegreesOfFreedomConnector);
1129     nodesDistribution=NULL;
1130     reducedNodesDistribution=NULL;
1131     degreesOfFreedomDistribution=NULL;
1132     reducedDegreesOfFreedomDistribution=NULL;
1133     degreesOfFreedomConnector=NULL;
1134     reducedDegreesOfFreedomConnector=NULL;
1135     }
1136     }
1137    
1138 caltinay 4428 } // namespace finley
1139 ksteube 1312

