/[escript]/branches/trilinos_from_5897/dudley/src/NodeFile_createDenseLabelings.cpp

Diff of /branches/trilinos_from_5897/dudley/src/NodeFile_createDenseLabelings.cpp

revision 6078 by caltinay, Wed Mar 2 04:13:26 2016 UTC → revision 6079 by caltinay, Mon Mar 21 12:22:38 2016 UTC
# Line 14
 *
 *****************************************************************************/

-/****************************************************************************/
-
-/*   Dudley: Mesh: NodeFile                                   */
-
-/*   creates a dense labeling of the global degrees of freedom  */
-/*   and returns the new number of  global degrees of freedom  */
-
-/****************************************************************************/
-
#include "NodeFile.h"

namespace dudley {

-dim_t Dudley_NodeFile_createDenseDOFLabeling(Dudley_NodeFile* in)
+dim_t NodeFile::createDenseDOFLabeling()
{
-    index_t min_dof, max_dof, unset_dof = -1, set_dof = 1, dof_0, dof_1, *DOF_buffer = NULL, k;
-    int buffer_rank, *distribution = NULL;
-    dim_t p, buffer_len, n, myDOFs, *offsets = NULL, *loc_offsets = NULL, new_numGlobalDOFs = 0, myNewDOFs;
-    bool *set_new_DOF = NULL;
-#ifdef ESYS_MPI
-    int dest, source;
-    MPI_Status status;
-#endif
+    const index_t UNSET_ID = -1, SET_ID = 1;

-    /* get the global range of node ids */
-    Dudley_NodeFile_setGlobalDOFRange(&min_dof, &max_dof, in);
+    // get the global range of DOF IDs
+    const std::pair<index_t,index_t> idRange(getGlobalDOFRange());

-    distribution = new index_t[in->MPIInfo->size + 1];
-    offsets = new dim_t[in->MPIInfo->size];
-    loc_offsets = new dim_t[in->MPIInfo->size];
-    set_new_DOF = new bool[in->numNodes];
-
-    /* distribute the range of node ids */
-    buffer_len = in->MPIInfo->setDistribution(min_dof, max_dof, distribution);
-    myDOFs = distribution[in->MPIInfo->rank + 1] - distribution[in->MPIInfo->rank];
-    /* allocate buffers */
-    DOF_buffer = new  index_t[buffer_len];
-    /* fill DOF_buffer by the unset_dof marker to check if nodes are defined */
-#pragma omp parallel for private(n) schedule(static)
-    for (n = 0; n < buffer_len; n++)
-        DOF_buffer[n] = unset_dof;
+    // distribute the range of DOF IDs
+    std::vector<index_t> distribution(MPIInfo->size + 1);
+    dim_t bufferLen = MPIInfo->setDistribution(idRange.first, idRange.second,
+                                              &distribution[0]);
+    const dim_t myDOFs = distribution[MPIInfo->rank + 1] - distribution[MPIInfo->rank];
+
+    index_t* DOF_buffer = new index_t[bufferLen];
+    // fill buffer by the UNSET_ID marker to check if nodes are defined
+#pragma omp parallel for
+    for (index_t n = 0; n < bufferLen; n++)
+        DOF_buffer[n] = UNSET_ID;

-    /* fill the buffer by sending portions around in a circle */
+    // fill the buffer by sending portions around in a circle
#ifdef ESYS_MPI
-    dest = in->MPIInfo->mod_rank(in->MPIInfo->rank + 1);
-    source = in->MPIInfo->mod_rank(in->MPIInfo->rank - 1);
+    MPI_Status status;
+    int dest = MPIInfo->mod_rank(MPIInfo->rank + 1);
+    int source = MPIInfo->mod_rank(MPIInfo->rank - 1);
#endif
-    buffer_rank = in->MPIInfo->rank;
-    for (p = 0; p < in->MPIInfo->size; ++p) {
-        if (p > 0) { /* the initial send can be skipped */
+    int buffer_rank = MPIInfo->rank;
+    for (int p = 0; p < MPIInfo->size; ++p) {
#ifdef ESYS_MPI
-            MPI_Sendrecv_replace(DOF_buffer, buffer_len, MPI_INT,
-                                 dest, in->MPIInfo->counter(), source, in->MPIInfo->counter(),
-                                 in->MPIInfo->comm, &status);
-            in->MPIInfo->incCounter();
-#endif
+        if (p > 0) { // the initial send can be skipped
+            MPI_Sendrecv_replace(DOF_buffer, bufferLen, MPI_DIM_T, dest,
+                                 MPIInfo->counter(), source, MPIInfo->counter(),
+                                 MPIInfo->comm, &status);
+            MPIInfo->incCounter();
        }
-        buffer_rank = in->MPIInfo->mod_rank(buffer_rank - 1);
-        dof_0 = distribution[buffer_rank];
-        dof_1 = distribution[buffer_rank + 1];
-#pragma omp parallel for private(n,k) schedule(static)
-        for (n = 0; n < in->numNodes; n++) {
-            k = in->globalDegreesOfFreedom[n];
-            if ((dof_0 <= k) && (k < dof_1)) {
-                DOF_buffer[k - dof_0] = set_dof;
+#endif
+        buffer_rank = MPIInfo->mod_rank(buffer_rank - 1);
+        const index_t dof0 = distribution[buffer_rank];
+        const index_t dof1 = distribution[buffer_rank + 1];
+#pragma omp parallel for
+        for (index_t n = 0; n < numNodes; n++) {
+            const index_t k = globalDegreesOfFreedom[n];
+            if (dof0 <= k && k < dof1) {
+                DOF_buffer[k - dof0] = SET_ID;
            }
        }
    }
-    /* count the entries in the DOF_buffer */
-    /* TODO: OMP parallel */
-    myNewDOFs = 0;
-    for (n = 0; n < myDOFs; ++n) {
-        if (DOF_buffer[n] == set_dof) {
+    // count the entries in the buffer
+    // TODO: OMP parallel
+    dim_t myNewDOFs = 0;
+    for (index_t n = 0; n < myDOFs; ++n) {
+        if (DOF_buffer[n] == SET_ID) {
            DOF_buffer[n] = myNewDOFs;
            myNewDOFs++;
        }
    }
-    memset(loc_offsets, 0, in->MPIInfo->size * sizeof(dim_t));
-    loc_offsets[in->MPIInfo->rank] = myNewDOFs;
+
+    std::vector<index_t> loc_offsets(MPIInfo->size);
+    std::vector<index_t> offsets(MPIInfo->size);
+    dim_t new_numGlobalDOFs;
+    bool* set_new_DOF = new bool[numNodes];

#ifdef ESYS_MPI
-    MPI_Allreduce(loc_offsets, offsets, in->MPIInfo->size, MPI_INT, MPI_SUM, in->MPIInfo->comm);
    new_numGlobalDOFs = 0;
-    for (n = 0; n < in->MPIInfo->size; ++n) {
+    loc_offsets[MPIInfo->rank] = myNewDOFs;
+    MPI_Allreduce(&loc_offsets[0], &offsets[0], MPIInfo->size, MPI_DIM_T,
+                  MPI_SUM, MPIInfo->comm);
+    for (int n = 0; n < MPIInfo->size; ++n) {
        loc_offsets[n] = new_numGlobalDOFs;
        new_numGlobalDOFs += offsets[n];
    }
#else
-    new_numGlobalDOFs = loc_offsets[0];
-    loc_offsets[0] = 0;
+    new_numGlobalDOFs = myNewDOFs;
#endif

#pragma omp parallel
    {
-#pragma omp for private(n) schedule(static)
-        for (n = 0; n < myDOFs; ++n)
-            DOF_buffer[n] += loc_offsets[in->MPIInfo->rank];
-        /* now entries are collected from the buffer again by sending the entries around in a circle */
-#pragma omp for private(n) schedule(static)
-        for (n = 0; n < in->numNodes; ++n)
+#pragma omp for
+        for (index_t n = 0; n < myDOFs; ++n)
+            DOF_buffer[n] += loc_offsets[MPIInfo->rank];
+#pragma omp for
+        for (index_t n = 0; n < numNodes; ++n)
            set_new_DOF[n] = true;
    }

+    // now entries are collected from the buffer again by sending them around
+    // in a circle
#ifdef ESYS_MPI
-    dest = in->MPIInfo->mod_rank(in->MPIInfo->rank + 1);
-    source = in->MPIInfo->mod_rank(in->MPIInfo->rank - 1);
+    dest = MPIInfo->mod_rank(MPIInfo->rank + 1);
+    source = MPIInfo->mod_rank(MPIInfo->rank - 1);
#endif
-    buffer_rank = in->MPIInfo->rank;
-    for (p = 0; p < in->MPIInfo->size; ++p) {
-        dof_0 = distribution[buffer_rank];
-        dof_1 = distribution[buffer_rank + 1];
-#pragma omp parallel for private(n,k) schedule(static)
-        for (n = 0; n < in->numNodes; n++) {
-            k = in->globalDegreesOfFreedom[n];
-            if (set_new_DOF[n] && (dof_0 <= k) && (k < dof_1)) {
-                in->globalDegreesOfFreedom[n] = DOF_buffer[k - dof_0];
+    buffer_rank = MPIInfo->rank;
+    for (int p = 0; p < MPIInfo->size; ++p) {
+        const index_t dof0 = distribution[buffer_rank];
+        const index_t dof1 = distribution[buffer_rank + 1];
+#pragma omp parallel for
+        for (index_t n = 0; n < numNodes; n++) {
+            const index_t k = globalDegreesOfFreedom[n];
+            if (set_new_DOF[n] && dof0 <= k && k < dof1) {
+                globalDegreesOfFreedom[n] = DOF_buffer[k - dof0];
                set_new_DOF[n] = false;
            }
        }
-        if (p < in->MPIInfo->size - 1) { /* the last send can be skipped */
#ifdef ESYS_MPI
-            MPI_Sendrecv_replace(DOF_buffer, buffer_len, MPI_INT,
-                                 dest, in->MPIInfo->counter(), source, in->MPIInfo->counter(),
-                                 in->MPIInfo->comm, &status);
-            in->MPIInfo->incCounter();
-#endif
+        if (p < MPIInfo->size - 1) { // the last send can be skipped
+            MPI_Sendrecv_replace(DOF_buffer, bufferLen, MPI_DIM_T, dest,
+                                 MPIInfo->counter(), source, MPIInfo->counter(),
+                                 MPIInfo->comm, &status);
+            MPIInfo->incCounter();
        }
-        buffer_rank = in->MPIInfo->mod_rank(buffer_rank - 1);
+#endif
+        buffer_rank = MPIInfo->mod_rank(buffer_rank - 1);
    }
    delete[] DOF_buffer;
-    delete[] distribution;
-    delete[] loc_offsets;
-    delete[] offsets;
    delete[] set_new_DOF;
    return new_numGlobalDOFs;
}
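Both the old and the new createDenseDOFLabeling rely on the same ring communication scheme: each rank repeatedly hands its marker buffer to the next rank with MPI_Sendrecv_replace, so after size-1 exchanges every buffer has visited every rank and no all-to-all exchange is needed. The sketch below isolates just that rotation with plain MPI; the fixed tag 0, the int payload and the printf output are illustrative stand-ins for the MPIInfo wrapper, the counter-based tags and MPI_DIM_T used above.

    // ring_rotation.cpp -- minimal sketch of the buffer rotation (assumptions:
    // plain MPI, fixed tag 0, int payload; not Dudley's MPIInfo wrapper)
    #include <mpi.h>
    #include <cstdio>
    #include <vector>

    int main(int argc, char** argv)
    {
        MPI_Init(&argc, &argv);
        int rank, size;
        MPI_Comm_rank(MPI_COMM_WORLD, &rank);
        MPI_Comm_size(MPI_COMM_WORLD, &size);

        // each rank starts with a buffer filled with its own rank number
        std::vector<int> buffer(4, rank);
        const int dest = (rank + 1) % size;           // next rank in the ring
        const int source = (rank - 1 + size) % size;  // previous rank

        for (int p = 0; p < size; ++p) {
            if (p > 0) {  // the initial send can be skipped, as above
                MPI_Status status;
                MPI_Sendrecv_replace(buffer.data(), static_cast<int>(buffer.size()),
                                     MPI_INT, dest, 0, source, 0,
                                     MPI_COMM_WORLD, &status);
            }
            // after p exchanges this rank holds the buffer that started on
            // rank (rank - p + size) % size, so every buffer is seen once
            std::printf("rank %d, step %d: holding buffer of rank %d\n",
                        rank, p, buffer[0]);
        }
        MPI_Finalize();
        return 0;
    }

Built with an MPI compiler wrapper (for example mpic++ ring_rotation.cpp) and run on a few ranks, the output shows each buffer making one full round of the ring, which is exactly the access pattern the marking and collecting loops above exploit.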

-void Dudley_NodeFile_assignMPIRankToDOFs(Dudley_NodeFile * in, int * mpiRankOfDOF, index_t * distribution)
-{
-    index_t min_DOF, max_DOF, k;
-    dim_t n;
-    int p, p_min = in->MPIInfo->size, p_max = -1;
-    /* first we calculate the min and max dof on this processor to reduce costs for searching */
-    Dudley_NodeFile_setDOFRange(&min_DOF, &max_DOF, in);
-
-    for (p = 0; p < in->MPIInfo->size; ++p) {
-        if (distribution[p] <= min_DOF)
-            p_min = p;
-        if (distribution[p] <= max_DOF)
-            p_max = p;
-    }
-#pragma omp parallel for private(n,k,p) schedule(static)
-    for (n = 0; n < in->numNodes; ++n) {
-        k = in->globalDegreesOfFreedom[n];
-        for (p = p_min; p <= p_max; ++p) {
-            if (k < distribution[p + 1]) {
-                mpiRankOfDOF[n] = p;
-                break;
-            }
-        }
-    }
-}
-
-dim_t Dudley_NodeFile_createDenseReducedDOFLabeling(Dudley_NodeFile * in, index_t * reducedNodeMask)
+dim_t NodeFile::createDenseNodeLabeling(std::vector<index_t>& nodeDistribution,
+                                   const std::vector<index_t>& dofDistribution)
{
-    index_t min_dof, max_dof, unset_dof = -1, set_dof = 1, dof_0, dof_1, *DOF_buffer = NULL, k;
-    int buffer_rank, *distribution = NULL;
-    dim_t p, buffer_len, n, myDOFs, *offsets = NULL, *loc_offsets = NULL, globalNumReducedDOFs = 0, myNewDOFs;
-#ifdef ESYS_MPI
-    int dest, source;
-    MPI_Status status;
-#endif
-
-    /* get the global range of node ids */
-    Dudley_NodeFile_setGlobalDOFRange(&min_dof, &max_dof, in);
-
-    distribution = new index_t[in->MPIInfo->size + 1];
-    offsets = new dim_t[in->MPIInfo->size];
-    loc_offsets = new dim_t[in->MPIInfo->size];
-
-    /* distribute the range of node ids */
-    buffer_len = in->MPIInfo->setDistribution(min_dof, max_dof, distribution);
-    myDOFs = distribution[in->MPIInfo->rank + 1] - distribution[in->MPIInfo->rank];
-    /* allocate buffers */
-    DOF_buffer = new  index_t[buffer_len];
-
-    /* fill DOF_buffer by the unset_dof marker to check if nodes are defined */
-#pragma omp parallel for private(n) schedule(static)
-    for (n = 0; n < buffer_len; n++)
-        DOF_buffer[n] = unset_dof;
-
-    /* fill the buffer by sending portions around in a circle */
-#ifdef ESYS_MPI
-    dest = in->MPIInfo->mod_rank(in->MPIInfo->rank + 1);
-    source = in->MPIInfo->mod_rank(in->MPIInfo->rank - 1);
-#endif
-    buffer_rank = in->MPIInfo->rank;
-    for (p = 0; p < in->MPIInfo->size; ++p) {
-        if (p > 0) { /* the initial send can be skipped */
-#ifdef ESYS_MPI
-            MPI_Sendrecv_replace(DOF_buffer, buffer_len, MPI_INT,
-                                 dest, in->MPIInfo->counter(), source, in->MPIInfo->counter(),
-                                 in->MPIInfo->comm, &status);
-            in->MPIInfo->incCounter();
-#endif
-        }
-        buffer_rank = in->MPIInfo->mod_rank(buffer_rank - 1);
-        dof_0 = distribution[buffer_rank];
-        dof_1 = distribution[buffer_rank + 1];
-#pragma omp parallel for private(n,k) schedule(static)
-        for (n = 0; n < in->numNodes; n++) {
-            if (reducedNodeMask[n] > -1) {
-                k = in->globalDegreesOfFreedom[n];
-                if ((dof_0 <= k) && (k < dof_1)) {
-                    DOF_buffer[k - dof_0] = set_dof;
-                }
-            }
-        }
-    }
-    /* count the entries in the DOF_buffer */
-    /* TODO: OMP parallel */
-    myNewDOFs = 0;
-    for (n = 0; n < myDOFs; ++n) {
-        if (DOF_buffer[n] == set_dof) {
-            DOF_buffer[n] = myNewDOFs;
-            myNewDOFs++;
-        }
-    }
-    memset(loc_offsets, 0, in->MPIInfo->size * sizeof(dim_t));
-    loc_offsets[in->MPIInfo->rank] = myNewDOFs;
-#ifdef ESYS_MPI
-    MPI_Allreduce(loc_offsets, offsets, in->MPIInfo->size, MPI_INT, MPI_SUM, in->MPIInfo->comm);
-    globalNumReducedDOFs = 0;
-    for (n = 0; n < in->MPIInfo->size; ++n) {
-        loc_offsets[n] = globalNumReducedDOFs;
-        globalNumReducedDOFs += offsets[n];
-    }
-#else
-    globalNumReducedDOFs = loc_offsets[0];
-    loc_offsets[0] = 0;
-#endif
-#pragma omp parallel for private(n) schedule(static)
-    for (n = 0; n < myDOFs; ++n)
-        DOF_buffer[n] += loc_offsets[in->MPIInfo->rank];
-    /* now entries are collected from the buffer again by sending the entries around in a circle */
-#pragma omp parallel for private(n) schedule(static)
-    for (n = 0; n < in->numNodes; ++n)
-        in->globalReducedDOFIndex[n] = loc_offsets[0] - 1;
-#ifdef ESYS_MPI
-    dest = in->MPIInfo->mod_rank(in->MPIInfo->rank + 1);
-    source = in->MPIInfo->mod_rank(in->MPIInfo->rank - 1);
-#endif
-    buffer_rank = in->MPIInfo->rank;
-    for (p = 0; p < in->MPIInfo->size; ++p) {
-        dof_0 = distribution[buffer_rank];
-        dof_1 = distribution[buffer_rank + 1];
-#pragma omp parallel for private(n,k) schedule(static)
-        for (n = 0; n < in->numNodes; n++) {
-            if (reducedNodeMask[n] > -1) {
-                k = in->globalDegreesOfFreedom[n];
-                if ((dof_0 <= k) && (k < dof_1))
-                    in->globalReducedDOFIndex[n] = DOF_buffer[k - dof_0];
-            }
-        }
-        if (p < in->MPIInfo->size - 1) { /* the last send can be skipped */
-#ifdef ESYS_MPI
-            MPI_Sendrecv_replace(DOF_buffer, buffer_len, MPI_INT,
-                                 dest, in->MPIInfo->counter(), source, in->MPIInfo->counter(),
-                                 in->MPIInfo->comm, &status);
-            in->MPIInfo->incCounter();
-#endif
-        }
-        buffer_rank = in->MPIInfo->mod_rank(buffer_rank - 1);
-    }
-    delete[] DOF_buffer;
-    delete[] distribution;
-    delete[] loc_offsets;
-    delete[] offsets;
-    return globalNumReducedDOFs;
-}
-
-dim_t Dudley_NodeFile_createDenseNodeLabeling(Dudley_NodeFile * in, index_t * node_distribution,
-                                              const index_t * dof_distribution)
-{
-    index_t myFirstDOF, myLastDOF, max_id, min_id, loc_max_id, loc_min_id, dof, id, itmp, nodeID_0, nodeID_1, dof_0,
-        dof_1, *Node_buffer = NULL;
-    dim_t n, my_buffer_len, buffer_len, globalNumNodes = 0, myNewNumNodes;
-    int p, buffer_rank;
-    const index_t unset_nodeID = -1, set_nodeID = 1;
-    const dim_t header_len = 2;
-#ifdef ESYS_MPI
-    int dest, source;
-    MPI_Status status;
-#endif
-    int myRank = in->MPIInfo->rank;
-
-    /* find the range of node ids controlled by me */
-
-    myFirstDOF = dof_distribution[myRank];
-    myLastDOF = dof_distribution[myRank + 1];
-    max_id = -escript::DataTypes::index_t_max();
-    min_id = escript::DataTypes::index_t_max();
-#pragma omp parallel private(loc_max_id,loc_min_id)
+    const index_t UNSET_ID = -1, SET_ID = 1;
+    const index_t myFirstDOF = dofDistribution[MPIInfo->rank];
+    const index_t myLastDOF = dofDistribution[MPIInfo->rank + 1];
+
+    // find the range of node IDs controlled by me
+    index_t min_id = escript::DataTypes::index_t_max();
+    index_t max_id = escript::DataTypes::index_t_min();
+#pragma omp parallel
    {
-        loc_max_id = max_id;
-        loc_min_id = min_id;
-#pragma omp for private(n,dof,id) schedule(static)
-        for (n = 0; n < in->numNodes; n++) {
-            dof = in->globalDegreesOfFreedom[n];
-            id = in->Id[n];
-            if ((myFirstDOF <= dof) && (dof < myLastDOF))
-            {
-                loc_max_id = std::max(loc_max_id, id);
-                loc_min_id = std::min(loc_min_id, id);
+        index_t loc_min_id = min_id;
+        index_t loc_max_id = max_id;
+#pragma omp for
+        for (index_t n = 0; n < numNodes; n++) {
+            const index_t dof = globalDegreesOfFreedom[n];
+            if (myFirstDOF <= dof && dof < myLastDOF) {
+                loc_min_id = std::min(loc_min_id, Id[n]);
+                loc_max_id = std::max(loc_max_id, Id[n]);
            }
        }
#pragma omp critical
        {
-            max_id = std::max(loc_max_id, max_id);
            min_id = std::min(loc_min_id, min_id);
+            max_id = std::max(loc_max_id, max_id);
        }
    }
-    /* allocate a buffer */
-    my_buffer_len = max_id >= min_id ? max_id - min_id + 1 : 0;
+    dim_t myBufferLen = (max_id >= min_id ? max_id - min_id + 1 : 0);
+    dim_t bufferLen;

#ifdef ESYS_MPI
-    MPI_Allreduce(&my_buffer_len, &buffer_len, 1, MPI_INT, MPI_MAX, in->MPIInfo->comm);
+    MPI_Allreduce(&myBufferLen, &bufferLen, 1, MPI_DIM_T, MPI_MAX,
+                  MPIInfo->comm);
#else
-    buffer_len = my_buffer_len;
+    bufferLen = myBufferLen;
#endif

-    Node_buffer = new index_t[buffer_len + header_len];
-    /* mark and count the nodes in use */
+    const dim_t headerLen = 2;
+
+    index_t* Node_buffer = new index_t[bufferLen + headerLen];
+    // mark and count the nodes in use
#pragma omp parallel
    {
-#pragma omp for private(n) schedule(static)
-        for (n = 0; n < buffer_len + header_len; n++)
-            Node_buffer[n] = unset_nodeID;
-#pragma omp for private(n) schedule(static)
-        for (n = 0; n < in->numNodes; n++)
-            in->globalNodesIndex[n] = -1;
-#pragma omp for private(n,dof,id) schedule(static)
-        for (n = 0; n < in->numNodes; n++) {
-            dof = in->globalDegreesOfFreedom[n];
-            id = in->Id[n];
-            if ((myFirstDOF <= dof) && (dof < myLastDOF))
-                Node_buffer[id - min_id + header_len] = set_nodeID;
+#pragma omp for
+        for (index_t n = 0; n < bufferLen + headerLen; n++)
+            Node_buffer[n] = UNSET_ID;
+#pragma omp for
+        for (index_t n = 0; n < numNodes; n++) {
+            globalNodesIndex[n] = -1;
+            const index_t dof = globalDegreesOfFreedom[n];
+            if (myFirstDOF <= dof && dof < myLastDOF)
+                Node_buffer[Id[n] - min_id + headerLen] = SET_ID;
        }
    }
-    myNewNumNodes = 0;
-    for (n = 0; n < my_buffer_len; n++) {
-        if (Node_buffer[header_len + n] == set_nodeID) {
-            Node_buffer[header_len + n] = myNewNumNodes;
+    dim_t myNewNumNodes = 0;
+    for (index_t n = 0; n < myBufferLen; n++) {
+        if (Node_buffer[headerLen + n] == SET_ID) {
+            Node_buffer[headerLen + n] = myNewNumNodes;
            myNewNumNodes++;
        }
    }
-    /* make the local number of nodes globally available */
+    // make the local number of nodes globally available
#ifdef ESYS_MPI
-    MPI_Allgather(&myNewNumNodes, 1, MPI_INT, node_distribution, 1, MPI_INT, in->MPIInfo->comm);
+    MPI_Allgather(&myNewNumNodes, 1, MPI_DIM_T, &nodeDistribution[0], 1,
+                  MPI_DIM_T, MPIInfo->comm);
#else
-    node_distribution[0] = myNewNumNodes;
+    nodeDistribution[0] = myNewNumNodes;
#endif

-    globalNumNodes = 0;
-    for (p = 0; p < in->MPIInfo->size; ++p) {
-        itmp = node_distribution[p];
-        node_distribution[p] = globalNumNodes;
+    dim_t globalNumNodes = 0;
+    for (int p = 0; p < MPIInfo->size; ++p) {
+        const dim_t itmp = nodeDistribution[p];
+        nodeDistribution[p] = globalNumNodes;
        globalNumNodes += itmp;
    }
-    node_distribution[in->MPIInfo->size] = globalNumNodes;
+    nodeDistribution[MPIInfo->size] = globalNumNodes;

-    /* offset nodebuffer */
-    itmp = node_distribution[in->MPIInfo->rank];
-#pragma omp for private(n) schedule(static)
-    for (n = 0; n < my_buffer_len; n++)
-        Node_buffer[n + header_len] += itmp;
+    // offset node buffer
+#pragma omp parallel for
+    for (index_t n = 0; n < myBufferLen; n++)
+        Node_buffer[n + headerLen] += nodeDistribution[MPIInfo->rank];

-    /* now we send this buffer around to assign global node index: */
+    // now we send this buffer around to assign global node index
#ifdef ESYS_MPI
-    dest = in->MPIInfo->mod_rank(in->MPIInfo->rank + 1);
-    source = in->MPIInfo->mod_rank(in->MPIInfo->rank - 1);
+    int dest = MPIInfo->mod_rank(MPIInfo->rank + 1);
+    int source = MPIInfo->mod_rank(MPIInfo->rank - 1);
#endif
    Node_buffer[0] = min_id;
    Node_buffer[1] = max_id;
-    buffer_rank = in->MPIInfo->rank;
-    for (p = 0; p < in->MPIInfo->size; ++p) {
-        nodeID_0 = Node_buffer[0];
-        nodeID_1 = Node_buffer[1];
-        dof_0 = dof_distribution[buffer_rank];
-        dof_1 = dof_distribution[buffer_rank + 1];
-        if (nodeID_0 <= nodeID_1) {
-#pragma omp for private(n,dof,id) schedule(static)
-            for (n = 0; n < in->numNodes; n++) {
-                dof = in->globalDegreesOfFreedom[n];
-                id = in->Id[n] - nodeID_0;
-                if ((dof_0 <= dof) && (dof < dof_1) && (id >= 0) && (id <= nodeID_1 - nodeID_0))
-                    in->globalNodesIndex[n] = Node_buffer[id + header_len];
+    int buffer_rank = MPIInfo->rank;
+    for (int p = 0; p < MPIInfo->size; ++p) {
+        const index_t nodeID0 = Node_buffer[0];
+        const index_t nodeID1 = Node_buffer[1];
+        const index_t dof0 = dofDistribution[buffer_rank];
+        const index_t dof1 = dofDistribution[buffer_rank + 1];
+        if (nodeID0 <= nodeID1) {
+#pragma omp parallel for
+            for (index_t n = 0; n < numNodes; n++) {
+                const index_t dof = globalDegreesOfFreedom[n];
+                const index_t id = Id[n] - nodeID0;
+                if (dof0 <= dof && dof < dof1 && id >= 0 &&
+                        id <= nodeID1 - nodeID0)
+                    globalNodesIndex[n] = Node_buffer[id + headerLen];
            }
        }
-        if (p < in->MPIInfo->size - 1) { /* the last send can be skipped */
#ifdef ESYS_MPI
-            MPI_Sendrecv_replace(Node_buffer, buffer_len + header_len, MPI_INT,
-                                 dest, in->MPIInfo->counter(), source, in->MPIInfo->counter(),
-                                 in->MPIInfo->comm, &status);
-            in->MPIInfo->incCounter();
-#endif
+        if (p < MPIInfo->size - 1) { // the last send can be skipped
+            MPI_Status status;
+            MPI_Sendrecv_replace(Node_buffer, bufferLen + headerLen, MPI_DIM_T,
+                                 dest, MPIInfo->counter(), source,
+                                 MPIInfo->counter(), MPIInfo->comm, &status);
+            MPIInfo->incCounter();
        }
-        buffer_rank = in->MPIInfo->mod_rank(buffer_rank - 1);
+#endif
+        buffer_rank = MPIInfo->mod_rank(buffer_rank - 1);
    }
    delete[] Node_buffer;
    return globalNumNodes;
}
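At the core of createDenseNodeLabeling (and of the DOF variant above) is a mark-and-renumber pass: every ID that is actually referenced is flagged in a buffer covering [min_id, max_id], and the flags are then replaced by consecutive numbers starting at zero. The serial sketch below shows only that step, with made-up IDs and a plain long standing in for index_t; the per-rank offsets and the ring exchange are left out.

    // mark_and_renumber.cpp -- serial sketch of the dense-labeling step
    // (illustrative values only; index_t assumed to be a signed integer)
    #include <algorithm>
    #include <cassert>
    #include <vector>

    int main()
    {
        typedef long index_t;
        // sparse, non-contiguous IDs referenced by this (hypothetical) rank
        const std::vector<index_t> ids = {104, 101, 110, 104, 107};

        const index_t minId = *std::min_element(ids.begin(), ids.end());
        const index_t maxId = *std::max_element(ids.begin(), ids.end());
        const index_t UNSET_ID = -1, SET_ID = 1;

        // mark every ID that is actually in use
        std::vector<index_t> buffer(maxId - minId + 1, UNSET_ID);
        for (index_t id : ids)
            buffer[id - minId] = SET_ID;

        // replace markers by consecutive numbers 0,1,2,... (the dense labels)
        index_t newCount = 0;
        for (index_t& b : buffer)
            if (b == SET_ID)
                b = newCount++;

        // look up the dense label of an original ID: 101->0, 104->1, 107->2, 110->3
        assert(buffer[104 - minId] == 1);
        assert(newCount == 4);
        return 0;
    }

The buffer positions are later shifted by the rank's entry in the gathered distribution, which is what the Node_buffer[n + headerLen] += nodeDistribution[MPIInfo->rank] line above does to make the labels globally unique.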

-dim_t Dudley_NodeFile_createDenseReducedNodeLabeling(Dudley_NodeFile * in, index_t * reducedNodeMask)
-{
-    index_t min_node, max_node, unset_node = -1, set_node = 1, node_0, node_1, *Nodes_buffer = NULL, k;
-    int buffer_rank, *distribution = NULL;
-    dim_t p, buffer_len, n, myNodes, *offsets = NULL, *loc_offsets = NULL, globalNumReducedNodes = 0, myNewNodes;
-#ifdef ESYS_MPI
-    int dest, source;
-    MPI_Status status;
-#endif
-
-    /* get the global range of node ids */
-    Dudley_NodeFile_setGlobalNodeIDIndexRange(&min_node, &max_node, in);
-
-    distribution = new index_t[in->MPIInfo->size + 1];
-    offsets = new dim_t[in->MPIInfo->size];
-    loc_offsets = new dim_t[in->MPIInfo->size];
-
-    /* distribute the range of node ids */
-    buffer_len = in->MPIInfo->setDistribution(min_node, max_node, distribution);
-    myNodes = distribution[in->MPIInfo->rank + 1] - distribution[in->MPIInfo->rank];
-    /* allocate buffers */
-    Nodes_buffer = new index_t[buffer_len];
-    /* fill Nodes_buffer by the unset_node marker to check if nodes are defined */
-#pragma omp parallel for private(n) schedule(static)
-    for (n = 0; n < buffer_len; n++)
-        Nodes_buffer[n] = unset_node;
-
-    /* fill the buffer by sending portions around in a circle */
-#ifdef ESYS_MPI
-    dest = in->MPIInfo->mod_rank(in->MPIInfo->rank + 1);
-    source = in->MPIInfo->mod_rank(in->MPIInfo->rank - 1);
-#endif
-    buffer_rank = in->MPIInfo->rank;
-    for (p = 0; p < in->MPIInfo->size; ++p) {
-        if (p > 0) { /* the initial send can be skipped */
-#ifdef ESYS_MPI
-            MPI_Sendrecv_replace(Nodes_buffer, buffer_len, MPI_INT,
-                                 dest, in->MPIInfo->counter(), source, in->MPIInfo->counter(),
-                                 in->MPIInfo->comm, &status);
-            in->MPIInfo->incCounter();
-#endif
-        }
-        buffer_rank = in->MPIInfo->mod_rank(buffer_rank - 1);
-        node_0 = distribution[buffer_rank];
-        node_1 = distribution[buffer_rank + 1];
-#pragma omp parallel for private(n,k) schedule(static)
-        for (n = 0; n < in->numNodes; n++) {
-            if (reducedNodeMask[n] > -1) {
-                k = in->globalNodesIndex[n];
-                if ((node_0 <= k) && (k < node_1)) {
-                    Nodes_buffer[k - node_0] = set_node;
-                }
-            }
-        }
-    }
-    /* count the entries in the Nodes_buffer */
-    /* TODO: OMP parallel */
-    myNewNodes = 0;
-    for (n = 0; n < myNodes; ++n) {
-        if (Nodes_buffer[n] == set_node) {
-            Nodes_buffer[n] = myNewNodes;
-            myNewNodes++;
-        }
-    }
-    memset(loc_offsets, 0, in->MPIInfo->size * sizeof(dim_t));
-    loc_offsets[in->MPIInfo->rank] = myNewNodes;
-#ifdef ESYS_MPI
-    MPI_Allreduce(loc_offsets, offsets, in->MPIInfo->size, MPI_INT, MPI_SUM, in->MPIInfo->comm);
-    globalNumReducedNodes = 0;
-    for (n = 0; n < in->MPIInfo->size; ++n) {
-        loc_offsets[n] = globalNumReducedNodes;
-        globalNumReducedNodes += offsets[n];
-    }
-#else
-    globalNumReducedNodes = loc_offsets[0];
-    loc_offsets[0] = 0;
-#endif
-#pragma omp parallel for private(n) schedule(static)
-    for (n = 0; n < myNodes; ++n)
-        Nodes_buffer[n] += loc_offsets[in->MPIInfo->rank];
-    /* now entries are collected from the buffer again by sending the entries around in a circle */
-#pragma omp parallel for private(n) schedule(static)
-    for (n = 0; n < in->numNodes; ++n)
-        in->globalReducedNodesIndex[n] = loc_offsets[0] - 1;
-#ifdef ESYS_MPI
-    dest = in->MPIInfo->mod_rank(in->MPIInfo->rank + 1);
-    source = in->MPIInfo->mod_rank(in->MPIInfo->rank - 1);
-#endif
-    buffer_rank = in->MPIInfo->rank;
-    for (p = 0; p < in->MPIInfo->size; ++p) {
-        node_0 = distribution[buffer_rank];
-        node_1 = distribution[buffer_rank + 1];
-#pragma omp parallel for private(n,k) schedule(static)
-        for (n = 0; n < in->numNodes; n++) {
-            if (reducedNodeMask[n] > -1) {
-                k = in->globalNodesIndex[n];
-                if ((node_0 <= k) && (k < node_1))
-                    in->globalReducedNodesIndex[n] = Nodes_buffer[k - node_0];
-            }
-        }
-        if (p < in->MPIInfo->size - 1) {
-            /* the last send can be skipped */
-#ifdef ESYS_MPI
-            MPI_Sendrecv_replace(Nodes_buffer, buffer_len, MPI_INT,
-                                 dest, in->MPIInfo->counter(), source, in->MPIInfo->counter(),
-                                 in->MPIInfo->comm, &status);
-            in->MPIInfo->incCounter();
-#endif
-        }
-        buffer_rank = in->MPIInfo->mod_rank(buffer_rank - 1);
-    }
-    delete[] Nodes_buffer;
-    delete[] distribution;
-    delete[] loc_offsets;
-    delete[] offsets;
-    return globalNumReducedNodes;
-}
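Every variant in this file, retained or removed, turns per-rank label counts into global offsets the same way: the counts are combined across ranks (the MPI_Allreduce into offsets above), and an exclusive prefix sum then gives each rank the first global label it may hand out plus the global total that the function returns. A small serial sketch with made-up counts:

    // offsets_scan.cpp -- sketch of the count-to-offset step
    // (example values only; assumes the counts have already been gathered)
    #include <cstdio>
    #include <vector>

    int main()
    {
        // offsets[p] = number of new labels created on rank p (as after MPI_Allreduce)
        std::vector<long> offsets = {3, 0, 5, 2};
        std::vector<long> loc_offsets(offsets.size());

        long total = 0;
        for (std::size_t p = 0; p < offsets.size(); ++p) {
            loc_offsets[p] = total;   // first global label handed out by rank p
            total += offsets[p];
        }
        // loc_offsets = {0, 3, 3, 8}, total = 10
        std::printf("total = %ld\n", total);
        for (std::size_t p = 0; p < offsets.size(); ++p)
            std::printf("rank %zu starts at %ld\n", p, loc_offsets[p]);
        return 0;
    }

Rank p then adds loc_offsets[p] to its locally numbered labels 0..offsets[p]-1, which is what the DOF_buffer[n] += loc_offsets[...] and Nodes_buffer[n] += loc_offsets[...] loops above do.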
   
} // namespace dudley

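One of the functions dropped in r6079, Dudley_NodeFile_assignMPIRankToDOFs, mapped every DOF to the rank that owns it by scanning the monotone distribution array between p_min and p_max. The sketch below shows the same owner lookup written with std::upper_bound instead of that linear scan; the values are made up and the helper is not part of either revision.

    // owner_lookup.cpp -- binary-search variant of the removed rank lookup
    // (hypothetical distribution; not Dudley code)
    #include <algorithm>
    #include <cstdio>
    #include <vector>

    int main()
    {
        typedef long index_t;
        // distribution has size+1 entries; rank p owns [distribution[p], distribution[p+1])
        const std::vector<index_t> distribution = {0, 4, 4, 9, 12};

        const index_t dofs[] = {0, 3, 4, 8, 11};
        for (index_t k : dofs) {
            // index of the first entry strictly greater than k, minus one, is the owner
            const int owner = static_cast<int>(
                std::upper_bound(distribution.begin(), distribution.end(), k)
                - distribution.begin()) - 1;
            std::printf("DOF %ld is owned by rank %d\n", k, owner);
        }
        return 0;
    }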

Legend: lines beginning with '-' appear only in v.6078 (removed); lines beginning with '+' appear only in v.6079 (added); all other lines are common to both revisions.
