Revision 5148 - (show annotations)
Mon Sep 15 01:25:23 2014 UTC (4 years, 11 months ago) by caltinay
File size: 9634 byte(s)
```Merging ripley diagonal storage + CUDA support into trunk.
Options file version has been incremented due to new options
'cuda' and 'nvccflags'.

```
 1 2 /***************************************************************************** 3 * 4 * Copyright (c) 2003-2014 by University of Queensland 5 6 * 7 * Primary Business: Queensland, Australia 8 * Licensed under the Open Software License version 3.0 9 10 * 11 * Development until 2012 by Earth Systems Science Computational Center (ESSCC) 12 * Development 2012-2013 by School of Earth Sciences 13 * Development from 2014 by Centre for Geoscience Computing (GeoComp) 14 * 15 *****************************************************************************/ 16 17 18 /**************************************************************************** 19 20 Adds points to the Points table of the mesh. 21 22 *****************************************************************************/ 23 24 #include "Mesh.h" 25 26 namespace finley { 27 28 #ifdef ESYS_MPI 29 void MPI_minimizeDistance(void *invec_p, void *inoutvec_p, int *len, 30 MPI_Datatype *dtype) 31 { 32 const int numPoints = (*len)/2; 33 double *invec = reinterpret_cast(invec_p); 34 double *inoutvec = reinterpret_cast(inoutvec_p); 35 for (int i=0; ireferenceElementSet; 59 numOldPoints=oldPoints->numElements; 60 } 61 ElementFile *newPoints=new ElementFile(refPoints, MPIInfo); 62 63 // first we find the node which is the closest on this processor: 64 double *dist_p = new double[numPoints]; 65 int *node_id_p = new int[numPoints]; 66 int *point_index_p = new int[numPoints]; // the code below does actually initialise this before using it 67 68 for (int i=0; iCoordinates; 74 const int numDim = getDim(); 75 if (numDim == 3) { 76 #pragma omp parallel 77 { 78 for (int i=0; inumNodes; n++) { 86 const double D0=coords[INDEX2(0,n,numDim)] - X0; 87 const double D1=coords[INDEX2(1,n,numDim)] - X1; 88 const double D2=coords[INDEX2(2,n,numDim)] - X2; 89 const double d = D0*D0 + D1*D1 + D2*D2; 90 if (d < dist_local) { 91 dist_local = d; 92 node_id_local = n; 93 } 94 } 95 #pragma omp critical 96 { 97 if ((dist_local < dist_p[i]) || ((dist_local == dist_p[i]) && (node_id_p[i]>node_id_local))) { 98 dist_p[i] = dist_local; 99 node_id_p[i] = node_id_local; 100 } 101 } 102 } 103 } // end parallel section 104 } else if (numDim == 2) { 105 #pragma omp parallel 106 { 107 for (int i=0; inumNodes; n++) { 114 const double D0=coords[INDEX2(0,n,numDim)] - X0; 115 const double D1=coords[INDEX2(1,n,numDim)] - X1; 116 const double d = D0*D0 + D1*D1; 117 if (d < dist_local) { 118 dist_local = d; 119 node_id_local = n; 120 } 121 } 122 #pragma omp critical 123 { 124 if ((dist_local < dist_p[i]) || ((dist_local == dist_p[i]) && (node_id_p[i]>node_id_local))) { 125 dist_p[i] = dist_local; 126 node_id_p[i] = node_id_local; 127 } 128 } 129 } 130 } // end parallel section 131 } else { // numDim==1 132 #pragma omp parallel 133 { 134 for (int i=0; inumNodes; n++) { 140 const double D0=coords[INDEX2(0,n,numDim)] - X0; 141 const double d = D0*D0; 142 if (d < dist_local) { 143 dist_local = d; 144 node_id_local = n; 145 } 146 } 147 #pragma omp critical 148 { 149 if ((dist_local < dist_p[i]) || ((dist_local == dist_p[i]) && (node_id_p[i]>node_id_local))) { 150 dist_p[i] = dist_local; 151 node_id_p[i] = node_id_local; 152 } 153 } 154 } 155 } // end parallel section 156 } //numDim 157 158 #ifdef ESYS_MPI 159 // now we need to reduce this across all processors 160 const int count = 2*numPoints; 161 double *sendbuf=new double[count]; 162 double *recvbuf=new double[count]; 163 164 for (int i=0; i(Nodes->Id[node_id_p[i]]); 167 } 168 MPI_Op op; 169 MPI_Op_create(MPI_minimizeDistance, true, &op); 170 MPI_Allreduce(sendbuf, recvbuf, count, MPI_DOUBLE, op, MPIInfo->comm); 171 MPI_Op_free(&op); 172 // if the node id has changed we found another node which is closer 173 // elsewhere 174 for (int i=0; i(recvbuf[2*i+1]+0.5); 176 if (best_fit_Id != Nodes->Id[node_id_p[i]]) { 177 node_id_p[i] = -1; 178 } 179 } 180 delete[] sendbuf; 181 delete[] recvbuf; 182 #endif 183 delete[] dist_p; 184 185 // we pick the points to be used on this processor 186 int numNewPoints=0; 187 const int firstDOF=Nodes->degreesOfFreedomDistribution->getFirstComponent(); 188 const int lastDOF=Nodes->degreesOfFreedomDistribution->getLastComponent(); 189 190 for (int i=0; i-1) { 192 // this processor uses a node which is identical to point i 193 if (Nodes->globalReducedDOFIndex[node_id_p[i]] > -1) { 194 // the point is also used in the reduced mesh 195 const int global_id=Nodes->globalDegreesOfFreedom[node_id_p[i]]; 196 if (firstDOF<=global_id && global_id 0) { 200 // is this point already in the Point table? 201 for (int k=0; kNodes[k]) { 203 notAnOldPoint=false; 204 break; 205 } 206 } 207 } 208 if (notAnOldPoint) { 209 // is this point unique in the new list of points? 210 bool notANewPoint=true; 211 for (int k=0; kglobalDegreesOfFreedom[node_id_p[point_index_p[k]]]) { 213 notANewPoint=false; 214 break; 215 } 216 } 217 if (notANewPoint) { 218 point_index_p[numNewPoints]=i; 219 numNewPoints++; 220 } 221 } 222 } 223 } 224 } 225 } 226 227 // now we are ready to create the new Point table 228 newPoints->allocTable(numOldPoints+numNewPoints); 229 if (numOldPoints > 0) { 230 #pragma omp parallel for schedule(static) 231 for (int n=0; nOwner[n]=oldPoints->Owner[n]; 233 newPoints->Id[n] =oldPoints->Id[n]; 234 newPoints->Tag[n] =oldPoints->Tag[n]; 235 newPoints->Nodes[n]=oldPoints->Nodes[n]; 236 newPoints->Color[n]=0; 237 } 238 } 239 #pragma omp parallel for schedule(static) 240 for (int n=0; nOwner[numOldPoints+n]=MPIInfo->rank; 243 newPoints->Id[numOldPoints+n] =0; 244 newPoints->Tag[numOldPoints+n] =tags_ptr[idx]; 245 newPoints->Nodes[numOldPoints+n]=node_id_p[idx]; 246 newPoints->Color[numOldPoints+n]=0; 247 } 248 newPoints->minColor=0; 249 newPoints->maxColor=0; 250 251 // all done, clean up 252 delete[] node_id_p; 253 delete[] point_index_p; 254 if (noError()) { 255 delete oldPoints; 256 Points=newPoints; 257 } else { 258 delete newPoints; 259 } 260 } 261 262 } // namespace finley 263

Name Value