# Contents of /branches/ripleygmg_from_3668/ripley/src/Rectangle.cpp

Revision 3707 - (show annotations)
Mon Dec 5 05:48:25 2011 UTC (8 years ago) by caltinay
File size: 42671 byte(s)
```Fixes to the face element code.

```
 1 2 /******************************************************* 3 * 4 * Copyright (c) 2003-2011 by University of Queensland 5 * Earth Systems Science Computational Center (ESSCC) 6 7 * 8 * Primary Business: Queensland, Australia 9 * Licensed under the Open Software License version 3.0 10 11 * 12 *******************************************************/ 13 14 #include 15 extern "C" { 16 #include "paso/SystemMatrixPattern.h" 17 } 18 19 #if USE_SILO 20 #include 21 #ifdef ESYS_MPI 22 #include 23 #endif 24 #endif 25 26 #include 27 28 using namespace std; 29 30 namespace ripley { 31 32 Rectangle::Rectangle(int n0, int n1, double l0, double l1, int d0, int d1) : 33 RipleyDomain(2), 34 m_gNE0(n0), 35 m_gNE1(n1), 36 m_l0(l0), 37 m_l1(l1), 38 m_NX(d0), 39 m_NY(d1) 40 { 41 // ensure number of subdivisions is valid and nodes can be distributed 42 // among number of ranks 43 if (m_NX*m_NY != m_mpiInfo->size) 44 throw RipleyException("Invalid number of spatial subdivisions"); 45 46 if (n0%m_NX > 0 || n1%m_NY > 0) 47 throw RipleyException("Number of elements must be separable into number of ranks in each dimension"); 48 49 // local number of elements 50 m_NE0 = n0/m_NX; 51 m_NE1 = n1/m_NY; 52 // local number of nodes (not necessarily owned) 53 m_N0 = m_NE0+1; 54 m_N1 = m_NE1+1; 55 // bottom-left node is at (offset0,offset1) in global mesh 56 m_offset0 = m_NE0*(m_mpiInfo->rank%m_NX); 57 m_offset1 = m_NE1*(m_mpiInfo->rank/m_NX); 58 populateSampleIds(); 59 } 60 61 Rectangle::~Rectangle() 62 { 63 } 64 65 string Rectangle::getDescription() const 66 { 67 return "ripley::Rectangle"; 68 } 69 70 bool Rectangle::operator==(const AbstractDomain& other) const 71 { 72 if (dynamic_cast(&other)) 73 return this==&other; 74 75 return false; 76 } 77 78 void Rectangle::dump(const string& fileName) const 79 { 80 #if USE_SILO 81 string fn(fileName); 82 if (fileName.length() < 6 || fileName.compare(fileName.length()-5, 5, ".silo") != 0) { 83 fn+=".silo"; 84 } 85 86 const int NUM_SILO_FILES = 1; 87 const char* blockDirFmt = "/block%04d"; 88 int driver=DB_HDF5; 89 string siloPath; 90 DBfile* dbfile = NULL; 91 92 #ifdef ESYS_MPI 93 PMPIO_baton_t* baton = NULL; 94 #endif 95 96 if (m_mpiInfo->size > 1) { 97 #ifdef ESYS_MPI 98 baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm, 99 0x1337, PMPIO_DefaultCreate, PMPIO_DefaultOpen, 100 PMPIO_DefaultClose, (void*)&driver); 101 // try the fallback driver in case of error 102 if (!baton && driver != DB_PDB) { 103 driver = DB_PDB; 104 baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm, 105 0x1338, PMPIO_DefaultCreate, PMPIO_DefaultOpen, 106 PMPIO_DefaultClose, (void*)&driver); 107 } 108 if (baton) { 109 char str[64]; 110 snprintf(str, 64, blockDirFmt, PMPIO_RankInGroup(baton, m_mpiInfo->rank)); 111 siloPath = str; 112 dbfile = (DBfile*) PMPIO_WaitForBaton(baton, fn.c_str(), siloPath.c_str()); 113 } 114 #endif 115 } else { 116 dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL, 117 getDescription().c_str(), driver); 118 // try the fallback driver in case of error 119 if (!dbfile && driver != DB_PDB) { 120 driver = DB_PDB; 121 dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL, 122 getDescription().c_str(), driver); 123 } 124 } 125 126 if (!dbfile) 127 throw RipleyException("dump: Could not create Silo file"); 128 129 /* 130 if (driver==DB_HDF5) { 131 // gzip level 1 already provides good compression with minimal 132 // performance penalty. Some tests showed that gzip levels >3 performed 133 // rather badly on escript data both in terms of time and space 134 DBSetCompression("ERRMODE=FALLBACK METHOD=GZIP LEVEL=1"); 135 } 136 */ 137 138 boost::scoped_ptr x(new double[m_N0]); 139 boost::scoped_ptr y(new double[m_N1]); 140 double* coords[2] = { x.get(), y.get() }; 141 pair xdx = getFirstCoordAndSpacing(0); 142 pair ydy = getFirstCoordAndSpacing(1); 143 #pragma omp parallel 144 { 145 #pragma omp for 146 for (dim_t i0 = 0; i0 < m_N0; i0++) { 147 coords[0][i0]=xdx.first+i0*xdx.second; 148 } 149 #pragma omp for 150 for (dim_t i1 = 0; i1 < m_N1; i1++) { 151 coords[1][i1]=ydy.first+i1*ydy.second; 152 } 153 } 154 IndexVector dims = getNumNodesPerDim(); 155 156 // write mesh 157 DBPutQuadmesh(dbfile, "mesh", NULL, coords, &dims[0], 2, DB_DOUBLE, 158 DB_COLLINEAR, NULL); 159 160 // write node ids 161 DBPutQuadvar1(dbfile, "nodeId", "mesh", (void*)&m_nodeId[0], &dims[0], 2, 162 NULL, 0, DB_INT, DB_NODECENT, NULL); 163 164 // write element ids 165 dims = getNumElementsPerDim(); 166 DBPutQuadvar1(dbfile, "elementId", "mesh", (void*)&m_elementId[0], 167 &dims[0], 2, NULL, 0, DB_INT, DB_ZONECENT, NULL); 168 169 // rank 0 writes multimesh and multivar 170 if (m_mpiInfo->rank == 0) { 171 vector tempstrings; 172 vector names; 173 for (dim_t i=0; isize; i++) { 174 stringstream path; 175 path << "/block" << setw(4) << setfill('0') << right << i << "/mesh"; 176 tempstrings.push_back(path.str()); 177 names.push_back((char*)tempstrings.back().c_str()); 178 } 179 vector types(m_mpiInfo->size, DB_QUAD_RECT); 180 DBSetDir(dbfile, "/"); 181 DBPutMultimesh(dbfile, "multimesh", m_mpiInfo->size, &names[0], 182 &types[0], NULL); 183 tempstrings.clear(); 184 names.clear(); 185 for (dim_t i=0; isize; i++) { 186 stringstream path; 187 path << "/block" << setw(4) << setfill('0') << right << i << "/nodeId"; 188 tempstrings.push_back(path.str()); 189 names.push_back((char*)tempstrings.back().c_str()); 190 } 191 types.assign(m_mpiInfo->size, DB_QUADVAR); 192 DBPutMultivar(dbfile, "nodeId", m_mpiInfo->size, &names[0], 193 &types[0], NULL); 194 tempstrings.clear(); 195 names.clear(); 196 for (dim_t i=0; isize; i++) { 197 stringstream path; 198 path << "/block" << setw(4) << setfill('0') << right << i << "/elementId"; 199 tempstrings.push_back(path.str()); 200 names.push_back((char*)tempstrings.back().c_str()); 201 } 202 DBPutMultivar(dbfile, "elementId", m_mpiInfo->size, &names[0], 203 &types[0], NULL); 204 } 205 206 if (m_mpiInfo->size > 1) { 207 #ifdef ESYS_MPI 208 PMPIO_HandOffBaton(baton, dbfile); 209 PMPIO_Finish(baton); 210 #endif 211 } else { 212 DBClose(dbfile); 213 } 214 215 #else // USE_SILO 216 throw RipleyException("dump(): no Silo support"); 217 #endif 218 } 219 220 const int* Rectangle::borrowSampleReferenceIDs(int fsType) const 221 { 222 switch (fsType) { 223 case Nodes: 224 return &m_nodeId[0]; 225 case Elements: 226 return &m_elementId[0]; 227 case FaceElements: 228 return &m_faceId[0]; 229 default: 230 break; 231 } 232 233 stringstream msg; 234 msg << "borrowSampleReferenceIDs() not implemented for function space type " 235 << functionSpaceTypeAsString(fsType); 236 throw RipleyException(msg.str()); 237 } 238 239 bool Rectangle::ownSample(int fsCode, index_t id) const 240 { 241 #ifdef ESYS_MPI 242 if (fsCode != ReducedNodes) { 243 const index_t myFirst=m_nodeDistribution[m_mpiInfo->rank]; 244 const index_t myLast=m_nodeDistribution[m_mpiInfo->rank+1]-1; 245 return (m_nodeId[id]>=myFirst && m_nodeId[id]<=myLast); 246 } else { 247 stringstream msg; 248 msg << "ownSample() not implemented for " 249 << functionSpaceTypeAsString(fsCode); 250 throw RipleyException(msg.str()); 251 } 252 #else 253 return true; 254 #endif 255 } 256 257 void Rectangle::setToGradient(escript::Data& out, const escript::Data& cIn) const 258 { 259 escript::Data& in = *const_cast(&cIn); 260 const dim_t numComp = in.getDataPointSize(); 261 const double h0 = m_l0/m_gNE0; 262 const double h1 = m_l1/m_gNE1; 263 if (out.getFunctionSpace().getTypeCode() == Elements) { 264 /* GENERATOR SNIP_GRAD_ELEMENTS TOP */ 265 const double tmp0_13 = 0.78867513459481288225/h1; 266 const double tmp0_0 = 0.78867513459481288225/h0; 267 const double tmp0_4 = 0.21132486540518711775/h0; 268 const double tmp0_10 = 0.78867513459481288225/h1; 269 const double tmp0_1 = 0.21132486540518711775/h0; 270 const double tmp0_8 = -0.21132486540518711775/h1; 271 const double tmp0_14 = 0.21132486540518711775/h1; 272 const double tmp0_5 = 0.78867513459481288225/h0; 273 const double tmp0_11 = -0.78867513459481288225/h1; 274 const double tmp0_2 = -0.21132486540518711775/h0; 275 const double tmp0_9 = 0.21132486540518711775/h1; 276 const double tmp0_15 = -0.21132486540518711775/h1; 277 const double tmp0_6 = -0.78867513459481288225/h0; 278 const double tmp0_3 = -0.78867513459481288225/h0; 279 const double tmp0_12 = -0.78867513459481288225/h1; 280 const double tmp0_7 = -0.21132486540518711775/h0; 281 #pragma omp parallel for 282 for (index_t k1=0; k1 < m_NE1; ++k1) { 283 for (index_t k0=0; k0 < m_NE0; ++k0) { 284 const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,k1, m_N0)); 285 const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,k1+1, m_N0)); 286 const register double* f_01 = in.getSampleDataRO(INDEX2(k0,k1+1, m_N0)); 287 const register double* f_00 = in.getSampleDataRO(INDEX2(k0,k1, m_N0)); 288 double* o = out.getSampleDataRW(INDEX2(k0,k1,m_NE0)); 289 for (index_t i=0; i < numComp; ++i) { 290 o[INDEX3(i,0,0,numComp,2)] = f_00[i]*tmp0_3 + f_01[i]*tmp0_2 + f_10[i]*tmp0_0 + f_11[i]*tmp0_1; 291 o[INDEX3(i,1,0,numComp,2)] = f_00[i]*tmp0_11 + f_01[i]*tmp0_10 + f_10[i]*tmp0_8 + f_11[i]*tmp0_9; 292 o[INDEX3(i,0,1,numComp,2)] = f_00[i]*tmp0_3 + f_01[i]*tmp0_2 + f_10[i]*tmp0_0 + f_11[i]*tmp0_1; 293 o[INDEX3(i,1,1,numComp,2)] = f_00[i]*tmp0_15 + f_01[i]*tmp0_14 + f_10[i]*tmp0_12 + f_11[i]*tmp0_13; 294 o[INDEX3(i,0,2,numComp,2)] = f_00[i]*tmp0_7 + f_01[i]*tmp0_6 + f_10[i]*tmp0_4 + f_11[i]*tmp0_5; 295 o[INDEX3(i,1,2,numComp,2)] = f_00[i]*tmp0_11 + f_01[i]*tmp0_10 + f_10[i]*tmp0_8 + f_11[i]*tmp0_9; 296 o[INDEX3(i,0,3,numComp,2)] = f_00[i]*tmp0_7 + f_01[i]*tmp0_6 + f_10[i]*tmp0_4 + f_11[i]*tmp0_5; 297 o[INDEX3(i,1,3,numComp,2)] = f_00[i]*tmp0_15 + f_01[i]*tmp0_14 + f_10[i]*tmp0_12 + f_11[i]*tmp0_13; 298 } /* end of component loop i */ 299 } /* end of k0 loop */ 300 } /* end of k1 loop */ 301 /* GENERATOR SNIP_GRAD_ELEMENTS BOTTOM */ 302 } else if (out.getFunctionSpace().getTypeCode() == FaceElements) { 303 /* GENERATOR SNIP_GRAD_FACES TOP */ 304 if (m_faceOffset[0] > -1) { 305 const double tmp0_0 = 0.78867513459481288225/h0; 306 const double tmp0_4 = 0.21132486540518711775/h0; 307 const double tmp0_1 = 0.21132486540518711775/h0; 308 const double tmp0_8 = 1.0/h1; 309 const double tmp0_5 = 0.78867513459481288225/h0; 310 const double tmp0_2 = -0.21132486540518711775/h0; 311 const double tmp0_9 = -1/h1; 312 const double tmp0_6 = -0.78867513459481288225/h0; 313 const double tmp0_3 = -0.78867513459481288225/h0; 314 const double tmp0_7 = -0.21132486540518711775/h0; 315 #pragma omp parallel for 316 for (index_t k1=0; k1 < m_NE1; ++k1) { 317 const register double* f_10 = in.getSampleDataRO(INDEX2(1,k1, m_N0)); 318 const register double* f_11 = in.getSampleDataRO(INDEX2(1,k1+1, m_N0)); 319 const register double* f_01 = in.getSampleDataRO(INDEX2(0,k1+1, m_N0)); 320 const register double* f_00 = in.getSampleDataRO(INDEX2(0,k1, m_N0)); 321 double* o = out.getSampleDataRW(m_faceOffset[0]+k1); 322 for (index_t i=0; i < numComp; ++i) { 323 o[INDEX3(i,0,0,numComp,2)] = f_00[i]*tmp0_3 + f_01[i]*tmp0_2 + f_10[i]*tmp0_0 + f_11[i]*tmp0_1; 324 o[INDEX3(i,1,0,numComp,2)] = f_00[i]*tmp0_9 + f_01[i]*tmp0_8; 325 o[INDEX3(i,0,1,numComp,2)] = f_00[i]*tmp0_7 + f_01[i]*tmp0_6 + f_10[i]*tmp0_4 + f_11[i]*tmp0_5; 326 o[INDEX3(i,1,1,numComp,2)] = f_00[i]*tmp0_9 + f_01[i]*tmp0_8; 327 } /* end of component loop i */ 328 } /* end of k1 loop */ 329 } /* end of face 0 */ 330 if (m_faceOffset[1] > -1) { 331 const double tmp0_0 = 0.78867513459481288225/h0; 332 const double tmp0_4 = 0.21132486540518711775/h0; 333 const double tmp0_1 = 0.21132486540518711775/h0; 334 const double tmp0_8 = -1/h1; 335 const double tmp0_5 = 0.78867513459481288225/h0; 336 const double tmp0_2 = -0.21132486540518711775/h0; 337 const double tmp0_9 = 1.0/h1; 338 const double tmp0_6 = -0.78867513459481288225/h0; 339 const double tmp0_3 = -0.78867513459481288225/h0; 340 const double tmp0_7 = -0.21132486540518711775/h0; 341 #pragma omp parallel for 342 for (index_t k1=0; k1 < m_NE1; ++k1) { 343 const register double* f_10 = in.getSampleDataRO(INDEX2(m_N0-1,k1, m_N0)); 344 const register double* f_11 = in.getSampleDataRO(INDEX2(m_N0-1,k1+1, m_N0)); 345 const register double* f_01 = in.getSampleDataRO(INDEX2(m_N0-2,k1+1, m_N0)); 346 const register double* f_00 = in.getSampleDataRO(INDEX2(m_N0-2,k1, m_N0)); 347 double* o = out.getSampleDataRW(m_faceOffset[1]+k1); 348 for (index_t i=0; i < numComp; ++i) { 349 o[INDEX3(i,0,0,numComp,2)] = f_00[i]*tmp0_3 + f_01[i]*tmp0_2 + f_10[i]*tmp0_0 + f_11[i]*tmp0_1; 350 o[INDEX3(i,1,0,numComp,2)] = f_10[i]*tmp0_8 + f_11[i]*tmp0_9; 351 o[INDEX3(i,0,1,numComp,2)] = f_00[i]*tmp0_7 + f_01[i]*tmp0_6 + f_10[i]*tmp0_4 + f_11[i]*tmp0_5; 352 o[INDEX3(i,1,1,numComp,2)] = f_10[i]*tmp0_8 + f_11[i]*tmp0_9; 353 } /* end of component loop i */ 354 } /* end of k1 loop */ 355 } /* end of face 1 */ 356 if (m_faceOffset[2] > -1) { 357 const double tmp0_0 = -1/h0; 358 const double tmp0_4 = 0.21132486540518711775/h1; 359 const double tmp0_1 = 1.0/h0; 360 const double tmp0_8 = 0.78867513459481288225/h1; 361 const double tmp0_5 = 0.78867513459481288225/h1; 362 const double tmp0_2 = -0.78867513459481288225/h1; 363 const double tmp0_9 = 0.21132486540518711775/h1; 364 const double tmp0_6 = -0.21132486540518711775/h1; 365 const double tmp0_3 = -0.21132486540518711775/h1; 366 const double tmp0_7 = -0.78867513459481288225/h1; 367 #pragma omp parallel for 368 for (index_t k0=0; k0 < m_NE0; ++k0) { 369 const register double* f_00 = in.getSampleDataRO(INDEX2(k0,0, m_N0)); 370 const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,0, m_N0)); 371 const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,1, m_N0)); 372 const register double* f_01 = in.getSampleDataRO(INDEX2(k0,1, m_N0)); 373 double* o = out.getSampleDataRW(m_faceOffset[2]+k0); 374 for (index_t i=0; i < numComp; ++i) { 375 o[INDEX3(i,0,0,numComp,2)] = f_00[i]*tmp0_0 + f_10[i]*tmp0_1; 376 o[INDEX3(i,1,0,numComp,2)] = f_00[i]*tmp0_2 + f_01[i]*tmp0_5 + f_10[i]*tmp0_3 + f_11[i]*tmp0_4; 377 o[INDEX3(i,0,1,numComp,2)] = f_00[i]*tmp0_0 + f_10[i]*tmp0_1; 378 o[INDEX3(i,1,1,numComp,2)] = f_00[i]*tmp0_6 + f_01[i]*tmp0_9 + f_10[i]*tmp0_7 + f_11[i]*tmp0_8; 379 } /* end of component loop i */ 380 } /* end of k0 loop */ 381 } /* end of face 2 */ 382 if (m_faceOffset[3] > -1) { 383 const double tmp0_0 = 1.0/h0; 384 const double tmp0_4 = -0.21132486540518711775/h1; 385 const double tmp0_1 = -1/h0; 386 const double tmp0_8 = -0.78867513459481288225/h1; 387 const double tmp0_5 = -0.78867513459481288225/h1; 388 const double tmp0_2 = 0.21132486540518711775/h1; 389 const double tmp0_9 = -0.21132486540518711775/h1; 390 const double tmp0_6 = 0.78867513459481288225/h1; 391 const double tmp0_3 = 0.78867513459481288225/h1; 392 const double tmp0_7 = 0.21132486540518711775/h1; 393 #pragma omp parallel for 394 for (index_t k0=0; k0 < m_NE0; ++k0) { 395 const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,m_N1-1, m_N0)); 396 const register double* f_01 = in.getSampleDataRO(INDEX2(k0,m_N1-1, m_N0)); 397 const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,m_N1-2, m_N0)); 398 const register double* f_00 = in.getSampleDataRO(INDEX2(k0,m_N1-2, m_N0)); 399 double* o = out.getSampleDataRW(m_faceOffset[3]+k0); 400 for (index_t i=0; i < numComp; ++i) { 401 o[INDEX3(i,0,0,numComp,2)] = f_01[i]*tmp0_1 + f_11[i]*tmp0_0; 402 o[INDEX3(i,1,0,numComp,2)] = f_00[i]*tmp0_5 + f_01[i]*tmp0_3 + f_10[i]*tmp0_4 + f_11[i]*tmp0_2; 403 o[INDEX3(i,0,1,numComp,2)] = f_01[i]*tmp0_1 + f_11[i]*tmp0_0; 404 o[INDEX3(i,1,1,numComp,2)] = f_00[i]*tmp0_9 + f_01[i]*tmp0_7 + f_10[i]*tmp0_8 + f_11[i]*tmp0_6; 405 } /* end of component loop i */ 406 } /* end of k0 loop */ 407 } /* end of face 3 */ 408 /* GENERATOR SNIP_GRAD_FACES BOTTOM */ 409 } else { 410 stringstream msg; 411 msg << "setToGradient() not implemented for " 412 << functionSpaceTypeAsString(out.getFunctionSpace().getTypeCode()); 413 throw RipleyException(msg.str()); 414 } 415 } 416 417 void Rectangle::addPDEToSystem(escript::AbstractSystemMatrix& mat, 418 escript::Data& rhs, const escript::Data& A, const escript::Data& B, 419 const escript::Data& C, const escript::Data& D, 420 const escript::Data& X, const escript::Data& Y, 421 const escript::Data& d, const escript::Data& y, 422 const escript::Data& d_contact, const escript::Data& y_contact, 423 const escript::Data& d_dirac, const escript::Data& y_dirac) const 424 { 425 throw RipleyException("addPDEToSystem() not implemented"); 426 } 427 428 Paso_SystemMatrixPattern* Rectangle::getPattern(bool reducedRowOrder, 429 bool reducedColOrder) const 430 { 431 if (reducedRowOrder || reducedColOrder) 432 throw RipleyException("getPattern() not implemented for reduced order"); 433 434 // connector 435 RankVector neighbour; 436 IndexVector offsetInShared(1,0); 437 IndexVector sendShared, recvShared; 438 const IndexVector faces=getNumFacesPerBoundary(); 439 const index_t left = (m_offset0==0 ? 0 : 1); 440 const index_t bottom = (m_offset1==0 ? 0 : 1); 441 // corner node from bottom-left 442 if (faces[0] == 0 && faces[2] == 0) { 443 neighbour.push_back(m_mpiInfo->rank-m_NX-1); 444 offsetInShared.push_back(offsetInShared.back()+1); 445 sendShared.push_back(m_nodeId[m_N0+left]); 446 recvShared.push_back(m_nodeId[0]); 447 } 448 // bottom edge 449 if (faces[2] == 0) { 450 neighbour.push_back(m_mpiInfo->rank-m_NX); 451 offsetInShared.push_back(offsetInShared.back()+m_N0-left); 452 for (dim_t i=left; irank-m_NX+1); 461 const index_t N0=(neighbour.back()%m_NX == 0 ? m_N0 : m_N0-1); 462 const index_t N1=(neighbour.back()/m_NX == 0 ? m_N1 : m_N1-1); 463 const index_t first=m_nodeDistribution[neighbour.back()]; 464 offsetInShared.push_back(offsetInShared.back()+1); 465 sendShared.push_back(m_nodeId[(bottom+1)*m_N0-1]); 466 recvShared.push_back(first+N0*(N1-1)); 467 } 468 // left edge 469 if (faces[0] == 0) { 470 neighbour.push_back(m_mpiInfo->rank-1); 471 offsetInShared.push_back(offsetInShared.back()+m_N1-bottom); 472 for (dim_t i=bottom; irank+1); 481 const index_t rightN0=(neighbour.back()%m_NX == 0 ? m_N0 : m_N0-1); 482 const index_t first=m_nodeDistribution[neighbour.back()]; 483 offsetInShared.push_back(offsetInShared.back()+m_N1-bottom); 484 for (dim_t i=bottom; irank+m_NX-1); 492 const index_t N0=(neighbour.back()%m_NX == 0 ? m_N0 : m_N0-1); 493 const index_t first=m_nodeDistribution[neighbour.back()]; 494 offsetInShared.push_back(offsetInShared.back()+1); 495 sendShared.push_back(m_nodeId[m_N0*(m_N1-1)+left]); 496 recvShared.push_back(first+N0-1); 497 } 498 // top edge 499 if (faces[3] == 0) { 500 neighbour.push_back(m_mpiInfo->rank+m_NX); 501 const index_t first=m_nodeDistribution[neighbour.back()]; 502 offsetInShared.push_back(offsetInShared.back()+m_N0-left); 503 for (dim_t i=left; irank+m_NX+1); 511 const index_t first=m_nodeDistribution[neighbour.back()]; 512 offsetInShared.push_back(offsetInShared.back()+1); 513 sendShared.push_back(m_nodeId[m_N0*m_N1-1]); 514 recvShared.push_back(first); 515 } 516 const int numDOF=m_nodeDistribution[m_mpiInfo->rank+1]-m_nodeDistribution[m_mpiInfo->rank]; 517 /* 518 cout << "--- rcv_shcomp ---" << endl; 519 cout << "numDOF=" << numDOF << ", numNeighbors=" << neighbour.size() << endl; 520 for (size_t i=0; i xdx = getFirstCoordAndSpacing(0); 605 pair ydy = getFirstCoordAndSpacing(1); 606 for (index_t i=0; i < getNumNodes(); i++) { 607 cout << " " << setw(5) << m_nodeId[i] 608 << " " << xdx.first+(i%m_N0)*xdx.second 609 << " " << ydy.first+(i/m_N0)*ydy.second << endl; 610 } 611 } 612 } 613 614 IndexVector Rectangle::getNumNodesPerDim() const 615 { 616 IndexVector ret; 617 ret.push_back(m_N0); 618 ret.push_back(m_N1); 619 return ret; 620 } 621 622 IndexVector Rectangle::getNumElementsPerDim() const 623 { 624 IndexVector ret; 625 ret.push_back(m_NE0); 626 ret.push_back(m_NE1); 627 return ret; 628 } 629 630 IndexVector Rectangle::getNumFacesPerBoundary() const 631 { 632 IndexVector ret(4, 0); 633 //left 634 if (m_offset0==0) 635 ret[0]=m_NE1; 636 //right 637 if (m_mpiInfo->rank%m_NX==m_NX-1) 638 ret[1]=m_NE1; 639 //bottom 640 if (m_offset1==0) 641 ret[2]=m_NE0; 642 //top 643 if (m_mpiInfo->rank/m_NX==m_NY-1) 644 ret[3]=m_NE0; 645 return ret; 646 } 647 648 pair Rectangle::getFirstCoordAndSpacing(dim_t dim) const 649 { 650 if (dim==0) { 651 return pair((m_l0*m_offset0)/m_gNE0, m_l0/m_gNE0); 652 } else if (dim==1) { 653 return pair((m_l1*m_offset1)/m_gNE1, m_l1/m_gNE1); 654 } 655 throw RipleyException("getFirstCoordAndSpacing(): invalid argument"); 656 } 657 658 //protected 659 dim_t Rectangle::getNumFaceElements() const 660 { 661 const IndexVector faces = getNumFacesPerBoundary(); 662 dim_t n=0; 663 for (size_t i=0; i xdx = getFirstCoordAndSpacing(0); 679 pair ydy = getFirstCoordAndSpacing(1); 680 arg.requireWrite(); 681 #pragma omp parallel for 682 for (dim_t i1 = 0; i1 < m_N1; i1++) { 683 for (dim_t i0 = 0; i0 < m_N0; i0++) { 684 double* point = arg.getSampleDataRW(i0+m_N0*i1); 685 point[0] = xdx.first+i0*xdx.second; 686 point[1] = ydy.first+i1*ydy.second; 687 } 688 } 689 } 690 691 //private 692 void Rectangle::populateSampleIds() 693 { 694 // identifiers are ordered from left to right, bottom to top on each rank, 695 // except for the shared nodes which are owned by the rank below / to the 696 // left of the current rank 697 698 // build node distribution vector first. 699 // m_nodeDistribution[i] is the first node id on rank i, that is 700 // rank i owns m_nodeDistribution[i+1]-nodeDistribution[i] nodes 701 m_nodeDistribution.assign(m_mpiInfo->size+1, 0); 702 m_nodeDistribution[1]=getNumNodes(); 703 for (dim_t k=1; ksize-1; k++) { 704 const index_t x=k%m_NX; 705 const index_t y=k/m_NX; 706 index_t numNodes=getNumNodes(); 707 if (x>0) 708 numNodes-=m_N1; 709 if (y>0) 710 numNodes-=m_N0; 711 if (x>0 && y>0) 712 numNodes++; // subtracted corner twice -> fix that 713 m_nodeDistribution[k+1]=m_nodeDistribution[k]+numNodes; 714 } 715 m_nodeDistribution[m_mpiInfo->size]=getNumDataPointsGlobal(); 716 717 m_nodeId.resize(getNumNodes()); 718 719 // the bottom row and left column are not owned by this rank so the 720 // identifiers need to be computed accordingly 721 const index_t left = (m_offset0==0 ? 0 : 1); 722 const index_t bottom = (m_offset1==0 ? 0 : 1); 723 if (left>0) { 724 const int neighbour=m_mpiInfo->rank-1; 725 const index_t leftN0=(neighbour%m_NX == 0 ? m_N0 : m_N0-1); 726 #pragma omp parallel for 727 for (dim_t i1=bottom; i10) { 733 const int neighbour=m_mpiInfo->rank-m_NX; 734 const index_t bottomN0=(neighbour%m_NX == 0 ? m_N0 : m_N0-1); 735 const index_t bottomN1=(neighbour/m_NX == 0 ? m_N1 : m_N1-1); 736 #pragma omp parallel for 737 for (dim_t i0=left; i00 && bottom>0) { 743 const int neighbour=m_mpiInfo->rank-m_NX-1; 744 const index_t N0=(neighbour%m_NX == 0 ? m_N0 : m_N0-1); 745 const index_t N1=(neighbour/m_NX == 0 ? m_N1 : m_N1-1); 746 m_nodeId[0]=m_nodeDistribution[neighbour]+N0*N1-1; 747 } 748 749 // the rest of the id's are contiguous 750 const index_t firstId=m_nodeDistribution[m_mpiInfo->rank]; 751 #pragma omp parallel for 752 for (dim_t i1=bottom; i10) { 778 m_faceOffset[i]=offset; 779 offset+=facesPerEdge[i]; 780 } 781 } 782 } 783 784 //private 785 int Rectangle::insertNeighbours(IndexVector& index, index_t node) const 786 { 787 const dim_t myN0 = (m_offset0==0 ? m_N0 : m_N0-1); 788 const dim_t myN1 = (m_offset1==0 ? m_N1 : m_N1-1); 789 const int x=node%myN0; 790 const int y=node/myN0; 791 int num=0; 792 if (y>0) { 793 if (x>0) { 794 // bottom-left 795 index.push_back(node-myN0-1); 796 num++; 797 } 798 // bottom 799 index.push_back(node-myN0); 800 num++; 801 if (x0) { 822 // top-left 823 index.push_back(node+myN0-1); 824 num++; 825 } 826 } 827 if (x>0) { 828 // left 829 index.push_back(node-1); 830 num++; 831 } 832 833 return num; 834 } 835 836 //private 837 void Rectangle::generateCouplePatterns(Paso_Pattern** colPattern, Paso_Pattern** rowPattern) const 838 { 839 IndexVector ptr(1,0); 840 IndexVector index; 841 const dim_t myN0 = (m_offset0==0 ? m_N0 : m_N0-1); 842 const dim_t myN1 = (m_offset1==0 ? m_N1 : m_N1-1); 843 const IndexVector faces=getNumFacesPerBoundary(); 844 845 // bottom edge 846 dim_t n=0; 847 if (faces[0] == 0) { 848 index.push_back(2*(myN0+myN1+1)); 849 index.push_back(2*(myN0+myN1+1)+1); 850 n+=2; 851 if (faces[2] == 0) { 852 index.push_back(0); 853 index.push_back(1); 854 index.push_back(2); 855 n+=3; 856 } 857 } else if (faces[2] == 0) { 858 index.push_back(1); 859 index.push_back(2); 860 n+=2; 861 } 862 // n=neighbours of bottom-left corner node 863 ptr.push_back(ptr.back()+n); 864 n=0; 865 if (faces[2] == 0) { 866 for (dim_t i=1; i idMap; 970 dim_t N=0; 971 for (IndexVector::iterator it=index.begin(); it!=index.end(); it++) { 972 if (idMap.find(*it)==idMap.end()) { 973 idMap[*it]=N; 974 *it=N++; 975 } else { 976 *it=idMap[*it]; 977 } 978 } 979 980 /* 981 cout << "--- colCouple_pattern ---" << endl; 982 cout << "M=" << M << ", N=" << N << endl; 983 for (size_t i=0; i