Parent Directory
|
Revision Log
|
Patch
revision 3752 by caltinay, Tue Jan 3 08:06:36 2012 UTC | revision 3791 by caltinay, Wed Feb 1 05:10:22 2012 UTC | |
---|---|---|
# | Line 1 | Line 1 |
1 | ||
2 | /******************************************************* | /******************************************************* |
3 | * | * |
4 | * Copyright (c) 2003-2011 by University of Queensland | * Copyright (c) 2003-2012 by University of Queensland |
5 | * Earth Systems Science Computational Center (ESSCC) | * Earth Systems Science Computational Center (ESSCC) |
6 | * http://www.uq.edu.au/esscc | * http://www.uq.edu.au/esscc |
7 | * | * |
# | Line 13 | Line 13 |
13 | ||
14 | #include <ripley/Rectangle.h> | #include <ripley/Rectangle.h> |
15 | extern "C" { | extern "C" { |
16 | #include "paso/SystemMatrix.h" | #include <paso/SystemMatrix.h> |
17 | } | } |
18 | ||
19 | #if USE_SILO | #if USE_SILO |
# | Line 29 using namespace std; | Line 29 using namespace std; |
29 | ||
30 | namespace ripley { | namespace ripley { |
31 | ||
32 | Rectangle::Rectangle(int n0, int n1, double l0, double l1, int d0, int d1) : | Rectangle::Rectangle(int n0, int n1, double x0, double y0, double x1, |
33 | double y1, int d0, int d1) : | |
34 | RipleyDomain(2), | RipleyDomain(2), |
35 | m_gNE0(n0), | m_gNE0(n0), |
36 | m_gNE1(n1), | m_gNE1(n1), |
37 | m_l0(l0), | m_x0(x0), |
38 | m_l1(l1), | m_y0(y0), |
39 | m_l0(x1-x0), | |
40 | m_l1(y1-y0), | |
41 | m_NX(d0), | m_NX(d0), |
42 | m_NY(d1) | m_NY(d1) |
43 | { | { |
# | Line 49 Rectangle::Rectangle(int n0, int n1, dou | Line 52 Rectangle::Rectangle(int n0, int n1, dou |
52 | if ((m_NX > 1 && (n0+1)/m_NX<2) || (m_NY > 1 && (n1+1)/m_NY<2)) | if ((m_NX > 1 && (n0+1)/m_NX<2) || (m_NY > 1 && (n1+1)/m_NY<2)) |
53 | throw RipleyException("Too few elements for the number of ranks"); | throw RipleyException("Too few elements for the number of ranks"); |
54 | ||
55 | // local number of elements (including overlap) | // local number of elements (with and without overlap) |
56 | m_NE0 = (m_NX>1 ? (n0+1)/m_NX : n0); | m_NE0 = m_ownNE0 = (m_NX>1 ? (n0+1)/m_NX : n0); |
57 | if (m_mpiInfo->rank%m_NX>0 && m_mpiInfo->rank%m_NX<m_NX-1) | if (m_mpiInfo->rank%m_NX>0 && m_mpiInfo->rank%m_NX<m_NX-1) |
58 | m_NE0++; | m_NE0++; |
59 | m_NE1 = (m_NY>1 ? (n1+1)/m_NY : n1); | else if (m_NX>1 && m_mpiInfo->rank%m_NX==m_NX-1) |
60 | m_ownNE0--; | |
61 | ||
62 | m_NE1 = m_ownNE1 = (m_NY>1 ? (n1+1)/m_NY : n1); | |
63 | if (m_mpiInfo->rank/m_NX>0 && m_mpiInfo->rank/m_NX<m_NY-1) | if (m_mpiInfo->rank/m_NX>0 && m_mpiInfo->rank/m_NX<m_NY-1) |
64 | m_NE1++; | m_NE1++; |
65 | else if (m_NY>1 && m_mpiInfo->rank/m_NX==m_NY-1) | |
66 | m_ownNE1--; | |
67 | ||
68 | // local number of nodes | // local number of nodes |
69 | m_N0 = m_NE0+1; | m_N0 = m_NE0+1; |
# | Line 63 Rectangle::Rectangle(int n0, int n1, dou | Line 71 Rectangle::Rectangle(int n0, int n1, dou |
71 | ||
72 | // bottom-left node is at (offset0,offset1) in global mesh | // bottom-left node is at (offset0,offset1) in global mesh |
73 | m_offset0 = (n0+1)/m_NX*(m_mpiInfo->rank%m_NX); | m_offset0 = (n0+1)/m_NX*(m_mpiInfo->rank%m_NX); |
74 | if (m_mpiInfo->rank%m_NX>0) | if (m_offset0 > 0) |
75 | m_offset0--; | m_offset0--; |
76 | m_offset1 = (n1+1)/m_NY*(m_mpiInfo->rank/m_NX); | m_offset1 = (n1+1)/m_NY*(m_mpiInfo->rank/m_NX); |
77 | if (m_mpiInfo->rank/m_NX>0) | if (m_offset1 > 0) |
78 | m_offset1--; | m_offset1--; |
79 | ||
80 | populateSampleIds(); | populateSampleIds(); |
81 | createPattern(); | |
82 | } | } |
83 | ||
84 | Rectangle::~Rectangle() | Rectangle::~Rectangle() |
85 | { | { |
86 | Paso_SystemMatrixPattern_free(m_pattern); | |
87 | Paso_Connector_free(m_connector); | |
88 | } | } |
89 | ||
90 | string Rectangle::getDescription() const | string Rectangle::getDescription() const |
# | Line 87 bool Rectangle::operator==(const Abstrac | Line 98 bool Rectangle::operator==(const Abstrac |
98 | if (o) { | if (o) { |
99 | return (RipleyDomain::operator==(other) && | return (RipleyDomain::operator==(other) && |
100 | m_gNE0==o->m_gNE0 && m_gNE1==o->m_gNE1 | m_gNE0==o->m_gNE0 && m_gNE1==o->m_gNE1 |
101 | && m_x0==o->m_x0 && m_y0==o->m_y0 | |
102 | && m_l0==o->m_l0 && m_l1==o->m_l1 | && m_l0==o->m_l0 && m_l1==o->m_l1 |
103 | && m_NX==o->m_NX && m_NY==o->m_NY); | && m_NX==o->m_NX && m_NY==o->m_NY); |
104 | } | } |
# | Line 105 void Rectangle::dump(const string& fileN | Line 117 void Rectangle::dump(const string& fileN |
117 | const int NUM_SILO_FILES = 1; | const int NUM_SILO_FILES = 1; |
118 | const char* blockDirFmt = "/block%04d"; | const char* blockDirFmt = "/block%04d"; |
119 | int driver=DB_HDF5; | int driver=DB_HDF5; |
string siloPath; | ||
120 | DBfile* dbfile = NULL; | DBfile* dbfile = NULL; |
121 | ||
122 | #ifdef ESYS_MPI | #ifdef ESYS_MPI |
# | Line 125 void Rectangle::dump(const string& fileN | Line 136 void Rectangle::dump(const string& fileN |
136 | PMPIO_DefaultClose, (void*)&driver); | PMPIO_DefaultClose, (void*)&driver); |
137 | } | } |
138 | if (baton) { | if (baton) { |
139 | char str[64]; | char siloPath[64]; |
140 | snprintf(str, 64, blockDirFmt, PMPIO_RankInGroup(baton, m_mpiInfo->rank)); | snprintf(siloPath, 64, blockDirFmt, PMPIO_RankInGroup(baton, m_mpiInfo->rank)); |
141 | siloPath = str; | dbfile = (DBfile*) PMPIO_WaitForBaton(baton, fn.c_str(), siloPath); |
dbfile = (DBfile*) PMPIO_WaitForBaton(baton, fn.c_str(), siloPath.c_str()); | ||
142 | } | } |
143 | #endif | #endif |
144 | } else { | } else { |
# | Line 140 void Rectangle::dump(const string& fileN | Line 150 void Rectangle::dump(const string& fileN |
150 | dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL, | dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL, |
151 | getDescription().c_str(), driver); | getDescription().c_str(), driver); |
152 | } | } |
153 | char siloPath[64]; | |
154 | snprintf(siloPath, 64, blockDirFmt, 0); | |
155 | DBMkDir(dbfile, siloPath); | |
156 | DBSetDir(dbfile, siloPath); | |
157 | } | } |
158 | ||
159 | if (!dbfile) | if (!dbfile) |
# | Line 232 void Rectangle::dump(const string& fileN | Line 246 void Rectangle::dump(const string& fileN |
246 | } | } |
247 | ||
248 | #else // USE_SILO | #else // USE_SILO |
249 | throw RipleyException("dump(): no Silo support"); | throw RipleyException("dump: no Silo support"); |
250 | #endif | #endif |
251 | } | } |
252 | ||
# | Line 240 const int* Rectangle::borrowSampleRefere | Line 254 const int* Rectangle::borrowSampleRefere |
254 | { | { |
255 | switch (fsType) { | switch (fsType) { |
256 | case Nodes: | case Nodes: |
257 | case ReducedNodes: //FIXME: reduced | case ReducedNodes: // FIXME: reduced |
258 | return &m_nodeId[0]; | return &m_nodeId[0]; |
259 | case DegreesOfFreedom: | case DegreesOfFreedom: |
260 | case ReducedDegreesOfFreedom: //FIXME: reduced | case ReducedDegreesOfFreedom: // FIXME: reduced |
261 | return &m_dofId[0]; | return &m_dofId[0]; |
262 | case Elements: | case Elements: |
263 | case ReducedElements: | case ReducedElements: |
# | Line 256 const int* Rectangle::borrowSampleRefere | Line 270 const int* Rectangle::borrowSampleRefere |
270 | } | } |
271 | ||
272 | stringstream msg; | stringstream msg; |
273 | msg << "borrowSampleReferenceIDs() not implemented for function space type " | msg << "borrowSampleReferenceIDs: invalid function space type " << fsType; |
<< functionSpaceTypeAsString(fsType); | ||
274 | throw RipleyException(msg.str()); | throw RipleyException(msg.str()); |
275 | } | } |
276 | ||
277 | bool Rectangle::ownSample(int fsCode, index_t id) const | bool Rectangle::ownSample(int fsType, index_t id) const |
278 | { | { |
279 | #ifdef ESYS_MPI | if (getMPISize()==1) |
280 | if (fsCode == Nodes) { | return true; |
281 | const index_t left = (m_offset0==0 ? 0 : 1); | |
282 | const index_t bottom = (m_offset1==0 ? 0 : 1); | switch (fsType) { |
283 | const index_t right = (m_mpiInfo->rank%m_NX==m_NX-1 ? m_N0 : m_N0-1); | case Nodes: |
284 | const index_t top = (m_mpiInfo->rank/m_NX==m_NY-1 ? m_N1 : m_N1-1); | case ReducedNodes: // FIXME: reduced |
285 | const index_t x=id%m_N0; | return (m_dofMap[id] < getNumDOF()); |
286 | const index_t y=id/m_N0; | case DegreesOfFreedom: |
287 | return (x>=left && x<right && y>=bottom && y<top); | case ReducedDegreesOfFreedom: |
288 | return true; | |
289 | case Elements: | |
290 | case ReducedElements: | |
291 | // check ownership of element's bottom left node | |
292 | return (m_dofMap[id%m_NE0+m_N0*(id/m_NE0)] < getNumDOF()); | |
293 | case FaceElements: | |
294 | case ReducedFaceElements: | |
295 | { | |
296 | // determine which face the sample belongs to before | |
297 | // checking ownership of corresponding element's first node | |
298 | const IndexVector faces = getNumFacesPerBoundary(); | |
299 | dim_t n=0; | |
300 | for (size_t i=0; i<faces.size(); i++) { | |
301 | n+=faces[i]; | |
302 | if (id<n) { | |
303 | index_t k; | |
304 | if (i==1) | |
305 | k=m_N0-2; | |
306 | else if (i==3) | |
307 | k=m_N0*(m_N1-2); | |
308 | else | |
309 | k=0; | |
310 | // determine whether to move right or up | |
311 | const index_t delta=(i/2==0 ? m_N0 : 1); | |
312 | return (m_dofMap[k+(id-n+faces[i])*delta] < getNumDOF()); | |
313 | } | |
314 | } | |
315 | return false; | |
316 | } | |
317 | default: | |
318 | break; | |
319 | } | |
320 | ||
321 | stringstream msg; | |
322 | msg << "ownSample: invalid function space type " << fsType; | |
323 | throw RipleyException(msg.str()); | |
324 | } | |
325 | ||
326 | void Rectangle::setToNormal(escript::Data& out) const | |
327 | { | |
328 | if (out.getFunctionSpace().getTypeCode() == FaceElements) { | |
329 | out.requireWrite(); | |
330 | #pragma omp parallel | |
331 | { | |
332 | if (m_faceOffset[0] > -1) { | |
333 | #pragma omp for nowait | |
334 | for (index_t k1 = 0; k1 < m_NE1; ++k1) { | |
335 | double* o = out.getSampleDataRW(m_faceOffset[0]+k1); | |
336 | // set vector at two quadrature points | |
337 | *o++ = -1.; | |
338 | *o++ = 0.; | |
339 | *o++ = -1.; | |
340 | *o = 0.; | |
341 | } | |
342 | } | |
343 | ||
344 | if (m_faceOffset[1] > -1) { | |
345 | #pragma omp for nowait | |
346 | for (index_t k1 = 0; k1 < m_NE1; ++k1) { | |
347 | double* o = out.getSampleDataRW(m_faceOffset[1]+k1); | |
348 | // set vector at two quadrature points | |
349 | *o++ = 1.; | |
350 | *o++ = 0.; | |
351 | *o++ = 1.; | |
352 | *o = 0.; | |
353 | } | |
354 | } | |
355 | ||
356 | if (m_faceOffset[2] > -1) { | |
357 | #pragma omp for nowait | |
358 | for (index_t k0 = 0; k0 < m_NE0; ++k0) { | |
359 | double* o = out.getSampleDataRW(m_faceOffset[2]+k0); | |
360 | // set vector at two quadrature points | |
361 | *o++ = 0.; | |
362 | *o++ = -1.; | |
363 | *o++ = 0.; | |
364 | *o = -1.; | |
365 | } | |
366 | } | |
367 | ||
368 | if (m_faceOffset[3] > -1) { | |
369 | #pragma omp for nowait | |
370 | for (index_t k0 = 0; k0 < m_NE0; ++k0) { | |
371 | double* o = out.getSampleDataRW(m_faceOffset[3]+k0); | |
372 | // set vector at two quadrature points | |
373 | *o++ = 0.; | |
374 | *o++ = 1.; | |
375 | *o++ = 0.; | |
376 | *o = 1.; | |
377 | } | |
378 | } | |
379 | } // end of parallel section | |
380 | } else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) { | |
381 | out.requireWrite(); | |
382 | #pragma omp parallel | |
383 | { | |
384 | if (m_faceOffset[0] > -1) { | |
385 | #pragma omp for nowait | |
386 | for (index_t k1 = 0; k1 < m_NE1; ++k1) { | |
387 | double* o = out.getSampleDataRW(m_faceOffset[0]+k1); | |
388 | *o++ = -1.; | |
389 | *o = 0.; | |
390 | } | |
391 | } | |
392 | ||
393 | if (m_faceOffset[1] > -1) { | |
394 | #pragma omp for nowait | |
395 | for (index_t k1 = 0; k1 < m_NE1; ++k1) { | |
396 | double* o = out.getSampleDataRW(m_faceOffset[1]+k1); | |
397 | *o++ = 1.; | |
398 | *o = 0.; | |
399 | } | |
400 | } | |
401 | ||
402 | if (m_faceOffset[2] > -1) { | |
403 | #pragma omp for nowait | |
404 | for (index_t k0 = 0; k0 < m_NE0; ++k0) { | |
405 | double* o = out.getSampleDataRW(m_faceOffset[2]+k0); | |
406 | *o++ = 0.; | |
407 | *o = -1.; | |
408 | } | |
409 | } | |
410 | ||
411 | if (m_faceOffset[3] > -1) { | |
412 | #pragma omp for nowait | |
413 | for (index_t k0 = 0; k0 < m_NE0; ++k0) { | |
414 | double* o = out.getSampleDataRW(m_faceOffset[3]+k0); | |
415 | *o++ = 0.; | |
416 | *o = 1.; | |
417 | } | |
418 | } | |
419 | } // end of parallel section | |
420 | ||
421 | } else { | } else { |
422 | stringstream msg; | stringstream msg; |
423 | msg << "ownSample() not implemented for " | msg << "setToNormal: invalid function space type " |
424 | << functionSpaceTypeAsString(fsCode); | << out.getFunctionSpace().getTypeCode(); |
425 | throw RipleyException(msg.str()); | throw RipleyException(msg.str()); |
426 | } | } |
#else | ||
return true; | ||
#endif | ||
427 | } | } |
428 | ||
429 | void Rectangle::setToGradient(escript::Data& out, const escript::Data& cIn) const | void Rectangle::setToSize(escript::Data& out) const |
430 | { | |
431 | if (out.getFunctionSpace().getTypeCode() == Elements | |
432 | || out.getFunctionSpace().getTypeCode() == ReducedElements) { | |
433 | out.requireWrite(); | |
434 | const dim_t numQuad=out.getNumDataPointsPerSample(); | |
435 | const double hSize=getFirstCoordAndSpacing(0).second; | |
436 | const double vSize=getFirstCoordAndSpacing(1).second; | |
437 | const double size=min(hSize,vSize); | |
438 | #pragma omp parallel for | |
439 | for (index_t k = 0; k < getNumElements(); ++k) { | |
440 | double* o = out.getSampleDataRW(k); | |
441 | fill(o, o+numQuad, size); | |
442 | } | |
443 | } else if (out.getFunctionSpace().getTypeCode() == FaceElements | |
444 | || out.getFunctionSpace().getTypeCode() == ReducedFaceElements) { | |
445 | out.requireWrite(); | |
446 | const dim_t numQuad=out.getNumDataPointsPerSample(); | |
447 | const double hSize=getFirstCoordAndSpacing(0).second; | |
448 | const double vSize=getFirstCoordAndSpacing(1).second; | |
449 | #pragma omp parallel | |
450 | { | |
451 | if (m_faceOffset[0] > -1) { | |
452 | #pragma omp for nowait | |
453 | for (index_t k1 = 0; k1 < m_NE1; ++k1) { | |
454 | double* o = out.getSampleDataRW(m_faceOffset[0]+k1); | |
455 | fill(o, o+numQuad, vSize); | |
456 | } | |
457 | } | |
458 | ||
459 | if (m_faceOffset[1] > -1) { | |
460 | #pragma omp for nowait | |
461 | for (index_t k1 = 0; k1 < m_NE1; ++k1) { | |
462 | double* o = out.getSampleDataRW(m_faceOffset[1]+k1); | |
463 | fill(o, o+numQuad, vSize); | |
464 | } | |
465 | } | |
466 | ||
467 | if (m_faceOffset[2] > -1) { | |
468 | #pragma omp for nowait | |
469 | for (index_t k0 = 0; k0 < m_NE0; ++k0) { | |
470 | double* o = out.getSampleDataRW(m_faceOffset[2]+k0); | |
471 | fill(o, o+numQuad, hSize); | |
472 | } | |
473 | } | |
474 | ||
475 | if (m_faceOffset[3] > -1) { | |
476 | #pragma omp for nowait | |
477 | for (index_t k0 = 0; k0 < m_NE0; ++k0) { | |
478 | double* o = out.getSampleDataRW(m_faceOffset[3]+k0); | |
479 | fill(o, o+numQuad, hSize); | |
480 | } | |
481 | } | |
482 | } // end of parallel section | |
483 | ||
484 | } else { | |
485 | stringstream msg; | |
486 | msg << "setToSize: invalid function space type " | |
487 | << out.getFunctionSpace().getTypeCode(); | |
488 | throw RipleyException(msg.str()); | |
489 | } | |
490 | } | |
491 | ||
492 | Paso_SystemMatrixPattern* Rectangle::getPattern(bool reducedRowOrder, | |
493 | bool reducedColOrder) const | |
494 | { | |
495 | /* FIXME: reduced | |
496 | if (reducedRowOrder || reducedColOrder) | |
497 | throw RipleyException("getPattern() not implemented for reduced order"); | |
498 | */ | |
499 | return m_pattern; | |
500 | } | |
501 | ||
502 | void Rectangle::Print_Mesh_Info(const bool full) const | |
503 | { | |
504 | RipleyDomain::Print_Mesh_Info(full); | |
505 | if (full) { | |
506 | cout << " Id Coordinates" << endl; | |
507 | cout.precision(15); | |
508 | cout.setf(ios::scientific, ios::floatfield); | |
509 | pair<double,double> xdx = getFirstCoordAndSpacing(0); | |
510 | pair<double,double> ydy = getFirstCoordAndSpacing(1); | |
511 | for (index_t i=0; i < getNumNodes(); i++) { | |
512 | cout << " " << setw(5) << m_nodeId[i] | |
513 | << " " << xdx.first+(i%m_N0)*xdx.second | |
514 | << " " << ydy.first+(i/m_N0)*ydy.second << endl; | |
515 | } | |
516 | } | |
517 | } | |
518 | ||
519 | IndexVector Rectangle::getNumNodesPerDim() const | |
520 | { | |
521 | IndexVector ret; | |
522 | ret.push_back(m_N0); | |
523 | ret.push_back(m_N1); | |
524 | return ret; | |
525 | } | |
526 | ||
527 | IndexVector Rectangle::getNumElementsPerDim() const | |
528 | { | |
529 | IndexVector ret; | |
530 | ret.push_back(m_NE0); | |
531 | ret.push_back(m_NE1); | |
532 | return ret; | |
533 | } | |
534 | ||
535 | IndexVector Rectangle::getNumFacesPerBoundary() const | |
536 | { | |
537 | IndexVector ret(4, 0); | |
538 | //left | |
539 | if (m_offset0==0) | |
540 | ret[0]=m_NE1; | |
541 | //right | |
542 | if (m_mpiInfo->rank%m_NX==m_NX-1) | |
543 | ret[1]=m_NE1; | |
544 | //bottom | |
545 | if (m_offset1==0) | |
546 | ret[2]=m_NE0; | |
547 | //top | |
548 | if (m_mpiInfo->rank/m_NX==m_NY-1) | |
549 | ret[3]=m_NE0; | |
550 | return ret; | |
551 | } | |
552 | ||
553 | IndexVector Rectangle::getNumSubdivisionsPerDim() const | |
554 | { | |
555 | IndexVector ret; | |
556 | ret.push_back(m_NX); | |
557 | ret.push_back(m_NY); | |
558 | return ret; | |
559 | } | |
560 | ||
561 | pair<double,double> Rectangle::getFirstCoordAndSpacing(dim_t dim) const | |
562 | { | |
563 | if (dim==0) { | |
564 | return pair<double,double>(m_x0+(m_l0*m_offset0)/m_gNE0, m_l0/m_gNE0); | |
565 | } else if (dim==1) { | |
566 | return pair<double,double>(m_y0+(m_l1*m_offset1)/m_gNE1, m_l1/m_gNE1); | |
567 | } | |
568 | throw RipleyException("getFirstCoordAndSpacing: invalid argument"); | |
569 | } | |
570 | ||
571 | //protected | |
572 | dim_t Rectangle::getNumDOF() const | |
573 | { | |
574 | return (m_gNE0+1)/m_NX*(m_gNE1+1)/m_NY; | |
575 | } | |
576 | ||
577 | //protected | |
578 | dim_t Rectangle::getNumFaceElements() const | |
579 | { | |
580 | const IndexVector faces = getNumFacesPerBoundary(); | |
581 | dim_t n=0; | |
582 | for (size_t i=0; i<faces.size(); i++) | |
583 | n+=faces[i]; | |
584 | return n; | |
585 | } | |
586 | ||
587 | //protected | |
588 | void Rectangle::assembleCoordinates(escript::Data& arg) const | |
589 | { | |
590 | escriptDataC x = arg.getDataC(); | |
591 | int numDim = m_numDim; | |
592 | if (!isDataPointShapeEqual(&x, 1, &numDim)) | |
593 | throw RipleyException("setToX: Invalid Data object shape"); | |
594 | if (!numSamplesEqual(&x, 1, getNumNodes())) | |
595 | throw RipleyException("setToX: Illegal number of samples in Data object"); | |
596 | ||
597 | pair<double,double> xdx = getFirstCoordAndSpacing(0); | |
598 | pair<double,double> ydy = getFirstCoordAndSpacing(1); | |
599 | arg.requireWrite(); | |
600 | #pragma omp parallel for | |
601 | for (dim_t i1 = 0; i1 < m_N1; i1++) { | |
602 | for (dim_t i0 = 0; i0 < m_N0; i0++) { | |
603 | double* point = arg.getSampleDataRW(i0+m_N0*i1); | |
604 | point[0] = xdx.first+i0*xdx.second; | |
605 | point[1] = ydy.first+i1*ydy.second; | |
606 | } | |
607 | } | |
608 | } | |
609 | ||
610 | //protected | |
611 | void Rectangle::assembleGradient(escript::Data& out, escript::Data& in) const | |
612 | { | { |
escript::Data& in = *const_cast<escript::Data*>(&cIn); | ||
613 | const dim_t numComp = in.getDataPointSize(); | const dim_t numComp = in.getDataPointSize(); |
614 | const double h0 = m_l0/m_gNE0; | const double h0 = m_l0/m_gNE0; |
615 | const double h1 = m_l1/m_gNE1; | const double h1 = m_l1/m_gNE1; |
# | Line 307 void Rectangle::setToGradient(escript::D | Line 631 void Rectangle::setToGradient(escript::D |
631 | const double cy7 = 1./h1; | const double cy7 = 1./h1; |
632 | ||
633 | if (out.getFunctionSpace().getTypeCode() == Elements) { | if (out.getFunctionSpace().getTypeCode() == Elements) { |
634 | /*** GENERATOR SNIP_GRAD_ELEMENTS TOP */ | out.requireWrite(); |
635 | #pragma omp parallel for | #pragma omp parallel for |
636 | for (index_t k1=0; k1 < m_NE1; ++k1) { | for (index_t k1=0; k1 < m_NE1; ++k1) { |
637 | for (index_t k0=0; k0 < m_NE0; ++k0) { | for (index_t k0=0; k0 < m_NE0; ++k0) { |
638 | const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,k1, m_N0)); | const double* f_10 = in.getSampleDataRO(INDEX2(k0+1,k1, m_N0)); |
639 | const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,k1+1, m_N0)); | const double* f_11 = in.getSampleDataRO(INDEX2(k0+1,k1+1, m_N0)); |
640 | const register double* f_01 = in.getSampleDataRO(INDEX2(k0,k1+1, m_N0)); | const double* f_01 = in.getSampleDataRO(INDEX2(k0,k1+1, m_N0)); |
641 | const register double* f_00 = in.getSampleDataRO(INDEX2(k0,k1, m_N0)); | const double* f_00 = in.getSampleDataRO(INDEX2(k0,k1, m_N0)); |
642 | double* o = out.getSampleDataRW(INDEX2(k0,k1,m_NE0)); | double* o = out.getSampleDataRW(INDEX2(k0,k1,m_NE0)); |
643 | for (index_t i=0; i < numComp; ++i) { | for (index_t i=0; i < numComp; ++i) { |
644 | o[INDEX3(i,0,0,numComp,2)] = f_00[i]*cx1 + f_01[i]*cx3 + f_10[i]*cx6 + f_11[i]*cx4; | o[INDEX3(i,0,0,numComp,2)] = f_00[i]*cx1 + f_01[i]*cx3 + f_10[i]*cx6 + f_11[i]*cx4; |
# | Line 328 void Rectangle::setToGradient(escript::D | Line 652 void Rectangle::setToGradient(escript::D |
652 | } /* end of component loop i */ | } /* end of component loop i */ |
653 | } /* end of k0 loop */ | } /* end of k0 loop */ |
654 | } /* end of k1 loop */ | } /* end of k1 loop */ |
/* GENERATOR SNIP_GRAD_ELEMENTS BOTTOM */ | ||
655 | } else if (out.getFunctionSpace().getTypeCode() == ReducedElements) { | } else if (out.getFunctionSpace().getTypeCode() == ReducedElements) { |
656 | /*** GENERATOR SNIP_GRAD_REDUCED_ELEMENTS TOP */ | out.requireWrite(); |
657 | #pragma omp parallel for | #pragma omp parallel for |
658 | for (index_t k1=0; k1 < m_NE1; ++k1) { | for (index_t k1=0; k1 < m_NE1; ++k1) { |
659 | for (index_t k0=0; k0 < m_NE0; ++k0) { | for (index_t k0=0; k0 < m_NE0; ++k0) { |
660 | const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,k1, m_N0)); | const double* f_10 = in.getSampleDataRO(INDEX2(k0+1,k1, m_N0)); |
661 | const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,k1+1, m_N0)); | const double* f_11 = in.getSampleDataRO(INDEX2(k0+1,k1+1, m_N0)); |
662 | const register double* f_01 = in.getSampleDataRO(INDEX2(k0,k1+1, m_N0)); | const double* f_01 = in.getSampleDataRO(INDEX2(k0,k1+1, m_N0)); |
663 | const register double* f_00 = in.getSampleDataRO(INDEX2(k0,k1, m_N0)); | const double* f_00 = in.getSampleDataRO(INDEX2(k0,k1, m_N0)); |
664 | double* o = out.getSampleDataRW(INDEX2(k0,k1,m_NE0)); | double* o = out.getSampleDataRW(INDEX2(k0,k1,m_NE0)); |
665 | for (index_t i=0; i < numComp; ++i) { | for (index_t i=0; i < numComp; ++i) { |
666 | o[INDEX3(i,0,0,numComp,2)] = cx5*(f_10[i] + f_11[i]) + cx2*(f_00[i] + f_01[i]); | o[INDEX3(i,0,0,numComp,2)] = cx5*(f_10[i] + f_11[i]) + cx2*(f_00[i] + f_01[i]); |
# | Line 345 void Rectangle::setToGradient(escript::D | Line 668 void Rectangle::setToGradient(escript::D |
668 | } /* end of component loop i */ | } /* end of component loop i */ |
669 | } /* end of k0 loop */ | } /* end of k0 loop */ |
670 | } /* end of k1 loop */ | } /* end of k1 loop */ |
/* GENERATOR SNIP_GRAD_REDUCED_ELEMENTS BOTTOM */ | ||
671 | } else if (out.getFunctionSpace().getTypeCode() == FaceElements) { | } else if (out.getFunctionSpace().getTypeCode() == FaceElements) { |
672 | out.requireWrite(); | |
673 | #pragma omp parallel | #pragma omp parallel |
674 | { | { |
/*** GENERATOR SNIP_GRAD_FACES TOP */ | ||
675 | if (m_faceOffset[0] > -1) { | if (m_faceOffset[0] > -1) { |
676 | #pragma omp for nowait | #pragma omp for nowait |
677 | for (index_t k1=0; k1 < m_NE1; ++k1) { | for (index_t k1=0; k1 < m_NE1; ++k1) { |
678 | const register double* f_10 = in.getSampleDataRO(INDEX2(1,k1, m_N0)); | const double* f_10 = in.getSampleDataRO(INDEX2(1,k1, m_N0)); |
679 | const register double* f_11 = in.getSampleDataRO(INDEX2(1,k1+1, m_N0)); | const double* f_11 = in.getSampleDataRO(INDEX2(1,k1+1, m_N0)); |
680 | const register double* f_01 = in.getSampleDataRO(INDEX2(0,k1+1, m_N0)); | const double* f_01 = in.getSampleDataRO(INDEX2(0,k1+1, m_N0)); |
681 | const register double* f_00 = in.getSampleDataRO(INDEX2(0,k1, m_N0)); | const double* f_00 = in.getSampleDataRO(INDEX2(0,k1, m_N0)); |
682 | double* o = out.getSampleDataRW(m_faceOffset[0]+k1); | double* o = out.getSampleDataRW(m_faceOffset[0]+k1); |
683 | for (index_t i=0; i < numComp; ++i) { | for (index_t i=0; i < numComp; ++i) { |
684 | o[INDEX3(i,0,0,numComp,2)] = f_00[i]*cx1 + f_01[i]*cx3 + f_10[i]*cx6 + f_11[i]*cx4; | o[INDEX3(i,0,0,numComp,2)] = f_00[i]*cx1 + f_01[i]*cx3 + f_10[i]*cx6 + f_11[i]*cx4; |
# | Line 369 void Rectangle::setToGradient(escript::D | Line 691 void Rectangle::setToGradient(escript::D |
691 | if (m_faceOffset[1] > -1) { | if (m_faceOffset[1] > -1) { |
692 | #pragma omp for nowait | #pragma omp for nowait |
693 | for (index_t k1=0; k1 < m_NE1; ++k1) { | for (index_t k1=0; k1 < m_NE1; ++k1) { |
694 | const register double* f_10 = in.getSampleDataRO(INDEX2(m_N0-1,k1, m_N0)); | const double* f_10 = in.getSampleDataRO(INDEX2(m_N0-1,k1, m_N0)); |
695 | const register double* f_11 = in.getSampleDataRO(INDEX2(m_N0-1,k1+1, m_N0)); | const double* f_11 = in.getSampleDataRO(INDEX2(m_N0-1,k1+1, m_N0)); |
696 | const register double* f_01 = in.getSampleDataRO(INDEX2(m_N0-2,k1+1, m_N0)); | const double* f_01 = in.getSampleDataRO(INDEX2(m_N0-2,k1+1, m_N0)); |
697 | const register double* f_00 = in.getSampleDataRO(INDEX2(m_N0-2,k1, m_N0)); | const double* f_00 = in.getSampleDataRO(INDEX2(m_N0-2,k1, m_N0)); |
698 | double* o = out.getSampleDataRW(m_faceOffset[1]+k1); | double* o = out.getSampleDataRW(m_faceOffset[1]+k1); |
699 | for (index_t i=0; i < numComp; ++i) { | for (index_t i=0; i < numComp; ++i) { |
700 | o[INDEX3(i,0,0,numComp,2)] = f_00[i]*cx1 + f_01[i]*cx3 + f_10[i]*cx6 + f_11[i]*cx4; | o[INDEX3(i,0,0,numComp,2)] = f_00[i]*cx1 + f_01[i]*cx3 + f_10[i]*cx6 + f_11[i]*cx4; |
# | Line 385 void Rectangle::setToGradient(escript::D | Line 707 void Rectangle::setToGradient(escript::D |
707 | if (m_faceOffset[2] > -1) { | if (m_faceOffset[2] > -1) { |
708 | #pragma omp for nowait | #pragma omp for nowait |
709 | for (index_t k0=0; k0 < m_NE0; ++k0) { | for (index_t k0=0; k0 < m_NE0; ++k0) { |
710 | const register double* f_00 = in.getSampleDataRO(INDEX2(k0,0, m_N0)); | const double* f_00 = in.getSampleDataRO(INDEX2(k0,0, m_N0)); |
711 | const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,0, m_N0)); | const double* f_10 = in.getSampleDataRO(INDEX2(k0+1,0, m_N0)); |
712 | const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,1, m_N0)); | const double* f_11 = in.getSampleDataRO(INDEX2(k0+1,1, m_N0)); |
713 | const register double* f_01 = in.getSampleDataRO(INDEX2(k0,1, m_N0)); | const double* f_01 = in.getSampleDataRO(INDEX2(k0,1, m_N0)); |
714 | double* o = out.getSampleDataRW(m_faceOffset[2]+k0); | double* o = out.getSampleDataRW(m_faceOffset[2]+k0); |
715 | for (index_t i=0; i < numComp; ++i) { | for (index_t i=0; i < numComp; ++i) { |
716 | o[INDEX3(i,0,0,numComp,2)] = f_00[i]*cx0 + f_10[i]*cx7; | o[INDEX3(i,0,0,numComp,2)] = f_00[i]*cx0 + f_10[i]*cx7; |
# | Line 401 void Rectangle::setToGradient(escript::D | Line 723 void Rectangle::setToGradient(escript::D |
723 | if (m_faceOffset[3] > -1) { | if (m_faceOffset[3] > -1) { |
724 | #pragma omp for nowait | #pragma omp for nowait |
725 | for (index_t k0=0; k0 < m_NE0; ++k0) { | for (index_t k0=0; k0 < m_NE0; ++k0) { |
726 | const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,m_N1-1, m_N0)); | const double* f_11 = in.getSampleDataRO(INDEX2(k0+1,m_N1-1, m_N0)); |
727 | const register double* f_01 = in.getSampleDataRO(INDEX2(k0,m_N1-1, m_N0)); | const double* f_01 = in.getSampleDataRO(INDEX2(k0,m_N1-1, m_N0)); |
728 | const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,m_N1-2, m_N0)); | const double* f_10 = in.getSampleDataRO(INDEX2(k0+1,m_N1-2, m_N0)); |
729 | const register double* f_00 = in.getSampleDataRO(INDEX2(k0,m_N1-2, m_N0)); | const double* f_00 = in.getSampleDataRO(INDEX2(k0,m_N1-2, m_N0)); |
730 | double* o = out.getSampleDataRW(m_faceOffset[3]+k0); | double* o = out.getSampleDataRW(m_faceOffset[3]+k0); |
731 | for (index_t i=0; i < numComp; ++i) { | for (index_t i=0; i < numComp; ++i) { |
732 | o[INDEX3(i,0,0,numComp,2)] = f_01[i]*cx0 + f_11[i]*cx7; | o[INDEX3(i,0,0,numComp,2)] = f_01[i]*cx0 + f_11[i]*cx7; |
# | Line 414 void Rectangle::setToGradient(escript::D | Line 736 void Rectangle::setToGradient(escript::D |
736 | } /* end of component loop i */ | } /* end of component loop i */ |
737 | } /* end of k0 loop */ | } /* end of k0 loop */ |
738 | } /* end of face 3 */ | } /* end of face 3 */ |
/* GENERATOR SNIP_GRAD_FACES BOTTOM */ | ||
739 | } // end of parallel section | } // end of parallel section |
740 | } else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) { | } else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) { |
741 | out.requireWrite(); | |
742 | #pragma omp parallel | #pragma omp parallel |
743 | { | { |
/*** GENERATOR SNIP_GRAD_REDUCED_FACES TOP */ | ||
744 | if (m_faceOffset[0] > -1) { | if (m_faceOffset[0] > -1) { |
745 | #pragma omp for nowait | #pragma omp for nowait |
746 | for (index_t k1=0; k1 < m_NE1; ++k1) { | for (index_t k1=0; k1 < m_NE1; ++k1) { |
747 | const register double* f_10 = in.getSampleDataRO(INDEX2(1,k1, m_N0)); | const double* f_10 = in.getSampleDataRO(INDEX2(1,k1, m_N0)); |
748 | const register double* f_11 = in.getSampleDataRO(INDEX2(1,k1+1, m_N0)); | const double* f_11 = in.getSampleDataRO(INDEX2(1,k1+1, m_N0)); |
749 | const register double* f_01 = in.getSampleDataRO(INDEX2(0,k1+1, m_N0)); | const double* f_01 = in.getSampleDataRO(INDEX2(0,k1+1, m_N0)); |
750 | const register double* f_00 = in.getSampleDataRO(INDEX2(0,k1, m_N0)); | const double* f_00 = in.getSampleDataRO(INDEX2(0,k1, m_N0)); |
751 | double* o = out.getSampleDataRW(m_faceOffset[0]+k1); | double* o = out.getSampleDataRW(m_faceOffset[0]+k1); |
752 | for (index_t i=0; i < numComp; ++i) { | for (index_t i=0; i < numComp; ++i) { |
753 | o[INDEX3(i,0,0,numComp,2)] = cx5*(f_10[i] + f_11[i]) + cx2*(f_00[i] + f_01[i]); | o[INDEX3(i,0,0,numComp,2)] = cx5*(f_10[i] + f_11[i]) + cx2*(f_00[i] + f_01[i]); |
# | Line 437 void Rectangle::setToGradient(escript::D | Line 758 void Rectangle::setToGradient(escript::D |
758 | if (m_faceOffset[1] > -1) { | if (m_faceOffset[1] > -1) { |
759 | #pragma omp for nowait | #pragma omp for nowait |
760 | for (index_t k1=0; k1 < m_NE1; ++k1) { | for (index_t k1=0; k1 < m_NE1; ++k1) { |
761 | const register double* f_10 = in.getSampleDataRO(INDEX2(m_N0-1,k1, m_N0)); | const double* f_10 = in.getSampleDataRO(INDEX2(m_N0-1,k1, m_N0)); |
762 | const register double* f_11 = in.getSampleDataRO(INDEX2(m_N0-1,k1+1, m_N0)); | const double* f_11 = in.getSampleDataRO(INDEX2(m_N0-1,k1+1, m_N0)); |
763 | const register double* f_01 = in.getSampleDataRO(INDEX2(m_N0-2,k1+1, m_N0)); | const double* f_01 = in.getSampleDataRO(INDEX2(m_N0-2,k1+1, m_N0)); |
764 | const register double* f_00 = in.getSampleDataRO(INDEX2(m_N0-2,k1, m_N0)); | const double* f_00 = in.getSampleDataRO(INDEX2(m_N0-2,k1, m_N0)); |
765 | double* o = out.getSampleDataRW(m_faceOffset[1]+k1); | double* o = out.getSampleDataRW(m_faceOffset[1]+k1); |
766 | for (index_t i=0; i < numComp; ++i) { | for (index_t i=0; i < numComp; ++i) { |
767 | o[INDEX3(i,0,0,numComp,2)] = cx5*(f_10[i] + f_11[i]) + cx2*(f_00[i] + f_01[i]); | o[INDEX3(i,0,0,numComp,2)] = cx5*(f_10[i] + f_11[i]) + cx2*(f_00[i] + f_01[i]); |
# | Line 451 void Rectangle::setToGradient(escript::D | Line 772 void Rectangle::setToGradient(escript::D |
772 | if (m_faceOffset[2] > -1) { | if (m_faceOffset[2] > -1) { |
773 | #pragma omp for nowait | #pragma omp for nowait |
774 | for (index_t k0=0; k0 < m_NE0; ++k0) { | for (index_t k0=0; k0 < m_NE0; ++k0) { |
775 | const register double* f_00 = in.getSampleDataRO(INDEX2(k0,0, m_N0)); | const double* f_00 = in.getSampleDataRO(INDEX2(k0,0, m_N0)); |
776 | const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,0, m_N0)); | const double* f_10 = in.getSampleDataRO(INDEX2(k0+1,0, m_N0)); |
777 | const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,1, m_N0)); | const double* f_11 = in.getSampleDataRO(INDEX2(k0+1,1, m_N0)); |
778 | const register double* f_01 = in.getSampleDataRO(INDEX2(k0,1, m_N0)); | const double* f_01 = in.getSampleDataRO(INDEX2(k0,1, m_N0)); |
779 | double* o = out.getSampleDataRW(m_faceOffset[2]+k0); | double* o = out.getSampleDataRW(m_faceOffset[2]+k0); |
780 | for (index_t i=0; i < numComp; ++i) { | for (index_t i=0; i < numComp; ++i) { |
781 | o[INDEX3(i,0,0,numComp,2)] = f_00[i]*cx0 + f_10[i]*cx7; | o[INDEX3(i,0,0,numComp,2)] = f_00[i]*cx0 + f_10[i]*cx7; |
# | Line 465 void Rectangle::setToGradient(escript::D | Line 786 void Rectangle::setToGradient(escript::D |
786 | if (m_faceOffset[3] > -1) { | if (m_faceOffset[3] > -1) { |
787 | #pragma omp for nowait | #pragma omp for nowait |
788 | for (index_t k0=0; k0 < m_NE0; ++k0) { | for (index_t k0=0; k0 < m_NE0; ++k0) { |
789 | const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,m_N1-1, m_N0)); | const double* f_11 = in.getSampleDataRO(INDEX2(k0+1,m_N1-1, m_N0)); |
790 | const register double* f_01 = in.getSampleDataRO(INDEX2(k0,m_N1-1, m_N0)); | const double* f_01 = in.getSampleDataRO(INDEX2(k0,m_N1-1, m_N0)); |
791 | const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,m_N1-2, m_N0)); | const double* f_10 = in.getSampleDataRO(INDEX2(k0+1,m_N1-2, m_N0)); |
792 | const register double* f_00 = in.getSampleDataRO(INDEX2(k0,m_N1-2, m_N0)); | const double* f_00 = in.getSampleDataRO(INDEX2(k0,m_N1-2, m_N0)); |
793 | double* o = out.getSampleDataRW(m_faceOffset[3]+k0); | double* o = out.getSampleDataRW(m_faceOffset[3]+k0); |
794 | for (index_t i=0; i < numComp; ++i) { | for (index_t i=0; i < numComp; ++i) { |
795 | o[INDEX3(i,0,0,numComp,2)] = f_01[i]*cx0 + f_11[i]*cx7; | o[INDEX3(i,0,0,numComp,2)] = f_01[i]*cx0 + f_11[i]*cx7; |
# | Line 476 void Rectangle::setToGradient(escript::D | Line 797 void Rectangle::setToGradient(escript::D |
797 | } /* end of component loop i */ | } /* end of component loop i */ |
798 | } /* end of k0 loop */ | } /* end of k0 loop */ |
799 | } /* end of face 3 */ | } /* end of face 3 */ |
/* GENERATOR SNIP_GRAD_REDUCED_FACES BOTTOM */ | ||
800 | } // end of parallel section | } // end of parallel section |
} else { | ||
stringstream msg; | ||
msg << "setToGradient() not implemented for " | ||
<< functionSpaceTypeAsString(out.getFunctionSpace().getTypeCode()); | ||
throw RipleyException(msg.str()); | ||
801 | } | } |
802 | } | } |
803 | ||
804 | void Rectangle::setToIntegrals(vector<double>& integrals, const escript::Data& arg) const | //protected |
805 | void Rectangle::assembleIntegrate(vector<double>& integrals, escript::Data& arg) const | |
806 | { | { |
807 | escript::Data& in = *const_cast<escript::Data*>(&arg); | const dim_t numComp = arg.getDataPointSize(); |
const dim_t numComp = in.getDataPointSize(); | ||
808 | const double h0 = m_l0/m_gNE0; | const double h0 = m_l0/m_gNE0; |
809 | const double h1 = m_l1/m_gNE1; | const double h1 = m_l1/m_gNE1; |
810 | const index_t left = (m_offset0==0 ? 0 : 1); | |
811 | const index_t bottom = (m_offset1==0 ? 0 : 1); | |
812 | if (arg.getFunctionSpace().getTypeCode() == Elements) { | if (arg.getFunctionSpace().getTypeCode() == Elements) { |
813 | const double w_0 = h0*h1/4.; | const double w = h0*h1/4.; |
814 | #pragma omp parallel | #pragma omp parallel |
815 | { | { |
816 | vector<double> int_local(numComp, 0); | vector<double> int_local(numComp, 0); |
817 | #pragma omp for nowait | #pragma omp for nowait |
818 | for (index_t k1 = 0; k1 < m_NE1; ++k1) { | for (index_t k1 = bottom; k1 < bottom+m_ownNE1; ++k1) { |
819 | for (index_t k0 = 0; k0 < m_NE0; ++k0) { | for (index_t k0 = left; k0 < left+m_ownNE0; ++k0) { |
820 | const double* f = in.getSampleDataRO(INDEX2(k0, k1, m_NE0)); | const double* f = arg.getSampleDataRO(INDEX2(k0, k1, m_NE0)); |
821 | for (index_t i=0; i < numComp; ++i) { | for (index_t i=0; i < numComp; ++i) { |
822 | const register double f_0 = f[INDEX2(i,0,numComp)]; | const double f0 = f[INDEX2(i,0,numComp)]; |
823 | const register double f_1 = f[INDEX2(i,1,numComp)]; | const double f1 = f[INDEX2(i,1,numComp)]; |
824 | const register double f_2 = f[INDEX2(i,2,numComp)]; | const double f2 = f[INDEX2(i,2,numComp)]; |
825 | const register double f_3 = f[INDEX2(i,3,numComp)]; | const double f3 = f[INDEX2(i,3,numComp)]; |
826 | int_local[i]+=(f_0+f_1+f_2+f_3)*w_0; | int_local[i]+=(f0+f1+f2+f3)*w; |
827 | } /* end of component loop i */ | } /* end of component loop i */ |
828 | } /* end of k0 loop */ | } /* end of k0 loop */ |
829 | } /* end of k1 loop */ | } /* end of k1 loop */ |
# | Line 516 void Rectangle::setToIntegrals(vector<do | Line 833 void Rectangle::setToIntegrals(vector<do |
833 | integrals[i]+=int_local[i]; | integrals[i]+=int_local[i]; |
834 | } // end of parallel section | } // end of parallel section |
835 | } else if (arg.getFunctionSpace().getTypeCode() == ReducedElements) { | } else if (arg.getFunctionSpace().getTypeCode() == ReducedElements) { |
836 | const double w_0 = h0*h1; | const double w = h0*h1; |
837 | #pragma omp parallel | #pragma omp parallel |
838 | { | { |
839 | vector<double> int_local(numComp, 0); | vector<double> int_local(numComp, 0); |
840 | #pragma omp for nowait | #pragma omp for nowait |
841 | for (index_t k1 = 0; k1 < m_NE1; ++k1) { | for (index_t k1 = bottom; k1 < bottom+m_ownNE1; ++k1) { |
842 | for (index_t k0 = 0; k0 < m_NE0; ++k0) { | for (index_t k0 = left; k0 < left+m_ownNE0; ++k0) { |
843 | const double* f = in.getSampleDataRO(INDEX2(k0, k1, m_NE0)); | const double* f = arg.getSampleDataRO(INDEX2(k0, k1, m_NE0)); |
844 | for (index_t i=0; i < numComp; ++i) { | for (index_t i=0; i < numComp; ++i) { |
845 | int_local[i]+=f[i]*w_0; | int_local[i]+=f[i]*w; |
846 | } /* end of component loop i */ | } /* end of component loop i */ |
847 | } /* end of k0 loop */ | } /* end of k0 loop */ |
848 | } /* end of k1 loop */ | } /* end of k1 loop */ |
# | Line 535 void Rectangle::setToIntegrals(vector<do | Line 852 void Rectangle::setToIntegrals(vector<do |
852 | integrals[i]+=int_local[i]; | integrals[i]+=int_local[i]; |
853 | } // end of parallel section | } // end of parallel section |
854 | } else if (arg.getFunctionSpace().getTypeCode() == FaceElements) { | } else if (arg.getFunctionSpace().getTypeCode() == FaceElements) { |
855 | const double w_0 = h0/2.; | const double w0 = h0/2.; |
856 | const double w_1 = h1/2.; | const double w1 = h1/2.; |
857 | #pragma omp parallel | #pragma omp parallel |
858 | { | { |
859 | vector<double> int_local(numComp, 0); | vector<double> int_local(numComp, 0); |
860 | if (m_faceOffset[0] > -1) { | if (m_faceOffset[0] > -1) { |
861 | #pragma omp for nowait | #pragma omp for nowait |
862 | for (index_t k1 = 0; k1 < m_NE1; ++k1) { | for (index_t k1 = bottom; k1 < bottom+m_ownNE1; ++k1) { |
863 | const double* f = in.getSampleDataRO(m_faceOffset[0]+k1); | const double* f = arg.getSampleDataRO(m_faceOffset[0]+k1); |
864 | for (index_t i=0; i < numComp; ++i) { | for (index_t i=0; i < numComp; ++i) { |
865 | const register double f_0 = f[INDEX2(i,0,numComp)]; | const double f0 = f[INDEX2(i,0,numComp)]; |
866 | const register double f_1 = f[INDEX2(i,1,numComp)]; | const double f1 = f[INDEX2(i,1,numComp)]; |
867 | int_local[i]+=(f_0+f_1)*w_1; | int_local[i]+=(f0+f1)*w1; |
868 | } /* end of component loop i */ | } /* end of component loop i */ |
869 | } /* end of k1 loop */ | } /* end of k1 loop */ |
870 | } | } |
871 | ||
872 | if (m_faceOffset[1] > -1) { | if (m_faceOffset[1] > -1) { |
873 | #pragma omp for nowait | #pragma omp for nowait |
874 | for (index_t k1 = 0; k1 < m_NE1; ++k1) { | for (index_t k1 = bottom; k1 < bottom+m_ownNE1; ++k1) { |
875 | const double* f = in.getSampleDataRO(m_faceOffset[1]+k1); | const double* f = arg.getSampleDataRO(m_faceOffset[1]+k1); |
876 | for (index_t i=0; i < numComp; ++i) { | for (index_t i=0; i < numComp; ++i) { |
877 | const register double f_0 = f[INDEX2(i,0,numComp)]; | const double f0 = f[INDEX2(i,0,numComp)]; |
878 | const register double f_1 = f[INDEX2(i,1,numComp)]; | const double f1 = f[INDEX2(i,1,numComp)]; |
879 | int_local[i]+=(f_0+f_1)*w_1; | int_local[i]+=(f0+f1)*w1; |
880 | } /* end of component loop i */ | } /* end of component loop i */ |
881 | } /* end of k1 loop */ | } /* end of k1 loop */ |
882 | } | } |
883 | ||
884 | if (m_faceOffset[2] > -1) { | if (m_faceOffset[2] > -1) { |
885 | #pragma omp for nowait | #pragma omp for nowait |
886 | for (index_t k0 = 0; k0 < m_NE0; ++k0) { | for (index_t k0 = left; k0 < left+m_ownNE0; ++k0) { |
887 | const double* f = in.getSampleDataRO(m_faceOffset[2]+k0); | const double* f = arg.getSampleDataRO(m_faceOffset[2]+k0); |
888 | for (index_t i=0; i < numComp; ++i) { | for (index_t i=0; i < numComp; ++i) { |
889 | const register double f_0 = f[INDEX2(i,0,numComp)]; | const double f0 = f[INDEX2(i,0,numComp)]; |
890 | const register double f_1 = f[INDEX2(i,1,numComp)]; | const double f1 = f[INDEX2(i,1,numComp)]; |
891 | int_local[i]+=(f_0+f_1)*w_0; | int_local[i]+=(f0+f1)*w0; |
892 | } /* end of component loop i */ | } /* end of component loop i */ |
893 | } /* end of k0 loop */ | } /* end of k0 loop */ |
894 | } | } |
895 | ||
896 | if (m_faceOffset[3] > -1) { | if (m_faceOffset[3] > -1) { |
897 | #pragma omp for nowait | #pragma omp for nowait |
898 | for (index_t k0 = 0; k0 < m_NE0; ++k0) { | for (index_t k0 = left; k0 < left+m_ownNE0; ++k0) { |
899 | const double* f = in.getSampleDataRO(m_faceOffset[3]+k0); | const double* f = arg.getSampleDataRO(m_faceOffset[3]+k0); |
900 | for (index_t i=0; i < numComp; ++i) { | for (index_t i=0; i < numComp; ++i) { |
901 | const register double f_0 = f[INDEX2(i,0,numComp)]; | const double f0 = f[INDEX2(i,0,numComp)]; |
902 | const register double f_1 = f[INDEX2(i,1,numComp)]; | const double f1 = f[INDEX2(i,1,numComp)]; |
903 | int_local[i]+=(f_0+f_1)*w_0; | int_local[i]+=(f0+f1)*w0; |
904 | } /* end of component loop i */ | } /* end of component loop i */ |
905 | } /* end of k0 loop */ | } /* end of k0 loop */ |
906 | } | } |
# | Line 598 void Rectangle::setToIntegrals(vector<do | Line 915 void Rectangle::setToIntegrals(vector<do |
915 | vector<double> int_local(numComp, 0); | vector<double> int_local(numComp, 0); |
916 | if (m_faceOffset[0] > -1) { | if (m_faceOffset[0] > -1) { |
917 | #pragma omp for nowait | #pragma omp for nowait |
918 | for (index_t k1 = 0; k1 < m_NE1; ++k1) { | for (index_t k1 = bottom; k1 < bottom+m_ownNE1; ++k1) { |
919 | const double* f = in.getSampleDataRO(m_faceOffset[0]+k1); | const double* f = arg.getSampleDataRO(m_faceOffset[0]+k1); |
920 | for (index_t i=0; i < numComp; ++i) { | for (index_t i=0; i < numComp; ++i) { |
921 | int_local[i]+=f[i]*h1; | int_local[i]+=f[i]*h1; |
922 | } /* end of component loop i */ | } /* end of component loop i */ |
# | Line 608 void Rectangle::setToIntegrals(vector<do | Line 925 void Rectangle::setToIntegrals(vector<do |
925 | ||
926 | if (m_faceOffset[1] > -1) { | if (m_faceOffset[1] > -1) { |
927 | #pragma omp for nowait | #pragma omp for nowait |
928 | for (index_t k1 = 0; k1 < m_NE1; ++k1) { | for (index_t k1 = bottom; k1 < bottom+m_ownNE1; ++k1) { |
929 | const double* f = in.getSampleDataRO(m_faceOffset[1]+k1); | const double* f = arg.getSampleDataRO(m_faceOffset[1]+k1); |
930 | for (index_t i=0; i < numComp; ++i) { | for (index_t i=0; i < numComp; ++i) { |
931 | int_local[i]+=f[i]*h1; | int_local[i]+=f[i]*h1; |
932 | } /* end of component loop i */ | } /* end of component loop i */ |
# | Line 618 void Rectangle::setToIntegrals(vector<do | Line 935 void Rectangle::setToIntegrals(vector<do |
935 | ||
936 | if (m_faceOffset[2] > -1) { | if (m_faceOffset[2] > -1) { |
937 | #pragma omp for nowait | #pragma omp for nowait |
938 | for (index_t k0 = 0; k0 < m_NE0; ++k0) { | for (index_t k0 = left; k0 < left+m_ownNE0; ++k0) { |
939 | const double* f = in.getSampleDataRO(m_faceOffset[2]+k0); | const double* f = arg.getSampleDataRO(m_faceOffset[2]+k0); |
940 | for (index_t i=0; i < numComp; ++i) { | for (index_t i=0; i < numComp; ++i) { |
941 | int_local[i]+=f[i]*h0; | int_local[i]+=f[i]*h0; |
942 | } /* end of component loop i */ | } /* end of component loop i */ |
# | Line 628 void Rectangle::setToIntegrals(vector<do | Line 945 void Rectangle::setToIntegrals(vector<do |
945 | ||
946 | if (m_faceOffset[3] > -1) { | if (m_faceOffset[3] > -1) { |
947 | #pragma omp for nowait | #pragma omp for nowait |
948 | for (index_t k0 = 0; k0 < m_NE0; ++k0) { | for (index_t k0 = left; k0 < left+m_ownNE0; ++k0) { |
949 | const double* f = in.getSampleDataRO(m_faceOffset[3]+k0); | const double* f = arg.getSampleDataRO(m_faceOffset[3]+k0); |
950 | for (index_t i=0; i < numComp; ++i) { | for (index_t i=0; i < numComp; ++i) { |
951 | int_local[i]+=f[i]*h0; | int_local[i]+=f[i]*h0; |
952 | } /* end of component loop i */ | } /* end of component loop i */ |
# | Line 640 void Rectangle::setToIntegrals(vector<do | Line 957 void Rectangle::setToIntegrals(vector<do |
957 | for (index_t i=0; i<numComp; i++) | for (index_t i=0; i<numComp; i++) |
958 | integrals[i]+=int_local[i]; | integrals[i]+=int_local[i]; |
959 | } // end of parallel section | } // end of parallel section |
} else { | ||
stringstream msg; | ||
msg << "setToIntegrals() not implemented for " | ||
<< functionSpaceTypeAsString(arg.getFunctionSpace().getTypeCode()); | ||
throw RipleyException(msg.str()); | ||
960 | } | } |
961 | } | } |
962 | ||
963 | void Rectangle::setToNormal(escript::Data& out) const | //protected |
964 | dim_t Rectangle::insertNeighbourNodes(IndexVector& index, index_t node) const | |
965 | { | { |
966 | if (out.getFunctionSpace().getTypeCode() == FaceElements) { | const dim_t nDOF0 = (m_gNE0+1)/m_NX; |
967 | #pragma omp parallel | const dim_t nDOF1 = (m_gNE1+1)/m_NY; |
968 | { | const int x=node%nDOF0; |
969 | if (m_faceOffset[0] > -1) { | const int y=node/nDOF0; |
970 | #pragma omp for nowait | dim_t num=0; |
971 | for (index_t k1 = 0; k1 < m_NE1; ++k1) { | // loop through potential neighbours and add to index if positions are |
972 | double* o = out.getSampleDataRW(m_faceOffset[0]+k1); | // within bounds |
973 | // set vector at two quadrature points | for (int i1=-1; i1<2; i1++) { |
974 | *o++ = -1.; | for (int i0=-1; i0<2; i0++) { |
975 | *o++ = 0.; | // skip node itself |
976 | *o++ = -1.; | if (i0==0 && i1==0) |
977 | *o = 0.; | continue; |
978 | } | // location of neighbour node |
979 | const int nx=x+i0; | |
980 | const int ny=y+i1; | |
981 | if (nx>=0 && ny>=0 && nx<nDOF0 && ny<nDOF1) { | |
982 | index.push_back(ny*nDOF0+nx); | |
983 | num++; | |
984 | } | } |
985 | } | |
986 | } | |
987 | ||
988 | if (m_faceOffset[1] > -1) { | return num; |
989 | #pragma omp for nowait | } |
for (index_t k1 = 0; k1 < m_NE1; ++k1) { | ||
double* o = out.getSampleDataRW(m_faceOffset[1]+k1); | ||
// set vector at two quadrature points | ||
*o++ = 1.; | ||
*o++ = 0.; | ||
*o++ = 1.; | ||
*o = 0.; | ||
} | ||
} | ||
990 | ||
991 | if (m_faceOffset[2] > -1) { | //protected |
992 | #pragma omp for nowait | void Rectangle::nodesToDOF(escript::Data& out, escript::Data& in) const |
993 | for (index_t k0 = 0; k0 < m_NE0; ++k0) { | { |
994 | double* o = out.getSampleDataRW(m_faceOffset[2]+k0); | const dim_t numComp = in.getDataPointSize(); |
995 | // set vector at two quadrature points | out.requireWrite(); |
996 | *o++ = 0.; | |
997 | *o++ = -1.; | const index_t left = (m_offset0==0 ? 0 : 1); |
998 | *o++ = 0.; | const index_t bottom = (m_offset1==0 ? 0 : 1); |
999 | *o = -1.; | const dim_t nDOF0 = (m_gNE0+1)/m_NX; |
1000 | } | const dim_t nDOF1 = (m_gNE1+1)/m_NY; |
1001 | } | #pragma omp parallel for |
1002 | for (index_t i=0; i<nDOF1; i++) { | |
1003 | for (index_t j=0; j<nDOF0; j++) { | |
1004 | const index_t n=j+left+(i+bottom)*m_N0; | |
1005 | const double* src=in.getSampleDataRO(n); | |
1006 | copy(src, src+numComp, out.getSampleDataRW(j+i*nDOF0)); | |
1007 | } | |
1008 | } | |
1009 | } | |
1010 | ||
1011 | //protected | |
1012 | void Rectangle::dofToNodes(escript::Data& out, escript::Data& in) const | |
1013 | { | |
1014 | const dim_t numComp = in.getDataPointSize(); | |
1015 | Paso_Coupler* coupler = Paso_Coupler_alloc(m_connector, numComp); | |
1016 | in.requireWrite(); | |
1017 | Paso_Coupler_startCollect(coupler, in.getSampleDataRW(0)); | |
1018 | ||
1019 | const dim_t numDOF = getNumDOF(); | |
1020 | out.requireWrite(); | |
1021 | const double* buffer = Paso_Coupler_finishCollect(coupler); | |
1022 | ||
1023 | #pragma omp parallel for | |
1024 | for (index_t i=0; i<getNumNodes(); i++) { | |
1025 | const double* src=(m_dofMap[i]<numDOF ? | |
1026 | in.getSampleDataRO(m_dofMap[i]) | |
1027 | : &buffer[(m_dofMap[i]-numDOF)*numComp]); | |
1028 | copy(src, src+numComp, out.getSampleDataRW(i)); | |
1029 | } | |
1030 | } | |
1031 | ||
1032 | //private | |
1033 | void Rectangle::populateSampleIds() | |
1034 | { | |
1035 | // identifiers are ordered from left to right, bottom to top globablly. | |
1036 | ||
1037 | // build node distribution vector first. | |
1038 | // rank i owns m_nodeDistribution[i+1]-nodeDistribution[i] nodes | |
1039 | m_nodeDistribution.assign(m_mpiInfo->size+1, 0); | |
1040 | const dim_t numDOF=getNumDOF(); | |
1041 | for (dim_t k=1; k<m_mpiInfo->size; k++) { | |
1042 | m_nodeDistribution[k]=k*numDOF; | |
1043 | } | |
1044 | m_nodeDistribution[m_mpiInfo->size]=getNumDataPointsGlobal(); | |
1045 | m_nodeId.resize(getNumNodes()); | |
1046 | m_dofId.resize(numDOF); | |
1047 | m_elementId.resize(getNumElements()); | |
1048 | m_faceId.resize(getNumFaceElements()); | |
1049 | ||
if (m_faceOffset[3] > -1) { | ||
#pragma omp for nowait | ||
for (index_t k0 = 0; k0 < m_NE0; ++k0) { | ||
double* o = out.getSampleDataRW(m_faceOffset[3]+k0); | ||
// set vector at two quadrature points | ||
*o++ = 0.; | ||
*o++ = 1.; | ||
*o++ = 0.; | ||
*o = 1.; | ||
} | ||
} | ||
} // end of parallel section | ||
} else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) { | ||
1050 | #pragma omp parallel | #pragma omp parallel |
1051 | { | { |
1052 | if (m_faceOffset[0] > -1) { | // nodes |
1053 | #pragma omp for nowait | #pragma omp for nowait |
1054 | for (index_t k1 = 0; k1 < m_NE1; ++k1) { | for (dim_t i1=0; i1<m_N1; i1++) { |
1055 | double* o = out.getSampleDataRW(m_faceOffset[0]+k1); | for (dim_t i0=0; i0<m_N0; i0++) { |
1056 | *o++ = -1.; | m_nodeId[i0+i1*m_N0] = (m_offset1+i1)*(m_gNE0+1)+m_offset0+i0; |
*o = 0.; | ||
} | ||
1057 | } | } |
1058 | } | |
1059 | ||
1060 | if (m_faceOffset[1] > -1) { | // degrees of freedom |
1061 | #pragma omp for nowait | #pragma omp for nowait |
1062 | for (index_t k1 = 0; k1 < m_NE1; ++k1) { | for (dim_t k=0; k<numDOF; k++) |
1063 | double* o = out.getSampleDataRW(m_faceOffset[1]+k1); | m_dofId[k] = m_nodeDistribution[m_mpiInfo->rank]+k; |
*o++ = 1.; | ||
*o = 0.; | ||
} | ||
} | ||
1064 | ||
1065 | if (m_faceOffset[2] > -1) { | // elements |
1066 | #pragma omp for nowait | #pragma omp for nowait |
1067 | for (index_t k0 = 0; k0 < m_NE0; ++k0) { | for (dim_t i1=0; i1<m_NE1; i1++) { |
1068 | double* o = out.getSampleDataRW(m_faceOffset[2]+k0); | for (dim_t i0=0; i0<m_NE0; i0++) { |
1069 | *o++ = 0.; | m_elementId[i0+i1*m_NE0]=(m_offset1+i1)*m_gNE0+m_offset0+i0; |
*o = -1.; | ||
} | ||
1070 | } | } |
1071 | } | |
1072 | ||
1073 | if (m_faceOffset[3] > -1) { | // face elements |
1074 | #pragma omp for nowait | #pragma omp for |
1075 | for (index_t k0 = 0; k0 < m_NE0; ++k0) { | for (dim_t k=0; k<getNumFaceElements(); k++) |
1076 | double* o = out.getSampleDataRW(m_faceOffset[3]+k0); | m_faceId[k]=k; |
1077 | *o++ = 0.; | } // end parallel section |
*o = 1.; | ||
} | ||
} | ||
} // end of parallel section | ||
1078 | ||
1079 | } else { | m_nodeTags.assign(getNumNodes(), 0); |
1080 | stringstream msg; | updateTagsInUse(Nodes); |
1081 | msg << "setToNormal() not implemented for " | |
1082 | << functionSpaceTypeAsString(out.getFunctionSpace().getTypeCode()); | m_elementTags.assign(getNumElements(), 0); |
1083 | throw RipleyException(msg.str()); | updateTagsInUse(Elements); |
1084 | ||
1085 | // generate face offset vector and set face tags | |
1086 | const IndexVector facesPerEdge = getNumFacesPerBoundary(); | |
1087 | const index_t LEFT=1, RIGHT=2, BOTTOM=10, TOP=20; | |
1088 | const index_t faceTag[] = { LEFT, RIGHT, BOTTOM, TOP }; | |
1089 | m_faceOffset.assign(facesPerEdge.size(), -1); | |
1090 | m_faceTags.clear(); | |
1091 | index_t offset=0; | |
1092 | for (size_t i=0; i<facesPerEdge.size(); i++) { | |
1093 | if (facesPerEdge[i]>0) { | |
1094 | m_faceOffset[i]=offset; | |
1095 | offset+=facesPerEdge[i]; | |
1096 | m_faceTags.insert(m_faceTags.end(), facesPerEdge[i], faceTag[i]); | |
1097 | } | |
1098 | } | } |
1099 | setTagMap("left", LEFT); | |
1100 | setTagMap("right", RIGHT); | |
1101 | setTagMap("bottom", BOTTOM); | |
1102 | setTagMap("top", TOP); | |
1103 | updateTagsInUse(FaceElements); | |
1104 | } | } |
1105 | ||
1106 | Paso_SystemMatrixPattern* Rectangle::getPattern(bool reducedRowOrder, | //private |
1107 | bool reducedColOrder) const | void Rectangle::createPattern() |
1108 | { | { |
1109 | if (reducedRowOrder || reducedColOrder) | const dim_t nDOF0 = (m_gNE0+1)/m_NX; |
1110 | throw RipleyException("getPattern() not implemented for reduced order"); | const dim_t nDOF1 = (m_gNE1+1)/m_NY; |
// connector | ||
RankVector neighbour; | ||
IndexVector offsetInShared(1,0); | ||
IndexVector sendShared, recvShared; | ||
const IndexVector faces=getNumFacesPerBoundary(); | ||
const index_t nDOF0 = (m_gNE0+1)/m_NX; | ||
const index_t nDOF1 = (m_gNE1+1)/m_NY; | ||
const int numDOF=nDOF0*nDOF1; | ||
1111 | const index_t left = (m_offset0==0 ? 0 : 1); | const index_t left = (m_offset0==0 ? 0 : 1); |
1112 | const index_t bottom = (m_offset1==0 ? 0 : 1); | const index_t bottom = (m_offset1==0 ? 0 : 1); |
vector<IndexVector> colIndices(numDOF); // for the couple blocks | ||
int numShared=0; | ||
1113 | ||
1114 | // populate node->DOF mapping with own degrees of freedom. | |
1115 | // The rest is assigned in the loop further down | |
1116 | m_dofMap.assign(getNumNodes(), 0); | m_dofMap.assign(getNumNodes(), 0); |
1117 | #pragma omp parallel for | #pragma omp parallel for |
1118 | for (index_t i=bottom; i<m_N1; i++) { | for (index_t i=bottom; i<bottom+nDOF1; i++) { |
1119 | for (index_t j=left; j<m_N0; j++) { | for (index_t j=left; j<left+nDOF0; j++) { |
1120 | m_dofMap[i*m_N0+j]=(i-bottom)*nDOF0+j-left; | m_dofMap[i*m_N0+j]=(i-bottom)*nDOF0+j-left; |
1121 | } | } |
1122 | } | } |
1123 | ||
1124 | // corner node from bottom-left | // build list of shared components and neighbours by looping through |
1125 | if (faces[0] == 0 && faces[2] == 0) { | // all potential neighbouring ranks and checking if positions are |
1126 | neighbour.push_back(m_mpiInfo->rank-m_NX-1); | // within bounds |
1127 | offsetInShared.push_back(offsetInShared.back()+1); | const dim_t numDOF=nDOF0*nDOF1; |
1128 | sendShared.push_back(0); | vector<IndexVector> colIndices(numDOF); // for the couple blocks |
1129 | recvShared.push_back(numDOF+numShared); | RankVector neighbour; |
1130 | colIndices[0].push_back(numShared); | IndexVector offsetInShared(1,0); |
1131 | m_dofMap[0]=numDOF+numShared; | IndexVector sendShared, recvShared; |
1132 | ++numShared; | int numShared=0; |
1133 | } | const int x=m_mpiInfo->rank%m_NX; |
1134 | // bottom edge | const int y=m_mpiInfo->rank/m_NX; |
1135 | if (faces[2] == 0) { | for (int i1=-1; i1<2; i1++) { |
1136 | neighbour.push_back(m_mpiInfo->rank-m_NX); | for (int i0=-1; i0<2; i0++) { |
1137 | offsetInShared.push_back(offsetInShared.back()+nDOF0); | // skip this rank |
1138 | for (dim_t i=0; i<nDOF0; i++, numShared++) { | if (i0==0 && i1==0) |
1139 | sendShared.push_back(i); | continue; |
1140 | recvShared.push_back(numDOF+numShared); | // location of neighbour rank |
1141 | if (i>0) | const int nx=x+i0; |
1142 | colIndices[i-1].push_back(numShared); | const int ny=y+i1; |
1143 | colIndices[i].push_back(numShared); | if (nx>=0 && ny>=0 && nx<m_NX && ny<m_NY) { |
1144 | if (i<nDOF0-1) | neighbour.push_back(ny*m_NX+nx); |
1145 | colIndices[i+1].push_back(numShared); | if (i0==0) { |
1146 | m_dofMap[i+left]=numDOF+numShared; | // sharing top or bottom edge |
1147 | } | const int firstDOF=(i1==-1 ? 0 : numDOF-nDOF0); |
1148 | } | const int firstNode=(i1==-1 ? left : m_N0*(m_N1-1)+left); |
1149 | // corner node from bottom-right | offsetInShared.push_back(offsetInShared.back()+nDOF0); |
1150 | if (faces[1] == 0 && faces[2] == 0) { | for (dim_t i=0; i<nDOF0; i++, numShared++) { |
1151 | neighbour.push_back(m_mpiInfo->rank-m_NX+1); | sendShared.push_back(firstDOF+i); |
1152 | offsetInShared.push_back(offsetInShared.back()+1); | recvShared.push_back(numDOF+numShared); |
1153 | sendShared.push_back(nDOF0-1); | if (i>0) |
1154 | recvShared.push_back(numDOF+numShared); | colIndices[firstDOF+i-1].push_back(numShared); |
1155 | colIndices[nDOF0-2].push_back(numShared); | colIndices[firstDOF+i].push_back(numShared); |
1156 | colIndices[nDOF0-1].push_back(numShared); | if (i<nDOF0-1) |
1157 | m_dofMap[m_N0-1]=numDOF+numShared; | colIndices[firstDOF+i+1].push_back(numShared); |
1158 | ++numShared; | m_dofMap[firstNode+i]=numDOF+numShared; |
1159 | } | } |
1160 | // right edge | } else if (i1==0) { |
1161 | if (faces[1] == 0) { | // sharing left or right edge |
1162 | neighbour.push_back(m_mpiInfo->rank+1); | const int firstDOF=(i0==-1 ? 0 : nDOF0-1); |
1163 | offsetInShared.push_back(offsetInShared.back()+nDOF1); | const int firstNode=(i0==-1 ? bottom*m_N0 : (bottom+1)*m_N0-1); |
1164 | for (dim_t i=0; i<nDOF1; i++, numShared++) { | offsetInShared.push_back(offsetInShared.back()+nDOF1); |
1165 | sendShared.push_back((i+1)*(nDOF0)-1); | for (dim_t i=0; i<nDOF1; i++, numShared++) { |
1166 | recvShared.push_back(numDOF+numShared); | sendShared.push_back(firstDOF+i*nDOF0); |
1167 | if (i>0) | recvShared.push_back(numDOF+numShared); |
1168 | colIndices[i*(nDOF0)-1].push_back(numShared); | if (i>0) |
1169 | colIndices[(i+1)*(nDOF0)-1].push_back(numShared); | colIndices[firstDOF+(i-1)*nDOF0].push_back(numShared); |
1170 | if (i<nDOF1-1) | colIndices[firstDOF+i*nDOF0].push_back(numShared); |
1171 | colIndices[(i+2)*(nDOF0)-1].push_back(numShared); | if (i<nDOF1-1) |
1172 | m_dofMap[(i+bottom+1)*m_N0-1]=numDOF+numShared; | colIndices[firstDOF+(i+1)*nDOF0].push_back(numShared); |
1173 | } | m_dofMap[firstNode+i*m_N0]=numDOF+numShared; |
1174 | } | } |
1175 | // corner node from top-right | } else { |
1176 | if (faces[1] == 0 && faces[3] == 0) { | // sharing a node |
1177 | neighbour.push_back(m_mpiInfo->rank+m_NX+1); | const int dof=(i0+1)/2*(nDOF0-1)+(i1+1)/2*(numDOF-nDOF0); |
1178 | offsetInShared.push_back(offsetInShared.back()+1); | const int node=(i0+1)/2*(m_N0-1)+(i1+1)/2*m_N0*(m_N1-1); |
1179 | sendShared.push_back(numDOF-1); | offsetInShared.push_back(offsetInShared.back()+1); |
1180 | recvShared.push_back(numDOF+numShared); | sendShared.push_back(dof); |
1181 | colIndices[numDOF-1].push_back(numShared); | recvShared.push_back(numDOF+numShared); |
1182 | m_dofMap[m_N0*m_N1-1]=numDOF+numShared; | colIndices[dof].push_back(numShared); |
1183 | ++numShared; | m_dofMap[node]=numDOF+numShared; |
1184 | } | ++numShared; |
1185 | // top edge | } |
1186 | if (faces[3] == 0) { | } |
neighbour.push_back(m_mpiInfo->rank+m_NX); | ||
offsetInShared.push_back(offsetInShared.back()+nDOF0); | ||
for (dim_t i=0; i<nDOF0; i++, numShared++) { | ||
sendShared.push_back(numDOF-nDOF0+i); | ||
recvShared.push_back(numDOF+numShared); | ||
if (i>0) | ||
colIndices[numDOF-nDOF0+i-1].push_back(numShared); | ||
colIndices[numDOF-nDOF0+i].push_back(numShared); | ||
if (i<nDOF0-1) | ||
colIndices[numDOF-nDOF0+i+1].push_back(numShared); | ||
m_dofMap[m_N0*(m_N1-1)+i+left]=numDOF+numShared; | ||
} | ||
} | ||
// corner node from top-left | ||
if (faces[0] == 0 && faces[3] == 0) { | ||
neighbour.push_back(m_mpiInfo->rank+m_NX-1); | ||
offsetInShared.push_back(offsetInShared.back()+1); | ||
sendShared.push_back(numDOF-nDOF0); | ||
recvShared.push_back(numDOF+numShared); | ||
colIndices[numDOF-nDOF0].push_back(numShared); | ||
m_dofMap[m_N0*(m_N1-1)]=numDOF+numShared; | ||
++numShared; | ||
} | ||
// left edge | ||
if (faces[0] == 0) { | ||
neighbour.push_back(m_mpiInfo->rank-1); | ||
offsetInShared.push_back(offsetInShared.back()+nDOF1); | ||
for (dim_t i=0; i<nDOF1; i++, numShared++) { | ||
sendShared.push_back(i*nDOF0); | ||
recvShared.push_back(numDOF+numShared); | ||
if (i>0) | ||
colIndices[(i-1)*nDOF0].push_back(numShared); | ||
colIndices[i*nDOF0].push_back(numShared); | ||
if (i<nDOF1-1) | ||
colIndices[(i+1)*nDOF0].push_back(numShared); | ||
m_dofMap[(i+bottom)*m_N0]=numDOF+numShared; | ||
1187 | } | } |
1188 | } | } |
1189 | ||
1190 | // create connector | |
1191 | Paso_SharedComponents *snd_shcomp = Paso_SharedComponents_alloc( | |
1192 | numDOF, neighbour.size(), &neighbour[0], &sendShared[0], | |
1193 | &offsetInShared[0], 1, 0, m_mpiInfo); | |
1194 | Paso_SharedComponents *rcv_shcomp = Paso_SharedComponents_alloc( | |
1195 | numDOF, neighbour.size(), &neighbour[0], &recvShared[0], | |
1196 | &offsetInShared[0], 1, 0, m_mpiInfo); | |
1197 | m_connector = Paso_Connector_alloc(snd_shcomp, rcv_shcomp); | |
1198 | Paso_SharedComponents_free(snd_shcomp); | |
1199 | Paso_SharedComponents_free(rcv_shcomp); | |
1200 | ||
1201 | // create main and couple blocks | |
1202 | Paso_Pattern *mainPattern = createMainPattern(); | |
1203 | Paso_Pattern *colPattern, *rowPattern; | |
1204 | createCouplePatterns(colIndices, numShared, &colPattern, &rowPattern); | |
1205 | ||
1206 | // allocate paso distribution | |
1207 | Paso_Distribution* distribution = Paso_Distribution_alloc(m_mpiInfo, | |
1208 | const_cast<index_t*>(&m_nodeDistribution[0]), 1, 0); | |
1209 | ||
1210 | // finally create the system matrix | |
1211 | m_pattern = Paso_SystemMatrixPattern_alloc(MATRIX_FORMAT_DEFAULT, | |
1212 | distribution, distribution, mainPattern, colPattern, rowPattern, | |
1213 | m_connector, m_connector); | |
1214 | ||
1215 | Paso_Distribution_free(distribution); | |
1216 | ||
1217 | // useful debug output | |
1218 | /* | /* |
1219 | cout << "--- rcv_shcomp ---" << endl; | cout << "--- rcv_shcomp ---" << endl; |
1220 | cout << "numDOF=" << numDOF << ", numNeighbors=" << neighbour.size() << endl; | cout << "numDOF=" << numDOF << ", numNeighbors=" << neighbour.size() << endl; |
# | Line 892 Paso_SystemMatrixPattern* Rectangle::get | Line 1229 Paso_SystemMatrixPattern* Rectangle::get |
1229 | for (size_t i=0; i<sendShared.size(); i++) { | for (size_t i=0; i<sendShared.size(); i++) { |
1230 | cout << "shared[" << i << "]=" << sendShared[i] << endl; | cout << "shared[" << i << "]=" << sendShared[i] << endl; |
1231 | } | } |
1232 | cout << "--- dofMap ---" << endl; | |
1233 | for (size_t i=0; i<m_dofMap.size(); i++) { | |
1234 | cout << "m_dofMap[" << i << "]=" << m_dofMap[i] << endl; | |
1235 | } | |
1236 | cout << "--- colIndices ---" << endl; | |
1237 | for (size_t i=0; i<colIndices.size(); i++) { | |
1238 | cout << "colIndices[" << i << "].size()=" << colIndices[i].size() << endl; | |
1239 | } | |
1240 | */ | */ |
1241 | ||
Paso_SharedComponents *snd_shcomp = Paso_SharedComponents_alloc( | ||
numDOF, neighbour.size(), &neighbour[0], &sendShared[0], | ||
&offsetInShared[0], 1, 0, m_mpiInfo); | ||
Paso_SharedComponents *rcv_shcomp = Paso_SharedComponents_alloc( | ||
numDOF, neighbour.size(), &neighbour[0], &recvShared[0], | ||
&offsetInShared[0], 1, 0, m_mpiInfo); | ||
Paso_Connector* connector = Paso_Connector_alloc(snd_shcomp, rcv_shcomp); | ||
Paso_SharedComponents_free(snd_shcomp); | ||
Paso_SharedComponents_free(rcv_shcomp); | ||
// create patterns | ||
dim_t M, N; | ||
IndexVector ptr(1,0); | ||
IndexVector index; | ||
// main pattern | ||
for (index_t i=0; i<numDOF; i++) { | ||
// always add the node itself | ||
index.push_back(i); | ||
const int num=insertNeighbours(index, i); | ||
ptr.push_back(ptr.back()+num+1); | ||
} | ||
M=N=ptr.size()-1; | ||
// paso will manage the memory | ||
index_t* indexC = MEMALLOC(index.size(),index_t); | ||
index_t* ptrC = MEMALLOC(ptr.size(), index_t); | ||
copy(index.begin(), index.end(), indexC); | ||
copy(ptr.begin(), ptr.end(), ptrC); | ||
Paso_Pattern *mainPattern = Paso_Pattern_alloc(MATRIX_FORMAT_DEFAULT, | ||
M, N, ptrC, indexC); | ||
1242 | /* | /* |
1243 | cout << "--- main_pattern ---" << endl; | cout << "--- main_pattern ---" << endl; |
1244 | cout << "M=" << M << ", N=" << N << endl; | cout << "M=" << mainPattern->numOutput << ", N=" << mainPattern->numInput << endl; |
1245 | for (size_t i=0; i<ptr.size(); i++) { | for (size_t i=0; i<mainPattern->numOutput+1; i++) { |
1246 | cout << "ptr[" << i << "]=" << ptr[i] << endl; | cout << "ptr[" << i << "]=" << mainPattern->ptr[i] << endl; |
1247 | } | } |
1248 | for (size_t i=0; i<index.size(); i++) { | for (size_t i=0; i<mainPattern->ptr[mainPattern->numOutput]; i++) { |
1249 | cout << "index[" << i << "]=" << index[i] << endl; | cout << "index[" << i << "]=" << mainPattern->index[i] << endl; |
1250 | } | } |
1251 | */ | */ |
1252 | ||
// column & row couple patterns | ||
ptr.assign(1, 0); | ||
index.clear(); | ||
for (index_t i=0; i<numDOF; i++) { | ||
index.insert(index.end(), colIndices[i].begin(), colIndices[i].end()); | ||
ptr.push_back(ptr.back()+colIndices[i].size()); | ||
} | ||
// paso will manage the memory | ||
indexC = MEMALLOC(index.size(), index_t); | ||
ptrC = MEMALLOC(ptr.size(), index_t); | ||
copy(index.begin(), index.end(), indexC); | ||
copy(ptr.begin(), ptr.end(), ptrC); | ||
M=ptr.size()-1; | ||
N=numShared; | ||
Paso_Pattern *colCouplePattern=Paso_Pattern_alloc(MATRIX_FORMAT_DEFAULT, | ||
M, N, ptrC, indexC); | ||
1253 | /* | /* |
1254 | cout << "--- colCouple_pattern ---" << endl; | cout << "--- colCouple_pattern ---" << endl; |
1255 | cout << "M=" << M << ", N=" << N << endl; | cout << "M=" << colPattern->numOutput << ", N=" << colPattern->numInput << endl; |
1256 | for (size_t i=0; i<ptr.size(); i++) { | for (size_t i=0; i<colPattern->numOutput+1; i++) { |
1257 | cout << "ptr[" << i << "]=" << ptr[i] << endl; | cout << "ptr[" << i << "]=" << colPattern->ptr[i] << endl; |
1258 | } | } |
1259 | for (size_t i=0; i<index.size(); i++) { | for (size_t i=0; i<colPattern->ptr[colPattern->numOutput]; i++) { |
1260 | cout << "index[" << i << "]=" << index[i] << endl; | cout << "index[" << i << "]=" << colPattern->index[i] << endl; |
1261 | } | } |
1262 | */ | */ |
1263 | ||
// now build the row couple pattern | ||
IndexVector ptr2(1,0); | ||
IndexVector index2; | ||
for (dim_t id=0; id<N; id++) { | ||
dim_t n=0; | ||
for (dim_t i=0; i<M; i++) { | ||
for (dim_t j=ptr[i]; j<ptr[i+1]; j++) { | ||
if (index[j]==id) { | ||
index2.push_back(i); | ||
n++; | ||
break; | ||
} | ||
} | ||
} | ||
ptr2.push_back(ptr2.back()+n); | ||
} | ||
// paso will manage the memory | ||
indexC = MEMALLOC(index2.size(), index_t); | ||
ptrC = MEMALLOC(ptr2.size(), index_t); | ||
copy(index2.begin(), index2.end(), indexC); | ||
copy(ptr2.begin(), ptr2.end(), ptrC); | ||
Paso_Pattern *rowCouplePattern=Paso_Pattern_alloc(MATRIX_FORMAT_DEFAULT, | ||
N, M, ptrC, indexC); | ||
1264 | /* | /* |
1265 | cout << "--- rowCouple_pattern ---" << endl; | cout << "--- rowCouple_pattern ---" << endl; |
1266 | cout << "M=" << N << ", N=" << M << endl; | cout << "M=" << rowPattern->numOutput << ", N=" << rowPattern->numInput << endl; |
1267 | for (size_t i=0; i<ptr2.size(); i++) { | for (size_t i=0; i<rowPattern->numOutput+1; i++) { |
1268 | cout << "ptr[" << i << "]=" << ptr2[i] << endl; | cout << "ptr[" << i << "]=" << rowPattern->ptr[i] << endl; |
1269 | } | } |
1270 | for (size_t i=0; i<index2.size(); i++) { | for (size_t i=0; i<rowPattern->ptr[rowPattern->numOutput]; i++) { |
1271 | cout << "index[" << i << "]=" << index2[i] << endl; | cout << "index[" << i << "]=" << rowPattern->index[i] << endl; |
1272 | } | } |
1273 | */ | */ |
1274 | ||
// allocate paso distribution | ||
Paso_Distribution* distribution = Paso_Distribution_alloc(m_mpiInfo, | ||
const_cast<index_t*>(&m_nodeDistribution[0]), 1, 0); | ||
Paso_SystemMatrixPattern* pattern = Paso_SystemMatrixPattern_alloc( | ||
MATRIX_FORMAT_DEFAULT, distribution, distribution, | ||
mainPattern, colCouplePattern, rowCouplePattern, | ||
connector, connector); | ||
1275 | Paso_Pattern_free(mainPattern); | Paso_Pattern_free(mainPattern); |
1276 | Paso_Pattern_free(colCouplePattern); | Paso_Pattern_free(colPattern); |
1277 | Paso_Pattern_free(rowCouplePattern); | Paso_Pattern_free(rowPattern); |
Paso_Distribution_free(distribution); | ||
return pattern; | ||
} | ||
void Rectangle::Print_Mesh_Info(const bool full) const | ||
{ | ||
RipleyDomain::Print_Mesh_Info(full); | ||
if (full) { | ||
cout << " Id Coordinates" << endl; | ||
cout.precision(15); | ||
cout.setf(ios::scientific, ios::floatfield); | ||
pair<double,double> xdx = getFirstCoordAndSpacing(0); | ||
pair<double,double> ydy = getFirstCoordAndSpacing(1); | ||
for (index_t i=0; i < getNumNodes(); i++) { | ||
cout << " " << setw(5) << m_nodeId[i] | ||
<< " " << xdx.first+(i%m_N0)*xdx.second | ||
<< " " << ydy.first+(i/m_N0)*ydy.second << endl; | ||
} | ||
} | ||
} | ||
IndexVector Rectangle::getNumNodesPerDim() const | ||
{ | ||
IndexVector ret; | ||
ret.push_back(m_N0); | ||
ret.push_back(m_N1); | ||
return ret; | ||
} | ||
IndexVector Rectangle::getNumElementsPerDim() const | ||
{ | ||
IndexVector ret; | ||
ret.push_back(m_NE0); | ||
ret.push_back(m_NE1); | ||
return ret; | ||
} | ||
IndexVector Rectangle::getNumFacesPerBoundary() const | ||
{ | ||
IndexVector ret(4, 0); | ||
//left | ||
if (m_offset0==0) | ||
ret[0]=m_NE1; | ||
//right | ||
if (m_mpiInfo->rank%m_NX==m_NX-1) | ||
ret[1]=m_NE1; | ||
//bottom | ||
if (m_offset1==0) | ||
ret[2]=m_NE0; | ||
//top | ||
if (m_mpiInfo->rank/m_NX==m_NY-1) | ||
ret[3]=m_NE0; | ||
return ret; | ||
} | ||
pair<double,double> Rectangle::getFirstCoordAndSpacing(dim_t dim) const | ||
{ | ||
if (dim==0) { | ||
return pair<double,double>((m_l0*m_offset0)/m_gNE0, m_l0/m_gNE0); | ||
} else if (dim==1) { | ||
return pair<double,double>((m_l1*m_offset1)/m_gNE1, m_l1/m_gNE1); | ||
} | ||
throw RipleyException("getFirstCoordAndSpacing(): invalid argument"); | ||
} | ||
//protected | ||
dim_t Rectangle::getNumDOF() const | ||
{ | ||
return (m_gNE0+1)/m_NX*(m_gNE1+1)/m_NY; | ||
} | ||
//protected | ||
dim_t Rectangle::getNumFaceElements() const | ||
{ | ||
const IndexVector faces = getNumFacesPerBoundary(); | ||
dim_t n=0; | ||
for (size_t i=0; i<faces.size(); i++) | ||
n+=faces[i]; | ||
return n; | ||
} | ||
//protected | ||
void Rectangle::assembleCoordinates(escript::Data& arg) const | ||
{ | ||
escriptDataC x = arg.getDataC(); | ||
int numDim = m_numDim; | ||
if (!isDataPointShapeEqual(&x, 1, &numDim)) | ||
throw RipleyException("setToX: Invalid Data object shape"); | ||
if (!numSamplesEqual(&x, 1, getNumNodes())) | ||
throw RipleyException("setToX: Illegal number of samples in Data object"); | ||
pair<double,double> xdx = getFirstCoordAndSpacing(0); | ||
pair<double,double> ydy = getFirstCoordAndSpacing(1); | ||
arg.requireWrite(); | ||
#pragma omp parallel for | ||
for (dim_t i1 = 0; i1 < m_N1; i1++) { | ||
for (dim_t i0 = 0; i0 < m_N0; i0++) { | ||
double* point = arg.getSampleDataRW(i0+m_N0*i1); | ||
point[0] = xdx.first+i0*xdx.second; | ||
point[1] = ydy.first+i1*ydy.second; | ||
} | ||
} | ||
1278 | } | } |
1279 | ||
1280 | //private | //private |
1281 | void Rectangle::populateSampleIds() | void Rectangle::addToMatrixAndRHS(Paso_SystemMatrix* S, escript::Data& F, |
1282 | const vector<double>& EM_S, const vector<double>& EM_F, bool addS, | |
1283 | bool addF, index_t firstNode, dim_t nEq, dim_t nComp) const | |
1284 | { | { |
1285 | // identifiers are ordered from left to right, bottom to top globablly. | IndexVector rowIndex; |
1286 | rowIndex.push_back(m_dofMap[firstNode]); | |
1287 | // build node distribution vector first. | rowIndex.push_back(m_dofMap[firstNode+1]); |
1288 | // rank i owns m_nodeDistribution[i+1]-nodeDistribution[i] nodes | rowIndex.push_back(m_dofMap[firstNode+m_N0]); |
1289 | m_nodeDistribution.assign(m_mpiInfo->size+1, 0); | rowIndex.push_back(m_dofMap[firstNode+m_N0+1]); |
1290 | const dim_t numDOF=getNumDOF(); | if (addF) { |
1291 | for (dim_t k=1; k<m_mpiInfo->size; k++) { | double *F_p=F.getSampleDataRW(0); |
1292 | m_nodeDistribution[k]=k*numDOF; | for (index_t i=0; i<rowIndex.size(); i++) { |
1293 | } | if (rowIndex[i]<getNumDOF()) { |
1294 | m_nodeDistribution[m_mpiInfo->size]=getNumDataPointsGlobal(); | for (index_t eq=0; eq<nEq; eq++) { |
1295 | m_nodeId.resize(getNumNodes()); | F_p[INDEX2(eq, rowIndex[i], nEq)]+=EM_F[INDEX2(eq,i,nEq)]; |
1296 | } | |
1297 | #pragma omp parallel for | } |
for (dim_t i1=0; i1<m_N1; i1++) { | ||
for (dim_t i0=0; i0<m_N0; i0++) { | ||
m_nodeId[i0+i1*m_N0] = (m_offset1+i1)*(m_gNE0+1)+m_offset0+i0; | ||
1298 | } | } |
1299 | } | } |
1300 | if (addS) { | |
1301 | m_dofId.resize(numDOF); | addToSystemMatrix(S, rowIndex, nEq, rowIndex, nComp, EM_S); |
const index_t firstId=m_nodeDistribution[m_mpiInfo->rank]; | ||
for (dim_t i=0; i<numDOF; i++) | ||
m_dofId[i] = firstId+i; | ||
m_nodeTags.assign(getNumNodes(), 0); | ||
updateTagsInUse(Nodes); | ||
// elements | ||
m_elementId.resize(getNumElements()); | ||
#pragma omp parallel for | ||
for (dim_t k=0; k<getNumElements(); k++) { | ||
m_elementId[k]=k; | ||
} | ||
m_elementTags.assign(getNumElements(), 0); | ||
updateTagsInUse(Elements); | ||
// face element id's | ||
m_faceId.resize(getNumFaceElements()); | ||
#pragma omp parallel for | ||
for (dim_t k=0; k<getNumFaceElements(); k++) { | ||
m_faceId[k]=k; | ||
1302 | } | } |
// generate face offset vector and set face tags | ||
const IndexVector facesPerEdge = getNumFacesPerBoundary(); | ||
const index_t LEFT=1, RIGHT=2, BOTTOM=10, TOP=20; | ||
const index_t faceTag[] = { LEFT, RIGHT, BOTTOM, TOP }; | ||
m_faceOffset.assign(facesPerEdge.size(), -1); | ||
m_faceTags.clear(); | ||
index_t offset=0; | ||
for (size_t i=0; i<facesPerEdge.size(); i++) { | ||
if (facesPerEdge[i]>0) { | ||
m_faceOffset[i]=offset; | ||
offset+=facesPerEdge[i]; | ||
m_faceTags.insert(m_faceTags.end(), facesPerEdge[i], faceTag[i]); | ||
} | ||
} | ||
setTagMap("left", LEFT); | ||
setTagMap("right", RIGHT); | ||
setTagMap("bottom", BOTTOM); | ||
setTagMap("top", TOP); | ||
updateTagsInUse(FaceElements); | ||
} | ||
//private | ||
int Rectangle::insertNeighbours(IndexVector& index, index_t node) const | ||
{ | ||
const index_t myN0 = (m_gNE0+1)/m_NX; | ||
const index_t myN1 = (m_gNE1+1)/m_NY; | ||
const int x=node%myN0; | ||
const int y=node/myN0; | ||
int num=0; | ||
if (y>0) { | ||
if (x>0) { | ||
// bottom-left | ||
index.push_back(node-myN0-1); | ||
num++; | ||
} | ||
// bottom | ||
index.push_back(node-myN0); | ||
num++; | ||
if (x<myN0-1) { | ||
// bottom-right | ||
index.push_back(node-myN0+1); | ||
num++; | ||
} | ||
} | ||
if (x<myN0-1) { | ||
// right | ||
index.push_back(node+1); | ||
num++; | ||
if (y<myN1-1) { | ||
// top-right | ||
index.push_back(node+myN0+1); | ||
num++; | ||
} | ||
} | ||
if (y<myN1-1) { | ||
// top | ||
index.push_back(node+myN0); | ||
num++; | ||
if (x>0) { | ||
// top-left | ||
index.push_back(node+myN0-1); | ||
num++; | ||
} | ||
} | ||
if (x>0) { | ||
// left | ||
index.push_back(node-1); | ||
num++; | ||
} | ||
return num; | ||
1303 | } | } |
1304 | ||
1305 | //protected | //protected |
# | Line 1242 void Rectangle::interpolateNodesOnElemen | Line 1308 void Rectangle::interpolateNodesOnElemen |
1308 | { | { |
1309 | const dim_t numComp = in.getDataPointSize(); | const dim_t numComp = in.getDataPointSize(); |
1310 | if (reduced) { | if (reduced) { |
1311 | /*** GENERATOR SNIP_INTERPOLATE_REDUCED_ELEMENTS TOP */ | out.requireWrite(); |
1312 | const double c0 = .25; | const double c0 = .25; |
1313 | #pragma omp parallel for | #pragma omp parallel for |
1314 | for (index_t k1=0; k1 < m_NE1; ++k1) { | for (index_t k1=0; k1 < m_NE1; ++k1) { |
1315 | for (index_t k0=0; k0 < m_NE0; ++k0) { | for (index_t k0=0; k0 < m_NE0; ++k0) { |
1316 | const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,k1+1, m_N0)); | const double* f_11 = in.getSampleDataRO(INDEX2(k0+1,k1+1, m_N0)); |
1317 | const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,k1, m_N0)); | const double* f_10 = in.getSampleDataRO(INDEX2(k0+1,k1, m_N0)); |
1318 | const register double* f_00 = in.getSampleDataRO(INDEX2(k0,k1, m_N0)); | const double* f_00 = in.getSampleDataRO(INDEX2(k0,k1, m_N0)); |
1319 | const register double* f_01 = in.getSampleDataRO(INDEX2(k0,k1+1, m_N0)); | const double* f_01 = in.getSampleDataRO(INDEX2(k0,k1+1, m_N0)); |
1320 | double* o = out.getSampleDataRW(INDEX2(k0,k1,m_NE0)); | double* o = out.getSampleDataRW(INDEX2(k0,k1,m_NE0)); |
1321 | for (index_t i=0; i < numComp; ++i) { | for (index_t i=0; i < numComp; ++i) { |
1322 | o[INDEX2(i,numComp,0)] = c0*(f_00[i] + f_01[i] + f_10[i] + f_11[i]); | o[INDEX2(i,numComp,0)] = c0*(f_00[i] + f_01[i] + f_10[i] + f_11[i]); |
1323 | } /* end of component loop i */ | } /* end of component loop i */ |
1324 | } /* end of k0 loop */ | } /* end of k0 loop */ |
1325 | } /* end of k1 loop */ | } /* end of k1 loop */ |
/* GENERATOR SNIP_INTERPOLATE_REDUCED_ELEMENTS BOTTOM */ | ||
1326 | } else { | } else { |
1327 | /*** GENERATOR SNIP_INTERPOLATE_ELEMENTS TOP */ | out.requireWrite(); |
1328 | const double c0 = .16666666666666666667; | const double c0 = .16666666666666666667; |
1329 | const double c1 = .044658198738520451079; | const double c1 = .044658198738520451079; |
1330 | const double c2 = .62200846792814621559; | const double c2 = .62200846792814621559; |
1331 | #pragma omp parallel for | #pragma omp parallel for |
1332 | for (index_t k1=0; k1 < m_NE1; ++k1) { | for (index_t k1=0; k1 < m_NE1; ++k1) { |
1333 | for (index_t k0=0; k0 < m_NE0; ++k0) { | for (index_t k0=0; k0 < m_NE0; ++k0) { |
1334 | const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,k1, m_N0)); | const double* f_10 = in.getSampleDataRO(INDEX2(k0+1,k1, m_N0)); |
1335 | const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,k1+1, m_N0)); | const double* f_11 = in.getSampleDataRO(INDEX2(k0+1,k1+1, m_N0)); |
1336 | const register double* f_01 = in.getSampleDataRO(INDEX2(k0,k1+1, m_N0)); | const double* f_01 = in.getSampleDataRO(INDEX2(k0,k1+1, m_N0)); |
1337 | const register double* f_00 = in.getSampleDataRO(INDEX2(k0,k1, m_N0)); | const double* f_00 = in.getSampleDataRO(INDEX2(k0,k1, m_N0)); |
1338 | double* o = out.getSampleDataRW(INDEX2(k0,k1,m_NE0)); | double* o = out.getSampleDataRW(INDEX2(k0,k1,m_NE0)); |
1339 | for (index_t i=0; i < numComp; ++i) { | for (index_t i=0; i < numComp; ++i) { |
1340 | o[INDEX2(i,numComp,0)] = f_00[i]*c2 + f_11[i]*c1 + c0*(f_01[i] + f_10[i]); | o[INDEX2(i,numComp,0)] = f_00[i]*c2 + f_11[i]*c1 + c0*(f_01[i] + f_10[i]); |
# | Line 1279 void Rectangle::interpolateNodesOnElemen | Line 1344 void Rectangle::interpolateNodesOnElemen |
1344 | } /* end of component loop i */ | } /* end of component loop i */ |
1345 | } /* end of k0 loop */ | } /* end of k0 loop */ |
1346 | } /* end of k1 loop */ | } /* end of k1 loop */ |
/* GENERATOR SNIP_INTERPOLATE_ELEMENTS BOTTOM */ | ||
1347 | } | } |
1348 | } | } |
1349 | ||
# | Line 1289 void Rectangle::interpolateNodesOnFaces( | Line 1353 void Rectangle::interpolateNodesOnFaces( |
1353 | { | { |
1354 | const dim_t numComp = in.getDataPointSize(); | const dim_t numComp = in.getDataPointSize(); |
1355 | if (reduced) { | if (reduced) { |
1356 | out.requireWrite(); | |
1357 | const double c0 = .5; | const double c0 = .5; |
1358 | #pragma omp parallel | #pragma omp parallel |
1359 | { | { |
/*** GENERATOR SNIP_INTERPOLATE_REDUCED_FACES TOP */ | ||
1360 | if (m_faceOffset[0] > -1) { | if (m_faceOffset[0] > -1) { |
1361 | #pragma omp for nowait | #pragma omp for nowait |
1362 | for (index_t k1=0; k1 < m_NE1; ++k1) { | for (index_t k1=0; k1 < m_NE1; ++k1) { |
1363 | const register double* f_00 = in.getSampleDataRO(INDEX2(0,k1, m_N0)); | const double* f_00 = in.getSampleDataRO(INDEX2(0,k1, m_N0)); |
1364 | const register double* f_01 = in.getSampleDataRO(INDEX2(0,k1+1, m_N0)); | const double* f_01 = in.getSampleDataRO(INDEX2(0,k1+1, m_N0)); |
1365 | double* o = out.getSampleDataRW(m_faceOffset[0]+k1); | double* o = out.getSampleDataRW(m_faceOffset[0]+k1); |
1366 | for (index_t i=0; i < numComp; ++i) { | for (index_t i=0; i < numComp; ++i) { |
1367 | o[INDEX2(i,numComp,0)] = c0*(f_00[i] + f_01[i]); | o[INDEX2(i,numComp,0)] = c0*(f_00[i] + f_01[i]); |
# | Line 1307 void Rectangle::interpolateNodesOnFaces( | Line 1371 void Rectangle::interpolateNodesOnFaces( |
1371 | if (m_faceOffset[1] > -1) { | if (m_faceOffset[1] > -1) { |
1372 | #pragma omp for nowait | #pragma omp for nowait |
1373 | for (index_t k1=0; k1 < m_NE1; ++k1) { | for (index_t k1=0; k1 < m_NE1; ++k1) { |
1374 | const register double* f_10 = in.getSampleDataRO(INDEX2(m_N0-1,k1, m_N0)); | const double* f_10 = in.getSampleDataRO(INDEX2(m_N0-1,k1, m_N0)); |
1375 | const register double* f_11 = in.getSampleDataRO(INDEX2(m_N0-1,k1+1, m_N0)); | const double* f_11 = in.getSampleDataRO(INDEX2(m_N0-1,k1+1, m_N0)); |
1376 | double* o = out.getSampleDataRW(m_faceOffset[1]+k1); | double* o = out.getSampleDataRW(m_faceOffset[1]+k1); |
1377 | for (index_t i=0; i < numComp; ++i) { | for (index_t i=0; i < numComp; ++i) { |
1378 | o[INDEX2(i,numComp,0)] = c0*(f_10[i] + f_11[i]); | o[INDEX2(i,numComp,0)] = c0*(f_10[i] + f_11[i]); |
# | Line 1318 void Rectangle::interpolateNodesOnFaces( | Line 1382 void Rectangle::interpolateNodesOnFaces( |
1382 | if (m_faceOffset[2] > -1) { | if (m_faceOffset[2] > -1) { |
1383 | #pragma omp for nowait | #pragma omp for nowait |
1384 | for (index_t k0=0; k0 < m_NE0; ++k0) { | for (index_t k0=0; k0 < m_NE0; ++k0) { |
1385 | const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,0, m_N0)); | const double* f_10 = in.getSampleDataRO(INDEX2(k0+1,0, m_N0)); |
1386 | const register double* f_00 = in.getSampleDataRO(INDEX2(k0,0, m_N0)); | const double* f_00 = in.getSampleDataRO(INDEX2(k0,0, m_N0)); |
1387 | double* o = out.getSampleDataRW(m_faceOffset[2]+k0); | double* o = out.getSampleDataRW(m_faceOffset[2]+k0); |
1388 | for (index_t i=0; i < numComp; ++i) { | for (index_t i=0; i < numComp; ++i) { |
1389 | o[INDEX2(i,numComp,0)] = c0*(f_00[i] + f_10[i]); | o[INDEX2(i,numComp,0)] = c0*(f_00[i] + f_10[i]); |
# | Line 1329 void Rectangle::interpolateNodesOnFaces( | Line 1393 void Rectangle::interpolateNodesOnFaces( |
1393 | if (m_faceOffset[3] > -1) { | if (m_faceOffset[3] > -1) { |
1394 | #pragma omp for nowait | #pragma omp for nowait |
1395 | for (index_t k0=0; k0 < m_NE0; ++k0) { | for (index_t k0=0; k0 < m_NE0; ++k0) { |
1396 | const register double* f_01 = in.getSampleDataRO(INDEX2(k0,m_N1-1, m_N0)); | const double* f_01 = in.getSampleDataRO(INDEX2(k0,m_N1-1, m_N0)); |
1397 | const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,m_N1-1, m_N0)); | const double* f_11 = in.getSampleDataRO(INDEX2(k0+1,m_N1-1, m_N0)); |
1398 | double* o = out.getSampleDataRW(m_faceOffset[3]+k0); | double* o = out.getSampleDataRW(m_faceOffset[3]+k0); |
1399 | for (index_t i=0; i < numComp; ++i) { | for (index_t i=0; i < numComp; ++i) { |
1400 | o[INDEX2(i,numComp,0)] = c0*(f_01[i] + f_11[i]); | o[INDEX2(i,numComp,0)] = c0*(f_01[i] + f_11[i]); |
1401 | } /* end of component loop i */ | } /* end of component loop i */ |
1402 | } /* end of k0 loop */ | } /* end of k0 loop */ |
1403 | } /* end of face 3 */ | } /* end of face 3 */ |
/* GENERATOR SNIP_INTERPOLATE_REDUCED_FACES BOTTOM */ | ||
1404 | } // end of parallel section | } // end of parallel section |
1405 | } else { | } else { |
1406 | out.requireWrite(); | |
1407 | const double c0 = 0.21132486540518711775; | const double c0 = 0.21132486540518711775; |
1408 | const double c1 = 0.78867513459481288225; | const double c1 = 0.78867513459481288225; |
1409 | #pragma omp parallel | #pragma omp parallel |
1410 | { | { |
/*** GENERATOR SNIP_INTERPOLATE_FACES TOP */ | ||
1411 | if (m_faceOffset[0] > -1) { | if (m_faceOffset[0] > -1) { |
1412 | #pragma omp for nowait | #pragma omp for nowait |
1413 | for (index_t k1=0; k1 < m_NE1; ++k1) { | for (index_t k1=0; k1 < m_NE1; ++k1) { |
1414 | const register double* f_01 = in.getSampleDataRO(INDEX2(0,k1+1, m_N0)); | const double* f_01 = in.getSampleDataRO(INDEX2(0,k1+1, m_N0)); |
1415 | const register double* f_00 = in.getSampleDataRO(INDEX2(0,k1, m_N0)); | const double* f_00 = in.getSampleDataRO(INDEX2(0,k1, m_N0)); |
1416 | double* o = out.getSampleDataRW(m_faceOffset[0]+k1); | double* o = out.getSampleDataRW(m_faceOffset[0]+k1); |
1417 | for (index_t i=0; i < numComp; ++i) { | for (index_t i=0; i < numComp; ++i) { |
1418 | o[INDEX2(i,numComp,0)] = f_00[i]*c1 + f_01[i]*c0; | o[INDEX2(i,numComp,0)] = f_00[i]*c1 + f_01[i]*c0; |
# | Line 1360 void Rectangle::interpolateNodesOnFaces( | Line 1423 void Rectangle::interpolateNodesOnFaces( |
1423 | if (m_faceOffset[1] > -1) { | if (m_faceOffset[1] > -1) { |
1424 | #pragma omp for nowait | #pragma omp for nowait |
1425 | for (index_t k1=0; k1 < m_NE1; ++k1) { | for (index_t k1=0; k1 < m_NE1; ++k1) { |
1426 | const register double* f_10 = in.getSampleDataRO(INDEX2(m_N0-1,k1, m_N0)); | const double* f_10 = in.getSampleDataRO(INDEX2(m_N0-1,k1, m_N0)); |
1427 | const register double* f_11 = in.getSampleDataRO(INDEX2(m_N0-1,k1+1, m_N0)); | const double* f_11 = in.getSampleDataRO(INDEX2(m_N0-1,k1+1, m_N0)); |
1428 | double* o = out.getSampleDataRW(m_faceOffset[1]+k1); | double* o = out.getSampleDataRW(m_faceOffset[1]+k1); |
1429 | for (index_t i=0; i < numComp; ++i) { | for (index_t i=0; i < numComp; ++i) { |
1430 | o[INDEX2(i,numComp,0)] = f_10[i]*c1 + f_11[i]*c0; | o[INDEX2(i,numComp,0)] = f_10[i]*c1 + f_11[i]*c0; |
# | Line 1372 void Rectangle::interpolateNodesOnFaces( | Line 1435 void Rectangle::interpolateNodesOnFaces( |
1435 | if (m_faceOffset[2] > -1) { | if (m_faceOffset[2] > -1) { |
1436 | #pragma omp for nowait | #pragma omp for nowait |
1437 | for (index_t k0=0; k0 < m_NE0; ++k0) { | for (index_t k0=0; k0 < m_NE0; ++k0) { |
1438 | const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,0, m_N0)); | const double* f_10 = in.getSampleDataRO(INDEX2(k0+1,0, m_N0)); |
1439 | const register double* f_00 = in.getSampleDataRO(INDEX2(k0,0, m_N0)); | const double* f_00 = in.getSampleDataRO(INDEX2(k0,0, m_N0)); |
1440 | double* o = out.getSampleDataRW(m_faceOffset[2]+k0); | double* o = out.getSampleDataRW(m_faceOffset[2]+k0); |
1441 | for (index_t i=0; i < numComp; ++i) { | for (index_t i=0; i < numComp; ++i) { |
1442 | o[INDEX2(i,numComp,0)] = f_00[i]*c1 + f_10[i]*c0; | o[INDEX2(i,numComp,0)] = f_00[i]*c1 + f_10[i]*c0; |
# | Line 1384 void Rectangle::interpolateNodesOnFaces( | Line 1447 void Rectangle::interpolateNodesOnFaces( |
1447 | if (m_faceOffset[3] > -1) { | if (m_faceOffset[3] > -1) { |
1448 | #pragma omp for nowait | #pragma omp for nowait |
1449 | for (index_t k0=0; k0 < m_NE0; ++k0) { | for (index_t k0=0; k0 < m_NE0; ++k0) { |
1450 | const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,m_N1-1, m_N0)); | const double* f_11 = in.getSampleDataRO(INDEX2(k0+1,m_N1-1, m_N0)); |
1451 | const register double* f_01 = in.getSampleDataRO(INDEX2(k0,m_N1-1, m_N0)); | const double* f_01 = in.getSampleDataRO(INDEX2(k0,m_N1-1, m_N0)); |
1452 | double* o = out.getSampleDataRW(m_faceOffset[3]+k0); | double* o = out.getSampleDataRW(m_faceOffset[3]+k0); |
1453 | for (index_t i=0; i < numComp; ++i) { | for (index_t i=0; i < numComp; ++i) { |
1454 | o[INDEX2(i,numComp,0)] = f_01[i]*c1 + f_11[i]*c0; | o[INDEX2(i,numComp,0)] = f_01[i]*c1 + f_11[i]*c0; |
# | Line 1393 void Rectangle::interpolateNodesOnFaces( | Line 1456 void Rectangle::interpolateNodesOnFaces( |
1456 | } /* end of component loop i */ | } /* end of component loop i */ |
1457 | } /* end of k0 loop */ | } /* end of k0 loop */ |
1458 | } /* end of face 3 */ | } /* end of face 3 */ |
/* GENERATOR SNIP_INTERPOLATE_FACES BOTTOM */ | ||
1459 | } // end of parallel section | } // end of parallel section |
1460 | } | } |
1461 | } | } |
1462 | ||
1463 | //protected | //protected |
void Rectangle::nodesToDOF(escript::Data& out, escript::Data& in) const | ||
{ | ||
const dim_t numComp = in.getDataPointSize(); | ||
out.requireWrite(); | ||
const index_t left = (m_offset0==0 ? 0 : 1); | ||
const index_t bottom = (m_offset1==0 ? 0 : 1); | ||
const index_t right = (m_mpiInfo->rank%m_NX==m_NX-1 ? m_N0 : m_N0-1); | ||
const index_t top = (m_mpiInfo->rank/m_NX==m_NY-1 ? m_N1 : m_N1-1); | ||
index_t n=0; | ||
for (index_t i=bottom; i<top; i++) { | ||
for (index_t j=left; j<right; j++, n++) { | ||
const double* src=in.getSampleDataRO(j+i*m_N0); | ||
copy(src, src+numComp, out.getSampleDataRW(n)); | ||
} | ||
} | ||
} | ||
//protected | ||
1464 | void Rectangle::assemblePDESingle(Paso_SystemMatrix* mat, | void Rectangle::assemblePDESingle(Paso_SystemMatrix* mat, |
1465 | escript::Data& rhs, const escript::Data& A, const escript::Data& B, | escript::Data& rhs, const escript::Data& A, const escript::Data& B, |
1466 | const escript::Data& C, const escript::Data& D, | const escript::Data& C, const escript::Data& D, |
1467 | const escript::Data& X, const escript::Data& Y, | const escript::Data& X, const escript::Data& Y) const |
const escript::Data& d, const escript::Data& y) const | ||
1468 | { | { |
1469 | const double h0 = m_l0/m_gNE0; | const double h0 = m_l0/m_gNE0; |
1470 | const double h1 = m_l1/m_gNE1; | const double h1 = m_l1/m_gNE1; |
/* GENERATOR SNIP_PDE_SINGLE_PRE TOP */ | ||
1471 | const double w0 = -0.1555021169820365539*h1/h0; | const double w0 = -0.1555021169820365539*h1/h0; |
1472 | const double w1 = 0.041666666666666666667; | const double w1 = 0.041666666666666666667; |
1473 | const double w2 = -0.15550211698203655390; | |
1474 | const double w3 = 0.041666666666666666667*h0/h1; | |
1475 | const double w4 = 0.15550211698203655390; | |
1476 | const double w5 = -0.041666666666666666667; | |
1477 | const double w6 = -0.01116454968463011277*h1/h0; | |
1478 | const double w7 = 0.011164549684630112770; | |
1479 | const double w8 = -0.011164549684630112770; | |
1480 | const double w9 = -0.041666666666666666667*h1/h0; | |
1481 | const double w10 = -0.041666666666666666667*h0/h1; | const double w10 = -0.041666666666666666667*h0/h1; |
1482 | const double w11 = 0.1555021169820365539*h1/h0; | const double w11 = 0.1555021169820365539*h1/h0; |
1483 | const double w12 = 0.1555021169820365539*h0/h1; | const double w12 = 0.1555021169820365539*h0/h1; |
# | Line 1438 void Rectangle::assemblePDESingle(Paso_S | Line 1487 void Rectangle::assemblePDESingle(Paso_S |
1487 | const double w16 = -0.01116454968463011277*h0/h1; | const double w16 = -0.01116454968463011277*h0/h1; |
1488 | const double w17 = -0.1555021169820365539*h0/h1; | const double w17 = -0.1555021169820365539*h0/h1; |
1489 | const double w18 = -0.33333333333333333333*h1/h0; | const double w18 = -0.33333333333333333333*h1/h0; |
1490 | const double w19 = 0.25000000000000000000; | const double w19 = 0.25; |
1491 | const double w2 = -0.15550211698203655390; | const double w20 = -0.25; |
const double w20 = -0.25000000000000000000; | ||
1492 | const double w21 = 0.16666666666666666667*h0/h1; | const double w21 = 0.16666666666666666667*h0/h1; |
1493 | const double w22 = -0.16666666666666666667*h1/h0; | const double w22 = -0.16666666666666666667*h1/h0; |
1494 | const double w23 = -0.16666666666666666667*h0/h1; | const double w23 = -0.16666666666666666667*h0/h1; |
# | Line 1450 void Rectangle::assemblePDESingle(Paso_S | Line 1498 void Rectangle::assemblePDESingle(Paso_S |
1498 | const double w27 = -0.33333333333333333333*h0/h1; | const double w27 = -0.33333333333333333333*h0/h1; |
1499 | const double w28 = -0.032861463941450536761*h1; | const double w28 = -0.032861463941450536761*h1; |
1500 | const double w29 = -0.032861463941450536761*h0; | const double w29 = -0.032861463941450536761*h0; |
const double w3 = 0.041666666666666666667*h0/h1; | ||
1501 | const double w30 = -0.12264065304058601714*h1; | const double w30 = -0.12264065304058601714*h1; |
1502 | const double w31 = -0.0023593469594139828636*h1; | const double w31 = -0.0023593469594139828636*h1; |
1503 | const double w32 = -0.008805202725216129906*h0; | const double w32 = -0.008805202725216129906*h0; |
# | Line 1461 void Rectangle::assemblePDESingle(Paso_S | Line 1508 void Rectangle::assemblePDESingle(Paso_S |
1508 | const double w37 = 0.0023593469594139828636*h1; | const double w37 = 0.0023593469594139828636*h1; |
1509 | const double w38 = 0.12264065304058601714*h1; | const double w38 = 0.12264065304058601714*h1; |
1510 | const double w39 = 0.032861463941450536761*h0; | const double w39 = 0.032861463941450536761*h0; |
const double w4 = 0.15550211698203655390; | ||
1511 | const double w40 = -0.12264065304058601714*h0; | const double w40 = -0.12264065304058601714*h0; |
1512 | const double w41 = -0.0023593469594139828636*h0; | const double w41 = -0.0023593469594139828636*h0; |
1513 | const double w42 = 0.0023593469594139828636*h0; | const double w42 = 0.0023593469594139828636*h0; |
# | Line 1472 void Rectangle::assemblePDESingle(Paso_S | Line 1518 void Rectangle::assemblePDESingle(Paso_S |
1518 | const double w47 = 0.16666666666666666667*h1; | const double w47 = 0.16666666666666666667*h1; |
1519 | const double w48 = 0.083333333333333333333*h0; | const double w48 = 0.083333333333333333333*h0; |
1520 | const double w49 = -0.16666666666666666667*h0; | const double w49 = -0.16666666666666666667*h0; |
const double w5 = -0.041666666666666666667; | ||
1521 | const double w50 = 0.16666666666666666667*h0; | const double w50 = 0.16666666666666666667*h0; |
1522 | const double w51 = -0.083333333333333333333*h1; | const double w51 = -0.083333333333333333333*h1; |
1523 | const double w52 = 0.025917019497006092316*h0*h1; | const double w52 = 0.025917019497006092316*h0*h1; |
# | Line 1483 void Rectangle::assemblePDESingle(Paso_S | Line 1528 void Rectangle::assemblePDESingle(Paso_S |
1528 | const double w57 = 0.055555555555555555556*h0*h1; | const double w57 = 0.055555555555555555556*h0*h1; |
1529 | const double w58 = 0.027777777777777777778*h0*h1; | const double w58 = 0.027777777777777777778*h0*h1; |
1530 | const double w59 = 0.11111111111111111111*h0*h1; | const double w59 = 0.11111111111111111111*h0*h1; |
const double w6 = -0.01116454968463011277*h1/h0; | ||
1531 | const double w60 = -0.19716878364870322056*h1; | const double w60 = -0.19716878364870322056*h1; |
1532 | const double w61 = -0.19716878364870322056*h0; | const double w61 = -0.19716878364870322056*h0; |
1533 | const double w62 = -0.052831216351296779436*h0; | const double w62 = -0.052831216351296779436*h0; |
# | Line 1494 void Rectangle::assemblePDESingle(Paso_S | Line 1538 void Rectangle::assemblePDESingle(Paso_S |
1538 | const double w67 = 0.052831216351296779436*h0; | const double w67 = 0.052831216351296779436*h0; |
1539 | const double w68 = -0.5*h1; | const double w68 = -0.5*h1; |
1540 | const double w69 = -0.5*h0; | const double w69 = -0.5*h0; |
const double w7 = 0.011164549684630112770; | ||
1541 | const double w70 = 0.5*h1; | const double w70 = 0.5*h1; |
1542 | const double w71 = 0.5*h0; | const double w71 = 0.5*h0; |
1543 | const double w72 = 0.1555021169820365539*h0*h1; | const double w72 = 0.1555021169820365539*h0*h1; |
1544 | const double w73 = 0.041666666666666666667*h0*h1; | const double w73 = 0.041666666666666666667*h0*h1; |
1545 | const double w74 = 0.01116454968463011277*h0*h1; | const double w74 = 0.01116454968463011277*h0*h1; |
1546 | const double w75 = 0.25*h0*h1; | const double w75 = 0.25*h0*h1; |
const double w8 = -0.011164549684630112770; | ||
const double w9 = -0.041666666666666666667*h1/h0; | ||
/* GENERATOR SNIP_PDE_SINGLE_PRE BOTTOM */ | ||
1547 | ||
1548 | rhs.requireWrite(); | rhs.requireWrite(); |
1549 | #pragma omp parallel | #pragma omp parallel |
1550 | { | { |
1551 | for (index_t k1_0=0; k1_0<2; k1_0++) { // coloring | for (index_t k1_0=0; k1_0<2; k1_0++) { // colouring |
1552 | #pragma omp for | #pragma omp for |
1553 | for (index_t k1=k1_0; k1<m_NE1; k1+=2) { | for (index_t k1=k1_0; k1<m_NE1; k1+=2) { |
1554 | for (index_t k0=0; k0<m_NE0; ++k0) { | for (index_t k0=0; k0<m_NE0; ++k0) { |
# | Line 1517 void Rectangle::assemblePDESingle(Paso_S | Line 1557 void Rectangle::assemblePDESingle(Paso_S |
1557 | vector<double> EM_S(4*4, 0); | vector<double> EM_S(4*4, 0); |
1558 | vector<double> EM_F(4, 0); | vector<double> EM_F(4, 0); |
1559 | const index_t e = k0 + m_NE0*k1; | const index_t e = k0 + m_NE0*k1; |
/* GENERATOR SNIP_PDE_SINGLE TOP */ | ||
1560 | /////////////// | /////////////// |
1561 | // process A // | // process A // |
1562 | /////////////// | /////////////// |
# | Line 1525 void Rectangle::assemblePDESingle(Paso_S | Line 1564 void Rectangle::assemblePDESingle(Paso_S |
1564 | add_EM_S=true; | add_EM_S=true; |
1565 | const double* A_p=const_cast<escript::Data*>(&A)->getSampleDataRO(e); | const double* A_p=const_cast<escript::Data*>(&A)->getSampleDataRO(e); |
1566 | if (A.actsExpanded()) { | if (A.actsExpanded()) { |
1567 | const register double A_00_0 = A_p[INDEX3(0,0,0,2,2)]; | const double A_00_0 = A_p[INDEX3(0,0,0,2,2)]; |
1568 | const register double A_01_0 = A_p[INDEX3(0,1,0,2,2)]; | const double A_10_0 = A_p[INDEX3(1,0,0,2,2)]; |
1569 | const register double A_10_0 = A_p[INDEX3(1,0,0,2,2)]; | const double A_01_0 = A_p[INDEX3(0,1,0,2,2)]; |
1570 | const register double A_11_0 = A_p[INDEX3(1,1,0,2,2)]; | const double A_11_0 = A_p[INDEX3(1,1,0,2,2)]; |
1571 | const register double A_00_1 = A_p[INDEX3(0,0,1,2,2)]; | const double A_00_1 = A_p[INDEX3(0,0,1,2,2)]; |
1572 | const register double A_01_1 = A_p[INDEX3(0,1,1,2,2)]; | const double A_10_1 = A_p[INDEX3(1,0,1,2,2)]; |
1573 | const register double A_10_1 = A_p[INDEX3(1,0,1,2,2)]; | const double A_01_1 = A_p[INDEX3(0,1,1,2,2)]; |
1574 | const register double A_11_1 = A_p[INDEX3(1,1,1,2,2)]; | const double A_11_1 = A_p[INDEX3(1,1,1,2,2)]; |
1575 | const register double A_00_2 = A_p[INDEX3(0,0,2,2,2)]; | const double A_00_2 = A_p[INDEX3(0,0,2,2,2)]; |
1576 | const register double A_01_2 = A_p[INDEX3(0,1,2,2,2)]; | const double A_10_2 = A_p[INDEX3(1,0,2,2,2)]; |
1577 | const register double A_10_2 = A_p[INDEX3(1,0,2,2,2)]; | const double A_01_2 = A_p[INDEX3(0,1,2,2,2)]; |
1578 | const register double A_11_2 = A_p[INDEX3(1,1,2,2,2)]; | const double A_11_2 = A_p[INDEX3(1,1,2,2,2)]; |
1579 | const register double A_00_3 = A_p[INDEX3(0,0,3,2,2)]; | const double A_00_3 = A_p[INDEX3(0,0,3,2,2)]; |
1580 | const register double A_01_3 = A_p[INDEX3(0,1,3,2,2)]; | const double A_10_3 = A_p[INDEX3(1,0,3,2,2)]; |
1581 | const register double A_10_3 = A_p[INDEX3(1,0,3,2,2)]; | const double A_01_3 = A_p[INDEX3(0,1,3,2,2)]; |
1582 | const register double A_11_3 = A_p[INDEX3(1,1,3,2,2)]; | const double A_11_3 = A_p[INDEX3(1,1,3,2,2)]; |
1583 | const register double tmp4_0 = A_10_1 + A_10_2; | const double tmp0_0 = A_01_0 + A_01_3; |
1584 | const register double tmp12_0 = A_11_0 + A_11_2; | const double tmp1_0 = A_00_0 + A_00_1; |
1585 | const register double tmp2_0 = A_11_0 + A_11_1 + A_11_2 + A_11_3; | const double tmp2_0 = A_11_0 + A_11_1 + A_11_2 + A_11_3; |
1586 | const register double tmp10_0 = A_01_3 + A_10_3; | const double tmp3_0 = A_00_2 + A_00_3; |
1587 | const register double tmp14_0 = A_01_0 + A_01_3 + A_10_0 + A_10_3; | const double tmp4_0 = A_10_1 + A_10_2; |
1588 | const register double tmp0_0 = A_01_0 + A_01_3; | const double tmp5_0 = A_00_0 + A_00_1 + A_00_2 + A_00_3; |
1589 | const register double tmp13_0 = A_01_2 + A_10_1; | const double tmp6_0 = A_01_3 + A_10_0; |
1590 | const register double tmp3_0 = A_00_2 + A_00_3; | const double tmp7_0 = A_01_0 + A_10_3; |
1591 | const register double tmp11_0 = A_11_1 + A_11_3; | const double tmp8_0 = A_01_1 + A_01_2 + A_10_1 + A_10_2; |
1592 | const register double tmp18_0 = A_01_1 + A_10_1; | const double tmp9_0 = A_01_0 + A_10_0; |
1593 | const register double tmp1_0 = A_00_0 + A_00_1; | const double tmp12_0 = A_11_0 + A_11_2; |
1594 | const register double tmp15_0 = A_01_1 + A_10_2; | const double tmp10_0 = A_01_3 + A_10_3; |
1595 | const register double tmp5_0 = A_00_0 + A_00_1 + A_00_2 + A_00_3; | const double tmp14_0 = A_01_0 + A_01_3 + A_10_0 + A_10_3; |
1596 | const register double tmp16_0 = A_10_0 + A_10_3; | const double tmp13_0 = A_01_2 + A_10_1; |
1597 | const register double tmp6_0 = A_01_3 + A_10_0; | const double tmp11_0 = A_11_1 + A_11_3; |
1598 | const register double tmp17_0 = A_01_1 + A_01_2; | const double tmp18_0 = A_01_1 + A_10_1; |
1599 | const register double tmp9_0 = A_01_0 + A_10_0; | const double tmp15_0 = A_01_1 + A_10_2; |
1600 | const register double tmp7_0 = A_01_0 + A_10_3; | const double tmp16_0 = A_10_0 + A_10_3; |
1601 | const register double tmp8_0 = A_01_1 + A_01_2 + A_10_1 + A_10_2; | const double tmp17_0 = A_01_1 + A_01_2; |
1602 | const register double tmp19_0 = A_01_2 + A_10_2; | const double tmp19_0 = A_01_2 + A_10_2; |
1603 | const register double tmp14_1 = A_10_0*w8; | const double tmp0_1 = A_10_3*w8; |
1604 | const register double tmp23_1 = tmp3_0*w14; | const double tmp1_1 = tmp0_0*w1; |
1605 | const register double tmp35_1 = A_01_0*w8; | const double tmp2_1 = A_01_1*w4; |
1606 | const register double tmp54_1 = tmp13_0*w8; | const double tmp3_1 = tmp1_0*w0; |
1607 | const register double tmp20_1 = tmp9_0*w4; | const double tmp4_1 = A_01_2*w7; |
1608 | const register double tmp25_1 = tmp12_0*w12; | const double tmp5_1 = tmp2_0*w3; |
1609 | const register double tmp2_1 = A_01_1*w4; | const double tmp6_1 = tmp3_0*w6; |
1610 | const register double tmp44_1 = tmp7_0*w7; | const double tmp7_1 = A_10_0*w2; |
1611 | const register double tmp26_1 = tmp10_0*w4; | const double tmp8_1 = tmp4_0*w5; |
1612 | const register double tmp52_1 = tmp18_0*w8; | const double tmp9_1 = tmp2_0*w10; |
1613 | const register double tmp48_1 = A_10_1*w7; | const double tmp14_1 = A_10_0*w8; |
1614 | const register double tmp46_1 = A_01_3*w8; | const double tmp23_1 = tmp3_0*w14; |
1615 | const register double tmp50_1 = A_01_0*w2; | const double tmp35_1 = A_01_0*w8; |
1616 | const register double tmp8_1 = tmp4_0*w5; | const double tmp54_1 = tmp13_0*w8; |
1617 | const register double tmp56_1 = tmp19_0*w8; | const double tmp20_1 = tmp9_0*w4; |
1618 | const register double tmp9_1 = tmp2_0*w10; | const double tmp25_1 = tmp12_0*w12; |
1619 | const register double tmp19_1 = A_10_3*w2; | const double tmp44_1 = tmp7_0*w7; |
1620 | const register double tmp47_1 = A_10_2*w4; | const double tmp26_1 = tmp10_0*w4; |
1621 | const register double tmp16_1 = tmp3_0*w0; | const double tmp52_1 = tmp18_0*w8; |
1622 | const register double tmp18_1 = tmp1_0*w6; | const double tmp48_1 = A_10_1*w7; |
1623 | const register double tmp31_1 = tmp11_0*w12; | const double tmp46_1 = A_01_3*w8; |
1624 | const register double tmp55_1 = tmp15_0*w2; | const double tmp50_1 = A_01_0*w2; |
1625 | const register double tmp39_1 = A_10_2*w7; | const double tmp56_1 = tmp19_0*w8; |
1626 | const register double tmp11_1 = tmp6_0*w7; | const double tmp19_1 = A_10_3*w2; |
1627 | const register double tmp40_1 = tmp11_0*w17; | const double tmp47_1 = A_10_2*w4; |
1628 | const register double tmp34_1 = tmp15_0*w8; | const double tmp16_1 = tmp3_0*w0; |
1629 | const register double tmp33_1 = tmp14_0*w5; | const double tmp18_1 = tmp1_0*w6; |
1630 | const register double tmp24_1 = tmp11_0*w13; | const double tmp31_1 = tmp11_0*w12; |
1631 | const register double tmp3_1 = tmp1_0*w0; | const double tmp55_1 = tmp15_0*w2; |
1632 | const register double tmp5_1 = tmp2_0*w3; | const double tmp39_1 = A_10_2*w7; |
1633 | const register double tmp43_1 = tmp17_0*w5; | const double tmp11_1 = tmp6_0*w7; |
1634 | const register double tmp15_1 = A_01_2*w4; | const double tmp40_1 = tmp11_0*w17; |
1635 | const register double tmp53_1 = tmp19_0*w2; | const double tmp34_1 = tmp15_0*w8; |
1636 | const register double tmp27_1 = tmp3_0*w11; | const double tmp33_1 = tmp14_0*w5; |
1637 | const register double tmp32_1 = tmp13_0*w2; | const double tmp24_1 = tmp11_0*w13; |
1638 | const register double tmp10_1 = tmp5_0*w9; | const double tmp43_1 = tmp17_0*w5; |
1639 | const register double tmp37_1 = A_10_1*w4; | const double tmp15_1 = A_01_2*w4; |
1640 | const register double tmp38_1 = tmp5_0*w15; | const double tmp53_1 = tmp19_0*w2; |
1641 | const register double tmp17_1 = A_01_1*w7; | const double tmp27_1 = tmp3_0*w11; |
1642 | const register double tmp12_1 = tmp7_0*w4; | const double tmp32_1 = tmp13_0*w2; |
1643 | const register double tmp22_1 = tmp10_0*w7; | const double tmp10_1 = tmp5_0*w9; |
1644 | const register double tmp57_1 = tmp18_0*w2; | const double tmp37_1 = A_10_1*w4; |
1645 | const register double tmp28_1 = tmp9_0*w7; | const double tmp38_1 = tmp5_0*w15; |
1646 | const register double tmp29_1 = tmp1_0*w14; | const double tmp17_1 = A_01_1*w7; |
1647 | const register double tmp51_1 = tmp11_0*w16; | const double tmp12_1 = tmp7_0*w4; |
1648 | const register double tmp42_1 = tmp12_0*w16; | const double tmp22_1 = tmp10_0*w7; |
1649 | const register double tmp49_1 = tmp12_0*w17; | const double tmp57_1 = tmp18_0*w2; |
1650 | const register double tmp21_1 = tmp1_0*w11; | const double tmp28_1 = tmp9_0*w7; |
1651 | const register double tmp1_1 = tmp0_0*w1; | const double tmp29_1 = tmp1_0*w14; |
1652 | const register double tmp45_1 = tmp6_0*w4; | const double tmp51_1 = tmp11_0*w16; |
1653 | const register double tmp7_1 = A_10_0*w2; | const double tmp42_1 = tmp12_0*w16; |
1654 | const register double tmp6_1 = tmp3_0*w6; | const double tmp49_1 = tmp12_0*w17; |
1655 | const register double tmp13_1 = tmp8_0*w1; | const double tmp21_1 = tmp1_0*w11; |
1656 | const register double tmp36_1 = tmp16_0*w1; | const double tmp45_1 = tmp6_0*w4; |
1657 | const register double tmp41_1 = A_01_3*w2; | const double tmp13_1 = tmp8_0*w1; |
1658 | const register double tmp30_1 = tmp12_0*w13; | const double tmp36_1 = tmp16_0*w1; |
1659 | const register double tmp4_1 = A_01_2*w7; | const double tmp41_1 = A_01_3*w2; |
1660 | const register double tmp0_1 = A_10_3*w8; | const double tmp30_1 = tmp12_0*w13; |
EM_S[INDEX2(0,1,4)]+=tmp0_1 + tmp1_1 + tmp2_1 + tmp3_1 + tmp4_1 + tmp5_1 + tmp6_1 + tmp7_1 + tmp8_1; | ||
EM_S[INDEX2(1,2,4)]+=tmp10_1 + tmp11_1 + tmp12_1 + tmp13_1 + tmp9_1; | ||
EM_S[INDEX2(3,2,4)]+=tmp14_1 + tmp15_1 + tmp16_1 + tmp17_1 + tmp18_1 + tmp19_1 + tmp1_1 + tmp5_1 + tmp8_1; | ||
1661 | EM_S[INDEX2(0,0,4)]+=tmp13_1 + tmp20_1 + tmp21_1 + tmp22_1 + tmp23_1 + tmp24_1 + tmp25_1; | EM_S[INDEX2(0,0,4)]+=tmp13_1 + tmp20_1 + tmp21_1 + tmp22_1 + tmp23_1 + tmp24_1 + tmp25_1; |
1662 | EM_S[INDEX2(3,3,4)]+=tmp13_1 + tmp26_1 + tmp27_1 + tmp28_1 + tmp29_1 + tmp30_1 + tmp31_1; | EM_S[INDEX2(1,0,4)]+=tmp36_1 + tmp37_1 + tmp39_1 + tmp3_1 + tmp43_1 + tmp46_1 + tmp50_1 + tmp5_1 + tmp6_1; |
1663 | EM_S[INDEX2(2,0,4)]+=tmp0_1 + tmp15_1 + tmp17_1 + tmp1_1 + tmp38_1 + tmp49_1 + tmp51_1 + tmp7_1 + tmp8_1; | |
1664 | EM_S[INDEX2(3,0,4)]+=tmp10_1 + tmp32_1 + tmp33_1 + tmp34_1 + tmp9_1; | EM_S[INDEX2(3,0,4)]+=tmp10_1 + tmp32_1 + tmp33_1 + tmp34_1 + tmp9_1; |
1665 | EM_S[INDEX2(3,1,4)]+=tmp35_1 + tmp36_1 + tmp37_1 + tmp38_1 + tmp39_1 + tmp40_1 + tmp41_1 + tmp42_1 + tmp43_1; | EM_S[INDEX2(0,1,4)]+=tmp0_1 + tmp1_1 + tmp2_1 + tmp3_1 + tmp4_1 + tmp5_1 + tmp6_1 + tmp7_1 + tmp8_1; |
1666 | EM_S[INDEX2(1,1,4)]+=tmp21_1 + tmp23_1 + tmp30_1 + tmp31_1 + tmp33_1 + tmp56_1 + tmp57_1; | |
1667 | EM_S[INDEX2(2,1,4)]+=tmp10_1 + tmp13_1 + tmp44_1 + tmp45_1 + tmp9_1; | EM_S[INDEX2(2,1,4)]+=tmp10_1 + tmp13_1 + tmp44_1 + tmp45_1 + tmp9_1; |
1668 | EM_S[INDEX2(3,1,4)]+=tmp35_1 + tmp36_1 + tmp37_1 + tmp38_1 + tmp39_1 + tmp40_1 + tmp41_1 + tmp42_1 + tmp43_1; | |
1669 | EM_S[INDEX2(0,2,4)]+=tmp36_1 + tmp38_1 + tmp43_1 + tmp46_1 + tmp47_1 + tmp48_1 + tmp49_1 + tmp50_1 + tmp51_1; | EM_S[INDEX2(0,2,4)]+=tmp36_1 + tmp38_1 + tmp43_1 + tmp46_1 + tmp47_1 + tmp48_1 + tmp49_1 + tmp50_1 + tmp51_1; |
1670 | EM_S[INDEX2(2,0,4)]+=tmp0_1 + tmp15_1 + tmp17_1 + tmp1_1 + tmp38_1 + tmp49_1 + tmp51_1 + tmp7_1 + tmp8_1; | EM_S[INDEX2(1,2,4)]+=tmp10_1 + tmp11_1 + tmp12_1 + tmp13_1 + tmp9_1; |
EM_S[INDEX2(1,3,4)]+=tmp14_1 + tmp19_1 + tmp1_1 + tmp2_1 + tmp38_1 + tmp40_1 + tmp42_1 + tmp4_1 + tmp8_1; | ||
EM_S[INDEX2(2,3,4)]+=tmp16_1 + tmp18_1 + tmp35_1 + tmp36_1 + tmp41_1 + tmp43_1 + tmp47_1 + tmp48_1 + tmp5_1; | ||
1671 | EM_S[INDEX2(2,2,4)]+=tmp24_1 + tmp25_1 + tmp27_1 + tmp29_1 + tmp33_1 + tmp52_1 + tmp53_1; | EM_S[INDEX2(2,2,4)]+=tmp24_1 + tmp25_1 + tmp27_1 + tmp29_1 + tmp33_1 + tmp52_1 + tmp53_1; |
1672 | EM_S[INDEX2(1,0,4)]+=tmp36_1 + tmp37_1 + tmp39_1 + tmp3_1 + tmp43_1 + tmp46_1 + tmp50_1 + tmp5_1 + tmp6_1; | EM_S[INDEX2(3,2,4)]+=tmp14_1 + tmp15_1 + tmp16_1 + tmp17_1 + tmp18_1 + tmp19_1 + tmp1_1 + tmp5_1 + tmp8_1; |
1673 | EM_S[INDEX2(0,3,4)]+=tmp10_1 + tmp33_1 + tmp54_1 + tmp55_1 + tmp9_1; | EM_S[INDEX2(0,3,4)]+=tmp10_1 + tmp33_1 + tmp54_1 + tmp55_1 + tmp9_1; |
1674 | EM_S[INDEX2(1,1,4)]+=tmp21_1 + tmp23_1 + tmp30_1 + tmp31_1 + tmp33_1 + tmp56_1 + tmp57_1; | EM_S[INDEX2(1,3,4)]+=tmp14_1 + tmp19_1 + tmp1_1 + tmp2_1 + tmp38_1 + tmp40_1 + tmp42_1 + tmp4_1 + tmp8_1; |
1675 | } else { /* constant data */ | EM_S[INDEX2(2,3,4)]+=tmp16_1 + tmp18_1 + tmp35_1 + tmp36_1 + tmp41_1 + tmp43_1 + tmp47_1 + tmp48_1 + tmp5_1; |
1676 | const register double A_00 = A_p[INDEX2(0,0,2)]; | EM_S[INDEX2(3,3,4)]+=tmp13_1 + tmp26_1 + tmp27_1 + tmp28_1 + tmp29_1 + tmp30_1 + tmp31_1; |
1677 | const register double A_01 = A_p[INDEX2(0,1,2)]; | } else { // constant data |
1678 | const register double A_10 = A_p[INDEX2(1,0,2)]; | const double A_00 = A_p[INDEX2(0,0,2)]; |
1679 | const register double A_11 = A_p[INDEX2(1,1,2)]; | const double A_10 = A_p[INDEX2(1,0,2)]; |
1680 | const register double tmp0_0 = A_01 + A_10; | const double A_01 = A_p[INDEX2(0,1,2)]; |
1681 | const register double tmp0_1 = A_00*w18; | const double A_11 = A_p[INDEX2(1,1,2)]; |
1682 | const register double tmp10_1 = A_01*w20; | const double tmp0_0 = A_01 + A_10; |
1683 | const register double tmp12_1 = A_00*w26; | const double tmp0_1 = A_00*w18; |
1684 | const register double tmp4_1 = A_00*w22; | const double tmp1_1 = A_01*w19; |
1685 | const register double tmp8_1 = A_00*w24; | const double tmp2_1 = A_10*w20; |
1686 | const register double tmp13_1 = A_10*w19; | const double tmp3_1 = A_11*w21; |
1687 | const register double tmp9_1 = tmp0_0*w20; | const double tmp4_1 = A_00*w22; |
1688 | const register double tmp3_1 = A_11*w21; | const double tmp5_1 = tmp0_0*w19; |
1689 | const register double tmp11_1 = A_11*w27; | const double tmp6_1 = A_11*w23; |
1690 | const register double tmp1_1 = A_01*w19; | const double tmp7_1 = A_11*w25; |
1691 | const register double tmp6_1 = A_11*w23; | const double tmp8_1 = A_00*w24; |
1692 | const register double tmp7_1 = A_11*w25; | const double tmp9_1 = tmp0_0*w20; |
1693 | const register double tmp2_1 = A_10*w20; | const double tmp10_1 = A_01*w20; |
1694 | const register double tmp5_1 = tmp0_0*w19; | const double tmp11_1 = A_11*w27; |
1695 | EM_S[INDEX2(0,1,4)]+=tmp0_1 + tmp1_1 + tmp2_1 + tmp3_1; | const double tmp12_1 = A_00*w26; |
1696 | EM_S[INDEX2(1,2,4)]+=tmp4_1 + tmp5_1 + tmp6_1; | const double tmp13_1 = A_10*w19; |
EM_S[INDEX2(3,2,4)]+=tmp0_1 + tmp1_1 + tmp2_1 + tmp3_1; | ||
1697 | EM_S[INDEX2(0,0,4)]+=tmp5_1 + tmp7_1 + tmp8_1; | EM_S[INDEX2(0,0,4)]+=tmp5_1 + tmp7_1 + tmp8_1; |
1698 | EM_S[INDEX2(3,3,4)]+=tmp5_1 + tmp7_1 + tmp8_1; | EM_S[INDEX2(1,0,4)]+=tmp0_1 + tmp10_1 + tmp13_1 + tmp3_1; |
1699 | EM_S[INDEX2(2,0,4)]+=tmp11_1 + tmp12_1 + tmp1_1 + tmp2_1; | |
1700 | EM_S[INDEX2(3,0,4)]+=tmp4_1 + tmp6_1 + tmp9_1; | EM_S[INDEX2(3,0,4)]+=tmp4_1 + tmp6_1 + tmp9_1; |
1701 | EM_S[INDEX2(3,1,4)]+=tmp10_1 + tmp11_1 + tmp12_1 + tmp13_1; | EM_S[INDEX2(0,1,4)]+=tmp0_1 + tmp1_1 + tmp2_1 + tmp3_1; |
1702 | EM_S[INDEX2(1,1,4)]+=tmp7_1 + tmp8_1 + tmp9_1; | |
1703 | EM_S[INDEX2(2,1,4)]+=tmp4_1 + tmp5_1 + tmp6_1; | EM_S[INDEX2(2,1,4)]+=tmp4_1 + tmp5_1 + tmp6_1; |
1704 | EM_S[INDEX2(3,1,4)]+=tmp10_1 + tmp11_1 + tmp12_1 + tmp13_1; | |
1705 | EM_S[INDEX2(0,2,4)]+=tmp10_1 + tmp11_1 + tmp12_1 + tmp13_1; | EM_S[INDEX2(0,2,4)]+=tmp10_1 + tmp11_1 + tmp12_1 + tmp13_1; |
1706 | EM_S[INDEX2(2,0,4)]+=tmp11_1 + tmp12_1 + tmp1_1 + tmp2_1; | EM_S[INDEX2(1,2,4)]+=tmp4_1 + tmp5_1 + tmp6_1; |
EM_S[INDEX2(1,3,4)]+=tmp11_1 + tmp12_1 + tmp1_1 + tmp2_1; | ||
EM_S[INDEX2(2,3,4)]+=tmp0_1 + tmp10_1 + tmp13_1 + tmp3_1; | ||
1707 | EM_S[INDEX2(2,2,4)]+=tmp7_1 + tmp8_1 + tmp9_1; | EM_S[INDEX2(2,2,4)]+=tmp7_1 + tmp8_1 + tmp9_1; |
1708 | EM_S[INDEX2(1,0,4)]+=tmp0_1 + tmp10_1 + tmp13_1 + tmp3_1; | EM_S[INDEX2(3,2,4)]+=tmp0_1 + tmp1_1 + tmp2_1 + tmp3_1; |
1709 | EM_S[INDEX2(0,3,4)]+=tmp4_1 + tmp6_1 + tmp9_1; | EM_S[INDEX2(0,3,4)]+=tmp4_1 + tmp6_1 + tmp9_1; |
1710 | EM_S[INDEX2(1,1,4)]+=tmp7_1 + tmp8_1 + tmp9_1; | EM_S[INDEX2(1,3,4)]+=tmp11_1 + tmp12_1 + tmp1_1 + tmp2_1; |
1711 | EM_S[INDEX2(2,3,4)]+=tmp0_1 + tmp10_1 + tmp13_1 + tmp3_1; | |
1712 | EM_S[INDEX2(3,3,4)]+=tmp5_1 + tmp7_1 + tmp8_1; | |
1713 | } | } |
1714 | } | } |
1715 | /////////////// | /////////////// |
# | Line 1680 void Rectangle::assemblePDESingle(Paso_S | Line 1719 void Rectangle::assemblePDESingle(Paso_S |
1719 | add_EM_S=true; | add_EM_S=true; |
1720 | const double* B_p=const_cast<escript::Data*>(&B)->getSampleDataRO(e); | const double* B_p=const_cast<escript::Data*>(&B)->getSampleDataRO(e); |
1721 | if (B.actsExpanded()) { | if (B.actsExpanded()) { |
1722 | const register double B_0_0 = B_p[INDEX2(0,0,2)]; | const double B_0_0 = B_p[INDEX2(0,0,2)]; |
1723 | const register double B_1_0 = B_p[INDEX2(1,0,2)]; | const double B_1_0 = B_p[INDEX2(1,0,2)]; |
1724 | const register double B_0_1 = B_p[INDEX2(0,1,2)]; | const double B_0_1 = B_p[INDEX2(0,1,2)]; |
1725 | const register double B_1_1 = B_p[INDEX2(1,1,2)]; | const double B_1_1 = B_p[INDEX2(1,1,2)]; |
1726 | const register double B_0_2 = B_p[INDEX2(0,2,2)]; | const double B_0_2 = B_p[INDEX2(0,2,2)]; |
1727 | const register double B_1_2 = B_p[INDEX2(1,2,2)]; | const double B_1_2 = B_p[INDEX2(1,2,2)]; |
1728 | const register double B_0_3 = B_p[INDEX2(0,3,2)]; | const double B_0_3 = B_p[INDEX2(0,3,2)]; |
1729 | const register double B_1_3 = B_p[INDEX2(1,3,2)]; | const double B_1_3 = B_p[INDEX2(1,3,2)]; |
1730 | const register double tmp3_0 = B_0_0 + B_0_2; | const double tmp0_0 = B_1_0 + B_1_1; |
1731 | const register double tmp1_0 = B_1_2 + B_1_3; | const double tmp1_0 = B_1_2 + B_1_3; |
1732 | const register double tmp2_0 = B_0_1 + B_0_3; | const double tmp2_0 = B_0_1 + B_0_3; |
1733 | const register double tmp0_0 = B_1_0 + B_1_1; | const double tmp3_0 = B_0_0 + B_0_2; |
1734 | const register double tmp63_1 = B_1_1*w42; | const double tmp63_1 = B_1_1*w42; |
1735 | const register double tmp79_1 = B_1_1*w40; | const double tmp79_1 = B_1_1*w40; |
1736 | const register double tmp37_1 = tmp3_0*w35; | const double tmp37_1 = tmp3_0*w35; |
1737 | const register double tmp8_1 = tmp0_0*w32; | const double tmp8_1 = tmp0_0*w32; |
1738 | const register double tmp71_1 = B_0_1*w34; | const double tmp71_1 = B_0_1*w34; |
1739 | const register double tmp19_1 = B_0_3*w31; | const double tmp19_1 = B_0_3*w31; |
1740 | const register double tmp15_1 = B_0_3*w34; | const double tmp15_1 = B_0_3*w34; |
1741 | const register double tmp9_1 = tmp3_0*w34; | const double tmp9_1 = tmp3_0*w34; |
1742 | const register double tmp35_1 = B_1_0*w36; | const double tmp35_1 = B_1_0*w36; |
1743 | const register double tmp66_1 = B_0_3*w28; | const double tmp66_1 = B_0_3*w28; |
1744 | const register double tmp28_1 = B_1_0*w42; | const double tmp28_1 = B_1_0*w42; |
1745 | const register double tmp22_1 = B_1_0*w40; | const double tmp22_1 = B_1_0*w40; |
1746 | const register double tmp16_1 = B_1_2*w29; | const double tmp16_1 = B_1_2*w29; |
1747 | const register double tmp6_1 = tmp2_0*w35; | const double tmp6_1 = tmp2_0*w35; |
1748 | const register double tmp55_1 = B_1_3*w40; | const double tmp55_1 = B_1_3*w40; |
1749 | const register double tmp50_1 = B_1_3*w42; | const double tmp50_1 = B_1_3*w42; |
1750 | const register double tmp7_1 = tmp1_0*w29; | const double tmp7_1 = tmp1_0*w29; |
1751 | const register double tmp1_1 = tmp1_0*w32; | const double tmp1_1 = tmp1_0*w32; |
1752 | const register double tmp57_1 = B_0_3*w30; | const double tmp57_1 = B_0_3*w30; |
1753 | const register double tmp18_1 = B_1_1*w32; | const double tmp18_1 = B_1_1*w32; |
1754 | const register double tmp53_1 = B_1_0*w41; | const double tmp53_1 = B_1_0*w41; |
1755 | const register double tmp61_1 = B_1_3*w36; | const double tmp61_1 = B_1_3*w36; |
1756 | const register double tmp27_1 = B_0_3*w38; | const double tmp27_1 = B_0_3*w38; |
1757 | const register double tmp64_1 = B_0_2*w30; | const double tmp64_1 = B_0_2*w30; |
1758 | const register double tmp76_1 = B_0_1*w38; | const double tmp76_1 = B_0_1*w38; |
1759 | const register double tmp39_1 = tmp2_0*w34; | const double tmp39_1 = tmp2_0*w34; |
1760 | const register double tmp62_1 = B_0_1*w31; | const double tmp62_1 = B_0_1*w31; |
1761 | const register double tmp56_1 = B_0_0*w31; | const double tmp56_1 = B_0_0*w31; |
1762 | const register double tmp49_1 = B_1_1*w36; | const double tmp49_1 = B_1_1*w36; |
1763 | const register double tmp2_1 = B_0_2*w31; | const double tmp2_1 = B_0_2*w31; |
1764 | const register double tmp23_1 = B_0_2*w33; | const double tmp23_1 = B_0_2*w33; |
1765 | const register double tmp38_1 = B_1_1*w43; | const double tmp38_1 = B_1_1*w43; |
1766 | const register double tmp74_1 = B_1_2*w41; | const double tmp74_1 = B_1_2*w41; |
1767 | const register double tmp43_1 = B_1_1*w41; | const double tmp43_1 = B_1_1*w41; |
1768 | const register double tmp58_1 = B_0_2*w28; | const double tmp58_1 = B_0_2*w28; |
1769 | const register double tmp67_1 = B_0_0*w33; | const double tmp67_1 = B_0_0*w33; |
1770 | const register double tmp33_1 = tmp0_0*w39; | const double tmp33_1 = tmp0_0*w39; |
1771 | const register double tmp4_1 = B_0_0*w28; | const double tmp4_1 = B_0_0*w28; |
1772 | const register double tmp20_1 = B_0_0*w30; | const double tmp20_1 = B_0_0*w30; |
1773 | const register double tmp13_1 = B_0_2*w38; | const double tmp13_1 = B_0_2*w38; |
1774 | const register double tmp65_1 = B_1_2*w43; | const double tmp65_1 = B_1_2*w43; |
1775 | const register double tmp0_1 = tmp0_0*w29; | const double tmp0_1 = tmp0_0*w29; |
1776 | const register double tmp41_1 = tmp3_0*w33; | const double tmp41_1 = tmp3_0*w33; |
1777 | const register double tmp73_1 = B_0_2*w37; | const double tmp73_1 = B_0_2*w37; |
1778 | const register double tmp69_1 = B_0_0*w38; | const double tmp69_1 = B_0_0*w38; |
1779 | const register double tmp48_1 = B_1_2*w39; | const double tmp48_1 = B_1_2*w39; |
1780 | const register double tmp59_1 = B_0_1*w33; | const double tmp59_1 = B_0_1*w33; |
1781 | const register double tmp17_1 = B_1_3*w41; | const double tmp17_1 = B_1_3*w41; |
1782 | const register double tmp5_1 = B_0_3*w33; | const double tmp5_1 = B_0_3*w33; |
1783 | const register double tmp3_1 = B_0_1*w30; | const double tmp3_1 = B_0_1*w30; |
1784 | const register double tmp21_1 = B_0_1*w28; | const double tmp21_1 = B_0_1*w28; |
1785 | const register double tmp42_1 = B_1_0*w29; | const double tmp42_1 = B_1_0*w29; |
1786 | const register double tmp54_1 = B_1_2*w32; | const double tmp54_1 = B_1_2*w32; |
1787 | const register double tmp60_1 = B_1_0*w39; | const double tmp60_1 = B_1_0*w39; |
1788 | const register double tmp32_1 = tmp1_0*w36; | const double tmp32_1 = tmp1_0*w36; |
1789 | const register double tmp10_1 = B_0_1*w37; | const double tmp10_1 = B_0_1*w37; |
1790 | const register double tmp14_1 = B_0_0*w35; | const double tmp14_1 = B_0_0*w35; |
1791 | const register double tmp29_1 = B_0_1*w35; | const double tmp29_1 = B_0_1*w35; |
1792 | const register double tmp26_1 = B_1_2*w36; | const double tmp26_1 = B_1_2*w36; |
1793 | const register double tmp30_1 = B_1_3*w43; | const double tmp30_1 = B_1_3*w43; |
1794 | const register double tmp70_1 = B_0_2*w35; | const double tmp70_1 = B_0_2*w35; |
1795 | const register double tmp34_1 = B_1_3*w39; | const double tmp34_1 = B_1_3*w39; |
1796 | const register double tmp51_1 = B_1_0*w43; | const double tmp51_1 = B_1_0*w43; |
1797 | const register double tmp31_1 = B_0_2*w34; | const double tmp31_1 = B_0_2*w34; |
1798 | const register double tmp45_1 = tmp3_0*w28; | const double tmp45_1 = tmp3_0*w28; |
1799 | const register double tmp11_1 = tmp1_0*w39; | const double tmp11_1 = tmp1_0*w39; |
1800 | const register double tmp52_1 = B_1_1*w29; | const double tmp52_1 = B_1_1*w29; |
1801 | const register double tmp44_1 = B_1_3*w32; | const double tmp44_1 = B_1_3*w32; |
1802 | const register double tmp25_1 = B_1_1*w39; | const double tmp25_1 = B_1_1*w39; |
1803 | const register double tmp47_1 = tmp2_0*w33; | const double tmp47_1 = tmp2_0*w33; |
1804 | const register double tmp72_1 = B_1_3*w29; | const double tmp72_1 = B_1_3*w29; |
1805 | const register double tmp40_1 = tmp2_0*w28; | const double tmp40_1 = tmp2_0*w28; |
1806 | const register double tmp46_1 = B_1_2*w40; | const double tmp46_1 = B_1_2*w40; |
1807 | const register double tmp36_1 = B_1_2*w42; | const double tmp36_1 = B_1_2*w42; |
1808 | const register double tmp24_1 = B_0_0*w37; | const double tmp24_1 = B_0_0*w37; |
1809 | const register double tmp77_1 = B_0_3*w35; | const double tmp77_1 = B_0_3*w35; |
1810 | const register double tmp68_1 = B_0_3*w37; | const double tmp68_1 = B_0_3*w37; |
1811 | const register double tmp78_1 = B_0_0*w34; | const double tmp78_1 = B_0_0*w34; |
1812 | const register double tmp12_1 = tmp0_0*w36; | const double tmp12_1 = tmp0_0*w36; |
1813 | const register double tmp75_1 = B_1_0*w32; | const double tmp75_1 = B_1_0*w32; |
EM_S[INDEX2(0,1,4)]+=tmp0_1 + tmp1_1 + tmp2_1 + tmp3_1 + tmp4_1 + tmp5_1; | ||
EM_S[INDEX2(1,2,4)]+=tmp6_1 + tmp7_1 + tmp8_1 + tmp9_1; | ||
EM_S[INDEX2(3,2,4)]+=tmp10_1 + tmp11_1 + tmp12_1 + tmp13_1 + tmp14_1 + tmp15_1; | ||
1814 | EM_S[INDEX2(0,0,4)]+=tmp16_1 + tmp17_1 + tmp18_1 + tmp19_1 + tmp20_1 + tmp21_1 + tmp22_1 + tmp23_1; | EM_S[INDEX2(0,0,4)]+=tmp16_1 + tmp17_1 + tmp18_1 + tmp19_1 + tmp20_1 + tmp21_1 + tmp22_1 + tmp23_1; |
1815 | EM_S[INDEX2(3,3,4)]+=tmp24_1 + tmp25_1 + tmp26_1 + tmp27_1 + tmp28_1 + tmp29_1 + tmp30_1 + tmp31_1; | EM_S[INDEX2(1,0,4)]+=tmp0_1 + tmp1_1 + tmp68_1 + tmp69_1 + tmp70_1 + tmp71_1; |
1816 | EM_S[INDEX2(2,0,4)]+=tmp45_1 + tmp47_1 + tmp48_1 + tmp49_1 + tmp50_1 + tmp51_1; | |
1817 | EM_S[INDEX2(3,0,4)]+=tmp32_1 + tmp33_1 + tmp6_1 + tmp9_1; | EM_S[INDEX2(3,0,4)]+=tmp32_1 + tmp33_1 + tmp6_1 + tmp9_1; |
1818 | EM_S[INDEX2(3,1,4)]+=tmp34_1 + tmp35_1 + tmp36_1 + tmp37_1 + tmp38_1 + tmp39_1; | EM_S[INDEX2(0,1,4)]+=tmp0_1 + tmp1_1 + tmp2_1 + tmp3_1 + tmp4_1 + tmp5_1; |
1819 | EM_S[INDEX2(1,1,4)]+=tmp72_1 + tmp73_1 + tmp74_1 + tmp75_1 + tmp76_1 + tmp77_1 + tmp78_1 + tmp79_1; | |
1820 | EM_S[INDEX2(2,1,4)]+=tmp32_1 + tmp33_1 + tmp40_1 + tmp41_1; | EM_S[INDEX2(2,1,4)]+=tmp32_1 + tmp33_1 + tmp40_1 + tmp41_1; |
1821 | EM_S[INDEX2(3,1,4)]+=tmp34_1 + tmp35_1 + tmp36_1 + tmp37_1 + tmp38_1 + tmp39_1; | |
1822 | EM_S[INDEX2(0,2,4)]+=tmp42_1 + tmp43_1 + tmp44_1 + tmp45_1 + tmp46_1 + tmp47_1; | EM_S[INDEX2(0,2,4)]+=tmp42_1 + tmp43_1 + tmp44_1 + tmp45_1 + tmp46_1 + tmp47_1; |
1823 | EM_S[INDEX2(2,0,4)]+=tmp45_1 + tmp47_1 + tmp48_1 + tmp49_1 + tmp50_1 + tmp51_1; | EM_S[INDEX2(1,2,4)]+=tmp6_1 + tmp7_1 + tmp8_1 + tmp9_1; |
EM_S[INDEX2(1,3,4)]+=tmp37_1 + tmp39_1 + tmp52_1 + tmp53_1 + tmp54_1 + tmp55_1; | ||
EM_S[INDEX2(2,3,4)]+=tmp11_1 + tmp12_1 + tmp56_1 + tmp57_1 + tmp58_1 + tmp59_1; | ||
1824 | EM_S[INDEX2(2,2,4)]+=tmp60_1 + tmp61_1 + tmp62_1 + tmp63_1 + tmp64_1 + tmp65_1 + tmp66_1 + tmp67_1; | EM_S[INDEX2(2,2,4)]+=tmp60_1 + tmp61_1 + tmp62_1 + tmp63_1 + tmp64_1 + tmp65_1 + tmp66_1 + tmp67_1; |
1825 | EM_S[INDEX2(1,0,4)]+=tmp0_1 + tmp1_1 + tmp68_1 + tmp69_1 + tmp70_1 + tmp71_1; | EM_S[INDEX2(3,2,4)]+=tmp10_1 + tmp11_1 + tmp12_1 + tmp13_1 + tmp14_1 + tmp15_1; |
1826 | EM_S[INDEX2(0,3,4)]+=tmp40_1 + tmp41_1 + tmp7_1 + tmp8_1; | EM_S[INDEX2(0,3,4)]+=tmp40_1 + tmp41_1 + tmp7_1 + tmp8_1; |
1827 | EM_S[INDEX2(1,1,4)]+=tmp72_1 + tmp73_1 + tmp74_1 + tmp75_1 + tmp76_1 + tmp77_1 + tmp78_1 + tmp79_1; | EM_S[INDEX2(1,3,4)]+=tmp37_1 + tmp39_1 + tmp52_1 + tmp53_1 + tmp54_1 + tmp55_1; |
1828 | } else { /* constant data */ | EM_S[INDEX2(2,3,4)]+=tmp11_1 + tmp12_1 + tmp56_1 + tmp57_1 + tmp58_1 + tmp59_1; |
1829 | const register double B_0 = B_p[0]; | EM_S[INDEX2(3,3,4)]+=tmp24_1 + tmp25_1 + tmp26_1 + tmp27_1 + tmp28_1 + tmp29_1 + tmp30_1 + tmp31_1; |
1830 | const register double B_1 = B_p[1]; | } else { // constant data |
1831 | const register double tmp6_1 = B_1*w50; | const double B_0 = B_p[0]; |
1832 | const register double tmp1_1 = B_1*w45; | const double B_1 = B_p[1]; |
1833 | const register double tmp5_1 = B_1*w49; | const double tmp0_1 = B_0*w44; |
1834 | const register double tmp4_1 = B_1*w48; | const double tmp1_1 = B_1*w45; |
1835 | const register double tmp0_1 = B_0*w44; | const double tmp2_1 = B_0*w46; |
1836 | const register double tmp2_1 = B_0*w46; | const double tmp3_1 = B_0*w47; |
1837 | const register double tmp7_1 = B_0*w51; | const double tmp4_1 = B_1*w48; |
1838 | const register double tmp3_1 = B_0*w47; | const double tmp5_1 = B_1*w49; |
1839 | EM_S[INDEX2(0,1,4)]+=tmp0_1 + tmp1_1; | const double tmp6_1 = B_1*w50; |
1840 | EM_S[INDEX2(1,2,4)]+=tmp1_1 + tmp2_1; | const double tmp7_1 = B_0*w51; |
EM_S[INDEX2(3,2,4)]+=tmp3_1 + tmp4_1; | ||
1841 | EM_S[INDEX2(0,0,4)]+=tmp0_1 + tmp5_1; | EM_S[INDEX2(0,0,4)]+=tmp0_1 + tmp5_1; |
1842 | EM_S[INDEX2(3,3,4)]+=tmp3_1 + tmp6_1; | EM_S[INDEX2(1,0,4)]+=tmp1_1 + tmp3_1; |
1843 | EM_S[INDEX2(2,0,4)]+=tmp6_1 + tmp7_1; | |
1844 | EM_S[INDEX2(3,0,4)]+=tmp2_1 + tmp4_1; | EM_S[INDEX2(3,0,4)]+=tmp2_1 + tmp4_1; |
1845 | EM_S[INDEX2(3,1,4)]+=tmp2_1 + tmp6_1; | EM_S[INDEX2(0,1,4)]+=tmp0_1 + tmp1_1; |
1846 | EM_S[INDEX2(1,1,4)]+=tmp3_1 + tmp5_1; | |
1847 | EM_S[INDEX2(2,1,4)]+=tmp4_1 + tmp7_1; | EM_S[INDEX2(2,1,4)]+=tmp4_1 + tmp7_1; |
1848 | EM_S[INDEX2(3,1,4)]+=tmp2_1 + tmp6_1; | |
1849 | EM_S[INDEX2(0,2,4)]+=tmp5_1 + tmp7_1; | EM_S[INDEX2(0,2,4)]+=tmp5_1 + tmp7_1; |
1850 | EM_S[INDEX2(2,0,4)]+=tmp6_1 + tmp7_1; | EM_S[INDEX2(1,2,4)]+=tmp1_1 + tmp2_1; |
EM_S[INDEX2(1,3,4)]+=tmp2_1 + tmp5_1; | ||
EM_S[INDEX2(2,3,4)]+=tmp0_1 + tmp4_1; | ||
1851 | EM_S[INDEX2(2,2,4)]+=tmp0_1 + tmp6_1; | EM_S[INDEX2(2,2,4)]+=tmp0_1 + tmp6_1; |
1852 | EM_S[INDEX2(1,0,4)]+=tmp1_1 + tmp3_1; | EM_S[INDEX2(3,2,4)]+=tmp3_1 + tmp4_1; |
1853 | EM_S[INDEX2(0,3,4)]+=tmp1_1 + tmp7_1; | EM_S[INDEX2(0,3,4)]+=tmp1_1 + tmp7_1; |
1854 | EM_S[INDEX2(1,1,4)]+=tmp3_1 + tmp5_1; | EM_S[INDEX2(1,3,4)]+=tmp2_1 + tmp5_1; |
1855 | EM_S[INDEX2(2,3,4)]+=tmp0_1 + tmp4_1; | |
1856 | EM_S[INDEX2(3,3,4)]+=tmp3_1 + tmp6_1; | |
1857 | } | } |
1858 | } | } |
1859 | /////////////// | /////////////// |
# | Line 1824 void Rectangle::assemblePDESingle(Paso_S | Line 1863 void Rectangle::assemblePDESingle(Paso_S |
1863 | add_EM_S=true; | add_EM_S=true; |
1864 | const double* C_p=const_cast<escript::Data*>(&C)->getSampleDataRO(e); | const double* C_p=const_cast<escript::Data*>(&C)->getSampleDataRO(e); |
1865 | if (C.actsExpanded()) { | if (C.actsExpanded()) { |
1866 | const register double C_0_0 = C_p[INDEX2(0,0,2)]; | const double C_0_0 = C_p[INDEX2(0,0,2)]; |
1867 | const register double C_1_0 = C_p[INDEX2(1,0,2)]; | const double C_1_0 = C_p[INDEX2(1,0,2)]; |
1868 | const register double C_0_1 = C_p[INDEX2(0,1,2)]; | const double C_0_1 = C_p[INDEX2(0,1,2)]; |
1869 | const register double C_1_1 = C_p[INDEX2(1,1,2)]; | const double C_1_1 = C_p[INDEX2(1,1,2)]; |
1870 | const register double C_0_2 = C_p[INDEX2(0,2,2)]; | const double C_0_2 = C_p[INDEX2(0,2,2)]; |
1871 | const register double C_1_2 = C_p[INDEX2(1,2,2)]; | const double C_1_2 = C_p[INDEX2(1,2,2)]; |
1872 | const register double C_0_3 = C_p[INDEX2(0,3,2)]; | const double C_0_3 = C_p[INDEX2(0,3,2)]; |
1873 | const register double C_1_3 = C_p[INDEX2(1,3,2)]; | const double C_1_3 = C_p[INDEX2(1,3,2)]; |
1874 | const register double tmp2_0 = C_0_1 + C_0_3; | const double tmp0_0 = C_1_0 + C_1_1; |
1875 | const register double tmp1_0 = C_1_2 + C_1_3; | const double tmp1_0 = C_1_2 + C_1_3; |
1876 | const register double tmp3_0 = C_0_0 + C_0_2; | const double tmp2_0 = C_0_1 + C_0_3; |
1877 | const register double tmp0_0 = C_1_0 + C_1_1; | const double tmp3_0 = C_0_0 + C_0_2; |
1878 | const register double tmp64_1 = C_0_2*w30; | const double tmp64_1 = C_0_2*w30; |
1879 | const register double tmp14_1 = C_0_2*w28; | const double tmp14_1 = C_0_2*w28; |
1880 | const register double tmp19_1 = C_0_3*w31; | const double tmp19_1 = C_0_3*w31; |
1881 | const register double tmp22_1 = C_1_0*w40; | const double tmp22_1 = C_1_0*w40; |
1882 | const register double tmp37_1 = tmp3_0*w35; | const double tmp37_1 = tmp3_0*w35; |
1883 | const register double tmp29_1 = C_0_1*w35; | const double tmp29_1 = C_0_1*w35; |
1884 | const register double tmp73_1 = C_0_2*w37; | const double tmp73_1 = C_0_2*w37; |
1885 | const register double tmp74_1 = C_1_2*w41; | const double tmp74_1 = C_1_2*w41; |
1886 | const register double tmp52_1 = C_1_3*w39; | const double tmp52_1 = C_1_3*w39; |
1887 | const register double tmp25_1 = C_1_1*w39; | const double tmp25_1 = C_1_1*w39; |
1888 | const register double tmp62_1 = C_0_1*w31; | const double tmp62_1 = C_0_1*w31; |
1889 | const register double tmp79_1 = C_1_1*w40; | const double tmp79_1 = C_1_1*w40; |
1890 | const register double tmp43_1 = C_1_1*w36; | const double tmp43_1 = C_1_1*w36; |
1891 | const register double tmp27_1 = C_0_3*w38; | const double tmp27_1 = C_0_3*w38; |
1892 | const register double tmp28_1 = C_1_0*w42; | const double tmp28_1 = C_1_0*w42; |
1893 | const register double tmp63_1 = C_1_1*w42; | const double tmp63_1 = C_1_1*w42; |
1894 | const register double tmp59_1 = C_0_3*w34; | const double tmp59_1 = C_0_3*w34; |
1895 | const register double tmp72_1 = C_1_3*w29; | const double tmp72_1 = C_1_3*w29; |
1896 | const register double tmp40_1 = tmp2_0*w35; | const double tmp40_1 = tmp2_0*w35; |
1897 | const register double tmp13_1 = C_0_3*w30; | const double tmp13_1 = C_0_3*w30; |
1898 | const register double tmp51_1 = C_1_2*w40; | const double tmp51_1 = C_1_2*w40; |
1899 | const register double tmp54_1 = C_1_2*w42; | const double tmp54_1 = C_1_2*w42; |
1900 | const register double tmp12_1 = C_0_0*w31; | const double tmp12_1 = C_0_0*w31; |
1901 | const register double tmp2_1 = tmp1_0*w32; | const double tmp2_1 = tmp1_0*w32; |
1902 | const register double tmp68_1 = C_0_2*w31; | const double tmp68_1 = C_0_2*w31; |
1903 | const register double tmp75_1 = C_1_0*w32; | const double tmp75_1 = C_1_0*w32; |
1904 | const register double tmp49_1 = C_1_1*w41; | const double tmp49_1 = C_1_1*w41; |
1905 | const register double tmp4_1 = C_0_2*w35; | const double tmp4_1 = C_0_2*w35; |
1906 | const register double tmp66_1 = C_0_3*w28; | const double tmp66_1 = C_0_3*w28; |
1907 | const register double tmp56_1 = C_0_1*w37; | const double tmp56_1 = C_0_1*w37; |
1908 | const register double tmp5_1 = C_0_1*w34; | const double tmp5_1 = C_0_1*w34; |
1909 | const register double tmp38_1 = tmp2_0*w34; | const double tmp38_1 = tmp2_0*w34; |
1910 | const register double tmp76_1 = C_0_1*w38; | const double tmp76_1 = C_0_1*w38; |
1911 | const register double tmp21_1 = C_0_1*w28; | const double tmp21_1 = C_0_1*w28; |
1912 | const register double tmp69_1 = C_0_1*w30; | const double tmp69_1 = C_0_1*w30; |
1913 | const register double tmp53_1 = C_1_0*w36; | const double tmp53_1 = C_1_0*w36; |
1914 | const register double tmp42_1 = C_1_2*w39; | const double tmp42_1 = C_1_2*w39; |
1915 | const register double tmp32_1 = tmp1_0*w29; | const double tmp32_1 = tmp1_0*w29; |
1916 | const register double tmp45_1 = C_1_0*w43; | const double tmp45_1 = C_1_0*w43; |
1917 | const register double tmp33_1 = tmp0_0*w32; | const double tmp33_1 = tmp0_0*w32; |
1918 | const register double tmp35_1 = C_1_0*w41; | const double tmp35_1 = C_1_0*w41; |
1919 | const register double tmp26_1 = C_1_2*w36; | const double tmp26_1 = C_1_2*w36; |
1920 | const register double tmp67_1 = C_0_0*w33; | const double tmp67_1 = C_0_0*w33; |
1921 | const register double tmp31_1 = C_0_2*w34; | const double tmp31_1 = C_0_2*w34; |
1922 | const register double tmp20_1 = C_0_0*w30; | const double tmp20_1 = C_0_0*w30; |
1923 | const register double tmp70_1 = C_0_0*w28; | const double tmp70_1 = C_0_0*w28; |
1924 | const register double tmp8_1 = tmp0_0*w39; | const double tmp8_1 = tmp0_0*w39; |
1925 | const register double tmp30_1 = C_1_3*w43; | const double tmp30_1 = C_1_3*w43; |
1926 | const register double tmp0_1 = tmp0_0*w29; | const double tmp0_1 = tmp0_0*w29; |
1927 | const register double tmp17_1 = C_1_3*w41; | const double tmp17_1 = C_1_3*w41; |
1928 | const register double tmp58_1 = C_0_0*w35; | const double tmp58_1 = C_0_0*w35; |
1929 | const register double tmp9_1 = tmp3_0*w33; | const double tmp9_1 = tmp3_0*w33; |
1930 | const register double tmp61_1 = C_1_3*w36; | const double tmp61_1 = C_1_3*w36; |
1931 | const register double tmp41_1 = tmp3_0*w34; | const double tmp41_1 = tmp3_0*w34; |
1932 | const register double tmp50_1 = C_1_3*w32; | const double tmp50_1 = C_1_3*w32; |
1933 | const register double tmp18_1 = C_1_1*w32; | const double tmp18_1 = C_1_1*w32; |
1934 | const register double tmp6_1 = tmp1_0*w36; | const double tmp6_1 = tmp1_0*w36; |
1935 | const register double tmp3_1 = C_0_0*w38; | const double tmp3_1 = C_0_0*w38; |
1936 | const register double tmp34_1 = C_1_1*w29; | const double tmp34_1 = C_1_1*w29; |
1937 | const register double tmp77_1 = C_0_3*w35; | const double tmp77_1 = C_0_3*w35; |
1938 | const register double tmp65_1 = C_1_2*w43; | const double tmp65_1 = C_1_2*w43; |
1939 | const register double tmp71_1 = C_0_3*w33; | const double tmp71_1 = C_0_3*w33; |
1940 | const register double tmp55_1 = C_1_1*w43; | const double tmp55_1 = C_1_1*w43; |
1941 | const register double tmp46_1 = tmp3_0*w28; | const double tmp46_1 = tmp3_0*w28; |
1942 | const register double tmp24_1 = C_0_0*w37; | const double tmp24_1 = C_0_0*w37; |
1943 | const register double tmp10_1 = tmp1_0*w39; | const double tmp10_1 = tmp1_0*w39; |
1944 | const register double tmp48_1 = C_1_0*w29; | const double tmp48_1 = C_1_0*w29; |
1945 | const register double tmp15_1 = C_0_1*w33; | const double tmp15_1 = C_0_1*w33; |
1946 | const register double tmp36_1 = C_1_2*w32; | const double tmp36_1 = C_1_2*w32; |
1947 | const register double tmp60_1 = C_1_0*w39; | const double tmp60_1 = C_1_0*w39; |
1948 | const register double tmp47_1 = tmp2_0*w33; | const double tmp47_1 = tmp2_0*w33; |
1949 | const register double tmp16_1 = C_1_2*w29; | const double tmp16_1 = C_1_2*w29; |
1950 | const register double tmp1_1 = C_0_3*w37; | const double tmp1_1 = C_0_3*w37; |
1951 | const register double tmp7_1 = tmp2_0*w28; | const double tmp7_1 = tmp2_0*w28; |
1952 | const register double tmp39_1 = C_1_3*w40; | const double tmp39_1 = C_1_3*w40; |
1953 | const register double tmp44_1 = C_1_3*w42; | const double tmp44_1 = C_1_3*w42; |
1954 | const register double tmp57_1 = C_0_2*w38; | const double tmp57_1 = C_0_2*w38; |
1955 | const register double tmp78_1 = C_0_0*w34; | const double tmp78_1 = C_0_0*w34; |
1956 | const register double tmp11_1 = tmp0_0*w36; | const double tmp11_1 = tmp0_0*w36; |
1957 | const register double tmp23_1 = C_0_2*w33; | const double tmp23_1 = C_0_2*w33; |
EM_S[INDEX2(0,1,4)]+=tmp0_1 + tmp1_1 + tmp2_1 + tmp3_1 + tmp4_1 + tmp5_1; | ||
EM_S[INDEX2(1,2,4)]+=tmp6_1 + tmp7_1 + tmp8_1 + tmp9_1; | ||
EM_S[INDEX2(3,2,4)]+=tmp10_1 + tmp11_1 + tmp12_1 + tmp13_1 + tmp14_1 + tmp15_1; | ||
1958 | EM_S[INDEX2(0,0,4)]+=tmp16_1 + tmp17_1 + tmp18_1 + tmp19_1 + tmp20_1 + tmp21_1 + tmp22_1 + tmp23_1; | EM_S[INDEX2(0,0,4)]+=tmp16_1 + tmp17_1 + tmp18_1 + tmp19_1 + tmp20_1 + tmp21_1 + tmp22_1 + tmp23_1; |
1959 | EM_S[INDEX2(3,3,4)]+=tmp24_1 + tmp25_1 + tmp26_1 + tmp27_1 + tmp28_1 + tmp29_1 + tmp30_1 + tmp31_1; | EM_S[INDEX2(1,0,4)]+=tmp0_1 + tmp2_1 + tmp68_1 + tmp69_1 + tmp70_1 + tmp71_1; |
1960 | EM_S[INDEX2(2,0,4)]+=tmp46_1 + tmp47_1 + tmp48_1 + tmp49_1 + tmp50_1 + tmp51_1; | |
1961 | EM_S[INDEX2(3,0,4)]+=tmp32_1 + tmp33_1 + tmp7_1 + tmp9_1; | EM_S[INDEX2(3,0,4)]+=tmp32_1 + tmp33_1 + tmp7_1 + tmp9_1; |
1962 | EM_S[INDEX2(3,1,4)]+=tmp34_1 + tmp35_1 + tmp36_1 + tmp37_1 + tmp38_1 + tmp39_1; | EM_S[INDEX2(0,1,4)]+=tmp0_1 + tmp1_1 + tmp2_1 + tmp3_1 + tmp4_1 + tmp5_1; |
1963 | EM_S[INDEX2(1,1,4)]+=tmp72_1 + tmp73_1 + tmp74_1 + tmp75_1 + tmp76_1 + tmp77_1 + tmp78_1 + tmp79_1; | |
1964 | EM_S[INDEX2(2,1,4)]+=tmp32_1 + tmp33_1 + tmp40_1 + tmp41_1; | EM_S[INDEX2(2,1,4)]+=tmp32_1 + tmp33_1 + tmp40_1 + tmp41_1; |
1965 | EM_S[INDEX2(3,1,4)]+=tmp34_1 + tmp35_1 + tmp36_1 + tmp37_1 + tmp38_1 + tmp39_1; | |
1966 | EM_S[INDEX2(0,2,4)]+=tmp42_1 + tmp43_1 + tmp44_1 + tmp45_1 + tmp46_1 + tmp47_1; | EM_S[INDEX2(0,2,4)]+=tmp42_1 + tmp43_1 + tmp44_1 + tmp45_1 + tmp46_1 + tmp47_1; |
1967 | EM_S[INDEX2(2,0,4)]+=tmp46_1 + tmp47_1 + tmp48_1 + tmp49_1 + tmp50_1 + tmp51_1; | EM_S[INDEX2(1,2,4)]+=tmp6_1 + tmp7_1 + tmp8_1 + tmp9_1; |
EM_S[INDEX2(1,3,4)]+=tmp37_1 + tmp38_1 + tmp52_1 + tmp53_1 + tmp54_1 + tmp55_1; | ||
EM_S[INDEX2(2,3,4)]+=tmp10_1 + tmp11_1 + tmp56_1 + tmp57_1 + tmp58_1 + tmp59_1; | ||
1968 | EM_S[INDEX2(2,2,4)]+=tmp60_1 + tmp61_1 + tmp62_1 + tmp63_1 + tmp64_1 + tmp65_1 + tmp66_1 + tmp67_1; | EM_S[INDEX2(2,2,4)]+=tmp60_1 + tmp61_1 + tmp62_1 + tmp63_1 + tmp64_1 + tmp65_1 + tmp66_1 + tmp67_1; |
1969 | EM_S[INDEX2(1,0,4)]+=tmp0_1 + tmp2_1 + tmp68_1 + tmp69_1 + tmp70_1 + tmp71_1; | EM_S[INDEX2(3,2,4)]+=tmp10_1 + tmp11_1 + tmp12_1 + tmp13_1 + tmp14_1 + tmp15_1; |
1970 | EM_S[INDEX2(0,3,4)]+=tmp40_1 + tmp41_1 + tmp6_1 + tmp8_1; | EM_S[INDEX2(0,3,4)]+=tmp40_1 + tmp41_1 + tmp6_1 + tmp8_1; |
1971 | EM_S[INDEX2(1,1,4)]+=tmp72_1 + tmp73_1 + tmp74_1 + tmp75_1 + tmp76_1 + tmp77_1 + tmp78_1 + tmp79_1; | EM_S[INDEX2(1,3,4)]+=tmp37_1 + tmp38_1 + tmp52_1 + tmp53_1 + tmp54_1 + tmp55_1; |
1972 | } else { /* constant data */ | EM_S[INDEX2(2,3,4)]+=tmp10_1 + tmp11_1 + tmp56_1 + tmp57_1 + tmp58_1 + tmp59_1; |
1973 | const register double C_0 = C_p[0]; | EM_S[INDEX2(3,3,4)]+=tmp24_1 + tmp25_1 + tmp26_1 + tmp27_1 + tmp28_1 + tmp29_1 + tmp30_1 + tmp31_1; |
1974 | const register double C_1 = C_p[1]; | } else { // constant data |
1975 | const register double tmp1_1 = C_1*w45; | const double C_0 = C_p[0]; |
1976 | const register double tmp3_1 = C_0*w51; | const double C_1 = C_p[1]; |
1977 | const register double tmp4_1 = C_0*w44; | const double tmp0_1 = C_0*w47; |
1978 | const register double tmp7_1 = C_0*w46; | const double tmp1_1 = C_1*w45; |
1979 | const register double tmp5_1 = C_1*w49; | const double tmp2_1 = C_1*w48; |
1980 | const register double tmp2_1 = C_1*w48; | const double tmp3_1 = C_0*w51; |
1981 | const register double tmp0_1 = C_0*w47; | const double tmp4_1 = C_0*w44; |
1982 | const register double tmp6_1 = C_1*w50; | const double tmp5_1 = C_1*w49; |
1983 | EM_S[INDEX2(0,1,4)]+=tmp0_1 + tmp1_1; | const double tmp6_1 = C_1*w50; |
1984 | EM_S[INDEX2(1,2,4)]+=tmp2_1 + tmp3_1; | const double tmp7_1 = C_0*w46; |
EM_S[INDEX2(3,2,4)]+=tmp2_1 + tmp4_1; | ||
1985 | EM_S[INDEX2(0,0,4)]+=tmp4_1 + tmp5_1; | EM_S[INDEX2(0,0,4)]+=tmp4_1 + tmp5_1; |
1986 | EM_S[INDEX2(3,3,4)]+=tmp0_1 + tmp6_1; | EM_S[INDEX2(1,0,4)]+=tmp1_1 + tmp4_1; |
1987 | EM_S[INDEX2(2,0,4)]+=tmp3_1 + tmp5_1; | |
1988 | EM_S[INDEX2(3,0,4)]+=tmp1_1 + tmp3_1; | EM_S[INDEX2(3,0,4)]+=tmp1_1 + tmp3_1; |
1989 | EM_S[INDEX2(3,1,4)]+=tmp5_1 + tmp7_1; | EM_S[INDEX2(0,1,4)]+=tmp0_1 + tmp1_1; |
1990 | EM_S[INDEX2(1,1,4)]+=tmp0_1 + tmp5_1; | |
1991 | EM_S[INDEX2(2,1,4)]+=tmp1_1 + tmp7_1; | EM_S[INDEX2(2,1,4)]+=tmp1_1 + tmp7_1; |
1992 | EM_S[INDEX2(3,1,4)]+=tmp5_1 + tmp7_1; | |
1993 | EM_S[INDEX2(0,2,4)]+=tmp3_1 + tmp6_1; | EM_S[INDEX2(0,2,4)]+=tmp3_1 + tmp6_1; |
1994 | EM_S[INDEX2(2,0,4)]+=tmp3_1 + tmp5_1; | EM_S[INDEX2(1,2,4)]+=tmp2_1 + tmp3_1; |
EM_S[INDEX2(1,3,4)]+=tmp6_1 + tmp7_1; | ||
EM_S[INDEX2(2,3,4)]+=tmp0_1 + tmp2_1; | ||
1995 | EM_S[INDEX2(2,2,4)]+=tmp4_1 + tmp6_1; | EM_S[INDEX2(2,2,4)]+=tmp4_1 + tmp6_1; |
1996 | EM_S[INDEX2(1,0,4)]+=tmp1_1 + tmp4_1; | EM_S[INDEX2(3,2,4)]+=tmp2_1 + tmp4_1; |
1997 | EM_S[INDEX2(0,3,4)]+=tmp2_1 + tmp7_1; | EM_S[INDEX2(0,3,4)]+=tmp2_1 + tmp7_1; |
1998 | EM_S[INDEX2(1,1,4)]+=tmp0_1 + tmp5_1; | EM_S[INDEX2(1,3,4)]+=tmp6_1 + tmp7_1; |
1999 | EM_S[INDEX2(2,3,4)]+=tmp0_1 + tmp2_1; | |
2000 | EM_S[INDEX2(3,3,4)]+=tmp0_1 + tmp6_1; | |
2001 | } | } |
2002 | } | } |
2003 | /////////////// | /////////////// |
# | Line 1968 void Rectangle::assemblePDESingle(Paso_S | Line 2007 void Rectangle::assemblePDESingle(Paso_S |
2007 | add_EM_S=true; | add_EM_S=true; |
2008 | const double* D_p=const_cast<escript::Data*>(&D)->getSampleDataRO(e); | const double* D_p=const_cast<escript::Data*>(&D)->getSampleDataRO(e); |
2009 | if (D.actsExpanded()) { | if (D.actsExpanded()) { |
2010 | const register double D_0 = D_p[0]; | const double D_0 = D_p[0]; |
2011 | const register double D_1 = D_p[1]; | const double D_1 = D_p[1]; |
2012 | const register double D_2 = D_p[2]; | const double D_2 = D_p[2]; |
2013 | const register double D_3 = D_p[3]; | const double D_3 = D_p[3]; |
2014 | const register double tmp4_0 = D_1 + D_3; | const double tmp0_0 = D_0 + D_1; |
2015 | const register double tmp2_0 = D_0 + D_1 + D_2 + D_3; | const double tmp1_0 = D_2 + D_3; |
2016 | const register double tmp5_0 = D_0 + D_2; | const double tmp2_0 = D_0 + D_1 + D_2 + D_3; |
2017 | const register double tmp0_0 = D_0 + D_1; | const double tmp3_0 = D_1 + D_2; |
2018 | const register double tmp6_0 = D_0 + D_3; | const double tmp4_0 = D_1 + D_3; |
2019 | const register double tmp1_0 = D_2 + D_3; | const double tmp5_0 = D_0 + D_2; |
2020 | const register double tmp3_0 = D_1 + D_2; | const double tmp6_0 = D_0 + D_3; |
2021 | const register double tmp16_1 = D_1*w56; | const double tmp0_1 = tmp0_0*w52; |
2022 | const register double tmp14_1 = tmp6_0*w54; | const double tmp1_1 = tmp1_0*w53; |
2023 | const register double tmp8_1 = D_3*w55; | const double tmp2_1 = tmp2_0*w54; |
2024 | const register double tmp2_1 = tmp2_0*w54; | const double tmp3_1 = tmp1_0*w52; |
2025 | const register double tmp12_1 = tmp5_0*w52; | const double tmp4_1 = tmp0_0*w53; |
2026 | const register double tmp4_1 = tmp0_0*w53; | const double tmp5_1 = tmp3_0*w54; |
2027 | const register double tmp3_1 = tmp1_0*w52; | const double tmp6_1 = D_0*w55; |
2028 | const register double tmp13_1 = tmp4_0*w53; | const double tmp7_1 = D_3*w56; |
2029 | const register double tmp10_1 = tmp4_0*w52; | const double tmp8_1 = D_3*w55; |
2030 | const register double tmp1_1 = tmp1_0*w53; | const double tmp9_1 = D_0*w56; |
2031 | const register double tmp7_1 = D_3*w56; | const double tmp10_1 = tmp4_0*w52; |
2032 | const register double tmp0_1 = tmp0_0*w52; | const double tmp11_1 = tmp5_0*w53; |
2033 | const register double tmp11_1 = tmp5_0*w53; | const double tmp12_1 = tmp5_0*w52; |
2034 | const register double tmp9_1 = D_0*w56; | const double tmp13_1 = tmp4_0*w53; |
2035 | const register double tmp5_1 = tmp3_0*w54; | const double tmp14_1 = tmp6_0*w54; |
2036 | const register double tmp18_1 = D_2*w56; | const double tmp15_1 = D_2*w55; |
2037 | const register double tmp17_1 = D_1*w55; | const double tmp16_1 = D_1*w56; |
2038 | const register double tmp6_1 = D_0*w55; | const double tmp17_1 = D_1*w55; |
2039 | const register double tmp15_1 = D_2*w55; | const double tmp18_1 = D_2*w56; |
EM_S[INDEX2(0,1,4)]+=tmp0_1 + tmp1_1; | ||
EM_S[INDEX2(1,2,4)]+=tmp2_1; | ||
EM_S[INDEX2(3,2,4)]+=tmp3_1 + tmp4_1; | ||
2040 | EM_S[INDEX2(0,0,4)]+=tmp5_1 + tmp6_1 + tmp7_1; | EM_S[INDEX2(0,0,4)]+=tmp5_1 + tmp6_1 + tmp7_1; |
2041 | EM_S[INDEX2(3,3,4)]+=tmp5_1 + tmp8_1 + tmp9_1; | EM_S[INDEX2(1,0,4)]+=tmp0_1 + tmp1_1; |
2042 | EM_S[INDEX2(2,0,4)]+=tmp12_1 + tmp13_1; | |
2043 | EM_S[INDEX2(3,0,4)]+=tmp2_1; | EM_S[INDEX2(3,0,4)]+=tmp2_1; |
2044 | EM_S[INDEX2(3,1,4)]+=tmp10_1 + tmp11_1; | EM_S[INDEX2(0,1,4)]+=tmp0_1 + tmp1_1; |
2045 | EM_S[INDEX2(1,1,4)]+=tmp14_1 + tmp17_1 + tmp18_1; | |
2046 | EM_S[INDEX2(2,1,4)]+=tmp2_1; | EM_S[INDEX2(2,1,4)]+=tmp2_1; |
2047 | EM_S[INDEX2(3,1,4)]+=tmp10_1 + tmp11_1; | |
2048 | EM_S[INDEX2(0,2,4)]+=tmp12_1 + tmp13_1; | EM_S[INDEX2(0,2,4)]+=tmp12_1 + tmp13_1; |
2049 | EM_S[INDEX2(2,0,4)]+=tmp12_1 + tmp13_1; | EM_S[INDEX2(1,2,4)]+=tmp2_1; |
EM_S[INDEX2(1,3,4)]+=tmp10_1 + tmp11_1; | ||
EM_S[INDEX2(2,3,4)]+=tmp3_1 + tmp4_1; | ||
2050 | EM_S[INDEX2(2,2,4)]+=tmp14_1 + tmp15_1 + tmp16_1; | EM_S[INDEX2(2,2,4)]+=tmp14_1 + tmp15_1 + tmp16_1; |
2051 | EM_S[INDEX2(1,0,4)]+=tmp0_1 + tmp1_1; | EM_S[INDEX2(3,2,4)]+=tmp3_1 + tmp4_1; |
2052 | EM_S[INDEX2(0,3,4)]+=tmp2_1; | EM_S[INDEX2(0,3,4)]+=tmp2_1; |
2053 | EM_S[INDEX2(1,1,4)]+=tmp14_1 + tmp17_1 + tmp18_1; | EM_S[INDEX2(1,3,4)]+=tmp10_1 + tmp11_1; |
2054 | } else { /* constant data */ | EM_S[INDEX2(2,3,4)]+=tmp3_1 + tmp4_1; |
2055 | const register double D_0 = D_p[0]; | EM_S[INDEX2(3,3,4)]+=tmp5_1 + tmp8_1 + tmp9_1; |
2056 | const register double tmp2_1 = D_0*w59; | } else { // constant data |
2057 | const register double tmp1_1 = D_0*w58; | const double tmp0_1 = D_p[0]*w57; |
2058 | const register double tmp0_1 = D_0*w57; | const double tmp1_1 = D_p[0]*w58; |
2059 | EM_S[INDEX2(0,1,4)]+=tmp0_1; | const double tmp2_1 = D_p[0]*w59; |
EM_S[INDEX2(1,2,4)]+=tmp1_1; | ||
EM_S[INDEX2(3,2,4)]+=tmp0_1; | ||
2060 | EM_S[INDEX2(0,0,4)]+=tmp2_1; | EM_S[INDEX2(0,0,4)]+=tmp2_1; |
2061 | EM_S[INDEX2(3,3,4)]+=tmp2_1; | EM_S[INDEX2(1,0,4)]+=tmp0_1; |
2062 | EM_S[INDEX2(2,0,4)]+=tmp0_1; | |
2063 | EM_S[INDEX2(3,0,4)]+=tmp1_1; | EM_S[INDEX2(3,0,4)]+=tmp1_1; |
2064 | EM_S[INDEX2(3,1,4)]+=tmp0_1; | EM_S[INDEX2(0,1,4)]+=tmp0_1; |
2065 | EM_S[INDEX2(1,1,4)]+=tmp2_1; | |
2066 | EM_S[INDEX2(2,1,4)]+=tmp1_1; | EM_S[INDEX2(2,1,4)]+=tmp1_1; |
2067 | EM_S[INDEX2(3,1,4)]+=tmp0_1; | |
2068 | EM_S[INDEX2(0,2,4)]+=tmp0_1; | EM_S[INDEX2(0,2,4)]+=tmp0_1; |
2069 | EM_S[INDEX2(2,0,4)]+=tmp0_1; | EM_S[INDEX2(1,2,4)]+=tmp1_1; |
EM_S[INDEX2(1,3,4)]+=tmp0_1; | ||
EM_S[INDEX2(2,3,4)]+=tmp0_1; | ||
2070 | EM_S[INDEX2(2,2,4)]+=tmp2_1; | EM_S[INDEX2(2,2,4)]+=tmp2_1; |
2071 | EM_S[INDEX2(1,0,4)]+=tmp0_1; | EM_S[INDEX2(3,2,4)]+=tmp0_1; |
2072 | EM_S[INDEX2(0,3,4)]+=tmp1_1; | EM_S[INDEX2(0,3,4)]+=tmp1_1; |
2073 | EM_S[INDEX2(1,1,4)]+=tmp2_1; | EM_S[INDEX2(1,3,4)]+=tmp0_1; |
2074 | EM_S[INDEX2(2,3,4)]+=tmp0_1; | |
2075 | EM_S[INDEX2(3,3,4)]+=tmp2_1; | |
2076 | } | } |
2077 | } | } |
2078 | /////////////// | /////////////// |
# | Line 2044 void Rectangle::assemblePDESingle(Paso_S | Line 2082 void Rectangle::assemblePDESingle(Paso_S |
2082 | add_EM_F=true; | add_EM_F=true; |
2083 | const double* X_p=const_cast<escript::Data*>(&X)->getSampleDataRO(e); | const double* X_p=const_cast<escript::Data*>(&X)->getSampleDataRO(e); |
2084 | if (X.actsExpanded()) { | if (X.actsExpanded()) { |
2085 | const register double X_0_0 = X_p[INDEX2(0,0,2)]; | const double X_0_0 = X_p[INDEX2(0,0,2)]; |
2086 | const register double X_1_0 = X_p[INDEX2(1,0,2)]; | const double X_1_0 = X_p[INDEX2(1,0,2)]; |
2087 | const register double X_0_1 = X_p[INDEX2(0,1,2)]; | const double X_0_1 = X_p[INDEX2(0,1,2)]; |
2088 | const register double X_1_1 = X_p[INDEX2(1,1,2)]; | const double X_1_1 = X_p[INDEX2(1,1,2)]; |
2089 | const register double X_0_2 = X_p[INDEX2(0,2,2)]; | const double X_0_2 = X_p[INDEX2(0,2,2)]; |
2090 | const register double X_1_2 = X_p[INDEX2(1,2,2)]; | const double X_1_2 = X_p[INDEX2(1,2,2)]; |
2091 | const register double X_0_3 = X_p[INDEX2(0,3,2)]; | const double X_0_3 = X_p[INDEX2(0,3,2)]; |
2092 | const register double X_1_3 = X_p[INDEX2(1,3,2)]; | const double X_1_3 = X_p[INDEX2(1,3,2)]; |
2093 | const register double tmp1_0 = X_1_1 + X_1_3; | const double tmp0_0 = X_0_2 + X_0_3; |
2094 | const register double tmp3_0 = X_0_0 + X_0_1; | const double tmp1_0 = X_1_1 + X_1_3; |
2095 | const register double tmp2_0 = X_1_0 + X_1_2; | const double tmp2_0 = X_1_0 + X_1_2; |
2096 | const register double tmp0_0 = X_0_2 + X_0_3; | const double tmp3_0 = X_0_0 + X_0_1; |
2097 | const register double tmp8_1 = tmp2_0*w66; | const double tmp0_1 = tmp0_0*w63; |
2098 | const register double tmp5_1 = tmp3_0*w64; | const double tmp1_1 = tmp1_0*w62; |
2099 | const register double tmp14_1 = tmp0_0*w64; | const double tmp2_1 = tmp2_0*w61; |
2100 | const register double tmp3_1 = tmp3_0*w60; | const double tmp3_1 = tmp3_0*w60; |
2101 | const register double tmp9_1 = tmp3_0*w63; | const double tmp4_1 = tmp0_0*w65; |
2102 | const register double tmp13_1 = tmp3_0*w65; | const double tmp5_1 = tmp3_0*w64; |
2103 | const register double tmp12_1 = tmp1_0*w66; | const double tmp6_1 = tmp2_0*w62; |
2104 | const register double tmp10_1 = tmp0_0*w60; | const double tmp7_1 = tmp1_0*w61; |
2105 | const register double tmp2_1 = tmp2_0*w61; | const double tmp8_1 = tmp2_0*w66; |
2106 | const register double tmp6_1 = tmp2_0*w62; | const double tmp9_1 = tmp3_0*w63; |
2107 | const register double tmp4_1 = tmp0_0*w65; | const double tmp10_1 = tmp0_0*w60; |
2108 | const register double tmp11_1 = tmp1_0*w67; | const double tmp11_1 = tmp1_0*w67; |
2109 | const register double tmp1_1 = tmp1_0*w62; | const double tmp12_1 = tmp1_0*w66; |
2110 | const register double tmp7_1 = tmp1_0*w61; | const double tmp13_1 = tmp3_0*w65; |
2111 | const register double tmp0_1 = tmp0_0*w63; | const double tmp14_1 = tmp0_0*w64; |
2112 | const register double tmp15_1 = tmp2_0*w67; | const double tmp15_1 = tmp2_0*w67; |
2113 | EM_F[0]+=tmp0_1 + tmp1_1 + tmp2_1 + tmp3_1; | EM_F[0]+=tmp0_1 + tmp1_1 + tmp2_1 + tmp3_1; |
2114 | EM_F[1]+=tmp4_1 + tmp5_1 + tmp6_1 + tmp7_1; | EM_F[1]+=tmp4_1 + tmp5_1 + tmp6_1 + tmp7_1; |
2115 | EM_F[2]+=tmp10_1 + tmp11_1 + tmp8_1 + tmp9_1; | EM_F[2]+=tmp10_1 + tmp11_1 + tmp8_1 + tmp9_1; |
2116 | EM_F[3]+=tmp12_1 + tmp13_1 + tmp14_1 + tmp15_1; | EM_F[3]+=tmp12_1 + tmp13_1 + tmp14_1 + tmp15_1; |
2117 | } else { /* constant data */ | } else { // constant data |
2118 | const register double X_0 = X_p[0]; | const double tmp0_1 = X_p[1]*w69; |
2119 | const register double X_1 = X_p[1]; | const double tmp1_1 = X_p[0]*w68; |
2120 | const register double tmp3_1 = X_1*w71; | const double tmp2_1 = X_p[0]*w70; |
2121 | const register double tmp2_1 = X_0*w70; | const double tmp3_1 = X_p[1]*w71; |
const register double tmp1_1 = X_0*w68; | ||
const register double tmp0_1 = X_1*w69; | ||
2122 | EM_F[0]+=tmp0_1 + tmp1_1; | EM_F[0]+=tmp0_1 + tmp1_1; |
2123 | EM_F[1]+=tmp0_1 + tmp2_1; | EM_F[1]+=tmp0_1 + tmp2_1; |
2124 | EM_F[2]+=tmp1_1 + tmp3_1; | EM_F[2]+=tmp1_1 + tmp3_1; |
# | Line 2096 void Rectangle::assemblePDESingle(Paso_S | Line 2132 void Rectangle::assemblePDESingle(Paso_S |
2132 | add_EM_F=true; | add_EM_F=true; |
2133 | const double* Y_p=const_cast<escript::Data*>(&Y)->getSampleDataRO(e); | const double* Y_p=const_cast<escript::Data*>(&Y)->getSampleDataRO(e); |
2134 | if (Y.actsExpanded()) { | if (Y.actsExpanded()) { |
2135 | const register double Y_0 = Y_p[0]; | const double Y_0 = Y_p[0]; |
2136 | const register double Y_1 = Y_p[1]; | const double Y_1 = Y_p[1]; |
2137 | const register double Y_2 = Y_p[2]; | const double Y_2 = Y_p[2]; |
2138 | const register double Y_3 = Y_p[3]; | const double Y_3 = Y_p[3]; |
2139 | const register double tmp0_0 = Y_1 + Y_2; | const double tmp0_0 = Y_1 + Y_2; |
2140 | const register double tmp1_0 = Y_0 + Y_3; | const double tmp1_0 = Y_0 + Y_3; |
2141 | const register double tmp9_1 = Y_0*w74; | const double tmp0_1 = Y_0*w72; |
2142 | const register double tmp4_1 = tmp1_0*w73; | const double tmp1_1 = tmp0_0*w73; |
2143 | const register double tmp5_1 = Y_2*w74; | const double tmp2_1 = Y_3*w74; |
2144 | const register double tmp7_1 = Y_1*w74; | const double tmp3_1 = Y_1*w72; |
2145 | const register double tmp6_1 = Y_2*w72; | const double tmp4_1 = tmp1_0*w73; |
2146 | const register double tmp2_1 = Y_3*w74; | const double tmp5_1 = Y_2*w74; |
2147 | const register double tmp8_1 = Y_3*w72; | const double tmp6_1 = Y_2*w72; |
2148 | const register double tmp3_1 = Y_1*w72; | const double tmp7_1 = Y_1*w74; |
2149 | const register double tmp0_1 = Y_0*w72; | const double tmp8_1 = Y_3*w72; |
2150 | const register double tmp1_1 = tmp0_0*w73; | const double tmp9_1 = Y_0*w74; |
2151 | EM_F[0]+=tmp0_1 + tmp1_1 + tmp2_1; | EM_F[0]+=tmp0_1 + tmp1_1 + tmp2_1; |
2152 | EM_F[1]+=tmp3_1 + tmp4_1 + tmp5_1; | EM_F[1]+=tmp3_1 + tmp4_1 + tmp5_1; |
2153 | EM_F[2]+=tmp4_1 + tmp6_1 + tmp7_1; | EM_F[2]+=tmp4_1 + tmp6_1 + tmp7_1; |
2154 | EM_F[3]+=tmp1_1 + tmp8_1 + tmp9_1; | EM_F[3]+=tmp1_1 + tmp8_1 + tmp9_1; |
2155 | } else { /* constant data */ | } else { // constant data |
2156 | const register double Y_0 = Y_p[0]; | const double tmp0_1 = Y_p[0]*w75; |
const register double tmp0_1 = Y_0*w75; | ||
2157 | EM_F[0]+=tmp0_1; | EM_F[0]+=tmp0_1; |
2158 | EM_F[1]+=tmp0_1; | EM_F[1]+=tmp0_1; |
2159 | EM_F[2]+=tmp0_1; | EM_F[2]+=tmp0_1; |
2160 | EM_F[3]+=tmp0_1; | EM_F[3]+=tmp0_1; |
2161 | } | } |
2162 | } | } |
/* GENERATOR SNIP_PDE_SINGLE BOTTOM */ | ||
2163 | ||
2164 | // add to matrix (if add_EM_S) and RHS (if add_EM_F) | // add to matrix (if add_EM_S) and RHS (if add_EM_F) |
2165 | const index_t firstNode=m_N0*k1+k0; | const index_t firstNode=m_N0*k1+k0; |
2166 | IndexVector rowIndex; | addToMatrixAndRHS(mat, rhs, EM_S, EM_F, add_EM_S, add_EM_F, firstNode); |
2167 | rowIndex.push_back(m_dofMap[firstNode]); | } // end k0 loop |
2168 | rowIndex.push_back(m_dofMap[firstNode+1]); | } // end k1 loop |
2169 | rowIndex.push_back(m_dofMap[firstNode+m_N0]); | } // end of colouring |
2170 | rowIndex.push_back(m_dofMap[firstNode+m_N0+1]); | } // end of parallel region |
2171 | if (add_EM_F) { | } |
2172 | //cout << "-- AddtoRHS -- " << endl; | |
2173 | double *F_p=rhs.getSampleDataRW(0); | //protected |
2174 | for (index_t i=0; i<4; i++) { | void Rectangle::assemblePDESingleReduced(Paso_SystemMatrix* mat, |
2175 | if (rowIndex[i]<getNumDOF()) { | escript::Data& rhs, const escript::Data& A, const escript::Data& B, |
2176 | F_p[rowIndex[i]]+=EM_F[i]; | const escript::Data& C, const escript::Data& D, |
2177 | //cout << "F[" << rowIndex[i] << "]=" << F_p[rowIndex[i]] << endl; | const escript::Data& X, const escript::Data& Y) const |
2178 | { | |
2179 | const double h0 = m_l0/m_gNE0; | |
2180 | const double h1 = m_l1/m_gNE1; | |
2181 | const double w0 = -.25*h1/h0; | |
2182 | const double w1 = .25; | |
2183 | const double w2 = -.25; | |
2184 | const double w3 = .25*h0/h1; | |
2185 | const double w4 = -.25*h0/h1; | |
2186 | const double w5 = .25*h1/h0; | |
2187 | const double w6 = -.125*h1; | |
2188 | const double w7 = -.125*h0; | |
2189 | const double w8 = .125*h1; | |
2190 | const double w9 = .125*h0; | |
2191 | const double w10 = .0625*h0*h1; | |
2192 | const double w11 = -.5*h1; | |
2193 | const double w12 = -.5*h0; | |
2194 | const double w13 = .5*h1; | |
2195 | const double w14 = .5*h0; | |
2196 | const double w15 = .25*h0*h1; | |
2197 | ||
2198 | rhs.requireWrite(); | |
2199 | #pragma omp parallel | |
2200 | { | |
2201 | for (index_t k1_0=0; k1_0<2; k1_0++) { // colouring | |
2202 | #pragma omp for | |
2203 | for (index_t k1=k1_0; k1<m_NE1; k1+=2) { | |
2204 | for (index_t k0=0; k0<m_NE0; ++k0) { | |
2205 | bool add_EM_S=false; | |
2206 | bool add_EM_F=false; | |
2207 | vector<double> EM_S(4*4, 0); | |
2208 | vector<double> EM_F(4, 0); | |
2209 | const index_t e = k0 + m_NE0*k1; | |
2210 | /////////////// | |
2211 | // process A // | |
2212 | /////////////// | |
2213 | if (!A.isEmpty()) { | |
2214 | add_EM_S=true; | |
2215 | const double* A_p=const_cast<escript::Data*>(&A)->getSampleDataRO(e); | |
2216 | const double A_00 = A_p[INDEX2(0,0,2)]; | |
2217 | const double A_10 = A_p[INDEX2(1,0,2)]; | |
2218 | const double A_01 = A_p[INDEX2(0,1,2)]; | |
2219 | const double A_11 = A_p[INDEX2(1,1,2)]; | |
2220 | const double tmp0_0 = A_01 + A_10; | |
2221 | const double tmp0_1 = A_11*w3; | |
2222 | const double tmp1_1 = A_00*w0; | |
2223 | const double tmp2_1 = A_01*w1; | |
2224 | const double tmp3_1 = A_10*w2; | |
2225 | const double tmp4_1 = tmp0_0*w1; | |
2226 | const double tmp5_1 = A_11*w4; | |
2227 | const double tmp6_1 = A_00*w5; | |
2228 | const double tmp7_1 = tmp0_0*w2; | |
2229 | const double tmp8_1 = A_10*w1; | |
2230 | const double tmp9_1 = A_01*w2; | |
2231 | EM_S[INDEX2(0,0,4)]+=tmp0_1 + tmp4_1 + tmp6_1; | |
2232 | EM_S[INDEX2(1,0,4)]+=tmp0_1 + tmp1_1 + tmp8_1 + tmp9_1; | |
2233 | EM_S[INDEX2(2,0,4)]+=tmp2_1 + tmp3_1 + tmp5_1 + tmp6_1; | |
2234 | EM_S[INDEX2(3,0,4)]+=tmp1_1 + tmp5_1 + tmp7_1; | |
2235 | EM_S[INDEX2(0,1,4)]+=tmp0_1 + tmp1_1 + tmp2_1 + tmp3_1; | |
2236 | EM_S[INDEX2(1,1,4)]+=tmp0_1 + tmp6_1 + tmp7_1; | |
2237 | EM_S[INDEX2(2,1,4)]+=tmp1_1 + tmp4_1 + tmp5_1; | |
2238 | EM_S[INDEX2(3,1,4)]+=tmp5_1 + tmp6_1 + tmp8_1 + tmp9_1; | |
2239 | EM_S[INDEX2(0,2,4)]+=tmp5_1 + tmp6_1 + tmp8_1 + tmp9_1; | |
2240 | EM_S[INDEX2(1,2,4)]+=tmp1_1 + tmp4_1 + tmp5_1; | |
2241 | EM_S[INDEX2(2,2,4)]+=tmp0_1 + tmp6_1 + tmp7_1; | |
2242 | EM_S[INDEX2(3,2,4)]+=tmp0_1 + tmp1_1 + tmp2_1 + tmp3_1; | |
2243 | EM_S[INDEX2(0,3,4)]+=tmp1_1 + tmp5_1 + tmp7_1; | |
2244 | EM_S[INDEX2(1,3,4)]+=tmp2_1 + tmp3_1 + tmp5_1 + tmp6_1; | |
2245 | EM_S[INDEX2(2,3,4)]+=tmp0_1 + tmp1_1 + tmp8_1 + tmp9_1; | |
2246 | EM_S[INDEX2(3,3,4)]+=tmp0_1 + tmp4_1 + tmp6_1; | |
2247 | } | |
2248 | /////////////// | |
2249 | // process B // | |
2250 | /////////////// | |
2251 | if (!B.isEmpty()) { | |
2252 | add_EM_S=true; | |
2253 | const double* B_p=const_cast<escript::Data*>(&B)->getSampleDataRO(e); | |
2254 | const double tmp2_1 = B_p[0]*w8; | |
2255 | const double tmp0_1 = B_p[0]*w6; | |
2256 | const double tmp3_1 = B_p[1]*w9; | |
2257 | const double tmp1_1 = B_p[1]*w7; | |
2258 | EM_S[INDEX2(0,0,4)]+=tmp0_1 + tmp1_1; | |
2259 | EM_S[INDEX2(1,0,4)]+=tmp1_1 + tmp2_1; | |
2260 | EM_S[INDEX2(2,0,4)]+=tmp0_1 + tmp3_1; | |
2261 | EM_S[INDEX2(3,0,4)]+=tmp2_1 + tmp3_1; | |
2262 | EM_S[INDEX2(0,1,4)]+=tmp0_1 + tmp1_1; | |
2263 | EM_S[INDEX2(1,1,4)]+=tmp1_1 + tmp2_1; | |
2264 | EM_S[INDEX2(2,1,4)]+=tmp0_1 + tmp3_1; | |
2265 | EM_S[INDEX2(3,1,4)]+=tmp2_1 + tmp3_1; | |
2266 | EM_S[INDEX2(0,2,4)]+=tmp0_1 + tmp1_1; | |
2267 | EM_S[INDEX2(1,2,4)]+=tmp1_1 + tmp2_1; | |
2268 | EM_S[INDEX2(2,2,4)]+=tmp0_1 + tmp3_1; | |
2269 | EM_S[INDEX2(3,2,4)]+=tmp2_1 + tmp3_1; | |
2270 | EM_S[INDEX2(0,3,4)]+=tmp0_1 + tmp1_1; | |
2271 | EM_S[INDEX2(1,3,4)]+=tmp1_1 + tmp2_1; | |
2272 | EM_S[INDEX2(2,3,4)]+=tmp0_1 + tmp3_1; | |
2273 | EM_S[INDEX2(3,3,4)]+=tmp2_1 + tmp3_1; | |
2274 | } | |
2275 | /////////////// | |
2276 | // process C // | |
2277 | /////////////// | |
2278 | if (!C.isEmpty()) { | |
2279 | add_EM_S=true; | |
2280 | const double* C_p=const_cast<escript::Data*>(&C)->getSampleDataRO(e); | |
2281 | const double tmp1_1 = C_p[1]*w7; | |
2282 | const double tmp0_1 = C_p[0]*w8; | |
2283 | const double tmp3_1 = C_p[0]*w6; | |
2284 | const double tmp2_1 = C_p[1]*w9; | |
2285 | EM_S[INDEX2(0,0,4)]+=tmp1_1 + tmp3_1; | |
2286 | EM_S[INDEX2(1,0,4)]+=tmp1_1 + tmp3_1; | |
2287 | EM_S[INDEX2(2,0,4)]+=tmp1_1 + tmp3_1; | |
2288 | EM_S[INDEX2(3,0,4)]+=tmp1_1 + tmp3_1; | |
2289 | EM_S[INDEX2(0,1,4)]+=tmp0_1 + tmp1_1; | |
2290 | EM_S[INDEX2(1,1,4)]+=tmp0_1 + tmp1_1; | |
2291 | EM_S[INDEX2(2,1,4)]+=tmp0_1 + tmp1_1; | |
2292 | EM_S[INDEX2(3,1,4)]+=tmp0_1 + tmp1_1; | |
2293 | EM_S[INDEX2(0,2,4)]+=tmp2_1 + tmp3_1; | |
2294 | EM_S[INDEX2(1,2,4)]+=tmp2_1 + tmp3_1; | |
2295 | EM_S[INDEX2(2,2,4)]+=tmp2_1 + tmp3_1; | |
2296 | EM_S[INDEX2(3,2,4)]+=tmp2_1 + tmp3_1; | |
2297 | EM_S[INDEX2(0,3,4)]+=tmp0_1 + tmp2_1; | |
2298 | EM_S[INDEX2(1,3,4)]+=tmp0_1 + tmp2_1; | |
2299 | EM_S[INDEX2(2,3,4)]+=tmp0_1 + tmp2_1; | |
2300 | EM_S[INDEX2(3,3,4)]+=tmp0_1 + tmp2_1; | |
2301 | } | |
2302 | /////////////// | |
2303 | // process D // | |
2304 | /////////////// | |
2305 | if (!D.isEmpty()) { | |
2306 | add_EM_S=true; | |
2307 | const double* D_p=const_cast<escript::Data*>(&D)->getSampleDataRO(e); | |
2308 | const double tmp0_1 = D_p[0]*w10; | |
2309 | EM_S[INDEX2(0,0,4)]+=tmp0_1; | |
2310 | EM_S[INDEX2(1,0,4)]+=tmp0_1; | |
2311 | EM_S[INDEX2(2,0,4)]+=tmp0_1; | |
2312 | EM_S[INDEX2(3,0,4)]+=tmp0_1; | |
2313 | EM_S[INDEX2(0,1,4)]+=tmp0_1; | |
2314 | EM_S[INDEX2(1,1,4)]+=tmp0_1; | |
2315 | EM_S[INDEX2(2,1,4)]+=tmp0_1; | |
2316 | EM_S[INDEX2(3,1,4)]+=tmp0_1; | |
2317 | EM_S[INDEX2(0,2,4)]+=tmp0_1; | |
2318 | EM_S[INDEX2(1,2,4)]+=tmp0_1; | |
2319 | EM_S[INDEX2(2,2,4)]+=tmp0_1; | |
2320 | EM_S[INDEX2(3,2,4)]+=tmp0_1; | |
2321 | EM_S[INDEX2(0,3,4)]+=tmp0_1; | |
2322 | EM_S[INDEX2(1,3,4)]+=tmp0_1; | |
2323 | EM_S[INDEX2(2,3,4)]+=tmp0_1; | |
2324 | EM_S[INDEX2(3,3,4)]+=tmp0_1; | |
2325 | } | |
2326 | /////////////// | |
2327 | // process X // | |
2328 | /////////////// | |
2329 | if (!X.isEmpty()) { | |
2330 | add_EM_F=true; | |
2331 | const double* X_p=const_cast<escript::Data*>(&X)->getSampleDataRO(e); | |
2332 | const double tmp0_1 = X_p[0]*w11; | |
2333 | const double tmp2_1 = X_p[0]*w13; | |
2334 | const double tmp1_1 = X_p[1]*w12; | |
2335 | const double tmp3_1 = X_p[1]*w14; | |
2336 | EM_F[0]+=tmp0_1 + tmp1_1; | |
2337 | EM_F[1]+=tmp1_1 + tmp2_1; | |
2338 | EM_F[2]+=tmp0_1 + tmp3_1; | |
2339 | EM_F[3]+=tmp2_1 + tmp3_1; | |
2340 | } | |
2341 | /////////////// | |
2342 | // process Y // | |
2343 | /////////////// | |
2344 | if (!Y.isEmpty()) { | |
2345 | add_EM_F=true; | |
2346 | const double* Y_p=const_cast<escript::Data*>(&Y)->getSampleDataRO(e); | |
2347 | const double tmp0_1 = Y_p[0]*w15; | |
2348 | EM_F[0]+=tmp0_1; | |
2349 | EM_F[1]+=tmp0_1; | |
2350 | EM_F[2]+=tmp0_1; | |
2351 | EM_F[3]+=tmp0_1; | |
2352 | } | |
2353 | ||
2354 | // add to matrix (if add_EM_S) and RHS (if add_EM_F) | |
2355 | const index_t firstNode=m_N0*k1+k0; | |
2356 | addToMatrixAndRHS(mat, rhs, EM_S, EM_F, add_EM_S, add_EM_F, firstNode); | |
2357 | } // end k0 loop | |
2358 | } // end k1 loop | |
2359 | } // end of colouring | |
2360 | } // end of parallel region | |
2361 | } | |
2362 | ||
2363 | //protected | |
2364 | void Rectangle::assemblePDESystem(Paso_SystemMatrix* mat, | |
2365 | escript::Data& rhs, const escript::Data& A, const escript::Data& B, | |
2366 | const escript::Data& C, const escript::Data& D, | |
2367 | const escript::Data& X, const escript::Data& Y) const | |
2368 | { | |
2369 | const double h0 = m_l0/m_gNE0; | |
2370 | const double h1 = m_l1/m_gNE1; | |
2371 | dim_t numEq, numComp; | |
2372 | if (!mat) | |
2373 | numEq=numComp=(rhs.isEmpty() ? 1 : rhs.getDataPointSize()); | |
2374 | else { | |
2375 | numEq=mat->logical_row_block_size; | |
2376 | numComp=mat->logical_col_block_size; | |
2377 | } | |
2378 | ||
2379 | const double w0 = -0.1555021169820365539*h1/h0; | |
2380 | const double w1 = 0.041666666666666666667; | |
2381 | const double w2 = -0.15550211698203655390; | |
2382 | const double w3 = 0.041666666666666666667*h0/h1; | |
2383 | const double w4 = 0.15550211698203655390; | |
2384 | const double w5 = -0.041666666666666666667; | |
2385 | const double w6 = -0.01116454968463011277*h1/h0; | |
2386 | const double w7 = 0.011164549684630112770; | |
2387 | const double w8 = -0.011164549684630112770; | |
2388 | const double w9 = -0.041666666666666666667*h1/h0; | |
2389 | const double w10 = -0.041666666666666666667*h0/h1; | |
2390 | const double w11 = 0.1555021169820365539*h1/h0; | |
2391 | const double w12 = 0.1555021169820365539*h0/h1; | |
2392 | const double w13 = 0.01116454968463011277*h0/h1; | |
2393 | const double w14 = 0.01116454968463011277*h1/h0; | |
2394 | const double w15 = 0.041666666666666666667*h1/h0; | |
2395 | const double w16 = -0.01116454968463011277*h0/h1; | |
2396 | const double w17 = -0.1555021169820365539*h0/h1; | |
2397 | const double w18 = -0.33333333333333333333*h1/h0; | |
2398 | const double w19 = 0.25000000000000000000; | |
2399 | const double w20 = -0.25000000000000000000; | |
2400 | const double w21 = 0.16666666666666666667*h0/h1; | |
2401 | const double w22 = -0.16666666666666666667*h1/h0; | |
2402 | const double w23 = -0.16666666666666666667*h0/h1; | |
2403 | const double w24 = 0.33333333333333333333*h1/h0; | |
2404 | const double w25 = 0.33333333333333333333*h0/h1; | |
2405 | const double w26 = 0.16666666666666666667*h1/h0; | |
2406 | const double w27 = -0.33333333333333333333*h0/h1; | |
2407 | const double w28 = -0.032861463941450536761*h1; | |
2408 | const double w29 = -0.032861463941450536761*h0; | |
2409 | const double w30 = -0.12264065304058601714*h1; | |
2410 | const double w31 = -0.0023593469594139828636*h1; | |
2411 | const double w32 = -0.008805202725216129906*h0; | |
2412 | const double w33 = -0.008805202725216129906*h1; | |
2413 | const double w34 = 0.032861463941450536761*h1; | |
2414 | const double w35 = 0.008805202725216129906*h1; | |
2415 | const double w36 = 0.008805202725216129906*h0; | |
2416 | const double w37 = 0.0023593469594139828636*h1; | |
2417 | const double w38 = 0.12264065304058601714*h1; | |
2418 | const double w39 = 0.032861463941450536761*h0; | |
2419 | const double w40 = -0.12264065304058601714*h0; | |
2420 | const double w41 = -0.0023593469594139828636*h0; | |
2421 | const double w42 = 0.0023593469594139828636*h0; | |
2422 | const double w43 = 0.12264065304058601714*h0; | |
2423 | const double w44 = -0.16666666666666666667*h1; | |
2424 | const double w45 = -0.083333333333333333333*h0; | |
2425 | const double w46 = 0.083333333333333333333*h1; | |
2426 | const double w47 = 0.16666666666666666667*h1; | |
2427 | const double w48 = 0.083333333333333333333*h0; | |
2428 | const double w49 = -0.16666666666666666667*h0; | |
2429 | const double w50 = 0.16666666666666666667*h0; | |
2430 | const double w51 = -0.083333333333333333333*h1; | |
2431 | const double w52 = 0.025917019497006092316*h0*h1; | |
2432 | const double w53 = 0.0018607582807716854616*h0*h1; | |
2433 | const double w54 = 0.0069444444444444444444*h0*h1; | |
2434 | const double w55 = 0.09672363354357992482*h0*h1; | |
2435 | const double w56 = 0.00049858867864229740201*h0*h1; | |
2436 | const double w57 = 0.055555555555555555556*h0*h1; | |
2437 | const double w58 = 0.027777777777777777778*h0*h1; | |
2438 | const double w59 = 0.11111111111111111111*h0*h1; | |
2439 | const double w60 = -0.19716878364870322056*h1; | |
2440 | const double w61 = -0.19716878364870322056*h0; | |
2441 | const double w62 = -0.052831216351296779436*h0; | |
2442 | const double w63 = -0.052831216351296779436*h1; | |
2443 | const double w64 = 0.19716878364870322056*h1; | |
2444 | const double w65 = 0.052831216351296779436*h1; | |
2445 | const double w66 = 0.19716878364870322056*h0; | |
2446 | const double w67 = 0.052831216351296779436*h0; | |
2447 | const double w68 = -0.5*h1; | |
2448 | const double w69 = -0.5*h0; | |
2449 | const double w70 = 0.5*h1; | |
2450 | const double w71 = 0.5*h0; | |
2451 | const double w72 = 0.1555021169820365539*h0*h1; | |
2452 | const double w73 = 0.041666666666666666667*h0*h1; | |
2453 | const double w74 = 0.01116454968463011277*h0*h1; | |
2454 | const double w75 = 0.25*h0*h1; | |
2455 | ||
2456 | rhs.requireWrite(); | |
2457 | #pragma omp parallel | |
2458 | { | |
2459 | for (index_t k1_0=0; k1_0<2; k1_0++) { // colouring | |
2460 | #pragma omp for | |
2461 | for (index_t k1=k1_0; k1<m_NE1; k1+=2) { | |
2462 | for (index_t k0=0; k0<m_NE0; ++k0) { | |
2463 | bool add_EM_S=false; | |
2464 | bool add_EM_F=false; | |
2465 | vector<double> EM_S(4*4*numEq*numComp, 0); | |
2466 | vector<double> EM_F(4*numEq, 0); | |
2467 | const index_t e = k0 + m_NE0*k1; | |
2468 | /////////////// | |
2469 | // process A // | |
2470 | /////////////// | |
2471 | if (!A.isEmpty()) { | |
2472 | add_EM_S=true; | |
2473 | const double* A_p=const_cast<escript::Data*>(&A)->getSampleDataRO(e); | |
2474 | if (A.actsExpanded()) { | |
2475 | for (index_t k=0; k<numEq; k++) { | |
2476 | for (index_t m=0; m<numComp; m++) { | |
2477 | const double A_00_0 = A_p[INDEX5(k,0,m,0,0, numEq,2,numComp,2)]; | |
2478 | const double A_01_0 = A_p[INDEX5(k,0,m,1,0, numEq,2,numComp,2)]; | |
2479 | const double A_10_0 = A_p[INDEX5(k,1,m,0,0, numEq,2,numComp,2)]; | |
2480 | const double A_11_0 = A_p[INDEX5(k,1,m,1,0, numEq,2,numComp,2)]; | |
2481 | const double A_00_1 = A_p[INDEX5(k,0,m,0,1, numEq,2,numComp,2)]; | |
2482 | const double A_01_1 = A_p[INDEX5(k,0,m,1,1, numEq,2,numComp,2)]; | |
2483 | const double A_10_1 = A_p[INDEX5(k,1,m,0,1, numEq,2,numComp,2)]; | |
2484 | const double A_11_1 = A_p[INDEX5(k,1,m,1,1, numEq,2,numComp,2)]; | |
2485 | const double A_00_2 = A_p[INDEX5(k,0,m,0,2, numEq,2,numComp,2)]; | |
2486 | const double A_01_2 = A_p[INDEX5(k,0,m,1,2, numEq,2,numComp,2)]; | |
2487 | const double A_10_2 = A_p[INDEX5(k,1,m,0,2, numEq,2,numComp,2)]; | |
2488 | const double A_11_2 = A_p[INDEX5(k,1,m,1,2, numEq,2,numComp,2)]; | |
2489 | const double A_00_3 = A_p[INDEX5(k,0,m,0,3, numEq,2,numComp,2)]; | |
2490 |