13 |
|
|
14 |
#include <ripley/Rectangle.h> |
#include <ripley/Rectangle.h> |
15 |
extern "C" { |
extern "C" { |
16 |
#include "paso/SystemMatrixPattern.h" |
#include "paso/SystemMatrix.h" |
17 |
} |
} |
18 |
|
|
19 |
#if USE_SILO |
#if USE_SILO |
43 |
if (m_NX*m_NY != m_mpiInfo->size) |
if (m_NX*m_NY != m_mpiInfo->size) |
44 |
throw RipleyException("Invalid number of spatial subdivisions"); |
throw RipleyException("Invalid number of spatial subdivisions"); |
45 |
|
|
46 |
if (n0%m_NX > 0 || n1%m_NY > 0) |
if ((n0+1)%m_NX > 0 || (n1+1)%m_NY > 0) |
47 |
throw RipleyException("Number of elements must be separable into number of ranks in each dimension"); |
throw RipleyException("Number of elements+1 must be separable into number of ranks in each dimension"); |
48 |
|
|
49 |
// local number of elements |
if ((m_NX > 1 && (n0+1)/m_NX<2) || (m_NY > 1 && (n1+1)/m_NY<2)) |
50 |
m_NE0 = n0/m_NX; |
throw RipleyException("Too few elements for the number of ranks"); |
51 |
m_NE1 = n1/m_NY; |
|
52 |
// local number of nodes (not necessarily owned) |
// local number of elements (including overlap) |
53 |
|
m_NE0 = (m_NX>1 ? (n0+1)/m_NX : n0); |
54 |
|
if (m_mpiInfo->rank%m_NX>0 && m_mpiInfo->rank%m_NX<m_NX-1) |
55 |
|
m_NE0++; |
56 |
|
m_NE1 = (m_NY>1 ? (n1+1)/m_NY : n1); |
57 |
|
if (m_mpiInfo->rank/m_NX>0 && m_mpiInfo->rank/m_NX<m_NY-1) |
58 |
|
m_NE1++; |
59 |
|
|
60 |
|
// local number of nodes |
61 |
m_N0 = m_NE0+1; |
m_N0 = m_NE0+1; |
62 |
m_N1 = m_NE1+1; |
m_N1 = m_NE1+1; |
63 |
|
|
64 |
// bottom-left node is at (offset0,offset1) in global mesh |
// bottom-left node is at (offset0,offset1) in global mesh |
65 |
m_offset0 = m_NE0*(m_mpiInfo->rank%m_NX); |
m_offset0 = (n0+1)/m_NX*(m_mpiInfo->rank%m_NX); |
66 |
m_offset1 = m_NE1*(m_mpiInfo->rank/m_NX); |
if (m_offset0 > 0) |
67 |
|
m_offset0--; |
68 |
|
m_offset1 = (n1+1)/m_NY*(m_mpiInfo->rank/m_NX); |
69 |
|
if (m_offset1 > 0) |
70 |
|
m_offset1--; |
71 |
|
|
72 |
populateSampleIds(); |
populateSampleIds(); |
73 |
|
createPattern(); |
74 |
} |
} |
75 |
|
|
76 |
Rectangle::~Rectangle() |
Rectangle::~Rectangle() |
84 |
|
|
85 |
bool Rectangle::operator==(const AbstractDomain& other) const |
bool Rectangle::operator==(const AbstractDomain& other) const |
86 |
{ |
{ |
87 |
if (dynamic_cast<const Rectangle*>(&other)) |
const Rectangle* o=dynamic_cast<const Rectangle*>(&other); |
88 |
return this==&other; |
if (o) { |
89 |
|
return (RipleyDomain::operator==(other) && |
90 |
|
m_gNE0==o->m_gNE0 && m_gNE1==o->m_gNE1 |
91 |
|
&& m_l0==o->m_l0 && m_l1==o->m_l1 |
92 |
|
&& m_NX==o->m_NX && m_NY==o->m_NY); |
93 |
|
} |
94 |
|
|
95 |
return false; |
return false; |
96 |
} |
} |
241 |
{ |
{ |
242 |
switch (fsType) { |
switch (fsType) { |
243 |
case Nodes: |
case Nodes: |
244 |
|
case ReducedNodes: //FIXME: reduced |
245 |
return &m_nodeId[0]; |
return &m_nodeId[0]; |
246 |
|
case DegreesOfFreedom: |
247 |
|
case ReducedDegreesOfFreedom: //FIXME: reduced |
248 |
|
return &m_dofId[0]; |
249 |
case Elements: |
case Elements: |
250 |
|
case ReducedElements: |
251 |
return &m_elementId[0]; |
return &m_elementId[0]; |
252 |
case FaceElements: |
case FaceElements: |
253 |
|
case ReducedFaceElements: |
254 |
return &m_faceId[0]; |
return &m_faceId[0]; |
255 |
default: |
default: |
256 |
break; |
break; |
262 |
throw RipleyException(msg.str()); |
throw RipleyException(msg.str()); |
263 |
} |
} |
264 |
|
|
265 |
bool Rectangle::ownSample(int fsCode, index_t id) const |
bool Rectangle::ownSample(int fsType, index_t id) const |
266 |
{ |
{ |
267 |
#ifdef ESYS_MPI |
if (getMPISize()==1) |
268 |
if (fsCode != ReducedNodes) { |
return true; |
269 |
const index_t myFirst=m_nodeDistribution[m_mpiInfo->rank]; |
|
270 |
const index_t myLast=m_nodeDistribution[m_mpiInfo->rank+1]-1; |
switch (fsType) { |
271 |
return (m_nodeId[id]>=myFirst && m_nodeId[id]<=myLast); |
case Nodes: |
272 |
} else { |
case ReducedNodes: //FIXME: reduced |
273 |
stringstream msg; |
return (m_dofMap[id] < getNumDOF()); |
274 |
msg << "ownSample() not implemented for " |
case DegreesOfFreedom: |
275 |
<< functionSpaceTypeAsString(fsCode); |
case ReducedDegreesOfFreedom: |
276 |
throw RipleyException(msg.str()); |
return true; |
277 |
|
case Elements: |
278 |
|
case ReducedElements: |
279 |
|
// check ownership of element's bottom left node |
280 |
|
return (m_dofMap[id%m_NE0+m_N0*(id/m_NE0)] < getNumDOF()); |
281 |
|
case FaceElements: |
282 |
|
case ReducedFaceElements: |
283 |
|
{ |
284 |
|
// check ownership of face element's first node |
285 |
|
const IndexVector faces = getNumFacesPerBoundary(); |
286 |
|
dim_t n=0; |
287 |
|
for (size_t i=0; i<faces.size(); i++) { |
288 |
|
n+=faces[i]; |
289 |
|
if (id<n) { |
290 |
|
index_t k; |
291 |
|
if (i==1) |
292 |
|
k=m_N0-1; |
293 |
|
else if (i==3) |
294 |
|
k=m_N0*(m_N1-1); |
295 |
|
else |
296 |
|
k=0; |
297 |
|
// determine whether to move right or up |
298 |
|
const index_t delta=(i/2==0 ? m_N0 : 1); |
299 |
|
return (m_dofMap[k+(id-n+faces[i])*delta] < getNumDOF()); |
300 |
|
} |
301 |
|
} |
302 |
|
return false; |
303 |
|
} |
304 |
|
default: |
305 |
|
break; |
306 |
} |
} |
307 |
#else |
|
308 |
return true; |
stringstream msg; |
309 |
#endif |
msg << "ownSample() not implemented for " |
310 |
|
<< functionSpaceTypeAsString(fsType); |
311 |
|
throw RipleyException(msg.str()); |
312 |
} |
} |
313 |
|
|
314 |
void Rectangle::setToGradient(escript::Data& out, const escript::Data& cIn) const |
void Rectangle::setToGradient(escript::Data& out, const escript::Data& cIn) const |
335 |
const double cy7 = 1./h1; |
const double cy7 = 1./h1; |
336 |
|
|
337 |
if (out.getFunctionSpace().getTypeCode() == Elements) { |
if (out.getFunctionSpace().getTypeCode() == Elements) { |
338 |
|
out.requireWrite(); |
339 |
/*** GENERATOR SNIP_GRAD_ELEMENTS TOP */ |
/*** GENERATOR SNIP_GRAD_ELEMENTS TOP */ |
340 |
#pragma omp parallel for |
#pragma omp parallel for |
341 |
for (index_t k1=0; k1 < m_NE1; ++k1) { |
for (index_t k1=0; k1 < m_NE1; ++k1) { |
359 |
} /* end of k1 loop */ |
} /* end of k1 loop */ |
360 |
/* GENERATOR SNIP_GRAD_ELEMENTS BOTTOM */ |
/* GENERATOR SNIP_GRAD_ELEMENTS BOTTOM */ |
361 |
} else if (out.getFunctionSpace().getTypeCode() == ReducedElements) { |
} else if (out.getFunctionSpace().getTypeCode() == ReducedElements) { |
362 |
|
out.requireWrite(); |
363 |
/*** GENERATOR SNIP_GRAD_REDUCED_ELEMENTS TOP */ |
/*** GENERATOR SNIP_GRAD_REDUCED_ELEMENTS TOP */ |
364 |
#pragma omp parallel for |
#pragma omp parallel for |
365 |
for (index_t k1=0; k1 < m_NE1; ++k1) { |
for (index_t k1=0; k1 < m_NE1; ++k1) { |
377 |
} /* end of k1 loop */ |
} /* end of k1 loop */ |
378 |
/* GENERATOR SNIP_GRAD_REDUCED_ELEMENTS BOTTOM */ |
/* GENERATOR SNIP_GRAD_REDUCED_ELEMENTS BOTTOM */ |
379 |
} else if (out.getFunctionSpace().getTypeCode() == FaceElements) { |
} else if (out.getFunctionSpace().getTypeCode() == FaceElements) { |
380 |
|
out.requireWrite(); |
381 |
#pragma omp parallel |
#pragma omp parallel |
382 |
{ |
{ |
383 |
/*** GENERATOR SNIP_GRAD_FACES TOP */ |
/*** GENERATOR SNIP_GRAD_FACES TOP */ |
448 |
/* GENERATOR SNIP_GRAD_FACES BOTTOM */ |
/* GENERATOR SNIP_GRAD_FACES BOTTOM */ |
449 |
} // end of parallel section |
} // end of parallel section |
450 |
} else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) { |
} else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) { |
451 |
|
out.requireWrite(); |
452 |
#pragma omp parallel |
#pragma omp parallel |
453 |
{ |
{ |
454 |
/*** GENERATOR SNIP_GRAD_REDUCED_FACES TOP */ |
/*** GENERATOR SNIP_GRAD_REDUCED_FACES TOP */ |
683 |
void Rectangle::setToNormal(escript::Data& out) const |
void Rectangle::setToNormal(escript::Data& out) const |
684 |
{ |
{ |
685 |
if (out.getFunctionSpace().getTypeCode() == FaceElements) { |
if (out.getFunctionSpace().getTypeCode() == FaceElements) { |
686 |
|
out.requireWrite(); |
687 |
#pragma omp parallel |
#pragma omp parallel |
688 |
{ |
{ |
689 |
if (m_faceOffset[0] > -1) { |
if (m_faceOffset[0] > -1) { |
735 |
} |
} |
736 |
} // end of parallel section |
} // end of parallel section |
737 |
} else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) { |
} else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) { |
738 |
|
out.requireWrite(); |
739 |
#pragma omp parallel |
#pragma omp parallel |
740 |
{ |
{ |
741 |
if (m_faceOffset[0] > -1) { |
if (m_faceOffset[0] > -1) { |
783 |
} |
} |
784 |
} |
} |
785 |
|
|
786 |
void Rectangle::addPDEToSystem(escript::AbstractSystemMatrix& mat, |
void Rectangle::setToSize(escript::Data& out) const |
|
escript::Data& rhs, const escript::Data& A, const escript::Data& B, |
|
|
const escript::Data& C, const escript::Data& D, |
|
|
const escript::Data& X, const escript::Data& Y, |
|
|
const escript::Data& d, const escript::Data& y, |
|
|
const escript::Data& d_contact, const escript::Data& y_contact, |
|
|
const escript::Data& d_dirac, const escript::Data& y_dirac) const |
|
787 |
{ |
{ |
788 |
throw RipleyException("addPDEToSystem() not implemented"); |
if (out.getFunctionSpace().getTypeCode() == Elements |
789 |
|
|| out.getFunctionSpace().getTypeCode() == ReducedElements) { |
790 |
|
out.requireWrite(); |
791 |
|
const dim_t numQuad=out.getNumDataPointsPerSample(); |
792 |
|
const double hSize=getFirstCoordAndSpacing(0).second; |
793 |
|
const double vSize=getFirstCoordAndSpacing(1).second; |
794 |
|
const double size=min(hSize,vSize); |
795 |
|
#pragma omp parallel for |
796 |
|
for (index_t k = 0; k < getNumElements(); ++k) { |
797 |
|
double* o = out.getSampleDataRW(k); |
798 |
|
fill(o, o+numQuad, size); |
799 |
|
} |
800 |
|
} else if (out.getFunctionSpace().getTypeCode() == FaceElements |
801 |
|
|| out.getFunctionSpace().getTypeCode() == ReducedFaceElements) { |
802 |
|
out.requireWrite(); |
803 |
|
const dim_t numQuad=out.getNumDataPointsPerSample(); |
804 |
|
const double hSize=getFirstCoordAndSpacing(0).second; |
805 |
|
const double vSize=getFirstCoordAndSpacing(1).second; |
806 |
|
#pragma omp parallel |
807 |
|
{ |
808 |
|
if (m_faceOffset[0] > -1) { |
809 |
|
#pragma omp for nowait |
810 |
|
for (index_t k1 = 0; k1 < m_NE1; ++k1) { |
811 |
|
double* o = out.getSampleDataRW(m_faceOffset[0]+k1); |
812 |
|
fill(o, o+numQuad, vSize); |
813 |
|
} |
814 |
|
} |
815 |
|
|
816 |
|
if (m_faceOffset[1] > -1) { |
817 |
|
#pragma omp for nowait |
818 |
|
for (index_t k1 = 0; k1 < m_NE1; ++k1) { |
819 |
|
double* o = out.getSampleDataRW(m_faceOffset[1]+k1); |
820 |
|
fill(o, o+numQuad, vSize); |
821 |
|
} |
822 |
|
} |
823 |
|
|
824 |
|
if (m_faceOffset[2] > -1) { |
825 |
|
#pragma omp for nowait |
826 |
|
for (index_t k0 = 0; k0 < m_NE0; ++k0) { |
827 |
|
double* o = out.getSampleDataRW(m_faceOffset[2]+k0); |
828 |
|
fill(o, o+numQuad, hSize); |
829 |
|
} |
830 |
|
} |
831 |
|
|
832 |
|
if (m_faceOffset[3] > -1) { |
833 |
|
#pragma omp for nowait |
834 |
|
for (index_t k0 = 0; k0 < m_NE0; ++k0) { |
835 |
|
double* o = out.getSampleDataRW(m_faceOffset[3]+k0); |
836 |
|
fill(o, o+numQuad, hSize); |
837 |
|
} |
838 |
|
} |
839 |
|
} // end of parallel section |
840 |
|
|
841 |
|
} else { |
842 |
|
stringstream msg; |
843 |
|
msg << "setToSize() not implemented for " |
844 |
|
<< functionSpaceTypeAsString(out.getFunctionSpace().getTypeCode()); |
845 |
|
throw RipleyException(msg.str()); |
846 |
|
} |
847 |
} |
} |
848 |
|
|
849 |
Paso_SystemMatrixPattern* Rectangle::getPattern(bool reducedRowOrder, |
Paso_SystemMatrixPattern* Rectangle::getPattern(bool reducedRowOrder, |
852 |
if (reducedRowOrder || reducedColOrder) |
if (reducedRowOrder || reducedColOrder) |
853 |
throw RipleyException("getPattern() not implemented for reduced order"); |
throw RipleyException("getPattern() not implemented for reduced order"); |
854 |
|
|
855 |
// connector |
return m_pattern; |
|
RankVector neighbour; |
|
|
IndexVector offsetInShared(1,0); |
|
|
IndexVector sendShared, recvShared; |
|
|
const IndexVector faces=getNumFacesPerBoundary(); |
|
|
const index_t left = (m_offset0==0 ? 0 : 1); |
|
|
const index_t bottom = (m_offset1==0 ? 0 : 1); |
|
|
// corner node from bottom-left |
|
|
if (faces[0] == 0 && faces[2] == 0) { |
|
|
neighbour.push_back(m_mpiInfo->rank-m_NX-1); |
|
|
offsetInShared.push_back(offsetInShared.back()+1); |
|
|
sendShared.push_back(m_nodeId[m_N0+left]); |
|
|
recvShared.push_back(m_nodeId[0]); |
|
|
} |
|
|
// bottom edge |
|
|
if (faces[2] == 0) { |
|
|
neighbour.push_back(m_mpiInfo->rank-m_NX); |
|
|
offsetInShared.push_back(offsetInShared.back()+m_N0-left); |
|
|
for (dim_t i=left; i<m_N0; i++) { |
|
|
// easy case, we know the neighbour id's |
|
|
sendShared.push_back(m_nodeId[m_N0+i]); |
|
|
recvShared.push_back(m_nodeId[i]); |
|
|
} |
|
|
} |
|
|
// corner node from bottom-right |
|
|
if (faces[1] == 0 && faces[2] == 0) { |
|
|
neighbour.push_back(m_mpiInfo->rank-m_NX+1); |
|
|
const index_t N0=(neighbour.back()%m_NX == 0 ? m_N0 : m_N0-1); |
|
|
const index_t N1=(neighbour.back()/m_NX == 0 ? m_N1 : m_N1-1); |
|
|
const index_t first=m_nodeDistribution[neighbour.back()]; |
|
|
offsetInShared.push_back(offsetInShared.back()+1); |
|
|
sendShared.push_back(m_nodeId[(bottom+1)*m_N0-1]); |
|
|
recvShared.push_back(first+N0*(N1-1)); |
|
|
} |
|
|
// left edge |
|
|
if (faces[0] == 0) { |
|
|
neighbour.push_back(m_mpiInfo->rank-1); |
|
|
offsetInShared.push_back(offsetInShared.back()+m_N1-bottom); |
|
|
for (dim_t i=bottom; i<m_N1; i++) { |
|
|
// easy case, we know the neighbour id's |
|
|
sendShared.push_back(m_nodeId[i*m_N0+1]); |
|
|
recvShared.push_back(m_nodeId[i*m_N0]); |
|
|
} |
|
|
} |
|
|
// right edge |
|
|
if (faces[1] == 0) { |
|
|
neighbour.push_back(m_mpiInfo->rank+1); |
|
|
const index_t rightN0=(neighbour.back()%m_NX == 0 ? m_N0 : m_N0-1); |
|
|
const index_t first=m_nodeDistribution[neighbour.back()]; |
|
|
offsetInShared.push_back(offsetInShared.back()+m_N1-bottom); |
|
|
for (dim_t i=bottom; i<m_N1; i++) { |
|
|
sendShared.push_back(m_nodeId[(i+1)*m_N0-1]); |
|
|
recvShared.push_back(first+rightN0*(i-bottom)); |
|
|
} |
|
|
} |
|
|
// corner node from top-left |
|
|
if (faces[0] == 0 && faces[3] == 0) { |
|
|
neighbour.push_back(m_mpiInfo->rank+m_NX-1); |
|
|
const index_t N0=(neighbour.back()%m_NX == 0 ? m_N0 : m_N0-1); |
|
|
const index_t first=m_nodeDistribution[neighbour.back()]; |
|
|
offsetInShared.push_back(offsetInShared.back()+1); |
|
|
sendShared.push_back(m_nodeId[m_N0*(m_N1-1)+left]); |
|
|
recvShared.push_back(first+N0-1); |
|
|
} |
|
|
// top edge |
|
|
if (faces[3] == 0) { |
|
|
neighbour.push_back(m_mpiInfo->rank+m_NX); |
|
|
const index_t first=m_nodeDistribution[neighbour.back()]; |
|
|
offsetInShared.push_back(offsetInShared.back()+m_N0-left); |
|
|
for (dim_t i=left; i<m_N0; i++) { |
|
|
sendShared.push_back(m_nodeId[m_N0*(m_N1-1)+i]); |
|
|
recvShared.push_back(first+i-left); |
|
|
} |
|
|
} |
|
|
// corner node from top-right |
|
|
if (faces[1] == 0 && faces[3] == 0) { |
|
|
neighbour.push_back(m_mpiInfo->rank+m_NX+1); |
|
|
const index_t first=m_nodeDistribution[neighbour.back()]; |
|
|
offsetInShared.push_back(offsetInShared.back()+1); |
|
|
sendShared.push_back(m_nodeId[m_N0*m_N1-1]); |
|
|
recvShared.push_back(first); |
|
|
} |
|
|
const int numDOF=m_nodeDistribution[m_mpiInfo->rank+1]-m_nodeDistribution[m_mpiInfo->rank]; |
|
|
/* |
|
|
cout << "--- rcv_shcomp ---" << endl; |
|
|
cout << "numDOF=" << numDOF << ", numNeighbors=" << neighbour.size() << endl; |
|
|
for (size_t i=0; i<neighbour.size(); i++) { |
|
|
cout << "neighbor[" << i << "]=" << neighbour[i] |
|
|
<< " offsetInShared[" << i+1 << "]=" << offsetInShared[i+1] << endl; |
|
|
} |
|
|
for (size_t i=0; i<recvShared.size(); i++) { |
|
|
cout << "shared[" << i << "]=" << recvShared[i] << endl; |
|
|
} |
|
|
cout << "--- snd_shcomp ---" << endl; |
|
|
for (size_t i=0; i<sendShared.size(); i++) { |
|
|
cout << "shared[" << i << "]=" << sendShared[i] << endl; |
|
|
} |
|
|
*/ |
|
|
|
|
|
Paso_SharedComponents *snd_shcomp = Paso_SharedComponents_alloc( |
|
|
numDOF, neighbour.size(), &neighbour[0], &sendShared[0], |
|
|
&offsetInShared[0], 1, 0, m_mpiInfo); |
|
|
Paso_SharedComponents *rcv_shcomp = Paso_SharedComponents_alloc( |
|
|
numDOF, neighbour.size(), &neighbour[0], &recvShared[0], |
|
|
&offsetInShared[0], 1, 0, m_mpiInfo); |
|
|
Paso_Connector* connector = Paso_Connector_alloc(snd_shcomp, rcv_shcomp); |
|
|
Paso_SharedComponents_free(snd_shcomp); |
|
|
Paso_SharedComponents_free(rcv_shcomp); |
|
|
|
|
|
// create patterns |
|
|
dim_t M, N; |
|
|
IndexVector ptr(1,0); |
|
|
IndexVector index; |
|
|
|
|
|
// main pattern |
|
|
for (index_t i=0; i<numDOF; i++) { |
|
|
// always add the node itself |
|
|
index.push_back(i); |
|
|
int num=insertNeighbours(index, i); |
|
|
ptr.push_back(ptr.back()+num+1); |
|
|
} |
|
|
M=N=ptr.size()-1; |
|
|
// paso will manage the memory |
|
|
index_t* indexC = MEMALLOC(index.size(),index_t); |
|
|
index_t* ptrC = MEMALLOC(ptr.size(), index_t); |
|
|
copy(index.begin(), index.end(), indexC); |
|
|
copy(ptr.begin(), ptr.end(), ptrC); |
|
|
Paso_Pattern *mainPattern = Paso_Pattern_alloc(MATRIX_FORMAT_DEFAULT, |
|
|
M, N, ptrC, indexC); |
|
|
|
|
|
/* |
|
|
cout << "--- main_pattern ---" << endl; |
|
|
cout << "M=" << M << ", N=" << N << endl; |
|
|
for (size_t i=0; i<ptr.size(); i++) { |
|
|
cout << "ptr[" << i << "]=" << ptr[i] << endl; |
|
|
} |
|
|
for (size_t i=0; i<index.size(); i++) { |
|
|
cout << "index[" << i << "]=" << index[i] << endl; |
|
|
} |
|
|
*/ |
|
|
|
|
|
ptr.clear(); |
|
|
index.clear(); |
|
|
|
|
|
// column & row couple patterns |
|
|
Paso_Pattern *colCouplePattern, *rowCouplePattern; |
|
|
generateCouplePatterns(&colCouplePattern, &rowCouplePattern); |
|
|
|
|
|
// allocate paso distribution |
|
|
Paso_Distribution* distribution = Paso_Distribution_alloc(m_mpiInfo, |
|
|
const_cast<index_t*>(&m_nodeDistribution[0]), 1, 0); |
|
|
|
|
|
Paso_SystemMatrixPattern* pattern = Paso_SystemMatrixPattern_alloc( |
|
|
MATRIX_FORMAT_DEFAULT, distribution, distribution, |
|
|
mainPattern, colCouplePattern, rowCouplePattern, |
|
|
connector, connector); |
|
|
Paso_Pattern_free(mainPattern); |
|
|
Paso_Pattern_free(colCouplePattern); |
|
|
Paso_Pattern_free(rowCouplePattern); |
|
|
Paso_Distribution_free(distribution); |
|
|
return pattern; |
|
856 |
} |
} |
857 |
|
|
858 |
void Rectangle::Print_Mesh_Info(const bool full) const |
void Rectangle::Print_Mesh_Info(const bool full) const |
917 |
} |
} |
918 |
|
|
919 |
//protected |
//protected |
920 |
|
dim_t Rectangle::getNumDOF() const |
921 |
|
{ |
922 |
|
return (m_gNE0+1)/m_NX*(m_gNE1+1)/m_NY; |
923 |
|
} |
924 |
|
|
925 |
|
//protected |
926 |
dim_t Rectangle::getNumFaceElements() const |
dim_t Rectangle::getNumFaceElements() const |
927 |
{ |
{ |
928 |
const IndexVector faces = getNumFacesPerBoundary(); |
const IndexVector faces = getNumFacesPerBoundary(); |
955 |
} |
} |
956 |
} |
} |
957 |
|
|
958 |
//private |
//protected |
959 |
void Rectangle::populateSampleIds() |
dim_t Rectangle::insertNeighbourNodes(IndexVector& index, index_t node) const |
960 |
{ |
{ |
961 |
// identifiers are ordered from left to right, bottom to top on each rank, |
const dim_t nDOF0 = (m_gNE0+1)/m_NX; |
962 |
// except for the shared nodes which are owned by the rank below / to the |
const dim_t nDOF1 = (m_gNE1+1)/m_NY; |
963 |
// left of the current rank |
const int x=node%nDOF0; |
964 |
|
const int y=node/nDOF0; |
965 |
// build node distribution vector first. |
dim_t num=0; |
966 |
// m_nodeDistribution[i] is the first node id on rank i, that is |
// loop through potential neighbours and add to index if positions are |
967 |
// rank i owns m_nodeDistribution[i+1]-nodeDistribution[i] nodes |
// within bounds |
968 |
m_nodeDistribution.assign(m_mpiInfo->size+1, 0); |
for (int i1=-1; i1<2; i1++) { |
969 |
m_nodeDistribution[1]=getNumNodes(); |
for (int i0=-1; i0<2; i0++) { |
970 |
for (dim_t k=1; k<m_mpiInfo->size-1; k++) { |
// skip node itself |
971 |
const index_t x=k%m_NX; |
if (i0==0 && i1==0) |
972 |
const index_t y=k/m_NX; |
continue; |
973 |
index_t numNodes=getNumNodes(); |
// location of neighbour node |
974 |
if (x>0) |
const int nx=x+i0; |
975 |
numNodes-=m_N1; |
const int ny=y+i1; |
976 |
if (y>0) |
if (nx>=0 && ny>=0 && nx<nDOF0 && ny<nDOF1) { |
977 |
numNodes-=m_N0; |
index.push_back(ny*nDOF0+nx); |
978 |
if (x>0 && y>0) |
num++; |
979 |
numNodes++; // subtracted corner twice -> fix that |
} |
980 |
m_nodeDistribution[k+1]=m_nodeDistribution[k]+numNodes; |
} |
981 |
} |
} |
|
m_nodeDistribution[m_mpiInfo->size]=getNumDataPointsGlobal(); |
|
982 |
|
|
983 |
m_nodeId.resize(getNumNodes()); |
return num; |
984 |
|
} |
985 |
|
|
986 |
|
//protected |
987 |
|
void Rectangle::nodesToDOF(escript::Data& out, escript::Data& in) const |
988 |
|
{ |
989 |
|
const dim_t numComp = in.getDataPointSize(); |
990 |
|
out.requireWrite(); |
991 |
|
|
|
// the bottom row and left column are not owned by this rank so the |
|
|
// identifiers need to be computed accordingly |
|
992 |
const index_t left = (m_offset0==0 ? 0 : 1); |
const index_t left = (m_offset0==0 ? 0 : 1); |
993 |
const index_t bottom = (m_offset1==0 ? 0 : 1); |
const index_t bottom = (m_offset1==0 ? 0 : 1); |
994 |
if (left>0) { |
const dim_t nDOF0 = (m_gNE0+1)/m_NX; |
995 |
const int neighbour=m_mpiInfo->rank-1; |
const dim_t nDOF1 = (m_gNE1+1)/m_NY; |
|
const index_t leftN0=(neighbour%m_NX == 0 ? m_N0 : m_N0-1); |
|
996 |
#pragma omp parallel for |
#pragma omp parallel for |
997 |
for (dim_t i1=bottom; i1<m_N1; i1++) { |
for (index_t i=0; i<nDOF1; i++) { |
998 |
m_nodeId[i1*m_N0]=m_nodeDistribution[neighbour] |
for (index_t j=0; j<nDOF0; j++) { |
999 |
+ (i1-bottom+1)*leftN0-1; |
const index_t n=j+left+(i+bottom)*m_N0; |
1000 |
|
const double* src=in.getSampleDataRO(n); |
1001 |
|
copy(src, src+numComp, out.getSampleDataRW(j+i*nDOF0)); |
1002 |
} |
} |
1003 |
} |
} |
1004 |
if (bottom>0) { |
} |
1005 |
const int neighbour=m_mpiInfo->rank-m_NX; |
|
1006 |
const index_t bottomN0=(neighbour%m_NX == 0 ? m_N0 : m_N0-1); |
//protected |
1007 |
const index_t bottomN1=(neighbour/m_NX == 0 ? m_N1 : m_N1-1); |
void Rectangle::dofToNodes(escript::Data& out, escript::Data& in) const |
1008 |
|
{ |
1009 |
|
const dim_t numComp = in.getDataPointSize(); |
1010 |
|
Paso_Coupler* coupler = Paso_Coupler_alloc(m_connector, numComp); |
1011 |
|
in.requireWrite(); |
1012 |
|
Paso_Coupler_startCollect(coupler, in.getSampleDataRW(0)); |
1013 |
|
|
1014 |
|
const dim_t numDOF = getNumDOF(); |
1015 |
|
out.requireWrite(); |
1016 |
|
const double* buffer = Paso_Coupler_finishCollect(coupler); |
1017 |
|
|
1018 |
#pragma omp parallel for |
#pragma omp parallel for |
1019 |
for (dim_t i0=left; i0<m_N0; i0++) { |
for (index_t i=0; i<getNumNodes(); i++) { |
1020 |
m_nodeId[i0]=m_nodeDistribution[neighbour] |
const double* src=(m_dofMap[i]<numDOF ? |
1021 |
+ (bottomN1-1)*bottomN0 + i0 - left; |
in.getSampleDataRO(m_dofMap[i]) |
1022 |
} |
: &buffer[(m_dofMap[i]-numDOF)*numComp]); |
1023 |
|
copy(src, src+numComp, out.getSampleDataRW(i)); |
1024 |
} |
} |
1025 |
if (left>0 && bottom>0) { |
} |
1026 |
const int neighbour=m_mpiInfo->rank-m_NX-1; |
|
1027 |
const index_t N0=(neighbour%m_NX == 0 ? m_N0 : m_N0-1); |
//private |
1028 |
const index_t N1=(neighbour/m_NX == 0 ? m_N1 : m_N1-1); |
void Rectangle::populateSampleIds() |
1029 |
m_nodeId[0]=m_nodeDistribution[neighbour]+N0*N1-1; |
{ |
1030 |
|
// identifiers are ordered from left to right, bottom to top globablly. |
1031 |
|
|
1032 |
|
// build node distribution vector first. |
1033 |
|
// rank i owns m_nodeDistribution[i+1]-nodeDistribution[i] nodes |
1034 |
|
m_nodeDistribution.assign(m_mpiInfo->size+1, 0); |
1035 |
|
const dim_t numDOF=getNumDOF(); |
1036 |
|
for (dim_t k=1; k<m_mpiInfo->size; k++) { |
1037 |
|
m_nodeDistribution[k]=k*numDOF; |
1038 |
} |
} |
1039 |
|
m_nodeDistribution[m_mpiInfo->size]=getNumDataPointsGlobal(); |
1040 |
|
m_nodeId.resize(getNumNodes()); |
1041 |
|
m_dofId.resize(numDOF); |
1042 |
|
m_elementId.resize(getNumElements()); |
1043 |
|
m_faceId.resize(getNumFaceElements()); |
1044 |
|
|
1045 |
// the rest of the id's are contiguous |
#pragma omp parallel |
1046 |
const index_t firstId=m_nodeDistribution[m_mpiInfo->rank]; |
{ |
1047 |
#pragma omp parallel for |
// nodes |
1048 |
for (dim_t i1=bottom; i1<m_N1; i1++) { |
#pragma omp for nowait |
1049 |
for (dim_t i0=left; i0<m_N0; i0++) { |
for (dim_t i1=0; i1<m_N1; i1++) { |
1050 |
m_nodeId[i0+i1*m_N0] = firstId+i0-left+(i1-bottom)*(m_N0-left); |
for (dim_t i0=0; i0<m_N0; i0++) { |
1051 |
|
m_nodeId[i0+i1*m_N0] = (m_offset1+i1)*(m_gNE0+1)+m_offset0+i0; |
1052 |
|
} |
1053 |
} |
} |
|
} |
|
1054 |
|
|
1055 |
// element id's |
// degrees of freedom |
1056 |
m_elementId.resize(getNumElements()); |
#pragma omp for nowait |
1057 |
#pragma omp parallel for |
for (dim_t k=0; k<numDOF; k++) |
1058 |
for (dim_t k=0; k<getNumElements(); k++) { |
m_dofId[k] = m_nodeDistribution[m_mpiInfo->rank]+k; |
|
m_elementId[k]=k; |
|
|
} |
|
1059 |
|
|
1060 |
// face element id's |
// elements |
1061 |
m_faceId.resize(getNumFaceElements()); |
#pragma omp for nowait |
1062 |
#pragma omp parallel for |
for (dim_t i1=0; i1<m_NE1; i1++) { |
1063 |
for (dim_t k=0; k<getNumFaceElements(); k++) { |
for (dim_t i0=0; i0<m_NE0; i0++) { |
1064 |
m_faceId[k]=k; |
m_elementId[i0+i1*m_NE0]=(m_offset1+i1)*m_gNE0+m_offset0+i0; |
1065 |
} |
} |
1066 |
|
} |
1067 |
|
|
1068 |
|
// face elements |
1069 |
|
#pragma omp for |
1070 |
|
for (dim_t k=0; k<getNumFaceElements(); k++) |
1071 |
|
m_faceId[k]=k; |
1072 |
|
} // end parallel section |
1073 |
|
|
1074 |
|
m_nodeTags.assign(getNumNodes(), 0); |
1075 |
|
updateTagsInUse(Nodes); |
1076 |
|
|
1077 |
|
m_elementTags.assign(getNumElements(), 0); |
1078 |
|
updateTagsInUse(Elements); |
1079 |
|
|
1080 |
// generate face offset vector and set face tags |
// generate face offset vector and set face tags |
1081 |
const IndexVector facesPerEdge = getNumFacesPerBoundary(); |
const IndexVector facesPerEdge = getNumFacesPerBoundary(); |
1099 |
} |
} |
1100 |
|
|
1101 |
//private |
//private |
1102 |
int Rectangle::insertNeighbours(IndexVector& index, index_t node) const |
void Rectangle::createPattern() |
1103 |
{ |
{ |
1104 |
const dim_t myN0 = (m_offset0==0 ? m_N0 : m_N0-1); |
const dim_t nDOF0 = (m_gNE0+1)/m_NX; |
1105 |
const dim_t myN1 = (m_offset1==0 ? m_N1 : m_N1-1); |
const dim_t nDOF1 = (m_gNE1+1)/m_NY; |
1106 |
const int x=node%myN0; |
const index_t left = (m_offset0==0 ? 0 : 1); |
1107 |
const int y=node/myN0; |
const index_t bottom = (m_offset1==0 ? 0 : 1); |
1108 |
int num=0; |
|
1109 |
if (y>0) { |
// populate node->DOF mapping with own degrees of freedom. |
1110 |
if (x>0) { |
// The rest is assigned in the loop further down |
1111 |
// bottom-left |
m_dofMap.assign(getNumNodes(), 0); |
1112 |
index.push_back(node-myN0-1); |
#pragma omp parallel for |
1113 |
num++; |
for (index_t i=bottom; i<m_N1; i++) { |
1114 |
} |
for (index_t j=left; j<m_N0; j++) { |
1115 |
// bottom |
m_dofMap[i*m_N0+j]=(i-bottom)*nDOF0+j-left; |
|
index.push_back(node-myN0); |
|
|
num++; |
|
|
if (x<myN0-1) { |
|
|
// bottom-right |
|
|
index.push_back(node-myN0+1); |
|
|
num++; |
|
|
} |
|
|
} |
|
|
if (x<myN0-1) { |
|
|
// right |
|
|
index.push_back(node+1); |
|
|
num++; |
|
|
if (y<myN1-1) { |
|
|
// top-right |
|
|
index.push_back(node+myN0+1); |
|
|
num++; |
|
1116 |
} |
} |
1117 |
} |
} |
1118 |
if (y<myN1-1) { |
|
1119 |
// top |
// build list of shared components and neighbours by looping through |
1120 |
index.push_back(node+myN0); |
// all potential neighbouring ranks and checking if positions are |
1121 |
num++; |
// within bounds |
1122 |
if (x>0) { |
const dim_t numDOF=nDOF0*nDOF1; |
1123 |
// top-left |
vector<IndexVector> colIndices(numDOF); // for the couple blocks |
1124 |
index.push_back(node+myN0-1); |
RankVector neighbour; |
1125 |
num++; |
IndexVector offsetInShared(1,0); |
1126 |
|
IndexVector sendShared, recvShared; |
1127 |
|
int numShared=0; |
1128 |
|
const int x=m_mpiInfo->rank%m_NX; |
1129 |
|
const int y=m_mpiInfo->rank/m_NX; |
1130 |
|
for (int i1=-1; i1<2; i1++) { |
1131 |
|
for (int i0=-1; i0<2; i0++) { |
1132 |
|
// skip this rank |
1133 |
|
if (i0==0 && i1==0) |
1134 |
|
continue; |
1135 |
|
// location of neighbour rank |
1136 |
|
const int nx=x+i0; |
1137 |
|
const int ny=y+i1; |
1138 |
|
if (nx>=0 && ny>=0 && nx<m_NX && ny<m_NY) { |
1139 |
|
neighbour.push_back(ny*m_NX+nx); |
1140 |
|
if (i0==0) { |
1141 |
|
// sharing top or bottom edge |
1142 |
|
const int firstDOF=(i1==-1 ? 0 : numDOF-nDOF0); |
1143 |
|
const int firstNode=(i1==-1 ? left : m_N0*(m_N1-1)+left); |
1144 |
|
offsetInShared.push_back(offsetInShared.back()+nDOF0); |
1145 |
|
for (dim_t i=0; i<nDOF0; i++, numShared++) { |
1146 |
|
sendShared.push_back(firstDOF+i); |
1147 |
|
recvShared.push_back(numDOF+numShared); |
1148 |
|
if (i>0) |
1149 |
|
colIndices[firstDOF+i-1].push_back(numShared); |
1150 |
|
colIndices[firstDOF+i].push_back(numShared); |
1151 |
|
if (i<nDOF0-1) |
1152 |
|
colIndices[firstDOF+i+1].push_back(numShared); |
1153 |
|
m_dofMap[firstNode+i]=numDOF+numShared; |
1154 |
|
} |
1155 |
|
} else if (i1==0) { |
1156 |
|
// sharing left or right edge |
1157 |
|
const int firstDOF=(i0==-1 ? 0 : nDOF0-1); |
1158 |
|
const int firstNode=(i0==-1 ? bottom*m_N0 : (bottom+1)*m_N0-1); |
1159 |
|
offsetInShared.push_back(offsetInShared.back()+nDOF1); |
1160 |
|
for (dim_t i=0; i<nDOF1; i++, numShared++) { |
1161 |
|
sendShared.push_back(firstDOF+i*nDOF0); |
1162 |
|
recvShared.push_back(numDOF+numShared); |
1163 |
|
if (i>0) |
1164 |
|
colIndices[firstDOF+(i-1)*nDOF0].push_back(numShared); |
1165 |
|
colIndices[firstDOF+i*nDOF0].push_back(numShared); |
1166 |
|
if (i<nDOF1-1) |
1167 |
|
colIndices[firstDOF+(i+1)*nDOF0].push_back(numShared); |
1168 |
|
m_dofMap[firstNode+i*m_N0]=numDOF+numShared; |
1169 |
|
} |
1170 |
|
} else { |
1171 |
|
// sharing a node |
1172 |
|
const int dof=(i0+1)/2*(nDOF0-1)+(i1+1)/2*(numDOF-nDOF0); |
1173 |
|
const int node=(i0+1)/2*(m_N0-1)+(i1+1)/2*m_N0*(m_N1-1); |
1174 |
|
offsetInShared.push_back(offsetInShared.back()+1); |
1175 |
|
sendShared.push_back(dof); |
1176 |
|
recvShared.push_back(numDOF+numShared); |
1177 |
|
colIndices[dof].push_back(numShared); |
1178 |
|
m_dofMap[node]=numDOF+numShared; |
1179 |
|
++numShared; |
1180 |
|
} |
1181 |
|
} |
1182 |
} |
} |
1183 |
} |
} |
|
if (x>0) { |
|
|
// left |
|
|
index.push_back(node-1); |
|
|
num++; |
|
|
} |
|
1184 |
|
|
1185 |
return num; |
// create connector |
1186 |
} |
Paso_SharedComponents *snd_shcomp = Paso_SharedComponents_alloc( |
1187 |
|
numDOF, neighbour.size(), &neighbour[0], &sendShared[0], |
1188 |
|
&offsetInShared[0], 1, 0, m_mpiInfo); |
1189 |
|
Paso_SharedComponents *rcv_shcomp = Paso_SharedComponents_alloc( |
1190 |
|
numDOF, neighbour.size(), &neighbour[0], &recvShared[0], |
1191 |
|
&offsetInShared[0], 1, 0, m_mpiInfo); |
1192 |
|
m_connector = Paso_Connector_alloc(snd_shcomp, rcv_shcomp); |
1193 |
|
Paso_SharedComponents_free(snd_shcomp); |
1194 |
|
Paso_SharedComponents_free(rcv_shcomp); |
1195 |
|
|
1196 |
//private |
// create main and couple blocks |
1197 |
void Rectangle::generateCouplePatterns(Paso_Pattern** colPattern, Paso_Pattern** rowPattern) const |
Paso_Pattern *mainPattern = createMainPattern(); |
1198 |
{ |
Paso_Pattern *colPattern, *rowPattern; |
1199 |
IndexVector ptr(1,0); |
createCouplePatterns(colIndices, numShared, &colPattern, &rowPattern); |
|
IndexVector index; |
|
|
const dim_t myN0 = (m_offset0==0 ? m_N0 : m_N0-1); |
|
|
const dim_t myN1 = (m_offset1==0 ? m_N1 : m_N1-1); |
|
|
const IndexVector faces=getNumFacesPerBoundary(); |
|
1200 |
|
|
1201 |
// bottom edge |
// allocate paso distribution |
1202 |
dim_t n=0; |
Paso_Distribution* distribution = Paso_Distribution_alloc(m_mpiInfo, |
1203 |
if (faces[0] == 0) { |
const_cast<index_t*>(&m_nodeDistribution[0]), 1, 0); |
|
index.push_back(2*(myN0+myN1+1)); |
|
|
index.push_back(2*(myN0+myN1+1)+1); |
|
|
n+=2; |
|
|
if (faces[2] == 0) { |
|
|
index.push_back(0); |
|
|
index.push_back(1); |
|
|
index.push_back(2); |
|
|
n+=3; |
|
|
} |
|
|
} else if (faces[2] == 0) { |
|
|
index.push_back(1); |
|
|
index.push_back(2); |
|
|
n+=2; |
|
|
} |
|
|
// n=neighbours of bottom-left corner node |
|
|
ptr.push_back(ptr.back()+n); |
|
|
n=0; |
|
|
if (faces[2] == 0) { |
|
|
for (dim_t i=1; i<myN0-1; i++) { |
|
|
index.push_back(i); |
|
|
index.push_back(i+1); |
|
|
index.push_back(i+2); |
|
|
ptr.push_back(ptr.back()+3); |
|
|
} |
|
|
index.push_back(myN0-1); |
|
|
index.push_back(myN0); |
|
|
n+=2; |
|
|
if (faces[1] == 0) { |
|
|
index.push_back(myN0+1); |
|
|
index.push_back(myN0+2); |
|
|
index.push_back(myN0+3); |
|
|
n+=3; |
|
|
} |
|
|
} else { |
|
|
for (dim_t i=1; i<myN0-1; i++) { |
|
|
ptr.push_back(ptr.back()); |
|
|
} |
|
|
if (faces[1] == 0) { |
|
|
index.push_back(myN0+2); |
|
|
index.push_back(myN0+3); |
|
|
n+=2; |
|
|
} |
|
|
} |
|
|
// n=neighbours of bottom-right corner node |
|
|
ptr.push_back(ptr.back()+n); |
|
1204 |
|
|
1205 |
// 2nd row to 2nd last row |
// finally create the system matrix |
1206 |
for (dim_t i=1; i<myN1-1; i++) { |
m_pattern = Paso_SystemMatrixPattern_alloc(MATRIX_FORMAT_DEFAULT, |
1207 |
// left edge |
distribution, distribution, mainPattern, colPattern, rowPattern, |
1208 |
if (faces[0] == 0) { |
m_connector, m_connector); |
|
index.push_back(2*(myN0+myN1+2)-i); |
|
|
index.push_back(2*(myN0+myN1+2)-i-1); |
|
|
index.push_back(2*(myN0+myN1+2)-i-2); |
|
|
ptr.push_back(ptr.back()+3); |
|
|
} else { |
|
|
ptr.push_back(ptr.back()); |
|
|
} |
|
|
for (dim_t j=1; j<myN0-1; j++) { |
|
|
ptr.push_back(ptr.back()); |
|
|
} |
|
|
// right edge |
|
|
if (faces[1] == 0) { |
|
|
index.push_back(myN0+i+1); |
|
|
index.push_back(myN0+i+2); |
|
|
index.push_back(myN0+i+3); |
|
|
ptr.push_back(ptr.back()+3); |
|
|
} else { |
|
|
ptr.push_back(ptr.back()); |
|
|
} |
|
|
} |
|
1209 |
|
|
1210 |
// top edge |
Paso_Distribution_free(distribution); |
|
n=0; |
|
|
if (faces[0] == 0) { |
|
|
index.push_back(2*myN0+myN1+5); |
|
|
index.push_back(2*myN0+myN1+4); |
|
|
n+=2; |
|
|
if (faces[3] == 0) { |
|
|
index.push_back(2*myN0+myN1+3); |
|
|
index.push_back(2*myN0+myN1+2); |
|
|
index.push_back(2*myN0+myN1+1); |
|
|
n+=3; |
|
|
} |
|
|
} else if (faces[3] == 0) { |
|
|
index.push_back(2*myN0+myN1+2); |
|
|
index.push_back(2*myN0+myN1+1); |
|
|
n+=2; |
|
|
} |
|
|
// n=neighbours of top-left corner node |
|
|
ptr.push_back(ptr.back()+n); |
|
|
n=0; |
|
|
if (faces[3] == 0) { |
|
|
for (dim_t i=1; i<myN0-1; i++) { |
|
|
index.push_back(2*myN0+myN1+i+1); |
|
|
index.push_back(2*myN0+myN1+i); |
|
|
index.push_back(2*myN0+myN1+i-1); |
|
|
ptr.push_back(ptr.back()+3); |
|
|
} |
|
|
index.push_back(myN0+myN1+4); |
|
|
index.push_back(myN0+myN1+3); |
|
|
n+=2; |
|
|
if (faces[1] == 0) { |
|
|
index.push_back(myN0+myN1+2); |
|
|
index.push_back(myN0+myN1+1); |
|
|
index.push_back(myN0+myN1); |
|
|
n+=3; |
|
|
} |
|
|
} else { |
|
|
for (dim_t i=1; i<myN0-1; i++) { |
|
|
ptr.push_back(ptr.back()); |
|
|
} |
|
|
if (faces[1] == 0) { |
|
|
index.push_back(myN0+myN1+1); |
|
|
index.push_back(myN0+myN1); |
|
|
n+=2; |
|
|
} |
|
|
} |
|
|
// n=neighbours of top-right corner node |
|
|
ptr.push_back(ptr.back()+n); |
|
1211 |
|
|
1212 |
dim_t M=ptr.size()-1; |
// useful debug output |
1213 |
map<index_t,index_t> idMap; |
/* |
1214 |
dim_t N=0; |
cout << "--- rcv_shcomp ---" << endl; |
1215 |
for (IndexVector::iterator it=index.begin(); it!=index.end(); it++) { |
cout << "numDOF=" << numDOF << ", numNeighbors=" << neighbour.size() << endl; |
1216 |
if (idMap.find(*it)==idMap.end()) { |
for (size_t i=0; i<neighbour.size(); i++) { |
1217 |
idMap[*it]=N; |
cout << "neighbor[" << i << "]=" << neighbour[i] |
1218 |
*it=N++; |
<< " offsetInShared[" << i+1 << "]=" << offsetInShared[i+1] << endl; |
|
} else { |
|
|
*it=idMap[*it]; |
|
|
} |
|
1219 |
} |
} |
1220 |
|
for (size_t i=0; i<recvShared.size(); i++) { |
1221 |
|
cout << "shared[" << i << "]=" << recvShared[i] << endl; |
1222 |
|
} |
1223 |
|
cout << "--- snd_shcomp ---" << endl; |
1224 |
|
for (size_t i=0; i<sendShared.size(); i++) { |
1225 |
|
cout << "shared[" << i << "]=" << sendShared[i] << endl; |
1226 |
|
} |
1227 |
|
cout << "--- dofMap ---" << endl; |
1228 |
|
for (size_t i=0; i<m_dofMap.size(); i++) { |
1229 |
|
cout << "m_dofMap[" << i << "]=" << m_dofMap[i] << endl; |
1230 |
|
} |
1231 |
|
cout << "--- colIndices ---" << endl; |
1232 |
|
for (size_t i=0; i<colIndices.size(); i++) { |
1233 |
|
cout << "colIndices[" << i << "].size()=" << colIndices[i].size() << endl; |
1234 |
|
} |
1235 |
|
*/ |
1236 |
|
|
1237 |
/* |
/* |
1238 |
cout << "--- colCouple_pattern ---" << endl; |
cout << "--- main_pattern ---" << endl; |
1239 |
cout << "M=" << M << ", N=" << N << endl; |
cout << "M=" << mainPattern->numOutput << ", N=" << mainPattern->numInput << endl; |
1240 |
for (size_t i=0; i<ptr.size(); i++) { |
for (size_t i=0; i<mainPattern->numOutput+1; i++) { |
1241 |
cout << "ptr[" << i << "]=" << ptr[i] << endl; |
cout << "ptr[" << i << "]=" << mainPattern->ptr[i] << endl; |
1242 |
} |
} |
1243 |
for (size_t i=0; i<index.size(); i++) { |
for (size_t i=0; i<mainPattern->ptr[mainPattern->numOutput]; i++) { |
1244 |
cout << "index[" << i << "]=" << index[i] << endl; |
cout << "index[" << i << "]=" << mainPattern->index[i] << endl; |
1245 |
} |
} |
1246 |
*/ |
*/ |
1247 |
|
|
1248 |
// now build the row couple pattern |
/* |
1249 |
IndexVector ptr2(1,0); |
cout << "--- colCouple_pattern ---" << endl; |
1250 |
IndexVector index2; |
cout << "M=" << colPattern->numOutput << ", N=" << colPattern->numInput << endl; |
1251 |
for (dim_t id=0; id<N; id++) { |
for (size_t i=0; i<colPattern->numOutput+1; i++) { |
1252 |
n=0; |
cout << "ptr[" << i << "]=" << colPattern->ptr[i] << endl; |
1253 |
for (dim_t i=0; i<M; i++) { |
} |
1254 |
for (dim_t j=ptr[i]; j<ptr[i+1]; j++) { |
for (size_t i=0; i<colPattern->ptr[colPattern->numOutput]; i++) { |
1255 |
if (index[j]==id) { |
cout << "index[" << i << "]=" << colPattern->index[i] << endl; |
|
index2.push_back(i); |
|
|
n++; |
|
|
break; |
|
|
} |
|
|
} |
|
|
} |
|
|
ptr2.push_back(ptr2.back()+n); |
|
1256 |
} |
} |
1257 |
|
*/ |
1258 |
|
|
1259 |
/* |
/* |
1260 |
cout << "--- rowCouple_pattern ---" << endl; |
cout << "--- rowCouple_pattern ---" << endl; |
1261 |
cout << "M=" << N << ", N=" << M << endl; |
cout << "M=" << rowPattern->numOutput << ", N=" << rowPattern->numInput << endl; |
1262 |
for (size_t i=0; i<ptr2.size(); i++) { |
for (size_t i=0; i<rowPattern->numOutput+1; i++) { |
1263 |
cout << "ptr[" << i << "]=" << ptr2[i] << endl; |
cout << "ptr[" << i << "]=" << rowPattern->ptr[i] << endl; |
1264 |
} |
} |
1265 |
for (size_t i=0; i<index2.size(); i++) { |
for (size_t i=0; i<rowPattern->ptr[rowPattern->numOutput]; i++) { |
1266 |
cout << "index[" << i << "]=" << index2[i] << endl; |
cout << "index[" << i << "]=" << rowPattern->index[i] << endl; |
1267 |
} |
} |
1268 |
*/ |
*/ |
1269 |
|
|
1270 |
// paso will manage the memory |
Paso_Pattern_free(mainPattern); |
1271 |
index_t* indexC = MEMALLOC(index.size(), index_t); |
Paso_Pattern_free(colPattern); |
1272 |
index_t* ptrC = MEMALLOC(ptr.size(), index_t); |
Paso_Pattern_free(rowPattern); |
|
copy(index.begin(), index.end(), indexC); |
|
|
copy(ptr.begin(), ptr.end(), ptrC); |
|
|
*colPattern=Paso_Pattern_alloc(MATRIX_FORMAT_DEFAULT, M, N, ptrC, indexC); |
|
|
|
|
|
// paso will manage the memory |
|
|
indexC = MEMALLOC(index2.size(), index_t); |
|
|
ptrC = MEMALLOC(ptr2.size(), index_t); |
|
|
copy(index2.begin(), index2.end(), indexC); |
|
|
copy(ptr2.begin(), ptr2.end(), ptrC); |
|
|
*rowPattern=Paso_Pattern_alloc(MATRIX_FORMAT_DEFAULT, N, M, ptrC, indexC); |
|
1273 |
} |
} |
1274 |
|
|
1275 |
//protected |
//protected |
1278 |
{ |
{ |
1279 |
const dim_t numComp = in.getDataPointSize(); |
const dim_t numComp = in.getDataPointSize(); |
1280 |
if (reduced) { |
if (reduced) { |
1281 |
|
out.requireWrite(); |
1282 |
/*** GENERATOR SNIP_INTERPOLATE_REDUCED_ELEMENTS TOP */ |
/*** GENERATOR SNIP_INTERPOLATE_REDUCED_ELEMENTS TOP */ |
1283 |
const double c0 = .25; |
const double c0 = .25; |
1284 |
#pragma omp parallel for |
#pragma omp parallel for |
1296 |
} /* end of k1 loop */ |
} /* end of k1 loop */ |
1297 |
/* GENERATOR SNIP_INTERPOLATE_REDUCED_ELEMENTS BOTTOM */ |
/* GENERATOR SNIP_INTERPOLATE_REDUCED_ELEMENTS BOTTOM */ |
1298 |
} else { |
} else { |
1299 |
|
out.requireWrite(); |
1300 |
/*** GENERATOR SNIP_INTERPOLATE_ELEMENTS TOP */ |
/*** GENERATOR SNIP_INTERPOLATE_ELEMENTS TOP */ |
1301 |
const double c0 = .16666666666666666667; |
const double c0 = .16666666666666666667; |
1302 |
const double c1 = .044658198738520451079; |
const double c1 = .044658198738520451079; |
1327 |
{ |
{ |
1328 |
const dim_t numComp = in.getDataPointSize(); |
const dim_t numComp = in.getDataPointSize(); |
1329 |
if (reduced) { |
if (reduced) { |
1330 |
|
out.requireWrite(); |
1331 |
const double c0 = .5; |
const double c0 = .5; |
1332 |
#pragma omp parallel |
#pragma omp parallel |
1333 |
{ |
{ |
1379 |
/* GENERATOR SNIP_INTERPOLATE_REDUCED_FACES BOTTOM */ |
/* GENERATOR SNIP_INTERPOLATE_REDUCED_FACES BOTTOM */ |
1380 |
} // end of parallel section |
} // end of parallel section |
1381 |
} else { |
} else { |
1382 |
|
out.requireWrite(); |
1383 |
const double c0 = 0.21132486540518711775; |
const double c0 = 0.21132486540518711775; |
1384 |
const double c1 = 0.78867513459481288225; |
const double c1 = 0.78867513459481288225; |
1385 |
#pragma omp parallel |
#pragma omp parallel |
1438 |
} |
} |
1439 |
} |
} |
1440 |
|
|
1441 |
|
//protected |
1442 |
|
void Rectangle::assemblePDESingle(Paso_SystemMatrix* mat, |
1443 |
|
escript::Data& rhs, const escript::Data& A, const escript::Data& B, |
1444 |
|
const escript::Data& C, const escript::Data& D, |
1445 |
|
const escript::Data& X, const escript::Data& Y, |
1446 |
|
const escript::Data& d, const escript::Data& y) const |
1447 |
|
{ |
1448 |
|
const double h0 = m_l0/m_gNE0; |
1449 |
|
const double h1 = m_l1/m_gNE1; |
1450 |
|
/*** GENERATOR SNIP_PDE_SINGLE_PRE TOP */ |
1451 |
|
const double w0 = -0.1555021169820365539*h1/h0; |
1452 |
|
const double w1 = 0.041666666666666666667; |
1453 |
|
const double w10 = -0.041666666666666666667*h0/h1; |
1454 |
|
const double w11 = 0.1555021169820365539*h1/h0; |
1455 |
|
const double w12 = 0.1555021169820365539*h0/h1; |
1456 |
|
const double w13 = 0.01116454968463011277*h0/h1; |
1457 |
|
const double w14 = 0.01116454968463011277*h1/h0; |
1458 |
|
const double w15 = 0.041666666666666666667*h1/h0; |
1459 |
|
const double w16 = -0.01116454968463011277*h0/h1; |
1460 |
|
const double w17 = -0.1555021169820365539*h0/h1; |
1461 |
|
const double w18 = -0.33333333333333333333*h1/h0; |
1462 |
|
const double w19 = 0.25000000000000000000; |
1463 |
|
const double w2 = -0.15550211698203655390; |
1464 |
|
const double w20 = -0.25000000000000000000; |
1465 |
|
const double w21 = 0.16666666666666666667*h0/h1; |
1466 |
|
const double w22 = -0.16666666666666666667*h1/h0; |
1467 |
|
const double w23 = -0.16666666666666666667*h0/h1; |
1468 |
|
const double w24 = 0.33333333333333333333*h1/h0; |
1469 |
|
const double w25 = 0.33333333333333333333*h0/h1; |
1470 |
|
const double w26 = 0.16666666666666666667*h1/h0; |
1471 |
|
const double w27 = -0.33333333333333333333*h0/h1; |
1472 |
|
const double w28 = -0.032861463941450536761*h1; |
1473 |
|
const double w29 = -0.032861463941450536761*h0; |
1474 |
|
const double w3 = 0.041666666666666666667*h0/h1; |
1475 |
|
const double w30 = -0.12264065304058601714*h1; |
1476 |
|
const double w31 = -0.0023593469594139828636*h1; |
1477 |
|
const double w32 = -0.008805202725216129906*h0; |
1478 |
|
const double w33 = -0.008805202725216129906*h1; |
1479 |
|
const double w34 = 0.032861463941450536761*h1; |
1480 |
|
const double w35 = 0.008805202725216129906*h1; |
1481 |
|
const double w36 = 0.008805202725216129906*h0; |
1482 |
|
const double w37 = 0.0023593469594139828636*h1; |
1483 |
|
const double w38 = 0.12264065304058601714*h1; |
1484 |
|
const double w39 = 0.032861463941450536761*h0; |
1485 |
|
const double w4 = 0.15550211698203655390; |
1486 |
|
const double w40 = -0.12264065304058601714*h0; |
1487 |
|
const double w41 = -0.0023593469594139828636*h0; |
1488 |
|
const double w42 = 0.0023593469594139828636*h0; |
1489 |
|
const double w43 = 0.12264065304058601714*h0; |
1490 |
|
const double w44 = -0.16666666666666666667*h1; |
1491 |
|
const double w45 = -0.083333333333333333333*h0; |
1492 |
|
const double w46 = 0.083333333333333333333*h1; |
1493 |
|
const double w47 = 0.16666666666666666667*h1; |
1494 |
|
const double w48 = 0.083333333333333333333*h0; |
1495 |
|
const double w49 = -0.16666666666666666667*h0; |
1496 |
|
const double w5 = -0.041666666666666666667; |
1497 |
|
const double w50 = 0.16666666666666666667*h0; |
1498 |
|
const double w51 = -0.083333333333333333333*h1; |
1499 |
|
const double w52 = 0.025917019497006092316*h0*h1; |
1500 |
|
const double w53 = 0.0018607582807716854616*h0*h1; |
1501 |
|
const double w54 = 0.0069444444444444444444*h0*h1; |
1502 |
|
const double w55 = 0.09672363354357992482*h0*h1; |
1503 |
|
const double w56 = 0.00049858867864229740201*h0*h1; |
1504 |
|
const double w57 = 0.055555555555555555556*h0*h1; |
1505 |
|
const double w58 = 0.027777777777777777778*h0*h1; |
1506 |
|
const double w59 = 0.11111111111111111111*h0*h1; |
1507 |
|
const double w6 = -0.01116454968463011277*h1/h0; |
1508 |
|
const double w60 = -0.19716878364870322056*h1; |
1509 |
|
const double w61 = -0.19716878364870322056*h0; |
1510 |
|
const double w62 = -0.052831216351296779436*h0; |
1511 |
|
const double w63 = -0.052831216351296779436*h1; |
1512 |
|
const double w64 = 0.19716878364870322056*h1; |
1513 |
|
const double w65 = 0.052831216351296779436*h1; |
1514 |
|
const double w66 = 0.19716878364870322056*h0; |
1515 |
|
const double w67 = 0.052831216351296779436*h0; |
1516 |
|
const double w68 = -0.5*h1; |
1517 |
|
const double w69 = -0.5*h0; |
1518 |
|
const double w7 = 0.011164549684630112770; |
1519 |
|
const double w70 = 0.5*h1; |
1520 |
|
const double w71 = 0.5*h0; |
1521 |
|
const double w72 = 0.1555021169820365539*h0*h1; |
1522 |
|
const double w73 = 0.041666666666666666667*h0*h1; |
1523 |
|
const double w74 = 0.01116454968463011277*h0*h1; |
1524 |
|
const double w75 = 0.25*h0*h1; |
1525 |
|
const double w8 = -0.011164549684630112770; |
1526 |
|
const double w9 = -0.041666666666666666667*h1/h0; |
1527 |
|
/* GENERATOR SNIP_PDE_SINGLE_PRE BOTTOM */ |
1528 |
|
|
1529 |
|
rhs.requireWrite(); |
1530 |
|
#pragma omp parallel |
1531 |
|
{ |
1532 |
|
for (index_t k1_0=0; k1_0<2; k1_0++) { // colouring |
1533 |
|
#pragma omp for |
1534 |
|
for (index_t k1=k1_0; k1<m_NE1; k1+=2) { |
1535 |
|
for (index_t k0=0; k0<m_NE0; ++k0) { |
1536 |
|
bool add_EM_S=false; |
1537 |
|
bool add_EM_F=false; |
1538 |
|
vector<double> EM_S(4*4, 0); |
1539 |
|
vector<double> EM_F(4, 0); |
1540 |
|
const index_t e = k0 + m_NE0*k1; |
1541 |
|
/*** GENERATOR SNIP_PDE_SINGLE TOP */ |
1542 |
|
/////////////// |
1543 |
|
// process A // |
1544 |
|
/////////////// |
1545 |
|
if (!A.isEmpty()) { |
1546 |
|
add_EM_S=true; |
1547 |
|
const double* A_p=const_cast<escript::Data*>(&A)->getSampleDataRO(e); |
1548 |
|
if (A.actsExpanded()) { |
1549 |
|
const register double A_00_0 = A_p[INDEX3(0,0,0,2,2)]; |
1550 |
|
const register double A_01_0 = A_p[INDEX3(0,1,0,2,2)]; |
1551 |
|
const register double A_10_0 = A_p[INDEX3(1,0,0,2,2)]; |
1552 |
|
const register double A_11_0 = A_p[INDEX3(1,1,0,2,2)]; |
1553 |
|
const register double A_00_1 = A_p[INDEX3(0,0,1,2,2)]; |
1554 |
|
const register double A_01_1 = A_p[INDEX3(0,1,1,2,2)]; |
1555 |
|
const register double A_10_1 = A_p[INDEX3(1,0,1,2,2)]; |
1556 |
|
const register double A_11_1 = A_p[INDEX3(1,1,1,2,2)]; |
1557 |
|
const register double A_00_2 = A_p[INDEX3(0,0,2,2,2)]; |
1558 |
|
const register double A_01_2 = A_p[INDEX3(0,1,2,2,2)]; |
1559 |
|
const register double A_10_2 = A_p[INDEX3(1,0,2,2,2)]; |
1560 |
|
const register double A_11_2 = A_p[INDEX3(1,1,2,2,2)]; |
1561 |
|
const register double A_00_3 = A_p[INDEX3(0,0,3,2,2)]; |
1562 |
|
const register double A_01_3 = A_p[INDEX3(0,1,3,2,2)]; |
1563 |
|
const register double A_10_3 = A_p[INDEX3(1,0,3,2,2)]; |
1564 |
|
const register double A_11_3 = A_p[INDEX3(1,1,3,2,2)]; |
1565 |
|
const register double tmp4_0 = A_10_1 + A_10_2; |
1566 |
|
const register double tmp12_0 = A_11_0 + A_11_2; |
1567 |
|
const register double tmp2_0 = A_11_0 + A_11_1 + A_11_2 + A_11_3; |
1568 |
|
const register double tmp10_0 = A_01_3 + A_10_3; |
1569 |
|
const register double tmp14_0 = A_01_0 + A_01_3 + A_10_0 + A_10_3; |
1570 |
|
const register double tmp0_0 = A_01_0 + A_01_3; |
1571 |
|
const register double tmp13_0 = A_01_2 + A_10_1; |
1572 |
|
const register double tmp3_0 = A_00_2 + A_00_3; |
1573 |
|
const register double tmp11_0 = A_11_1 + A_11_3; |
1574 |
|
const register double tmp18_0 = A_01_1 + A_10_1; |
1575 |
|
const register double tmp1_0 = A_00_0 + A_00_1; |
1576 |
|
const register double tmp15_0 = A_01_1 + A_10_2; |
1577 |
|
const register double tmp5_0 = A_00_0 + A_00_1 + A_00_2 + A_00_3; |
1578 |
|
const register double tmp16_0 = A_10_0 + A_10_3; |
1579 |
|
const register double tmp6_0 = A_01_3 + A_10_0; |
1580 |
|
const register double tmp17_0 = A_01_1 + A_01_2; |
1581 |
|
const register double tmp9_0 = A_01_0 + A_10_0; |
1582 |
|
const register double tmp7_0 = A_01_0 + A_10_3; |
1583 |
|
const register double tmp8_0 = A_01_1 + A_01_2 + A_10_1 + A_10_2; |
1584 |
|
const register double tmp19_0 = A_01_2 + A_10_2; |
1585 |
|
const register double tmp14_1 = A_10_0*w8; |
1586 |
|
const register double tmp23_1 = tmp3_0*w14; |
1587 |
|
const register double tmp35_1 = A_01_0*w8; |
1588 |
|
const register double tmp54_1 = tmp13_0*w8; |
1589 |
|
const register double tmp20_1 = tmp9_0*w4; |
1590 |
|
const register double tmp25_1 = tmp12_0*w12; |
1591 |
|
const register double tmp2_1 = A_01_1*w4; |
1592 |
|
const register double tmp44_1 = tmp7_0*w7; |
1593 |
|
const register double tmp26_1 = tmp10_0*w4; |
1594 |
|
const register double tmp52_1 = tmp18_0*w8; |
1595 |
|
const register double tmp48_1 = A_10_1*w7; |
1596 |
|
const register double tmp46_1 = A_01_3*w8; |
1597 |
|
const register double tmp50_1 = A_01_0*w2; |
1598 |
|
const register double tmp8_1 = tmp4_0*w5; |
1599 |
|
const register double tmp56_1 = tmp19_0*w8; |
1600 |
|
const register double tmp9_1 = tmp2_0*w10; |
1601 |
|
const register double tmp19_1 = A_10_3*w2; |
1602 |
|
const register double tmp47_1 = A_10_2*w4; |
1603 |
|
const register double tmp16_1 = tmp3_0*w0; |
1604 |
|
const register double tmp18_1 = tmp1_0*w6; |
1605 |
|
const register double tmp31_1 = tmp11_0*w12; |
1606 |
|
const register double tmp55_1 = tmp15_0*w2; |
1607 |
|
const register double tmp39_1 = A_10_2*w7; |
1608 |
|
const register double tmp11_1 = tmp6_0*w7; |
1609 |
|
const register double tmp40_1 = tmp11_0*w17; |
1610 |
|
const register double tmp34_1 = tmp15_0*w8; |
1611 |
|
const register double tmp33_1 = tmp14_0*w5; |
1612 |
|
const register double tmp24_1 = tmp11_0*w13; |
1613 |
|
const register double tmp3_1 = tmp1_0*w0; |
1614 |
|
const register double tmp5_1 = tmp2_0*w3; |
1615 |
|
const register double tmp43_1 = tmp17_0*w5; |
1616 |
|
const register double tmp15_1 = A_01_2*w4; |
1617 |
|
const register double tmp53_1 = tmp19_0*w2; |
1618 |
|
const register double tmp27_1 = tmp3_0*w11; |
1619 |
|
const register double tmp32_1 = tmp13_0*w2; |
1620 |
|
const register double tmp10_1 = tmp5_0*w9; |
1621 |
|
const register double tmp37_1 = A_10_1*w4; |
1622 |
|
const register double tmp38_1 = tmp5_0*w15; |
1623 |
|
const register double tmp17_1 = A_01_1*w7; |
1624 |
|
const register double tmp12_1 = tmp7_0*w4; |
1625 |
|
const register double tmp22_1 = tmp10_0*w7; |
1626 |
|
const register double tmp57_1 = tmp18_0*w2; |
1627 |
|
const register double tmp28_1 = tmp9_0*w7; |
1628 |
|
const register double tmp29_1 = tmp1_0*w14; |
1629 |
|
const register double tmp51_1 = tmp11_0*w16; |
1630 |
|
const register double tmp42_1 = tmp12_0*w16; |
1631 |
|
const register double tmp49_1 = tmp12_0*w17; |
1632 |
|
const register double tmp21_1 = tmp1_0*w11; |
1633 |
|
const register double tmp1_1 = tmp0_0*w1; |
1634 |
|
const register double tmp45_1 = tmp6_0*w4; |
1635 |
|
const register double tmp7_1 = A_10_0*w2; |
1636 |
|
const register double tmp6_1 = tmp3_0*w6; |
1637 |
|
const register double tmp13_1 = tmp8_0*w1; |
1638 |
|
const register double tmp36_1 = tmp16_0*w1; |
1639 |
|
const register double tmp41_1 = A_01_3*w2; |
1640 |
|
const register double tmp30_1 = tmp12_0*w13; |
1641 |
|
const register double tmp4_1 = A_01_2*w7; |
1642 |
|
const register double tmp0_1 = A_10_3*w8; |
1643 |
|
EM_S[INDEX2(0,1,4)]+=tmp0_1 + tmp1_1 + tmp2_1 + tmp3_1 + tmp4_1 + tmp5_1 + tmp6_1 + tmp7_1 + tmp8_1; |
1644 |
|
EM_S[INDEX2(1,2,4)]+=tmp10_1 + tmp11_1 + tmp12_1 + tmp13_1 + tmp9_1; |
1645 |
|
EM_S[INDEX2(3,2,4)]+=tmp14_1 + tmp15_1 + tmp16_1 + tmp17_1 + tmp18_1 + tmp19_1 + tmp1_1 + tmp5_1 + tmp8_1; |
1646 |
|
EM_S[INDEX2(0,0,4)]+=tmp13_1 + tmp20_1 + tmp21_1 + tmp22_1 + tmp23_1 + tmp24_1 + tmp25_1; |
1647 |
|
EM_S[INDEX2(3,3,4)]+=tmp13_1 + tmp26_1 + tmp27_1 + tmp28_1 + tmp29_1 + tmp30_1 + tmp31_1; |
1648 |
|
EM_S[INDEX2(3,0,4)]+=tmp10_1 + tmp32_1 + tmp33_1 + tmp34_1 + tmp9_1; |
1649 |
|
EM_S[INDEX2(3,1,4)]+=tmp35_1 + tmp36_1 + tmp37_1 + tmp38_1 + tmp39_1 + tmp40_1 + tmp41_1 + tmp42_1 + tmp43_1; |
1650 |
|
EM_S[INDEX2(2,1,4)]+=tmp10_1 + tmp13_1 + tmp44_1 + tmp45_1 + tmp9_1; |
1651 |
|
EM_S[INDEX2(0,2,4)]+=tmp36_1 + tmp38_1 + tmp43_1 + tmp46_1 + tmp47_1 + tmp48_1 + tmp49_1 + tmp50_1 + tmp51_1; |
1652 |
|
EM_S[INDEX2(2,0,4)]+=tmp0_1 + tmp15_1 + tmp17_1 + tmp1_1 + tmp38_1 + tmp49_1 + tmp51_1 + tmp7_1 + tmp8_1; |
1653 |
|
EM_S[INDEX2(1,3,4)]+=tmp14_1 + tmp19_1 + tmp1_1 + tmp2_1 + tmp38_1 + tmp40_1 + tmp42_1 + tmp4_1 + tmp8_1; |
1654 |
|
EM_S[INDEX2(2,3,4)]+=tmp16_1 + tmp18_1 + tmp35_1 + tmp36_1 + tmp41_1 + tmp43_1 + tmp47_1 + tmp48_1 + tmp5_1; |
1655 |
|
EM_S[INDEX2(2,2,4)]+=tmp24_1 + tmp25_1 + tmp27_1 + tmp29_1 + tmp33_1 + tmp52_1 + tmp53_1; |
1656 |
|
EM_S[INDEX2(1,0,4)]+=tmp36_1 + tmp37_1 + tmp39_1 + tmp3_1 + tmp43_1 + tmp46_1 + tmp50_1 + tmp5_1 + tmp6_1; |
1657 |
|
EM_S[INDEX2(0,3,4)]+=tmp10_1 + tmp33_1 + tmp54_1 + tmp55_1 + tmp9_1; |
1658 |
|
EM_S[INDEX2(1,1,4)]+=tmp21_1 + tmp23_1 + tmp30_1 + tmp31_1 + tmp33_1 + tmp56_1 + tmp57_1; |
1659 |
|
} else { /* constant data */ |
1660 |
|
const register double A_00 = A_p[INDEX2(0,0,2)]; |
1661 |
|
const register double A_01 = A_p[INDEX2(0,1,2)]; |
1662 |
|
const register double A_10 = A_p[INDEX2(1,0,2)]; |
1663 |
|
const register double A_11 = A_p[INDEX2(1,1,2)]; |
1664 |
|
const register double tmp0_0 = A_01 + A_10; |
1665 |
|
const register double tmp0_1 = A_00*w18; |
1666 |
|
const register double tmp10_1 = A_01*w20; |
1667 |
|
const register double tmp12_1 = A_00*w26; |
1668 |
|
const register double tmp4_1 = A_00*w22; |
1669 |
|
const register double tmp8_1 = A_00*w24; |
1670 |
|
const register double tmp13_1 = A_10*w19; |
1671 |
|
const register double tmp9_1 = tmp0_0*w20; |
1672 |
|
const register double tmp3_1 = A_11*w21; |
1673 |
|
const register double tmp11_1 = A_11*w27; |
1674 |
|
const register double tmp1_1 = A_01*w19; |
1675 |
|
const register double tmp6_1 = A_11*w23; |
1676 |
|
const register double tmp7_1 = A_11*w25; |
1677 |
|
const register double tmp2_1 = A_10*w20; |
1678 |
|
const register double tmp5_1 = tmp0_0*w19; |
1679 |
|
EM_S[INDEX2(0,1,4)]+=tmp0_1 + tmp1_1 + tmp2_1 + tmp3_1; |
1680 |
|
EM_S[INDEX2(1,2,4)]+=tmp4_1 + tmp5_1 + tmp6_1; |
1681 |
|
EM_S[INDEX2(3,2,4)]+=tmp0_1 + tmp1_1 + tmp2_1 + tmp3_1; |
1682 |
|
EM_S[INDEX2(0,0,4)]+=tmp5_1 + tmp7_1 + tmp8_1; |
1683 |
|
EM_S[INDEX2(3,3,4)]+=tmp5_1 + tmp7_1 + tmp8_1; |
1684 |
|
EM_S[INDEX2(3,0,4)]+=tmp4_1 + tmp6_1 + tmp9_1; |
1685 |
|
EM_S[INDEX2(3,1,4)]+=tmp10_1 + tmp11_1 + tmp12_1 + tmp13_1; |
1686 |
|
EM_S[INDEX2(2,1,4)]+=tmp4_1 + tmp5_1 + tmp6_1; |
1687 |
|
EM_S[INDEX2(0,2,4)]+=tmp10_1 + tmp11_1 + tmp12_1 + tmp13_1; |
1688 |
|
EM_S[INDEX2(2,0,4)]+=tmp11_1 + tmp12_1 + tmp1_1 + tmp2_1; |
1689 |
|
EM_S[INDEX2(1,3,4)]+=tmp11_1 + tmp12_1 + tmp1_1 + tmp2_1; |
1690 |
|
EM_S[INDEX2(2,3,4)]+=tmp0_1 + tmp10_1 + tmp13_1 + tmp3_1; |
1691 |
|
EM_S[INDEX2(2,2,4)]+=tmp7_1 + tmp8_1 + tmp9_1; |
1692 |
|
EM_S[INDEX2(1,0,4)]+=tmp0_1 + tmp10_1 + tmp13_1 + tmp3_1; |
1693 |
|
EM_S[INDEX2(0,3,4)]+=tmp4_1 + tmp6_1 + tmp9_1; |
1694 |
|
EM_S[INDEX2(1,1,4)]+=tmp7_1 + tmp8_1 + tmp9_1; |
1695 |
|
} |
1696 |
|
} |
1697 |
|
/////////////// |
1698 |
|
// process B // |
1699 |
|
/////////////// |
1700 |
|
if (!B.isEmpty()) { |
1701 |
|
add_EM_S=true; |
1702 |
|
const double* B_p=const_cast<escript::Data*>(&B)->getSampleDataRO(e); |
1703 |
|
if (B.actsExpanded()) { |
1704 |
|
const register double B_0_0 = B_p[INDEX2(0,0,2)]; |
1705 |
|
const register double B_1_0 = B_p[INDEX2(1,0,2)]; |
1706 |
|
const register double B_0_1 = B_p[INDEX2(0,1,2)]; |
1707 |
|
const register double B_1_1 = B_p[INDEX2(1,1,2)]; |
1708 |
|
const register double B_0_2 = B_p[INDEX2(0,2,2)]; |
1709 |
|
const register double B_1_2 = B_p[INDEX2(1,2,2)]; |
1710 |
|
const register double B_0_3 = B_p[INDEX2(0,3,2)]; |
1711 |
|
const register double B_1_3 = B_p[INDEX2(1,3,2)]; |
1712 |
|
const register double tmp3_0 = B_0_0 + B_0_2; |
1713 |
|
const register double tmp1_0 = B_1_2 + B_1_3; |
1714 |
|
const register double tmp2_0 = B_0_1 + B_0_3; |
1715 |
|
const register double tmp0_0 = B_1_0 + B_1_1; |
1716 |
|
const register double tmp63_1 = B_1_1*w42; |
1717 |
|
const register double tmp79_1 = B_1_1*w40; |
1718 |
|
const register double tmp37_1 = tmp3_0*w35; |
1719 |
|
const register double tmp8_1 = tmp0_0*w32; |
1720 |
|
const register double tmp71_1 = B_0_1*w34; |
1721 |
|
const register double tmp19_1 = B_0_3*w31; |
1722 |
|
const register double tmp15_1 = B_0_3*w34; |
1723 |
|
const register double tmp9_1 = tmp3_0*w34; |
1724 |
|
const register double tmp35_1 = B_1_0*w36; |
1725 |
|
const register double tmp66_1 = B_0_3*w28; |
1726 |
|
const register double tmp28_1 = B_1_0*w42; |
1727 |
|
const register double tmp22_1 = B_1_0*w40; |
1728 |
|
const register double tmp16_1 = B_1_2*w29; |
1729 |
|
const register double tmp6_1 = tmp2_0*w35; |
1730 |
|
const register double tmp55_1 = B_1_3*w40; |
1731 |
|
const register double tmp50_1 = B_1_3*w42; |
1732 |
|
const register double tmp7_1 = tmp1_0*w29; |
1733 |
|
const register double tmp1_1 = tmp1_0*w32; |
1734 |
|
const register double tmp57_1 = B_0_3*w30; |
1735 |
|
const register double tmp18_1 = B_1_1*w32; |
1736 |
|
const register double tmp53_1 = B_1_0*w41; |
1737 |
|
const register double tmp61_1 = B_1_3*w36; |
1738 |
|
const register double tmp27_1 = B_0_3*w38; |
1739 |
|
const register double tmp64_1 = B_0_2*w30; |
1740 |
|
const register double tmp76_1 = B_0_1*w38; |
1741 |
|
const register double tmp39_1 = tmp2_0*w34; |
1742 |
|
const register double tmp62_1 = B_0_1*w31; |
1743 |
|
const register double tmp56_1 = B_0_0*w31; |
1744 |
|
const register double tmp49_1 = B_1_1*w36; |
1745 |
|
const register double tmp2_1 = B_0_2*w31; |
1746 |
|
const register double tmp23_1 = B_0_2*w33; |
1747 |
|
const register double tmp38_1 = B_1_1*w43; |
1748 |
|
const register double tmp74_1 = B_1_2*w41; |
1749 |
|
const register double tmp43_1 = B_1_1*w41; |
1750 |
|
const register double tmp58_1 = B_0_2*w28; |
1751 |
|
const register double tmp67_1 = B_0_0*w33; |
1752 |
|
const register double tmp33_1 = tmp0_0*w39; |
1753 |
|
const register double tmp4_1 = B_0_0*w28; |
1754 |
|
const register double tmp20_1 = B_0_0*w30; |
1755 |
|
const register double tmp13_1 = B_0_2*w38; |
1756 |
|
const register double tmp65_1 = B_1_2*w43; |
1757 |
|
const register double tmp0_1 = tmp0_0*w29; |
1758 |
|
const register double tmp41_1 = tmp3_0*w33; |
1759 |
|
const register double tmp73_1 = B_0_2*w37; |
1760 |
|
const register double tmp69_1 = B_0_0*w38; |
1761 |
|
const register double tmp48_1 = B_1_2*w39; |
1762 |
|
const register double tmp59_1 = B_0_1*w33; |
1763 |
|
const register double tmp17_1 = B_1_3*w41; |
1764 |
|
const register double tmp5_1 = B_0_3*w33; |
1765 |
|
const register double tmp3_1 = B_0_1*w30; |
1766 |
|
const register double tmp21_1 = B_0_1*w28; |
1767 |
|
const register double tmp42_1 = B_1_0*w29; |
1768 |
|
const register double tmp54_1 = B_1_2*w32; |
1769 |
|
const register double tmp60_1 = B_1_0*w39; |
1770 |
|
const register double tmp32_1 = tmp1_0*w36; |
1771 |
|
const register double tmp10_1 = B_0_1*w37; |
1772 |
|
const register double tmp14_1 = B_0_0*w35; |
1773 |
|
const register double tmp29_1 = B_0_1*w35; |
1774 |
|
const register double tmp26_1 = B_1_2*w36; |
1775 |
|
const register double tmp30_1 = B_1_3*w43; |
1776 |
|
const register double tmp70_1 = B_0_2*w35; |
1777 |
|
const register double tmp34_1 = B_1_3*w39; |
1778 |
|
const register double tmp51_1 = B_1_0*w43; |
1779 |
|
const register double tmp31_1 = B_0_2*w34; |
1780 |
|
const register double tmp45_1 = tmp3_0*w28; |
1781 |
|
const register double tmp11_1 = tmp1_0*w39; |
1782 |
|
const register double tmp52_1 = B_1_1*w29; |
1783 |
|
const register double tmp44_1 = B_1_3*w32; |
1784 |
|
const register double tmp25_1 = B_1_1*w39; |
1785 |
|
const register double tmp47_1 = tmp2_0*w33; |
1786 |
|
const register double tmp72_1 = B_1_3*w29; |
1787 |
|
const register double tmp40_1 = tmp2_0*w28; |
1788 |
|
const register double tmp46_1 = B_1_2*w40; |
1789 |
|
const register double tmp36_1 = B_1_2*w42; |
1790 |
|
const register double tmp24_1 = B_0_0*w37; |
1791 |
|
const register double tmp77_1 = B_0_3*w35; |
1792 |
|
const register double tmp68_1 = B_0_3*w37; |
1793 |
|
const register double tmp78_1 = B_0_0*w34; |
1794 |
|
const register double tmp12_1 = tmp0_0*w36; |
1795 |
|
const register double tmp75_1 = B_1_0*w32; |
1796 |
|
EM_S[INDEX2(0,1,4)]+=tmp0_1 + tmp1_1 + tmp2_1 + tmp3_1 + tmp4_1 + tmp5_1; |
1797 |
|
EM_S[INDEX2(1,2,4)]+=tmp6_1 + tmp7_1 + tmp8_1 + tmp9_1; |
1798 |
|
EM_S[INDEX2(3,2,4)]+=tmp10_1 + tmp11_1 + tmp12_1 + tmp13_1 + tmp14_1 + tmp15_1; |
1799 |
|
EM_S[INDEX2(0,0,4)]+=tmp16_1 + tmp17_1 + tmp18_1 + tmp19_1 + tmp20_1 + tmp21_1 + tmp22_1 + tmp23_1; |
1800 |
|
EM_S[INDEX2(3,3,4)]+=tmp24_1 + tmp25_1 + tmp26_1 + tmp27_1 + tmp28_1 + tmp29_1 + tmp30_1 + tmp31_1; |
1801 |
|
EM_S[INDEX2(3,0,4)]+=tmp32_1 + tmp33_1 + tmp6_1 + tmp9_1; |
1802 |
|
EM_S[INDEX2(3,1,4)]+=tmp34_1 + tmp35_1 + tmp36_1 + tmp37_1 + tmp38_1 + tmp39_1; |
1803 |
|
EM_S[INDEX2(2,1,4)]+=tmp32_1 + tmp33_1 + tmp40_1 + tmp41_1; |
1804 |
|
EM_S[INDEX2(0,2,4)]+=tmp42_1 + tmp43_1 + tmp44_1 + tmp45_1 + tmp46_1 + tmp47_1; |
1805 |
|
EM_S[INDEX2(2,0,4)]+=tmp45_1 + tmp47_1 + tmp48_1 + tmp49_1 + tmp50_1 + tmp51_1; |
1806 |
|
EM_S[INDEX2(1,3,4)]+=tmp37_1 + tmp39_1 + tmp52_1 + tmp53_1 + tmp54_1 + tmp55_1; |
1807 |
|
EM_S[INDEX2(2,3,4)]+=tmp11_1 + tmp12_1 + tmp56_1 + tmp57_1 + tmp58_1 + tmp59_1; |
1808 |
|
EM_S[INDEX2(2,2,4)]+=tmp60_1 + tmp61_1 + tmp62_1 + tmp63_1 + tmp64_1 + tmp65_1 + tmp66_1 + tmp67_1; |
1809 |
|
EM_S[INDEX2(1,0,4)]+=tmp0_1 + tmp1_1 + tmp68_1 + tmp69_1 + tmp70_1 + tmp71_1; |
1810 |
|
EM_S[INDEX2(0,3,4)]+=tmp40_1 + tmp41_1 + tmp7_1 + tmp8_1; |
1811 |
|
EM_S[INDEX2(1,1,4)]+=tmp72_1 + tmp73_1 + tmp74_1 + tmp75_1 + tmp76_1 + tmp77_1 + tmp78_1 + tmp79_1; |
1812 |
|
} else { /* constant data */ |
1813 |
|
const register double B_0 = B_p[0]; |
1814 |
|
const register double B_1 = B_p[1]; |
1815 |
|
const register double tmp6_1 = B_1*w50; |
1816 |
|
const register double tmp1_1 = B_1*w45; |
1817 |
|
const register double tmp5_1 = B_1*w49; |
1818 |
|
const register double tmp4_1 = B_1*w48; |
1819 |
|
const register double tmp0_1 = B_0*w44; |
1820 |
|
const register double tmp2_1 = B_0*w46; |
1821 |
|
const register double tmp7_1 = B_0*w51; |
1822 |
|
const register double tmp3_1 = B_0*w47; |
1823 |
|
EM_S[INDEX2(0,1,4)]+=tmp0_1 + tmp1_1; |
1824 |
|
EM_S[INDEX2(1,2,4)]+=tmp1_1 + tmp2_1; |
1825 |
|
EM_S[INDEX2(3,2,4)]+=tmp3_1 + tmp4_1; |
1826 |
|
EM_S[INDEX2(0,0,4)]+=tmp0_1 + tmp5_1; |
1827 |
|
EM_S[INDEX2(3,3,4)]+=tmp3_1 + tmp6_1; |
1828 |
|
EM_S[INDEX2(3,0,4)]+=tmp2_1 + tmp4_1; |
1829 |
|
EM_S[INDEX2(3,1,4)]+=tmp2_1 + tmp6_1; |
1830 |
|
EM_S[INDEX2(2,1,4)]+=tmp4_1 + tmp7_1; |
1831 |
|
EM_S[INDEX2(0,2,4)]+=tmp5_1 + tmp7_1; |
1832 |
|
EM_S[INDEX2(2,0,4)]+=tmp6_1 + tmp7_1; |
1833 |
|
EM_S[INDEX2(1,3,4)]+=tmp2_1 + tmp5_1; |
1834 |
|
EM_S[INDEX2(2,3,4)]+=tmp0_1 + tmp4_1; |
1835 |
|
EM_S[INDEX2(2,2,4)]+=tmp0_1 + tmp6_1; |
1836 |
|
EM_S[INDEX2(1,0,4)]+=tmp1_1 + tmp3_1; |
1837 |
|
EM_S[INDEX2(0,3,4)]+=tmp1_1 + tmp7_1; |
1838 |
|
EM_S[INDEX2(1,1,4)]+=tmp3_1 + tmp5_1; |
1839 |
|
} |
1840 |
|
} |
1841 |
|
/////////////// |
1842 |
|
// process C // |
1843 |
|
/////////////// |
1844 |
|
if (!C.isEmpty()) { |
1845 |
|
add_EM_S=true; |
1846 |
|
const double* C_p=const_cast<escript::Data*>(&C)->getSampleDataRO(e); |
1847 |
|
if (C.actsExpanded()) { |
1848 |
|
const register double C_0_0 = C_p[INDEX2(0,0,2)]; |
1849 |
|
const register double C_1_0 = C_p[INDEX2(1,0,2)]; |
1850 |
|
const register double C_0_1 = C_p[INDEX2(0,1,2)]; |
1851 |
|
const register double C_1_1 = C_p[INDEX2(1,1,2)]; |
1852 |
|
const register double C_0_2 = C_p[INDEX2(0,2,2)]; |
1853 |
|
const register double C_1_2 = C_p[INDEX2(1,2,2)]; |
1854 |
|
const register double C_0_3 = C_p[INDEX2(0,3,2)]; |
1855 |
|
const register double C_1_3 = C_p[INDEX2(1,3,2)]; |
1856 |
|
const register double tmp2_0 = C_0_1 + C_0_3; |
1857 |
|
const register double tmp1_0 = C_1_2 + C_1_3; |
1858 |
|
const register double tmp3_0 = C_0_0 + C_0_2; |
1859 |
|
const register double tmp0_0 = C_1_0 + C_1_1; |
1860 |
|
const register double tmp64_1 = C_0_2*w30; |
1861 |
|
const register double tmp14_1 = C_0_2*w28; |
1862 |
|
const register double tmp19_1 = C_0_3*w31; |
1863 |
|
const register double tmp22_1 = C_1_0*w40; |
1864 |
|
const register double tmp37_1 = tmp3_0*w35; |
1865 |
|
const register double tmp29_1 = C_0_1*w35; |
1866 |
|
const register double tmp73_1 = C_0_2*w37; |
1867 |
|
const register double tmp74_1 = C_1_2*w41; |
1868 |
|
const register double tmp52_1 = C_1_3*w39; |
1869 |
|
const register double tmp25_1 = C_1_1*w39; |
1870 |
|
const register double tmp62_1 = C_0_1*w31; |
1871 |
|
const register double tmp79_1 = C_1_1*w40; |
1872 |
|
const register double tmp43_1 = C_1_1*w36; |
1873 |
|
const register double tmp27_1 = C_0_3*w38; |
1874 |
|
const register double tmp28_1 = C_1_0*w42; |
1875 |
|
const register double tmp63_1 = C_1_1*w42; |
1876 |
|
const register double tmp59_1 = C_0_3*w34; |
1877 |
|
const register double tmp72_1 = C_1_3*w29; |
1878 |
|
const register double tmp40_1 = tmp2_0*w35; |
1879 |
|
const register double tmp13_1 = C_0_3*w30; |
1880 |
|
const register double tmp51_1 = C_1_2*w40; |
1881 |
|
const register double tmp54_1 = C_1_2*w42; |
1882 |
|
const register double tmp12_1 = C_0_0*w31; |
1883 |
|
const register double tmp2_1 = tmp1_0*w32; |
1884 |
|
const register double tmp68_1 = C_0_2*w31; |
1885 |
|
const register double tmp75_1 = C_1_0*w32; |
1886 |
|
const register double tmp49_1 = C_1_1*w41; |
1887 |
|
const register double tmp4_1 = C_0_2*w35; |
1888 |
|
const register double tmp66_1 = C_0_3*w28; |
1889 |
|
const register double tmp56_1 = C_0_1*w37; |
1890 |
|
const register double tmp5_1 = C_0_1*w34; |
1891 |
|
const register double tmp38_1 = tmp2_0*w34; |
1892 |
|
const register double tmp76_1 = C_0_1*w38; |
1893 |
|
const register double tmp21_1 = C_0_1*w28; |
1894 |
|
const register double tmp69_1 = C_0_1*w30; |
1895 |
|
const register double tmp53_1 = C_1_0*w36; |
1896 |
|
const register double tmp42_1 = C_1_2*w39; |
1897 |
|
const register double tmp32_1 = tmp1_0*w29; |
1898 |
|
const register double tmp45_1 = C_1_0*w43; |
1899 |
|
const register double tmp33_1 = tmp0_0*w32; |
1900 |
|
const register double tmp35_1 = C_1_0*w41; |
1901 |
|
const register double tmp26_1 = C_1_2*w36; |
1902 |
|
const register double tmp67_1 = C_0_0*w33; |
1903 |
|
const register double tmp31_1 = C_0_2*w34; |
1904 |
|
const register double tmp20_1 = C_0_0*w30; |
1905 |
|
const register double tmp70_1 = C_0_0*w28; |
1906 |
|
const register double tmp8_1 = tmp0_0*w39; |
1907 |
|
const register double tmp30_1 = C_1_3*w43; |
1908 |
|
const register double tmp0_1 = tmp0_0*w29; |
1909 |
|
const register double tmp17_1 = C_1_3*w41; |
1910 |
|
const register double tmp58_1 = C_0_0*w35; |
1911 |
|
const register double tmp9_1 = tmp3_0*w33; |
1912 |
|
const register double tmp61_1 = C_1_3*w36; |
1913 |
|
const register double tmp41_1 = tmp3_0*w34; |
1914 |
|
const register double tmp50_1 = C_1_3*w32; |
1915 |
|
const register double tmp18_1 = C_1_1*w32; |
1916 |
|
const register double tmp6_1 = tmp1_0*w36; |
1917 |
|
const register double tmp3_1 = C_0_0*w38; |
1918 |
|
const register double tmp34_1 = C_1_1*w29; |
1919 |
|
const register double tmp77_1 = C_0_3*w35; |
1920 |
|
const register double tmp65_1 = C_1_2*w43; |
1921 |
|
const register double tmp71_1 = C_0_3*w33; |
1922 |
|
const register double tmp55_1 = C_1_1*w43; |
1923 |
|
const register double tmp46_1 = tmp3_0*w28; |
1924 |
|
const register double tmp24_1 = C_0_0*w37; |
1925 |
|
const register double tmp10_1 = tmp1_0*w39; |
1926 |
|
const register double tmp48_1 = C_1_0*w29; |
1927 |
|
const register double tmp15_1 = C_0_1*w33; |
1928 |
|
const register double tmp36_1 = C_1_2*w32; |
1929 |
|
const register double tmp60_1 = C_1_0*w39; |
1930 |
|
const register double tmp47_1 = tmp2_0*w33; |
1931 |
|
const register double tmp16_1 = C_1_2*w29; |
1932 |
|
const register double tmp1_1 = C_0_3*w37; |
1933 |
|
const register double tmp7_1 = tmp2_0*w28; |
1934 |
|
const register double tmp39_1 = C_1_3*w40; |
1935 |
|
const register double tmp44_1 = C_1_3*w42; |
1936 |
|
const register double tmp57_1 = C_0_2*w38; |
1937 |
|
const register double tmp78_1 = C_0_0*w34; |
1938 |
|
const register double tmp11_1 = tmp0_0*w36; |
1939 |
|
const register double tmp23_1 = C_0_2*w33; |
1940 |
|
EM_S[INDEX2(0,1,4)]+=tmp0_1 + tmp1_1 + tmp2_1 + tmp3_1 + tmp4_1 + tmp5_1; |
1941 |
|
EM_S[INDEX2(1,2,4)]+=tmp6_1 + tmp7_1 + tmp8_1 + tmp9_1; |
1942 |
|
EM_S[INDEX2(3,2,4)]+=tmp10_1 + tmp11_1 + tmp12_1 + tmp13_1 + tmp14_1 + tmp15_1; |
1943 |
|
EM_S[INDEX2(0,0,4)]+=tmp16_1 + tmp17_1 + tmp18_1 + tmp19_1 + tmp20_1 + tmp21_1 + tmp22_1 + tmp23_1; |
1944 |
|
EM_S[INDEX2(3,3,4)]+=tmp24_1 + tmp25_1 + tmp26_1 + tmp27_1 + tmp28_1 + tmp29_1 + tmp30_1 + tmp31_1; |
1945 |
|
EM_S[INDEX2(3,0,4)]+=tmp32_1 + tmp33_1 + tmp7_1 + tmp9_1; |
1946 |
|
EM_S[INDEX2(3,1,4)]+=tmp34_1 + tmp35_1 + tmp36_1 + tmp37_1 + tmp38_1 + tmp39_1; |
1947 |
|
EM_S[INDEX2(2,1,4)]+=tmp32_1 + tmp33_1 + tmp40_1 + tmp41_1; |
1948 |
|
EM_S[INDEX2(0,2,4)]+=tmp42_1 + tmp43_1 + tmp44_1 + tmp45_1 + tmp46_1 + tmp47_1; |
1949 |
|
EM_S[INDEX2(2,0,4)]+=tmp46_1 + tmp47_1 + tmp48_1 + tmp49_1 + tmp50_1 + tmp51_1; |
1950 |
|
EM_S[INDEX2(1,3,4)]+=tmp37_1 + tmp38_1 + tmp52_1 + tmp53_1 + tmp54_1 + tmp55_1; |
1951 |
|
EM_S[INDEX2(2,3,4)]+=tmp10_1 + tmp11_1 + tmp56_1 + tmp57_1 + tmp58_1 + tmp59_1; |
1952 |
|
EM_S[INDEX2(2,2,4)]+=tmp60_1 + tmp61_1 + tmp62_1 + tmp63_1 + tmp64_1 + tmp65_1 + tmp66_1 + tmp67_1; |
1953 |
|
EM_S[INDEX2(1,0,4)]+=tmp0_1 + tmp2_1 + tmp68_1 + tmp69_1 + tmp70_1 + tmp71_1; |
1954 |
|
EM_S[INDEX2(0,3,4)]+=tmp40_1 + tmp41_1 + tmp6_1 + tmp8_1; |
1955 |
|
EM_S[INDEX2(1,1,4)]+=tmp72_1 + tmp73_1 + tmp74_1 + tmp75_1 + tmp76_1 + tmp77_1 + tmp78_1 + tmp79_1; |
1956 |
|
} else { /* constant data */ |
1957 |
|
const register double C_0 = C_p[0]; |
1958 |
|
const register double C_1 = C_p[1]; |
1959 |
|
const register double tmp1_1 = C_1*w45; |
1960 |
|
const register double tmp3_1 = C_0*w51; |
1961 |
|
const register double tmp4_1 = C_0*w44; |
1962 |
|
const register double tmp7_1 = C_0*w46; |
1963 |
|
const register double tmp5_1 = C_1*w49; |
1964 |
|
const register double tmp2_1 = C_1*w48; |
1965 |
|
const register double tmp0_1 = C_0*w47; |
1966 |
|
const register double tmp6_1 = C_1*w50; |
1967 |
|
EM_S[INDEX2(0,1,4)]+=tmp0_1 + tmp1_1; |
1968 |
|
EM_S[INDEX2(1,2,4)]+=tmp2_1 + tmp3_1; |
1969 |
|
EM_S[INDEX2(3,2,4)]+=tmp2_1 + tmp4_1; |
1970 |
|
EM_S[INDEX2(0,0,4)]+=tmp4_1 + tmp5_1; |
1971 |
|
EM_S[INDEX2(3,3,4)]+=tmp0_1 + tmp6_1; |
1972 |
|
EM_S[INDEX2(3,0,4)]+=tmp1_1 + tmp3_1; |
1973 |
|
EM_S[INDEX2(3,1,4)]+=tmp5_1 + tmp7_1; |
1974 |
|
EM_S[INDEX2(2,1,4)]+=tmp1_1 + tmp7_1; |
1975 |
|
EM_S[INDEX2(0,2,4)]+=tmp3_1 + tmp6_1; |
1976 |
|
EM_S[INDEX2(2,0,4)]+=tmp3_1 + tmp5_1; |
1977 |
|
EM_S[INDEX2(1,3,4)]+=tmp6_1 + tmp7_1; |
1978 |
|
EM_S[INDEX2(2,3,4)]+=tmp0_1 + tmp2_1; |
1979 |
|
EM_S[INDEX2(2,2,4)]+=tmp4_1 + tmp6_1; |
1980 |
|
EM_S[INDEX2(1,0,4)]+=tmp1_1 + tmp4_1; |
1981 |
|
EM_S[INDEX2(0,3,4)]+=tmp2_1 + tmp7_1; |
1982 |
|
EM_S[INDEX2(1,1,4)]+=tmp0_1 + tmp5_1; |
1983 |
|
} |
1984 |
|
} |
1985 |
|
/////////////// |
1986 |
|
// process D // |
1987 |
|
/////////////// |
1988 |
|
if (!D.isEmpty()) { |
1989 |
|
add_EM_S=true; |
1990 |
|
const double* D_p=const_cast<escript::Data*>(&D)->getSampleDataRO(e); |
1991 |
|
if (D.actsExpanded()) { |
1992 |
|
const register double D_0 = D_p[0]; |
1993 |
|
const register double D_1 = D_p[1]; |
1994 |
|
const register double D_2 = D_p[2]; |
1995 |
|
const register double D_3 = D_p[3]; |
1996 |
|
const register double tmp4_0 = D_1 + D_3; |
1997 |
|
const register double tmp2_0 = D_0 + D_1 + D_2 + D_3; |
1998 |
|
const register double tmp5_0 = D_0 + D_2; |
1999 |
|
const register double tmp0_0 = D_0 + D_1; |
2000 |
|
const register double tmp6_0 = D_0 + D_3; |
2001 |
|
const register double tmp1_0 = D_2 + D_3; |
2002 |
|
const register double tmp3_0 = D_1 + D_2; |
2003 |
|
const register double tmp16_1 = D_1*w56; |
2004 |
|
const register double tmp14_1 = tmp6_0*w54; |
2005 |
|
const register double tmp8_1 = D_3*w55; |
2006 |
|
const register double tmp2_1 = tmp2_0*w54; |
2007 |
|
const register double tmp12_1 = tmp5_0*w52; |
2008 |
|
const register double tmp4_1 = tmp0_0*w53; |
2009 |
|
const register double tmp3_1 = tmp1_0*w52; |
2010 |
|
const register double tmp13_1 = tmp4_0*w53; |
2011 |
|
const register double tmp10_1 = tmp4_0*w52; |
2012 |
|
const register double tmp1_1 = tmp1_0*w53; |
2013 |
|
const register double tmp7_1 = D_3*w56; |
2014 |
|
const register double tmp0_1 = tmp0_0*w52; |
2015 |
|
const register double tmp11_1 = tmp5_0*w53; |
2016 |
|
const register double tmp9_1 = D_0*w56; |
2017 |
|
const register double tmp5_1 = tmp3_0*w54; |
2018 |
|
const register double tmp18_1 = D_2*w56; |
2019 |
|
const register double tmp17_1 = D_1*w55; |
2020 |
|
const register double tmp6_1 = D_0*w55; |
2021 |
|
const register double tmp15_1 = D_2*w55; |
2022 |
|
EM_S[INDEX2(0,1,4)]+=tmp0_1 + tmp1_1; |
2023 |
|
EM_S[INDEX2(1,2,4)]+=tmp2_1; |
2024 |
|
EM_S[INDEX2(3,2,4)]+=tmp3_1 + tmp4_1; |
2025 |
|
EM_S[INDEX2(0,0,4)]+=tmp5_1 + tmp6_1 + tmp7_1; |
2026 |
|
EM_S[INDEX2(3,3,4)]+=tmp5_1 + tmp8_1 + tmp9_1; |
2027 |
|
EM_S[INDEX2(3,0,4)]+=tmp2_1; |
2028 |
|
EM_S[INDEX2(3,1,4)]+=tmp10_1 + tmp11_1; |
2029 |
|
EM_S[INDEX2(2,1,4)]+=tmp2_1; |
2030 |
|
EM_S[INDEX2(0,2,4)]+=tmp12_1 + tmp13_1; |
2031 |
|
EM_S[INDEX2(2,0,4)]+=tmp12_1 + tmp13_1; |
2032 |
|
EM_S[INDEX2(1,3,4)]+=tmp10_1 + tmp11_1; |
2033 |
|
EM_S[INDEX2(2,3,4)]+=tmp3_1 + tmp4_1; |
2034 |
|
EM_S[INDEX2(2,2,4)]+=tmp14_1 + tmp15_1 + tmp16_1; |
2035 |
|
EM_S[INDEX2(1,0,4)]+=tmp0_1 + tmp1_1; |
2036 |
|
EM_S[INDEX2(0,3,4)]+=tmp2_1; |
2037 |
|
EM_S[INDEX2(1,1,4)]+=tmp14_1 + tmp17_1 + tmp18_1; |
2038 |
|
} else { /* constant data */ |
2039 |
|
const register double D_0 = D_p[0]; |
2040 |
|
const register double tmp2_1 = D_0*w59; |
2041 |
|
const register double tmp1_1 = D_0*w58; |
2042 |
|
const register double tmp0_1 = D_0*w57; |
2043 |
|
EM_S[INDEX2(0,1,4)]+=tmp0_1; |
2044 |
|
EM_S[INDEX2(1,2,4)]+=tmp1_1; |
2045 |
|
EM_S[INDEX2(3,2,4)]+=tmp0_1; |
2046 |
|
EM_S[INDEX2(0,0,4)]+=tmp2_1; |
2047 |
|
EM_S[INDEX2(3,3,4)]+=tmp2_1; |
2048 |
|
EM_S[INDEX2(3,0,4)]+=tmp1_1; |
2049 |
|
EM_S[INDEX2(3,1,4)]+=tmp0_1; |
2050 |
|
EM_S[INDEX2(2,1,4)]+=tmp1_1; |
2051 |
|
EM_S[INDEX2(0,2,4)]+=tmp0_1; |
2052 |
|
EM_S[INDEX2(2,0,4)]+=tmp0_1; |
2053 |
|
EM_S[INDEX2(1,3,4)]+=tmp0_1; |
2054 |
|
EM_S[INDEX2(2,3,4)]+=tmp0_1; |
2055 |
|
EM_S[INDEX2(2,2,4)]+=tmp2_1; |
2056 |
|
EM_S[INDEX2(1,0,4)]+=tmp0_1; |
2057 |
|
EM_S[INDEX2(0,3,4)]+=tmp1_1; |
2058 |
|
EM_S[INDEX2(1,1,4)]+=tmp2_1; |
2059 |
|
} |
2060 |
|
} |
2061 |
|
/////////////// |
2062 |
|
// process X // |
2063 |
|
/////////////// |
2064 |
|
if (!X.isEmpty()) { |
2065 |
|
add_EM_F=true; |
2066 |
|
const double* X_p=const_cast<escript::Data*>(&X)->getSampleDataRO(e); |
2067 |
|
if (X.actsExpanded()) { |
2068 |
|
const register double X_0_0 = X_p[INDEX2(0,0,2)]; |
2069 |
|
const register double X_1_0 = X_p[INDEX2(1,0,2)]; |
2070 |
|
const register double X_0_1 = X_p[INDEX2(0,1,2)]; |
2071 |
|
const register double X_1_1 = X_p[INDEX2(1,1,2)]; |
2072 |
|
const register double X_0_2 = X_p[INDEX2(0,2,2)]; |
2073 |
|
const register double X_1_2 = X_p[INDEX2(1,2,2)]; |
2074 |
|
const register double X_0_3 = X_p[INDEX2(0,3,2)]; |
2075 |
|
const register double X_1_3 = X_p[INDEX2(1,3,2)]; |
2076 |
|
const register double tmp1_0 = X_1_1 + X_1_3; |
2077 |
|
const register double tmp3_0 = X_0_0 + X_0_1; |
2078 |
|
const register double tmp2_0 = X_1_0 + X_1_2; |
2079 |
|
const register double tmp0_0 = X_0_2 + X_0_3; |
2080 |
|
const register double tmp8_1 = tmp2_0*w66; |
2081 |
|
const register double tmp5_1 = tmp3_0*w64; |
2082 |
|
const register double tmp14_1 = tmp0_0*w64; |
2083 |
|
const register double tmp3_1 = tmp3_0*w60; |
2084 |
|
const register double tmp9_1 = tmp3_0*w63; |
2085 |
|
const register double tmp13_1 = tmp3_0*w65; |
2086 |
|
const register double tmp12_1 = tmp1_0*w66; |
2087 |
|
const register double tmp10_1 = tmp0_0*w60; |
2088 |
|
const register double tmp2_1 = tmp2_0*w61; |
2089 |
|
const register double tmp6_1 = tmp2_0*w62; |
2090 |
|
const register double tmp4_1 = tmp0_0*w65; |
2091 |
|
const register double tmp11_1 = tmp1_0*w67; |
2092 |
|
const register double tmp1_1 = tmp1_0*w62; |
2093 |
|
const register double tmp7_1 = tmp1_0*w61; |
2094 |
|
const register double tmp0_1 = tmp0_0*w63; |
2095 |
|
const register double tmp15_1 = tmp2_0*w67; |
2096 |
|
EM_F[0]+=tmp0_1 + tmp1_1 + tmp2_1 + tmp3_1; |
2097 |
|
EM_F[1]+=tmp4_1 + tmp5_1 + tmp6_1 + tmp7_1; |
2098 |
|
EM_F[2]+=tmp10_1 + tmp11_1 + tmp8_1 + tmp9_1; |
2099 |
|
EM_F[3]+=tmp12_1 + tmp13_1 + tmp14_1 + tmp15_1; |
2100 |
|
} else { /* constant data */ |
2101 |
|
const register double X_0 = X_p[0]; |
2102 |
|
const register double X_1 = X_p[1]; |
2103 |
|
const register double tmp3_1 = X_1*w71; |
2104 |
|
const register double tmp2_1 = X_0*w70; |
2105 |
|
const register double tmp1_1 = X_0*w68; |
2106 |
|
const register double tmp0_1 = X_1*w69; |
2107 |
|
EM_F[0]+=tmp0_1 + tmp1_1; |
2108 |
|
EM_F[1]+=tmp0_1 + tmp2_1; |
2109 |
|
EM_F[2]+=tmp1_1 + tmp3_1; |
2110 |
|
EM_F[3]+=tmp2_1 + tmp3_1; |
2111 |
|
} |
2112 |
|
} |
2113 |
|
/////////////// |
2114 |
|
// process Y // |
2115 |
|
/////////////// |
2116 |
|
if (!Y.isEmpty()) { |
2117 |
|
add_EM_F=true; |
2118 |
|
const double* Y_p=const_cast<escript::Data*>(&Y)->getSampleDataRO(e); |
2119 |
|
if (Y.actsExpanded()) { |
2120 |
|
const register double Y_0 = Y_p[0]; |
2121 |
|
const register double Y_1 = Y_p[1]; |
2122 |
|
const register double Y_2 = Y_p[2]; |
2123 |
|
const register double Y_3 = Y_p[3]; |
2124 |
|
const register double tmp0_0 = Y_1 + Y_2; |
2125 |
|
const register double tmp1_0 = Y_0 + Y_3; |
2126 |
|
const register double tmp9_1 = Y_0*w74; |
2127 |
|
const register double tmp4_1 = tmp1_0*w73; |
2128 |
|
const register double tmp5_1 = Y_2*w74; |
2129 |
|
const register double tmp7_1 = Y_1*w74; |
2130 |
|
const register double tmp6_1 = Y_2*w72; |
2131 |
|
const register double tmp2_1 = Y_3*w74; |
2132 |
|
const register double tmp8_1 = Y_3*w72; |
2133 |
|
const register double tmp3_1 = Y_1*w72; |
2134 |
|
const register double tmp0_1 = Y_0*w72; |
2135 |
|
const register double tmp1_1 = tmp0_0*w73; |
2136 |
|
EM_F[0]+=tmp0_1 + tmp1_1 + tmp2_1; |
2137 |
|
EM_F[1]+=tmp3_1 + tmp4_1 + tmp5_1; |
2138 |
|
EM_F[2]+=tmp4_1 + tmp6_1 + tmp7_1; |
2139 |
|
EM_F[3]+=tmp1_1 + tmp8_1 + tmp9_1; |
2140 |
|
} else { /* constant data */ |
2141 |
|
const register double Y_0 = Y_p[0]; |
2142 |
|
const register double tmp0_1 = Y_0*w75; |
2143 |
|
EM_F[0]+=tmp0_1; |
2144 |
|
EM_F[1]+=tmp0_1; |
2145 |
|
EM_F[2]+=tmp0_1; |
2146 |
|
EM_F[3]+=tmp0_1; |
2147 |
|
} |
2148 |
|
} |
2149 |
|
/* GENERATOR SNIP_PDE_SINGLE BOTTOM */ |
2150 |
|
|
2151 |
|
// add to matrix (if add_EM_S) and RHS (if add_EM_F) |
2152 |
|
const index_t firstNode=m_N0*k1+k0; |
2153 |
|
IndexVector rowIndex; |
2154 |
|
rowIndex.push_back(m_dofMap[firstNode]); |
2155 |
|
rowIndex.push_back(m_dofMap[firstNode+1]); |
2156 |
|
rowIndex.push_back(m_dofMap[firstNode+m_N0]); |
2157 |
|
rowIndex.push_back(m_dofMap[firstNode+m_N0+1]); |
2158 |
|
if (add_EM_F) { |
2159 |
|
//cout << "-- AddtoRHS -- " << endl; |
2160 |
|
double *F_p=rhs.getSampleDataRW(0); |
2161 |
|
for (index_t i=0; i<rowIndex.size(); i++) { |
2162 |
|
if (rowIndex[i]<getNumDOF()) { |
2163 |
|
F_p[rowIndex[i]]+=EM_F[i]; |
2164 |
|
//cout << "F[" << rowIndex[i] << "]=" << F_p[rowIndex[i]] << endl; |
2165 |
|
} |
2166 |
|
} |
2167 |
|
//cout << "---"<<endl; |
2168 |
|
} |
2169 |
|
if (add_EM_S) { |
2170 |
|
//cout << "-- AddtoSystem -- " << endl; |
2171 |
|
addToSystemMatrix(mat, rowIndex, 1, rowIndex, 1, EM_S); |
2172 |
|
} |
2173 |
|
|
2174 |
|
} // end k0 loop |
2175 |
|
} // end k1 loop |
2176 |
|
} // end of coloring |
2177 |
|
} // end of parallel region |
2178 |
|
} |
2179 |
|
|
2180 |
|
//protected |
2181 |
|
void Rectangle::assemblePDESystem(Paso_SystemMatrix* mat, |
2182 |
|
escript::Data& rhs, const escript::Data& A, const escript::Data& B, |
2183 |
|
const escript::Data& C, const escript::Data& D, |
2184 |
|
const escript::Data& X, const escript::Data& Y, |
2185 |
|
const escript::Data& d, const escript::Data& y) const |
2186 |
|
{ |
2187 |
|
const double h0 = m_l0/m_gNE0; |
2188 |
|
const double h1 = m_l1/m_gNE1; |
2189 |
|
dim_t numEq, numComp; |
2190 |
|
if (!mat) |
2191 |
|
numEq=numComp=(rhs.isEmpty() ? 1 : rhs.getDataPointSize()); |
2192 |
|
else { |
2193 |
|
numEq=mat->logical_row_block_size; |
2194 |
|
numComp=mat->logical_col_block_size; |
2195 |
|
} |
2196 |
|
|
2197 |
|
/* GENERATOR SNIP_PDE_SYSTEM_PRE TOP */ |
2198 |
|
const double w0 = -0.1555021169820365539*h1/h0; |
2199 |
|
const double w1 = 0.041666666666666666667; |
2200 |
|
const double w10 = -0.041666666666666666667*h0/h1; |
2201 |
|
const double w11 = 0.1555021169820365539*h1/h0; |
2202 |
|
const double w12 = 0.1555021169820365539*h0/h1; |
2203 |
|
const double w13 = 0.01116454968463011277*h0/h1; |
2204 |
|
const double w14 = 0.01116454968463011277*h1/h0; |
2205 |
|
const double w15 = 0.041666666666666666667*h1/h0; |
2206 |
|
const double w16 = -0.01116454968463011277*h0/h1; |
2207 |
|
const double w17 = -0.1555021169820365539*h0/h1; |
2208 |
|
const double w18 = -0.33333333333333333333*h1/h0; |
2209 |
|
const double w19 = 0.25000000000000000000; |
2210 |
|
const double w2 = -0.15550211698203655390; |
2211 |
|
const double w20 = -0.25000000000000000000; |
2212 |
|
const double w21 = 0.16666666666666666667*h0/h1; |
2213 |
|
const double w22 = -0.16666666666666666667*h1/h0; |
2214 |
|
const double w23 = -0.16666666666666666667*h0/h1; |
2215 |
|
const double w24 = 0.33333333333333333333*h1/h0; |
2216 |
|
const double w25 = 0.33333333333333333333*h0/h1; |
2217 |
|
const double w26 = 0.16666666666666666667*h1/h0; |
2218 |
|
const double w27 = -0.33333333333333333333*h0/h1; |
2219 |
|
const double w28 = -0.032861463941450536761*h1; |
2220 |
|
const double w29 = -0.032861463941450536761*h0; |
2221 |
|
const double w3 = 0.041666666666666666667*h0/h1; |
2222 |
|
const double w30 = -0.12264065304058601714*h1; |
2223 |
|
const double w31 = -0.0023593469594139828636*h1; |
2224 |
|
const double w32 = -0.008805202725216129906*h0; |
2225 |
|
const double w33 = -0.008805202725216129906*h1; |
2226 |
|
const double w34 = 0.032861463941450536761*h1; |
2227 |
|
const double w35 = 0.008805202725216129906*h1; |
2228 |
|
const double w36 = 0.008805202725216129906*h0; |
2229 |
|
const double w37 = 0.0023593469594139828636*h1; |
2230 |
|
const double w38 = 0.12264065304058601714*h1; |
2231 |
|
const double w39 = 0.032861463941450536761*h0; |
2232 |
|
const double w4 = 0.15550211698203655390; |
2233 |
|
const double w40 = -0.12264065304058601714*h0; |
2234 |
|
const double w41 = -0.0023593469594139828636*h0; |
2235 |
|
const double w42 = 0.0023593469594139828636*h0; |
2236 |
|
const double w43 = 0.12264065304058601714*h0; |
2237 |
|
const double w44 = -0.16666666666666666667*h1; |
2238 |
|
const double w45 = -0.083333333333333333333*h0; |
2239 |
|
const double w46 = 0.083333333333333333333*h1; |
2240 |
|
const double w47 = 0.16666666666666666667*h1; |
2241 |
|
const double w48 = 0.083333333333333333333*h0; |
2242 |
|
const double w49 = -0.16666666666666666667*h0; |
2243 |
|
const double w5 = -0.041666666666666666667; |
2244 |
|
const double w50 = 0.16666666666666666667*h0; |
2245 |
|
const double w51 = -0.083333333333333333333*h1; |
2246 |
|
const double w52 = 0.025917019497006092316*h0*h1; |
2247 |
|
const double w53 = 0.0018607582807716854616*h0*h1; |
2248 |
|
const double w54 = 0.0069444444444444444444*h0*h1; |
2249 |
|
const double w55 = 0.09672363354357992482*h0*h1; |
2250 |
|
const double w56 = 0.00049858867864229740201*h0*h1; |
2251 |
|
const double w57 = 0.055555555555555555556*h0*h1; |
2252 |
|
const double w58 = 0.027777777777777777778*h0*h1; |
2253 |
|
const double w59 = 0.11111111111111111111*h0*h1; |
2254 |
|
const double w6 = -0.01116454968463011277*h1/h0; |
2255 |
|
const double w60 = -0.19716878364870322056*h1; |
2256 |
|
const double w61 = -0.19716878364870322056*h0; |
2257 |
|
const double w62 = -0.052831216351296779436*h0; |
2258 |
|
const double w63 = -0.052831216351296779436*h1; |
2259 |
|
const double w64 = 0.19716878364870322056*h1; |
2260 |
|
const double w65 = 0.052831216351296779436*h1; |
2261 |
|
const double w66 = 0.19716878364870322056*h0; |
2262 |
|
const double w67 = 0.052831216351296779436*h0; |
2263 |
|
const double w68 = -0.5*h1; |
2264 |
|
const double w69 = -0.5*h0; |
2265 |
|
const double w7 = 0.011164549684630112770; |
2266 |
|
const double w70 = 0.5*h1; |
2267 |
|
const double w71 = 0.5*h0; |
2268 |
|
const double w72 = 0.1555021169820365539*h0*h1; |
2269 |
|
const double w73 = 0.041666666666666666667*h0*h1; |
2270 |
|
const double w74 = 0.01116454968463011277*h0*h1; |
2271 |
|
const double w75 = 0.25*h0*h1; |
2272 |
|
const double w8 = -0.011164549684630112770; |
2273 |
|
const double w9 = -0.041666666666666666667*h1/h0; |
2274 |
|
/* GENERATOR SNIP_PDE_SYSTEM_PRE BOTTOM */ |
2275 |
|
|
2276 |
|
rhs.requireWrite(); |
2277 |
|
#pragma omp parallel |
2278 |
|
{ |
2279 |
|
for (index_t k1_0=0; k1_0<2; k1_0++) { // colouring |
2280 |
|
#pragma omp for |
2281 |
|
for (index_t k1=k1_0; k1<m_NE1; k1+=2) { |
2282 |
|
for (index_t k0=0; k0<m_NE0; ++k0) { |
2283 |
|
bool add_EM_S=false; |
2284 |
|
bool add_EM_F=false; |
2285 |
|
vector<double> EM_S(4*4*numEq*numComp, 0); |
2286 |
|
vector<double> EM_F(4*numEq, 0); |
2287 |
|
const index_t e = k0 + m_NE0*k1; |
2288 |
|
/* GENERATOR SNIP_PDE_SYSTEM TOP */ |
2289 |
|
/////////////// |
2290 |
|
// process A // |
2291 |
|
/////////////// |
2292 |
|
if (!A.isEmpty()) { |
2293 |
|
add_EM_S=true; |
2294 |
|
const double* A_p=const_cast<escript::Data*>(&A)->getSampleDataRO(e); |
2295 |
|
if (A.actsExpanded()) { |
2296 |
|
for (index_t k=0; k<numEq; k++) { |
2297 |
|
for (index_t m=0; m<numComp; m++) { |
2298 |
|
const register double A_00_0 = A_p[INDEX5(k,0,m,0,0, numEq,2,numComp,2)]; |
2299 |
|
const register double A_01_0 = A_p[INDEX5(k,0,m,1,0, numEq,2,numComp,2)]; |
2300 |
|
const register double A_10_0 = A_p[INDEX5(k,1,m,0,0, numEq,2,numComp,2)]; |
2301 |
|
const register double A_11_0 = A_p[INDEX5(k,1,m,1,0, numEq,2,numComp,2)]; |
2302 |
|
const register double A_00_1 = A_p[INDEX5(k,0,m,0,1, numEq,2,numComp,2)]; |
2303 |
|
const register double A_01_1 = A_p[INDEX5(k,0,m,1,1, numEq,2,numComp,2)]; |
2304 |
|
const register double A_10_1 = A_p[INDEX5(k,1,m,0,1, numEq,2,numComp,2)]; |
2305 |
|
const register double A_11_1 = A_p[INDEX5(k,1,m,1,1, numEq,2,numComp,2)]; |
2306 |
|
const register double A_00_2 = A_p[INDEX5(k,0,m,0,2, numEq,2,numComp,2)]; |
2307 |
|
const register double A_01_2 = A_p[INDEX5(k,0,m,1,2, numEq,2,numComp,2)]; |
2308 |
|
const register double A_10_2 = A_p[INDEX5(k,1,m,0,2, numEq,2,numComp,2)]; |
2309 |
|
const register double A_11_2 = A_p[INDEX5(k,1,m,1,2, numEq,2,numComp,2)]; |
2310 |
|
const register double A_00_3 = A_p[INDEX5(k,0,m,0,3, numEq,2,numComp,2)]; |
2311 |
|
const register double A_01_3 = A_p[INDEX5(k,0,m,1,3, numEq,2,numComp,2)]; |
2312 |
|
const register double A_10_3 = A_p[INDEX5(k,1,m,0,3, numEq,2,numComp,2)]; |
2313 |
|
const register double A_11_3 = A_p[INDEX5(k,1,m,1,3, numEq,2,numComp,2)]; |
2314 |
|
const register double tmp4_0 = A_10_1 + A_10_2; |
2315 |
|
const register double tmp12_0 = A_11_0 + A_11_2; |
2316 |
|
const register double tmp2_0 = A_11_0 + A_11_1 + A_11_2 + A_11_3; |
2317 |
|
const register double tmp10_0 = A_01_3 + A_10_3; |
2318 |
|
const register double tmp14_0 = A_01_0 + A_01_3 + A_10_0 + A_10_3; |
2319 |
|
const register double tmp0_0 = A_01_0 + A_01_3; |
2320 |
|
const register double tmp13_0 = A_01_2 + A_10_1; |
2321 |
|
const register double tmp3_0 = A_00_2 + A_00_3; |
2322 |
|
const register double tmp11_0 = A_11_1 + A_11_3; |
2323 |
|
const register double tmp18_0 = A_01_1 + A_10_1; |
2324 |
|
const register double tmp1_0 = A_00_0 + A_00_1; |
2325 |
|
const register double tmp15_0 = A_01_1 + A_10_2; |
2326 |
|
const register double tmp5_0 = A_00_0 + A_00_1 + A_00_2 + A_00_3; |
2327 |
|
const register double tmp16_0 = A_10_0 + A_10_3; |
2328 |
|
const register double tmp6_0 = A_01_3 + A_10_0; |
2329 |
|
const register double tmp17_0 = A_01_1 + A_01_2; |
2330 |
|
const register double tmp9_0 = A_01_0 + A_10_0; |
2331 |
|
const register double tmp7_0 = A_01_0 + A_10_3; |
2332 |
|
const register double tmp8_0 = A_01_1 + A_01_2 + A_10_1 + A_10_2; |
2333 |
|
const register double tmp19_0 = A_01_2 + A_10_2; |
2334 |
|
const register double tmp14_1 = A_10_0*w8; |
2335 |
|
const register double tmp23_1 = tmp3_0*w14; |
2336 |
|
const register double tmp35_1 = A_01_0*w8; |
2337 |
|
const register double tmp54_1 = tmp13_0*w8; |
2338 |
|
const register double tmp20_1 = tmp9_0*w4; |
2339 |
|
const register double tmp25_1 = tmp12_0*w12; |
2340 |
|
const register double tmp2_1 = A_01_1*w4; |
2341 |
|
const register double tmp44_1 = tmp7_0*w7; |
2342 |
|
const register double tmp26_1 = tmp10_0*w4; |
2343 |
|
const register double tmp52_1 = tmp18_0*w8; |
2344 |
|
const register double tmp48_1 = A_10_1*w7; |
2345 |
|
const register double tmp46_1 = A_01_3*w8; |
2346 |
|
const register double tmp50_1 = A_01_0*w2; |
2347 |
|
const register double tmp8_1 = tmp4_0*w5; |
2348 |
|
const register double tmp56_1 = tmp19_0*w8; |
2349 |
|
const register double tmp9_1 = tmp2_0*w10; |
2350 |
|
const register double tmp19_1 = A_10_3*w2; |
2351 |
|
const register double tmp47_1 = A_10_2*w4; |
2352 |
|
const register double tmp16_1 = tmp3_0*w0; |
2353 |
|
const register double tmp18_1 = tmp1_0*w6; |
2354 |
|
const register double tmp31_1 = tmp11_0*w12; |
2355 |
|
const register double tmp55_1 = tmp15_0*w2; |
2356 |
|
const register double tmp39_1 = A_10_2*w7; |
2357 |
|
const register double tmp11_1 = tmp6_0*w7; |
2358 |
|
const register double tmp40_1 = tmp11_0*w17; |
2359 |
|
const register double tmp34_1 = tmp15_0*w8; |
2360 |
|
const register double tmp33_1 = tmp14_0*w5; |
2361 |
|
const register double tmp24_1 = tmp11_0*w13; |
2362 |
|
const register double tmp3_1 = tmp1_0*w0; |
2363 |
|
const register double tmp5_1 = tmp2_0*w3; |
2364 |
|
const register double tmp43_1 = tmp17_0*w5; |
2365 |
|
const register double tmp15_1 = A_01_2*w4; |
2366 |
|
const register double tmp53_1 = tmp19_0*w2; |
2367 |
|
const register double tmp27_1 = tmp3_0*w11; |
2368 |
|
const register double tmp32_1 = tmp13_0*w2; |
2369 |
|
const register double tmp10_1 = tmp5_0*w9; |
2370 |
|
const register double tmp37_1 = A_10_1*w4; |
2371 |
|
const register double tmp38_1 = tmp5_0*w15; |
2372 |
|
const register double tmp17_1 = A_01_1*w7; |
2373 |
|
const register double tmp12_1 = tmp7_0*w4; |
2374 |
|
const register double tmp22_1 = tmp10_0*w7; |
2375 |
|
const register double tmp57_1 = tmp18_0*w2; |
2376 |
|
const register double tmp28_1 = tmp9_0*w7; |
2377 |
|
const register double tmp29_1 = tmp1_0*w14; |
2378 |
|
const register double tmp51_1 = tmp11_0*w16; |
2379 |
|
const register double tmp42_1 = tmp12_0*w16; |
2380 |
|
const register double tmp49_1 = tmp12_0*w17; |
2381 |
|
const register double tmp21_1 = tmp1_0*w11; |
2382 |
|
const register double tmp1_1 = tmp0_0*w1; |
2383 |
|
const register double tmp45_1 = tmp6_0*w4; |
2384 |
|
const register double tmp7_1 = A_10_0*w2; |
2385 |
|
const register double tmp6_1 = tmp3_0*w6; |
2386 |
|
const register double tmp13_1 = tmp8_0*w1; |
2387 |
|
const register double tmp36_1 = tmp16_0*w1; |
2388 |
|
const register double tmp41_1 = A_01_3*w2; |
2389 |
|
const register double tmp30_1 = tmp12_0*w13; |
2390 |
|
const register double tmp4_1 = A_01_2*w7; |
2391 |
|
const register double tmp0_1 = A_10_3*w8; |
2392 |
|
EM_S[INDEX4(k,m,0,1,numEq,numComp,4)]+=tmp0_1 + tmp1_1 + tmp2_1 + tmp3_1 + tmp4_1 + tmp5_1 + tmp6_1 + tmp7_1 + tmp8_1; |
2393 |
|
EM_S[INDEX4(k,m,1,2,numEq,numComp,4)]+=tmp10_1 + tmp11_1 + tmp12_1 + tmp13_1 + tmp9_1; |
2394 |
|
EM_S[INDEX4(k,m,3,2,numEq,numComp,4)]+=tmp14_1 + tmp15_1 + tmp16_1 + tmp17_1 + tmp18_1 + tmp19_1 + tmp1_1 + tmp5_1 + tmp8_1; |
2395 |
|
EM_S[INDEX4(k,m,0,0,numEq,numComp,4)]+=tmp13_1 + tmp20_1 + tmp21_1 + tmp22_1 + tmp23_1 + tmp24_1 + tmp25_1; |
2396 |
|
EM_S[INDEX4(k,m,3,3,numEq,numComp,4)]+=tmp13_1 + tmp26_1 + tmp27_1 + tmp28_1 + tmp29_1 + tmp30_1 + tmp31_1; |
2397 |
|
EM_S[INDEX4(k,m,3,0,numEq,numComp,4)]+=tmp10_1 + tmp32_1 + tmp33_1 + tmp34_1 + tmp9_1; |
2398 |
|
EM_S[INDEX4(k,m,3,1,numEq,numComp,4)]+=tmp35_1 + tmp36_1 + tmp37_1 + tmp38_1 + tmp39_1 + tmp40_1 + tmp41_1 + tmp42_1 + tmp43_1; |
2399 |
|
EM_S[INDEX4(k,m,2,1,numEq,numComp,4)]+=tmp10_1 + tmp13_1 + tmp44_1 + tmp45_1 + tmp9_1; |
2400 |
|
EM_S[INDEX4(k,m,0,2,numEq,numComp,4)]+=tmp36_1 + tmp38_1 + tmp43_1 + tmp46_1 + tmp47_1 + tmp48_1 + tmp49_1 + tmp50_1 + tmp51_1; |
2401 |
|
EM_S[INDEX4(k,m,2,0,numEq,numComp,4)]+=tmp0_1 + tmp15_1 + tmp17_1 + tmp1_1 + tmp38_1 + tmp49_1 + tmp51_1 + tmp7_1 + tmp8_1; |
2402 |
|
EM_S[INDEX4(k,m,1,3,numEq,numComp,4)]+=tmp14_1 + tmp19_1 + tmp1_1 + tmp2_1 + tmp38_1 + tmp40_1 + tmp42_1 + tmp4_1 + tmp8_1; |
2403 |
|
EM_S[INDEX4(k,m,2,3,numEq,numComp,4)]+=tmp16_1 + tmp18_1 + tmp35_1 + tmp36_1 + tmp41_1 + tmp43_1 + tmp47_1 + tmp48_1 + tmp5_1; |
2404 |
|
EM_S[INDEX4(k,m,2,2,numEq,numComp,4)]+=tmp24_1 + tmp25_1 + tmp27_1 + tmp29_1 + tmp33_1 + tmp52_1 + tmp53_1; |
2405 |
|
EM_S[INDEX4(k,m,1,0,numEq,numComp,4)]+=tmp36_1 + tmp37_1 + tmp39_1 + tmp3_1 + tmp43_1 + tmp46_1 + tmp50_1 + tmp5_1 + tmp6_1; |
2406 |
|
EM_S[INDEX4(k,m,0,3,numEq,numComp,4)]+=tmp10_1 + tmp33_1 + tmp54_1 + tmp55_1 + tmp9_1; |
2407 |
|
EM_S[INDEX4(k,m,1,1,numEq,numComp,4)]+=tmp21_1 + tmp23_1 + tmp30_1 + tmp31_1 + tmp33_1 + tmp56_1 + tmp57_1; |
2408 |
|
} |
2409 |
|
} |
2410 |
|
} else { /* constant data */ |
2411 |
|
for (index_t k=0; k<numEq; k++) { |
2412 |
|
for (index_t m=0; m<numComp; m++) { |
2413 |
|
const register double A_00 = A_p[INDEX4(k,0,m,0, numEq,2, numComp)]; |
2414 |
|
const register double A_01 = A_p[INDEX4(k,0,m,1, numEq,2, numComp)]; |
2415 |
|
const register double A_10 = A_p[INDEX4(k,1,m,0, numEq,2, numComp)]; |
2416 |
|
const register double A_11 = A_p[INDEX4(k,1,m,1, numEq,2, numComp)]; |
2417 |
|
const register double tmp0_0 = A_01 + A_10; |
2418 |
|
const register double tmp0_1 = A_00*w18; |
2419 |
|
const register double tmp10_1 = A_01*w20; |
2420 |
|
const register double tmp12_1 = A_00*w26; |
2421 |
|
const register double tmp4_1 = A_00*w22; |
2422 |
|
const register double tmp8_1 = A_00*w24; |
2423 |
|
const register double tmp13_1 = A_10*w19; |
2424 |
|
const register double tmp9_1 = tmp0_0*w20; |
2425 |
|
const register double tmp3_1 = A_11*w21; |
2426 |
|
const register double tmp11_1 = A_11*w27; |
2427 |
|
const register double tmp1_1 = A_01*w19; |
2428 |
|
const register double tmp6_1 = A_11*w23; |
2429 |
|
const register double tmp7_1 = A_11*w25; |
2430 |
|
const register double tmp2_1 = A_10*w20; |
2431 |
|
const register double tmp5_1 = tmp0_0*w19; |
2432 |
|
EM_S[INDEX4(k,m,0,1,numEq,numComp,4)]+=tmp0_1 + tmp1_1 + tmp2_1 + tmp3_1; |
2433 |
|
EM_S[INDEX4(k,m,1,2,numEq,numComp,4)]+=tmp4_1 + tmp5_1 + tmp6_1; |
2434 |
|
EM_S[INDEX4(k,m,3,2,numEq,numComp,4)]+=tmp0_1 + tmp1_1 + tmp2_1 + tmp3_1; |
2435 |
|
EM_S[INDEX4(k,m,0,0,numEq,numComp,4)]+=tmp5_1 + tmp7_1 + tmp8_1; |
2436 |
|
EM_S[INDEX4(k,m,3,3,numEq,numComp,4)]+=tmp5_1 + tmp7_1 + tmp8_1; |
2437 |
|
EM_S[INDEX4(k,m,3,0,numEq,numComp,4)]+=tmp4_1 + tmp6_1 + tmp9_1; |
2438 |
|
EM_S[INDEX4(k,m,3,1,numEq,numComp,4)]+=tmp10_1 + tmp11_1 + tmp12_1 + tmp13_1; |
2439 |
|
EM_S[INDEX4(k,m,2,1,numEq,numComp,4)]+=tmp4_1 + tmp5_1 + tmp6_1; |
2440 |
|
EM_S[INDEX4(k,m,0,2,numEq,numComp,4)]+=tmp10_1 + tmp11_1 + tmp12_1 + tmp13_1; |
2441 |
|
EM_S[INDEX4(k,m,2,0,numEq,numComp,4)]+=tmp11_1 + tmp12_1 + tmp1_1 + tmp2_1; |
2442 |
|
EM_S[INDEX4(k,m,1,3,numEq,numComp,4)]+=tmp11_1 + tmp12_1 + tmp1_1 + tmp2_1; |
2443 |
|
EM_S[INDEX4(k,m,2,3,numEq,numComp,4)]+=tmp0_1 + tmp10_1 + tmp13_1 + tmp3_1; |
2444 |
|
EM_S[INDEX4(k,m,2,2,numEq,numComp,4)]+=tmp7_1 + tmp8_1 + tmp9_1; |
2445 |
|
EM_S[INDEX4(k,m,1,0,numEq,numComp,4)]+=tmp0_1 + tmp10_1 + tmp13_1 + tmp3_1; |
2446 |
|
EM_S[INDEX4(k,m,0,3,numEq,numComp,4)]+=tmp4_1 + tmp6_1 + tmp9_1; |
2447 |
|
EM_S[INDEX4(k,m,1,1,numEq,numComp,4)]+=tmp7_1 + tmp8_1 + tmp9_1; |
2448 |
|
} |
2449 |
|
} |
2450 |
|
} |
2451 |
|
} |
2452 |
|
/////////////// |
2453 |
|
// process B // |
2454 |
|
/////////////// |
2455 |
|
if (!B.isEmpty()) { |
2456 |
|
add_EM_S=true; |
2457 |
|
const double* B_p=const_cast<escript::Data*>(&B)->getSampleDataRO(e); |
2458 |
|
if (B.actsExpanded()) { |
2459 |
|
for (index_t k=0; k<numEq; k++) { |
2460 |
|
for (index_t m=0; m<numComp; m++) { |
2461 |
|
const register double B_0_0 = B_p[INDEX4(k,0,m,0, numEq,2,numComp)]; |
2462 |
|
const register double B_1_0 = B_p[INDEX4(k,1,m,0, numEq,2,numComp)]; |
2463 |
|
const register double B_0_1 = B_p[INDEX4(k,0,m,1, numEq,2,numComp)]; |
2464 |
|
const register double B_1_1 = B_p[INDEX4(k,1,m,1, numEq,2,numComp)]; |
2465 |
|
const register double B_0_2 = B_p[INDEX4(k,0,m,2, numEq,2,numComp)]; |
2466 |
|
const register double B_1_2 = B_p[INDEX4(k,1,m,2, numEq,2,numComp)]; |
2467 |
|
const register double B_0_3 = B_p[INDEX4(k,0,m,3, numEq,2,numComp)]; |
2468 |
|
const register double B_1_3 = B_p[INDEX4(k,1,m,3, numEq,2,numComp)]; |
2469 |
|
const register double tmp3_0 = B_0_0 + B_0_2; |
2470 |
|
const register double tmp1_0 = B_1_2 + B_1_3; |
2471 |
|
const register double tmp2_0 = B_0_1 + B_0_3; |
2472 |
|
const register double tmp0_0 = B_1_0 + B_1_1; |
2473 |
|
const register double tmp63_1 = B_1_1*w42; |
2474 |
|
const register double tmp79_1 = B_1_1*w40; |
2475 |
|
const register double tmp37_1 = tmp3_0*w35; |
2476 |
|
const register double tmp8_1 = tmp0_0*w32; |
2477 |
|
const register double tmp71_1 = B_0_1*w34; |
2478 |
|
const register double tmp19_1 = B_0_3*w31; |
2479 |
|
const register double tmp15_1 = B_0_3*w34; |
2480 |
|
const register double tmp9_1 = tmp3_0*w34; |
2481 |
|
const register double tmp35_1 = B_1_0*w36; |
2482 |
|
const register double tmp66_1 = B_0_3*w28; |
2483 |
|
const register double tmp28_1 = B_1_0*w42; |
2484 |
|
const register double tmp22_1 = B_1_0*w40; |
2485 |
|
const register double tmp16_1 = B_1_2*w29; |
2486 |
|
const register double tmp6_1 = tmp2_0*w35; |
2487 |
|
const register double tmp55_1 = B_1_3*w40; |
2488 |
|
const register double tmp50_1 = B_1_3*w42; |
2489 |
|
const register double tmp7_1 = tmp1_0*w29; |
2490 |
|
const register double tmp1_1 = tmp1_0*w32; |
2491 |
|
const register double tmp57_1 = B_0_3*w30; |
2492 |
|
const register double tmp18_1 = B_1_1*w32; |
2493 |
|
const register double tmp53_1 = B_1_0*w41; |
2494 |
|
const register double tmp61_1 = B_1_3*w36; |
2495 |
|
const register double tmp27_1 = B_0_3*w38; |
2496 |
|
const register double tmp64_1 = B_0_2*w30; |
2497 |
|
const register double tmp76_1 = B_0_1*w38; |
2498 |
|
const register double tmp39_1 = tmp2_0*w34; |
2499 |
|
const register double tmp62_1 = B_0_1*w31; |
2500 |
|
const register double tmp56_1 = B_0_0*w31; |
2501 |
|
const register double tmp49_1 = B_1_1*w36; |
2502 |
|
const register double tmp2_1 = B_0_2*w31; |
2503 |
|
const register double tmp23_1 = B_0_2*w33; |
2504 |
|
const register double tmp38_1 = B_1_1*w43; |
2505 |
|
const register double tmp74_1 = B_1_2*w41; |
2506 |
|
const register double tmp43_1 = B_1_1*w41; |
2507 |
|
const register double tmp58_1 = B_0_2*w28; |
2508 |
|
const register double tmp67_1 = B_0_0*w33; |
2509 |
|
const register double tmp33_1 = tmp0_0*w39; |
2510 |
|
const register double tmp4_1 = B_0_0*w28; |
2511 |
|
const register double tmp20_1 = B_0_0*w30; |
2512 |
|
const register double tmp13_1 = B_0_2*w38; |
2513 |
|
const register double tmp65_1 = B_1_2*w43; |
2514 |
|
const register double tmp0_1 = tmp0_0*w29; |
2515 |
|
const register double tmp41_1 = tmp3_0*w33; |
2516 |
|
const register double tmp73_1 = B_0_2*w37; |
2517 |
|
const register double tmp69_1 = B_0_0*w38; |
2518 |
|
const register double tmp48_1 = B_1_2*w39; |
2519 |
|
const register double tmp59_1 = B_0_1*w33; |
2520 |
|
const register double tmp17_1 = B_1_3*w41; |
2521 |
|
const register double tmp5_1 = B_0_3*w33; |
2522 |
|
const register double tmp3_1 = B_0_1*w30; |
2523 |
|
const register double tmp21_1 = B_0_1*w28; |
2524 |
|
const register double tmp42_1 = B_1_0*w29; |
2525 |
|
const register double tmp54_1 = B_1_2*w32; |
2526 |
|
const register double tmp60_1 = B_1_0*w39; |
2527 |
|
const register double tmp32_1 = tmp1_0*w36; |
2528 |
|
const register double tmp10_1 = B_0_1*w37; |
2529 |
|
const register double tmp14_1 = B_0_0*w35; |
2530 |
|
const register double tmp29_1 = B_0_1*w35; |
2531 |
|
const register double tmp26_1 = B_1_2*w36; |
2532 |
|
const register double tmp30_1 = B_1_3*w43; |
2533 |
|
const register double tmp70_1 = B_0_2*w35; |
2534 |
|
const register double tmp34_1 = B_1_3*w39; |
2535 |
|
const register double tmp51_1 = B_1_0*w43; |
2536 |
|
const register double tmp31_1 = B_0_2*w34; |
2537 |
|
const register double tmp45_1 = tmp3_0*w28; |
2538 |
|
const register double tmp11_1 = tmp1_0*w39; |
2539 |
|
const register double tmp52_1 = B_1_1*w29; |
2540 |
|
const register double tmp44_1 = B_1_3*w32; |
2541 |
|
const register double tmp25_1 = B_1_1*w39; |
2542 |
|
const register double tmp47_1 = tmp2_0*w33; |
2543 |
|
const register double tmp72_1 = B_1_3*w29; |
2544 |
|
const register double tmp40_1 = tmp2_0*w28; |
2545 |
|
const register double tmp46_1 = B_1_2*w40; |
2546 |
|
const register double tmp36_1 = B_1_2*w42; |
2547 |
|
const register double tmp24_1 = B_0_0*w37; |
2548 |
|
const register double tmp77_1 = B_0_3*w35; |
2549 |
|
const register double tmp68_1 = B_0_3*w37; |
2550 |
|
const register double tmp78_1 = B_0_0*w34; |
2551 |
|
const register double tmp12_1 = tmp0_0*w36; |
2552 |
|
const register double tmp75_1 = B_1_0*w32; |
2553 |
|
EM_S[INDEX4(k,m,0,1,numEq,numComp,4)]+=tmp0_1 + tmp1_1 + tmp2_1 + tmp3_1 + tmp4_1 + tmp5_1; |
2554 |
|
EM_S[INDEX4(k,m,1,2,numEq,numComp,4)]+=tmp6_1 + tmp7_1 + tmp8_1 + tmp9_1; |
2555 |
|
EM_S[INDEX4(k,m,3,2,numEq,numComp,4)]+=tmp10_1 + tmp11_1 + tmp12_1 + tmp13_1 + tmp14_1 + tmp15_1; |
2556 |
|
EM_S[INDEX4(k,m,0,0,numEq,numComp,4)]+=tmp16_1 + tmp17_1 + tmp18_1 + tmp19_1 + tmp20_1 + tmp21_1 + tmp22_1 + tmp23_1; |
2557 |
|
EM_S[INDEX4(k,m,3,3,numEq,numComp,4)]+=tmp24_1 + tmp25_1 + tmp26_1 + tmp27_1 + tmp28_1 + tmp29_1 + tmp30_1 + tmp31_1; |
2558 |
|
EM_S[INDEX4(k,m,3,0,numEq,numComp,4)]+=tmp32_1 + tmp33_1 + tmp6_1 + tmp9_1; |
2559 |
|
EM_S[INDEX4(k,m,3,1,numEq,numComp,4)]+=tmp34_1 + tmp35_1 + tmp36_1 + tmp37_1 + tmp38_1 + tmp39_1; |
2560 |
|
EM_S[INDEX4(k,m,2,1,numEq,numComp,4)]+=tmp32_1 + tmp33_1 + tmp40_1 + tmp41_1; |
2561 |
|
EM_S[INDEX4(k,m,0,2,numEq,numComp,4)]+=tmp42_1 + tmp43_1 + tmp44_1 + tmp45_1 + tmp46_1 + tmp47_1; |
2562 |
|
EM_S[INDEX4(k,m,2,0,numEq,numComp,4)]+=tmp45_1 + tmp47_1 + tmp48_1 + tmp49_1 + tmp50_1 + tmp51_1; |
2563 |
|
EM_S[INDEX4(k,m,1,3,numEq,numComp,4)]+=tmp37_1 + tmp39_1 + tmp52_1 + tmp53_1 + tmp54_1 + tmp55_1; |
2564 |
|
EM_S[INDEX4(k,m,2,3,numEq,numComp,4)]+=tmp11_1 + tmp12_1 + tmp56_1 + tmp57_1 + tmp58_1 + tmp59_1; |
2565 |
|
EM_S[INDEX4(k,m,2,2,numEq,numComp,4)]+=tmp60_1 + tmp61_1 + tmp62_1 + tmp63_1 + tmp64_1 + tmp65_1 + tmp66_1 + tmp67_1; |
2566 |
|
EM_S[INDEX4(k,m,1,0,numEq,numComp,4)]+=tmp0_1 + tmp1_1 + tmp68_1 + tmp69_1 + tmp70_1 + tmp71_1; |
2567 |
|
EM_S[INDEX4(k,m,0,3,numEq,numComp,4)]+=tmp40_1 + tmp41_1 + tmp7_1 + tmp8_1; |
2568 |
|
EM_S[INDEX4(k,m,1,1,numEq,numComp,4)]+=tmp72_1 + tmp73_1 + tmp74_1 + tmp75_1 + tmp76_1 + tmp77_1 + tmp78_1 + tmp79_1; |
2569 |
|
} |
2570 |
|
} |
2571 |
|
} else { /* constant data */ |
2572 |
|
for (index_t k=0; k<numEq; k++) { |
2573 |
|
for (index_t m=0; m<numComp; m++) { |
2574 |
|
const register double B_0 = B_p[INDEX3(k,0,m, numEq, 2)]; |
2575 |
|
const register double B_1 = B_p[INDEX3(k,1,m, numEq, 2)]; |
2576 |
|
const register double tmp6_1 = B_1*w50; |
2577 |
|
const register double tmp1_1 = B_1*w45; |
2578 |
|
const register double tmp5_1 = B_1*w49; |
2579 |
|
const register double tmp4_1 = B_1*w48; |
2580 |
|
const register double tmp0_1 = B_0*w44; |
2581 |
|
const register double tmp2_1 = B_0*w46; |
2582 |
|
const register double tmp7_1 = B_0*w51; |
2583 |
|
const register double tmp3_1 = B_0*w47; |
2584 |
|
EM_S[INDEX4(k,m,0,1,numEq,numComp,4)]+=tmp0_1 + tmp1_1; |
2585 |
|
EM_S[INDEX4(k,m,1,2,numEq,numComp,4)]+=tmp1_1 + tmp2_1; |
2586 |
|
EM_S[INDEX4(k,m,3,2,numEq,numComp,4)]+=tmp3_1 + tmp4_1; |
2587 |
|
EM_S[INDEX4(k,m,0,0,numEq,numComp,4)]+=tmp0_1 + tmp5_1; |
2588 |
|
EM_S[INDEX4(k,m,3,3,numEq,numComp,4)]+=tmp3_1 + tmp6_1; |
2589 |
|
EM_S[INDEX4(k,m,3,0,numEq,numComp,4)]+=tmp2_1 + tmp4_1; |
2590 |
|
EM_S[INDEX4(k,m,3,1,numEq,numComp,4)]+=tmp2_1 + tmp6_1; |
2591 |
|
EM_S[INDEX4(k,m,2,1,numEq,numComp,4)]+=tmp4_1 + tmp7_1; |
2592 |
|
EM_S[INDEX4(k,m,0,2,numEq,numComp,4)]+=tmp5_1 + tmp7_1; |
2593 |
|
EM_S[INDEX4(k,m,2,0,numEq,numComp,4)]+=tmp6_1 + tmp7_1; |
2594 |
|
EM_S[INDEX4(k,m,1,3,numEq,numComp,4)]+=tmp2_1 + tmp5_1; |
2595 |
|
EM_S[INDEX4(k,m,2,3,numEq,numComp,4)]+=tmp0_1 + tmp4_1; |
2596 |
|
EM_S[INDEX4(k,m,2,2,numEq,numComp,4)]+=tmp0_1 + tmp6_1; |
2597 |
|
EM_S[INDEX4(k,m,1,0,numEq,numComp,4)]+=tmp1_1 + tmp3_1; |
2598 |
|
EM_S[INDEX4(k,m,0,3,numEq,numComp,4)]+=tmp1_1 + tmp7_1; |
2599 |
|
EM_S[INDEX4(k,m,1,1,numEq,numComp,4)]+=tmp3_1 + tmp5_1; |
2600 |
|
} |
2601 |
|
} |
2602 |
|
} |
2603 |
|
} |
2604 |
|
/////////////// |
2605 |
|
// process C // |
2606 |
|
/////////////// |
2607 |
|
if (!C.isEmpty()) { |
2608 |
|
add_EM_S=true; |
2609 |
|
const double* C_p=const_cast<escript::Data*>(&C)->getSampleDataRO(e); |
2610 |
|
if (C.actsExpanded()) { |
2611 |
|
for (index_t k=0; k<numEq; k++) { |
2612 |
|
for (index_t m=0; m<numComp; m++) { |
2613 |
|
const register double C_0_0 = C_p[INDEX4(k,m,0, 0, numEq,numComp,2)]; |
2614 |
|
const register double C_1_0 = C_p[INDEX4(k,m,1, 0, numEq,numComp,2)]; |
2615 |
|
const register double C_0_1 = C_p[INDEX4(k,m,0, 1, numEq,numComp,2)]; |
2616 |
|
const register double C_1_1 = C_p[INDEX4(k,m,1, 1, numEq,numComp,2)]; |
2617 |
|
const register double C_0_2 = C_p[INDEX4(k,m,0, 2, numEq,numComp,2)]; |
2618 |
|
const register double C_1_2 = C_p[INDEX4(k,m,1, 2, numEq,numComp,2)]; |
2619 |
|
const register double C_0_3 = C_p[INDEX4(k,m,0, 3, numEq,numComp,2)]; |
2620 |
|
const register double C_1_3 = C_p[INDEX4(k,m,1, 3, numEq,numComp,2)]; |
2621 |
|
const register double tmp2_0 = C_0_1 + C_0_3; |
2622 |
|
const register double tmp1_0 = C_1_2 + C_1_3; |
2623 |
|
const register double tmp3_0 = C_0_0 + C_0_2; |
2624 |
|
const register double tmp0_0 = C_1_0 + C_1_1; |
2625 |
|
const register double tmp64_1 = C_0_2*w30; |
2626 |
|
const register double tmp14_1 = C_0_2*w28; |
2627 |
|
const register double tmp19_1 = C_0_3*w31; |
2628 |
|
const register double tmp22_1 = C_1_0*w40; |
2629 |
|
const register double tmp37_1 = tmp3_0*w35; |
2630 |
|
const register double tmp29_1 = C_0_1*w35; |
2631 |
|
const register double tmp73_1 = C_0_2*w37; |
2632 |
|
const register double tmp74_1 = C_1_2*w41; |
2633 |
|
const register double tmp52_1 = C_1_3*w39; |
2634 |
|
const register double tmp25_1 = C_1_1*w39; |
2635 |
|
const register double tmp62_1 = C_0_1*w31; |
2636 |
|
const register double tmp79_1 = C_1_1*w40; |
2637 |
|
const register double tmp43_1 = C_1_1*w36; |
2638 |
|
const register double tmp27_1 = C_0_3*w38; |
2639 |
|
const register double tmp28_1 = C_1_0*w42; |
2640 |
|
const register double tmp63_1 = C_1_1*w42; |
2641 |
|
const register double tmp59_1 = C_0_3*w34; |
2642 |
|
const register double tmp72_1 = C_1_3*w29; |
2643 |
|
const register double tmp40_1 = tmp2_0*w35; |
2644 |
|
const register double tmp13_1 = C_0_3*w30; |
2645 |
|
const register double tmp51_1 = C_1_2*w40; |
2646 |
|
const register double tmp54_1 = C_1_2*w42; |
2647 |
|
const register double tmp12_1 = C_0_0*w31; |
2648 |
|
const register double tmp2_1 = tmp1_0*w32; |
2649 |
|
const register double tmp68_1 = C_0_2*w31; |
2650 |
|
const register double tmp75_1 = C_1_0*w32; |
2651 |
|
const register double tmp49_1 = C_1_1*w41; |
2652 |
|
const register double tmp4_1 = C_0_2*w35; |
2653 |
|
const register double tmp66_1 = C_0_3*w28; |
2654 |
|
const register double tmp56_1 = C_0_1*w37; |
2655 |
|
const register double tmp5_1 = C_0_1*w34; |
2656 |
|
const register double tmp38_1 = tmp2_0*w34; |
2657 |
|
const register double tmp76_1 = C_0_1*w38; |
2658 |
|
const register double tmp21_1 = C_0_1*w28; |
2659 |
|
const register double tmp69_1 = C_0_1*w30; |
2660 |
|
const register double tmp53_1 = C_1_0*w36; |
2661 |
|
const register double tmp42_1 = C_1_2*w39; |
2662 |
|
const register double tmp32_1 = tmp1_0*w29; |
2663 |
|
const register double tmp45_1 = C_1_0*w43; |
2664 |
|
const register double tmp33_1 = tmp0_0*w32; |
2665 |
|
const register double tmp35_1 = C_1_0*w41; |
2666 |
|
const register double tmp26_1 = C_1_2*w36; |
2667 |
|
const register double tmp67_1 = C_0_0*w33; |
2668 |
|
const register double tmp31_1 = C_0_2*w34; |
2669 |
|
const register double tmp20_1 = C_0_0*w30; |
2670 |
|
const register double tmp70_1 = C_0_0*w28; |
2671 |
|
const register double tmp8_1 = tmp0_0*w39; |
2672 |
|
const register double tmp30_1 = C_1_3*w43; |
2673 |
|
const register double tmp0_1 = tmp0_0*w29; |
2674 |
|
const register double tmp17_1 = C_1_3*w41; |
2675 |
|
const register double tmp58_1 = C_0_0*w35; |
2676 |
|
const register double tmp9_1 = tmp3_0*w33; |
2677 |
|
const register double tmp61_1 = C_1_3*w36; |
2678 |
|
const register double tmp41_1 = tmp3_0*w34; |
2679 |
|
const register double tmp50_1 = C_1_3*w32; |
2680 |
|
const register double tmp18_1 = C_1_1*w32; |
2681 |
|
const register double tmp6_1 = tmp1_0*w36; |
2682 |
|
const register double tmp3_1 = C_0_0*w38; |
2683 |
|
const register double tmp34_1 = C_1_1*w29; |
2684 |
|
const register double tmp77_1 = C_0_3*w35; |
2685 |
|
const register double tmp65_1 = C_1_2*w43; |
2686 |
|
const register double tmp71_1 = C_0_3*w33; |
2687 |
|
const register double tmp55_1 = C_1_1*w43; |
2688 |
|
const register double tmp46_1 = tmp3_0*w28; |
2689 |
|
const register double tmp24_1 = C_0_0*w37; |
2690 |
|
const register double tmp10_1 = tmp1_0*w39; |
2691 |
|
const register double tmp48_1 = C_1_0*w29; |
2692 |
|
const register double tmp15_1 = C_0_1*w33; |
2693 |
|
const register double tmp36_1 = C_1_2*w32; |
2694 |
|
const register double tmp60_1 = C_1_0*w39; |
2695 |
|
const register double tmp47_1 = tmp2_0*w33; |
2696 |
|
const register double tmp16_1 = C_1_2*w29; |
2697 |
|
const register double tmp1_1 = C_0_3*w37; |
2698 |
|
const register double tmp7_1 = tmp2_0*w28; |
2699 |
|
const register double tmp39_1 = C_1_3*w40; |
2700 |
|
const register double tmp44_1 = C_1_3*w42; |
2701 |
|
const register double tmp57_1 = C_0_2*w38; |
2702 |
|
const register double tmp78_1 = C_0_0*w34; |
2703 |
|
const register double tmp11_1 = tmp0_0*w36; |
2704 |
|
const register double tmp23_1 = C_0_2*w33; |
2705 |
|
EM_S[INDEX4(k,m,0,1,numEq,numComp,4)]+=tmp0_1 + tmp1_1 + tmp2_1 + tmp3_1 + tmp4_1 + tmp5_1; |
2706 |
|
EM_S[INDEX4(k,m,1,2,numEq,numComp,4)]+=tmp6_1 + tmp7_1 + tmp8_1 + tmp9_1; |
2707 |
|
EM_S[INDEX4(k,m,3,2,numEq,numComp,4)]+=tmp10_1 + tmp11_1 + tmp12_1 + tmp13_1 + tmp14_1 + tmp15_1; |
2708 |
|
EM_S[INDEX4(k,m,0,0,numEq,numComp,4)]+=tmp16_1 + tmp17_1 + tmp18_1 + tmp19_1 + tmp20_1 + tmp21_1 + tmp22_1 + tmp23_1; |
2709 |
|
EM_S[INDEX4(k,m,3,3,numEq,numComp,4)]+=tmp24_1 + tmp25_1 + tmp26_1 + tmp27_1 + tmp28_1 + tmp29_1 + tmp30_1 + tmp31_1; |
2710 |
|
EM_S[INDEX4(k,m,3,0,numEq,numComp,4)]+=tmp32_1 + tmp33_1 + tmp7_1 + tmp9_1; |
2711 |
|
EM_S[INDEX4(k,m,3,1,numEq,numComp,4)]+=tmp34_1 + tmp35_1 + tmp36_1 + tmp37_1 + tmp38_1 + tmp39_1; |
2712 |
|
EM_S[INDEX4(k,m,2,1,numEq,numComp,4)]+=tmp32_1 + tmp33_1 + tmp40_1 + tmp41_1; |
2713 |
|
EM_S[INDEX4(k,m,0,2,numEq,numComp,4)]+=tmp42_1 + tmp43_1 + tmp44_1 + tmp45_1 + tmp46_1 + tmp47_1; |
2714 |
|
EM_S[INDEX4(k,m,2,0,numEq,numComp,4)]+=tmp46_1 + tmp47_1 + tmp48_1 + tmp49_1 + tmp50_1 + tmp51_1; |
2715 |
|
EM_S[INDEX4(k,m,1,3,numEq,numComp,4)]+=tmp37_1 + tmp38_1 + tmp52_1 + tmp53_1 + tmp54_1 + tmp55_1; |
2716 |
|
EM_S[INDEX4(k,m,2,3,numEq,numComp,4)]+=tmp10_1 + tmp11_1 + tmp56_1 + tmp57_1 + tmp58_1 + tmp59_1; |
2717 |
|
EM_S[INDEX4(k,m,2,2,numEq,numComp,4)]+=tmp60_1 + tmp61_1 + tmp62_1 + tmp63_1 + tmp64_1 + tmp65_1 + tmp66_1 + tmp67_1; |
2718 |
|
EM_S[INDEX4(k,m,1,0,numEq,numComp,4)]+=tmp0_1 + tmp2_1 + tmp68_1 + tmp69_1 + tmp70_1 + tmp71_1; |
2719 |
|
EM_S[INDEX4(k,m,0,3,numEq,numComp,4)]+=tmp40_1 + tmp41_1 + tmp6_1 + tmp8_1; |
2720 |
|
EM_S[INDEX4(k,m,1,1,numEq,numComp,4)]+=tmp72_1 + tmp73_1 + tmp74_1 + tmp75_1 + tmp76_1 + tmp77_1 + tmp78_1 + tmp79_1; |
2721 |
|
} |
2722 |
|
} |
2723 |
|
} else { /* constant data */ |
2724 |
|
for (index_t k=0; k<numEq; k++) { |
2725 |
|
for (index_t m=0; m<numComp; m++) { |
2726 |
|
const register double C_0 = C_p[INDEX3(k, m, 0, numEq, numComp)]; |
2727 |
|
const register double C_1 = C_p[INDEX3(k, m, 1, numEq, numComp)]; |
2728 |
|
const register double tmp1_1 = C_1*w45; |
2729 |
|
const register double tmp3_1 = C_0*w51; |
2730 |
|
const register double tmp4_1 = C_0*w44; |
2731 |
|
const register double tmp7_1 = C_0*w46; |
2732 |
|
const register double tmp5_1 = C_1*w49; |
2733 |
|
const register double tmp2_1 = C_1*w48; |
2734 |
|
const register double tmp0_1 = C_0*w47; |
2735 |
|
const register double tmp6_1 = C_1*w50; |
2736 |
|
EM_S[INDEX4(k,m,0,1,numEq,numComp,4)]+=tmp0_1 + tmp1_1; |
2737 |
|
EM_S[INDEX4(k,m,1,2,numEq,numComp,4)]+=tmp2_1 + tmp3_1; |
2738 |
|
EM_S[INDEX4(k,m,3,2,numEq,numComp,4)]+=tmp2_1 + tmp4_1; |
2739 |
|
EM_S[INDEX4(k,m,0,0,numEq,numComp,4)]+=tmp4_1 + tmp5_1; |
2740 |
|
EM_S[INDEX4(k,m,3,3,numEq,numComp,4)]+=tmp0_1 + tmp6_1; |
2741 |
|
EM_S[INDEX4(k,m,3,0,numEq,numComp,4)]+=tmp1_1 + tmp3_1; |
2742 |
|
EM_S[INDEX4(k,m,3,1,numEq,numComp,4)]+=tmp5_1 + tmp7_1; |
2743 |
|
EM_S[INDEX4(k,m,2,1,numEq,numComp,4)]+=tmp1_1 + tmp7_1; |
2744 |
|
EM_S[INDEX4(k,m,0,2,numEq,numComp,4)]+=tmp3_1 + tmp6_1; |
2745 |
|
EM_S[INDEX4(k,m,2,0,numEq,numComp,4)]+=tmp3_1 + tmp5_1; |
2746 |
|
EM_S[INDEX4(k,m,1,3,numEq,numComp,4)]+=tmp6_1 + tmp7_1; |
2747 |
|
EM_S[INDEX4(k,m,2,3,numEq,numComp,4)]+=tmp0_1 + tmp2_1; |
2748 |
|
EM_S[INDEX4(k,m,2,2,numEq,numComp,4)]+=tmp4_1 + tmp6_1; |
2749 |
|
EM_S[INDEX4(k,m,1,0,numEq,numComp,4)]+=tmp1_1 + tmp4_1; |
2750 |
|
EM_S[INDEX4(k,m,0,3,numEq,numComp,4)]+=tmp2_1 + tmp7_1; |
2751 |
|
EM_S[INDEX4(k,m,1,1,numEq,numComp,4)]+=tmp0_1 + tmp5_1; |
2752 |
|
} |
2753 |
|
} |
2754 |
|
} |
2755 |
|
} |
2756 |
|
/////////////// |
2757 |
|
// process D // |
2758 |
|
/////////////// |
2759 |
|
if (!D.isEmpty()) { |
2760 |
|
add_EM_S=true; |
2761 |
|
const double* D_p=const_cast<escript::Data*>(&D)->getSampleDataRO(e); |
2762 |
|
if (D.actsExpanded()) { |
2763 |
|
for (index_t k=0; k<numEq; k++) { |
2764 |
|
for (index_t m=0; m<numComp; m++) { |
2765 |
|
const register double D_0 = D_p[INDEX3(k, m, 0, numEq, numComp)]; |
2766 |
|
const register double D_1 = D_p[INDEX3(k, m, 1, numEq, numComp)]; |
2767 |
|
const register double D_2 = D_p[INDEX3(k, m, 2, numEq, numComp)]; |
2768 |
|
const register double D_3 = D_p[INDEX3(k, m, 3, numEq, numComp)]; |
2769 |
|
const register double tmp4_0 = D_1 + D_3; |
2770 |
|
const register double tmp2_0 = D_0 + D_1 + D_2 + D_3; |
2771 |
|
const register double tmp5_0 = D_0 + D_2; |
2772 |
|
const register double tmp0_0 = D_0 + D_1; |
2773 |
|
const register double tmp6_0 = D_0 + D_3; |
2774 |
|
const register double tmp1_0 = D_2 + D_3; |
2775 |
|
const register double tmp3_0 = D_1 + D_2; |
2776 |
|
const register double tmp16_1 = D_1*w56; |
2777 |
|
const register double tmp14_1 = tmp6_0*w54; |
2778 |
|
const register double tmp8_1 = D_3*w55; |
2779 |
|
const register double tmp2_1 = tmp2_0*w54; |
2780 |
|
const register double tmp12_1 = tmp5_0*w52; |
2781 |
|
const register double tmp4_1 = tmp0_0*w53; |
2782 |
|
const register double tmp3_1 = tmp1_0*w52; |
2783 |
|
const register double tmp13_1 = tmp4_0*w53; |
2784 |
|
const register double tmp10_1 = tmp4_0*w52; |
2785 |
|
const register double tmp1_1 = tmp1_0*w53; |
2786 |
|
const register double tmp7_1 = D_3*w56; |
2787 |
|
const register double tmp0_1 = tmp0_0*w52; |
2788 |
|
const register double tmp11_1 = tmp5_0*w53; |
2789 |
|
const register double tmp9_1 = D_0*w56; |
2790 |
|
const register double tmp5_1 = tmp3_0*w54; |
2791 |
|
const register double tmp18_1 = D_2*w56; |
2792 |
|
const register double tmp17_1 = D_1*w55; |
2793 |
|
const register double tmp6_1 = D_0*w55; |
2794 |
|
const register double tmp15_1 = D_2*w55; |
2795 |
|
EM_S[INDEX4(k,m,0,1,numEq,numComp,4)]+=tmp0_1 + tmp1_1; |
2796 |
|
EM_S[INDEX4(k,m,1,2,numEq,numComp,4)]+=tmp2_1; |
2797 |
|
EM_S[INDEX4(k,m,3,2,numEq,numComp,4)]+=tmp3_1 + tmp4_1; |
2798 |
|
EM_S[INDEX4(k,m,0,0,numEq,numComp,4)]+=tmp5_1 + tmp6_1 + tmp7_1; |
2799 |
|
EM_S[INDEX4(k,m,3,3,numEq,numComp,4)]+=tmp5_1 + tmp8_1 + tmp9_1; |
2800 |
|
EM_S[INDEX4(k,m,3,0,numEq,numComp,4)]+=tmp2_1; |
2801 |
|
EM_S[INDEX4(k,m,3,1,numEq,numComp,4)]+=tmp10_1 + tmp11_1; |
2802 |
|
EM_S[INDEX4(k,m,2,1,numEq,numComp,4)]+=tmp2_1; |
2803 |
|
EM_S[INDEX4(k,m,0,2,numEq,numComp,4)]+=tmp12_1 + tmp13_1; |
2804 |
|
EM_S[INDEX4(k,m,2,0,numEq,numComp,4)]+=tmp12_1 + tmp13_1; |
2805 |
|
EM_S[INDEX4(k,m,1,3,numEq,numComp,4)]+=tmp10_1 + tmp11_1; |
2806 |
|
EM_S[INDEX4(k,m,2,3,numEq,numComp,4)]+=tmp3_1 + tmp4_1; |
2807 |
|
EM_S[INDEX4(k,m,2,2,numEq,numComp,4)]+=tmp14_1 + tmp15_1 + tmp16_1; |
2808 |
|
EM_S[INDEX4(k,m,1,0,numEq,numComp,4)]+=tmp0_1 + tmp1_1; |
2809 |
|
EM_S[INDEX4(k,m,0,3,numEq,numComp,4)]+=tmp2_1; |
2810 |
|
EM_S[INDEX4(k,m,1,1,numEq,numComp,4)]+=tmp14_1 + tmp17_1 + tmp18_1; |
2811 |
|
} |
2812 |
|
} |
2813 |
|
} else { /* constant data */ |
2814 |
|
for (index_t k=0; k<numEq; k++) { |
2815 |
|
for (index_t m=0; m<numComp; m++) { |
2816 |
|
const register double D_0 = D_p[INDEX2(k, m, numEq)]; |
2817 |
|
const register double tmp2_1 = D_0*w59; |
2818 |
|
const register double tmp1_1 = D_0*w58; |
2819 |
|
const register double tmp0_1 = D_0*w57; |
2820 |
|
EM_S[INDEX4(k,m,0,1,numEq,numComp,4)]+=tmp0_1; |
2821 |
|
EM_S[INDEX4(k,m,1,2,numEq,numComp,4)]+=tmp1_1; |
2822 |
|
EM_S[INDEX4(k,m,3,2,numEq,numComp,4)]+=tmp0_1; |
2823 |
|
EM_S[INDEX4(k,m,0,0,numEq,numComp,4)]+=tmp2_1; |
2824 |
|
EM_S[INDEX4(k,m,3,3,numEq,numComp,4)]+=tmp2_1; |
2825 |
|
EM_S[INDEX4(k,m,3,0,numEq,numComp,4)]+=tmp1_1; |
2826 |
|
EM_S[INDEX4(k,m,3,1,numEq,numComp,4)]+=tmp0_1; |
2827 |
|
EM_S[INDEX4(k,m,2,1,numEq,numComp,4)]+=tmp1_1; |
2828 |
|
EM_S[INDEX4(k,m,0,2,numEq,numComp,4)]+=tmp0_1; |
2829 |
|
EM_S[INDEX4(k,m,2,0,numEq,numComp,4)]+=tmp0_1; |
2830 |
|
EM_S[INDEX4(k,m,1,3,numEq,numComp,4)]+=tmp0_1; |
2831 |
|
EM_S[INDEX4(k,m,2,3,numEq,numComp,4)]+=tmp0_1; |
2832 |
|
EM_S[INDEX4(k,m,2,2,numEq,numComp,4)]+=tmp2_1; |
2833 |
|
EM_S[INDEX4(k,m,1,0,numEq,numComp,4)]+=tmp0_1; |
2834 |
|
EM_S[INDEX4(k,m,0,3,numEq,numComp,4)]+=tmp1_1; |
2835 |
|
EM_S[INDEX4(k,m,1,1,numEq,numComp,4)]+=tmp2_1; |
2836 |
|
} |
2837 |
|
} |
2838 |
|
} |
2839 |
|
} |
2840 |
|
/////////////// |
2841 |
|
// process X // |
2842 |
|
/////////////// |
2843 |
|
if (!X.isEmpty()) { |
2844 |
|
add_EM_F=true; |
2845 |
|
const double* X_p=const_cast<escript::Data*>(&X)->getSampleDataRO(e); |
2846 |
|
if (X.actsExpanded()) { |
2847 |
|
for (index_t k=0; k<numEq; k++) { |
2848 |
|
const register double X_0_0 = X_p[INDEX3(k, 0, 0, numEq, 2)]; |
2849 |
|
const register double X_1_0 = X_p[INDEX3(k, 1, 0, numEq, 2)]; |
2850 |
|
const register double X_0_1 = X_p[INDEX3(k, 0, 1, numEq, 2)]; |
2851 |
|
const register double X_1_1 = X_p[INDEX3(k, 1, 1, numEq, 2)]; |
2852 |
|
const register double X_0_2 = X_p[INDEX3(k, 0, 2, numEq, 2)]; |
2853 |
|
const register double X_1_2 = X_p[INDEX3(k, 1, 2, numEq, 2)]; |
2854 |
|
const register double X_0_3 = X_p[INDEX3(k, 0, 3, numEq, 2)]; |
2855 |
|
const register double X_1_3 = X_p[INDEX3(k, 1, 3, numEq, 2)]; |
2856 |
|
const register double tmp1_0 = X_1_1 + X_1_3; |
2857 |
|
const register double tmp3_0 = X_0_0 + X_0_1; |
2858 |
|
const register double tmp2_0 = X_1_0 + X_1_2; |
2859 |
|
const register double tmp0_0 = X_0_2 + X_0_3; |
2860 |
|
const register double tmp8_1 = tmp2_0*w66; |
2861 |
|
const register double tmp5_1 = tmp3_0*w64; |
2862 |
|
const register double tmp14_1 = tmp0_0*w64; |
2863 |
|
const register double tmp3_1 = tmp3_0*w60; |
2864 |
|
const register double tmp9_1 = tmp3_0*w63; |
2865 |
|
const register double tmp13_1 = tmp3_0*w65; |
2866 |
|
const register double tmp12_1 = tmp1_0*w66; |
2867 |
|
const register double tmp10_1 = tmp0_0*w60; |
2868 |
|
const register double tmp2_1 = tmp2_0*w61; |
2869 |
|
const register double tmp6_1 = tmp2_0*w62; |
2870 |
|
const register double tmp4_1 = tmp0_0*w65; |
2871 |
|
const register double tmp11_1 = tmp1_0*w67; |
2872 |
|
const register double tmp1_1 = tmp1_0*w62; |
2873 |
|
const register double tmp7_1 = tmp1_0*w61; |
2874 |
|
const register double tmp0_1 = tmp0_0*w63; |
2875 |
|
const register double tmp15_1 = tmp2_0*w67; |
2876 |
|
EM_F[INDEX2(k,0,numEq)]+=tmp0_1 + tmp1_1 + tmp2_1 + tmp3_1; |
2877 |
|
EM_F[INDEX2(k,1,numEq)]+=tmp4_1 + tmp5_1 + tmp6_1 + tmp7_1; |
2878 |
|
EM_F[INDEX2(k,2,numEq)]+=tmp10_1 + tmp11_1 + tmp8_1 + tmp9_1; |
2879 |
|
EM_F[INDEX2(k,3,numEq)]+=tmp12_1 + tmp13_1 + tmp14_1 + tmp15_1; |
2880 |
|
} |
2881 |
|
} else { /* constant data */ |
2882 |
|
for (index_t k=0; k<numEq; k++) { |
2883 |
|
const register double X_0 = X_p[INDEX2(k, 0, numEq)]; |
2884 |
|
const register double X_1 = X_p[INDEX2(k, 1, numEq)]; |
2885 |
|
const register double tmp3_1 = X_1*w71; |
2886 |
|
const register double tmp2_1 = X_0*w70; |
2887 |
|
const register double tmp1_1 = X_0*w68; |
2888 |
|
const register double tmp0_1 = X_1*w69; |
2889 |
|
EM_F[INDEX2(k,0,numEq)]+=tmp0_1 + tmp1_1; |
2890 |
|
EM_F[INDEX2(k,1,numEq)]+=tmp0_1 + tmp2_1; |
2891 |
|
EM_F[INDEX2(k,2,numEq)]+=tmp1_1 + tmp3_1; |
2892 |
|
EM_F[INDEX2(k,3,numEq)]+=tmp2_1 + tmp3_1; |
2893 |
|
} |
2894 |
|
} |
2895 |
|
} |
2896 |
|
/////////////// |
2897 |
|
// process Y // |
2898 |
|
/////////////// |
2899 |
|
if (!Y.isEmpty()) { |
2900 |
|
add_EM_F=true; |
2901 |
|
const double* Y_p=const_cast<escript::Data*>(&Y)->getSampleDataRO(e); |
2902 |
|
if (Y.actsExpanded()) { |
2903 |
|
for (index_t k=0; k<numEq; k++) { |
2904 |
|
const register double Y_0 = Y_p[INDEX2(k, 0, numEq)]; |
2905 |
|
const register double Y_1 = Y_p[INDEX2(k, 1, numEq)]; |
2906 |
|
const register double Y_2 = Y_p[INDEX2(k, 2, numEq)]; |
2907 |
|
const register double Y_3 = Y_p[INDEX2(k, 3, numEq)]; |
2908 |
|
const register double tmp0_0 = Y_1 + Y_2; |
2909 |
|
const register double tmp1_0 = Y_0 + Y_3; |
2910 |
|
const register double tmp9_1 = Y_0*w74; |
2911 |
|
const register double tmp4_1 = tmp1_0*w73; |
2912 |
|
const register double tmp5_1 = Y_2*w74; |
2913 |
|
const register double tmp7_1 = Y_1*w74; |
2914 |
|
const register double tmp6_1 = Y_2*w72; |
2915 |
|
const register double tmp2_1 = Y_3*w74; |
2916 |
|
const register double tmp8_1 = Y_3*w72; |
2917 |
|
const register double tmp3_1 = Y_1*w72; |
2918 |
|
const register double tmp0_1 = Y_0*w72; |
2919 |
|
const register double tmp1_1 = tmp0_0*w73; |
2920 |
|
EM_F[INDEX2(k,0,numEq)]+=tmp0_1 + tmp1_1 + tmp2_1; |
2921 |
|
EM_F[INDEX2(k,1,numEq)]+=tmp3_1 + tmp4_1 + tmp5_1; |
2922 |
|
EM_F[INDEX2(k,2,numEq)]+=tmp4_1 + tmp6_1 + tmp7_1; |
2923 |
|
EM_F[INDEX2(k,3,numEq)]+=tmp1_1 + tmp8_1 + tmp9_1; |
2924 |
|
} |
2925 |
|
} else { /* constant data */ |
2926 |
|
for (index_t k=0; k<numEq; k++) { |
2927 |
|
const register double Y_0 = Y_p[k]; |
2928 |
|
const register double tmp0_1 = Y_0*w75; |
2929 |
|
EM_F[INDEX2(k,0,numEq)]+=tmp0_1; |
2930 |
|
EM_F[INDEX2(k,1,numEq)]+=tmp0_1; |
2931 |
|
EM_F[INDEX2(k,2,numEq)]+=tmp0_1; |
2932 |
|
EM_F[INDEX2(k,3,numEq)]+=tmp0_1; |
2933 |
|
} |
2934 |
|
} |
2935 |
|
} |
2936 |
|
/* GENERATOR SNIP_PDE_SYSTEM BOTTOM */ |
2937 |
|
|
2938 |
|
// add to matrix (if add_EM_S) and RHS (if add_EM_F) |
2939 |
|
const index_t firstNode=m_N0*k1+k0; |
2940 |
|
IndexVector rowIndex; |
2941 |
|
rowIndex.push_back(m_dofMap[firstNode]); |
2942 |
|
rowIndex.push_back(m_dofMap[firstNode+1]); |
2943 |
|
rowIndex.push_back(m_dofMap[firstNode+m_N0]); |
2944 |
|
rowIndex.push_back(m_dofMap[firstNode+m_N0+1]); |
2945 |
|
if (add_EM_F) { |
2946 |
|
//cout << "-- AddtoRHS -- " << endl; |
2947 |
|
double *F_p=rhs.getSampleDataRW(0); |
2948 |
|
for (index_t i=0; i<rowIndex.size(); i++) { |
2949 |
|
if (rowIndex[i]<getNumDOF()) { |
2950 |
|
F_p[rowIndex[i]]+=EM_F[i]; |
2951 |
|
//cout << "F[" << rowIndex[i] << "]=" << F_p[rowIndex[i]] << endl; |
2952 |
|
} |
2953 |
|
} |
2954 |
|
//cout << "---"<<endl; |
2955 |
|
} |
2956 |
|
if (add_EM_S) { |
2957 |
|
//cout << "-- AddtoSystem -- " << endl; |
2958 |
|
addToSystemMatrix(mat, rowIndex, numEq, rowIndex, numComp, EM_S); |
2959 |
|
} |
2960 |
|
|
2961 |
|
} // end k0 loop |
2962 |
|
} // end k1 loop |
2963 |
|
} // end of coloring |
2964 |
|
} // end of parallel region |
2965 |
|
} |
2966 |
|
|
2967 |
|
|
2968 |
} // end of namespace ripley |
} // end of namespace ripley |
2969 |
|
|