1 |
|
2 |
/******************************************************* |
3 |
* |
4 |
* Copyright (c) 2003-2011 by University of Queensland |
5 |
* Earth Systems Science Computational Center (ESSCC) |
6 |
* http://www.uq.edu.au/esscc |
7 |
* |
8 |
* Primary Business: Queensland, Australia |
9 |
* Licensed under the Open Software License version 3.0 |
10 |
* http://www.opensource.org/licenses/osl-3.0.php |
11 |
* |
12 |
*******************************************************/ |
13 |
|
14 |
#include <ripley/Rectangle.h> |
15 |
extern "C" { |
16 |
#include "paso/SystemMatrix.h" |
17 |
} |
18 |
|
19 |
#if USE_SILO |
20 |
#include <silo.h> |
21 |
#ifdef ESYS_MPI |
22 |
#include <pmpio.h> |
23 |
#endif |
24 |
#endif |
25 |
|
26 |
#include <iomanip> |
27 |
|
28 |
using namespace std; |
29 |
|
30 |
namespace ripley { |
31 |
|
32 |
Rectangle::Rectangle(int n0, int n1, double l0, double l1, int d0, int d1) : |
33 |
RipleyDomain(2), |
34 |
m_gNE0(n0), |
35 |
m_gNE1(n1), |
36 |
m_l0(l0), |
37 |
m_l1(l1), |
38 |
m_NX(d0), |
39 |
m_NY(d1) |
40 |
{ |
41 |
// ensure number of subdivisions is valid and nodes can be distributed |
42 |
// among number of ranks |
43 |
if (m_NX*m_NY != m_mpiInfo->size) |
44 |
throw RipleyException("Invalid number of spatial subdivisions"); |
45 |
|
46 |
if ((n0+1)%m_NX > 0 || (n1+1)%m_NY > 0) |
47 |
throw RipleyException("Number of elements+1 must be separable into number of ranks in each dimension"); |
48 |
|
49 |
if ((m_NX > 1 && (n0+1)/m_NX<2) || (m_NY > 1 && (n1+1)/m_NY<2)) |
50 |
throw RipleyException("Too few elements for the number of ranks"); |
51 |
|
52 |
// local number of elements (including overlap) |
53 |
m_NE0 = (m_NX>1 ? (n0+1)/m_NX : n0); |
54 |
if (m_mpiInfo->rank%m_NX>0 && m_mpiInfo->rank%m_NX<m_NX-1) |
55 |
m_NE0++; |
56 |
m_NE1 = (m_NY>1 ? (n1+1)/m_NY : n1); |
57 |
if (m_mpiInfo->rank/m_NX>0 && m_mpiInfo->rank/m_NX<m_NY-1) |
58 |
m_NE1++; |
59 |
|
60 |
// local number of nodes |
61 |
m_N0 = m_NE0+1; |
62 |
m_N1 = m_NE1+1; |
63 |
|
64 |
// bottom-left node is at (offset0,offset1) in global mesh |
65 |
m_offset0 = (n0+1)/m_NX*(m_mpiInfo->rank%m_NX); |
66 |
if (m_offset0 > 0) |
67 |
m_offset0--; |
68 |
m_offset1 = (n1+1)/m_NY*(m_mpiInfo->rank/m_NX); |
69 |
if (m_offset1 > 0) |
70 |
m_offset1--; |
71 |
|
72 |
populateSampleIds(); |
73 |
createPattern(); |
74 |
} |
75 |
|
76 |
Rectangle::~Rectangle() |
77 |
{ |
78 |
} |
79 |
|
80 |
string Rectangle::getDescription() const |
81 |
{ |
82 |
return "ripley::Rectangle"; |
83 |
} |
84 |
|
85 |
bool Rectangle::operator==(const AbstractDomain& other) const |
86 |
{ |
87 |
const Rectangle* o=dynamic_cast<const Rectangle*>(&other); |
88 |
if (o) { |
89 |
return (RipleyDomain::operator==(other) && |
90 |
m_gNE0==o->m_gNE0 && m_gNE1==o->m_gNE1 |
91 |
&& m_l0==o->m_l0 && m_l1==o->m_l1 |
92 |
&& m_NX==o->m_NX && m_NY==o->m_NY); |
93 |
} |
94 |
|
95 |
return false; |
96 |
} |
97 |
|
98 |
void Rectangle::dump(const string& fileName) const |
99 |
{ |
100 |
#if USE_SILO |
101 |
string fn(fileName); |
102 |
if (fileName.length() < 6 || fileName.compare(fileName.length()-5, 5, ".silo") != 0) { |
103 |
fn+=".silo"; |
104 |
} |
105 |
|
106 |
const int NUM_SILO_FILES = 1; |
107 |
const char* blockDirFmt = "/block%04d"; |
108 |
int driver=DB_HDF5; |
109 |
string siloPath; |
110 |
DBfile* dbfile = NULL; |
111 |
|
112 |
#ifdef ESYS_MPI |
113 |
PMPIO_baton_t* baton = NULL; |
114 |
#endif |
115 |
|
116 |
if (m_mpiInfo->size > 1) { |
117 |
#ifdef ESYS_MPI |
118 |
baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm, |
119 |
0x1337, PMPIO_DefaultCreate, PMPIO_DefaultOpen, |
120 |
PMPIO_DefaultClose, (void*)&driver); |
121 |
// try the fallback driver in case of error |
122 |
if (!baton && driver != DB_PDB) { |
123 |
driver = DB_PDB; |
124 |
baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm, |
125 |
0x1338, PMPIO_DefaultCreate, PMPIO_DefaultOpen, |
126 |
PMPIO_DefaultClose, (void*)&driver); |
127 |
} |
128 |
if (baton) { |
129 |
char str[64]; |
130 |
snprintf(str, 64, blockDirFmt, PMPIO_RankInGroup(baton, m_mpiInfo->rank)); |
131 |
siloPath = str; |
132 |
dbfile = (DBfile*) PMPIO_WaitForBaton(baton, fn.c_str(), siloPath.c_str()); |
133 |
} |
134 |
#endif |
135 |
} else { |
136 |
dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL, |
137 |
getDescription().c_str(), driver); |
138 |
// try the fallback driver in case of error |
139 |
if (!dbfile && driver != DB_PDB) { |
140 |
driver = DB_PDB; |
141 |
dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL, |
142 |
getDescription().c_str(), driver); |
143 |
} |
144 |
} |
145 |
|
146 |
if (!dbfile) |
147 |
throw RipleyException("dump: Could not create Silo file"); |
148 |
|
149 |
/* |
150 |
if (driver==DB_HDF5) { |
151 |
// gzip level 1 already provides good compression with minimal |
152 |
// performance penalty. Some tests showed that gzip levels >3 performed |
153 |
// rather badly on escript data both in terms of time and space |
154 |
DBSetCompression("ERRMODE=FALLBACK METHOD=GZIP LEVEL=1"); |
155 |
} |
156 |
*/ |
157 |
|
158 |
boost::scoped_ptr<double> x(new double[m_N0]); |
159 |
boost::scoped_ptr<double> y(new double[m_N1]); |
160 |
double* coords[2] = { x.get(), y.get() }; |
161 |
pair<double,double> xdx = getFirstCoordAndSpacing(0); |
162 |
pair<double,double> ydy = getFirstCoordAndSpacing(1); |
163 |
#pragma omp parallel |
164 |
{ |
165 |
#pragma omp for nowait |
166 |
for (dim_t i0 = 0; i0 < m_N0; i0++) { |
167 |
coords[0][i0]=xdx.first+i0*xdx.second; |
168 |
} |
169 |
#pragma omp for nowait |
170 |
for (dim_t i1 = 0; i1 < m_N1; i1++) { |
171 |
coords[1][i1]=ydy.first+i1*ydy.second; |
172 |
} |
173 |
} |
174 |
IndexVector dims = getNumNodesPerDim(); |
175 |
|
176 |
// write mesh |
177 |
DBPutQuadmesh(dbfile, "mesh", NULL, coords, &dims[0], 2, DB_DOUBLE, |
178 |
DB_COLLINEAR, NULL); |
179 |
|
180 |
// write node ids |
181 |
DBPutQuadvar1(dbfile, "nodeId", "mesh", (void*)&m_nodeId[0], &dims[0], 2, |
182 |
NULL, 0, DB_INT, DB_NODECENT, NULL); |
183 |
|
184 |
// write element ids |
185 |
dims = getNumElementsPerDim(); |
186 |
DBPutQuadvar1(dbfile, "elementId", "mesh", (void*)&m_elementId[0], |
187 |
&dims[0], 2, NULL, 0, DB_INT, DB_ZONECENT, NULL); |
188 |
|
189 |
// rank 0 writes multimesh and multivar |
190 |
if (m_mpiInfo->rank == 0) { |
191 |
vector<string> tempstrings; |
192 |
vector<char*> names; |
193 |
for (dim_t i=0; i<m_mpiInfo->size; i++) { |
194 |
stringstream path; |
195 |
path << "/block" << setw(4) << setfill('0') << right << i << "/mesh"; |
196 |
tempstrings.push_back(path.str()); |
197 |
names.push_back((char*)tempstrings.back().c_str()); |
198 |
} |
199 |
vector<int> types(m_mpiInfo->size, DB_QUAD_RECT); |
200 |
DBSetDir(dbfile, "/"); |
201 |
DBPutMultimesh(dbfile, "multimesh", m_mpiInfo->size, &names[0], |
202 |
&types[0], NULL); |
203 |
tempstrings.clear(); |
204 |
names.clear(); |
205 |
for (dim_t i=0; i<m_mpiInfo->size; i++) { |
206 |
stringstream path; |
207 |
path << "/block" << setw(4) << setfill('0') << right << i << "/nodeId"; |
208 |
tempstrings.push_back(path.str()); |
209 |
names.push_back((char*)tempstrings.back().c_str()); |
210 |
} |
211 |
types.assign(m_mpiInfo->size, DB_QUADVAR); |
212 |
DBPutMultivar(dbfile, "nodeId", m_mpiInfo->size, &names[0], |
213 |
&types[0], NULL); |
214 |
tempstrings.clear(); |
215 |
names.clear(); |
216 |
for (dim_t i=0; i<m_mpiInfo->size; i++) { |
217 |
stringstream path; |
218 |
path << "/block" << setw(4) << setfill('0') << right << i << "/elementId"; |
219 |
tempstrings.push_back(path.str()); |
220 |
names.push_back((char*)tempstrings.back().c_str()); |
221 |
} |
222 |
DBPutMultivar(dbfile, "elementId", m_mpiInfo->size, &names[0], |
223 |
&types[0], NULL); |
224 |
} |
225 |
|
226 |
if (m_mpiInfo->size > 1) { |
227 |
#ifdef ESYS_MPI |
228 |
PMPIO_HandOffBaton(baton, dbfile); |
229 |
PMPIO_Finish(baton); |
230 |
#endif |
231 |
} else { |
232 |
DBClose(dbfile); |
233 |
} |
234 |
|
235 |
#else // USE_SILO |
236 |
throw RipleyException("dump(): no Silo support"); |
237 |
#endif |
238 |
} |
239 |
|
240 |
const int* Rectangle::borrowSampleReferenceIDs(int fsType) const |
241 |
{ |
242 |
switch (fsType) { |
243 |
case Nodes: |
244 |
case ReducedNodes: //FIXME: reduced |
245 |
return &m_nodeId[0]; |
246 |
case DegreesOfFreedom: |
247 |
case ReducedDegreesOfFreedom: //FIXME: reduced |
248 |
return &m_dofId[0]; |
249 |
case Elements: |
250 |
case ReducedElements: |
251 |
return &m_elementId[0]; |
252 |
case FaceElements: |
253 |
case ReducedFaceElements: |
254 |
return &m_faceId[0]; |
255 |
default: |
256 |
break; |
257 |
} |
258 |
|
259 |
stringstream msg; |
260 |
msg << "borrowSampleReferenceIDs() not implemented for function space type " |
261 |
<< functionSpaceTypeAsString(fsType); |
262 |
throw RipleyException(msg.str()); |
263 |
} |
264 |
|
265 |
bool Rectangle::ownSample(int fsType, index_t id) const |
266 |
{ |
267 |
if (getMPISize()==1) |
268 |
return true; |
269 |
|
270 |
switch (fsType) { |
271 |
case Nodes: |
272 |
case ReducedNodes: //FIXME: reduced |
273 |
return (m_dofMap[id] < getNumDOF()); |
274 |
case DegreesOfFreedom: |
275 |
case ReducedDegreesOfFreedom: |
276 |
return true; |
277 |
case Elements: |
278 |
case ReducedElements: |
279 |
// check ownership of element's bottom left node |
280 |
return (m_dofMap[id%m_NE0+m_N0*(id/m_NE0)] < getNumDOF()); |
281 |
case FaceElements: |
282 |
case ReducedFaceElements: |
283 |
{ |
284 |
// check ownership of face element's first node |
285 |
const IndexVector faces = getNumFacesPerBoundary(); |
286 |
dim_t n=0; |
287 |
for (size_t i=0; i<faces.size(); i++) { |
288 |
n+=faces[i]; |
289 |
if (id<n) { |
290 |
index_t k; |
291 |
if (i==1) |
292 |
k=m_N0-1; |
293 |
else if (i==3) |
294 |
k=m_N0*(m_N1-1); |
295 |
else |
296 |
k=0; |
297 |
// determine whether to move right or up |
298 |
const index_t delta=(i/2==0 ? m_N0 : 1); |
299 |
return (m_dofMap[k+(id-n+faces[i])*delta] < getNumDOF()); |
300 |
} |
301 |
} |
302 |
return false; |
303 |
} |
304 |
default: |
305 |
break; |
306 |
} |
307 |
|
308 |
stringstream msg; |
309 |
msg << "ownSample() not implemented for " |
310 |
<< functionSpaceTypeAsString(fsType); |
311 |
throw RipleyException(msg.str()); |
312 |
} |
313 |
|
314 |
void Rectangle::setToGradient(escript::Data& out, const escript::Data& cIn) const |
315 |
{ |
316 |
escript::Data& in = *const_cast<escript::Data*>(&cIn); |
317 |
const dim_t numComp = in.getDataPointSize(); |
318 |
const double h0 = m_l0/m_gNE0; |
319 |
const double h1 = m_l1/m_gNE1; |
320 |
const double cx0 = -1./h0; |
321 |
const double cx1 = -.78867513459481288225/h0; |
322 |
const double cx2 = -.5/h0; |
323 |
const double cx3 = -.21132486540518711775/h0; |
324 |
const double cx4 = .21132486540518711775/h0; |
325 |
const double cx5 = .5/h0; |
326 |
const double cx6 = .78867513459481288225/h0; |
327 |
const double cx7 = 1./h0; |
328 |
const double cy0 = -1./h1; |
329 |
const double cy1 = -.78867513459481288225/h1; |
330 |
const double cy2 = -.5/h1; |
331 |
const double cy3 = -.21132486540518711775/h1; |
332 |
const double cy4 = .21132486540518711775/h1; |
333 |
const double cy5 = .5/h1; |
334 |
const double cy6 = .78867513459481288225/h1; |
335 |
const double cy7 = 1./h1; |
336 |
|
337 |
if (out.getFunctionSpace().getTypeCode() == Elements) { |
338 |
/*** GENERATOR SNIP_GRAD_ELEMENTS TOP */ |
339 |
#pragma omp parallel for |
340 |
for (index_t k1=0; k1 < m_NE1; ++k1) { |
341 |
for (index_t k0=0; k0 < m_NE0; ++k0) { |
342 |
const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,k1, m_N0)); |
343 |
const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,k1+1, m_N0)); |
344 |
const register double* f_01 = in.getSampleDataRO(INDEX2(k0,k1+1, m_N0)); |
345 |
const register double* f_00 = in.getSampleDataRO(INDEX2(k0,k1, m_N0)); |
346 |
double* o = out.getSampleDataRW(INDEX2(k0,k1,m_NE0)); |
347 |
for (index_t i=0; i < numComp; ++i) { |
348 |
o[INDEX3(i,0,0,numComp,2)] = f_00[i]*cx1 + f_01[i]*cx3 + f_10[i]*cx6 + f_11[i]*cx4; |
349 |
o[INDEX3(i,1,0,numComp,2)] = f_00[i]*cy1 + f_01[i]*cy6 + f_10[i]*cy3 + f_11[i]*cy4; |
350 |
o[INDEX3(i,0,1,numComp,2)] = f_00[i]*cx1 + f_01[i]*cx3 + f_10[i]*cx6 + f_11[i]*cx4; |
351 |
o[INDEX3(i,1,1,numComp,2)] = f_00[i]*cy3 + f_01[i]*cy4 + f_10[i]*cy1 + f_11[i]*cy6; |
352 |
o[INDEX3(i,0,2,numComp,2)] = f_00[i]*cx3 + f_01[i]*cx1 + f_10[i]*cx4 + f_11[i]*cx6; |
353 |
o[INDEX3(i,1,2,numComp,2)] = f_00[i]*cy1 + f_01[i]*cy6 + f_10[i]*cy3 + f_11[i]*cy4; |
354 |
o[INDEX3(i,0,3,numComp,2)] = f_00[i]*cx3 + f_01[i]*cx1 + f_10[i]*cx4 + f_11[i]*cx6; |
355 |
o[INDEX3(i,1,3,numComp,2)] = f_00[i]*cy3 + f_01[i]*cy4 + f_10[i]*cy1 + f_11[i]*cy6; |
356 |
} /* end of component loop i */ |
357 |
} /* end of k0 loop */ |
358 |
} /* end of k1 loop */ |
359 |
/* GENERATOR SNIP_GRAD_ELEMENTS BOTTOM */ |
360 |
} else if (out.getFunctionSpace().getTypeCode() == ReducedElements) { |
361 |
/*** GENERATOR SNIP_GRAD_REDUCED_ELEMENTS TOP */ |
362 |
#pragma omp parallel for |
363 |
for (index_t k1=0; k1 < m_NE1; ++k1) { |
364 |
for (index_t k0=0; k0 < m_NE0; ++k0) { |
365 |
const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,k1, m_N0)); |
366 |
const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,k1+1, m_N0)); |
367 |
const register double* f_01 = in.getSampleDataRO(INDEX2(k0,k1+1, m_N0)); |
368 |
const register double* f_00 = in.getSampleDataRO(INDEX2(k0,k1, m_N0)); |
369 |
double* o = out.getSampleDataRW(INDEX2(k0,k1,m_NE0)); |
370 |
for (index_t i=0; i < numComp; ++i) { |
371 |
o[INDEX3(i,0,0,numComp,2)] = cx5*(f_10[i] + f_11[i]) + cx2*(f_00[i] + f_01[i]); |
372 |
o[INDEX3(i,1,0,numComp,2)] = cy2*(f_00[i] + f_10[i]) + cy5*(f_01[i] + f_11[i]); |
373 |
} /* end of component loop i */ |
374 |
} /* end of k0 loop */ |
375 |
} /* end of k1 loop */ |
376 |
/* GENERATOR SNIP_GRAD_REDUCED_ELEMENTS BOTTOM */ |
377 |
} else if (out.getFunctionSpace().getTypeCode() == FaceElements) { |
378 |
#pragma omp parallel |
379 |
{ |
380 |
/*** GENERATOR SNIP_GRAD_FACES TOP */ |
381 |
if (m_faceOffset[0] > -1) { |
382 |
#pragma omp for nowait |
383 |
for (index_t k1=0; k1 < m_NE1; ++k1) { |
384 |
const register double* f_10 = in.getSampleDataRO(INDEX2(1,k1, m_N0)); |
385 |
const register double* f_11 = in.getSampleDataRO(INDEX2(1,k1+1, m_N0)); |
386 |
const register double* f_01 = in.getSampleDataRO(INDEX2(0,k1+1, m_N0)); |
387 |
const register double* f_00 = in.getSampleDataRO(INDEX2(0,k1, m_N0)); |
388 |
double* o = out.getSampleDataRW(m_faceOffset[0]+k1); |
389 |
for (index_t i=0; i < numComp; ++i) { |
390 |
o[INDEX3(i,0,0,numComp,2)] = f_00[i]*cx1 + f_01[i]*cx3 + f_10[i]*cx6 + f_11[i]*cx4; |
391 |
o[INDEX3(i,1,0,numComp,2)] = f_00[i]*cy0 + f_01[i]*cy7; |
392 |
o[INDEX3(i,0,1,numComp,2)] = f_00[i]*cx3 + f_01[i]*cx1 + f_10[i]*cx4 + f_11[i]*cx6; |
393 |
o[INDEX3(i,1,1,numComp,2)] = f_00[i]*cy0 + f_01[i]*cy7; |
394 |
} /* end of component loop i */ |
395 |
} /* end of k1 loop */ |
396 |
} /* end of face 0 */ |
397 |
if (m_faceOffset[1] > -1) { |
398 |
#pragma omp for nowait |
399 |
for (index_t k1=0; k1 < m_NE1; ++k1) { |
400 |
const register double* f_10 = in.getSampleDataRO(INDEX2(m_N0-1,k1, m_N0)); |
401 |
const register double* f_11 = in.getSampleDataRO(INDEX2(m_N0-1,k1+1, m_N0)); |
402 |
const register double* f_01 = in.getSampleDataRO(INDEX2(m_N0-2,k1+1, m_N0)); |
403 |
const register double* f_00 = in.getSampleDataRO(INDEX2(m_N0-2,k1, m_N0)); |
404 |
double* o = out.getSampleDataRW(m_faceOffset[1]+k1); |
405 |
for (index_t i=0; i < numComp; ++i) { |
406 |
o[INDEX3(i,0,0,numComp,2)] = f_00[i]*cx1 + f_01[i]*cx3 + f_10[i]*cx6 + f_11[i]*cx4; |
407 |
o[INDEX3(i,1,0,numComp,2)] = f_10[i]*cy0 + f_11[i]*cy7; |
408 |
o[INDEX3(i,0,1,numComp,2)] = f_00[i]*cx3 + f_01[i]*cx1 + f_10[i]*cx4 + f_11[i]*cx6; |
409 |
o[INDEX3(i,1,1,numComp,2)] = f_10[i]*cy0 + f_11[i]*cy7; |
410 |
} /* end of component loop i */ |
411 |
} /* end of k1 loop */ |
412 |
} /* end of face 1 */ |
413 |
if (m_faceOffset[2] > -1) { |
414 |
#pragma omp for nowait |
415 |
for (index_t k0=0; k0 < m_NE0; ++k0) { |
416 |
const register double* f_00 = in.getSampleDataRO(INDEX2(k0,0, m_N0)); |
417 |
const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,0, m_N0)); |
418 |
const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,1, m_N0)); |
419 |
const register double* f_01 = in.getSampleDataRO(INDEX2(k0,1, m_N0)); |
420 |
double* o = out.getSampleDataRW(m_faceOffset[2]+k0); |
421 |
for (index_t i=0; i < numComp; ++i) { |
422 |
o[INDEX3(i,0,0,numComp,2)] = f_00[i]*cx0 + f_10[i]*cx7; |
423 |
o[INDEX3(i,1,0,numComp,2)] = f_00[i]*cy1 + f_01[i]*cy6 + f_10[i]*cy3 + f_11[i]*cy4; |
424 |
o[INDEX3(i,0,1,numComp,2)] = f_00[i]*cx0 + f_10[i]*cx7; |
425 |
o[INDEX3(i,1,1,numComp,2)] = f_00[i]*cy3 + f_01[i]*cy4 + f_10[i]*cy1 + f_11[i]*cy6; |
426 |
} /* end of component loop i */ |
427 |
} /* end of k0 loop */ |
428 |
} /* end of face 2 */ |
429 |
if (m_faceOffset[3] > -1) { |
430 |
#pragma omp for nowait |
431 |
for (index_t k0=0; k0 < m_NE0; ++k0) { |
432 |
const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,m_N1-1, m_N0)); |
433 |
const register double* f_01 = in.getSampleDataRO(INDEX2(k0,m_N1-1, m_N0)); |
434 |
const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,m_N1-2, m_N0)); |
435 |
const register double* f_00 = in.getSampleDataRO(INDEX2(k0,m_N1-2, m_N0)); |
436 |
double* o = out.getSampleDataRW(m_faceOffset[3]+k0); |
437 |
for (index_t i=0; i < numComp; ++i) { |
438 |
o[INDEX3(i,0,0,numComp,2)] = f_01[i]*cx0 + f_11[i]*cx7; |
439 |
o[INDEX3(i,1,0,numComp,2)] = f_00[i]*cy1 + f_01[i]*cy6 + f_10[i]*cy3 + f_11[i]*cy4; |
440 |
o[INDEX3(i,0,1,numComp,2)] = f_01[i]*cx0 + f_11[i]*cx7; |
441 |
o[INDEX3(i,1,1,numComp,2)] = f_00[i]*cy3 + f_01[i]*cy4 + f_10[i]*cy1 + f_11[i]*cy6; |
442 |
} /* end of component loop i */ |
443 |
} /* end of k0 loop */ |
444 |
} /* end of face 3 */ |
445 |
/* GENERATOR SNIP_GRAD_FACES BOTTOM */ |
446 |
} // end of parallel section |
447 |
} else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) { |
448 |
#pragma omp parallel |
449 |
{ |
450 |
/*** GENERATOR SNIP_GRAD_REDUCED_FACES TOP */ |
451 |
if (m_faceOffset[0] > -1) { |
452 |
#pragma omp for nowait |
453 |
for (index_t k1=0; k1 < m_NE1; ++k1) { |
454 |
const register double* f_10 = in.getSampleDataRO(INDEX2(1,k1, m_N0)); |
455 |
const register double* f_11 = in.getSampleDataRO(INDEX2(1,k1+1, m_N0)); |
456 |
const register double* f_01 = in.getSampleDataRO(INDEX2(0,k1+1, m_N0)); |
457 |
const register double* f_00 = in.getSampleDataRO(INDEX2(0,k1, m_N0)); |
458 |
double* o = out.getSampleDataRW(m_faceOffset[0]+k1); |
459 |
for (index_t i=0; i < numComp; ++i) { |
460 |
o[INDEX3(i,0,0,numComp,2)] = cx5*(f_10[i] + f_11[i]) + cx2*(f_00[i] + f_01[i]); |
461 |
o[INDEX3(i,1,0,numComp,2)] = f_00[i]*cy0 + f_01[i]*cy7; |
462 |
} /* end of component loop i */ |
463 |
} /* end of k1 loop */ |
464 |
} /* end of face 0 */ |
465 |
if (m_faceOffset[1] > -1) { |
466 |
#pragma omp for nowait |
467 |
for (index_t k1=0; k1 < m_NE1; ++k1) { |
468 |
const register double* f_10 = in.getSampleDataRO(INDEX2(m_N0-1,k1, m_N0)); |
469 |
const register double* f_11 = in.getSampleDataRO(INDEX2(m_N0-1,k1+1, m_N0)); |
470 |
const register double* f_01 = in.getSampleDataRO(INDEX2(m_N0-2,k1+1, m_N0)); |
471 |
const register double* f_00 = in.getSampleDataRO(INDEX2(m_N0-2,k1, m_N0)); |
472 |
double* o = out.getSampleDataRW(m_faceOffset[1]+k1); |
473 |
for (index_t i=0; i < numComp; ++i) { |
474 |
o[INDEX3(i,0,0,numComp,2)] = cx5*(f_10[i] + f_11[i]) + cx2*(f_00[i] + f_01[i]); |
475 |
o[INDEX3(i,1,0,numComp,2)] = f_10[i]*cy0 + f_11[i]*cy7; |
476 |
} /* end of component loop i */ |
477 |
} /* end of k1 loop */ |
478 |
} /* end of face 1 */ |
479 |
if (m_faceOffset[2] > -1) { |
480 |
#pragma omp for nowait |
481 |
for (index_t k0=0; k0 < m_NE0; ++k0) { |
482 |
const register double* f_00 = in.getSampleDataRO(INDEX2(k0,0, m_N0)); |
483 |
const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,0, m_N0)); |
484 |
const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,1, m_N0)); |
485 |
const register double* f_01 = in.getSampleDataRO(INDEX2(k0,1, m_N0)); |
486 |
double* o = out.getSampleDataRW(m_faceOffset[2]+k0); |
487 |
for (index_t i=0; i < numComp; ++i) { |
488 |
o[INDEX3(i,0,0,numComp,2)] = f_00[i]*cx0 + f_10[i]*cx7; |
489 |
o[INDEX3(i,1,0,numComp,2)] = cy2*(f_00[i] + f_10[i]) + cy5*(f_01[i] + f_11[i]); |
490 |
} /* end of component loop i */ |
491 |
} /* end of k0 loop */ |
492 |
} /* end of face 2 */ |
493 |
if (m_faceOffset[3] > -1) { |
494 |
#pragma omp for nowait |
495 |
for (index_t k0=0; k0 < m_NE0; ++k0) { |
496 |
const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,m_N1-1, m_N0)); |
497 |
const register double* f_01 = in.getSampleDataRO(INDEX2(k0,m_N1-1, m_N0)); |
498 |
const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,m_N1-2, m_N0)); |
499 |
const register double* f_00 = in.getSampleDataRO(INDEX2(k0,m_N1-2, m_N0)); |
500 |
double* o = out.getSampleDataRW(m_faceOffset[3]+k0); |
501 |
for (index_t i=0; i < numComp; ++i) { |
502 |
o[INDEX3(i,0,0,numComp,2)] = f_01[i]*cx0 + f_11[i]*cx7; |
503 |
o[INDEX3(i,1,0,numComp,2)] = cy5*(f_01[i] + f_11[i]) + cy2*(f_00[i] + f_10[i]); |
504 |
} /* end of component loop i */ |
505 |
} /* end of k0 loop */ |
506 |
} /* end of face 3 */ |
507 |
/* GENERATOR SNIP_GRAD_REDUCED_FACES BOTTOM */ |
508 |
} // end of parallel section |
509 |
} else { |
510 |
stringstream msg; |
511 |
msg << "setToGradient() not implemented for " |
512 |
<< functionSpaceTypeAsString(out.getFunctionSpace().getTypeCode()); |
513 |
throw RipleyException(msg.str()); |
514 |
} |
515 |
} |
516 |
|
517 |
void Rectangle::setToIntegrals(vector<double>& integrals, const escript::Data& arg) const |
518 |
{ |
519 |
escript::Data& in = *const_cast<escript::Data*>(&arg); |
520 |
const dim_t numComp = in.getDataPointSize(); |
521 |
const double h0 = m_l0/m_gNE0; |
522 |
const double h1 = m_l1/m_gNE1; |
523 |
if (arg.getFunctionSpace().getTypeCode() == Elements) { |
524 |
const double w_0 = h0*h1/4.; |
525 |
#pragma omp parallel |
526 |
{ |
527 |
vector<double> int_local(numComp, 0); |
528 |
#pragma omp for nowait |
529 |
for (index_t k1 = 0; k1 < m_NE1; ++k1) { |
530 |
for (index_t k0 = 0; k0 < m_NE0; ++k0) { |
531 |
const double* f = in.getSampleDataRO(INDEX2(k0, k1, m_NE0)); |
532 |
for (index_t i=0; i < numComp; ++i) { |
533 |
const register double f_0 = f[INDEX2(i,0,numComp)]; |
534 |
const register double f_1 = f[INDEX2(i,1,numComp)]; |
535 |
const register double f_2 = f[INDEX2(i,2,numComp)]; |
536 |
const register double f_3 = f[INDEX2(i,3,numComp)]; |
537 |
int_local[i]+=(f_0+f_1+f_2+f_3)*w_0; |
538 |
} /* end of component loop i */ |
539 |
} /* end of k0 loop */ |
540 |
} /* end of k1 loop */ |
541 |
|
542 |
#pragma omp critical |
543 |
for (index_t i=0; i<numComp; i++) |
544 |
integrals[i]+=int_local[i]; |
545 |
} // end of parallel section |
546 |
} else if (arg.getFunctionSpace().getTypeCode() == ReducedElements) { |
547 |
const double w_0 = h0*h1; |
548 |
#pragma omp parallel |
549 |
{ |
550 |
vector<double> int_local(numComp, 0); |
551 |
#pragma omp for nowait |
552 |
for (index_t k1 = 0; k1 < m_NE1; ++k1) { |
553 |
for (index_t k0 = 0; k0 < m_NE0; ++k0) { |
554 |
const double* f = in.getSampleDataRO(INDEX2(k0, k1, m_NE0)); |
555 |
for (index_t i=0; i < numComp; ++i) { |
556 |
int_local[i]+=f[i]*w_0; |
557 |
} /* end of component loop i */ |
558 |
} /* end of k0 loop */ |
559 |
} /* end of k1 loop */ |
560 |
|
561 |
#pragma omp critical |
562 |
for (index_t i=0; i<numComp; i++) |
563 |
integrals[i]+=int_local[i]; |
564 |
} // end of parallel section |
565 |
} else if (arg.getFunctionSpace().getTypeCode() == FaceElements) { |
566 |
const double w_0 = h0/2.; |
567 |
const double w_1 = h1/2.; |
568 |
#pragma omp parallel |
569 |
{ |
570 |
vector<double> int_local(numComp, 0); |
571 |
if (m_faceOffset[0] > -1) { |
572 |
#pragma omp for nowait |
573 |
for (index_t k1 = 0; k1 < m_NE1; ++k1) { |
574 |
const double* f = in.getSampleDataRO(m_faceOffset[0]+k1); |
575 |
for (index_t i=0; i < numComp; ++i) { |
576 |
const register double f_0 = f[INDEX2(i,0,numComp)]; |
577 |
const register double f_1 = f[INDEX2(i,1,numComp)]; |
578 |
int_local[i]+=(f_0+f_1)*w_1; |
579 |
} /* end of component loop i */ |
580 |
} /* end of k1 loop */ |
581 |
} |
582 |
|
583 |
if (m_faceOffset[1] > -1) { |
584 |
#pragma omp for nowait |
585 |
for (index_t k1 = 0; k1 < m_NE1; ++k1) { |
586 |
const double* f = in.getSampleDataRO(m_faceOffset[1]+k1); |
587 |
for (index_t i=0; i < numComp; ++i) { |
588 |
const register double f_0 = f[INDEX2(i,0,numComp)]; |
589 |
const register double f_1 = f[INDEX2(i,1,numComp)]; |
590 |
int_local[i]+=(f_0+f_1)*w_1; |
591 |
} /* end of component loop i */ |
592 |
} /* end of k1 loop */ |
593 |
} |
594 |
|
595 |
if (m_faceOffset[2] > -1) { |
596 |
#pragma omp for nowait |
597 |
for (index_t k0 = 0; k0 < m_NE0; ++k0) { |
598 |
const double* f = in.getSampleDataRO(m_faceOffset[2]+k0); |
599 |
for (index_t i=0; i < numComp; ++i) { |
600 |
const register double f_0 = f[INDEX2(i,0,numComp)]; |
601 |
const register double f_1 = f[INDEX2(i,1,numComp)]; |
602 |
int_local[i]+=(f_0+f_1)*w_0; |
603 |
} /* end of component loop i */ |
604 |
} /* end of k0 loop */ |
605 |
} |
606 |
|
607 |
if (m_faceOffset[3] > -1) { |
608 |
#pragma omp for nowait |
609 |
for (index_t k0 = 0; k0 < m_NE0; ++k0) { |
610 |
const double* f = in.getSampleDataRO(m_faceOffset[3]+k0); |
611 |
for (index_t i=0; i < numComp; ++i) { |
612 |
const register double f_0 = f[INDEX2(i,0,numComp)]; |
613 |
const register double f_1 = f[INDEX2(i,1,numComp)]; |
614 |
int_local[i]+=(f_0+f_1)*w_0; |
615 |
} /* end of component loop i */ |
616 |
} /* end of k0 loop */ |
617 |
} |
618 |
|
619 |
#pragma omp critical |
620 |
for (index_t i=0; i<numComp; i++) |
621 |
integrals[i]+=int_local[i]; |
622 |
} // end of parallel section |
623 |
} else if (arg.getFunctionSpace().getTypeCode() == ReducedFaceElements) { |
624 |
#pragma omp parallel |
625 |
{ |
626 |
vector<double> int_local(numComp, 0); |
627 |
if (m_faceOffset[0] > -1) { |
628 |
#pragma omp for nowait |
629 |
for (index_t k1 = 0; k1 < m_NE1; ++k1) { |
630 |
const double* f = in.getSampleDataRO(m_faceOffset[0]+k1); |
631 |
for (index_t i=0; i < numComp; ++i) { |
632 |
int_local[i]+=f[i]*h1; |
633 |
} /* end of component loop i */ |
634 |
} /* end of k1 loop */ |
635 |
} |
636 |
|
637 |
if (m_faceOffset[1] > -1) { |
638 |
#pragma omp for nowait |
639 |
for (index_t k1 = 0; k1 < m_NE1; ++k1) { |
640 |
const double* f = in.getSampleDataRO(m_faceOffset[1]+k1); |
641 |
for (index_t i=0; i < numComp; ++i) { |
642 |
int_local[i]+=f[i]*h1; |
643 |
} /* end of component loop i */ |
644 |
} /* end of k1 loop */ |
645 |
} |
646 |
|
647 |
if (m_faceOffset[2] > -1) { |
648 |
#pragma omp for nowait |
649 |
for (index_t k0 = 0; k0 < m_NE0; ++k0) { |
650 |
const double* f = in.getSampleDataRO(m_faceOffset[2]+k0); |
651 |
for (index_t i=0; i < numComp; ++i) { |
652 |
int_local[i]+=f[i]*h0; |
653 |
} /* end of component loop i */ |
654 |
} /* end of k0 loop */ |
655 |
} |
656 |
|
657 |
if (m_faceOffset[3] > -1) { |
658 |
#pragma omp for nowait |
659 |
for (index_t k0 = 0; k0 < m_NE0; ++k0) { |
660 |
const double* f = in.getSampleDataRO(m_faceOffset[3]+k0); |
661 |
for (index_t i=0; i < numComp; ++i) { |
662 |
int_local[i]+=f[i]*h0; |
663 |
} /* end of component loop i */ |
664 |
} /* end of k0 loop */ |
665 |
} |
666 |
|
667 |
#pragma omp critical |
668 |
for (index_t i=0; i<numComp; i++) |
669 |
integrals[i]+=int_local[i]; |
670 |
} // end of parallel section |
671 |
} else { |
672 |
stringstream msg; |
673 |
msg << "setToIntegrals() not implemented for " |
674 |
<< functionSpaceTypeAsString(arg.getFunctionSpace().getTypeCode()); |
675 |
throw RipleyException(msg.str()); |
676 |
} |
677 |
} |
678 |
|
679 |
void Rectangle::setToNormal(escript::Data& out) const |
680 |
{ |
681 |
if (out.getFunctionSpace().getTypeCode() == FaceElements) { |
682 |
#pragma omp parallel |
683 |
{ |
684 |
if (m_faceOffset[0] > -1) { |
685 |
#pragma omp for nowait |
686 |
for (index_t k1 = 0; k1 < m_NE1; ++k1) { |
687 |
double* o = out.getSampleDataRW(m_faceOffset[0]+k1); |
688 |
// set vector at two quadrature points |
689 |
*o++ = -1.; |
690 |
*o++ = 0.; |
691 |
*o++ = -1.; |
692 |
*o = 0.; |
693 |
} |
694 |
} |
695 |
|
696 |
if (m_faceOffset[1] > -1) { |
697 |
#pragma omp for nowait |
698 |
for (index_t k1 = 0; k1 < m_NE1; ++k1) { |
699 |
double* o = out.getSampleDataRW(m_faceOffset[1]+k1); |
700 |
// set vector at two quadrature points |
701 |
*o++ = 1.; |
702 |
*o++ = 0.; |
703 |
*o++ = 1.; |
704 |
*o = 0.; |
705 |
} |
706 |
} |
707 |
|
708 |
if (m_faceOffset[2] > -1) { |
709 |
#pragma omp for nowait |
710 |
for (index_t k0 = 0; k0 < m_NE0; ++k0) { |
711 |
double* o = out.getSampleDataRW(m_faceOffset[2]+k0); |
712 |
// set vector at two quadrature points |
713 |
*o++ = 0.; |
714 |
*o++ = -1.; |
715 |
*o++ = 0.; |
716 |
*o = -1.; |
717 |
} |
718 |
} |
719 |
|
720 |
if (m_faceOffset[3] > -1) { |
721 |
#pragma omp for nowait |
722 |
for (index_t k0 = 0; k0 < m_NE0; ++k0) { |
723 |
double* o = out.getSampleDataRW(m_faceOffset[3]+k0); |
724 |
// set vector at two quadrature points |
725 |
*o++ = 0.; |
726 |
*o++ = 1.; |
727 |
*o++ = 0.; |
728 |
*o = 1.; |
729 |
} |
730 |
} |
731 |
} // end of parallel section |
732 |
} else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) { |
733 |
#pragma omp parallel |
734 |
{ |
735 |
if (m_faceOffset[0] > -1) { |
736 |
#pragma omp for nowait |
737 |
for (index_t k1 = 0; k1 < m_NE1; ++k1) { |
738 |
double* o = out.getSampleDataRW(m_faceOffset[0]+k1); |
739 |
*o++ = -1.; |
740 |
*o = 0.; |
741 |
} |
742 |
} |
743 |
|
744 |
if (m_faceOffset[1] > -1) { |
745 |
#pragma omp for nowait |
746 |
for (index_t k1 = 0; k1 < m_NE1; ++k1) { |
747 |
double* o = out.getSampleDataRW(m_faceOffset[1]+k1); |
748 |
*o++ = 1.; |
749 |
*o = 0.; |
750 |
} |
751 |
} |
752 |
|
753 |
if (m_faceOffset[2] > -1) { |
754 |
#pragma omp for nowait |
755 |
for (index_t k0 = 0; k0 < m_NE0; ++k0) { |
756 |
double* o = out.getSampleDataRW(m_faceOffset[2]+k0); |
757 |
*o++ = 0.; |
758 |
*o = -1.; |
759 |
} |
760 |
} |
761 |
|
762 |
if (m_faceOffset[3] > -1) { |
763 |
#pragma omp for nowait |
764 |
for (index_t k0 = 0; k0 < m_NE0; ++k0) { |
765 |
double* o = out.getSampleDataRW(m_faceOffset[3]+k0); |
766 |
*o++ = 0.; |
767 |
*o = 1.; |
768 |
} |
769 |
} |
770 |
} // end of parallel section |
771 |
|
772 |
} else { |
773 |
stringstream msg; |
774 |
msg << "setToNormal() not implemented for " |
775 |
<< functionSpaceTypeAsString(out.getFunctionSpace().getTypeCode()); |
776 |
throw RipleyException(msg.str()); |
777 |
} |
778 |
} |
779 |
|
780 |
Paso_SystemMatrixPattern* Rectangle::getPattern(bool reducedRowOrder, |
781 |
bool reducedColOrder) const |
782 |
{ |
783 |
if (reducedRowOrder || reducedColOrder) |
784 |
throw RipleyException("getPattern() not implemented for reduced order"); |
785 |
|
786 |
return m_pattern; |
787 |
} |
788 |
|
789 |
void Rectangle::Print_Mesh_Info(const bool full) const |
790 |
{ |
791 |
RipleyDomain::Print_Mesh_Info(full); |
792 |
if (full) { |
793 |
cout << " Id Coordinates" << endl; |
794 |
cout.precision(15); |
795 |
cout.setf(ios::scientific, ios::floatfield); |
796 |
pair<double,double> xdx = getFirstCoordAndSpacing(0); |
797 |
pair<double,double> ydy = getFirstCoordAndSpacing(1); |
798 |
for (index_t i=0; i < getNumNodes(); i++) { |
799 |
cout << " " << setw(5) << m_nodeId[i] |
800 |
<< " " << xdx.first+(i%m_N0)*xdx.second |
801 |
<< " " << ydy.first+(i/m_N0)*ydy.second << endl; |
802 |
} |
803 |
} |
804 |
} |
805 |
|
806 |
IndexVector Rectangle::getNumNodesPerDim() const |
807 |
{ |
808 |
IndexVector ret; |
809 |
ret.push_back(m_N0); |
810 |
ret.push_back(m_N1); |
811 |
return ret; |
812 |
} |
813 |
|
814 |
IndexVector Rectangle::getNumElementsPerDim() const |
815 |
{ |
816 |
IndexVector ret; |
817 |
ret.push_back(m_NE0); |
818 |
ret.push_back(m_NE1); |
819 |
return ret; |
820 |
} |
821 |
|
822 |
IndexVector Rectangle::getNumFacesPerBoundary() const |
823 |
{ |
824 |
IndexVector ret(4, 0); |
825 |
//left |
826 |
if (m_offset0==0) |
827 |
ret[0]=m_NE1; |
828 |
//right |
829 |
if (m_mpiInfo->rank%m_NX==m_NX-1) |
830 |
ret[1]=m_NE1; |
831 |
//bottom |
832 |
if (m_offset1==0) |
833 |
ret[2]=m_NE0; |
834 |
//top |
835 |
if (m_mpiInfo->rank/m_NX==m_NY-1) |
836 |
ret[3]=m_NE0; |
837 |
return ret; |
838 |
} |
839 |
|
840 |
pair<double,double> Rectangle::getFirstCoordAndSpacing(dim_t dim) const |
841 |
{ |
842 |
if (dim==0) { |
843 |
return pair<double,double>((m_l0*m_offset0)/m_gNE0, m_l0/m_gNE0); |
844 |
} else if (dim==1) { |
845 |
return pair<double,double>((m_l1*m_offset1)/m_gNE1, m_l1/m_gNE1); |
846 |
} |
847 |
throw RipleyException("getFirstCoordAndSpacing(): invalid argument"); |
848 |
} |
849 |
|
850 |
//protected |
851 |
dim_t Rectangle::getNumDOF() const |
852 |
{ |
853 |
return (m_gNE0+1)/m_NX*(m_gNE1+1)/m_NY; |
854 |
} |
855 |
|
856 |
//protected |
857 |
dim_t Rectangle::getNumFaceElements() const |
858 |
{ |
859 |
const IndexVector faces = getNumFacesPerBoundary(); |
860 |
dim_t n=0; |
861 |
for (size_t i=0; i<faces.size(); i++) |
862 |
n+=faces[i]; |
863 |
return n; |
864 |
} |
865 |
|
866 |
//protected |
867 |
void Rectangle::assembleCoordinates(escript::Data& arg) const |
868 |
{ |
869 |
escriptDataC x = arg.getDataC(); |
870 |
int numDim = m_numDim; |
871 |
if (!isDataPointShapeEqual(&x, 1, &numDim)) |
872 |
throw RipleyException("setToX: Invalid Data object shape"); |
873 |
if (!numSamplesEqual(&x, 1, getNumNodes())) |
874 |
throw RipleyException("setToX: Illegal number of samples in Data object"); |
875 |
|
876 |
pair<double,double> xdx = getFirstCoordAndSpacing(0); |
877 |
pair<double,double> ydy = getFirstCoordAndSpacing(1); |
878 |
arg.requireWrite(); |
879 |
#pragma omp parallel for |
880 |
for (dim_t i1 = 0; i1 < m_N1; i1++) { |
881 |
for (dim_t i0 = 0; i0 < m_N0; i0++) { |
882 |
double* point = arg.getSampleDataRW(i0+m_N0*i1); |
883 |
point[0] = xdx.first+i0*xdx.second; |
884 |
point[1] = ydy.first+i1*ydy.second; |
885 |
} |
886 |
} |
887 |
} |
888 |
|
889 |
//protected |
890 |
dim_t Rectangle::insertNeighbourNodes(IndexVector& index, index_t node) const |
891 |
{ |
892 |
const dim_t nDOF0 = (m_gNE0+1)/m_NX; |
893 |
const dim_t nDOF1 = (m_gNE1+1)/m_NY; |
894 |
const int x=node%nDOF0; |
895 |
const int y=node/nDOF0; |
896 |
dim_t num=0; |
897 |
// loop through potential neighbours and add to index if positions are |
898 |
// within bounds |
899 |
for (int i1=-1; i1<2; i1++) { |
900 |
for (int i0=-1; i0<2; i0++) { |
901 |
// skip node itself |
902 |
if (i0==0 && i1==0) |
903 |
continue; |
904 |
// location of neighbour node |
905 |
const int nx=x+i0; |
906 |
const int ny=y+i1; |
907 |
if (nx>=0 && ny>=0 && nx<nDOF0 && ny<nDOF1) { |
908 |
index.push_back(ny*nDOF0+nx); |
909 |
num++; |
910 |
} |
911 |
} |
912 |
} |
913 |
|
914 |
return num; |
915 |
} |
916 |
|
917 |
//protected |
918 |
void Rectangle::nodesToDOF(escript::Data& out, escript::Data& in) const |
919 |
{ |
920 |
const dim_t numComp = in.getDataPointSize(); |
921 |
out.requireWrite(); |
922 |
|
923 |
const index_t left = (m_offset0==0 ? 0 : 1); |
924 |
const index_t bottom = (m_offset1==0 ? 0 : 1); |
925 |
const dim_t nDOF0 = (m_gNE0+1)/m_NX; |
926 |
const dim_t nDOF1 = (m_gNE1+1)/m_NY; |
927 |
#pragma omp parallel for |
928 |
for (index_t i=0; i<nDOF1; i++) { |
929 |
for (index_t j=0; j<nDOF0; j++) { |
930 |
const index_t n=j+left+(i+bottom)*m_N0; |
931 |
const double* src=in.getSampleDataRO(n); |
932 |
copy(src, src+numComp, out.getSampleDataRW(j+i*nDOF0)); |
933 |
} |
934 |
} |
935 |
} |
936 |
|
937 |
//protected |
938 |
void Rectangle::dofToNodes(escript::Data& out, escript::Data& in) const |
939 |
{ |
940 |
const dim_t numComp = in.getDataPointSize(); |
941 |
Paso_Coupler* coupler = Paso_Coupler_alloc(m_connector, numComp); |
942 |
in.requireWrite(); |
943 |
Paso_Coupler_startCollect(coupler, in.getSampleDataRW(0)); |
944 |
|
945 |
const dim_t numDOF = getNumDOF(); |
946 |
out.requireWrite(); |
947 |
const double* buffer = Paso_Coupler_finishCollect(coupler); |
948 |
|
949 |
#pragma omp parallel for |
950 |
for (index_t i=0; i<getNumNodes(); i++) { |
951 |
const double* src=(m_dofMap[i]<numDOF ? |
952 |
in.getSampleDataRO(m_dofMap[i]) |
953 |
: &buffer[(m_dofMap[i]-numDOF)*numComp]); |
954 |
copy(src, src+numComp, out.getSampleDataRW(i)); |
955 |
} |
956 |
} |
957 |
|
958 |
//private |
959 |
void Rectangle::populateSampleIds() |
960 |
{ |
961 |
// identifiers are ordered from left to right, bottom to top globablly. |
962 |
|
963 |
// build node distribution vector first. |
964 |
// rank i owns m_nodeDistribution[i+1]-nodeDistribution[i] nodes |
965 |
m_nodeDistribution.assign(m_mpiInfo->size+1, 0); |
966 |
const dim_t numDOF=getNumDOF(); |
967 |
for (dim_t k=1; k<m_mpiInfo->size; k++) { |
968 |
m_nodeDistribution[k]=k*numDOF; |
969 |
} |
970 |
m_nodeDistribution[m_mpiInfo->size]=getNumDataPointsGlobal(); |
971 |
m_nodeId.resize(getNumNodes()); |
972 |
m_dofId.resize(numDOF); |
973 |
m_elementId.resize(getNumElements()); |
974 |
m_faceId.resize(getNumFaceElements()); |
975 |
|
976 |
#pragma omp parallel |
977 |
{ |
978 |
// nodes |
979 |
#pragma omp for nowait |
980 |
for (dim_t i1=0; i1<m_N1; i1++) { |
981 |
for (dim_t i0=0; i0<m_N0; i0++) { |
982 |
m_nodeId[i0+i1*m_N0] = (m_offset1+i1)*(m_gNE0+1)+m_offset0+i0; |
983 |
} |
984 |
} |
985 |
|
986 |
// degrees of freedom |
987 |
#pragma omp for nowait |
988 |
for (dim_t k=0; k<numDOF; k++) |
989 |
m_dofId[k] = m_nodeDistribution[m_mpiInfo->rank]+k; |
990 |
|
991 |
// elements |
992 |
#pragma omp for nowait |
993 |
for (dim_t i1=0; i1<m_NE1; i1++) { |
994 |
for (dim_t i0=0; i0<m_NE0; i0++) { |
995 |
m_elementId[i0+i1*m_NE0]=(m_offset1+i1)*m_gNE0+m_offset0+i0; |
996 |
} |
997 |
} |
998 |
|
999 |
// face elements |
1000 |
#pragma omp for |
1001 |
for (dim_t k=0; k<getNumFaceElements(); k++) |
1002 |
m_faceId[k]=k; |
1003 |
} // end parallel section |
1004 |
|
1005 |
m_nodeTags.assign(getNumNodes(), 0); |
1006 |
updateTagsInUse(Nodes); |
1007 |
|
1008 |
m_elementTags.assign(getNumElements(), 0); |
1009 |
updateTagsInUse(Elements); |
1010 |
|
1011 |
// generate face offset vector and set face tags |
1012 |
const IndexVector facesPerEdge = getNumFacesPerBoundary(); |
1013 |
const index_t LEFT=1, RIGHT=2, BOTTOM=10, TOP=20; |
1014 |
const index_t faceTag[] = { LEFT, RIGHT, BOTTOM, TOP }; |
1015 |
m_faceOffset.assign(facesPerEdge.size(), -1); |
1016 |
m_faceTags.clear(); |
1017 |
index_t offset=0; |
1018 |
for (size_t i=0; i<facesPerEdge.size(); i++) { |
1019 |
if (facesPerEdge[i]>0) { |
1020 |
m_faceOffset[i]=offset; |
1021 |
offset+=facesPerEdge[i]; |
1022 |
m_faceTags.insert(m_faceTags.end(), facesPerEdge[i], faceTag[i]); |
1023 |
} |
1024 |
} |
1025 |
setTagMap("left", LEFT); |
1026 |
setTagMap("right", RIGHT); |
1027 |
setTagMap("bottom", BOTTOM); |
1028 |
setTagMap("top", TOP); |
1029 |
updateTagsInUse(FaceElements); |
1030 |
} |
1031 |
|
1032 |
//private |
1033 |
void Rectangle::createPattern() |
1034 |
{ |
1035 |
const dim_t nDOF0 = (m_gNE0+1)/m_NX; |
1036 |
const dim_t nDOF1 = (m_gNE1+1)/m_NY; |
1037 |
const index_t left = (m_offset0==0 ? 0 : 1); |
1038 |
const index_t bottom = (m_offset1==0 ? 0 : 1); |
1039 |
|
1040 |
// populate node->DOF mapping with own degrees of freedom. |
1041 |
// The rest is assigned in the loop further down |
1042 |
m_dofMap.assign(getNumNodes(), 0); |
1043 |
#pragma omp parallel for |
1044 |
for (index_t i=bottom; i<m_N1; i++) { |
1045 |
for (index_t j=left; j<m_N0; j++) { |
1046 |
m_dofMap[i*m_N0+j]=(i-bottom)*nDOF0+j-left; |
1047 |
} |
1048 |
} |
1049 |
|
1050 |
// build list of shared components and neighbours by looping through |
1051 |
// all potential neighbouring ranks and checking if positions are |
1052 |
// within bounds |
1053 |
const dim_t numDOF=nDOF0*nDOF1; |
1054 |
vector<IndexVector> colIndices(numDOF); // for the couple blocks |
1055 |
RankVector neighbour; |
1056 |
IndexVector offsetInShared(1,0); |
1057 |
IndexVector sendShared, recvShared; |
1058 |
int numShared=0; |
1059 |
const int x=m_mpiInfo->rank%m_NX; |
1060 |
const int y=m_mpiInfo->rank/m_NX; |
1061 |
for (int i1=-1; i1<2; i1++) { |
1062 |
for (int i0=-1; i0<2; i0++) { |
1063 |
// skip this rank |
1064 |
if (i0==0 && i1==0) |
1065 |
continue; |
1066 |
// location of neighbour rank |
1067 |
const int nx=x+i0; |
1068 |
const int ny=y+i1; |
1069 |
if (nx>=0 && ny>=0 && nx<m_NX && ny<m_NY) { |
1070 |
neighbour.push_back(ny*m_NX+nx); |
1071 |
if (i0==0) { |
1072 |
// sharing top or bottom edge |
1073 |
const int firstDOF=(i1==-1 ? 0 : numDOF-nDOF0); |
1074 |
const int firstNode=(i1==-1 ? left : m_N0*(m_N1-1)+left); |
1075 |
offsetInShared.push_back(offsetInShared.back()+nDOF0); |
1076 |
for (dim_t i=0; i<nDOF0; i++, numShared++) { |
1077 |
sendShared.push_back(firstDOF+i); |
1078 |
recvShared.push_back(numDOF+numShared); |
1079 |
if (i>0) |
1080 |
colIndices[firstDOF+i-1].push_back(numShared); |
1081 |
colIndices[firstDOF+i].push_back(numShared); |
1082 |
if (i<nDOF0-1) |
1083 |
colIndices[firstDOF+i+1].push_back(numShared); |
1084 |
m_dofMap[firstNode+i]=numDOF+numShared; |
1085 |
} |
1086 |
} else if (i1==0) { |
1087 |
// sharing left or right edge |
1088 |
const int firstDOF=(i0==-1 ? 0 : nDOF0-1); |
1089 |
const int firstNode=(i0==-1 ? bottom*m_N0 : (bottom+1)*m_N0-1); |
1090 |
offsetInShared.push_back(offsetInShared.back()+nDOF1); |
1091 |
for (dim_t i=0; i<nDOF1; i++, numShared++) { |
1092 |
sendShared.push_back(firstDOF+i*nDOF0); |
1093 |
recvShared.push_back(numDOF+numShared); |
1094 |
if (i>0) |
1095 |
colIndices[firstDOF+(i-1)*nDOF0].push_back(numShared); |
1096 |
colIndices[firstDOF+i*nDOF0].push_back(numShared); |
1097 |
if (i<nDOF1-1) |
1098 |
colIndices[firstDOF+(i+1)*nDOF0].push_back(numShared); |
1099 |
m_dofMap[firstNode+i*m_N0]=numDOF+numShared; |
1100 |
} |
1101 |
} else { |
1102 |
// sharing a node |
1103 |
const int dof=(i0+1)/2*(nDOF0-1)+(i1+1)/2*(numDOF-nDOF0); |
1104 |
const int node=(i0+1)/2*(m_N0-1)+(i1+1)/2*m_N0*(m_N1-1); |
1105 |
offsetInShared.push_back(offsetInShared.back()+1); |
1106 |
sendShared.push_back(dof); |
1107 |
recvShared.push_back(numDOF+numShared); |
1108 |
colIndices[dof].push_back(numShared); |
1109 |
m_dofMap[node]=numDOF+numShared; |
1110 |
++numShared; |
1111 |
} |
1112 |
} |
1113 |
} |
1114 |
} |
1115 |
|
1116 |
// create connector |
1117 |
Paso_SharedComponents *snd_shcomp = Paso_SharedComponents_alloc( |
1118 |
numDOF, neighbour.size(), &neighbour[0], &sendShared[0], |
1119 |
&offsetInShared[0], 1, 0, m_mpiInfo); |
1120 |
Paso_SharedComponents *rcv_shcomp = Paso_SharedComponents_alloc( |
1121 |
numDOF, neighbour.size(), &neighbour[0], &recvShared[0], |
1122 |
&offsetInShared[0], 1, 0, m_mpiInfo); |
1123 |
m_connector = Paso_Connector_alloc(snd_shcomp, rcv_shcomp); |
1124 |
Paso_SharedComponents_free(snd_shcomp); |
1125 |
Paso_SharedComponents_free(rcv_shcomp); |
1126 |
|
1127 |
// create main and couple blocks |
1128 |
Paso_Pattern *mainPattern = createMainPattern(); |
1129 |
Paso_Pattern *colPattern, *rowPattern; |
1130 |
createCouplePatterns(colIndices, numShared, &colPattern, &rowPattern); |
1131 |
|
1132 |
// allocate paso distribution |
1133 |
Paso_Distribution* distribution = Paso_Distribution_alloc(m_mpiInfo, |
1134 |
const_cast<index_t*>(&m_nodeDistribution[0]), 1, 0); |
1135 |
|
1136 |
// finally create the system matrix |
1137 |
m_pattern = Paso_SystemMatrixPattern_alloc(MATRIX_FORMAT_DEFAULT, |
1138 |
distribution, distribution, mainPattern, colPattern, rowPattern, |
1139 |
m_connector, m_connector); |
1140 |
|
1141 |
Paso_Distribution_free(distribution); |
1142 |
|
1143 |
// useful debug output |
1144 |
/* |
1145 |
cout << "--- rcv_shcomp ---" << endl; |
1146 |
cout << "numDOF=" << numDOF << ", numNeighbors=" << neighbour.size() << endl; |
1147 |
for (size_t i=0; i<neighbour.size(); i++) { |
1148 |
cout << "neighbor[" << i << "]=" << neighbour[i] |
1149 |
<< " offsetInShared[" << i+1 << "]=" << offsetInShared[i+1] << endl; |
1150 |
} |
1151 |
for (size_t i=0; i<recvShared.size(); i++) { |
1152 |
cout << "shared[" << i << "]=" << recvShared[i] << endl; |
1153 |
} |
1154 |
cout << "--- snd_shcomp ---" << endl; |
1155 |
for (size_t i=0; i<sendShared.size(); i++) { |
1156 |
cout << "shared[" << i << "]=" << sendShared[i] << endl; |
1157 |
} |
1158 |
cout << "--- dofMap ---" << endl; |
1159 |
for (size_t i=0; i<m_dofMap.size(); i++) { |
1160 |
cout << "m_dofMap[" << i << "]=" << m_dofMap[i] << endl; |
1161 |
} |
1162 |
cout << "--- colIndices ---" << endl; |
1163 |
for (size_t i=0; i<colIndices.size(); i++) { |
1164 |
cout << "colIndices[" << i << "].size()=" << colIndices[i].size() << endl; |
1165 |
} |
1166 |
*/ |
1167 |
|
1168 |
/* |
1169 |
cout << "--- main_pattern ---" << endl; |
1170 |
cout << "M=" << mainPattern->numOutput << ", N=" << mainPattern->numInput << endl; |
1171 |
for (size_t i=0; i<mainPattern->numOutput+1; i++) { |
1172 |
cout << "ptr[" << i << "]=" << mainPattern->ptr[i] << endl; |
1173 |
} |
1174 |
for (size_t i=0; i<mainPattern->ptr[mainPattern->numOutput]; i++) { |
1175 |
cout << "index[" << i << "]=" << mainPattern->index[i] << endl; |
1176 |
} |
1177 |
*/ |
1178 |
|
1179 |
/* |
1180 |
cout << "--- colCouple_pattern ---" << endl; |
1181 |
cout << "M=" << colPattern->numOutput << ", N=" << colPattern->numInput << endl; |
1182 |
for (size_t i=0; i<colPattern->numOutput+1; i++) { |
1183 |
cout << "ptr[" << i << "]=" << colPattern->ptr[i] << endl; |
1184 |
} |
1185 |
for (size_t i=0; i<colPattern->ptr[colPattern->numOutput]; i++) { |
1186 |
cout << "index[" << i << "]=" << colPattern->index[i] << endl; |
1187 |
} |
1188 |
*/ |
1189 |
|
1190 |
/* |
1191 |
cout << "--- rowCouple_pattern ---" << endl; |
1192 |
cout << "M=" << rowPattern->numOutput << ", N=" << rowPattern->numInput << endl; |
1193 |
for (size_t i=0; i<rowPattern->numOutput+1; i++) { |
1194 |
cout << "ptr[" << i << "]=" << rowPattern->ptr[i] << endl; |
1195 |
} |
1196 |
for (size_t i=0; i<rowPattern->ptr[rowPattern->numOutput]; i++) { |
1197 |
cout << "index[" << i << "]=" << rowPattern->index[i] << endl; |
1198 |
} |
1199 |
*/ |
1200 |
|
1201 |
Paso_Pattern_free(mainPattern); |
1202 |
Paso_Pattern_free(colPattern); |
1203 |
Paso_Pattern_free(rowPattern); |
1204 |
} |
1205 |
|
1206 |
//protected |
1207 |
void Rectangle::interpolateNodesOnElements(escript::Data& out, |
1208 |
escript::Data& in, bool reduced) const |
1209 |
{ |
1210 |
const dim_t numComp = in.getDataPointSize(); |
1211 |
if (reduced) { |
1212 |
/*** GENERATOR SNIP_INTERPOLATE_REDUCED_ELEMENTS TOP */ |
1213 |
const double c0 = .25; |
1214 |
#pragma omp parallel for |
1215 |
for (index_t k1=0; k1 < m_NE1; ++k1) { |
1216 |
for (index_t k0=0; k0 < m_NE0; ++k0) { |
1217 |
const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,k1+1, m_N0)); |
1218 |
const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,k1, m_N0)); |
1219 |
const register double* f_00 = in.getSampleDataRO(INDEX2(k0,k1, m_N0)); |
1220 |
const register double* f_01 = in.getSampleDataRO(INDEX2(k0,k1+1, m_N0)); |
1221 |
double* o = out.getSampleDataRW(INDEX2(k0,k1,m_NE0)); |
1222 |
for (index_t i=0; i < numComp; ++i) { |
1223 |
o[INDEX2(i,numComp,0)] = c0*(f_00[i] + f_01[i] + f_10[i] + f_11[i]); |
1224 |
} /* end of component loop i */ |
1225 |
} /* end of k0 loop */ |
1226 |
} /* end of k1 loop */ |
1227 |
/* GENERATOR SNIP_INTERPOLATE_REDUCED_ELEMENTS BOTTOM */ |
1228 |
} else { |
1229 |
/*** GENERATOR SNIP_INTERPOLATE_ELEMENTS TOP */ |
1230 |
const double c0 = .16666666666666666667; |
1231 |
const double c1 = .044658198738520451079; |
1232 |
const double c2 = .62200846792814621559; |
1233 |
#pragma omp parallel for |
1234 |
for (index_t k1=0; k1 < m_NE1; ++k1) { |
1235 |
for (index_t k0=0; k0 < m_NE0; ++k0) { |
1236 |
const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,k1, m_N0)); |
1237 |
const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,k1+1, m_N0)); |
1238 |
const register double* f_01 = in.getSampleDataRO(INDEX2(k0,k1+1, m_N0)); |
1239 |
const register double* f_00 = in.getSampleDataRO(INDEX2(k0,k1, m_N0)); |
1240 |
double* o = out.getSampleDataRW(INDEX2(k0,k1,m_NE0)); |
1241 |
for (index_t i=0; i < numComp; ++i) { |
1242 |
o[INDEX2(i,numComp,0)] = f_00[i]*c2 + f_11[i]*c1 + c0*(f_01[i] + f_10[i]); |
1243 |
o[INDEX2(i,numComp,1)] = f_01[i]*c1 + f_10[i]*c2 + c0*(f_00[i] + f_11[i]); |
1244 |
o[INDEX2(i,numComp,2)] = f_01[i]*c2 + f_10[i]*c1 + c0*(f_00[i] + f_11[i]); |
1245 |
o[INDEX2(i,numComp,3)] = f_00[i]*c1 + f_11[i]*c2 + c0*(f_01[i] + f_10[i]); |
1246 |
} /* end of component loop i */ |
1247 |
} /* end of k0 loop */ |
1248 |
} /* end of k1 loop */ |
1249 |
/* GENERATOR SNIP_INTERPOLATE_ELEMENTS BOTTOM */ |
1250 |
} |
1251 |
} |
1252 |
|
1253 |
//protected |
1254 |
void Rectangle::interpolateNodesOnFaces(escript::Data& out, escript::Data& in, |
1255 |
bool reduced) const |
1256 |
{ |
1257 |
const dim_t numComp = in.getDataPointSize(); |
1258 |
if (reduced) { |
1259 |
const double c0 = .5; |
1260 |
#pragma omp parallel |
1261 |
{ |
1262 |
/*** GENERATOR SNIP_INTERPOLATE_REDUCED_FACES TOP */ |
1263 |
if (m_faceOffset[0] > -1) { |
1264 |
#pragma omp for nowait |
1265 |
for (index_t k1=0; k1 < m_NE1; ++k1) { |
1266 |
const register double* f_00 = in.getSampleDataRO(INDEX2(0,k1, m_N0)); |
1267 |
const register double* f_01 = in.getSampleDataRO(INDEX2(0,k1+1, m_N0)); |
1268 |
double* o = out.getSampleDataRW(m_faceOffset[0]+k1); |
1269 |
for (index_t i=0; i < numComp; ++i) { |
1270 |
o[INDEX2(i,numComp,0)] = c0*(f_00[i] + f_01[i]); |
1271 |
} /* end of component loop i */ |
1272 |
} /* end of k1 loop */ |
1273 |
} /* end of face 0 */ |
1274 |
if (m_faceOffset[1] > -1) { |
1275 |
#pragma omp for nowait |
1276 |
for (index_t k1=0; k1 < m_NE1; ++k1) { |
1277 |
const register double* f_10 = in.getSampleDataRO(INDEX2(m_N0-1,k1, m_N0)); |
1278 |
const register double* f_11 = in.getSampleDataRO(INDEX2(m_N0-1,k1+1, m_N0)); |
1279 |
double* o = out.getSampleDataRW(m_faceOffset[1]+k1); |
1280 |
for (index_t i=0; i < numComp; ++i) { |
1281 |
o[INDEX2(i,numComp,0)] = c0*(f_10[i] + f_11[i]); |
1282 |
} /* end of component loop i */ |
1283 |
} /* end of k1 loop */ |
1284 |
} /* end of face 1 */ |
1285 |
if (m_faceOffset[2] > -1) { |
1286 |
#pragma omp for nowait |
1287 |
for (index_t k0=0; k0 < m_NE0; ++k0) { |
1288 |
const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,0, m_N0)); |
1289 |
const register double* f_00 = in.getSampleDataRO(INDEX2(k0,0, m_N0)); |
1290 |
double* o = out.getSampleDataRW(m_faceOffset[2]+k0); |
1291 |
for (index_t i=0; i < numComp; ++i) { |
1292 |
o[INDEX2(i,numComp,0)] = c0*(f_00[i] + f_10[i]); |
1293 |
} /* end of component loop i */ |
1294 |
} /* end of k0 loop */ |
1295 |
} /* end of face 2 */ |
1296 |
if (m_faceOffset[3] > -1) { |
1297 |
#pragma omp for nowait |
1298 |
for (index_t k0=0; k0 < m_NE0; ++k0) { |
1299 |
const register double* f_01 = in.getSampleDataRO(INDEX2(k0,m_N1-1, m_N0)); |
1300 |
const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,m_N1-1, m_N0)); |
1301 |
double* o = out.getSampleDataRW(m_faceOffset[3]+k0); |
1302 |
for (index_t i=0; i < numComp; ++i) { |
1303 |
o[INDEX2(i,numComp,0)] = c0*(f_01[i] + f_11[i]); |
1304 |
} /* end of component loop i */ |
1305 |
} /* end of k0 loop */ |
1306 |
} /* end of face 3 */ |
1307 |
/* GENERATOR SNIP_INTERPOLATE_REDUCED_FACES BOTTOM */ |
1308 |
} // end of parallel section |
1309 |
} else { |
1310 |
const double c0 = 0.21132486540518711775; |
1311 |
const double c1 = 0.78867513459481288225; |
1312 |
#pragma omp parallel |
1313 |
{ |
1314 |
/*** GENERATOR SNIP_INTERPOLATE_FACES TOP */ |
1315 |
if (m_faceOffset[0] > -1) { |
1316 |
#pragma omp for nowait |
1317 |
for (index_t k1=0; k1 < m_NE1; ++k1) { |
1318 |
const register double* f_01 = in.getSampleDataRO(INDEX2(0,k1+1, m_N0)); |
1319 |
const register double* f_00 = in.getSampleDataRO(INDEX2(0,k1, m_N0)); |
1320 |
double* o = out.getSampleDataRW(m_faceOffset[0]+k1); |
1321 |
for (index_t i=0; i < numComp; ++i) { |
1322 |
o[INDEX2(i,numComp,0)] = f_00[i]*c1 + f_01[i]*c0; |
1323 |
o[INDEX2(i,numComp,1)] = f_00[i]*c0 + f_01[i]*c1; |
1324 |
} /* end of component loop i */ |
1325 |
} /* end of k1 loop */ |
1326 |
} /* end of face 0 */ |
1327 |
if (m_faceOffset[1] > -1) { |
1328 |
#pragma omp for nowait |
1329 |
for (index_t k1=0; k1 < m_NE1; ++k1) { |
1330 |
const register double* f_10 = in.getSampleDataRO(INDEX2(m_N0-1,k1, m_N0)); |
1331 |
const register double* f_11 = in.getSampleDataRO(INDEX2(m_N0-1,k1+1, m_N0)); |
1332 |
double* o = out.getSampleDataRW(m_faceOffset[1]+k1); |
1333 |
for (index_t i=0; i < numComp; ++i) { |
1334 |
o[INDEX2(i,numComp,0)] = f_10[i]*c1 + f_11[i]*c0; |
1335 |
o[INDEX2(i,numComp,1)] = f_10[i]*c0 + f_11[i]*c1; |
1336 |
} /* end of component loop i */ |
1337 |
} /* end of k1 loop */ |
1338 |
} /* end of face 1 */ |
1339 |
if (m_faceOffset[2] > -1) { |
1340 |
#pragma omp for nowait |
1341 |
for (index_t k0=0; k0 < m_NE0; ++k0) { |
1342 |
const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,0, m_N0)); |
1343 |
const register double* f_00 = in.getSampleDataRO(INDEX2(k0,0, m_N0)); |
1344 |
double* o = out.getSampleDataRW(m_faceOffset[2]+k0); |
1345 |
for (index_t i=0; i < numComp; ++i) { |
1346 |
o[INDEX2(i,numComp,0)] = f_00[i]*c1 + f_10[i]*c0; |
1347 |
o[INDEX2(i,numComp,1)] = f_00[i]*c0 + f_10[i]*c1; |
1348 |
} /* end of component loop i */ |
1349 |
} /* end of k0 loop */ |
1350 |
} /* end of face 2 */ |
1351 |
if (m_faceOffset[3] > -1) { |
1352 |
#pragma omp for nowait |
1353 |
for (index_t k0=0; k0 < m_NE0; ++k0) { |
1354 |
const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,m_N1-1, m_N0)); |
1355 |
const register double* f_01 = in.getSampleDataRO(INDEX2(k0,m_N1-1, m_N0)); |
1356 |
double* o = out.getSampleDataRW(m_faceOffset[3]+k0); |
1357 |
for (index_t i=0; i < numComp; ++i) { |
1358 |
o[INDEX2(i,numComp,0)] = f_01[i]*c1 + f_11[i]*c0; |
1359 |
o[INDEX2(i,numComp,1)] = f_01[i]*c0 + f_11[i]*c1; |
1360 |
} /* end of component loop i */ |
1361 |
} /* end of k0 loop */ |
1362 |
} /* end of face 3 */ |
1363 |
/* GENERATOR SNIP_INTERPOLATE_FACES BOTTOM */ |
1364 |
} // end of parallel section |
1365 |
} |
1366 |
} |
1367 |
|
1368 |
//protected |
1369 |
void Rectangle::assemblePDESingle(Paso_SystemMatrix* mat, |
1370 |
escript::Data& rhs, const escript::Data& A, const escript::Data& B, |
1371 |
const escript::Data& C, const escript::Data& D, |
1372 |
const escript::Data& X, const escript::Data& Y, |
1373 |
const escript::Data& d, const escript::Data& y) const |
1374 |
{ |
1375 |
const double h0 = m_l0/m_gNE0; |
1376 |
const double h1 = m_l1/m_gNE1; |
1377 |
/*** GENERATOR SNIP_PDE_SINGLE_PRE TOP */ |
1378 |
const double w0 = -0.1555021169820365539*h1/h0; |
1379 |
const double w1 = 0.041666666666666666667; |
1380 |
const double w10 = -0.041666666666666666667*h0/h1; |
1381 |
const double w11 = 0.1555021169820365539*h1/h0; |
1382 |
const double w12 = 0.1555021169820365539*h0/h1; |
1383 |
const double w13 = 0.01116454968463011277*h0/h1; |
1384 |
const double w14 = 0.01116454968463011277*h1/h0; |
1385 |
const double w15 = 0.041666666666666666667*h1/h0; |
1386 |
const double w16 = -0.01116454968463011277*h0/h1; |
1387 |
const double w17 = -0.1555021169820365539*h0/h1; |
1388 |
const double w18 = -0.33333333333333333333*h1/h0; |
1389 |
const double w19 = 0.25000000000000000000; |
1390 |
const double w2 = -0.15550211698203655390; |
1391 |
const double w20 = -0.25000000000000000000; |
1392 |
const double w21 = 0.16666666666666666667*h0/h1; |
1393 |
const double w22 = -0.16666666666666666667*h1/h0; |
1394 |
const double w23 = -0.16666666666666666667*h0/h1; |
1395 |
const double w24 = 0.33333333333333333333*h1/h0; |
1396 |
const double w25 = 0.33333333333333333333*h0/h1; |
1397 |
const double w26 = 0.16666666666666666667*h1/h0; |
1398 |
const double w27 = -0.33333333333333333333*h0/h1; |
1399 |
const double w28 = -0.032861463941450536761*h1; |
1400 |
const double w29 = -0.032861463941450536761*h0; |
1401 |
const double w3 = 0.041666666666666666667*h0/h1; |
1402 |
const double w30 = -0.12264065304058601714*h1; |
1403 |
const double w31 = -0.0023593469594139828636*h1; |
1404 |
const double w32 = -0.008805202725216129906*h0; |
1405 |
const double w33 = -0.008805202725216129906*h1; |
1406 |
const double w34 = 0.032861463941450536761*h1; |
1407 |
const double w35 = 0.008805202725216129906*h1; |
1408 |
const double w36 = 0.008805202725216129906*h0; |
1409 |
const double w37 = 0.0023593469594139828636*h1; |
1410 |
const double w38 = 0.12264065304058601714*h1; |
1411 |
const double w39 = 0.032861463941450536761*h0; |
1412 |
const double w4 = 0.15550211698203655390; |
1413 |
const double w40 = -0.12264065304058601714*h0; |
1414 |
const double w41 = -0.0023593469594139828636*h0; |
1415 |
const double w42 = 0.0023593469594139828636*h0; |
1416 |
const double w43 = 0.12264065304058601714*h0; |
1417 |
const double w44 = -0.16666666666666666667*h1; |
1418 |
const double w45 = -0.083333333333333333333*h0; |
1419 |
const double w46 = 0.083333333333333333333*h1; |
1420 |
const double w47 = 0.16666666666666666667*h1; |
1421 |
const double w48 = 0.083333333333333333333*h0; |
1422 |
const double w49 = -0.16666666666666666667*h0; |
1423 |
const double w5 = -0.041666666666666666667; |
1424 |
const double w50 = 0.16666666666666666667*h0; |
1425 |
const double w51 = -0.083333333333333333333*h1; |
1426 |
const double w52 = 0.025917019497006092316*h0*h1; |
1427 |
const double w53 = 0.0018607582807716854616*h0*h1; |
1428 |
const double w54 = 0.0069444444444444444444*h0*h1; |
1429 |
const double w55 = 0.09672363354357992482*h0*h1; |
1430 |
const double w56 = 0.00049858867864229740201*h0*h1; |
1431 |
const double w57 = 0.055555555555555555556*h0*h1; |
1432 |
const double w58 = 0.027777777777777777778*h0*h1; |
1433 |
const double w59 = 0.11111111111111111111*h0*h1; |
1434 |
const double w6 = -0.01116454968463011277*h1/h0; |
1435 |
const double w60 = -0.19716878364870322056*h1; |
1436 |
const double w61 = -0.19716878364870322056*h0; |
1437 |
const double w62 = -0.052831216351296779436*h0; |
1438 |
const double w63 = -0.052831216351296779436*h1; |
1439 |
const double w64 = 0.19716878364870322056*h1; |
1440 |
const double w65 = 0.052831216351296779436*h1; |
1441 |
const double w66 = 0.19716878364870322056*h0; |
1442 |
const double w67 = 0.052831216351296779436*h0; |
1443 |
const double w68 = -0.5*h1; |
1444 |
const double w69 = -0.5*h0; |
1445 |
const double w7 = 0.011164549684630112770; |
1446 |
const double w70 = 0.5*h1; |
1447 |
const double w71 = 0.5*h0; |
1448 |
const double w72 = 0.1555021169820365539*h0*h1; |
1449 |
const double w73 = 0.041666666666666666667*h0*h1; |
1450 |
const double w74 = 0.01116454968463011277*h0*h1; |
1451 |
const double w75 = 0.25*h0*h1; |
1452 |
const double w8 = -0.011164549684630112770; |
1453 |
const double w9 = -0.041666666666666666667*h1/h0; |
1454 |
/* GENERATOR SNIP_PDE_SINGLE_PRE BOTTOM */ |
1455 |
|
1456 |
rhs.requireWrite(); |
1457 |
#pragma omp parallel |
1458 |
{ |
1459 |
for (index_t k1_0=0; k1_0<2; k1_0++) { // colouring |
1460 |
#pragma omp for |
1461 |
for (index_t k1=k1_0; k1<m_NE1; k1+=2) { |
1462 |
for (index_t k0=0; k0<m_NE0; ++k0) { |
1463 |
bool add_EM_S=false; |
1464 |
bool add_EM_F=false; |
1465 |
vector<double> EM_S(4*4, 0); |
1466 |
vector<double> EM_F(4, 0); |
1467 |
const index_t e = k0 + m_NE0*k1; |
1468 |
/*** GENERATOR SNIP_PDE_SINGLE TOP */ |
1469 |
/////////////// |
1470 |
// process A // |
1471 |
/////////////// |
1472 |
if (!A.isEmpty()) { |
1473 |
add_EM_S=true; |
1474 |
const double* A_p=const_cast<escript::Data*>(&A)->getSampleDataRO(e); |
1475 |
if (A.actsExpanded()) { |
1476 |
const register double A_00_0 = A_p[INDEX3(0,0,0,2,2)]; |
1477 |
const register double A_01_0 = A_p[INDEX3(0,1,0,2,2)]; |
1478 |
const register double A_10_0 = A_p[INDEX3(1,0,0,2,2)]; |
1479 |
const register double A_11_0 = A_p[INDEX3(1,1,0,2,2)]; |
1480 |
const register double A_00_1 = A_p[INDEX3(0,0,1,2,2)]; |
1481 |
const register double A_01_1 = A_p[INDEX3(0,1,1,2,2)]; |
1482 |
const register double A_10_1 = A_p[INDEX3(1,0,1,2,2)]; |
1483 |
const register double A_11_1 = A_p[INDEX3(1,1,1,2,2)]; |
1484 |
const register double A_00_2 = A_p[INDEX3(0,0,2,2,2)]; |
1485 |
const register double A_01_2 = A_p[INDEX3(0,1,2,2,2)]; |
1486 |
const register double A_10_2 = A_p[INDEX3(1,0,2,2,2)]; |
1487 |
const register double A_11_2 = A_p[INDEX3(1,1,2,2,2)]; |
1488 |
const register double A_00_3 = A_p[INDEX3(0,0,3,2,2)]; |
1489 |
const register double A_01_3 = A_p[INDEX3(0,1,3,2,2)]; |
1490 |
const register double A_10_3 = A_p[INDEX3(1,0,3,2,2)]; |
1491 |
const register double A_11_3 = A_p[INDEX3(1,1,3,2,2)]; |
1492 |
const register double tmp4_0 = A_10_1 + A_10_2; |
1493 |
const register double tmp12_0 = A_11_0 + A_11_2; |
1494 |
const register double tmp2_0 = A_11_0 + A_11_1 + A_11_2 + A_11_3; |
1495 |
const register double tmp10_0 = A_01_3 + A_10_3; |
1496 |
const register double tmp14_0 = A_01_0 + A_01_3 + A_10_0 + A_10_3; |
1497 |
const register double tmp0_0 = A_01_0 + A_01_3; |
1498 |
const register double tmp13_0 = A_01_2 + A_10_1; |
1499 |
const register double tmp3_0 = A_00_2 + A_00_3; |
1500 |
const register double tmp11_0 = A_11_1 + A_11_3; |
1501 |
const register double tmp18_0 = A_01_1 + A_10_1; |
1502 |
const register double tmp1_0 = A_00_0 + A_00_1; |
1503 |
const register double tmp15_0 = A_01_1 + A_10_2; |
1504 |
const register double tmp5_0 = A_00_0 + A_00_1 + A_00_2 + A_00_3; |
1505 |
const register double tmp16_0 = A_10_0 + A_10_3; |
1506 |
const register double tmp6_0 = A_01_3 + A_10_0; |
1507 |
const register double tmp17_0 = A_01_1 + A_01_2; |
1508 |
const register double tmp9_0 = A_01_0 + A_10_0; |
1509 |
const register double tmp7_0 = A_01_0 + A_10_3; |
1510 |
const register double tmp8_0 = A_01_1 + A_01_2 + A_10_1 + A_10_2; |
1511 |
const register double tmp19_0 = A_01_2 + A_10_2; |
1512 |
const register double tmp14_1 = A_10_0*w8; |
1513 |
const register double tmp23_1 = tmp3_0*w14; |
1514 |
const register double tmp35_1 = A_01_0*w8; |
1515 |
const register double tmp54_1 = tmp13_0*w8; |
1516 |
const register double tmp20_1 = tmp9_0*w4; |
1517 |
const register double tmp25_1 = tmp12_0*w12; |
1518 |
const register double tmp2_1 = A_01_1*w4; |
1519 |
const register double tmp44_1 = tmp7_0*w7; |
1520 |
const register double tmp26_1 = tmp10_0*w4; |
1521 |
const register double tmp52_1 = tmp18_0*w8; |
1522 |
const register double tmp48_1 = A_10_1*w7; |
1523 |
const register double tmp46_1 = A_01_3*w8; |
1524 |
const register double tmp50_1 = A_01_0*w2; |
1525 |
const register double tmp8_1 = tmp4_0*w5; |
1526 |
const register double tmp56_1 = tmp19_0*w8; |
1527 |
const register double tmp9_1 = tmp2_0*w10; |
1528 |
const register double tmp19_1 = A_10_3*w2; |
1529 |
const register double tmp47_1 = A_10_2*w4; |
1530 |
const register double tmp16_1 = tmp3_0*w0; |
1531 |
const register double tmp18_1 = tmp1_0*w6; |
1532 |
const register double tmp31_1 = tmp11_0*w12; |
1533 |
const register double tmp55_1 = tmp15_0*w2; |
1534 |
const register double tmp39_1 = A_10_2*w7; |
1535 |
const register double tmp11_1 = tmp6_0*w7; |
1536 |
const register double tmp40_1 = tmp11_0*w17; |
1537 |
const register double tmp34_1 = tmp15_0*w8; |
1538 |
const register double tmp33_1 = tmp14_0*w5; |
1539 |
const register double tmp24_1 = tmp11_0*w13; |
1540 |
const register double tmp3_1 = tmp1_0*w0; |
1541 |
const register double tmp5_1 = tmp2_0*w3; |
1542 |
const register double tmp43_1 = tmp17_0*w5; |
1543 |
const register double tmp15_1 = A_01_2*w4; |
1544 |
const register double tmp53_1 = tmp19_0*w2; |
1545 |
const register double tmp27_1 = tmp3_0*w11; |
1546 |
const register double tmp32_1 = tmp13_0*w2; |
1547 |
const register double tmp10_1 = tmp5_0*w9; |
1548 |
const register double tmp37_1 = A_10_1*w4; |
1549 |
const register double tmp38_1 = tmp5_0*w15; |
1550 |
const register double tmp17_1 = A_01_1*w7; |
1551 |
const register double tmp12_1 = tmp7_0*w4; |
1552 |
const register double tmp22_1 = tmp10_0*w7; |
1553 |
const register double tmp57_1 = tmp18_0*w2; |
1554 |
const register double tmp28_1 = tmp9_0*w7; |
1555 |
const register double tmp29_1 = tmp1_0*w14; |
1556 |
const register double tmp51_1 = tmp11_0*w16; |
1557 |
const register double tmp42_1 = tmp12_0*w16; |
1558 |
const register double tmp49_1 = tmp12_0*w17; |
1559 |
const register double tmp21_1 = tmp1_0*w11; |
1560 |
const register double tmp1_1 = tmp0_0*w1; |
1561 |
const register double tmp45_1 = tmp6_0*w4; |
1562 |
const register double tmp7_1 = A_10_0*w2; |
1563 |
const register double tmp6_1 = tmp3_0*w6; |
1564 |
const register double tmp13_1 = tmp8_0*w1; |
1565 |
const register double tmp36_1 = tmp16_0*w1; |
1566 |
const register double tmp41_1 = A_01_3*w2; |
1567 |
const register double tmp30_1 = tmp12_0*w13; |
1568 |
const register double tmp4_1 = A_01_2*w7; |
1569 |
const register double tmp0_1 = A_10_3*w8; |
1570 |
EM_S[INDEX2(0,1,4)]+=tmp0_1 + tmp1_1 + tmp2_1 + tmp3_1 + tmp4_1 + tmp5_1 + tmp6_1 + tmp7_1 + tmp8_1; |
1571 |
EM_S[INDEX2(1,2,4)]+=tmp10_1 + tmp11_1 + tmp12_1 + tmp13_1 + tmp9_1; |
1572 |
EM_S[INDEX2(3,2,4)]+=tmp14_1 + tmp15_1 + tmp16_1 + tmp17_1 + tmp18_1 + tmp19_1 + tmp1_1 + tmp5_1 + tmp8_1; |
1573 |
EM_S[INDEX2(0,0,4)]+=tmp13_1 + tmp20_1 + tmp21_1 + tmp22_1 + tmp23_1 + tmp24_1 + tmp25_1; |
1574 |
EM_S[INDEX2(3,3,4)]+=tmp13_1 + tmp26_1 + tmp27_1 + tmp28_1 + tmp29_1 + tmp30_1 + tmp31_1; |
1575 |
EM_S[INDEX2(3,0,4)]+=tmp10_1 + tmp32_1 + tmp33_1 + tmp34_1 + tmp9_1; |
1576 |
EM_S[INDEX2(3,1,4)]+=tmp35_1 + tmp36_1 + tmp37_1 + tmp38_1 + tmp39_1 + tmp40_1 + tmp41_1 + tmp42_1 + tmp43_1; |
1577 |
EM_S[INDEX2(2,1,4)]+=tmp10_1 + tmp13_1 + tmp44_1 + tmp45_1 + tmp9_1; |
1578 |
EM_S[INDEX2(0,2,4)]+=tmp36_1 + tmp38_1 + tmp43_1 + tmp46_1 + tmp47_1 + tmp48_1 + tmp49_1 + tmp50_1 + tmp51_1; |
1579 |
EM_S[INDEX2(2,0,4)]+=tmp0_1 + tmp15_1 + tmp17_1 + tmp1_1 + tmp38_1 + tmp49_1 + tmp51_1 + tmp7_1 + tmp8_1; |
1580 |
EM_S[INDEX2(1,3,4)]+=tmp14_1 + tmp19_1 + tmp1_1 + tmp2_1 + tmp38_1 + tmp40_1 + tmp42_1 + tmp4_1 + tmp8_1; |
1581 |
EM_S[INDEX2(2,3,4)]+=tmp16_1 + tmp18_1 + tmp35_1 + tmp36_1 + tmp41_1 + tmp43_1 + tmp47_1 + tmp48_1 + tmp5_1; |
1582 |
EM_S[INDEX2(2,2,4)]+=tmp24_1 + tmp25_1 + tmp27_1 + tmp29_1 + tmp33_1 + tmp52_1 + tmp53_1; |
1583 |
EM_S[INDEX2(1,0,4)]+=tmp36_1 + tmp37_1 + tmp39_1 + tmp3_1 + tmp43_1 + tmp46_1 + tmp50_1 + tmp5_1 + tmp6_1; |
1584 |
EM_S[INDEX2(0,3,4)]+=tmp10_1 + tmp33_1 + tmp54_1 + tmp55_1 + tmp9_1; |
1585 |
EM_S[INDEX2(1,1,4)]+=tmp21_1 + tmp23_1 + tmp30_1 + tmp31_1 + tmp33_1 + tmp56_1 + tmp57_1; |
1586 |
} else { /* constant data */ |
1587 |
const register double A_00 = A_p[INDEX2(0,0,2)]; |
1588 |
const register double A_01 = A_p[INDEX2(0,1,2)]; |
1589 |
const register double A_10 = A_p[INDEX2(1,0,2)]; |
1590 |
const register double A_11 = A_p[INDEX2(1,1,2)]; |
1591 |
const register double tmp0_0 = A_01 + A_10; |
1592 |
const register double tmp0_1 = A_00*w18; |
1593 |
const register double tmp10_1 = A_01*w20; |
1594 |
const register double tmp12_1 = A_00*w26; |
1595 |
const register double tmp4_1 = A_00*w22; |
1596 |
const register double tmp8_1 = A_00*w24; |
1597 |
const register double tmp13_1 = A_10*w19; |
1598 |
const register double tmp9_1 = tmp0_0*w20; |
1599 |
const register double tmp3_1 = A_11*w21; |
1600 |
const register double tmp11_1 = A_11*w27; |
1601 |
const register double tmp1_1 = A_01*w19; |
1602 |
const register double tmp6_1 = A_11*w23; |
1603 |
const register double tmp7_1 = A_11*w25; |
1604 |
const register double tmp2_1 = A_10*w20; |
1605 |
const register double tmp5_1 = tmp0_0*w19; |
1606 |
EM_S[INDEX2(0,1,4)]+=tmp0_1 + tmp1_1 + tmp2_1 + tmp3_1; |
1607 |
EM_S[INDEX2(1,2,4)]+=tmp4_1 + tmp5_1 + tmp6_1; |
1608 |
EM_S[INDEX2(3,2,4)]+=tmp0_1 + tmp1_1 + tmp2_1 + tmp3_1; |
1609 |
EM_S[INDEX2(0,0,4)]+=tmp5_1 + tmp7_1 + tmp8_1; |
1610 |
EM_S[INDEX2(3,3,4)]+=tmp5_1 + tmp7_1 + tmp8_1; |
1611 |
EM_S[INDEX2(3,0,4)]+=tmp4_1 + tmp6_1 + tmp9_1; |
1612 |
EM_S[INDEX2(3,1,4)]+=tmp10_1 + tmp11_1 + tmp12_1 + tmp13_1; |
1613 |
EM_S[INDEX2(2,1,4)]+=tmp4_1 + tmp5_1 + tmp6_1; |
1614 |
EM_S[INDEX2(0,2,4)]+=tmp10_1 + tmp11_1 + tmp12_1 + tmp13_1; |
1615 |
EM_S[INDEX2(2,0,4)]+=tmp11_1 + tmp12_1 + tmp1_1 + tmp2_1; |
1616 |
EM_S[INDEX2(1,3,4)]+=tmp11_1 + tmp12_1 + tmp1_1 + tmp2_1; |
1617 |
EM_S[INDEX2(2,3,4)]+=tmp0_1 + tmp10_1 + tmp13_1 + tmp3_1; |
1618 |
EM_S[INDEX2(2,2,4)]+=tmp7_1 + tmp8_1 + tmp9_1; |
1619 |
EM_S[INDEX2(1,0,4)]+=tmp0_1 + tmp10_1 + tmp13_1 + tmp3_1; |
1620 |
EM_S[INDEX2(0,3,4)]+=tmp4_1 + tmp6_1 + tmp9_1; |
1621 |
EM_S[INDEX2(1,1,4)]+=tmp7_1 + tmp8_1 + tmp9_1; |
1622 |
} |
1623 |
} |
1624 |
/////////////// |
1625 |
// process B // |
1626 |
/////////////// |
1627 |
if (!B.isEmpty()) { |
1628 |
add_EM_S=true; |
1629 |
const double* B_p=const_cast<escript::Data*>(&B)->getSampleDataRO(e); |
1630 |
if (B.actsExpanded()) { |
1631 |
const register double B_0_0 = B_p[INDEX2(0,0,2)]; |
1632 |
const register double B_1_0 = B_p[INDEX2(1,0,2)]; |
1633 |
const register double B_0_1 = B_p[INDEX2(0,1,2)]; |
1634 |
const register double B_1_1 = B_p[INDEX2(1,1,2)]; |
1635 |
const register double B_0_2 = B_p[INDEX2(0,2,2)]; |
1636 |
const register double B_1_2 = B_p[INDEX2(1,2,2)]; |
1637 |
const register double B_0_3 = B_p[INDEX2(0,3,2)]; |
1638 |
const register double B_1_3 = B_p[INDEX2(1,3,2)]; |
1639 |
const register double tmp3_0 = B_0_0 + B_0_2; |
1640 |
const register double tmp1_0 = B_1_2 + B_1_3; |
1641 |
const register double tmp2_0 = B_0_1 + B_0_3; |
1642 |
const register double tmp0_0 = B_1_0 + B_1_1; |
1643 |
const register double tmp63_1 = B_1_1*w42; |
1644 |
const register double tmp79_1 = B_1_1*w40; |
1645 |
const register double tmp37_1 = tmp3_0*w35; |
1646 |
const register double tmp8_1 = tmp0_0*w32; |
1647 |
const register double tmp71_1 = B_0_1*w34; |
1648 |
const register double tmp19_1 = B_0_3*w31; |
1649 |
const register double tmp15_1 = B_0_3*w34; |
1650 |
const register double tmp9_1 = tmp3_0*w34; |
1651 |
const register double tmp35_1 = B_1_0*w36; |
1652 |
const register double tmp66_1 = B_0_3*w28; |
1653 |
const register double tmp28_1 = B_1_0*w42; |
1654 |
const register double tmp22_1 = B_1_0*w40; |
1655 |
const register double tmp16_1 = B_1_2*w29; |
1656 |
const register double tmp6_1 = tmp2_0*w35; |
1657 |
const register double tmp55_1 = B_1_3*w40; |
1658 |
const register double tmp50_1 = B_1_3*w42; |
1659 |
const register double tmp7_1 = tmp1_0*w29; |
1660 |
const register double tmp1_1 = tmp1_0*w32; |
1661 |
const register double tmp57_1 = B_0_3*w30; |
1662 |
const register double tmp18_1 = B_1_1*w32; |
1663 |
const register double tmp53_1 = B_1_0*w41; |
1664 |
const register double tmp61_1 = B_1_3*w36; |
1665 |
const register double tmp27_1 = B_0_3*w38; |
1666 |
const register double tmp64_1 = B_0_2*w30; |
1667 |
const register double tmp76_1 = B_0_1*w38; |
1668 |
const register double tmp39_1 = tmp2_0*w34; |
1669 |
const register double tmp62_1 = B_0_1*w31; |
1670 |
const register double tmp56_1 = B_0_0*w31; |
1671 |
const register double tmp49_1 = B_1_1*w36; |
1672 |
const register double tmp2_1 = B_0_2*w31; |
1673 |
const register double tmp23_1 = B_0_2*w33; |
1674 |
const register double tmp38_1 = B_1_1*w43; |
1675 |
const register double tmp74_1 = B_1_2*w41; |
1676 |
const register double tmp43_1 = B_1_1*w41; |
1677 |
const register double tmp58_1 = B_0_2*w28; |
1678 |
const register double tmp67_1 = B_0_0*w33; |
1679 |
const register double tmp33_1 = tmp0_0*w39; |
1680 |
const register double tmp4_1 = B_0_0*w28; |
1681 |
const register double tmp20_1 = B_0_0*w30; |
1682 |
const register double tmp13_1 = B_0_2*w38; |
1683 |
const register double tmp65_1 = B_1_2*w43; |
1684 |
const register double tmp0_1 = tmp0_0*w29; |
1685 |
const register double tmp41_1 = tmp3_0*w33; |
1686 |
const register double tmp73_1 = B_0_2*w37; |
1687 |
const register double tmp69_1 = B_0_0*w38; |
1688 |
const register double tmp48_1 = B_1_2*w39; |
1689 |
const register double tmp59_1 = B_0_1*w33; |
1690 |
const register double tmp17_1 = B_1_3*w41; |
1691 |
const register double tmp5_1 = B_0_3*w33; |
1692 |
const register double tmp3_1 = B_0_1*w30; |
1693 |
const register double tmp21_1 = B_0_1*w28; |
1694 |
const register double tmp42_1 = B_1_0*w29; |
1695 |
const register double tmp54_1 = B_1_2*w32; |
1696 |
const register double tmp60_1 = B_1_0*w39; |
1697 |
const register double tmp32_1 = tmp1_0*w36; |
1698 |
const register double tmp10_1 = B_0_1*w37; |
1699 |
const register double tmp14_1 = B_0_0*w35; |
1700 |
const register double tmp29_1 = B_0_1*w35; |
1701 |
const register double tmp26_1 = B_1_2*w36; |
1702 |
const register double tmp30_1 = B_1_3*w43; |
1703 |
const register double tmp70_1 = B_0_2*w35; |
1704 |
const register double tmp34_1 = B_1_3*w39; |
1705 |
const register double tmp51_1 = B_1_0*w43; |
1706 |
const register double tmp31_1 = B_0_2*w34; |
1707 |
const register double tmp45_1 = tmp3_0*w28; |
1708 |
const register double tmp11_1 = tmp1_0*w39; |
1709 |
const register double tmp52_1 = B_1_1*w29; |
1710 |
const register double tmp44_1 = B_1_3*w32; |
1711 |
const register double tmp25_1 = B_1_1*w39; |
1712 |
const register double tmp47_1 = tmp2_0*w33; |
1713 |
const register double tmp72_1 = B_1_3*w29; |
1714 |
const register double tmp40_1 = tmp2_0*w28; |
1715 |
const register double tmp46_1 = B_1_2*w40; |
1716 |
const register double tmp36_1 = B_1_2*w42; |
1717 |
const register double tmp24_1 = B_0_0*w37; |
1718 |
const register double tmp77_1 = B_0_3*w35; |
1719 |
const register double tmp68_1 = B_0_3*w37; |
1720 |
const register double tmp78_1 = B_0_0*w34; |
1721 |
const register double tmp12_1 = tmp0_0*w36; |
1722 |
const register double tmp75_1 = B_1_0*w32; |
1723 |
EM_S[INDEX2(0,1,4)]+=tmp0_1 + tmp1_1 + tmp2_1 + tmp3_1 + tmp4_1 + tmp5_1; |
1724 |
EM_S[INDEX2(1,2,4)]+=tmp6_1 + tmp7_1 + tmp8_1 + tmp9_1; |
1725 |
EM_S[INDEX2(3,2,4)]+=tmp10_1 + tmp11_1 + tmp12_1 + tmp13_1 + tmp14_1 + tmp15_1; |
1726 |
EM_S[INDEX2(0,0,4)]+=tmp16_1 + tmp17_1 + tmp18_1 + tmp19_1 + tmp20_1 + tmp21_1 + tmp22_1 + tmp23_1; |
1727 |
EM_S[INDEX2(3,3,4)]+=tmp24_1 + tmp25_1 + tmp26_1 + tmp27_1 + tmp28_1 + tmp29_1 + tmp30_1 + tmp31_1; |
1728 |
EM_S[INDEX2(3,0,4)]+=tmp32_1 + tmp33_1 + tmp6_1 + tmp9_1; |
1729 |
EM_S[INDEX2(3,1,4)]+=tmp34_1 + tmp35_1 + tmp36_1 + tmp37_1 + tmp38_1 + tmp39_1; |
1730 |
EM_S[INDEX2(2,1,4)]+=tmp32_1 + tmp33_1 + tmp40_1 + tmp41_1; |
1731 |
EM_S[INDEX2(0,2,4)]+=tmp42_1 + tmp43_1 + tmp44_1 + tmp45_1 + tmp46_1 + tmp47_1; |
1732 |
EM_S[INDEX2(2,0,4)]+=tmp45_1 + tmp47_1 + tmp48_1 + tmp49_1 + tmp50_1 + tmp51_1; |
1733 |
EM_S[INDEX2(1,3,4)]+=tmp37_1 + tmp39_1 + tmp52_1 + tmp53_1 + tmp54_1 + tmp55_1; |
1734 |
EM_S[INDEX2(2,3,4)]+=tmp11_1 + tmp12_1 + tmp56_1 + tmp57_1 + tmp58_1 + tmp59_1; |
1735 |
EM_S[INDEX2(2,2,4)]+=tmp60_1 + tmp61_1 + tmp62_1 + tmp63_1 + tmp64_1 + tmp65_1 + tmp66_1 + tmp67_1; |
1736 |
EM_S[INDEX2(1,0,4)]+=tmp0_1 + tmp1_1 + tmp68_1 + tmp69_1 + tmp70_1 + tmp71_1; |
1737 |
EM_S[INDEX2(0,3,4)]+=tmp40_1 + tmp41_1 + tmp7_1 + tmp8_1; |
1738 |
EM_S[INDEX2(1,1,4)]+=tmp72_1 + tmp73_1 + tmp74_1 + tmp75_1 + tmp76_1 + tmp77_1 + tmp78_1 + tmp79_1; |
1739 |
} else { /* constant data */ |
1740 |
const register double B_0 = B_p[0]; |
1741 |
const register double B_1 = B_p[1]; |
1742 |
const register double tmp6_1 = B_1*w50; |
1743 |
const register double tmp1_1 = B_1*w45; |
1744 |
const register double tmp5_1 = B_1*w49; |
1745 |
const register double tmp4_1 = B_1*w48; |
1746 |
const register double tmp0_1 = B_0*w44; |
1747 |
const register double tmp2_1 = B_0*w46; |
1748 |
const register double tmp7_1 = B_0*w51; |
1749 |
const register double tmp3_1 = B_0*w47; |
1750 |
EM_S[INDEX2(0,1,4)]+=tmp0_1 + tmp1_1; |
1751 |
EM_S[INDEX2(1,2,4)]+=tmp1_1 + tmp2_1; |
1752 |
EM_S[INDEX2(3,2,4)]+=tmp3_1 + tmp4_1; |
1753 |
EM_S[INDEX2(0,0,4)]+=tmp0_1 + tmp5_1; |
1754 |
EM_S[INDEX2(3,3,4)]+=tmp3_1 + tmp6_1; |
1755 |
EM_S[INDEX2(3,0,4)]+=tmp2_1 + tmp4_1; |
1756 |
EM_S[INDEX2(3,1,4)]+=tmp2_1 + tmp6_1; |
1757 |
EM_S[INDEX2(2,1,4)]+=tmp4_1 + tmp7_1; |
1758 |
EM_S[INDEX2(0,2,4)]+=tmp5_1 + tmp7_1; |
1759 |
EM_S[INDEX2(2,0,4)]+=tmp6_1 + tmp7_1; |
1760 |
EM_S[INDEX2(1,3,4)]+=tmp2_1 + tmp5_1; |
1761 |
EM_S[INDEX2(2,3,4)]+=tmp0_1 + tmp4_1; |
1762 |
EM_S[INDEX2(2,2,4)]+=tmp0_1 + tmp6_1; |
1763 |
EM_S[INDEX2(1,0,4)]+=tmp1_1 + tmp3_1; |
1764 |
EM_S[INDEX2(0,3,4)]+=tmp1_1 + tmp7_1; |
1765 |
EM_S[INDEX2(1,1,4)]+=tmp3_1 + tmp5_1; |
1766 |
} |
1767 |
} |
1768 |
/////////////// |
1769 |
// process C // |
1770 |
/////////////// |
1771 |
if (!C.isEmpty()) { |
1772 |
add_EM_S=true; |
1773 |
const double* C_p=const_cast<escript::Data*>(&C)->getSampleDataRO(e); |
1774 |
if (C.actsExpanded()) { |
1775 |
const register double C_0_0 = C_p[INDEX2(0,0,2)]; |
1776 |
const register double C_1_0 = C_p[INDEX2(1,0,2)]; |
1777 |
const register double C_0_1 = C_p[INDEX2(0,1,2)]; |
1778 |
const register double C_1_1 = C_p[INDEX2(1,1,2)]; |
1779 |
const register double C_0_2 = C_p[INDEX2(0,2,2)]; |
1780 |
const register double C_1_2 = C_p[INDEX2(1,2,2)]; |
1781 |
const register double C_0_3 = C_p[INDEX2(0,3,2)]; |
1782 |
const register double C_1_3 = C_p[INDEX2(1,3,2)]; |
1783 |
const register double tmp2_0 = C_0_1 + C_0_3; |
1784 |
const register double tmp1_0 = C_1_2 + C_1_3; |
1785 |
const register double tmp3_0 = C_0_0 + C_0_2; |
1786 |
const register double tmp0_0 = C_1_0 + C_1_1; |
1787 |
const register double tmp64_1 = C_0_2*w30; |
1788 |
const register double tmp14_1 = C_0_2*w28; |
1789 |
const register double tmp19_1 = C_0_3*w31; |
1790 |
const register double tmp22_1 = C_1_0*w40; |
1791 |
const register double tmp37_1 = tmp3_0*w35; |
1792 |
const register double tmp29_1 = C_0_1*w35; |
1793 |
const register double tmp73_1 = C_0_2*w37; |
1794 |
const register double tmp74_1 = C_1_2*w41; |
1795 |
const register double tmp52_1 = C_1_3*w39; |
1796 |
const register double tmp25_1 = C_1_1*w39; |
1797 |
const register double tmp62_1 = C_0_1*w31; |
1798 |
const register double tmp79_1 = C_1_1*w40; |
1799 |
const register double tmp43_1 = C_1_1*w36; |
1800 |
const register double tmp27_1 = C_0_3*w38; |
1801 |
const register double tmp28_1 = C_1_0*w42; |
1802 |
const register double tmp63_1 = C_1_1*w42; |
1803 |
const register double tmp59_1 = C_0_3*w34; |
1804 |
const register double tmp72_1 = C_1_3*w29; |
1805 |
const register double tmp40_1 = tmp2_0*w35; |
1806 |
const register double tmp13_1 = C_0_3*w30; |
1807 |
const register double tmp51_1 = C_1_2*w40; |
1808 |
const register double tmp54_1 = C_1_2*w42; |
1809 |
const register double tmp12_1 = C_0_0*w31; |
1810 |
const register double tmp2_1 = tmp1_0*w32; |
1811 |
const register double tmp68_1 = C_0_2*w31; |
1812 |
const register double tmp75_1 = C_1_0*w32; |
1813 |
const register double tmp49_1 = C_1_1*w41; |
1814 |
const register double tmp4_1 = C_0_2*w35; |
1815 |
const register double tmp66_1 = C_0_3*w28; |
1816 |
const register double tmp56_1 = C_0_1*w37; |
1817 |
const register double tmp5_1 = C_0_1*w34; |
1818 |
const register double tmp38_1 = tmp2_0*w34; |
1819 |
const register double tmp76_1 = C_0_1*w38; |
1820 |
const register double tmp21_1 = C_0_1*w28; |
1821 |
const register double tmp69_1 = C_0_1*w30; |
1822 |
const register double tmp53_1 = C_1_0*w36; |
1823 |
const register double tmp42_1 = C_1_2*w39; |
1824 |
const register double tmp32_1 = tmp1_0*w29; |
1825 |
const register double tmp45_1 = C_1_0*w43; |
1826 |
const register double tmp33_1 = tmp0_0*w32; |
1827 |
const register double tmp35_1 = C_1_0*w41; |
1828 |
const register double tmp26_1 = C_1_2*w36; |
1829 |
const register double tmp67_1 = C_0_0*w33; |
1830 |
const register double tmp31_1 = C_0_2*w34; |
1831 |
const register double tmp20_1 = C_0_0*w30; |
1832 |
const register double tmp70_1 = C_0_0*w28; |
1833 |
const register double tmp8_1 = tmp0_0*w39; |
1834 |
const register double tmp30_1 = C_1_3*w43; |
1835 |
const register double tmp0_1 = tmp0_0*w29; |
1836 |
const register double tmp17_1 = C_1_3*w41; |
1837 |
const register double tmp58_1 = C_0_0*w35; |
1838 |
const register double tmp9_1 = tmp3_0*w33; |
1839 |
const register double tmp61_1 = C_1_3*w36; |
1840 |
const register double tmp41_1 = tmp3_0*w34; |
1841 |
const register double tmp50_1 = C_1_3*w32; |
1842 |
const register double tmp18_1 = C_1_1*w32; |
1843 |
const register double tmp6_1 = tmp1_0*w36; |
1844 |
const register double tmp3_1 = C_0_0*w38; |
1845 |
const register double tmp34_1 = C_1_1*w29; |
1846 |
const register double tmp77_1 = C_0_3*w35; |
1847 |
const register double tmp65_1 = C_1_2*w43; |
1848 |
const register double tmp71_1 = C_0_3*w33; |
1849 |
const register double tmp55_1 = C_1_1*w43; |
1850 |
const register double tmp46_1 = tmp3_0*w28; |
1851 |
const register double tmp24_1 = C_0_0*w37; |
1852 |
const register double tmp10_1 = tmp1_0*w39; |
1853 |
const register double tmp48_1 = C_1_0*w29; |
1854 |
const register double tmp15_1 = C_0_1*w33; |
1855 |
const register double tmp36_1 = C_1_2*w32; |
1856 |
const register double tmp60_1 = C_1_0*w39; |
1857 |
const register double tmp47_1 = tmp2_0*w33; |
1858 |
const register double tmp16_1 = C_1_2*w29; |
1859 |
const register double tmp1_1 = C_0_3*w37; |
1860 |
const register double tmp7_1 = tmp2_0*w28; |
1861 |
const register double tmp39_1 = C_1_3*w40; |
1862 |
const register double tmp44_1 = C_1_3*w42; |
1863 |
const register double tmp57_1 = C_0_2*w38; |
1864 |
const register double tmp78_1 = C_0_0*w34; |
1865 |
const register double tmp11_1 = tmp0_0*w36; |
1866 |
const register double tmp23_1 = C_0_2*w33; |
1867 |
EM_S[INDEX2(0,1,4)]+=tmp0_1 + tmp1_1 + tmp2_1 + tmp3_1 + tmp4_1 + tmp5_1; |
1868 |
EM_S[INDEX2(1,2,4)]+=tmp6_1 + tmp7_1 + tmp8_1 + tmp9_1; |
1869 |
EM_S[INDEX2(3,2,4)]+=tmp10_1 + tmp11_1 + tmp12_1 + tmp13_1 + tmp14_1 + tmp15_1; |
1870 |
EM_S[INDEX2(0,0,4)]+=tmp16_1 + tmp17_1 + tmp18_1 + tmp19_1 + tmp20_1 + tmp21_1 + tmp22_1 + tmp23_1; |
1871 |
EM_S[INDEX2(3,3,4)]+=tmp24_1 + tmp25_1 + tmp26_1 + tmp27_1 + tmp28_1 + tmp29_1 + tmp30_1 + tmp31_1; |
1872 |
EM_S[INDEX2(3,0,4)]+=tmp32_1 + tmp33_1 + tmp7_1 + tmp9_1; |
1873 |
EM_S[INDEX2(3,1,4)]+=tmp34_1 + tmp35_1 + tmp36_1 + tmp37_1 + tmp38_1 + tmp39_1; |
1874 |
EM_S[INDEX2(2,1,4)]+=tmp32_1 + tmp33_1 + tmp40_1 + tmp41_1; |
1875 |
EM_S[INDEX2(0,2,4)]+=tmp42_1 + tmp43_1 + tmp44_1 + tmp45_1 + tmp46_1 + tmp47_1; |
1876 |
EM_S[INDEX2(2,0,4)]+=tmp46_1 + tmp47_1 + tmp48_1 + tmp49_1 + tmp50_1 + tmp51_1; |
1877 |
EM_S[INDEX2(1,3,4)]+=tmp37_1 + tmp38_1 + tmp52_1 + tmp53_1 + tmp54_1 + tmp55_1; |
1878 |
EM_S[INDEX2(2,3,4)]+=tmp10_1 + tmp11_1 + tmp56_1 + tmp57_1 + tmp58_1 + tmp59_1; |
1879 |
EM_S[INDEX2(2,2,4)]+=tmp60_1 + tmp61_1 + tmp62_1 + tmp63_1 + tmp64_1 + tmp65_1 + tmp66_1 + tmp67_1; |
1880 |
EM_S[INDEX2(1,0,4)]+=tmp0_1 + tmp2_1 + tmp68_1 + tmp69_1 + tmp70_1 + tmp71_1; |
1881 |
EM_S[INDEX2(0,3,4)]+=tmp40_1 + tmp41_1 + tmp6_1 + tmp8_1; |
1882 |
EM_S[INDEX2(1,1,4)]+=tmp72_1 + tmp73_1 + tmp74_1 + tmp75_1 + tmp76_1 + tmp77_1 + tmp78_1 + tmp79_1; |
1883 |
} else { /* constant data */ |
1884 |
const register double C_0 = C_p[0]; |
1885 |
const register double C_1 = C_p[1]; |
1886 |
const register double tmp1_1 = C_1*w45; |
1887 |
const register double tmp3_1 = C_0*w51; |
1888 |
const register double tmp4_1 = C_0*w44; |
1889 |
const register double tmp7_1 = C_0*w46; |
1890 |
const register double tmp5_1 = C_1*w49; |
1891 |
const register double tmp2_1 = C_1*w48; |
1892 |
const register double tmp0_1 = C_0*w47; |
1893 |
const register double tmp6_1 = C_1*w50; |
1894 |
EM_S[INDEX2(0,1,4)]+=tmp0_1 + tmp1_1; |
1895 |
EM_S[INDEX2(1,2,4)]+=tmp2_1 + tmp3_1; |
1896 |
EM_S[INDEX2(3,2,4)]+=tmp2_1 + tmp4_1; |
1897 |
EM_S[INDEX2(0,0,4)]+=tmp4_1 + tmp5_1; |
1898 |
EM_S[INDEX2(3,3,4)]+=tmp0_1 + tmp6_1; |
1899 |
EM_S[INDEX2(3,0,4)]+=tmp1_1 + tmp3_1; |
1900 |
EM_S[INDEX2(3,1,4)]+=tmp5_1 + tmp7_1; |
1901 |
EM_S[INDEX2(2,1,4)]+=tmp1_1 + tmp7_1; |
1902 |
EM_S[INDEX2(0,2,4)]+=tmp3_1 + tmp6_1; |
1903 |
EM_S[INDEX2(2,0,4)]+=tmp3_1 + tmp5_1; |
1904 |
EM_S[INDEX2(1,3,4)]+=tmp6_1 + tmp7_1; |
1905 |
EM_S[INDEX2(2,3,4)]+=tmp0_1 + tmp2_1; |
1906 |
EM_S[INDEX2(2,2,4)]+=tmp4_1 + tmp6_1; |
1907 |
EM_S[INDEX2(1,0,4)]+=tmp1_1 + tmp4_1; |
1908 |
EM_S[INDEX2(0,3,4)]+=tmp2_1 + tmp7_1; |
1909 |
EM_S[INDEX2(1,1,4)]+=tmp0_1 + tmp5_1; |
1910 |
} |
1911 |
} |
1912 |
/////////////// |
1913 |
// process D // |
1914 |
/////////////// |
1915 |
if (!D.isEmpty()) { |
1916 |
add_EM_S=true; |
1917 |
const double* D_p=const_cast<escript::Data*>(&D)->getSampleDataRO(e); |
1918 |
if (D.actsExpanded()) { |
1919 |
const register double D_0 = D_p[0]; |
1920 |
const register double D_1 = D_p[1]; |
1921 |
const register double D_2 = D_p[2]; |
1922 |
const register double D_3 = D_p[3]; |
1923 |
const register double tmp4_0 = D_1 + D_3; |
1924 |
const register double tmp2_0 = D_0 + D_1 + D_2 + D_3; |
1925 |
const register double tmp5_0 = D_0 + D_2; |
1926 |
const register double tmp0_0 = D_0 + D_1; |
1927 |
const register double tmp6_0 = D_0 + D_3; |
1928 |
const register double tmp1_0 = D_2 + D_3; |
1929 |
const register double tmp3_0 = D_1 + D_2; |
1930 |
const register double tmp16_1 = D_1*w56; |
1931 |
const register double tmp14_1 = tmp6_0*w54; |
1932 |
const register double tmp8_1 = D_3*w55; |
1933 |
const register double tmp2_1 = tmp2_0*w54; |
1934 |
const register double tmp12_1 = tmp5_0*w52; |
1935 |
const register double tmp4_1 = tmp0_0*w53; |
1936 |
const register double tmp3_1 = tmp1_0*w52; |
1937 |
const register double tmp13_1 = tmp4_0*w53; |
1938 |
const register double tmp10_1 = tmp4_0*w52; |
1939 |
const register double tmp1_1 = tmp1_0*w53; |
1940 |
const register double tmp7_1 = D_3*w56; |
1941 |
const register double tmp0_1 = tmp0_0*w52; |
1942 |
const register double tmp11_1 = tmp5_0*w53; |
1943 |
const register double tmp9_1 = D_0*w56; |
1944 |
const register double tmp5_1 = tmp3_0*w54; |
1945 |
const register double tmp18_1 = D_2*w56; |
1946 |
const register double tmp17_1 = D_1*w55; |
1947 |
const register double tmp6_1 = D_0*w55; |
1948 |
const register double tmp15_1 = D_2*w55; |
1949 |
EM_S[INDEX2(0,1,4)]+=tmp0_1 + tmp1_1; |
1950 |
EM_S[INDEX2(1,2,4)]+=tmp2_1; |
1951 |
EM_S[INDEX2(3,2,4)]+=tmp3_1 + tmp4_1; |
1952 |
EM_S[INDEX2(0,0,4)]+=tmp5_1 + tmp6_1 + tmp7_1; |
1953 |
EM_S[INDEX2(3,3,4)]+=tmp5_1 + tmp8_1 + tmp9_1; |
1954 |
EM_S[INDEX2(3,0,4)]+=tmp2_1; |
1955 |
EM_S[INDEX2(3,1,4)]+=tmp10_1 + tmp11_1; |
1956 |
EM_S[INDEX2(2,1,4)]+=tmp2_1; |
1957 |
EM_S[INDEX2(0,2,4)]+=tmp12_1 + tmp13_1; |
1958 |
EM_S[INDEX2(2,0,4)]+=tmp12_1 + tmp13_1; |
1959 |
EM_S[INDEX2(1,3,4)]+=tmp10_1 + tmp11_1; |
1960 |
EM_S[INDEX2(2,3,4)]+=tmp3_1 + tmp4_1; |
1961 |
EM_S[INDEX2(2,2,4)]+=tmp14_1 + tmp15_1 + tmp16_1; |
1962 |
EM_S[INDEX2(1,0,4)]+=tmp0_1 + tmp1_1; |
1963 |
EM_S[INDEX2(0,3,4)]+=tmp2_1; |
1964 |
EM_S[INDEX2(1,1,4)]+=tmp14_1 + tmp17_1 + tmp18_1; |
1965 |
} else { /* constant data */ |
1966 |
const register double D_0 = D_p[0]; |
1967 |
const register double tmp2_1 = D_0*w59; |
1968 |
const register double tmp1_1 = D_0*w58; |
1969 |
const register double tmp0_1 = D_0*w57; |
1970 |
EM_S[INDEX2(0,1,4)]+=tmp0_1; |
1971 |
EM_S[INDEX2(1,2,4)]+=tmp1_1; |
1972 |
EM_S[INDEX2(3,2,4)]+=tmp0_1; |
1973 |
EM_S[INDEX2(0,0,4)]+=tmp2_1; |
1974 |
EM_S[INDEX2(3,3,4)]+=tmp2_1; |
1975 |
EM_S[INDEX2(3,0,4)]+=tmp1_1; |
1976 |
EM_S[INDEX2(3,1,4)]+=tmp0_1; |
1977 |
EM_S[INDEX2(2,1,4)]+=tmp1_1; |
1978 |
EM_S[INDEX2(0,2,4)]+=tmp0_1; |
1979 |
EM_S[INDEX2(2,0,4)]+=tmp0_1; |
1980 |
EM_S[INDEX2(1,3,4)]+=tmp0_1; |
1981 |
EM_S[INDEX2(2,3,4)]+=tmp0_1; |
1982 |
EM_S[INDEX2(2,2,4)]+=tmp2_1; |
1983 |
EM_S[INDEX2(1,0,4)]+=tmp0_1; |
1984 |
EM_S[INDEX2(0,3,4)]+=tmp1_1; |
1985 |
EM_S[INDEX2(1,1,4)]+=tmp2_1; |
1986 |
} |
1987 |
} |
1988 |
/////////////// |
1989 |
// process X // |
1990 |
/////////////// |
1991 |
if (!X.isEmpty()) { |
1992 |
add_EM_F=true; |
1993 |
const double* X_p=const_cast<escript::Data*>(&X)->getSampleDataRO(e); |
1994 |
if (X.actsExpanded()) { |
1995 |
const register double X_0_0 = X_p[INDEX2(0,0,2)]; |
1996 |
const register double X_1_0 = X_p[INDEX2(1,0,2)]; |
1997 |
const register double X_0_1 = X_p[INDEX2(0,1,2)]; |
1998 |
const register double X_1_1 = X_p[INDEX2(1,1,2)]; |
1999 |
const register double X_0_2 = X_p[INDEX2(0,2,2)]; |
2000 |
const register double X_1_2 = X_p[INDEX2(1,2,2)]; |
2001 |
const register double X_0_3 = X_p[INDEX2(0,3,2)]; |
2002 |
const register double X_1_3 = X_p[INDEX2(1,3,2)]; |
2003 |
const register double tmp1_0 = X_1_1 + X_1_3; |
2004 |
const register double tmp3_0 = X_0_0 + X_0_1; |
2005 |
const register double tmp2_0 = X_1_0 + X_1_2; |
2006 |
const register double tmp0_0 = X_0_2 + X_0_3; |
2007 |
const register double tmp8_1 = tmp2_0*w66; |
2008 |
const register double tmp5_1 = tmp3_0*w64; |
2009 |
const register double tmp14_1 = tmp0_0*w64; |
2010 |
const register double tmp3_1 = tmp3_0*w60; |
2011 |
const register double tmp9_1 = tmp3_0*w63; |
2012 |
const register double tmp13_1 = tmp3_0*w65; |
2013 |
const register double tmp12_1 = tmp1_0*w66; |
2014 |
const register double tmp10_1 = tmp0_0*w60; |
2015 |
const register double tmp2_1 = tmp2_0*w61; |
2016 |
const register double tmp6_1 = tmp2_0*w62; |
2017 |
const register double tmp4_1 = tmp0_0*w65; |
2018 |
const register double tmp11_1 = tmp1_0*w67; |
2019 |
const register double tmp1_1 = tmp1_0*w62; |
2020 |
const register double tmp7_1 = tmp1_0*w61; |
2021 |
const register double tmp0_1 = tmp0_0*w63; |
2022 |
const register double tmp15_1 = tmp2_0*w67; |
2023 |
EM_F[0]+=tmp0_1 + tmp1_1 + tmp2_1 + tmp3_1; |
2024 |
EM_F[1]+=tmp4_1 + tmp5_1 + tmp6_1 + tmp7_1; |
2025 |
EM_F[2]+=tmp10_1 + tmp11_1 + tmp8_1 + tmp9_1; |
2026 |
EM_F[3]+=tmp12_1 + tmp13_1 + tmp14_1 + tmp15_1; |
2027 |
} else { /* constant data */ |
2028 |
const register double X_0 = X_p[0]; |
2029 |
const register double X_1 = X_p[1]; |
2030 |
const register double tmp3_1 = X_1*w71; |
2031 |
const register double tmp2_1 = X_0*w70; |
2032 |
const register double tmp1_1 = X_0*w68; |
2033 |
const register double tmp0_1 = X_1*w69; |
2034 |
EM_F[0]+=tmp0_1 + tmp1_1; |
2035 |
EM_F[1]+=tmp0_1 + tmp2_1; |
2036 |
EM_F[2]+=tmp1_1 + tmp3_1; |
2037 |
EM_F[3]+=tmp2_1 + tmp3_1; |
2038 |
} |
2039 |
} |
2040 |
/////////////// |
2041 |
// process Y // |
2042 |
/////////////// |
2043 |
if (!Y.isEmpty()) { |
2044 |
add_EM_F=true; |
2045 |
const double* Y_p=const_cast<escript::Data*>(&Y)->getSampleDataRO(e); |
2046 |
if (Y.actsExpanded()) { |
2047 |
const register double Y_0 = Y_p[0]; |
2048 |
const register double Y_1 = Y_p[1]; |
2049 |
const register double Y_2 = Y_p[2]; |
2050 |
const register double Y_3 = Y_p[3]; |
2051 |
const register double tmp0_0 = Y_1 + Y_2; |
2052 |
const register double tmp1_0 = Y_0 + Y_3; |
2053 |
const register double tmp9_1 = Y_0*w74; |
2054 |
const register double tmp4_1 = tmp1_0*w73; |
2055 |
const register double tmp5_1 = Y_2*w74; |
2056 |
const register double tmp7_1 = Y_1*w74; |
2057 |
const register double tmp6_1 = Y_2*w72; |
2058 |
const register double tmp2_1 = Y_3*w74; |
2059 |
const register double tmp8_1 = Y_3*w72; |
2060 |
const register double tmp3_1 = Y_1*w72; |
2061 |
const register double tmp0_1 = Y_0*w72; |
2062 |
const register double tmp1_1 = tmp0_0*w73; |
2063 |
EM_F[0]+=tmp0_1 + tmp1_1 + tmp2_1; |
2064 |
EM_F[1]+=tmp3_1 + tmp4_1 + tmp5_1; |
2065 |
EM_F[2]+=tmp4_1 + tmp6_1 + tmp7_1; |
2066 |
EM_F[3]+=tmp1_1 + tmp8_1 + tmp9_1; |
2067 |
} else { /* constant data */ |
2068 |
const register double Y_0 = Y_p[0]; |
2069 |
const register double tmp0_1 = Y_0*w75; |
2070 |
EM_F[0]+=tmp0_1; |
2071 |
EM_F[1]+=tmp0_1; |
2072 |
EM_F[2]+=tmp0_1; |
2073 |
EM_F[3]+=tmp0_1; |
2074 |
} |
2075 |
} |
2076 |
/* GENERATOR SNIP_PDE_SINGLE BOTTOM */ |
2077 |
|
2078 |
// add to matrix (if add_EM_S) and RHS (if add_EM_F) |
2079 |
const index_t firstNode=m_N0*k1+k0; |
2080 |
IndexVector rowIndex; |
2081 |
rowIndex.push_back(m_dofMap[firstNode]); |
2082 |
rowIndex.push_back(m_dofMap[firstNode+1]); |
2083 |
rowIndex.push_back(m_dofMap[firstNode+m_N0]); |
2084 |
rowIndex.push_back(m_dofMap[firstNode+m_N0+1]); |
2085 |
if (add_EM_F) { |
2086 |
//cout << "-- AddtoRHS -- " << endl; |
2087 |
double *F_p=rhs.getSampleDataRW(0); |
2088 |
for (index_t i=0; i<rowIndex.size(); i++) { |
2089 |
if (rowIndex[i]<getNumDOF()) { |
2090 |
F_p[rowIndex[i]]+=EM_F[i]; |
2091 |
//cout << "F[" << rowIndex[i] << "]=" << F_p[rowIndex[i]] << endl; |
2092 |
} |
2093 |
} |
2094 |
//cout << "---"<<endl; |
2095 |
} |
2096 |
if (add_EM_S) { |
2097 |
//cout << "-- AddtoSystem -- " << endl; |
2098 |
addToSystemMatrix(mat, rowIndex, 1, rowIndex, 1, EM_S); |
2099 |
} |
2100 |
|
2101 |
} // end k0 loop |
2102 |
} // end k1 loop |
2103 |
} // end of coloring |
2104 |
} // end of parallel region |
2105 |
} |
2106 |
|
2107 |
} // end of namespace ripley |
2108 |
|