/[escript]/branches/ripleygmg_from_3668/ripley/src/Brick.cpp
ViewVC logotype

Contents of /branches/ripleygmg_from_3668/ripley/src/Brick.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 3713 - (show annotations)
Tue Dec 6 04:43:29 2011 UTC (7 years, 6 months ago) by caltinay
File size: 108018 byte(s)
Integrals in 2D and 3D for rectangle & brick elements & face elements in
regular and reduced orders.

1
2 /*******************************************************
3 *
4 * Copyright (c) 2003-2011 by University of Queensland
5 * Earth Systems Science Computational Center (ESSCC)
6 * http://www.uq.edu.au/esscc
7 *
8 * Primary Business: Queensland, Australia
9 * Licensed under the Open Software License version 3.0
10 * http://www.opensource.org/licenses/osl-3.0.php
11 *
12 *******************************************************/
13
14 #include <ripley/Brick.h>
15 extern "C" {
16 #include "paso/SystemMatrixPattern.h"
17 }
18
19 #if USE_SILO
20 #include <silo.h>
21 #ifdef ESYS_MPI
22 #include <pmpio.h>
23 #endif
24 #endif
25
26 #include <iomanip>
27
28 using namespace std;
29
30 namespace ripley {
31
32 Brick::Brick(int n0, int n1, int n2, double l0, double l1, double l2, int d0,
33 int d1, int d2) :
34 RipleyDomain(3),
35 m_gNE0(n0),
36 m_gNE1(n1),
37 m_gNE2(n2),
38 m_l0(l0),
39 m_l1(l1),
40 m_l2(l2),
41 m_NX(d0),
42 m_NY(d1),
43 m_NZ(d2)
44 {
45 // ensure number of subdivisions is valid and nodes can be distributed
46 // among number of ranks
47 if (m_NX*m_NY*m_NZ != m_mpiInfo->size)
48 throw RipleyException("Invalid number of spatial subdivisions");
49
50 if (n0%m_NX > 0 || n1%m_NY > 0 || n2%m_NZ > 0)
51 throw RipleyException("Number of elements must be separable into number of ranks in each dimension");
52
53 // local number of elements
54 m_NE0 = n0/m_NX;
55 m_NE1 = n1/m_NY;
56 m_NE2 = n2/m_NZ;
57 // local number of nodes (not necessarily owned)
58 m_N0 = m_NE0+1;
59 m_N1 = m_NE1+1;
60 m_N2 = m_NE2+1;
61 // bottom-left-front node is at (offset0,offset1,offset2) in global mesh
62 m_offset0 = m_NE0*(m_mpiInfo->rank%m_NX);
63 m_offset1 = m_NE1*(m_mpiInfo->rank%(m_NX*m_NY)/m_NX);
64 m_offset2 = m_NE2*(m_mpiInfo->rank/(m_NX*m_NY));
65 populateSampleIds();
66 }
67
68
69 Brick::~Brick()
70 {
71 }
72
73 string Brick::getDescription() const
74 {
75 return "ripley::Brick";
76 }
77
78 bool Brick::operator==(const AbstractDomain& other) const
79 {
80 if (dynamic_cast<const Brick*>(&other))
81 return this==&other;
82
83 return false;
84 }
85
86 void Brick::dump(const string& fileName) const
87 {
88 #if USE_SILO
89 string fn(fileName);
90 if (fileName.length() < 6 || fileName.compare(fileName.length()-5, 5, ".silo") != 0) {
91 fn+=".silo";
92 }
93
94 const int NUM_SILO_FILES = 1;
95 const char* blockDirFmt = "/block%04d";
96 int driver=DB_HDF5;
97 string siloPath;
98 DBfile* dbfile = NULL;
99
100 #ifdef ESYS_MPI
101 PMPIO_baton_t* baton = NULL;
102 #endif
103
104 if (m_mpiInfo->size > 1) {
105 #ifdef ESYS_MPI
106 baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
107 0x1337, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
108 PMPIO_DefaultClose, (void*)&driver);
109 // try the fallback driver in case of error
110 if (!baton && driver != DB_PDB) {
111 driver = DB_PDB;
112 baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
113 0x1338, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
114 PMPIO_DefaultClose, (void*)&driver);
115 }
116 if (baton) {
117 char str[64];
118 snprintf(str, 64, blockDirFmt, PMPIO_RankInGroup(baton, m_mpiInfo->rank));
119 siloPath = str;
120 dbfile = (DBfile*) PMPIO_WaitForBaton(baton, fn.c_str(), siloPath.c_str());
121 }
122 #endif
123 } else {
124 dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
125 getDescription().c_str(), driver);
126 // try the fallback driver in case of error
127 if (!dbfile && driver != DB_PDB) {
128 driver = DB_PDB;
129 dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
130 getDescription().c_str(), driver);
131 }
132 }
133
134 if (!dbfile)
135 throw RipleyException("dump: Could not create Silo file");
136
137 /*
138 if (driver==DB_HDF5) {
139 // gzip level 1 already provides good compression with minimal
140 // performance penalty. Some tests showed that gzip levels >3 performed
141 // rather badly on escript data both in terms of time and space
142 DBSetCompression("ERRMODE=FALLBACK METHOD=GZIP LEVEL=1");
143 }
144 */
145
146 boost::scoped_ptr<double> x(new double[m_N0]);
147 boost::scoped_ptr<double> y(new double[m_N1]);
148 boost::scoped_ptr<double> z(new double[m_N2]);
149 double* coords[3] = { x.get(), y.get(), z.get() };
150 pair<double,double> xdx = getFirstCoordAndSpacing(0);
151 pair<double,double> ydy = getFirstCoordAndSpacing(1);
152 pair<double,double> zdz = getFirstCoordAndSpacing(2);
153 #pragma omp parallel
154 {
155 #pragma omp for
156 for (dim_t i0 = 0; i0 < m_N0; i0++) {
157 coords[0][i0]=xdx.first+i0*xdx.second;
158 }
159 #pragma omp for
160 for (dim_t i1 = 0; i1 < m_N1; i1++) {
161 coords[1][i1]=ydy.first+i1*ydy.second;
162 }
163 #pragma omp for
164 for (dim_t i2 = 0; i2 < m_N2; i2++) {
165 coords[2][i2]=zdz.first+i2*zdz.second;
166 }
167 }
168 IndexVector dims = getNumNodesPerDim();
169 DBPutQuadmesh(dbfile, "mesh", NULL, coords, &dims[0], 3, DB_DOUBLE,
170 DB_COLLINEAR, NULL);
171
172 DBPutQuadvar1(dbfile, "nodeId", "mesh", (void*)&m_nodeId[0], &dims[0], 3,
173 NULL, 0, DB_INT, DB_NODECENT, NULL);
174
175 // write element ids
176 dims = getNumElementsPerDim();
177 DBPutQuadvar1(dbfile, "elementId", "mesh", (void*)&m_elementId[0],
178 &dims[0], 3, NULL, 0, DB_INT, DB_ZONECENT, NULL);
179
180 // rank 0 writes multimesh and multivar
181 if (m_mpiInfo->rank == 0) {
182 vector<string> tempstrings;
183 vector<char*> names;
184 for (dim_t i=0; i<m_mpiInfo->size; i++) {
185 stringstream path;
186 path << "/block" << setw(4) << setfill('0') << right << i << "/mesh";
187 tempstrings.push_back(path.str());
188 names.push_back((char*)tempstrings.back().c_str());
189 }
190 vector<int> types(m_mpiInfo->size, DB_QUAD_RECT);
191 DBSetDir(dbfile, "/");
192 DBPutMultimesh(dbfile, "multimesh", m_mpiInfo->size, &names[0],
193 &types[0], NULL);
194 tempstrings.clear();
195 names.clear();
196 for (dim_t i=0; i<m_mpiInfo->size; i++) {
197 stringstream path;
198 path << "/block" << setw(4) << setfill('0') << right << i << "/nodeId";
199 tempstrings.push_back(path.str());
200 names.push_back((char*)tempstrings.back().c_str());
201 }
202 types.assign(m_mpiInfo->size, DB_QUADVAR);
203 DBPutMultivar(dbfile, "nodeId", m_mpiInfo->size, &names[0],
204 &types[0], NULL);
205 tempstrings.clear();
206 names.clear();
207 for (dim_t i=0; i<m_mpiInfo->size; i++) {
208 stringstream path;
209 path << "/block" << setw(4) << setfill('0') << right << i << "/elementId";
210 tempstrings.push_back(path.str());
211 names.push_back((char*)tempstrings.back().c_str());
212 }
213 DBPutMultivar(dbfile, "elementId", m_mpiInfo->size, &names[0],
214 &types[0], NULL);
215 }
216
217 if (m_mpiInfo->size > 1) {
218 #ifdef ESYS_MPI
219 PMPIO_HandOffBaton(baton, dbfile);
220 PMPIO_Finish(baton);
221 #endif
222 } else {
223 DBClose(dbfile);
224 }
225
226 #else // USE_SILO
227 throw RipleyException("dump(): no Silo support");
228 #endif
229 }
230
231 const int* Brick::borrowSampleReferenceIDs(int fsType) const
232 {
233 switch (fsType) {
234 case Nodes:
235 return &m_nodeId[0];
236 case Elements:
237 return &m_elementId[0];
238 case FaceElements:
239 return &m_faceId[0];
240 default:
241 break;
242 }
243
244 stringstream msg;
245 msg << "borrowSampleReferenceIDs() not implemented for function space type "
246 << fsType;
247 throw RipleyException(msg.str());
248 }
249
250 bool Brick::ownSample(int fsCode, index_t id) const
251 {
252 #ifdef ESYS_MPI
253 if (fsCode == Nodes) {
254 const index_t myFirst=m_nodeDistribution[m_mpiInfo->rank];
255 const index_t myLast=m_nodeDistribution[m_mpiInfo->rank+1]-1;
256 return (m_nodeId[id]>=myFirst && m_nodeId[id]<=myLast);
257 } else
258 throw RipleyException("ownSample() only implemented for Nodes");
259 #else
260 return true;
261 #endif
262 }
263
264 void Brick::setToGradient(escript::Data& out, const escript::Data& cIn) const
265 {
266 escript::Data& in = *const_cast<escript::Data*>(&cIn);
267 const dim_t numComp = in.getDataPointSize();
268 const double h0 = m_l0/m_gNE0;
269 const double h1 = m_l1/m_gNE1;
270 const double h2 = m_l1/m_gNE2;
271 if (out.getFunctionSpace().getTypeCode() == Elements) {
272 /* GENERATOR SNIP_GRAD_ELEMENTS TOP */
273 const double tmp0_22 = -0.044658198738520451079/h1;
274 const double tmp0_16 = 0.16666666666666666667/h0;
275 const double tmp0_33 = -0.62200846792814621559/h1;
276 const double tmp0_0 = -0.62200846792814621559/h0;
277 const double tmp0_21 = -0.16666666666666666667/h1;
278 const double tmp0_17 = 0.62200846792814621559/h0;
279 const double tmp0_52 = -0.044658198738520451079/h2;
280 const double tmp0_1 = -0.16666666666666666667/h0;
281 const double tmp0_20 = -0.62200846792814621559/h1;
282 const double tmp0_14 = -0.044658198738520451079/h0;
283 const double tmp0_53 = -0.62200846792814621559/h2;
284 const double tmp0_49 = 0.16666666666666666667/h2;
285 const double tmp0_2 = 0.16666666666666666667/h0;
286 const double tmp0_27 = -0.044658198738520451079/h1;
287 const double tmp0_15 = -0.16666666666666666667/h0;
288 const double tmp0_50 = -0.16666666666666666667/h2;
289 const double tmp0_48 = 0.62200846792814621559/h2;
290 const double tmp0_3 = 0.044658198738520451079/h0;
291 const double tmp0_26 = -0.16666666666666666667/h1;
292 const double tmp0_12 = -0.62200846792814621559/h0;
293 const double tmp0_51 = 0.044658198738520451079/h2;
294 const double tmp0_25 = 0.62200846792814621559/h1;
295 const double tmp0_13 = 0.16666666666666666667/h0;
296 const double tmp0_56 = 0.16666666666666666667/h2;
297 const double tmp0_24 = 0.16666666666666666667/h1;
298 const double tmp0_10 = 0.62200846792814621559/h0;
299 const double tmp0_57 = 0.62200846792814621559/h2;
300 const double tmp0_11 = -0.16666666666666666667/h0;
301 const double tmp0_54 = -0.044658198738520451079/h2;
302 const double tmp0_38 = 0.16666666666666666667/h1;
303 const double tmp0_34 = -0.044658198738520451079/h1;
304 const double tmp0_42 = 0.16666666666666666667/h2;
305 const double tmp0_35 = -0.16666666666666666667/h1;
306 const double tmp0_36 = -0.62200846792814621559/h1;
307 const double tmp0_41 = 0.62200846792814621559/h2;
308 const double tmp0_8 = 0.044658198738520451079/h0;
309 const double tmp0_37 = 0.62200846792814621559/h1;
310 const double tmp0_29 = 0.16666666666666666667/h1;
311 const double tmp0_40 = -0.62200846792814621559/h2;
312 const double tmp0_9 = 0.16666666666666666667/h0;
313 const double tmp0_30 = 0.62200846792814621559/h1;
314 const double tmp0_28 = -0.16666666666666666667/h1;
315 const double tmp0_43 = 0.044658198738520451079/h2;
316 const double tmp0_32 = 0.16666666666666666667/h1;
317 const double tmp0_31 = 0.044658198738520451079/h1;
318 const double tmp0_39 = 0.044658198738520451079/h1;
319 const double tmp0_58 = -0.62200846792814621559/h2;
320 const double tmp0_55 = 0.044658198738520451079/h2;
321 const double tmp0_18 = -0.62200846792814621559/h0;
322 const double tmp0_45 = -0.16666666666666666667/h2;
323 const double tmp0_59 = -0.16666666666666666667/h2;
324 const double tmp0_4 = -0.044658198738520451079/h0;
325 const double tmp0_19 = 0.044658198738520451079/h0;
326 const double tmp0_44 = -0.044658198738520451079/h2;
327 const double tmp0_5 = 0.62200846792814621559/h0;
328 const double tmp0_47 = 0.16666666666666666667/h2;
329 const double tmp0_6 = -0.16666666666666666667/h0;
330 const double tmp0_23 = 0.044658198738520451079/h1;
331 const double tmp0_46 = -0.16666666666666666667/h2;
332 const double tmp0_7 = -0.044658198738520451079/h0;
333 #pragma omp parallel for
334 for (index_t k2=0; k2 < m_NE2; ++k2) {
335 for (index_t k1=0; k1 < m_NE1; ++k1) {
336 for (index_t k0=0; k0 < m_NE0; ++k0) {
337 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,k2, m_N0,m_N1));
338 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,k2+1, m_N0,m_N1));
339 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,k2+1, m_N0,m_N1));
340 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2+1, m_N0,m_N1));
341 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2, m_N0,m_N1));
342 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,k2+1, m_N0,m_N1));
343 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,k2, m_N0,m_N1));
344 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,k2, m_N0,m_N1));
345 double* o = out.getSampleDataRW(INDEX3(k0,k1,k2,m_NE0,m_NE1));
346 for (index_t i=0; i < numComp; ++i) {
347 o[INDEX3(i,0,0,numComp,3)] = f_000[i]*tmp0_0 + f_011[i]*tmp0_4 + f_100[i]*tmp0_5 + f_111[i]*tmp0_3 + tmp0_1*(f_001[i] + f_010[i]) + tmp0_2*(f_101[i] + f_110[i]);
348 o[INDEX3(i,1,0,numComp,3)] = f_000[i]*tmp0_20 + f_010[i]*tmp0_25 + f_101[i]*tmp0_22 + f_111[i]*tmp0_23 + tmp0_21*(f_001[i] + f_100[i]) + tmp0_24*(f_011[i] + f_110[i]);
349 o[INDEX3(i,2,0,numComp,3)] = f_000[i]*tmp0_40 + f_001[i]*tmp0_41 + f_110[i]*tmp0_44 + f_111[i]*tmp0_43 + tmp0_42*(f_011[i] + f_101[i]) + tmp0_45*(f_010[i] + f_100[i]);
350 o[INDEX3(i,0,1,numComp,3)] = f_000[i]*tmp0_0 + f_011[i]*tmp0_4 + f_100[i]*tmp0_5 + f_111[i]*tmp0_3 + tmp0_1*(f_001[i] + f_010[i]) + tmp0_2*(f_101[i] + f_110[i]);
351 o[INDEX3(i,1,1,numComp,3)] = f_000[i]*tmp0_26 + f_001[i]*tmp0_27 + f_010[i]*tmp0_32 + f_011[i]*tmp0_31 + f_100[i]*tmp0_33 + f_101[i]*tmp0_28 + f_110[i]*tmp0_30 + f_111[i]*tmp0_29;
352 o[INDEX3(i,2,1,numComp,3)] = f_000[i]*tmp0_46 + f_001[i]*tmp0_47 + f_010[i]*tmp0_52 + f_011[i]*tmp0_51 + f_100[i]*tmp0_53 + f_101[i]*tmp0_48 + f_110[i]*tmp0_50 + f_111[i]*tmp0_49;
353 o[INDEX3(i,0,2,numComp,3)] = f_000[i]*tmp0_6 + f_001[i]*tmp0_7 + f_010[i]*tmp0_12 + f_011[i]*tmp0_11 + f_100[i]*tmp0_13 + f_101[i]*tmp0_8 + f_110[i]*tmp0_10 + f_111[i]*tmp0_9;
354 o[INDEX3(i,1,2,numComp,3)] = f_000[i]*tmp0_20 + f_010[i]*tmp0_25 + f_101[i]*tmp0_22 + f_111[i]*tmp0_23 + tmp0_21*(f_001[i] + f_100[i]) + tmp0_24*(f_011[i] + f_110[i]);
355 o[INDEX3(i,2,2,numComp,3)] = f_000[i]*tmp0_46 + f_001[i]*tmp0_47 + f_010[i]*tmp0_53 + f_011[i]*tmp0_48 + f_100[i]*tmp0_52 + f_101[i]*tmp0_51 + f_110[i]*tmp0_50 + f_111[i]*tmp0_49;
356 o[INDEX3(i,0,3,numComp,3)] = f_000[i]*tmp0_6 + f_001[i]*tmp0_7 + f_010[i]*tmp0_12 + f_011[i]*tmp0_11 + f_100[i]*tmp0_13 + f_101[i]*tmp0_8 + f_110[i]*tmp0_10 + f_111[i]*tmp0_9;
357 o[INDEX3(i,1,3,numComp,3)] = f_000[i]*tmp0_26 + f_001[i]*tmp0_27 + f_010[i]*tmp0_32 + f_011[i]*tmp0_31 + f_100[i]*tmp0_33 + f_101[i]*tmp0_28 + f_110[i]*tmp0_30 + f_111[i]*tmp0_29;
358 o[INDEX3(i,2,3,numComp,3)] = f_000[i]*tmp0_54 + f_001[i]*tmp0_55 + f_110[i]*tmp0_58 + f_111[i]*tmp0_57 + tmp0_56*(f_011[i] + f_101[i]) + tmp0_59*(f_010[i] + f_100[i]);
359 o[INDEX3(i,0,4,numComp,3)] = f_000[i]*tmp0_6 + f_001[i]*tmp0_12 + f_010[i]*tmp0_7 + f_011[i]*tmp0_11 + f_100[i]*tmp0_13 + f_101[i]*tmp0_10 + f_110[i]*tmp0_8 + f_111[i]*tmp0_9;
360 o[INDEX3(i,1,4,numComp,3)] = f_000[i]*tmp0_26 + f_001[i]*tmp0_33 + f_010[i]*tmp0_32 + f_011[i]*tmp0_30 + f_100[i]*tmp0_27 + f_101[i]*tmp0_28 + f_110[i]*tmp0_31 + f_111[i]*tmp0_29;
361 o[INDEX3(i,2,4,numComp,3)] = f_000[i]*tmp0_40 + f_001[i]*tmp0_41 + f_110[i]*tmp0_44 + f_111[i]*tmp0_43 + tmp0_42*(f_011[i] + f_101[i]) + tmp0_45*(f_010[i] + f_100[i]);
362 o[INDEX3(i,0,5,numComp,3)] = f_000[i]*tmp0_6 + f_001[i]*tmp0_12 + f_010[i]*tmp0_7 + f_011[i]*tmp0_11 + f_100[i]*tmp0_13 + f_101[i]*tmp0_10 + f_110[i]*tmp0_8 + f_111[i]*tmp0_9;
363 o[INDEX3(i,1,5,numComp,3)] = f_000[i]*tmp0_34 + f_010[i]*tmp0_39 + f_101[i]*tmp0_36 + f_111[i]*tmp0_37 + tmp0_35*(f_001[i] + f_100[i]) + tmp0_38*(f_011[i] + f_110[i]);
364 o[INDEX3(i,2,5,numComp,3)] = f_000[i]*tmp0_46 + f_001[i]*tmp0_47 + f_010[i]*tmp0_52 + f_011[i]*tmp0_51 + f_100[i]*tmp0_53 + f_101[i]*tmp0_48 + f_110[i]*tmp0_50 + f_111[i]*tmp0_49;
365 o[INDEX3(i,0,6,numComp,3)] = f_000[i]*tmp0_14 + f_011[i]*tmp0_18 + f_100[i]*tmp0_19 + f_111[i]*tmp0_17 + tmp0_15*(f_001[i] + f_010[i]) + tmp0_16*(f_101[i] + f_110[i]);
366 o[INDEX3(i,1,6,numComp,3)] = f_000[i]*tmp0_26 + f_001[i]*tmp0_33 + f_010[i]*tmp0_32 + f_011[i]*tmp0_30 + f_100[i]*tmp0_27 + f_101[i]*tmp0_28 + f_110[i]*tmp0_31 + f_111[i]*tmp0_29;
367 o[INDEX3(i,2,6,numComp,3)] = f_000[i]*tmp0_46 + f_001[i]*tmp0_47 + f_010[i]*tmp0_53 + f_011[i]*tmp0_48 + f_100[i]*tmp0_52 + f_101[i]*tmp0_51 + f_110[i]*tmp0_50 + f_111[i]*tmp0_49;
368 o[INDEX3(i,0,7,numComp,3)] = f_000[i]*tmp0_14 + f_011[i]*tmp0_18 + f_100[i]*tmp0_19 + f_111[i]*tmp0_17 + tmp0_15*(f_001[i] + f_010[i]) + tmp0_16*(f_101[i] + f_110[i]);
369 o[INDEX3(i,1,7,numComp,3)] = f_000[i]*tmp0_34 + f_010[i]*tmp0_39 + f_101[i]*tmp0_36 + f_111[i]*tmp0_37 + tmp0_35*(f_001[i] + f_100[i]) + tmp0_38*(f_011[i] + f_110[i]);
370 o[INDEX3(i,2,7,numComp,3)] = f_000[i]*tmp0_54 + f_001[i]*tmp0_55 + f_110[i]*tmp0_58 + f_111[i]*tmp0_57 + tmp0_56*(f_011[i] + f_101[i]) + tmp0_59*(f_010[i] + f_100[i]);
371 } /* end of component loop i */
372 } /* end of k0 loop */
373 } /* end of k1 loop */
374 } /* end of k2 loop */
375 /* GENERATOR SNIP_GRAD_ELEMENTS BOTTOM */
376 } else if (out.getFunctionSpace().getTypeCode() == ReducedElements) {
377 /* GENERATOR SNIP_GRAD_REDUCED_ELEMENTS TOP */
378 const double tmp0_0 = -0.25/h0;
379 const double tmp0_4 = -0.25/h2;
380 const double tmp0_1 = 0.25/h0;
381 const double tmp0_5 = 0.25/h2;
382 const double tmp0_2 = -0.25/h1;
383 const double tmp0_3 = 0.25/h1;
384 #pragma omp parallel for
385 for (index_t k2=0; k2 < m_NE2; ++k2) {
386 for (index_t k1=0; k1 < m_NE1; ++k1) {
387 for (index_t k0=0; k0 < m_NE0; ++k0) {
388 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,k2, m_N0,m_N1));
389 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,k2+1, m_N0,m_N1));
390 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,k2+1, m_N0,m_N1));
391 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,k2+1, m_N0,m_N1));
392 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,k2, m_N0,m_N1));
393 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2, m_N0,m_N1));
394 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,k2, m_N0,m_N1));
395 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2+1, m_N0,m_N1));
396 double* o = out.getSampleDataRW(INDEX3(k0,k1,k2,m_NE0,m_NE1));
397 for (index_t i=0; i < numComp; ++i) {
398 o[INDEX3(i,0,0,numComp,3)] = tmp0_0*(f_000[i] + f_001[i] + f_010[i] + f_011[i]) + tmp0_1*(f_100[i] + f_101[i] + f_110[i] + f_111[i]);
399 o[INDEX3(i,1,0,numComp,3)] = tmp0_2*(f_000[i] + f_001[i] + f_100[i] + f_101[i]) + tmp0_3*(f_010[i] + f_011[i] + f_110[i] + f_111[i]);
400 o[INDEX3(i,2,0,numComp,3)] = tmp0_4*(f_000[i] + f_010[i] + f_100[i] + f_110[i]) + tmp0_5*(f_001[i] + f_011[i] + f_101[i] + f_111[i]);
401 } /* end of component loop i */
402 } /* end of k0 loop */
403 } /* end of k1 loop */
404 } /* end of k2 loop */
405 /* GENERATOR SNIP_GRAD_REDUCED_ELEMENTS BOTTOM */
406 } else if (out.getFunctionSpace().getTypeCode() == FaceElements) {
407 /* GENERATOR SNIP_GRAD_FACES TOP */
408 if (m_faceOffset[0] > -1) {
409 const double tmp0_22 = 0.21132486540518711775/h1;
410 const double tmp0_16 = 0.16666666666666666667/h0;
411 const double tmp0_33 = 0.21132486540518711775/h2;
412 const double tmp0_0 = -0.62200846792814621559/h0;
413 const double tmp0_21 = -0.21132486540518711775/h1;
414 const double tmp0_17 = 0.62200846792814621559/h0;
415 const double tmp0_1 = -0.16666666666666666667/h0;
416 const double tmp0_20 = -0.78867513459481288225/h1;
417 const double tmp0_14 = -0.044658198738520451079/h0;
418 const double tmp0_2 = 0.16666666666666666667/h0;
419 const double tmp0_27 = 0.21132486540518711775/h1;
420 const double tmp0_15 = -0.16666666666666666667/h0;
421 const double tmp0_3 = 0.044658198738520451079/h0;
422 const double tmp0_26 = 0.78867513459481288225/h1;
423 const double tmp0_12 = -0.62200846792814621559/h0;
424 const double tmp0_25 = -0.78867513459481288225/h1;
425 const double tmp0_13 = 0.16666666666666666667/h0;
426 const double tmp0_24 = -0.21132486540518711775/h1;
427 const double tmp0_10 = 0.62200846792814621559/h0;
428 const double tmp0_11 = -0.16666666666666666667/h0;
429 const double tmp0_34 = 0.78867513459481288225/h2;
430 const double tmp0_35 = -0.78867513459481288225/h2;
431 const double tmp0_8 = 0.044658198738520451079/h0;
432 const double tmp0_29 = 0.78867513459481288225/h2;
433 const double tmp0_9 = 0.16666666666666666667/h0;
434 const double tmp0_30 = 0.21132486540518711775/h2;
435 const double tmp0_28 = -0.78867513459481288225/h2;
436 const double tmp0_32 = -0.21132486540518711775/h2;
437 const double tmp0_31 = -0.21132486540518711775/h2;
438 const double tmp0_18 = -0.62200846792814621559/h0;
439 const double tmp0_4 = -0.044658198738520451079/h0;
440 const double tmp0_19 = 0.044658198738520451079/h0;
441 const double tmp0_5 = 0.62200846792814621559/h0;
442 const double tmp0_6 = -0.16666666666666666667/h0;
443 const double tmp0_23 = 0.78867513459481288225/h1;
444 const double tmp0_7 = -0.044658198738520451079/h0;
445 #pragma omp parallel for
446 for (index_t k2=0; k2 < m_NE2; ++k2) {
447 for (index_t k1=0; k1 < m_NE1; ++k1) {
448 const register double* f_000 = in.getSampleDataRO(INDEX3(0,k1,k2, m_N0,m_N1));
449 const register double* f_001 = in.getSampleDataRO(INDEX3(0,k1,k2+1, m_N0,m_N1));
450 const register double* f_101 = in.getSampleDataRO(INDEX3(1,k1,k2+1, m_N0,m_N1));
451 const register double* f_111 = in.getSampleDataRO(INDEX3(1,k1+1,k2+1, m_N0,m_N1));
452 const register double* f_110 = in.getSampleDataRO(INDEX3(1,k1+1,k2, m_N0,m_N1));
453 const register double* f_011 = in.getSampleDataRO(INDEX3(0,k1+1,k2+1, m_N0,m_N1));
454 const register double* f_010 = in.getSampleDataRO(INDEX3(0,k1+1,k2, m_N0,m_N1));
455 const register double* f_100 = in.getSampleDataRO(INDEX3(1,k1,k2, m_N0,m_N1));
456 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE1));
457 for (index_t i=0; i < numComp; ++i) {
458 o[INDEX3(i,0,0,numComp,3)] = f_000[i]*tmp0_0 + f_011[i]*tmp0_4 + f_100[i]*tmp0_5 + f_111[i]*tmp0_3 + tmp0_1*(f_001[i] + f_010[i]) + tmp0_2*(f_101[i] + f_110[i]);
459 o[INDEX3(i,1,0,numComp,3)] = f_000[i]*tmp0_20 + f_001[i]*tmp0_21 + f_010[i]*tmp0_23 + f_011[i]*tmp0_22;
460 o[INDEX3(i,2,0,numComp,3)] = f_000[i]*tmp0_28 + f_001[i]*tmp0_29 + f_010[i]*tmp0_31 + f_011[i]*tmp0_30;
461 o[INDEX3(i,0,1,numComp,3)] = f_000[i]*tmp0_6 + f_001[i]*tmp0_7 + f_010[i]*tmp0_12 + f_011[i]*tmp0_11 + f_100[i]*tmp0_13 + f_101[i]*tmp0_8 + f_110[i]*tmp0_10 + f_111[i]*tmp0_9;
462 o[INDEX3(i,1,1,numComp,3)] = f_000[i]*tmp0_20 + f_001[i]*tmp0_21 + f_010[i]*tmp0_23 + f_011[i]*tmp0_22;
463 o[INDEX3(i,2,1,numComp,3)] = f_000[i]*tmp0_32 + f_001[i]*tmp0_33 + f_010[i]*tmp0_35 + f_011[i]*tmp0_34;
464 o[INDEX3(i,0,2,numComp,3)] = f_000[i]*tmp0_6 + f_001[i]*tmp0_12 + f_010[i]*tmp0_7 + f_011[i]*tmp0_11 + f_100[i]*tmp0_13 + f_101[i]*tmp0_10 + f_110[i]*tmp0_8 + f_111[i]*tmp0_9;
465 o[INDEX3(i,1,2,numComp,3)] = f_000[i]*tmp0_24 + f_001[i]*tmp0_25 + f_010[i]*tmp0_27 + f_011[i]*tmp0_26;
466 o[INDEX3(i,2,2,numComp,3)] = f_000[i]*tmp0_28 + f_001[i]*tmp0_29 + f_010[i]*tmp0_31 + f_011[i]*tmp0_30;
467 o[INDEX3(i,0,3,numComp,3)] = f_000[i]*tmp0_14 + f_011[i]*tmp0_18 + f_100[i]*tmp0_19 + f_111[i]*tmp0_17 + tmp0_15*(f_001[i] + f_010[i]) + tmp0_16*(f_101[i] + f_110[i]);
468 o[INDEX3(i,1,3,numComp,3)] = f_000[i]*tmp0_24 + f_001[i]*tmp0_25 + f_010[i]*tmp0_27 + f_011[i]*tmp0_26;
469 o[INDEX3(i,2,3,numComp,3)] = f_000[i]*tmp0_32 + f_001[i]*tmp0_33 + f_010[i]*tmp0_35 + f_011[i]*tmp0_34;
470 } /* end of component loop i */
471 } /* end of k1 loop */
472 } /* end of k2 loop */
473 } /* end of face 0 */
474 if (m_faceOffset[1] > -1) {
475 const double tmp0_22 = 0.78867513459481288225/h1;
476 const double tmp0_16 = 0.16666666666666666667/h0;
477 const double tmp0_33 = 0.78867513459481288225/h2;
478 const double tmp0_0 = -0.62200846792814621559/h0;
479 const double tmp0_21 = 0.21132486540518711775/h1;
480 const double tmp0_17 = 0.62200846792814621559/h0;
481 const double tmp0_1 = -0.16666666666666666667/h0;
482 const double tmp0_20 = -0.21132486540518711775/h1;
483 const double tmp0_14 = -0.044658198738520451079/h0;
484 const double tmp0_2 = 0.16666666666666666667/h0;
485 const double tmp0_27 = -0.21132486540518711775/h1;
486 const double tmp0_15 = -0.16666666666666666667/h0;
487 const double tmp0_3 = 0.044658198738520451079/h0;
488 const double tmp0_26 = 0.21132486540518711775/h1;
489 const double tmp0_12 = -0.62200846792814621559/h0;
490 const double tmp0_25 = 0.78867513459481288225/h1;
491 const double tmp0_13 = 0.16666666666666666667/h0;
492 const double tmp0_24 = -0.78867513459481288225/h1;
493 const double tmp0_10 = 0.62200846792814621559/h0;
494 const double tmp0_11 = -0.16666666666666666667/h0;
495 const double tmp0_34 = -0.78867513459481288225/h2;
496 const double tmp0_35 = -0.21132486540518711775/h2;
497 const double tmp0_8 = 0.044658198738520451079/h0;
498 const double tmp0_29 = 0.21132486540518711775/h2;
499 const double tmp0_9 = 0.16666666666666666667/h0;
500 const double tmp0_30 = -0.21132486540518711775/h2;
501 const double tmp0_28 = 0.78867513459481288225/h2;
502 const double tmp0_32 = 0.21132486540518711775/h2;
503 const double tmp0_31 = -0.78867513459481288225/h2;
504 const double tmp0_18 = -0.62200846792814621559/h0;
505 const double tmp0_4 = -0.044658198738520451079/h0;
506 const double tmp0_19 = 0.044658198738520451079/h0;
507 const double tmp0_5 = 0.62200846792814621559/h0;
508 const double tmp0_6 = -0.16666666666666666667/h0;
509 const double tmp0_23 = -0.78867513459481288225/h1;
510 const double tmp0_7 = -0.044658198738520451079/h0;
511 #pragma omp parallel for
512 for (index_t k2=0; k2 < m_NE2; ++k2) {
513 for (index_t k1=0; k1 < m_NE1; ++k1) {
514 const register double* f_000 = in.getSampleDataRO(INDEX3(m_N0-2,k1,k2, m_N0,m_N1));
515 const register double* f_001 = in.getSampleDataRO(INDEX3(m_N0-2,k1,k2+1, m_N0,m_N1));
516 const register double* f_101 = in.getSampleDataRO(INDEX3(m_N0-1,k1,k2+1, m_N0,m_N1));
517 const register double* f_111 = in.getSampleDataRO(INDEX3(m_N0-1,k1+1,k2+1, m_N0,m_N1));
518 const register double* f_110 = in.getSampleDataRO(INDEX3(m_N0-1,k1+1,k2, m_N0,m_N1));
519 const register double* f_011 = in.getSampleDataRO(INDEX3(m_N0-2,k1+1,k2+1, m_N0,m_N1));
520 const register double* f_010 = in.getSampleDataRO(INDEX3(m_N0-2,k1+1,k2, m_N0,m_N1));
521 const register double* f_100 = in.getSampleDataRO(INDEX3(m_N0-1,k1,k2, m_N0,m_N1));
522 double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE1));
523 for (index_t i=0; i < numComp; ++i) {
524 o[INDEX3(i,0,0,numComp,3)] = f_000[i]*tmp0_0 + f_011[i]*tmp0_4 + f_100[i]*tmp0_5 + f_111[i]*tmp0_3 + tmp0_1*(f_001[i] + f_010[i]) + tmp0_2*(f_101[i] + f_110[i]);
525 o[INDEX3(i,1,0,numComp,3)] = f_100[i]*tmp0_23 + f_101[i]*tmp0_20 + f_110[i]*tmp0_22 + f_111[i]*tmp0_21;
526 o[INDEX3(i,2,0,numComp,3)] = f_100[i]*tmp0_31 + f_101[i]*tmp0_28 + f_110[i]*tmp0_30 + f_111[i]*tmp0_29;
527 o[INDEX3(i,0,1,numComp,3)] = f_000[i]*tmp0_6 + f_001[i]*tmp0_7 + f_010[i]*tmp0_12 + f_011[i]*tmp0_11 + f_100[i]*tmp0_13 + f_101[i]*tmp0_8 + f_110[i]*tmp0_10 + f_111[i]*tmp0_9;
528 o[INDEX3(i,1,1,numComp,3)] = f_100[i]*tmp0_23 + f_101[i]*tmp0_20 + f_110[i]*tmp0_22 + f_111[i]*tmp0_21;
529 o[INDEX3(i,2,1,numComp,3)] = f_100[i]*tmp0_35 + f_101[i]*tmp0_32 + f_110[i]*tmp0_34 + f_111[i]*tmp0_33;
530 o[INDEX3(i,0,2,numComp,3)] = f_000[i]*tmp0_6 + f_001[i]*tmp0_12 + f_010[i]*tmp0_7 + f_011[i]*tmp0_11 + f_100[i]*tmp0_13 + f_101[i]*tmp0_10 + f_110[i]*tmp0_8 + f_111[i]*tmp0_9;
531 o[INDEX3(i,1,2,numComp,3)] = f_100[i]*tmp0_27 + f_101[i]*tmp0_24 + f_110[i]*tmp0_26 + f_111[i]*tmp0_25;
532 o[INDEX3(i,2,2,numComp,3)] = f_100[i]*tmp0_31 + f_101[i]*tmp0_28 + f_110[i]*tmp0_30 + f_111[i]*tmp0_29;
533 o[INDEX3(i,0,3,numComp,3)] = f_000[i]*tmp0_14 + f_011[i]*tmp0_18 + f_100[i]*tmp0_19 + f_111[i]*tmp0_17 + tmp0_15*(f_001[i] + f_010[i]) + tmp0_16*(f_101[i] + f_110[i]);
534 o[INDEX3(i,1,3,numComp,3)] = f_100[i]*tmp0_27 + f_101[i]*tmp0_24 + f_110[i]*tmp0_26 + f_111[i]*tmp0_25;
535 o[INDEX3(i,2,3,numComp,3)] = f_100[i]*tmp0_35 + f_101[i]*tmp0_32 + f_110[i]*tmp0_34 + f_111[i]*tmp0_33;
536 } /* end of component loop i */
537 } /* end of k1 loop */
538 } /* end of k2 loop */
539 } /* end of face 1 */
540 if (m_faceOffset[2] > -1) {
541 const double tmp0_22 = -0.044658198738520451079/h1;
542 const double tmp0_16 = -0.16666666666666666667/h1;
543 const double tmp0_33 = 0.21132486540518711775/h2;
544 const double tmp0_0 = -0.78867513459481288225/h0;
545 const double tmp0_21 = 0.16666666666666666667/h1;
546 const double tmp0_17 = -0.62200846792814621559/h1;
547 const double tmp0_1 = -0.21132486540518711775/h0;
548 const double tmp0_20 = 0.044658198738520451079/h1;
549 const double tmp0_14 = -0.16666666666666666667/h1;
550 const double tmp0_2 = 0.21132486540518711775/h0;
551 const double tmp0_27 = 0.044658198738520451079/h1;
552 const double tmp0_15 = -0.044658198738520451079/h1;
553 const double tmp0_3 = 0.78867513459481288225/h0;
554 const double tmp0_26 = 0.16666666666666666667/h1;
555 const double tmp0_12 = 0.16666666666666666667/h1;
556 const double tmp0_25 = 0.62200846792814621559/h1;
557 const double tmp0_13 = 0.62200846792814621559/h1;
558 const double tmp0_24 = -0.62200846792814621559/h1;
559 const double tmp0_10 = -0.044658198738520451079/h1;
560 const double tmp0_11 = 0.044658198738520451079/h1;
561 const double tmp0_34 = 0.78867513459481288225/h2;
562 const double tmp0_35 = -0.78867513459481288225/h2;
563 const double tmp0_8 = -0.62200846792814621559/h1;
564 const double tmp0_29 = 0.78867513459481288225/h2;
565 const double tmp0_9 = -0.16666666666666666667/h1;
566 const double tmp0_30 = 0.21132486540518711775/h2;
567 const double tmp0_28 = -0.78867513459481288225/h2;
568 const double tmp0_32 = -0.21132486540518711775/h2;
569 const double tmp0_31 = -0.21132486540518711775/h2;
570 const double tmp0_18 = 0.16666666666666666667/h1;
571 const double tmp0_4 = -0.21132486540518711775/h0;
572 const double tmp0_19 = 0.62200846792814621559/h1;
573 const double tmp0_5 = -0.78867513459481288225/h0;
574 const double tmp0_6 = 0.78867513459481288225/h0;
575 const double tmp0_23 = -0.16666666666666666667/h1;
576 const double tmp0_7 = 0.21132486540518711775/h0;
577 #pragma omp parallel for
578 for (index_t k2=0; k2 < m_NE2; ++k2) {
579 for (index_t k0=0; k0 < m_NE0; ++k0) {
580 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,0,k2, m_N0,m_N1));
581 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,0,k2+1, m_N0,m_N1));
582 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,0,k2+1, m_N0,m_N1));
583 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,0,k2, m_N0,m_N1));
584 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,1,k2+1, m_N0,m_N1));
585 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,1,k2, m_N0,m_N1));
586 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,1,k2+1, m_N0,m_N1));
587 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,1,k2, m_N0,m_N1));
588 double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE0));
589 for (index_t i=0; i < numComp; ++i) {
590 o[INDEX3(i,0,0,numComp,3)] = f_000[i]*tmp0_0 + f_001[i]*tmp0_1 + f_100[i]*tmp0_3 + f_101[i]*tmp0_2;
591 o[INDEX3(i,1,0,numComp,3)] = f_000[i]*tmp0_8 + f_010[i]*tmp0_13 + f_101[i]*tmp0_10 + f_111[i]*tmp0_11 + tmp0_12*(f_011[i] + f_110[i]) + tmp0_9*(f_001[i] + f_100[i]);
592 o[INDEX3(i,2,0,numComp,3)] = f_000[i]*tmp0_28 + f_001[i]*tmp0_29 + f_100[i]*tmp0_31 + f_101[i]*tmp0_30;
593 o[INDEX3(i,0,1,numComp,3)] = f_000[i]*tmp0_0 + f_001[i]*tmp0_1 + f_100[i]*tmp0_3 + f_101[i]*tmp0_2;
594 o[INDEX3(i,1,1,numComp,3)] = f_000[i]*tmp0_14 + f_001[i]*tmp0_15 + f_010[i]*tmp0_21 + f_011[i]*tmp0_20 + f_100[i]*tmp0_17 + f_101[i]*tmp0_16 + f_110[i]*tmp0_19 + f_111[i]*tmp0_18;
595 o[INDEX3(i,2,1,numComp,3)] = f_000[i]*tmp0_32 + f_001[i]*tmp0_33 + f_100[i]*tmp0_35 + f_101[i]*tmp0_34;
596 o[INDEX3(i,0,2,numComp,3)] = f_000[i]*tmp0_4 + f_001[i]*tmp0_5 + f_100[i]*tmp0_7 + f_101[i]*tmp0_6;
597 o[INDEX3(i,1,2,numComp,3)] = f_000[i]*tmp0_14 + f_001[i]*tmp0_17 + f_010[i]*tmp0_21 + f_011[i]*tmp0_19 + f_100[i]*tmp0_15 + f_101[i]*tmp0_16 + f_110[i]*tmp0_20 + f_111[i]*tmp0_18;
598 o[INDEX3(i,2,2,numComp,3)] = f_000[i]*tmp0_28 + f_001[i]*tmp0_29 + f_100[i]*tmp0_31 + f_101[i]*tmp0_30;
599 o[INDEX3(i,0,3,numComp,3)] = f_000[i]*tmp0_4 + f_001[i]*tmp0_5 + f_100[i]*tmp0_7 + f_101[i]*tmp0_6;
600 o[INDEX3(i,1,3,numComp,3)] = f_000[i]*tmp0_22 + f_010[i]*tmp0_27 + f_101[i]*tmp0_24 + f_111[i]*tmp0_25 + tmp0_23*(f_001[i] + f_100[i]) + tmp0_26*(f_011[i] + f_110[i]);
601 o[INDEX3(i,2,3,numComp,3)] = f_000[i]*tmp0_32 + f_001[i]*tmp0_33 + f_100[i]*tmp0_35 + f_101[i]*tmp0_34;
602 } /* end of component loop i */
603 } /* end of k0 loop */
604 } /* end of k2 loop */
605 } /* end of face 2 */
606 if (m_faceOffset[3] > -1) {
607 const double tmp0_22 = 0.16666666666666666667/h1;
608 const double tmp0_16 = 0.16666666666666666667/h1;
609 const double tmp0_33 = -0.78867513459481288225/h2;
610 const double tmp0_0 = -0.21132486540518711775/h0;
611 const double tmp0_21 = -0.62200846792814621559/h1;
612 const double tmp0_17 = 0.16666666666666666667/h1;
613 const double tmp0_1 = 0.78867513459481288225/h0;
614 const double tmp0_20 = -0.16666666666666666667/h1;
615 const double tmp0_14 = 0.044658198738520451079/h1;
616 const double tmp0_2 = -0.78867513459481288225/h0;
617 const double tmp0_27 = -0.62200846792814621559/h1;
618 const double tmp0_15 = 0.62200846792814621559/h1;
619 const double tmp0_3 = 0.21132486540518711775/h0;
620 const double tmp0_26 = -0.16666666666666666667/h1;
621 const double tmp0_12 = -0.16666666666666666667/h1;
622 const double tmp0_25 = -0.044658198738520451079/h1;
623 const double tmp0_13 = -0.044658198738520451079/h1;
624 const double tmp0_24 = 0.62200846792814621559/h1;
625 const double tmp0_10 = 0.044658198738520451079/h1;
626 const double tmp0_11 = -0.62200846792814621559/h1;
627 const double tmp0_34 = -0.21132486540518711775/h2;
628 const double tmp0_35 = 0.78867513459481288225/h2;
629 const double tmp0_8 = 0.16666666666666666667/h1;
630 const double tmp0_29 = -0.21132486540518711775/h2;
631 const double tmp0_9 = 0.62200846792814621559/h1;
632 const double tmp0_30 = -0.78867513459481288225/h2;
633 const double tmp0_28 = 0.78867513459481288225/h2;
634 const double tmp0_32 = 0.21132486540518711775/h2;
635 const double tmp0_31 = 0.21132486540518711775/h2;
636 const double tmp0_18 = -0.16666666666666666667/h1;
637 const double tmp0_4 = -0.78867513459481288225/h0;
638 const double tmp0_19 = -0.044658198738520451079/h1;
639 const double tmp0_5 = 0.21132486540518711775/h0;
640 const double tmp0_6 = -0.21132486540518711775/h0;
641 const double tmp0_23 = 0.044658198738520451079/h1;
642 const double tmp0_7 = 0.78867513459481288225/h0;
643 #pragma omp parallel for
644 for (index_t k2=0; k2 < m_NE2; ++k2) {
645 for (index_t k0=0; k0 < m_NE0; ++k0) {
646 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,m_N1-1,k2+1, m_N0,m_N1));
647 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,m_N1-1,k2, m_N0,m_N1));
648 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,m_N1-1,k2, m_N0,m_N1));
649 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,m_N1-1,k2+1, m_N0,m_N1));
650 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,m_N1-2,k2, m_N0,m_N1));
651 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,m_N1-2,k2+1, m_N0,m_N1));
652 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,m_N1-2,k2+1, m_N0,m_N1));
653 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,m_N1-2,k2, m_N0,m_N1));
654 double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE0));
655 for (index_t i=0; i < numComp; ++i) {
656 o[INDEX3(i,0,0,numComp,3)] = f_010[i]*tmp0_2 + f_011[i]*tmp0_0 + f_110[i]*tmp0_1 + f_111[i]*tmp0_3;
657 o[INDEX3(i,1,0,numComp,3)] = f_000[i]*tmp0_11 + f_010[i]*tmp0_9 + f_101[i]*tmp0_13 + f_111[i]*tmp0_10 + tmp0_12*(f_001[i] + f_100[i]) + tmp0_8*(f_011[i] + f_110[i]);
658 o[INDEX3(i,2,0,numComp,3)] = f_010[i]*tmp0_30 + f_011[i]*tmp0_28 + f_110[i]*tmp0_29 + f_111[i]*tmp0_31;
659 o[INDEX3(i,0,1,numComp,3)] = f_010[i]*tmp0_2 + f_011[i]*tmp0_0 + f_110[i]*tmp0_1 + f_111[i]*tmp0_3;
660 o[INDEX3(i,1,1,numComp,3)] = f_000[i]*tmp0_18 + f_001[i]*tmp0_19 + f_010[i]*tmp0_16 + f_011[i]*tmp0_14 + f_100[i]*tmp0_21 + f_101[i]*tmp0_20 + f_110[i]*tmp0_15 + f_111[i]*tmp0_17;
661 o[INDEX3(i,2,1,numComp,3)] = f_010[i]*tmp0_34 + f_011[i]*tmp0_32 + f_110[i]*tmp0_33 + f_111[i]*tmp0_35;
662 o[INDEX3(i,0,2,numComp,3)] = f_010[i]*tmp0_6 + f_011[i]*tmp0_4 + f_110[i]*tmp0_5 + f_111[i]*tmp0_7;
663 o[INDEX3(i,1,2,numComp,3)] = f_000[i]*tmp0_18 + f_001[i]*tmp0_21 + f_010[i]*tmp0_16 + f_011[i]*tmp0_15 + f_100[i]*tmp0_19 + f_101[i]*tmp0_20 + f_110[i]*tmp0_14 + f_111[i]*tmp0_17;
664 o[INDEX3(i,2,2,numComp,3)] = f_010[i]*tmp0_30 + f_011[i]*tmp0_28 + f_110[i]*tmp0_29 + f_111[i]*tmp0_31;
665 o[INDEX3(i,0,3,numComp,3)] = f_010[i]*tmp0_6 + f_011[i]*tmp0_4 + f_110[i]*tmp0_5 + f_111[i]*tmp0_7;
666 o[INDEX3(i,1,3,numComp,3)] = f_000[i]*tmp0_25 + f_010[i]*tmp0_23 + f_101[i]*tmp0_27 + f_111[i]*tmp0_24 + tmp0_22*(f_011[i] + f_110[i]) + tmp0_26*(f_001[i] + f_100[i]);
667 o[INDEX3(i,2,3,numComp,3)] = f_010[i]*tmp0_34 + f_011[i]*tmp0_32 + f_110[i]*tmp0_33 + f_111[i]*tmp0_35;
668 } /* end of component loop i */
669 } /* end of k0 loop */
670 } /* end of k2 loop */
671 } /* end of face 3 */
672 if (m_faceOffset[4] > -1) {
673 const double tmp0_22 = -0.16666666666666666667/h2;
674 const double tmp0_16 = -0.62200846792814621559/h2;
675 const double tmp0_33 = 0.044658198738520451079/h2;
676 const double tmp0_0 = -0.78867513459481288225/h0;
677 const double tmp0_21 = 0.044658198738520451079/h2;
678 const double tmp0_17 = -0.16666666666666666667/h2;
679 const double tmp0_1 = 0.78867513459481288225/h0;
680 const double tmp0_20 = 0.16666666666666666667/h2;
681 const double tmp0_14 = 0.78867513459481288225/h1;
682 const double tmp0_2 = 0.21132486540518711775/h0;
683 const double tmp0_27 = 0.62200846792814621559/h2;
684 const double tmp0_15 = 0.21132486540518711775/h1;
685 const double tmp0_3 = -0.21132486540518711775/h0;
686 const double tmp0_26 = 0.16666666666666666667/h2;
687 const double tmp0_12 = -0.21132486540518711775/h1;
688 const double tmp0_25 = -0.044658198738520451079/h2;
689 const double tmp0_13 = -0.78867513459481288225/h1;
690 const double tmp0_24 = -0.16666666666666666667/h2;
691 const double tmp0_10 = 0.21132486540518711775/h1;
692 const double tmp0_11 = 0.78867513459481288225/h1;
693 const double tmp0_34 = 0.16666666666666666667/h2;
694 const double tmp0_35 = 0.62200846792814621559/h2;
695 const double tmp0_8 = -0.78867513459481288225/h1;
696 const double tmp0_29 = 0.16666666666666666667/h2;
697 const double tmp0_9 = -0.21132486540518711775/h1;
698 const double tmp0_30 = -0.044658198738520451079/h2;
699 const double tmp0_28 = 0.044658198738520451079/h2;
700 const double tmp0_32 = -0.62200846792814621559/h2;
701 const double tmp0_31 = -0.16666666666666666667/h2;
702 const double tmp0_18 = -0.044658198738520451079/h2;
703 const double tmp0_4 = -0.21132486540518711775/h0;
704 const double tmp0_19 = 0.62200846792814621559/h2;
705 const double tmp0_5 = 0.21132486540518711775/h0;
706 const double tmp0_6 = 0.78867513459481288225/h0;
707 const double tmp0_23 = -0.62200846792814621559/h2;
708 const double tmp0_7 = -0.78867513459481288225/h0;
709 #pragma omp parallel for
710 for (index_t k1=0; k1 < m_NE1; ++k1) {
711 for (index_t k0=0; k0 < m_NE0; ++k0) {
712 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,0, m_N0,m_N1));
713 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,0, m_N0,m_N1));
714 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,0, m_N0,m_N1));
715 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,0, m_N0,m_N1));
716 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,1, m_N0,m_N1));
717 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,1, m_N0,m_N1));
718 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,1, m_N0,m_N1));
719 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,1, m_N0,m_N1));
720 double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE0));
721 for (index_t i=0; i < numComp; ++i) {
722 o[INDEX3(i,0,0,numComp,3)] = f_000[i]*tmp0_0 + f_010[i]*tmp0_3 + f_100[i]*tmp0_1 + f_110[i]*tmp0_2;
723 o[INDEX3(i,1,0,numComp,3)] = f_000[i]*tmp0_8 + f_010[i]*tmp0_11 + f_100[i]*tmp0_9 + f_110[i]*tmp0_10;
724 o[INDEX3(i,2,0,numComp,3)] = f_000[i]*tmp0_16 + f_001[i]*tmp0_19 + f_110[i]*tmp0_18 + f_111[i]*tmp0_21 + tmp0_17*(f_010[i] + f_100[i]) + tmp0_20*(f_011[i] + f_101[i]);
725 o[INDEX3(i,0,1,numComp,3)] = f_000[i]*tmp0_0 + f_010[i]*tmp0_3 + f_100[i]*tmp0_1 + f_110[i]*tmp0_2;
726 o[INDEX3(i,1,1,numComp,3)] = f_000[i]*tmp0_12 + f_010[i]*tmp0_15 + f_100[i]*tmp0_13 + f_110[i]*tmp0_14;
727 o[INDEX3(i,2,1,numComp,3)] = f_000[i]*tmp0_22 + f_001[i]*tmp0_26 + f_010[i]*tmp0_25 + f_011[i]*tmp0_28 + f_100[i]*tmp0_23 + f_101[i]*tmp0_27 + f_110[i]*tmp0_24 + f_111[i]*tmp0_29;
728 o[INDEX3(i,0,2,numComp,3)] = f_000[i]*tmp0_4 + f_010[i]*tmp0_7 + f_100[i]*tmp0_5 + f_110[i]*tmp0_6;
729 o[INDEX3(i,1,2,numComp,3)] = f_000[i]*tmp0_8 + f_010[i]*tmp0_11 + f_100[i]*tmp0_9 + f_110[i]*tmp0_10;
730 o[INDEX3(i,2,2,numComp,3)] = f_000[i]*tmp0_22 + f_001[i]*tmp0_26 + f_010[i]*tmp0_23 + f_011[i]*tmp0_27 + f_100[i]*tmp0_25 + f_101[i]*tmp0_28 + f_110[i]*tmp0_24 + f_111[i]*tmp0_29;
731 o[INDEX3(i,0,3,numComp,3)] = f_000[i]*tmp0_4 + f_010[i]*tmp0_7 + f_100[i]*tmp0_5 + f_110[i]*tmp0_6;
732 o[INDEX3(i,1,3,numComp,3)] = f_000[i]*tmp0_12 + f_010[i]*tmp0_15 + f_100[i]*tmp0_13 + f_110[i]*tmp0_14;
733 o[INDEX3(i,2,3,numComp,3)] = f_000[i]*tmp0_30 + f_001[i]*tmp0_33 + f_110[i]*tmp0_32 + f_111[i]*tmp0_35 + tmp0_31*(f_010[i] + f_100[i]) + tmp0_34*(f_011[i] + f_101[i]);
734 } /* end of component loop i */
735 } /* end of k0 loop */
736 } /* end of k1 loop */
737 } /* end of face 4 */
738 if (m_faceOffset[5] > -1) {
739 const double tmp0_22 = 0.16666666666666666667/h2;
740 const double tmp0_16 = 0.62200846792814621559/h2;
741 const double tmp0_33 = -0.044658198738520451079/h2;
742 const double tmp0_0 = -0.78867513459481288225/h0;
743 const double tmp0_21 = -0.16666666666666666667/h2;
744 const double tmp0_17 = 0.16666666666666666667/h2;
745 const double tmp0_1 = 0.78867513459481288225/h0;
746 const double tmp0_20 = -0.044658198738520451079/h2;
747 const double tmp0_14 = 0.21132486540518711775/h1;
748 const double tmp0_2 = -0.21132486540518711775/h0;
749 const double tmp0_27 = -0.16666666666666666667/h2;
750 const double tmp0_15 = 0.78867513459481288225/h1;
751 const double tmp0_3 = 0.21132486540518711775/h0;
752 const double tmp0_26 = -0.16666666666666666667/h2;
753 const double tmp0_12 = -0.21132486540518711775/h1;
754 const double tmp0_25 = 0.16666666666666666667/h2;
755 const double tmp0_13 = -0.78867513459481288225/h1;
756 const double tmp0_24 = 0.044658198738520451079/h2;
757 const double tmp0_10 = 0.78867513459481288225/h1;
758 const double tmp0_11 = 0.21132486540518711775/h1;
759 const double tmp0_34 = -0.62200846792814621559/h2;
760 const double tmp0_35 = -0.16666666666666666667/h2;
761 const double tmp0_8 = -0.78867513459481288225/h1;
762 const double tmp0_29 = -0.62200846792814621559/h2;
763 const double tmp0_9 = -0.21132486540518711775/h1;
764 const double tmp0_30 = 0.044658198738520451079/h2;
765 const double tmp0_28 = -0.044658198738520451079/h2;
766 const double tmp0_32 = 0.62200846792814621559/h2;
767 const double tmp0_31 = 0.16666666666666666667/h2;
768 const double tmp0_18 = 0.044658198738520451079/h2;
769 const double tmp0_4 = -0.21132486540518711775/h0;
770 const double tmp0_19 = -0.62200846792814621559/h2;
771 const double tmp0_5 = 0.21132486540518711775/h0;
772 const double tmp0_6 = -0.78867513459481288225/h0;
773 const double tmp0_23 = 0.62200846792814621559/h2;
774 const double tmp0_7 = 0.78867513459481288225/h0;
775 #pragma omp parallel for
776 for (index_t k1=0; k1 < m_NE1; ++k1) {
777 for (index_t k0=0; k0 < m_NE0; ++k0) {
778 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,m_N2-1, m_N0,m_N1));
779 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,m_N2-1, m_N0,m_N1));
780 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,m_N2-1, m_N0,m_N1));
781 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,m_N2-1, m_N0,m_N1));
782 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,m_N2-2, m_N0,m_N1));
783 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,m_N2-2, m_N0,m_N1));
784 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,m_N2-2, m_N0,m_N1));
785 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,m_N2-2, m_N0,m_N1));
786 double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE0));
787 for (index_t i=0; i < numComp; ++i) {
788 o[INDEX3(i,0,0,numComp,3)] = f_001[i]*tmp0_0 + f_011[i]*tmp0_2 + f_101[i]*tmp0_1 + f_111[i]*tmp0_3;
789 o[INDEX3(i,1,0,numComp,3)] = f_001[i]*tmp0_8 + f_011[i]*tmp0_10 + f_101[i]*tmp0_9 + f_111[i]*tmp0_11;
790 o[INDEX3(i,2,0,numComp,3)] = f_000[i]*tmp0_19 + f_001[i]*tmp0_16 + f_110[i]*tmp0_20 + f_111[i]*tmp0_18 + tmp0_17*(f_011[i] + f_101[i]) + tmp0_21*(f_010[i] + f_100[i]);
791 o[INDEX3(i,0,1,numComp,3)] = f_001[i]*tmp0_0 + f_011[i]*tmp0_2 + f_101[i]*tmp0_1 + f_111[i]*tmp0_3;
792 o[INDEX3(i,1,1,numComp,3)] = f_001[i]*tmp0_12 + f_011[i]*tmp0_14 + f_101[i]*tmp0_13 + f_111[i]*tmp0_15;
793 o[INDEX3(i,2,1,numComp,3)] = f_000[i]*tmp0_26 + f_001[i]*tmp0_22 + f_010[i]*tmp0_28 + f_011[i]*tmp0_24 + f_100[i]*tmp0_29 + f_101[i]*tmp0_23 + f_110[i]*tmp0_27 + f_111[i]*tmp0_25;
794 o[INDEX3(i,0,2,numComp,3)] = f_001[i]*tmp0_4 + f_011[i]*tmp0_6 + f_101[i]*tmp0_5 + f_111[i]*tmp0_7;
795 o[INDEX3(i,1,2,numComp,3)] = f_001[i]*tmp0_8 + f_011[i]*tmp0_10 + f_101[i]*tmp0_9 + f_111[i]*tmp0_11;
796 o[INDEX3(i,2,2,numComp,3)] = f_000[i]*tmp0_26 + f_001[i]*tmp0_22 + f_010[i]*tmp0_29 + f_011[i]*tmp0_23 + f_100[i]*tmp0_28 + f_101[i]*tmp0_24 + f_110[i]*tmp0_27 + f_111[i]*tmp0_25;
797 o[INDEX3(i,0,3,numComp,3)] = f_001[i]*tmp0_4 + f_011[i]*tmp0_6 + f_101[i]*tmp0_5 + f_111[i]*tmp0_7;
798 o[INDEX3(i,1,3,numComp,3)] = f_001[i]*tmp0_12 + f_011[i]*tmp0_14 + f_101[i]*tmp0_13 + f_111[i]*tmp0_15;
799 o[INDEX3(i,2,3,numComp,3)] = f_000[i]*tmp0_33 + f_001[i]*tmp0_30 + f_110[i]*tmp0_34 + f_111[i]*tmp0_32 + tmp0_31*(f_011[i] + f_101[i]) + tmp0_35*(f_010[i] + f_100[i]);
800 } /* end of component loop i */
801 } /* end of k0 loop */
802 } /* end of k1 loop */
803 } /* end of face 5 */
804 /* GENERATOR SNIP_GRAD_FACES BOTTOM */
805 } else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
806 /* GENERATOR SNIP_GRAD_REDUCED_FACES TOP */
807 if (m_faceOffset[0] > -1) {
808 const double tmp0_0 = -0.25/h0;
809 const double tmp0_4 = -0.5/h2;
810 const double tmp0_1 = 0.25/h0;
811 const double tmp0_5 = 0.5/h2;
812 const double tmp0_2 = -0.5/h1;
813 const double tmp0_3 = 0.5/h1;
814 #pragma omp parallel for
815 for (index_t k2=0; k2 < m_NE2; ++k2) {
816 for (index_t k1=0; k1 < m_NE1; ++k1) {
817 const register double* f_000 = in.getSampleDataRO(INDEX3(0,k1,k2, m_N0,m_N1));
818 const register double* f_001 = in.getSampleDataRO(INDEX3(0,k1,k2+1, m_N0,m_N1));
819 const register double* f_101 = in.getSampleDataRO(INDEX3(1,k1,k2+1, m_N0,m_N1));
820 const register double* f_011 = in.getSampleDataRO(INDEX3(0,k1+1,k2+1, m_N0,m_N1));
821 const register double* f_100 = in.getSampleDataRO(INDEX3(1,k1,k2, m_N0,m_N1));
822 const register double* f_110 = in.getSampleDataRO(INDEX3(1,k1+1,k2, m_N0,m_N1));
823 const register double* f_010 = in.getSampleDataRO(INDEX3(0,k1+1,k2, m_N0,m_N1));
824 const register double* f_111 = in.getSampleDataRO(INDEX3(1,k1+1,k2+1, m_N0,m_N1));
825 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE1));
826 for (index_t i=0; i < numComp; ++i) {
827 o[INDEX3(i,0,0,numComp,3)] = tmp0_0*(f_000[i] + f_001[i] + f_010[i] + f_011[i]) + tmp0_1*(f_100[i] + f_101[i] + f_110[i] + f_111[i]);
828 o[INDEX3(i,1,0,numComp,3)] = tmp0_2*(f_000[i] + f_001[i]) + tmp0_3*(f_010[i] + f_011[i]);
829 o[INDEX3(i,2,0,numComp,3)] = tmp0_4*(f_000[i] + f_010[i]) + tmp0_5*(f_001[i] + f_011[i]);
830 } /* end of component loop i */
831 } /* end of k1 loop */
832 } /* end of k2 loop */
833 } /* end of face 0 */
834 if (m_faceOffset[1] > -1) {
835 const double tmp0_0 = -0.25/h0;
836 const double tmp0_4 = 0.5/h2;
837 const double tmp0_1 = 0.25/h0;
838 const double tmp0_5 = -0.5/h2;
839 const double tmp0_2 = -0.5/h1;
840 const double tmp0_3 = 0.5/h1;
841 #pragma omp parallel for
842 for (index_t k2=0; k2 < m_NE2; ++k2) {
843 for (index_t k1=0; k1 < m_NE1; ++k1) {
844 const register double* f_000 = in.getSampleDataRO(INDEX3(m_N0-2,k1,k2, m_N0,m_N1));
845 const register double* f_001 = in.getSampleDataRO(INDEX3(m_N0-2,k1,k2+1, m_N0,m_N1));
846 const register double* f_101 = in.getSampleDataRO(INDEX3(m_N0-1,k1,k2+1, m_N0,m_N1));
847 const register double* f_011 = in.getSampleDataRO(INDEX3(m_N0-2,k1+1,k2+1, m_N0,m_N1));
848 const register double* f_100 = in.getSampleDataRO(INDEX3(m_N0-1,k1,k2, m_N0,m_N1));
849 const register double* f_110 = in.getSampleDataRO(INDEX3(m_N0-1,k1+1,k2, m_N0,m_N1));
850 const register double* f_010 = in.getSampleDataRO(INDEX3(m_N0-2,k1+1,k2, m_N0,m_N1));
851 const register double* f_111 = in.getSampleDataRO(INDEX3(m_N0-1,k1+1,k2+1, m_N0,m_N1));
852 double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE1));
853 for (index_t i=0; i < numComp; ++i) {
854 o[INDEX3(i,0,0,numComp,3)] = tmp0_0*(f_000[i] + f_001[i] + f_010[i] + f_011[i]) + tmp0_1*(f_100[i] + f_101[i] + f_110[i] + f_111[i]);
855 o[INDEX3(i,1,0,numComp,3)] = tmp0_2*(f_100[i] + f_101[i]) + tmp0_3*(f_110[i] + f_111[i]);
856 o[INDEX3(i,2,0,numComp,3)] = tmp0_4*(f_101[i] + f_111[i]) + tmp0_5*(f_100[i] + f_110[i]);
857 } /* end of component loop i */
858 } /* end of k1 loop */
859 } /* end of k2 loop */
860 } /* end of face 1 */
861 if (m_faceOffset[2] > -1) {
862 const double tmp0_0 = -0.5/h0;
863 const double tmp0_4 = -0.5/h2;
864 const double tmp0_1 = 0.5/h0;
865 const double tmp0_5 = 0.5/h2;
866 const double tmp0_2 = -0.25/h1;
867 const double tmp0_3 = 0.25/h1;
868 #pragma omp parallel for
869 for (index_t k2=0; k2 < m_NE2; ++k2) {
870 for (index_t k0=0; k0 < m_NE0; ++k0) {
871 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,0,k2, m_N0,m_N1));
872 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,0,k2+1, m_N0,m_N1));
873 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,0,k2+1, m_N0,m_N1));
874 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,0,k2, m_N0,m_N1));
875 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,1,k2+1, m_N0,m_N1));
876 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,1,k2, m_N0,m_N1));
877 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,1,k2, m_N0,m_N1));
878 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,1,k2+1, m_N0,m_N1));
879 double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE0));
880 for (index_t i=0; i < numComp; ++i) {
881 o[INDEX3(i,0,0,numComp,3)] = tmp0_0*(f_000[i] + f_001[i]) + tmp0_1*(f_100[i] + f_101[i]);
882 o[INDEX3(i,1,0,numComp,3)] = tmp0_2*(f_000[i] + f_001[i] + f_100[i] + f_101[i]) + tmp0_3*(f_010[i] + f_011[i] + f_110[i] + f_111[i]);
883 o[INDEX3(i,2,0,numComp,3)] = tmp0_4*(f_000[i] + f_100[i]) + tmp0_5*(f_001[i] + f_101[i]);
884 } /* end of component loop i */
885 } /* end of k0 loop */
886 } /* end of k2 loop */
887 } /* end of face 2 */
888 if (m_faceOffset[3] > -1) {
889 const double tmp0_0 = -0.5/h0;
890 const double tmp0_4 = 0.5/h2;
891 const double tmp0_1 = 0.5/h0;
892 const double tmp0_5 = -0.5/h2;
893 const double tmp0_2 = 0.25/h1;
894 const double tmp0_3 = -0.25/h1;
895 #pragma omp parallel for
896 for (index_t k2=0; k2 < m_NE2; ++k2) {
897 for (index_t k0=0; k0 < m_NE0; ++k0) {
898 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,m_N1-1,k2+1, m_N0,m_N1));
899 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,m_N1-1,k2, m_N0,m_N1));
900 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,m_N1-1,k2, m_N0,m_N1));
901 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,m_N1-1,k2+1, m_N0,m_N1));
902 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,m_N1-2,k2, m_N0,m_N1));
903 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,m_N1-2,k2+1, m_N0,m_N1));
904 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,m_N1-2,k2+1, m_N0,m_N1));
905 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,m_N1-2,k2, m_N0,m_N1));
906 double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE0));
907 for (index_t i=0; i < numComp; ++i) {
908 o[INDEX3(i,0,0,numComp,3)] = tmp0_0*(f_010[i] + f_011[i]) + tmp0_1*(f_110[i] + f_111[i]);
909 o[INDEX3(i,1,0,numComp,3)] = tmp0_2*(f_010[i] + f_011[i] + f_110[i] + f_111[i]) + tmp0_3*(f_000[i] + f_001[i] + f_100[i] + f_101[i]);
910 o[INDEX3(i,2,0,numComp,3)] = tmp0_4*(f_011[i] + f_111[i]) + tmp0_5*(f_010[i] + f_110[i]);
911 } /* end of component loop i */
912 } /* end of k0 loop */
913 } /* end of k2 loop */
914 } /* end of face 3 */
915 if (m_faceOffset[4] > -1) {
916 const double tmp0_0 = -0.5/h0;
917 const double tmp0_4 = -0.25/h2;
918 const double tmp0_1 = 0.5/h0;
919 const double tmp0_5 = 0.25/h2;
920 const double tmp0_2 = -0.5/h1;
921 const double tmp0_3 = 0.5/h1;
922 #pragma omp parallel for
923 for (index_t k1=0; k1 < m_NE1; ++k1) {
924 for (index_t k0=0; k0 < m_NE0; ++k0) {
925 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,0, m_N0,m_N1));
926 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,0, m_N0,m_N1));
927 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,0, m_N0,m_N1));
928 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,0, m_N0,m_N1));
929 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,1, m_N0,m_N1));
930 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,1, m_N0,m_N1));
931 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,1, m_N0,m_N1));
932 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,1, m_N0,m_N1));
933 double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE0));
934 for (index_t i=0; i < numComp; ++i) {
935 o[INDEX3(i,0,0,numComp,3)] = tmp0_0*(f_000[i] + f_010[i]) + tmp0_1*(f_100[i] + f_110[i]);
936 o[INDEX3(i,1,0,numComp,3)] = tmp0_2*(f_000[i] + f_100[i]) + tmp0_3*(f_010[i] + f_110[i]);
937 o[INDEX3(i,2,0,numComp,3)] = tmp0_4*(f_000[i] + f_010[i] + f_100[i] + f_110[i]) + tmp0_5*(f_001[i] + f_011[i] + f_101[i] + f_111[i]);
938 } /* end of component loop i */
939 } /* end of k0 loop */
940 } /* end of k1 loop */
941 } /* end of face 4 */
942 if (m_faceOffset[5] > -1) {
943 const double tmp0_0 = -0.5/h0;
944 const double tmp0_4 = 0.25/h2;
945 const double tmp0_1 = 0.5/h0;
946 const double tmp0_5 = -0.25/h2;
947 const double tmp0_2 = -0.5/h1;
948 const double tmp0_3 = 0.5/h1;
949 #pragma omp parallel for
950 for (index_t k1=0; k1 < m_NE1; ++k1) {
951 for (index_t k0=0; k0 < m_NE0; ++k0) {
952 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,m_N2-1, m_N0,m_N1));
953 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,m_N2-1, m_N0,m_N1));
954 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,m_N2-1, m_N0,m_N1));
955 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,m_N2-1, m_N0,m_N1));
956 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,m_N2-2, m_N0,m_N1));
957 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,m_N2-2, m_N0,m_N1));
958 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,m_N2-2, m_N0,m_N1));
959 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,m_N2-2, m_N0,m_N1));
960 double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE0));
961 for (index_t i=0; i < numComp; ++i) {
962 o[INDEX3(i,0,0,numComp,3)] = tmp0_0*(f_001[i] + f_011[i]) + tmp0_1*(f_101[i] + f_111[i]);
963 o[INDEX3(i,1,0,numComp,3)] = tmp0_2*(f_001[i] + f_101[i]) + tmp0_3*(f_011[i] + f_111[i]);
964 o[INDEX3(i,2,0,numComp,3)] = tmp0_4*(f_001[i] + f_011[i] + f_101[i] + f_111[i]) + tmp0_5*(f_000[i] + f_010[i] + f_100[i] + f_110[i]);
965 } /* end of component loop i */
966 } /* end of k0 loop */
967 } /* end of k1 loop */
968 } /* end of face 5 */
969 /* GENERATOR SNIP_GRAD_REDUCED_FACES BOTTOM */
970 } else {
971 stringstream msg;
972 msg << "setToGradient() not implemented for "
973 << functionSpaceTypeAsString(out.getFunctionSpace().getTypeCode());
974 throw RipleyException(msg.str());
975 }
976 }
977
978 void Brick::setToIntegrals(vector<double>& integrals, const escript::Data& arg) const
979 {
980 escript::Data& in = *const_cast<escript::Data*>(&arg);
981 const dim_t numComp = in.getDataPointSize();
982 const double h0 = m_l0/m_gNE0;
983 const double h1 = m_l1/m_gNE1;
984 const double h2 = m_l2/m_gNE2;
985 if (arg.getFunctionSpace().getTypeCode() == Elements) {
986 const double w_0 = h0*h1*h2/8.;
987 #pragma omp parallel
988 {
989 vector<double> int_local(numComp, 0);
990 #pragma omp for
991 for (index_t k2 = 0; k2 < m_NE2; ++k2) {
992 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
993 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
994 const double* f = in.getSampleDataRO(INDEX3(k0, k1, k2, m_NE0, m_NE1));
995 for (index_t i=0; i < numComp; ++i) {
996 const register double f_0 = f[INDEX2(i,0,numComp)];
997 const register double f_1 = f[INDEX2(i,1,numComp)];
998 const register double f_2 = f[INDEX2(i,2,numComp)];
999 const register double f_3 = f[INDEX2(i,3,numComp)];
1000 const register double f_4 = f[INDEX2(i,4,numComp)];
1001 const register double f_5 = f[INDEX2(i,5,numComp)];
1002 const register double f_6 = f[INDEX2(i,6,numComp)];
1003 const register double f_7 = f[INDEX2(i,7,numComp)];
1004 int_local[i]+=(f_0+f_1+f_2+f_3+f_4+f_5+f_6+f_7)*w_0;
1005 } /* end of component loop i */
1006 } /* end of k0 loop */
1007 } /* end of k1 loop */
1008 } /* end of k2 loop */
1009
1010 #pragma omp critical
1011 for (index_t i=0; i<numComp; i++)
1012 integrals[i]+=int_local[i];
1013 }
1014 } else if (arg.getFunctionSpace().getTypeCode() == ReducedElements) {
1015 const double w_0 = h0*h1*h2;
1016 #pragma omp parallel
1017 {
1018 vector<double> int_local(numComp, 0);
1019 #pragma omp for
1020 for (index_t k2 = 0; k2 < m_NE2; ++k2) {
1021 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
1022 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1023 const double* f = in.getSampleDataRO(INDEX3(k0, k1, k2, m_NE0, m_NE1));
1024 for (index_t i=0; i < numComp; ++i) {
1025 int_local[i]+=f[i]*w_0;
1026 } /* end of component loop i */
1027 } /* end of k0 loop */
1028 } /* end of k1 loop */
1029 } /* end of k2 loop */
1030
1031 #pragma omp critical
1032 for (index_t i=0; i<numComp; i++)
1033 integrals[i]+=int_local[i];
1034 }
1035 } else if (arg.getFunctionSpace().getTypeCode() == FaceElements) {
1036 const double w_0 = h1*h2/4.;
1037 const double w_1 = h0*h2/4.;
1038 const double w_2 = h0*h1/4.;
1039 #pragma omp parallel
1040 {
1041 vector<double> int_local(numComp, 0);
1042 if (m_faceOffset[0] > -1) {
1043 #pragma omp for
1044 for (index_t k2=0; k2 < m_NE2; ++k2) {
1045 for (index_t k1=0; k1 < m_NE1; ++k1) {
1046 const double* f = in.getSampleDataRO(m_faceOffset[0]+INDEX2(k1,k2,m_NE1));
1047 for (index_t i=0; i < numComp; ++i) {
1048 const register double f_0 = f[INDEX2(i,0,numComp)];
1049 const register double f_1 = f[INDEX2(i,1,numComp)];
1050 const register double f_2 = f[INDEX2(i,2,numComp)];
1051 const register double f_3 = f[INDEX2(i,3,numComp)];
1052 int_local[i]+=(f_0+f_1+f_2+f_3)*w_0;
1053 } /* end of component loop i */
1054 } /* end of k1 loop */
1055 } /* end of k2 loop */
1056 }
1057
1058 if (m_faceOffset[1] > -1) {
1059 #pragma omp for
1060 for (index_t k2=0; k2 < m_NE2; ++k2) {
1061 for (index_t k1=0; k1 < m_NE1; ++k1) {
1062 const double* f = in.getSampleDataRO(m_faceOffset[1]+INDEX2(k1,k2,m_NE1));
1063 for (index_t i=0; i < numComp; ++i) {
1064 const register double f_0 = f[INDEX2(i,0,numComp)];
1065 const register double f_1 = f[INDEX2(i,1,numComp)];
1066 const register double f_2 = f[INDEX2(i,2,numComp)];
1067 const register double f_3 = f[INDEX2(i,3,numComp)];
1068 int_local[i]+=(f_0+f_1+f_2+f_3)*w_0;
1069 } /* end of component loop i */
1070 } /* end of k1 loop */
1071 } /* end of k2 loop */
1072 }
1073
1074 if (m_faceOffset[2] > -1) {
1075 #pragma omp for
1076 for (index_t k2=0; k2 < m_NE2; ++k2) {
1077 for (index_t k0=0; k0 < m_NE0; ++k0) {
1078 const double* f = in.getSampleDataRO(m_faceOffset[2]+INDEX2(k0,k2,m_NE0));
1079 for (index_t i=0; i < numComp; ++i) {
1080 const register double f_0 = f[INDEX2(i,0,numComp)];
1081 const register double f_1 = f[INDEX2(i,1,numComp)];
1082 const register double f_2 = f[INDEX2(i,2,numComp)];
1083 const register double f_3 = f[INDEX2(i,3,numComp)];
1084 int_local[i]+=(f_0+f_1+f_2+f_3)*w_1;
1085 } /* end of component loop i */
1086 } /* end of k1 loop */
1087 } /* end of k2 loop */
1088 }
1089
1090 if (m_faceOffset[3] > -1) {
1091 #pragma omp for
1092 for (index_t k2=0; k2 < m_NE2; ++k2) {
1093 for (index_t k0=0; k0 < m_NE0; ++k0) {
1094 const double* f = in.getSampleDataRO(m_faceOffset[3]+INDEX2(k0,k2,m_NE0));
1095 for (index_t i=0; i < numComp; ++i) {
1096 const register double f_0 = f[INDEX2(i,0,numComp)];
1097 const register double f_1 = f[INDEX2(i,1,numComp)];
1098 const register double f_2 = f[INDEX2(i,2,numComp)];
1099 const register double f_3 = f[INDEX2(i,3,numComp)];
1100 int_local[i]+=(f_0+f_1+f_2+f_3)*w_1;
1101 } /* end of component loop i */
1102 } /* end of k1 loop */
1103 } /* end of k2 loop */
1104 }
1105
1106 if (m_faceOffset[4] > -1) {
1107 #pragma omp for
1108 for (index_t k1=0; k1 < m_NE1; ++k1) {
1109 for (index_t k0=0; k0 < m_NE0; ++k0) {
1110 const double* f = in.getSampleDataRO(m_faceOffset[4]+INDEX2(k0,k1,m_NE0));
1111 for (index_t i=0; i < numComp; ++i) {
1112 const register double f_0 = f[INDEX2(i,0,numComp)];
1113 const register double f_1 = f[INDEX2(i,1,numComp)];
1114 const register double f_2 = f[INDEX2(i,2,numComp)];
1115 const register double f_3 = f[INDEX2(i,3,numComp)];
1116 int_local[i]+=(f_0+f_1+f_2+f_3)*w_2;
1117 } /* end of component loop i */
1118 } /* end of k1 loop */
1119 } /* end of k2 loop */
1120 }
1121
1122 if (m_faceOffset[5] > -1) {
1123 #pragma omp for
1124 for (index_t k1=0; k1 < m_NE1; ++k1) {
1125 for (index_t k0=0; k0 < m_NE0; ++k0) {
1126 const double* f = in.getSampleDataRO(m_faceOffset[5]+INDEX2(k0,k1,m_NE0));
1127 for (index_t i=0; i < numComp; ++i) {
1128 const register double f_0 = f[INDEX2(i,0,numComp)];
1129 const register double f_1 = f[INDEX2(i,1,numComp)];
1130 const register double f_2 = f[INDEX2(i,2,numComp)];
1131 const register double f_3 = f[INDEX2(i,3,numComp)];
1132 int_local[i]+=(f_0+f_1+f_2+f_3)*w_2;
1133 } /* end of component loop i */
1134 } /* end of k1 loop */
1135 } /* end of k2 loop */
1136 }
1137
1138 #pragma omp critical
1139 for (index_t i=0; i<numComp; i++)
1140 integrals[i]+=int_local[i];
1141 }
1142
1143 } else if (arg.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
1144 const double w_0 = h1*h2;
1145 const double w_1 = h0*h2;
1146 const double w_2 = h0*h1;
1147 #pragma omp parallel
1148 {
1149 vector<double> int_local(numComp, 0);
1150 if (m_faceOffset[0] > -1) {
1151 #pragma omp for
1152 for (index_t k2=0; k2 < m_NE2; ++k2) {
1153 for (index_t k1=0; k1 < m_NE1; ++k1) {
1154 const double* f = in.getSampleDataRO(m_faceOffset[0]+INDEX2(k1,k2,m_NE1));
1155 for (index_t i=0; i < numComp; ++i) {
1156 int_local[i]+=f[i]*w_0;
1157 } /* end of component loop i */
1158 } /* end of k1 loop */
1159 } /* end of k2 loop */
1160 }
1161
1162 if (m_faceOffset[1] > -1) {
1163 #pragma omp for
1164 for (index_t k2=0; k2 < m_NE2; ++k2) {
1165 for (index_t k1=0; k1 < m_NE1; ++k1) {
1166 const double* f = in.getSampleDataRO(m_faceOffset[1]+INDEX2(k1,k2,m_NE1));
1167 for (index_t i=0; i < numComp; ++i) {
1168 int_local[i]+=f[i]*w_0;
1169 } /* end of component loop i */
1170 } /* end of k1 loop */
1171 } /* end of k2 loop */
1172 }
1173
1174 if (m_faceOffset[2] > -1) {
1175 #pragma omp for
1176 for (index_t k2=0; k2 < m_NE2; ++k2) {
1177 for (index_t k0=0; k0 < m_NE0; ++k0) {
1178 const double* f = in.getSampleDataRO(m_faceOffset[2]+INDEX2(k0,k2,m_NE0));
1179 for (index_t i=0; i < numComp; ++i) {
1180 int_local[i]+=f[i]*w_1;
1181 } /* end of component loop i */
1182 } /* end of k1 loop */
1183 } /* end of k2 loop */
1184 }
1185
1186 if (m_faceOffset[3] > -1) {
1187 #pragma omp for
1188 for (index_t k2=0; k2 < m_NE2; ++k2) {
1189 for (index_t k0=0; k0 < m_NE0; ++k0) {
1190 const double* f = in.getSampleDataRO(m_faceOffset[3]+INDEX2(k0,k2,m_NE0));
1191 for (index_t i=0; i < numComp; ++i) {
1192 int_local[i]+=f[i]*w_1;
1193 } /* end of component loop i */
1194 } /* end of k1 loop */
1195 } /* end of k2 loop */
1196 }
1197
1198 if (m_faceOffset[4] > -1) {
1199 #pragma omp for
1200 for (index_t k1=0; k1 < m_NE1; ++k1) {
1201 for (index_t k0=0; k0 < m_NE0; ++k0) {
1202 const double* f = in.getSampleDataRO(m_faceOffset[4]+INDEX2(k0,k1,m_NE0));
1203 for (index_t i=0; i < numComp; ++i) {
1204 int_local[i]+=f[i]*w_2;
1205 } /* end of component loop i */
1206 } /* end of k1 loop */
1207 } /* end of k2 loop */
1208 }
1209
1210 if (m_faceOffset[5] > -1) {
1211 #pragma omp for
1212 for (index_t k1=0; k1 < m_NE1; ++k1) {
1213 for (index_t k0=0; k0 < m_NE0; ++k0) {
1214 const double* f = in.getSampleDataRO(m_faceOffset[5]+INDEX2(k0,k1,m_NE0));
1215 for (index_t i=0; i < numComp; ++i) {
1216 int_local[i]+=f[i]*w_2;
1217 } /* end of component loop i */
1218 } /* end of k1 loop */
1219 } /* end of k2 loop */
1220 }
1221
1222 #pragma omp critical
1223 for (index_t i=0; i<numComp; i++)
1224 integrals[i]+=int_local[i];
1225 }
1226
1227 } else {
1228 stringstream msg;
1229 msg << "setToIntegrals() not implemented for "
1230 << functionSpaceTypeAsString(arg.getFunctionSpace().getTypeCode());
1231 throw RipleyException(msg.str());
1232 }
1233 }
1234
1235 Paso_SystemMatrixPattern* Brick::getPattern(bool reducedRowOrder,
1236 bool reducedColOrder) const
1237 {
1238 if (reducedRowOrder || reducedColOrder)
1239 throw RipleyException("getPattern() not implemented for reduced order");
1240
1241 throw RipleyException("getPattern() not implemented");
1242 }
1243
1244 void Brick::Print_Mesh_Info(const bool full) const
1245 {
1246 RipleyDomain::Print_Mesh_Info(full);
1247 if (full) {
1248 cout << " Id Coordinates" << endl;
1249 cout.precision(15);
1250 cout.setf(ios::scientific, ios::floatfield);
1251 pair<double,double> xdx = getFirstCoordAndSpacing(0);
1252 pair<double,double> ydy = getFirstCoordAndSpacing(1);
1253 pair<double,double> zdz = getFirstCoordAndSpacing(2);
1254 for (index_t i=0; i < getNumNodes(); i++) {
1255 cout << " " << setw(5) << m_nodeId[i]
1256 << " " << xdx.first+(i%m_N0)*xdx.second
1257 << " " << ydy.first+(i%(m_N0*m_N1)/m_N0)*ydy.second
1258 << " " << zdz.first+(i/(m_N0*m_N1))*zdz.second << endl;
1259 }
1260 }
1261 }
1262
1263 IndexVector Brick::getNumNodesPerDim() const
1264 {
1265 IndexVector ret;
1266 ret.push_back(m_N0);
1267 ret.push_back(m_N1);
1268 ret.push_back(m_N2);
1269 return ret;
1270 }
1271
1272 IndexVector Brick::getNumElementsPerDim() const
1273 {
1274 IndexVector ret;
1275 ret.push_back(m_NE0);
1276 ret.push_back(m_NE1);
1277 ret.push_back(m_NE2);
1278 return ret;
1279 }
1280
1281 IndexVector Brick::getNumFacesPerBoundary() const
1282 {
1283 IndexVector ret(6, 0);
1284 //left
1285 if (m_offset0==0)
1286 ret[0]=m_NE1*m_NE2;
1287 //right
1288 if (m_mpiInfo->rank%m_NX==m_NX-1)
1289 ret[1]=m_NE1*m_NE2;
1290 //bottom
1291 if (m_offset1==0)
1292 ret[2]=m_NE0*m_NE2;
1293 //top
1294 if (m_mpiInfo->rank%(m_NX*m_NY)/m_NX==m_NY-1)
1295 ret[3]=m_NE0*m_NE2;
1296 //front
1297 if (m_offset2==0)
1298 ret[4]=m_NE0*m_NE1;
1299 //back
1300 if (m_mpiInfo->rank/(m_NX*m_NY)==m_NZ-1)
1301 ret[5]=m_NE0*m_NE1;
1302 return ret;
1303 }
1304
1305 pair<double,double> Brick::getFirstCoordAndSpacing(dim_t dim) const
1306 {
1307 if (dim==0)
1308 return pair<double,double>((m_l0*m_offset0)/m_gNE0, m_l0/m_gNE0);
1309 else if (dim==1)
1310 return pair<double,double>((m_l1*m_offset1)/m_gNE1, m_l1/m_gNE1);
1311 else if (dim==2)
1312 return pair<double,double>((m_l2*m_offset2)/m_gNE2, m_l2/m_gNE2);
1313
1314 throw RipleyException("getFirstCoordAndSpacing(): invalid argument");
1315 }
1316
1317
1318 //protected
1319 dim_t Brick::getNumFaceElements() const
1320 {
1321 dim_t n=0;
1322 //left
1323 if (m_offset0==0)
1324 n+=m_NE1*m_NE2;
1325 //right
1326 if (m_mpiInfo->rank%m_NX==m_NX-1)
1327 n+=m_NE1*m_NE2;
1328 //bottom
1329 if (m_offset1==0)
1330 n+=m_NE0*m_NE2;
1331 //top
1332 if (m_mpiInfo->rank%(m_NX*m_NY)/m_NX==m_NY-1)
1333 n+=m_NE0*m_NE2;
1334 //front
1335 if (m_offset2==0)
1336 n+=m_NE0*m_NE1;
1337 //back
1338 if (m_mpiInfo->rank/(m_NX*m_NY)==m_NZ-1)
1339 n+=m_NE0*m_NE1;
1340
1341 return n;
1342 }
1343
1344 //protected
1345 void Brick::assembleCoordinates(escript::Data& arg) const
1346 {
1347 escriptDataC x = arg.getDataC();
1348 int numDim = m_numDim;
1349 if (!isDataPointShapeEqual(&x, 1, &numDim))
1350 throw RipleyException("setToX: Invalid Data object shape");
1351 if (!numSamplesEqual(&x, 1, getNumNodes()))
1352 throw RipleyException("setToX: Illegal number of samples in Data object");
1353
1354 pair<double,double> xdx = getFirstCoordAndSpacing(0);
1355 pair<double,double> ydy = getFirstCoordAndSpacing(1);
1356 pair<double,double> zdz = getFirstCoordAndSpacing(2);
1357 arg.requireWrite();
1358 #pragma omp parallel for
1359 for (dim_t i2 = 0; i2 < m_N2; i2++) {
1360 for (dim_t i1 = 0; i1 < m_N1; i1++) {
1361 for (dim_t i0 = 0; i0 < m_N0; i0++) {
1362 double* point = arg.getSampleDataRW(i0+m_N0*i1+m_N0*m_N1*i2);
1363 point[0] = xdx.first+i0*xdx.second;
1364 point[1] = ydy.first+i1*ydy.second;
1365 point[2] = zdz.first+i2*zdz.second;
1366 }
1367 }
1368 }
1369 }
1370
1371 //private
1372 void Brick::populateSampleIds()
1373 {
1374 // identifiers are ordered from left to right, bottom to top, front to back
1375 // on each rank, except for the shared nodes which are owned by the rank
1376 // below / to the left / to the front of the current rank
1377
1378 // build node distribution vector first.
1379 // m_nodeDistribution[i] is the first node id on rank i, that is
1380 // rank i owns m_nodeDistribution[i+1]-nodeDistribution[i] nodes
1381 m_nodeDistribution.assign(m_mpiInfo->size+1, 0);
1382 m_nodeDistribution[1]=getNumNodes();
1383 for (dim_t k=1; k<m_mpiInfo->size-1; k++) {
1384 const index_t x = k%m_NX;
1385 const index_t y = k%(m_NX*m_NY)/m_NX;
1386 const index_t z = k/(m_NX*m_NY);
1387 index_t numNodes=getNumNodes();
1388 if (x>0)
1389 numNodes-=m_N1*m_N2;
1390 if (y>0)
1391 numNodes-=m_N0*m_N2;
1392 if (z>0)
1393 numNodes-=m_N0*m_N1;
1394 // if an edge was subtracted twice add it back
1395 if (x>0 && y>0)
1396 numNodes+=m_N2;
1397 if (x>0 && z>0)
1398 numNodes+=m_N1;
1399 if (y>0 && z>0)
1400 numNodes+=m_N0;
1401 // the corner node was removed 3x and added back 3x, so subtract it
1402 if (x>0 && y>0 && z>0)
1403 numNodes--;
1404 m_nodeDistribution[k+1]=m_nodeDistribution[k]+numNodes;
1405 }
1406 m_nodeDistribution[m_mpiInfo->size]=getNumDataPointsGlobal();
1407
1408 m_nodeId.resize(getNumNodes());
1409
1410 // the bottom, left and front planes are not owned by this rank so the
1411 // identifiers need to be computed accordingly
1412 const index_t left = (m_offset0==0 ? 0 : 1);
1413 const index_t bottom = (m_offset1==0 ? 0 : 1);
1414 const index_t front = (m_offset2==0 ? 0 : 1);
1415
1416 // case 1: all nodes on left plane are owned by rank on the left
1417 if (left>0) {
1418 const int neighbour=m_mpiInfo->rank-1;
1419 const index_t leftN0=(neighbour%m_NX == 0 ? m_N0 : m_N0-1);
1420 const index_t leftN1=(neighbour%(m_NX*m_NY)/m_NX==0 ? m_N1 : m_N1-1);
1421 #pragma omp parallel for
1422 for (dim_t i2=front; i2<m_N2; i2++) {
1423 for (dim_t i1=bottom; i1<m_N1; i1++) {
1424 m_nodeId[i1*m_N0+i2*m_N0*m_N1]=m_nodeDistribution[neighbour]
1425 + (i1-bottom+1)*leftN0
1426 + (i2-front)*leftN0*leftN1 - 1;
1427 }
1428 }
1429 }
1430 // case 2: all nodes on bottom plane are owned by rank below
1431 if (bottom>0) {
1432 const int neighbour=m_mpiInfo->rank-m_NX;
1433 const index_t bottomN0=(neighbour%m_NX == 0 ? m_N0 : m_N0-1);
1434 const index_t bottomN1=(neighbour%(m_NX*m_NY)/m_NX==0 ? m_N1 : m_N1-1);
1435 #pragma omp parallel for
1436 for (dim_t i2=front; i2<m_N2; i2++) {
1437 for (dim_t i0=left; i0<m_N0; i0++) {
1438 m_nodeId[i0+i2*m_N0*m_N1]=m_nodeDistribution[neighbour]
1439 + bottomN0*(bottomN1-1)
1440 + (i2-front)*bottomN0*bottomN1 + i0-left;
1441 }
1442 }
1443 }
1444 // case 3: all nodes on front plane are owned by rank in front
1445 if (front>0) {
1446 const int neighbour=m_mpiInfo->rank-m_NX*m_NY;
1447 const index_t N0=(neighbour%m_NX == 0 ? m_N0 : m_N0-1);
1448 const index_t N1=(neighbour%(m_NX*m_NY)/m_NX==0 ? m_N1 : m_N1-1);
1449 const index_t N2=(neighbour/(m_NX*m_NY)==0 ? m_N2 : m_N2-1);
1450 #pragma omp parallel for
1451 for (dim_t i1=bottom; i1<m_N1; i1++) {
1452 for (dim_t i0=left; i0<m_N0; i0++) {
1453 m_nodeId[i0+i1*m_N0]=m_nodeDistribution[neighbour]
1454 + N0*N1*(N2-1)+(i1-bottom)*N0 + i0-left;
1455 }
1456 }
1457 }
1458 // case 4: nodes on front bottom edge are owned by the corresponding rank
1459 if (front>0 && bottom>0) {
1460 const int neighbour=m_mpiInfo->rank-m_NX*(m_NY+1);
1461 const index_t N0=(neighbour%m_NX == 0 ? m_N0 : m_N0-1);
1462 const index_t N1=(neighbour%(m_NX*m_NY)/m_NX==0 ? m_N1 : m_N1-1);
1463 const index_t N2=(neighbour/(m_NX*m_NY)==0 ? m_N2 : m_N2-1);
1464 #pragma omp parallel for
1465 for (dim_t i0=left; i0<m_N0; i0++) {
1466 m_nodeId[i0]=m_nodeDistribution[neighbour]
1467 + N0*N1*(N2-1)+(N1-1)*N0 + i0-left;
1468 }
1469 }
1470 // case 5: nodes on left bottom edge are owned by the corresponding rank
1471 if (left>0 && bottom>0) {
1472 const int neighbour=m_mpiInfo->rank-m_NX-1;
1473 const index_t N0=(neighbour%m_NX == 0 ? m_N0 : m_N0-1);
1474 const index_t N1=(neighbour%(m_NX*m_NY)/m_NX==0 ? m_N1 : m_N1-1);
1475 #pragma omp parallel for
1476 for (dim_t i2=front; i2<m_N2; i2++) {
1477 m_nodeId[i2*m_N0*m_N1]=m_nodeDistribution[neighbour]
1478 + (1+i2-front)*N0*N1-1;
1479 }
1480 }
1481 // case 6: nodes on left front edge are owned by the corresponding rank
1482 if (left>0 && front>0) {
1483 const int neighbour=m_mpiInfo->rank-m_NX*m_NY-1;
1484 const index_t N0=(neighbour%m_NX == 0 ? m_N0 : m_N0-1);
1485 const index_t N1=(neighbour%(m_NX*m_NY)/m_NX==0 ? m_N1 : m_N1-1);
1486 const index_t N2=(neighbour/(m_NX*m_NY)==0 ? m_N2 : m_N2-1);
1487 #pragma omp parallel for
1488 for (dim_t i1=bottom; i1<m_N1; i1++) {
1489 m_nodeId[i1*m_N0]=m_nodeDistribution[neighbour]
1490 + N0*N1*(N2-1)+N0-1+(i1-bottom)*N0;
1491 }
1492 }
1493 // case 7: bottom-left-front corner node owned by corresponding rank
1494 if (left>0 && bottom>0 && front>0) {
1495 const int neighbour=m_mpiInfo->rank-m_NX*(m_NY+1)-1;
1496 const index_t N0=(neighbour%m_NX == 0 ? m_N0 : m_N0-1);
1497 const index_t N1=(neighbour%(m_NX*m_NY)/m_NX==0 ? m_N1 : m_N1-1);
1498 const index_t N2=(neighbour/(m_NX*m_NY) == 0 ? m_N2 : m_N2-1);
1499 m_nodeId[0]=m_nodeDistribution[neighbour]+N0*N1*N2-1;
1500 }
1501
1502 // the rest of the id's are contiguous
1503 const index_t firstId=m_nodeDistribution[m_mpiInfo->rank];
1504 #pragma omp parallel for
1505 for (dim_t i2=front; i2<m_N2; i2++) {
1506 for (dim_t i1=bottom; i1<m_N1; i1++) {
1507 for (dim_t i0=left; i0<m_N0; i0++) {
1508 m_nodeId[i0+i1*m_N0+i2*m_N0*m_N1] = firstId+i0-left
1509 +(i1-bottom)*(m_N0-left)
1510 +(i2-front)*(m_N0-left)*(m_N1-bottom);
1511 }
1512 }
1513 }
1514
1515 // elements
1516 m_elementId.resize(getNumElements());
1517 #pragma omp parallel for
1518 for (dim_t k=0; k<getNumElements(); k++) {
1519 m_elementId[k]=k;
1520 }
1521
1522 // face elements
1523 m_faceId.resize(getNumFaceElements());
1524 #pragma omp parallel for
1525 for (dim_t k=0; k<getNumFaceElements(); k++) {
1526 m_faceId[k]=k;
1527 }
1528
1529 // generate face offset vector
1530 const IndexVector facesPerEdge = getNumFacesPerBoundary();
1531 m_faceOffset.assign(facesPerEdge.size(), -1);
1532 index_t offset=0;
1533 for (size_t i=0; i<facesPerEdge.size(); i++) {
1534 if (facesPerEdge[i]>0) {
1535 m_faceOffset[i]=offset;
1536 offset+=facesPerEdge[i];
1537 }
1538 }
1539 }
1540
1541 //protected
1542 void Brick::interpolateNodesOnElements(escript::Data& out, escript::Data& in,
1543 bool reduced) const
1544 {
1545 const dim_t numComp = in.getDataPointSize();
1546 if (reduced) {
1547 /* GENERATOR SNIP_INTERPOLATE_REDUCED_ELEMENTS TOP */
1548 const double tmp0_0 = 0.12500000000000000000;
1549 #pragma omp parallel for
1550 for (index_t k2=0; k2 < m_NE2; ++k2) {
1551 for (index_t k1=0; k1 < m_NE1; ++k1) {
1552 for (index_t k0=0; k0 < m_NE0; ++k0) {
1553 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,k2, m_N0,m_N1));
1554 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,k2+1, m_N0,m_N1));
1555 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,k2+1, m_N0,m_N1));
1556 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,k2+1, m_N0,m_N1));
1557 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,k2, m_N0,m_N1));
1558 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2, m_N0,m_N1));
1559 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,k2, m_N0,m_N1));
1560 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2+1, m_N0,m_N1));
1561 double* o = out.getSampleDataRW(INDEX3(k0,k1,k2,m_NE0,m_NE1));
1562 for (index_t i=0; i < numComp; ++i) {
1563 o[INDEX2(i,numComp,0)] = tmp0_0*(f_000[i] + f_001[i] + f_010[i] + f_011[i] + f_100[i] + f_101[i] + f_110[i] + f_111[i]);
1564 } /* end of component loop i */
1565 } /* end of k0 loop */
1566 } /* end of k1 loop */
1567 } /* end of k2 loop */
1568 /* GENERATOR SNIP_INTERPOLATE_REDUCED_ELEMENTS BOTTOM */
1569 } else {
1570 /* GENERATOR SNIP_INTERPOLATE_ELEMENTS TOP */
1571 const double tmp0_3 = 0.0094373878376559314545;
1572 const double tmp0_2 = 0.035220810900864519624;
1573 const double tmp0_1 = 0.13144585576580214704;
1574 const double tmp0_0 = 0.49056261216234406855;
1575 #pragma omp parallel for
1576 for (index_t k2=0; k2 < m_NE2; ++k2) {
1577 for (index_t k1=0; k1 < m_NE1; ++k1) {
1578 for (index_t k0=0; k0 < m_NE0; ++k0) {
1579 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,k2, m_N0,m_N1));
1580 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,k2+1, m_N0,m_N1));
1581 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,k2+1, m_N0,m_N1));
1582 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,k2+1, m_N0,m_N1));
1583 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2, m_N0,m_N1));
1584 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,k2, m_N0,m_N1));
1585 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,k2, m_N0,m_N1));
1586 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2+1, m_N0,m_N1));
1587 double* o = out.getSampleDataRW(INDEX3(k0,k1,k2,m_NE0,m_NE1));
1588 for (index_t i=0; i < numComp; ++i) {
1589 o[INDEX2(i,numComp,0)] = f_000[i]*tmp0_0 + f_111[i]*tmp0_3 + tmp0_1*(f_001[i] + f_010[i] + f_100[i]) + tmp0_2*(f_011[i] + f_101[i] + f_110[i]);
1590 o[INDEX2(i,numComp,1)] = f_011[i]*tmp0_3 + f_100[i]*tmp0_0 + tmp0_1*(f_000[i] + f_101[i] + f_110[i]) + tmp0_2*(f_001[i] + f_010[i] + f_111[i]);
1591 o[INDEX2(i,numComp,2)] = f_010[i]*tmp0_0 + f_101[i]*tmp0_3 + tmp0_1*(f_000[i] + f_011[i] + f_110[i]) + tmp0_2*(f_001[i] + f_100[i] + f_111[i]);
1592 o[INDEX2(i,numComp,3)] = f_001[i]*tmp0_3 + f_110[i]*tmp0_0 + tmp0_1*(f_010[i] + f_100[i] + f_111[i]) + tmp0_2*(f_000[i] + f_011[i] + f_101[i]);
1593 o[INDEX2(i,numComp,4)] = f_001[i]*tmp0_0 + f_110[i]*tmp0_3 + tmp0_1*(f_000[i] + f_011[i] + f_101[i]) + tmp0_2*(f_010[i] + f_100[i] + f_111[i]);
1594 o[INDEX2(i,numComp,5)] = f_010[i]*tmp0_3 + f_101[i]*tmp0_0 + tmp0_1*(f_001[i] + f_100[i] + f_111[i]) + tmp0_2*(f_000[i] + f_011[i] + f_110[i]);
1595 o[INDEX2(i,numComp,6)] = f_011[i]*tmp0_0 + f_100[i]*tmp0_3 + tmp0_1*(f_001[i] + f_010[i] + f_111[i]) + tmp0_2*(f_000[i] + f_101[i] + f_110[i]);
1596 o[INDEX2(i,numComp,7)] = f_000[i]*tmp0_3 + f_111[i]*tmp0_0 + tmp0_1*(f_011[i] + f_101[i] + f_110[i]) + tmp0_2*(f_001[i] + f_010[i] + f_100[i]);
1597 } /* end of component loop i */
1598 } /* end of k0 loop */
1599 } /* end of k1 loop */
1600 } /* end of k2 loop */
1601 /* GENERATOR SNIP_INTERPOLATE_ELEMENTS BOTTOM */
1602 }
1603 }
1604
1605 //protected
1606 void Brick::interpolateNodesOnFaces(escript::Data& out, escript::Data& in,
1607 bool reduced) const
1608 {
1609 const dim_t numComp = in.getDataPointSize();
1610 if (reduced) {
1611 /* GENERATOR SNIP_INTERPOLATE_REDUCED_FACES TOP */
1612 if (m_faceOffset[0] > -1) {
1613 const double tmp0_0 = 0.25000000000000000000;
1614 #pragma omp parallel for
1615 for (index_t k2=0; k2 < m_NE2; ++k2) {
1616 for (index_t k1=0; k1 < m_NE1; ++k1) {
1617 const register double* f_011 = in.getSampleDataRO(INDEX3(0,k1+1,k2+1, m_N0,m_N1));
1618 const register double* f_010 = in.getSampleDataRO(INDEX3(0,k1+1,k2, m_N0,m_N1));
1619 const register double* f_001 = in.getSampleDataRO(INDEX3(0,k1,k2+1, m_N0,m_N1));
1620 const register double* f_000 = in.getSampleDataRO(INDEX3(0,k1,k2, m_N0,m_N1));
1621 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE1));
1622 for (index_t i=0; i < numComp; ++i) {
1623 o[INDEX2(i,numComp,0)] = tmp0_0*(f_000[i] + f_001[i] + f_010[i] + f_011[i]);
1624 } /* end of component loop i */
1625 } /* end of k1 loop */
1626 } /* end of k2 loop */
1627 } /* end of face 0 */
1628 if (m_faceOffset[1] > -1) {
1629 const double tmp0_0 = 0.25000000000000000000;
1630 #pragma omp parallel for
1631 for (index_t k2=0; k2 < m_NE2; ++k2) {
1632 for (index_t k1=0; k1 < m_NE1; ++k1) {
1633 const register double* f_110 = in.getSampleDataRO(INDEX3(m_N0-1,k1+1,k2, m_N0,m_N1));
1634 const register double* f_100 = in.getSampleDataRO(INDEX3(m_N0-1,k1,k2, m_N0,m_N1));
1635 const register double* f_101 = in.getSampleDataRO(INDEX3(m_N0-1,k1,k2+1, m_N0,m_N1));
1636 const register double* f_111 = in.getSampleDataRO(INDEX3(m_N0-1,k1+1,k2+1, m_N0,m_N1));
1637 double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE1));
1638 for (index_t i=0; i < numComp; ++i) {
1639 o[INDEX2(i,numComp,0)] = tmp0_0*(f_100[i] + f_101[i] + f_110[i] + f_111[i]);
1640 } /* end of component loop i */
1641 } /* end of k1 loop */
1642 } /* end of k2 loop */
1643 } /* end of face 1 */
1644 if (m_faceOffset[2] > -1) {
1645 const double tmp0_0 = 0.25000000000000000000;
1646 #pragma omp parallel for
1647 for (index_t k2=0; k2 < m_NE2; ++k2) {
1648 for (index_t k0=0; k0 < m_NE0; ++k0) {
1649 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,0,k2, m_N0,m_N1));
1650 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,0,k2+1, m_N0,m_N1));
1651 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,0,k2+1, m_N0,m_N1));
1652 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,0,k2, m_N0,m_N1));
1653 double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE0));
1654 for (index_t i=0; i < numComp; ++i) {
1655 o[INDEX2(i,numComp,0)] = tmp0_0*(f_000[i] + f_001[i] + f_100[i] + f_101[i]);
1656 } /* end of component loop i */
1657 } /* end of k0 loop */
1658 } /* end of k2 loop */
1659 } /* end of face 2 */
1660 if (m_faceOffset[3] > -1) {
1661 const double tmp0_0 = 0.25000000000000000000;
1662 #pragma omp parallel for
1663 for (index_t k2=0; k2 < m_NE2; ++k2) {
1664 for (index_t k0=0; k0 < m_NE0; ++k0) {
1665 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,m_N1-1,k2, m_N0,m_N1));
1666 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,m_N1-1,k2+1, m_N0,m_N1));
1667 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,m_N1-1,k2, m_N0,m_N1));
1668 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,m_N1-1,k2+1, m_N0,m_N1));
1669 double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE0));
1670 for (index_t i=0; i < numComp; ++i) {
1671 o[INDEX2(i,numComp,0)] = tmp0_0*(f_010[i] + f_011[i] + f_110[i] + f_111[i]);
1672 } /* end of component loop i */
1673 } /* end of k0 loop */
1674 } /* end of k2 loop */
1675 } /* end of face 3 */
1676 if (m_faceOffset[4] > -1) {
1677 const double tmp0_0 = 0.25000000000000000000;
1678 #pragma omp parallel for
1679 for (index_t k1=0; k1 < m_NE1; ++k1) {
1680 for (index_t k0=0; k0 < m_NE0; ++k0) {
1681 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,0, m_N0,m_N1));
1682 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,0, m_N0,m_N1));
1683 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,0, m_N0,m_N1));
1684 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,0, m_N0,m_N1));
1685 double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE0));
1686 for (index_t i=0; i < numComp; ++i) {
1687 o[INDEX2(i,numComp,0)] = tmp0_0*(f_000[i] + f_010[i] + f_100[i] + f_110[i]);
1688 } /* end of component loop i */
1689 } /* end of k0 loop */
1690 } /* end of k1 loop */
1691 } /* end of face 4 */
1692 if (m_faceOffset[5] > -1) {
1693 const double tmp0_0 = 0.25000000000000000000;
1694 #pragma omp parallel for
1695 for (index_t k1=0; k1 < m_NE1; ++k1) {
1696 for (index_t k0=0; k0 < m_NE0; ++k0) {
1697 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,m_N2-1, m_N0,m_N1));
1698 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,m_N2-1, m_N0,m_N1));
1699 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,m_N2-1, m_N0,m_N1));
1700 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,m_N2-1, m_N0,m_N1));
1701 double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE0));
1702 for (index_t i=0; i < numComp; ++i) {
1703 o[INDEX2(i,numComp,0)] = tmp0_0*(f_001[i] + f_011[i] + f_101[i] + f_111[i]);
1704 } /* end of component loop i */
1705 } /* end of k0 loop */
1706 } /* end of k1 loop */
1707 } /* end of face 5 */
1708 /* GENERATOR SNIP_INTERPOLATE_REDUCED_FACES BOTTOM */
1709 } else {
1710 /* GENERATOR SNIP_INTERPOLATE_FACES TOP */
1711 if (m_faceOffset[0] > -1) {
1712 const double tmp0_2 = 0.044658198738520451079;
1713 const double tmp0_1 = 0.16666666666666666667;
1714 const double tmp0_0 = 0.62200846792814621559;
1715 #pragma omp parallel for
1716 for (index_t k2=0; k2 < m_NE2; ++k2) {