/[escript]/branches/diaplayground/ripley/src/Brick.cpp
ViewVC logotype

Contents of /branches/diaplayground/ripley/src/Brick.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 3746 - (show annotations)
Thu Dec 15 00:02:22 2011 UTC (7 years, 4 months ago) by caltinay
Original Path: branches/ripleygmg_from_3668/ripley/src/Brick.cpp
File size: 93863 byte(s)
In Ripley Solution==ContinuousFunction and ReducedSolution==ReducedCF now.
Removed a test in escript that relied on failure when trying to tag Data on
Solution/ReducedSolution.

1
2 /*******************************************************
3 *
4 * Copyright (c) 2003-2011 by University of Queensland
5 * Earth Systems Science Computational Center (ESSCC)
6 * http://www.uq.edu.au/esscc
7 *
8 * Primary Business: Queensland, Australia
9 * Licensed under the Open Software License version 3.0
10 * http://www.opensource.org/licenses/osl-3.0.php
11 *
12 *******************************************************/
13
14 #include <ripley/Brick.h>
15 extern "C" {
16 #include "paso/SystemMatrixPattern.h"
17 }
18
19 #if USE_SILO
20 #include <silo.h>
21 #ifdef ESYS_MPI
22 #include <pmpio.h>
23 #endif
24 #endif
25
26 #include <iomanip>
27
28 using namespace std;
29
30 namespace ripley {
31
32 Brick::Brick(int n0, int n1, int n2, double l0, double l1, double l2, int d0,
33 int d1, int d2) :
34 RipleyDomain(3),
35 m_gNE0(n0),
36 m_gNE1(n1),
37 m_gNE2(n2),
38 m_l0(l0),
39 m_l1(l1),
40 m_l2(l2),
41 m_NX(d0),
42 m_NY(d1),
43 m_NZ(d2)
44 {
45 // ensure number of subdivisions is valid and nodes can be distributed
46 // among number of ranks
47 if (m_NX*m_NY*m_NZ != m_mpiInfo->size)
48 throw RipleyException("Invalid number of spatial subdivisions");
49
50 if (n0%m_NX > 0 || n1%m_NY > 0 || n2%m_NZ > 0)
51 throw RipleyException("Number of elements must be separable into number of ranks in each dimension");
52
53 // local number of elements
54 m_NE0 = n0/m_NX;
55 m_NE1 = n1/m_NY;
56 m_NE2 = n2/m_NZ;
57 // local number of nodes (not necessarily owned)
58 m_N0 = m_NE0+1;
59 m_N1 = m_NE1+1;
60 m_N2 = m_NE2+1;
61 // bottom-left-front node is at (offset0,offset1,offset2) in global mesh
62 m_offset0 = m_NE0*(m_mpiInfo->rank%m_NX);
63 m_offset1 = m_NE1*(m_mpiInfo->rank%(m_NX*m_NY)/m_NX);
64 m_offset2 = m_NE2*(m_mpiInfo->rank/(m_NX*m_NY));
65 populateSampleIds();
66 }
67
68
69 Brick::~Brick()
70 {
71 }
72
73 string Brick::getDescription() const
74 {
75 return "ripley::Brick";
76 }
77
78 bool Brick::operator==(const AbstractDomain& other) const
79 {
80 const Brick* o=dynamic_cast<const Brick*>(&other);
81 if (o) {
82 return (RipleyDomain::operator==(other) &&
83 m_gNE0==o->m_gNE0 && m_gNE1==o->m_gNE1 && m_gNE2==o->m_gNE2
84 && m_l0==o->m_l0 && m_l1==o->m_l1 && m_l2==o->m_l2
85 && m_NX==o->m_NX && m_NY==o->m_NY && m_NZ==o->m_NZ);
86 }
87
88 return false;
89 }
90
91 void Brick::dump(const string& fileName) const
92 {
93 #if USE_SILO
94 string fn(fileName);
95 if (fileName.length() < 6 || fileName.compare(fileName.length()-5, 5, ".silo") != 0) {
96 fn+=".silo";
97 }
98
99 const int NUM_SILO_FILES = 1;
100 const char* blockDirFmt = "/block%04d";
101 int driver=DB_HDF5;
102 string siloPath;
103 DBfile* dbfile = NULL;
104
105 #ifdef ESYS_MPI
106 PMPIO_baton_t* baton = NULL;
107 #endif
108
109 if (m_mpiInfo->size > 1) {
110 #ifdef ESYS_MPI
111 baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
112 0x1337, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
113 PMPIO_DefaultClose, (void*)&driver);
114 // try the fallback driver in case of error
115 if (!baton && driver != DB_PDB) {
116 driver = DB_PDB;
117 baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
118 0x1338, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
119 PMPIO_DefaultClose, (void*)&driver);
120 }
121 if (baton) {
122 char str[64];
123 snprintf(str, 64, blockDirFmt, PMPIO_RankInGroup(baton, m_mpiInfo->rank));
124 siloPath = str;
125 dbfile = (DBfile*) PMPIO_WaitForBaton(baton, fn.c_str(), siloPath.c_str());
126 }
127 #endif
128 } else {
129 dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
130 getDescription().c_str(), driver);
131 // try the fallback driver in case of error
132 if (!dbfile && driver != DB_PDB) {
133 driver = DB_PDB;
134 dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
135 getDescription().c_str(), driver);
136 }
137 }
138
139 if (!dbfile)
140 throw RipleyException("dump: Could not create Silo file");
141
142 /*
143 if (driver==DB_HDF5) {
144 // gzip level 1 already provides good compression with minimal
145 // performance penalty. Some tests showed that gzip levels >3 performed
146 // rather badly on escript data both in terms of time and space
147 DBSetCompression("ERRMODE=FALLBACK METHOD=GZIP LEVEL=1");
148 }
149 */
150
151 boost::scoped_ptr<double> x(new double[m_N0]);
152 boost::scoped_ptr<double> y(new double[m_N1]);
153 boost::scoped_ptr<double> z(new double[m_N2]);
154 double* coords[3] = { x.get(), y.get(), z.get() };
155 pair<double,double> xdx = getFirstCoordAndSpacing(0);
156 pair<double,double> ydy = getFirstCoordAndSpacing(1);
157 pair<double,double> zdz = getFirstCoordAndSpacing(2);
158 #pragma omp parallel
159 {
160 #pragma omp for
161 for (dim_t i0 = 0; i0 < m_N0; i0++) {
162 coords[0][i0]=xdx.first+i0*xdx.second;
163 }
164 #pragma omp for
165 for (dim_t i1 = 0; i1 < m_N1; i1++) {
166 coords[1][i1]=ydy.first+i1*ydy.second;
167 }
168 #pragma omp for
169 for (dim_t i2 = 0; i2 < m_N2; i2++) {
170 coords[2][i2]=zdz.first+i2*zdz.second;
171 }
172 }
173 IndexVector dims = getNumNodesPerDim();
174 DBPutQuadmesh(dbfile, "mesh", NULL, coords, &dims[0], 3, DB_DOUBLE,
175 DB_COLLINEAR, NULL);
176
177 DBPutQuadvar1(dbfile, "nodeId", "mesh", (void*)&m_nodeId[0], &dims[0], 3,
178 NULL, 0, DB_INT, DB_NODECENT, NULL);
179
180 // write element ids
181 dims = getNumElementsPerDim();
182 DBPutQuadvar1(dbfile, "elementId", "mesh", (void*)&m_elementId[0],
183 &dims[0], 3, NULL, 0, DB_INT, DB_ZONECENT, NULL);
184
185 // rank 0 writes multimesh and multivar
186 if (m_mpiInfo->rank == 0) {
187 vector<string> tempstrings;
188 vector<char*> names;
189 for (dim_t i=0; i<m_mpiInfo->size; i++) {
190 stringstream path;
191 path << "/block" << setw(4) << setfill('0') << right << i << "/mesh";
192 tempstrings.push_back(path.str());
193 names.push_back((char*)tempstrings.back().c_str());
194 }
195 vector<int> types(m_mpiInfo->size, DB_QUAD_RECT);
196 DBSetDir(dbfile, "/");
197 DBPutMultimesh(dbfile, "multimesh", m_mpiInfo->size, &names[0],
198 &types[0], NULL);
199 tempstrings.clear();
200 names.clear();
201 for (dim_t i=0; i<m_mpiInfo->size; i++) {
202 stringstream path;
203 path << "/block" << setw(4) << setfill('0') << right << i << "/nodeId";
204 tempstrings.push_back(path.str());
205 names.push_back((char*)tempstrings.back().c_str());
206 }
207 types.assign(m_mpiInfo->size, DB_QUADVAR);
208 DBPutMultivar(dbfile, "nodeId", m_mpiInfo->size, &names[0],
209 &types[0], NULL);
210 tempstrings.clear();
211 names.clear();
212 for (dim_t i=0; i<m_mpiInfo->size; i++) {
213 stringstream path;
214 path << "/block" << setw(4) << setfill('0') << right << i << "/elementId";
215 tempstrings.push_back(path.str());
216 names.push_back((char*)tempstrings.back().c_str());
217 }
218 DBPutMultivar(dbfile, "elementId", m_mpiInfo->size, &names[0],
219 &types[0], NULL);
220 }
221
222 if (m_mpiInfo->size > 1) {
223 #ifdef ESYS_MPI
224 PMPIO_HandOffBaton(baton, dbfile);
225 PMPIO_Finish(baton);
226 #endif
227 } else {
228 DBClose(dbfile);
229 }
230
231 #else // USE_SILO
232 throw RipleyException("dump(): no Silo support");
233 #endif
234 }
235
236 const int* Brick::borrowSampleReferenceIDs(int fsType) const
237 {
238 switch (fsType) {
239 case Nodes:
240 return &m_nodeId[0];
241 case Elements:
242 case ReducedElements:
243 return &m_elementId[0];
244 case ReducedFaceElements:
245 return &m_faceId[0];
246 default:
247 break;
248 }
249
250 stringstream msg;
251 msg << "borrowSampleReferenceIDs() not implemented for function space type "
252 << fsType;
253 throw RipleyException(msg.str());
254 }
255
256 bool Brick::ownSample(int fsCode, index_t id) const
257 {
258 #ifdef ESYS_MPI
259 if (fsCode == Nodes) {
260 const index_t myFirst=m_nodeDistribution[m_mpiInfo->rank];
261 const index_t myLast=m_nodeDistribution[m_mpiInfo->rank+1]-1;
262 return (m_nodeId[id]>=myFirst && m_nodeId[id]<=myLast);
263 } else
264 throw RipleyException("ownSample() only implemented for Nodes");
265 #else
266 return true;
267 #endif
268 }
269
270 void Brick::setToGradient(escript::Data& out, const escript::Data& cIn) const
271 {
272 escript::Data& in = *const_cast<escript::Data*>(&cIn);
273 const dim_t numComp = in.getDataPointSize();
274 const double h0 = m_l0/m_gNE0;
275 const double h1 = m_l1/m_gNE1;
276 const double h2 = m_l1/m_gNE2;
277 const double C0 = .044658198738520451079;
278 const double C1 = .16666666666666666667;
279 const double C2 = .21132486540518711775;
280 const double C3 = .25;
281 const double C4 = .5;
282 const double C5 = .62200846792814621559;
283 const double C6 = .78867513459481288225;
284
285 if (out.getFunctionSpace().getTypeCode() == Elements) {
286 /*** GENERATOR SNIP_GRAD_ELEMENTS TOP */
287 #pragma omp parallel for
288 for (index_t k2=0; k2 < m_NE2; ++k2) {
289 for (index_t k1=0; k1 < m_NE1; ++k1) {
290 for (index_t k0=0; k0 < m_NE0; ++k0) {
291 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,k2, m_N0,m_N1));
292 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,k2+1, m_N0,m_N1));
293 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,k2+1, m_N0,m_N1));
294 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2+1, m_N0,m_N1));
295 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2, m_N0,m_N1));
296 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,k2+1, m_N0,m_N1));
297 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,k2, m_N0,m_N1));
298 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,k2, m_N0,m_N1));
299 double* o = out.getSampleDataRW(INDEX3(k0,k1,k2,m_NE0,m_NE1));
300 for (index_t i=0; i < numComp; ++i) {
301 const double V0=((f_100[i]-f_000[i])*C5 + (f_111[i]-f_011[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / h0;
302 const double V1=((f_110[i]-f_010[i])*C5 + (f_101[i]-f_001[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / h0;
303 const double V2=((f_101[i]-f_001[i])*C5 + (f_110[i]-f_010[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / h0;
304 const double V3=((f_111[i]-f_011[i])*C5 + (f_100[i]-f_000[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / h0;
305 const double V4=((f_010[i]-f_000[i])*C5 + (f_111[i]-f_101[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / h1;
306 const double V5=((f_110[i]-f_100[i])*C5 + (f_011[i]-f_001[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / h1;
307 const double V6=((f_011[i]-f_001[i])*C5 + (f_110[i]-f_100[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / h1;
308 const double V7=((f_111[i]-f_101[i])*C5 + (f_010[i]-f_000[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / h1;
309 const double V8=((f_001[i]-f_000[i])*C5 + (f_111[i]-f_110[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / h2;
310 const double V9=((f_101[i]-f_100[i])*C5 + (f_011[i]-f_010[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / h2;
311 const double V10=((f_011[i]-f_010[i])*C5 + (f_101[i]-f_100[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / h2;
312 const double V11=((f_111[i]-f_110[i])*C5 + (f_001[i]-f_000[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / h2;
313 o[INDEX3(i,0,0,numComp,3)] = V0;
314 o[INDEX3(i,1,0,numComp,3)] = V4;
315 o[INDEX3(i,2,0,numComp,3)] = V8;
316 o[INDEX3(i,0,1,numComp,3)] = V0;
317 o[INDEX3(i,1,1,numComp,3)] = V5;
318 o[INDEX3(i,2,1,numComp,3)] = V9;
319 o[INDEX3(i,0,2,numComp,3)] = V1;
320 o[INDEX3(i,1,2,numComp,3)] = V4;
321 o[INDEX3(i,2,2,numComp,3)] = V10;
322 o[INDEX3(i,0,3,numComp,3)] = V1;
323 o[INDEX3(i,1,3,numComp,3)] = V5;
324 o[INDEX3(i,2,3,numComp,3)] = V11;
325 o[INDEX3(i,0,4,numComp,3)] = V2;
326 o[INDEX3(i,1,4,numComp,3)] = V6;
327 o[INDEX3(i,2,4,numComp,3)] = V8;
328 o[INDEX3(i,0,5,numComp,3)] = V2;
329 o[INDEX3(i,1,5,numComp,3)] = V7;
330 o[INDEX3(i,2,5,numComp,3)] = V9;
331 o[INDEX3(i,0,6,numComp,3)] = V3;
332 o[INDEX3(i,1,6,numComp,3)] = V6;
333 o[INDEX3(i,2,6,numComp,3)] = V10;
334 o[INDEX3(i,0,7,numComp,3)] = V3;
335 o[INDEX3(i,1,7,numComp,3)] = V7;
336 o[INDEX3(i,2,7,numComp,3)] = V11;
337 } /* end of component loop i */
338 } /* end of k0 loop */
339 } /* end of k1 loop */
340 } /* end of k2 loop */
341 /* GENERATOR SNIP_GRAD_ELEMENTS BOTTOM */
342 } else if (out.getFunctionSpace().getTypeCode() == ReducedElements) {
343 /*** GENERATOR SNIP_GRAD_REDUCED_ELEMENTS TOP */
344 #pragma omp parallel for
345 for (index_t k2=0; k2 < m_NE2; ++k2) {
346 for (index_t k1=0; k1 < m_NE1; ++k1) {
347 for (index_t k0=0; k0 < m_NE0; ++k0) {
348 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,k2, m_N0,m_N1));
349 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,k2+1, m_N0,m_N1));
350 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,k2+1, m_N0,m_N1));
351 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,k2+1, m_N0,m_N1));
352 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,k2, m_N0,m_N1));
353 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2, m_N0,m_N1));
354 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,k2, m_N0,m_N1));
355 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2+1, m_N0,m_N1));
356 double* o = out.getSampleDataRW(INDEX3(k0,k1,k2,m_NE0,m_NE1));
357 for (index_t i=0; i < numComp; ++i) {
358 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_010[i]-f_011[i])*C3 / h0;
359 o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_100[i]-f_101[i])*C3 / h1;
360 o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_011[i]+f_101[i]+f_111[i]-f_000[i]-f_010[i]-f_100[i]-f_110[i])*C3 / h2;
361 } /* end of component loop i */
362 } /* end of k0 loop */
363 } /* end of k1 loop */
364 } /* end of k2 loop */
365 /* GENERATOR SNIP_GRAD_REDUCED_ELEMENTS BOTTOM */
366 } else if (out.getFunctionSpace().getTypeCode() == FaceElements) {
367 /*** GENERATOR SNIP_GRAD_FACES TOP */
368 #pragma omp parallel
369 {
370 if (m_faceOffset[0] > -1) {
371 #pragma omp for nowait
372 for (index_t k2=0; k2 < m_NE2; ++k2) {
373 for (index_t k1=0; k1 < m_NE1; ++k1) {
374 const register double* f_000 = in.getSampleDataRO(INDEX3(0,k1,k2, m_N0,m_N1));
375 const register double* f_001 = in.getSampleDataRO(INDEX3(0,k1,k2+1, m_N0,m_N1));
376 const register double* f_101 = in.getSampleDataRO(INDEX3(1,k1,k2+1, m_N0,m_N1));
377 const register double* f_111 = in.getSampleDataRO(INDEX3(1,k1+1,k2+1, m_N0,m_N1));
378 const register double* f_110 = in.getSampleDataRO(INDEX3(1,k1+1,k2, m_N0,m_N1));
379 const register double* f_011 = in.getSampleDataRO(INDEX3(0,k1+1,k2+1, m_N0,m_N1));
380 const register double* f_010 = in.getSampleDataRO(INDEX3(0,k1+1,k2, m_N0,m_N1));
381 const register double* f_100 = in.getSampleDataRO(INDEX3(1,k1,k2, m_N0,m_N1));
382 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE1));
383 for (index_t i=0; i < numComp; ++i) {
384 const double V0=((f_010[i]-f_000[i])*C6 + (f_011[i]-f_001[i])*C2) / h1;
385 const double V1=((f_010[i]-f_000[i])*C2 + (f_011[i]-f_001[i])*C6) / h1;
386 const double V2=((f_001[i]-f_000[i])*C6 + (f_010[i]-f_011[i])*C2) / h2;
387 const double V3=((f_001[i]-f_000[i])*C2 + (f_011[i]-f_010[i])*C6) / h2;
388 o[INDEX3(i,0,0,numComp,3)] = ((f_100[i]-f_000[i])*C5 + (f_111[i]-f_011[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / h0;
389 o[INDEX3(i,1,0,numComp,3)] = V0;
390 o[INDEX3(i,2,0,numComp,3)] = V2;
391 o[INDEX3(i,0,1,numComp,3)] = ((f_110[i]-f_010[i])*C5 + (f_101[i]-f_001[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / h0;
392 o[INDEX3(i,1,1,numComp,3)] = V0;
393 o[INDEX3(i,2,1,numComp,3)] = V3;
394 o[INDEX3(i,0,2,numComp,3)] = ((f_101[i]-f_001[i])*C5 + (f_110[i]-f_010[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / h0;
395 o[INDEX3(i,1,2,numComp,3)] = V1;
396 o[INDEX3(i,2,2,numComp,3)] = V2;
397 o[INDEX3(i,0,3,numComp,3)] = ((f_111[i]-f_011[i])*C5 + (f_100[i]-f_000[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / h0;
398 o[INDEX3(i,1,3,numComp,3)] = V1;
399 o[INDEX3(i,2,3,numComp,3)] = V3;
400 } /* end of component loop i */
401 } /* end of k1 loop */
402 } /* end of k2 loop */
403 } /* end of face 0 */
404 if (m_faceOffset[1] > -1) {
405 #pragma omp for nowait
406 for (index_t k2=0; k2 < m_NE2; ++k2) {
407 for (index_t k1=0; k1 < m_NE1; ++k1) {
408 const register double* f_000 = in.getSampleDataRO(INDEX3(m_N0-2,k1,k2, m_N0,m_N1));
409 const register double* f_001 = in.getSampleDataRO(INDEX3(m_N0-2,k1,k2+1, m_N0,m_N1));
410 const register double* f_101 = in.getSampleDataRO(INDEX3(m_N0-1,k1,k2+1, m_N0,m_N1));
411 const register double* f_111 = in.getSampleDataRO(INDEX3(m_N0-1,k1+1,k2+1, m_N0,m_N1));
412 const register double* f_110 = in.getSampleDataRO(INDEX3(m_N0-1,k1+1,k2, m_N0,m_N1));
413 const register double* f_011 = in.getSampleDataRO(INDEX3(m_N0-2,k1+1,k2+1, m_N0,m_N1));
414 const register double* f_010 = in.getSampleDataRO(INDEX3(m_N0-2,k1+1,k2, m_N0,m_N1));
415 const register double* f_100 = in.getSampleDataRO(INDEX3(m_N0-1,k1,k2, m_N0,m_N1));
416 double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE1));
417 for (index_t i=0; i < numComp; ++i) {
418 const double V0=((f_110[i]-f_100[i])*C6 + (f_111[i]-f_101[i])*C2) / h1;
419 const double V1=((f_110[i]-f_100[i])*C2 + (f_111[i]-f_101[i])*C6) / h1;
420 const double V2=((f_101[i]-f_100[i])*C6 + (f_111[i]-f_110[i])*C2) / h2;
421 const double V3=((f_101[i]-f_100[i])*C2 + (f_111[i]-f_110[i])*C6) / h2;
422 o[INDEX3(i,0,0,numComp,3)] = ((f_100[i]-f_000[i])*C5 + (f_111[i]-f_011[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / h0;
423 o[INDEX3(i,1,0,numComp,3)] = V0;
424 o[INDEX3(i,2,0,numComp,3)] = V2;
425 o[INDEX3(i,0,1,numComp,3)] = ((f_110[i]-f_010[i])*C5 + (f_101[i]-f_001[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / h0;
426 o[INDEX3(i,1,1,numComp,3)] = V0;
427 o[INDEX3(i,2,1,numComp,3)] = V3;
428 o[INDEX3(i,0,2,numComp,3)] = ((f_101[i]-f_001[i])*C5 + (f_110[i]-f_010[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / h0;
429 o[INDEX3(i,1,2,numComp,3)] = V1;
430 o[INDEX3(i,2,2,numComp,3)] = V2;
431 o[INDEX3(i,0,3,numComp,3)] = ((f_111[i]-f_011[i])*C5 + (f_100[i]-f_000[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / h0;
432 o[INDEX3(i,1,3,numComp,3)] = V1;
433 o[INDEX3(i,2,3,numComp,3)] = V3;
434 } /* end of component loop i */
435 } /* end of k1 loop */
436 } /* end of k2 loop */
437 } /* end of face 1 */
438 if (m_faceOffset[2] > -1) {
439 #pragma omp for nowait
440 for (index_t k2=0; k2 < m_NE2; ++k2) {
441 for (index_t k0=0; k0 < m_NE0; ++k0) {
442 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,0,k2, m_N0,m_N1));
443 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,0,k2+1, m_N0,m_N1));
444 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,0,k2+1, m_N0,m_N1));
445 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,0,k2, m_N0,m_N1));
446 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,1,k2+1, m_N0,m_N1));
447 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,1,k2, m_N0,m_N1));
448 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,1,k2+1, m_N0,m_N1));
449 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,1,k2, m_N0,m_N1));
450 double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE0));
451 for (index_t i=0; i < numComp; ++i) {
452 const double V0=((f_100[i]-f_000[i])*C6 + (f_101[i]-f_001[i])*C2) / h0;
453 const double V1=((f_001[i]-f_000[i])*C6 + (f_101[i]-f_100[i])*C2) / h2;
454 const double V2=((f_001[i]-f_000[i])*C2 + (f_101[i]-f_100[i])*C6) / h2;
455 o[INDEX3(i,0,0,numComp,3)] = V0;
456 o[INDEX3(i,1,0,numComp,3)] = ((f_010[i]-f_000[i])*C5 + (f_111[i]-f_101[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / h1;
457 o[INDEX3(i,2,0,numComp,3)] = V1;
458 o[INDEX3(i,0,1,numComp,3)] = V0;
459 o[INDEX3(i,1,1,numComp,3)] = ((f_110[i]-f_100[i])*C5 + (f_011[i]-f_001[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / h1;
460 o[INDEX3(i,2,1,numComp,3)] = V2;
461 o[INDEX3(i,0,2,numComp,3)] = V0;
462 o[INDEX3(i,1,2,numComp,3)] = ((f_011[i]-f_001[i])*C5 + (f_110[i]-f_100[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / h1;
463 o[INDEX3(i,2,2,numComp,3)] = V1;
464 o[INDEX3(i,0,3,numComp,3)] = V0;
465 o[INDEX3(i,1,3,numComp,3)] = ((f_111[i]-f_101[i])*C5 + (f_010[i]-f_000[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / h1;
466 o[INDEX3(i,2,3,numComp,3)] = V2;
467 } /* end of component loop i */
468 } /* end of k0 loop */
469 } /* end of k2 loop */
470 } /* end of face 2 */
471 if (m_faceOffset[3] > -1) {
472 #pragma omp for nowait
473 for (index_t k2=0; k2 < m_NE2; ++k2) {
474 for (index_t k0=0; k0 < m_NE0; ++k0) {
475 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,m_N1-1,k2+1, m_N0,m_N1));
476 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,m_N1-1,k2, m_N0,m_N1));
477 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,m_N1-1,k2, m_N0,m_N1));
478 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,m_N1-1,k2+1, m_N0,m_N1));
479 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,m_N1-2,k2, m_N0,m_N1));
480 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,m_N1-2,k2+1, m_N0,m_N1));
481 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,m_N1-2,k2+1, m_N0,m_N1));
482 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,m_N1-2,k2, m_N0,m_N1));
483 double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE0));
484 for (index_t i=0; i < numComp; ++i) {
485 const double V0=((f_110[i]-f_010[i])*C6 + (f_111[i]-f_011[i])*C2) / h0;
486 const double V1=((f_110[i]-f_010[i])*C2 + (f_111[i]-f_011[i])*C6) / h0;
487 const double V2=((f_011[i]-f_010[i])*C6 + (f_111[i]-f_110[i])*C2) / h2;
488 const double V3=((f_011[i]-f_010[i])*C2 + (f_111[i]-f_110[i])*C6) / h2;
489 o[INDEX3(i,0,0,numComp,3)] = V0;
490 o[INDEX3(i,1,0,numComp,3)] = ((f_010[i]-f_000[i])*C5 + (f_111[i]-f_101[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / h1;
491 o[INDEX3(i,2,0,numComp,3)] = V2;
492 o[INDEX3(i,0,1,numComp,3)] = V0;
493 o[INDEX3(i,1,1,numComp,3)] = ((f_110[i]-f_100[i])*C5 + (f_011[i]-f_001[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / h1;
494 o[INDEX3(i,2,1,numComp,3)] = V3;
495 o[INDEX3(i,0,2,numComp,3)] = V1;
496 o[INDEX3(i,1,2,numComp,3)] = ((f_011[i]-f_001[i])*C5 + (f_110[i]-f_100[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / h1;
497 o[INDEX3(i,2,2,numComp,3)] = V2;
498 o[INDEX3(i,0,3,numComp,3)] = V1;
499 o[INDEX3(i,1,3,numComp,3)] = ((f_111[i]-f_101[i])*C5 + (f_010[i]-f_000[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / h1;
500 o[INDEX3(i,2,3,numComp,3)] = V3;
501 } /* end of component loop i */
502 } /* end of k0 loop */
503 } /* end of k2 loop */
504 } /* end of face 3 */
505 if (m_faceOffset[4] > -1) {
506 #pragma omp for nowait
507 for (index_t k1=0; k1 < m_NE1; ++k1) {
508 for (index_t k0=0; k0 < m_NE0; ++k0) {
509 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,0, m_N0,m_N1));
510 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,0, m_N0,m_N1));
511 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,0, m_N0,m_N1));
512 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,0, m_N0,m_N1));
513 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,1, m_N0,m_N1));
514 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,1, m_N0,m_N1));
515 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,1, m_N0,m_N1));
516 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,1, m_N0,m_N1));
517 double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE0));
518 for (index_t i=0; i < numComp; ++i) {
519 const double V0=((f_100[i]-f_000[i])*C6 + (f_110[i]-f_010[i])*C2) / h0;
520 const double V1=((f_100[i]-f_000[i])*C2 + (f_110[i]-f_010[i])*C6) / h0;
521 const double V2=((f_010[i]-f_000[i])*C6 + (f_110[i]-f_100[i])*C2) / h1;
522 const double V3=((f_010[i]-f_000[i])*C2 + (f_110[i]-f_100[i])*C6) / h1;
523 o[INDEX3(i,0,0,numComp,3)] = V0;
524 o[INDEX3(i,1,0,numComp,3)] = V2;
525 o[INDEX3(i,2,0,numComp,3)] = ((f_001[i]-f_000[i])*C5 + (f_111[i]-f_110[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / h2;
526 o[INDEX3(i,0,1,numComp,3)] = V0;
527 o[INDEX3(i,1,1,numComp,3)] = V3;
528 o[INDEX3(i,2,1,numComp,3)] = ((f_101[i]-f_100[i])*C5 + (f_011[i]-f_010[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / h2;
529 o[INDEX3(i,0,2,numComp,3)] = V1;
530 o[INDEX3(i,1,2,numComp,3)] = V2;
531 o[INDEX3(i,2,2,numComp,3)] = ((f_011[i]-f_010[i])*C5 + (f_101[i]-f_100[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / h2;
532 o[INDEX3(i,0,3,numComp,3)] = V1;
533 o[INDEX3(i,1,3,numComp,3)] = V3;
534 o[INDEX3(i,2,3,numComp,3)] = ((f_111[i]-f_110[i])*C5 + (f_001[i]-f_000[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / h2;
535 } /* end of component loop i */
536 } /* end of k0 loop */
537 } /* end of k1 loop */
538 } /* end of face 4 */
539 if (m_faceOffset[5] > -1) {
540 #pragma omp for nowait
541 for (index_t k1=0; k1 < m_NE1; ++k1) {
542 for (index_t k0=0; k0 < m_NE0; ++k0) {
543 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,m_N2-1, m_N0,m_N1));
544 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,m_N2-1, m_N0,m_N1));
545 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,m_N2-1, m_N0,m_N1));
546 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,m_N2-1, m_N0,m_N1));
547 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,m_N2-2, m_N0,m_N1));
548 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,m_N2-2, m_N0,m_N1));
549 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,m_N2-2, m_N0,m_N1));
550 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,m_N2-2, m_N0,m_N1));
551 double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE0));
552 for (index_t i=0; i < numComp; ++i) {
553 const double V0=((f_101[i]-f_001[i])*C6 + (f_111[i]-f_011[i])*C2) / h0;
554 const double V1=((f_101[i]-f_001[i])*C2 + (f_111[i]-f_011[i])*C6) / h0;
555 const double V2=((f_011[i]-f_001[i])*C6 + (f_111[i]-f_101[i])*C2) / h1;
556 const double V3=((f_011[i]-f_001[i])*C2 + (f_111[i]-f_101[i])*C6) / h1;
557 o[INDEX3(i,0,0,numComp,3)] = V0;
558 o[INDEX3(i,1,0,numComp,3)] = V2;
559 o[INDEX3(i,2,0,numComp,3)] = ((f_001[i]-f_000[i])*C5 + (f_111[i]-f_110[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / h2;
560 o[INDEX3(i,0,1,numComp,3)] = V0;
561 o[INDEX3(i,1,1,numComp,3)] = V3;
562 o[INDEX3(i,2,1,numComp,3)] = ((f_011[i]-f_010[i])*C0 + (f_101[i]-f_100[i])*C5 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / h2;
563 o[INDEX3(i,0,2,numComp,3)] = V1;
564 o[INDEX3(i,1,2,numComp,3)] = V2;
565 o[INDEX3(i,2,2,numComp,3)] = ((f_011[i]-f_010[i])*C5 + (f_101[i]-f_100[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / h2;
566 o[INDEX3(i,0,3,numComp,3)] = V1;
567 o[INDEX3(i,1,3,numComp,3)] = V3;
568 o[INDEX3(i,2,3,numComp,3)] = ((f_001[i]-f_000[i])*C0 + (f_111[i]-f_110[i])*C5 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / h2;
569 } /* end of component loop i */
570 } /* end of k0 loop */
571 } /* end of k1 loop */
572 } /* end of face 5 */
573 } // end of parallel section
574 /* GENERATOR SNIP_GRAD_FACES BOTTOM */
575 } else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
576 /*** GENERATOR SNIP_GRAD_REDUCED_FACES TOP */
577 #pragma omp parallel
578 {
579 if (m_faceOffset[0] > -1) {
580 #pragma omp for nowait
581 for (index_t k2=0; k2 < m_NE2; ++k2) {
582 for (index_t k1=0; k1 < m_NE1; ++k1) {
583 const register double* f_000 = in.getSampleDataRO(INDEX3(0,k1,k2, m_N0,m_N1));
584 const register double* f_001 = in.getSampleDataRO(INDEX3(0,k1,k2+1, m_N0,m_N1));
585 const register double* f_101 = in.getSampleDataRO(INDEX3(1,k1,k2+1, m_N0,m_N1));
586 const register double* f_011 = in.getSampleDataRO(INDEX3(0,k1+1,k2+1, m_N0,m_N1));
587 const register double* f_100 = in.getSampleDataRO(INDEX3(1,k1,k2, m_N0,m_N1));
588 const register double* f_110 = in.getSampleDataRO(INDEX3(1,k1+1,k2, m_N0,m_N1));
589 const register double* f_010 = in.getSampleDataRO(INDEX3(0,k1+1,k2, m_N0,m_N1));
590 const register double* f_111 = in.getSampleDataRO(INDEX3(1,k1+1,k2+1, m_N0,m_N1));
591 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE1));
592 for (index_t i=0; i < numComp; ++i) {
593 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_010[i]-f_011[i])*C3 / h0;
594 o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]-f_000[i]-f_001[i])*C4 / h1;
595 o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_011[i]-f_000[i]-f_010[i])*C4 / h2;
596 } /* end of component loop i */
597 } /* end of k1 loop */
598 } /* end of k2 loop */
599 } /* end of face 0 */
600 if (m_faceOffset[1] > -1) {
601 #pragma omp for nowait
602 for (index_t k2=0; k2 < m_NE2; ++k2) {
603 for (index_t k1=0; k1 < m_NE1; ++k1) {
604 const register double* f_000 = in.getSampleDataRO(INDEX3(m_N0-2,k1,k2, m_N0,m_N1));
605 const register double* f_001 = in.getSampleDataRO(INDEX3(m_N0-2,k1,k2+1, m_N0,m_N1));
606 const register double* f_101 = in.getSampleDataRO(INDEX3(m_N0-1,k1,k2+1, m_N0,m_N1));
607 const register double* f_011 = in.getSampleDataRO(INDEX3(m_N0-2,k1+1,k2+1, m_N0,m_N1));
608 const register double* f_100 = in.getSampleDataRO(INDEX3(m_N0-1,k1,k2, m_N0,m_N1));
609 const register double* f_110 = in.getSampleDataRO(INDEX3(m_N0-1,k1+1,k2, m_N0,m_N1));
610 const register double* f_010 = in.getSampleDataRO(INDEX3(m_N0-2,k1+1,k2, m_N0,m_N1));
611 const register double* f_111 = in.getSampleDataRO(INDEX3(m_N0-1,k1+1,k2+1, m_N0,m_N1));
612 double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE1));
613 for (index_t i=0; i < numComp; ++i) {
614 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_010[i]-f_011[i])*C3 / h0;
615 o[INDEX3(i,1,0,numComp,3)] = (f_110[i]+f_111[i]-f_100[i]-f_101[i])*C4 / h1;
616 o[INDEX3(i,2,0,numComp,3)] = (f_101[i]+f_111[i]-f_100[i]-f_110[i])*C4 / h2;
617 } /* end of component loop i */
618 } /* end of k1 loop */
619 } /* end of k2 loop */
620 } /* end of face 1 */
621 if (m_faceOffset[2] > -1) {
622 #pragma omp for nowait
623 for (index_t k2=0; k2 < m_NE2; ++k2) {
624 for (index_t k0=0; k0 < m_NE0; ++k0) {
625 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,0,k2, m_N0,m_N1));
626 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,0,k2+1, m_N0,m_N1));
627 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,0,k2+1, m_N0,m_N1));
628 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,0,k2, m_N0,m_N1));
629 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,1,k2+1, m_N0,m_N1));
630 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,1,k2, m_N0,m_N1));
631 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,1,k2, m_N0,m_N1));
632 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,1,k2+1, m_N0,m_N1));
633 double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE0));
634 for (index_t i=0; i < numComp; ++i) {
635 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]-f_000[i]-f_001[i])*C4 / h0;
636 o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_100[i]-f_101[i])*C3 / h1;
637 o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_101[i]-f_000[i]-f_100[i])*C4 / h2;
638 } /* end of component loop i */
639 } /* end of k0 loop */
640 } /* end of k2 loop */
641 } /* end of face 2 */
642 if (m_faceOffset[3] > -1) {
643 #pragma omp for nowait
644 for (index_t k2=0; k2 < m_NE2; ++k2) {
645 for (index_t k0=0; k0 < m_NE0; ++k0) {
646 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,m_N1-1,k2+1, m_N0,m_N1));
647 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,m_N1-1,k2, m_N0,m_N1));
648 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,m_N1-1,k2, m_N0,m_N1));
649 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,m_N1-1,k2+1, m_N0,m_N1));
650 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,m_N1-2,k2, m_N0,m_N1));
651 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,m_N1-2,k2+1, m_N0,m_N1));
652 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,m_N1-2,k2+1, m_N0,m_N1));
653 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,m_N1-2,k2, m_N0,m_N1));
654 double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE0));
655 for (index_t i=0; i < numComp; ++i) {
656 o[INDEX3(i,0,0,numComp,3)] = (f_110[i]+f_111[i]-f_010[i]-f_011[i])*C4 / h0;
657 o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_100[i]-f_101[i])*C3 / h1;
658 o[INDEX3(i,2,0,numComp,3)] = (f_011[i]+f_111[i]-f_010[i]-f_110[i])*C4 / h2;
659 } /* end of component loop i */
660 } /* end of k0 loop */
661 } /* end of k2 loop */
662 } /* end of face 3 */
663 if (m_faceOffset[4] > -1) {
664 #pragma omp for nowait
665 for (index_t k1=0; k1 < m_NE1; ++k1) {
666 for (index_t k0=0; k0 < m_NE0; ++k0) {
667 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,0, m_N0,m_N1));
668 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,0, m_N0,m_N1));
669 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,0, m_N0,m_N1));
670 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,0, m_N0,m_N1));
671 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,1, m_N0,m_N1));
672 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,1, m_N0,m_N1));
673 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,1, m_N0,m_N1));
674 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,1, m_N0,m_N1));
675 double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE0));
676 for (index_t i=0; i < numComp; ++i) {
677 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_110[i]-f_000[i]-f_010[i])*C4 / h0;
678 o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_110[i]-f_000[i]-f_100[i])*C4 / h1;
679 o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_011[i]+f_101[i]+f_111[i]-f_000[i]-f_010[i]-f_100[i]-f_110[i])*C4 / h2;
680 } /* end of component loop i */
681 } /* end of k0 loop */
682 } /* end of k1 loop */
683 } /* end of face 4 */
684 if (m_faceOffset[5] > -1) {
685 #pragma omp for nowait
686 for (index_t k1=0; k1 < m_NE1; ++k1) {
687 for (index_t k0=0; k0 < m_NE0; ++k0) {
688 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,m_N2-1, m_N0,m_N1));
689 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,m_N2-1, m_N0,m_N1));
690 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,m_N2-1, m_N0,m_N1));
691 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,m_N2-1, m_N0,m_N1));
692 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,m_N2-2, m_N0,m_N1));
693 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,m_N2-2, m_N0,m_N1));
694 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,m_N2-2, m_N0,m_N1));
695 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,m_N2-2, m_N0,m_N1));
696 double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE0));
697 for (index_t i=0; i < numComp; ++i) {
698 o[INDEX3(i,0,0,numComp,3)] = (f_101[i]+f_111[i]-f_001[i]-f_011[i])*C4 / h0;
699 o[INDEX3(i,1,0,numComp,3)] = (f_011[i]+f_111[i]-f_001[i]-f_101[i])*C4 / h1;
700 o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_011[i]+f_101[i]+f_111[i]-f_000[i]-f_010[i]-f_100[i]-f_110[i])*C3 / h2;
701 } /* end of component loop i */
702 } /* end of k0 loop */
703 } /* end of k1 loop */
704 } /* end of face 5 */
705 } // end of parallel section
706 /* GENERATOR SNIP_GRAD_REDUCED_FACES BOTTOM */
707 } else {
708 stringstream msg;
709 msg << "setToGradient() not implemented for "
710 << functionSpaceTypeAsString(out.getFunctionSpace().getTypeCode());
711 throw RipleyException(msg.str());
712 }
713 }
714
715 void Brick::setToIntegrals(vector<double>& integrals, const escript::Data& arg) const
716 {
717 escript::Data& in = *const_cast<escript::Data*>(&arg);
718 const dim_t numComp = in.getDataPointSize();
719 const double h0 = m_l0/m_gNE0;
720 const double h1 = m_l1/m_gNE1;
721 const double h2 = m_l2/m_gNE2;
722 if (arg.getFunctionSpace().getTypeCode() == Elements) {
723 const double w_0 = h0*h1*h2/8.;
724 #pragma omp parallel
725 {
726 vector<double> int_local(numComp, 0);
727 #pragma omp for nowait
728 for (index_t k2 = 0; k2 < m_NE2; ++k2) {
729 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
730 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
731 const double* f = in.getSampleDataRO(INDEX3(k0, k1, k2, m_NE0, m_NE1));
732 for (index_t i=0; i < numComp; ++i) {
733 const register double f_0 = f[INDEX2(i,0,numComp)];
734 const register double f_1 = f[INDEX2(i,1,numComp)];
735 const register double f_2 = f[INDEX2(i,2,numComp)];
736 const register double f_3 = f[INDEX2(i,3,numComp)];
737 const register double f_4 = f[INDEX2(i,4,numComp)];
738 const register double f_5 = f[INDEX2(i,5,numComp)];
739 const register double f_6 = f[INDEX2(i,6,numComp)];
740 const register double f_7 = f[INDEX2(i,7,numComp)];
741 int_local[i]+=(f_0+f_1+f_2+f_3+f_4+f_5+f_6+f_7)*w_0;
742 } /* end of component loop i */
743 } /* end of k0 loop */
744 } /* end of k1 loop */
745 } /* end of k2 loop */
746
747 #pragma omp critical
748 for (index_t i=0; i<numComp; i++)
749 integrals[i]+=int_local[i];
750 } // end of parallel section
751 } else if (arg.getFunctionSpace().getTypeCode() == ReducedElements) {
752 const double w_0 = h0*h1*h2;
753 #pragma omp parallel
754 {
755 vector<double> int_local(numComp, 0);
756 #pragma omp for nowait
757 for (index_t k2 = 0; k2 < m_NE2; ++k2) {
758 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
759 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
760 const double* f = in.getSampleDataRO(INDEX3(k0, k1, k2, m_NE0, m_NE1));
761 for (index_t i=0; i < numComp; ++i) {
762 int_local[i]+=f[i]*w_0;
763 } /* end of component loop i */
764 } /* end of k0 loop */
765 } /* end of k1 loop */
766 } /* end of k2 loop */
767
768 #pragma omp critical
769 for (index_t i=0; i<numComp; i++)
770 integrals[i]+=int_local[i];
771 } // end of parallel section
772 } else if (arg.getFunctionSpace().getTypeCode() == FaceElements) {
773 const double w_0 = h1*h2/4.;
774 const double w_1 = h0*h2/4.;
775 const double w_2 = h0*h1/4.;
776 #pragma omp parallel
777 {
778 vector<double> int_local(numComp, 0);
779 if (m_faceOffset[0] > -1) {
780 #pragma omp for nowait
781 for (index_t k2=0; k2 < m_NE2; ++k2) {
782 for (index_t k1=0; k1 < m_NE1; ++k1) {
783 const double* f = in.getSampleDataRO(m_faceOffset[0]+INDEX2(k1,k2,m_NE1));
784 for (index_t i=0; i < numComp; ++i) {
785 const register double f_0 = f[INDEX2(i,0,numComp)];
786 const register double f_1 = f[INDEX2(i,1,numComp)];
787 const register double f_2 = f[INDEX2(i,2,numComp)];
788 const register double f_3 = f[INDEX2(i,3,numComp)];
789 int_local[i]+=(f_0+f_1+f_2+f_3)*w_0;
790 } /* end of component loop i */
791 } /* end of k1 loop */
792 } /* end of k2 loop */
793 }
794
795 if (m_faceOffset[1] > -1) {
796 #pragma omp for nowait
797 for (index_t k2=0; k2 < m_NE2; ++k2) {
798 for (index_t k1=0; k1 < m_NE1; ++k1) {
799 const double* f = in.getSampleDataRO(m_faceOffset[1]+INDEX2(k1,k2,m_NE1));
800 for (index_t i=0; i < numComp; ++i) {
801 const register double f_0 = f[INDEX2(i,0,numComp)];
802 const register double f_1 = f[INDEX2(i,1,numComp)];
803 const register double f_2 = f[INDEX2(i,2,numComp)];
804 const register double f_3 = f[INDEX2(i,3,numComp)];
805 int_local[i]+=(f_0+f_1+f_2+f_3)*w_0;
806 } /* end of component loop i */
807 } /* end of k1 loop */
808 } /* end of k2 loop */
809 }
810
811 if (m_faceOffset[2] > -1) {
812 #pragma omp for nowait
813 for (index_t k2=0; k2 < m_NE2; ++k2) {
814 for (index_t k0=0; k0 < m_NE0; ++k0) {
815 const double* f = in.getSampleDataRO(m_faceOffset[2]+INDEX2(k0,k2,m_NE0));
816 for (index_t i=0; i < numComp; ++i) {
817 const register double f_0 = f[INDEX2(i,0,numComp)];
818 const register double f_1 = f[INDEX2(i,1,numComp)];
819 const register double f_2 = f[INDEX2(i,2,numComp)];
820 const register double f_3 = f[INDEX2(i,3,numComp)];
821 int_local[i]+=(f_0+f_1+f_2+f_3)*w_1;
822 } /* end of component loop i */
823 } /* end of k1 loop */
824 } /* end of k2 loop */
825 }
826
827 if (m_faceOffset[3] > -1) {
828 #pragma omp for nowait
829 for (index_t k2=0; k2 < m_NE2; ++k2) {
830 for (index_t k0=0; k0 < m_NE0; ++k0) {
831 const double* f = in.getSampleDataRO(m_faceOffset[3]+INDEX2(k0,k2,m_NE0));
832 for (index_t i=0; i < numComp; ++i) {
833 const register double f_0 = f[INDEX2(i,0,numComp)];
834 const register double f_1 = f[INDEX2(i,1,numComp)];
835 const register double f_2 = f[INDEX2(i,2,numComp)];
836 const register double f_3 = f[INDEX2(i,3,numComp)];
837 int_local[i]+=(f_0+f_1+f_2+f_3)*w_1;
838 } /* end of component loop i */
839 } /* end of k1 loop */
840 } /* end of k2 loop */
841 }
842
843 if (m_faceOffset[4] > -1) {
844 #pragma omp for nowait
845 for (index_t k1=0; k1 < m_NE1; ++k1) {
846 for (index_t k0=0; k0 < m_NE0; ++k0) {
847 const double* f = in.getSampleDataRO(m_faceOffset[4]+INDEX2(k0,k1,m_NE0));
848 for (index_t i=0; i < numComp; ++i) {
849 const register double f_0 = f[INDEX2(i,0,numComp)];
850 const register double f_1 = f[INDEX2(i,1,numComp)];
851 const register double f_2 = f[INDEX2(i,2,numComp)];
852 const register double f_3 = f[INDEX2(i,3,numComp)];
853 int_local[i]+=(f_0+f_1+f_2+f_3)*w_2;
854 } /* end of component loop i */
855 } /* end of k1 loop */
856 } /* end of k2 loop */
857 }
858
859 if (m_faceOffset[5] > -1) {
860 #pragma omp for nowait
861 for (index_t k1=0; k1 < m_NE1; ++k1) {
862 for (index_t k0=0; k0 < m_NE0; ++k0) {
863 const double* f = in.getSampleDataRO(m_faceOffset[5]+INDEX2(k0,k1,m_NE0));
864 for (index_t i=0; i < numComp; ++i) {
865 const register double f_0 = f[INDEX2(i,0,numComp)];
866 const register double f_1 = f[INDEX2(i,1,numComp)];
867 const register double f_2 = f[INDEX2(i,2,numComp)];
868 const register double f_3 = f[INDEX2(i,3,numComp)];
869 int_local[i]+=(f_0+f_1+f_2+f_3)*w_2;
870 } /* end of component loop i */
871 } /* end of k1 loop */
872 } /* end of k2 loop */
873 }
874
875 #pragma omp critical
876 for (index_t i=0; i<numComp; i++)
877 integrals[i]+=int_local[i];
878 } // end of parallel section
879
880 } else if (arg.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
881 const double w_0 = h1*h2;
882 const double w_1 = h0*h2;
883 const double w_2 = h0*h1;
884 #pragma omp parallel
885 {
886 vector<double> int_local(numComp, 0);
887 if (m_faceOffset[0] > -1) {
888 #pragma omp for nowait
889 for (index_t k2=0; k2 < m_NE2; ++k2) {
890 for (index_t k1=0; k1 < m_NE1; ++k1) {
891 const double* f = in.getSampleDataRO(m_faceOffset[0]+INDEX2(k1,k2,m_NE1));
892 for (index_t i=0; i < numComp; ++i) {
893 int_local[i]+=f[i]*w_0;
894 } /* end of component loop i */
895 } /* end of k1 loop */
896 } /* end of k2 loop */
897 }
898
899 if (m_faceOffset[1] > -1) {
900 #pragma omp for nowait
901 for (index_t k2=0; k2 < m_NE2; ++k2) {
902 for (index_t k1=0; k1 < m_NE1; ++k1) {
903 const double* f = in.getSampleDataRO(m_faceOffset[1]+INDEX2(k1,k2,m_NE1));
904 for (index_t i=0; i < numComp; ++i) {
905 int_local[i]+=f[i]*w_0;
906 } /* end of component loop i */
907 } /* end of k1 loop */
908 } /* end of k2 loop */
909 }
910
911 if (m_faceOffset[2] > -1) {
912 #pragma omp for nowait
913 for (index_t k2=0; k2 < m_NE2; ++k2) {
914 for (index_t k0=0; k0 < m_NE0; ++k0) {
915 const double* f = in.getSampleDataRO(m_faceOffset[2]+INDEX2(k0,k2,m_NE0));
916 for (index_t i=0; i < numComp; ++i) {
917 int_local[i]+=f[i]*w_1;
918 } /* end of component loop i */
919 } /* end of k1 loop */
920 } /* end of k2 loop */
921 }
922
923 if (m_faceOffset[3] > -1) {
924 #pragma omp for nowait
925 for (index_t k2=0; k2 < m_NE2; ++k2) {
926 for (index_t k0=0; k0 < m_NE0; ++k0) {
927 const double* f = in.getSampleDataRO(m_faceOffset[3]+INDEX2(k0,k2,m_NE0));
928 for (index_t i=0; i < numComp; ++i) {
929 int_local[i]+=f[i]*w_1;
930 } /* end of component loop i */
931 } /* end of k1 loop */
932 } /* end of k2 loop */
933 }
934
935 if (m_faceOffset[4] > -1) {
936 #pragma omp for nowait
937 for (index_t k1=0; k1 < m_NE1; ++k1) {
938 for (index_t k0=0; k0 < m_NE0; ++k0) {
939 const double* f = in.getSampleDataRO(m_faceOffset[4]+INDEX2(k0,k1,m_NE0));
940 for (index_t i=0; i < numComp; ++i) {
941 int_local[i]+=f[i]*w_2;
942 } /* end of component loop i */
943 } /* end of k1 loop */
944 } /* end of k2 loop */
945 }
946
947 if (m_faceOffset[5] > -1) {
948 #pragma omp for nowait
949 for (index_t k1=0; k1 < m_NE1; ++k1) {
950 for (index_t k0=0; k0 < m_NE0; ++k0) {
951 const double* f = in.getSampleDataRO(m_faceOffset[5]+INDEX2(k0,k1,m_NE0));
952 for (index_t i=0; i < numComp; ++i) {
953 int_local[i]+=f[i]*w_2;
954 } /* end of component loop i */
955 } /* end of k1 loop */
956 } /* end of k2 loop */
957 }
958
959 #pragma omp critical
960 for (index_t i=0; i<numComp; i++)
961 integrals[i]+=int_local[i];
962 } // end of parallel section
963
964 } else {
965 stringstream msg;
966 msg << "setToIntegrals() not implemented for "
967 << functionSpaceTypeAsString(arg.getFunctionSpace().getTypeCode());
968 throw RipleyException(msg.str());
969 }
970 }
971
972 void Brick::setToNormal(escript::Data& out) const
973 {
974 if (out.getFunctionSpace().getTypeCode() == FaceElements) {
975 #pragma omp parallel
976 {
977 if (m_faceOffset[0] > -1) {
978 #pragma omp for nowait
979 for (index_t k2 = 0; k2 < m_NE2; ++k2) {
980 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
981 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE1));
982 // set vector at four quadrature points
983 *o++ = -1.; *o++ = 0.; *o++ = 0.;
984 *o++ = -1.; *o++ = 0.; *o++ = 0.;
985 *o++ = -1.; *o++ = 0.; *o++ = 0.;
986 *o++ = -1.; *o++ = 0.; *o = 0.;
987 }
988 }
989 }
990
991 if (m_faceOffset[1] > -1) {
992 #pragma omp for nowait
993 for (index_t k2 = 0; k2 < m_NE2; ++k2) {
994 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
995 double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE1));
996 // set vector at four quadrature points
997 *o++ = 1.; *o++ = 0.; *o++ = 0.;
998 *o++ = 1.; *o++ = 0.; *o++ = 0.;
999 *o++ = 1.; *o++ = 0.; *o++ = 0.;
1000 *o++ = 1.; *o++ = 0.; *o = 0.;
1001 }
1002 }
1003 }
1004
1005 if (m_faceOffset[2] > -1) {
1006 #pragma omp for nowait
1007 for (index_t k2 = 0; k2 < m_NE2; ++k2) {
1008 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1009 double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE0));
1010 // set vector at four quadrature points
1011 *o++ = 0.; *o++ = -1.; *o++ = 0.;
1012 *o++ = 0.; *o++ = -1.; *o++ = 0.;
1013 *o++ = 0.; *o++ = -1.; *o++ = 0.;
1014 *o++ = 0.; *o++ = -1.; *o = 0.;
1015 }
1016 }
1017 }
1018
1019 if (m_faceOffset[3] > -1) {
1020 #pragma omp for nowait
1021 for (index_t k2 = 0; k2 < m_NE2; ++k2) {
1022 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1023 double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE0));
1024 // set vector at four quadrature points
1025 *o++ = 0.; *o++ = 1.; *o++ = 0.;
1026 *o++ = 0.; *o++ = 1.; *o++ = 0.;
1027 *o++ = 0.; *o++ = 1.; *o++ = 0.;
1028 *o++ = 0.; *o++ = 1.; *o = 0.;
1029 }
1030 }
1031 }
1032
1033 if (m_faceOffset[4] > -1) {
1034 #pragma omp for nowait
1035 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
1036 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1037 double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE0));
1038 // set vector at four quadrature points
1039 *o++ = 0.; *o++ = 0.; *o++ = -1.;
1040 *o++ = 0.; *o++ = 0.; *o++ = -1.;
1041 *o++ = 0.; *o++ = 0.; *o++ = -1.;
1042 *o++ = 0.; *o++ = 0.; *o = -1.;
1043 }
1044 }
1045 }
1046
1047 if (m_faceOffset[5] > -1) {
1048 #pragma omp for nowait
1049 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
1050 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1051 double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE0));
1052 // set vector at four quadrature points
1053 *o++ = 0.; *o++ = 0.; *o++ = 1.;
1054 *o++ = 0.; *o++ = 0.; *o++ = 1.;
1055 *o++ = 0.; *o++ = 0.; *o++ = 1.;
1056 *o++ = 0.; *o++ = 0.; *o = 1.;
1057 }
1058 }
1059 }
1060 } // end of parallel section
1061 } else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
1062 #pragma omp parallel
1063 {
1064 if (m_faceOffset[0] > -1) {
1065 #pragma omp for nowait
1066 for (index_t k2 = 0; k2 < m_NE2; ++k2) {
1067 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
1068 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE1));
1069 *o++ = -1.;
1070 *o++ = 0.;
1071 *o = 0.;
1072 }
1073 }
1074 }
1075
1076 if (m_faceOffset[1] > -1) {
1077 #pragma omp for nowait
1078 for (index_t k2 = 0; k2 < m_NE2; ++k2) {
1079 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
1080 double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE1));
1081 *o++ = 1.;
1082 *o++ = 0.;
1083 *o = 0.;
1084 }
1085 }
1086 }
1087
1088 if (m_faceOffset[2] > -1) {
1089 #pragma omp for nowait
1090 for (index_t k2 = 0; k2 < m_NE2; ++k2) {
1091 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1092 double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE0));
1093 *o++ = 0.;
1094 *o++ = -1.;
1095 *o = 0.;
1096 }
1097 }
1098 }
1099
1100 if (m_faceOffset[3] > -1) {
1101 #pragma omp for nowait
1102 for (index_t k2 = 0; k2 < m_NE2; ++k2) {
1103 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1104 double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE0));
1105 *o++ = 0.;
1106 *o++ = 1.;
1107 *o = 0.;
1108 }
1109 }
1110 }
1111
1112 if (m_faceOffset[4] > -1) {
1113 #pragma omp for nowait
1114 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
1115 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1116 double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE0));
1117 *o++ = 0.;
1118 *o++ = 0.;
1119 *o = -1.;
1120 }
1121 }
1122 }
1123
1124 if (m_faceOffset[5] > -1) {
1125 #pragma omp for nowait
1126 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
1127 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1128 double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE0));
1129 *o++ = 0.;
1130 *o++ = 0.;
1131 *o = 1.;
1132 }
1133 }
1134 }
1135 } // end of parallel section
1136
1137 } else {
1138 stringstream msg;
1139 msg << "setToNormal() not implemented for "
1140 << functionSpaceTypeAsString(out.getFunctionSpace().getTypeCode());
1141 throw RipleyException(msg.str());
1142 }
1143 }
1144
1145 Paso_SystemMatrixPattern* Brick::getPattern(bool reducedRowOrder,
1146 bool reducedColOrder) const
1147 {
1148 if (reducedRowOrder || reducedColOrder)
1149 throw RipleyException("getPattern() not implemented for reduced order");
1150
1151 throw RipleyException("getPattern() not implemented");
1152 }
1153
1154 void Brick::Print_Mesh_Info(const bool full) const
1155 {
1156 RipleyDomain::Print_Mesh_Info(full);
1157 if (full) {
1158 cout << " Id Coordinates" << endl;
1159 cout.precision(15);
1160 cout.setf(ios::scientific, ios::floatfield);
1161 pair<double,double> xdx = getFirstCoordAndSpacing(0);
1162 pair<double,double> ydy = getFirstCoordAndSpacing(1);
1163 pair<double,double> zdz = getFirstCoordAndSpacing(2);
1164 for (index_t i=0; i < getNumNodes(); i++) {
1165 cout << " " << setw(5) << m_nodeId[i]
1166 << " " << xdx.first+(i%m_N0)*xdx.second
1167 << " " << ydy.first+(i%(m_N0*m_N1)/m_N0)*ydy.second
1168 << " " << zdz.first+(i/(m_N0*m_N1))*zdz.second << endl;
1169 }
1170 }
1171 }
1172
1173 IndexVector Brick::getNumNodesPerDim() const
1174 {
1175 IndexVector ret;
1176 ret.push_back(m_N0);
1177 ret.push_back(m_N1);
1178 ret.push_back(m_N2);
1179 return ret;
1180 }
1181
1182 IndexVector Brick::getNumElementsPerDim() const
1183 {
1184 IndexVector ret;
1185 ret.push_back(m_NE0);
1186 ret.push_back(m_NE1);
1187 ret.push_back(m_NE2);
1188 return ret;
1189 }
1190
1191 IndexVector Brick::getNumFacesPerBoundary() const
1192 {
1193 IndexVector ret(6, 0);
1194 //left
1195 if (m_offset0==0)
1196 ret[0]=m_NE1*m_NE2;
1197 //right
1198 if (m_mpiInfo->rank%m_NX==m_NX-1)
1199 ret[1]=m_NE1*m_NE2;
1200 //bottom
1201 if (m_offset1==0)
1202 ret[2]=m_NE0*m_NE2;
1203 //top
1204 if (m_mpiInfo->rank%(m_NX*m_NY)/m_NX==m_NY-1)
1205 ret[3]=m_NE0*m_NE2;
1206 //front
1207 if (m_offset2==0)
1208 ret[4]=m_NE0*m_NE1;
1209 //back
1210 if (m_mpiInfo->rank/(m_NX*m_NY)==m_NZ-1)
1211 ret[5]=m_NE0*m_NE1;
1212 return ret;
1213 }
1214
1215 pair<double,double> Brick::getFirstCoordAndSpacing(dim_t dim) const
1216 {
1217 if (dim==0)
1218 return pair<double,double>((m_l0*m_offset0)/m_gNE0, m_l0/m_gNE0);
1219 else if (dim==1)
1220 return pair<double,double>((m_l1*m_offset1)/m_gNE1, m_l1/m_gNE1);
1221 else if (dim==2)
1222 return pair<double,double>((m_l2*m_offset2)/m_gNE2, m_l2/m_gNE2);
1223
1224 throw RipleyException("getFirstCoordAndSpacing(): invalid argument");
1225 }
1226
1227 //protected
1228 dim_t Brick::getNumFaceElements() const
1229 {
1230 dim_t n=0;
1231 //left
1232 if (m_offset0==0)
1233 n+=m_NE1*m_NE2;
1234 //right
1235 if (m_mpiInfo->rank%m_NX==m_NX-1)
1236 n+=m_NE1*m_NE2;
1237 //bottom
1238 if (m_offset1==0)
1239 n+=m_NE0*m_NE2;
1240 //top
1241 if (m_mpiInfo->rank%(m_NX*m_NY)/m_NX==m_NY-1)
1242 n+=m_NE0*m_NE2;
1243 //front
1244 if (m_offset2==0)
1245 n+=m_NE0*m_NE1;
1246 //back
1247 if (m_mpiInfo->rank/(m_NX*m_NY)==m_NZ-1)
1248 n+=m_NE0*m_NE1;
1249
1250 return n;
1251 }
1252
1253 //protected
1254 void Brick::assembleCoordinates(escript::Data& arg) const
1255 {
1256 escriptDataC x = arg.getDataC();
1257 int numDim = m_numDim;
1258 if (!isDataPointShapeEqual(&x, 1, &numDim))
1259 throw RipleyException("setToX: Invalid Data object shape");
1260 if (!numSamplesEqual(&x, 1, getNumNodes()))
1261 throw RipleyException("setToX: Illegal number of samples in Data object");
1262
1263 pair<double,double> xdx = getFirstCoordAndSpacing(0);
1264 pair<double,double> ydy = getFirstCoordAndSpacing(1);
1265 pair<double,double> zdz = getFirstCoordAndSpacing(2);
1266 arg.requireWrite();
1267 #pragma omp parallel for
1268 for (dim_t i2 = 0; i2 < m_N2; i2++) {
1269 for (dim_t i1 = 0; i1 < m_N1; i1++) {
1270 for (dim_t i0 = 0; i0 < m_N0; i0++) {
1271 double* point = arg.getSampleDataRW(i0+m_N0*i1+m_N0*m_N1*i2);
1272 point[0] = xdx.first+i0*xdx.second;
1273 point[1] = ydy.first+i1*ydy.second;
1274 point[2] = zdz.first+i2*zdz.second;
1275 }
1276 }
1277 }
1278 }
1279
1280 //private
1281 void Brick::populateSampleIds()
1282 {
1283 // identifiers are ordered from left to right, bottom to top, front to back
1284 // on each rank, except for the shared nodes which are owned by the rank
1285 // below / to the left / to the front of the current rank
1286
1287 // build node distribution vector first.
1288 // m_nodeDistribution[i] is the first node id on rank i, that is
1289 // rank i owns m_nodeDistribution[i+1]-nodeDistribution[i] nodes
1290 m_nodeDistribution.assign(m_mpiInfo->size+1, 0);
1291 m_nodeDistribution[1]=getNumNodes();
1292 for (dim_t k=1; k<m_mpiInfo->size-1; k++) {
1293 const index_t x = k%m_NX;
1294 const index_t y = k%(m_NX*m_NY)/m_NX;
1295 const index_t z = k/(m_NX*m_NY);
1296 index_t numNodes=getNumNodes();
1297 if (x>0)
1298 numNodes-=m_N1*m_N2;
1299 if (y>0)
1300 numNodes-=m_N0*m_N2;
1301 if (z>0)
1302 numNodes-=m_N0*m_N1;
1303 // if an edge was subtracted twice add it back
1304 if (x>0 && y>0)
1305 numNodes+=m_N2;
1306 if (x>0 && z>0)
1307 numNodes+=m_N1;
1308 if (y>0 && z>0)
1309 numNodes+=m_N0;
1310 // the corner node was removed 3x and added back 3x, so subtract it
1311 if (x>0 && y>0 && z>0)
1312 numNodes--;
1313 m_nodeDistribution[k+1]=m_nodeDistribution[k]+numNodes;
1314 }
1315 m_nodeDistribution[m_mpiInfo->size]=getNumDataPointsGlobal();
1316
1317 m_nodeId.resize(getNumNodes());
1318
1319 // the bottom, left and front planes are not owned by this rank so the
1320 // identifiers need to be computed accordingly
1321 const index_t left = (m_offset0==0 ? 0 : 1);
1322 const index_t bottom = (m_offset1==0 ? 0 : 1);
1323 const index_t front = (m_offset2==0 ? 0 : 1);
1324
1325 // case 1: all nodes on left plane are owned by rank on the left
1326 if (left>0) {
1327 const int neighbour=m_mpiInfo->rank-1;
1328 const index_t leftN0=(neighbour%m_NX == 0 ? m_N0 : m_N0-1);
1329 const index_t leftN1=(neighbour%(m_NX*m_NY)/m_NX==0 ? m_N1 : m_N1-1);
1330 #pragma omp parallel for
1331 for (dim_t i2=front; i2<m_N2; i2++) {
1332 for (dim_t i1=bottom; i1<m_N1; i1++) {
1333 m_nodeId[i1*m_N0+i2*m_N0*m_N1]=m_nodeDistribution[neighbour]
1334 + (i1-bottom+1)*leftN0
1335 + (i2-front)*leftN0*leftN1 - 1;
1336 }
1337 }
1338 }
1339 // case 2: all nodes on bottom plane are owned by rank below
1340 if (bottom>0) {
1341 const int neighbour=m_mpiInfo->rank-m_NX;
1342 const index_t bottomN0=(neighbour%m_NX == 0 ? m_N0 : m_N0-1);
1343 const index_t bottomN1=(neighbour%(m_NX*m_NY)/m_NX==0 ? m_N1 : m_N1-1);
1344 #pragma omp parallel for
1345 for (dim_t i2=front; i2<m_N2; i2++) {
1346 for (dim_t i0=left; i0<m_N0; i0++) {
1347 m_nodeId[i0+i2*m_N0*m_N1]=m_nodeDistribution[neighbour]
1348 + bottomN0*(bottomN1-1)
1349 + (i2-front)*bottomN0*bottomN1 + i0-left;
1350 }
1351 }
1352 }
1353 // case 3: all nodes on front plane are owned by rank in front
1354 if (front>0) {
1355 const int neighbour=m_mpiInfo->rank-m_NX*m_NY;
1356 const index_t N0=(neighbour%m_NX == 0 ? m_N0 : m_N0-1);
1357 const index_t N1=(neighbour%(m_NX*m_NY)/m_NX==0 ? m_N1 : m_N1-1);
1358 const index_t N2=(neighbour/(m_NX*m_NY)==0 ? m_N2 : m_N2-1);
1359 #pragma omp parallel for
1360 for (dim_t i1=bottom; i1<m_N1; i1++) {
1361 for (dim_t i0=left; i0<m_N0; i0++) {
1362 m_nodeId[i0+i1*m_N0]=m_nodeDistribution[neighbour]
1363 + N0*N1*(N2-1)+(i1-bottom)*N0 + i0-left;
1364 }
1365 }
1366 }
1367 // case 4: nodes on front bottom edge are owned by the corresponding rank
1368 if (front>0 && bottom>0) {
1369 const int neighbour=m_mpiInfo->rank-m_NX*(m_NY+1);
1370 const index_t N0=(neighbour%m_NX == 0 ? m_N0 : m_N0-1);
1371 const index_t N1=(neighbour%(m_NX*m_NY)/m_NX==0 ? m_N1 : m_N1-1);
1372 const index_t N2=(neighbour/(m_NX*m_NY)==0 ? m_N2 : m_N2-1);
1373 #pragma omp parallel for
1374 for (dim_t i0=left; i0<m_N0; i0++) {
1375 m_nodeId[i0]=m_nodeDistribution[neighbour]
1376 + N0*N1*(N2-1)+(N1-1)*N0 + i0-left;
1377 }
1378 }
1379 // case 5: nodes on left bottom edge are owned by the corresponding rank
1380 if (left>0 && bottom>0) {
1381 const int neighbour=m_mpiInfo->rank-m_NX-1;
1382 const index_t N0=(neighbour%m_NX == 0 ? m_N0 : m_N0-1);
1383 const index_t N1=(neighbour%(m_NX*m_NY)/m_NX==0 ? m_N1 : m_N1-1);
1384 #pragma omp parallel for
1385 for (dim_t i2=front; i2<m_N2; i2++) {
1386 m_nodeId[i2*m_N0*m_N1]=m_nodeDistribution[neighbour]
1387 + (1+i2-front)*N0*N1-1;
1388 }
1389 }
1390 // case 6: nodes on left front edge are owned by the corresponding rank
1391 if (left>0 && front>0) {
1392 const int neighbour=m_mpiInfo->rank-m_NX*m_NY-1;
1393 const index_t N0=(neighbour%m_NX == 0 ? m_N0 : m_N0-1);
1394 const index_t N1=(neighbour%(m_NX*m_NY)/m_NX==0 ? m_N1 : m_N1-1);
1395 const index_t N2=(neighbour/(m_NX*m_NY)==0 ? m_N2 : m_N2-1);
1396 #pragma omp parallel for
1397 for (dim_t i1=bottom; i1<m_N1; i1++) {
1398 m_nodeId[i1*m_N0]=m_nodeDistribution[neighbour]
1399 + N0*N1*(N2-1)+N0-1+(i1-bottom)*N0;
1400 }
1401 }
1402 // case 7: bottom-left-front corner node owned by corresponding rank
1403 if (left>0 && bottom>0 && front>0) {
1404 const int neighbour=m_mpiInfo->rank-m_NX*(m_NY+1)-1;
1405 const index_t N0=(neighbour%m_NX == 0 ? m_N0 : m_N0-1);
1406 const index_t N1=(neighbour%(m_NX*m_NY)/m_NX==0 ? m_N1 : m_N1-1);
1407 const index_t N2=(neighbour/(m_NX*m_NY) == 0 ? m_N2 : m_N2-1);
1408 m_nodeId[0]=m_nodeDistribution[neighbour]+N0*N1*N2-1;
1409 }
1410
1411 // the rest of the id's are contiguous
1412 const index_t firstId=m_nodeDistribution[m_mpiInfo->rank];
1413 #pragma omp parallel for
1414 for (dim_t i2=front; i2<m_N2; i2++) {
1415 for (dim_t i1=bottom; i1<m_N1; i1++) {
1416 for (dim_t i0=left; i0<m_N0; i0++) {
1417 m_nodeId[i0+i1*m_N0+i2*m_N0*m_N1] = firstId+i0-left
1418 +(i1-bottom)*(m_N0-left)
1419 +(i2-front)*(m_N0-left)*(m_N1-bottom);
1420 }
1421 }
1422 }
1423 m_nodeTags.assign(getNumNodes(), 0);
1424 updateTagsInUse(Nodes);
1425
1426 // elements
1427 m_elementId.resize(getNumElements());
1428 #pragma omp parallel for
1429 for (dim_t k=0; k<getNumElements(); k++) {
1430 m_elementId[k]=k;
1431 }
1432 m_elementTags.assign(getNumElements(), 0);
1433 updateTagsInUse(Elements);
1434
1435 // face elements
1436 m_faceId.resize(getNumFaceElements());
1437 #pragma omp parallel for
1438 for (dim_t k=0; k<getNumFaceElements(); k++) {
1439 m_faceId[k]=k;
1440 }
1441
1442 // generate face offset vector and set face tags
1443 const IndexVector facesPerEdge = getNumFacesPerBoundary();
1444 const index_t LEFT=1, RIGHT=2, BOTTOM=10, TOP=20, FRONT=100, BACK=200;
1445 const index_t faceTag[] = { LEFT, RIGHT, BOTTOM, TOP, FRONT, BACK };
1446 m_faceOffset.assign(facesPerEdge.size(), -1);
1447 m_faceTags.clear();
1448 index_t offset=0;
1449 for (size_t i=0; i<facesPerEdge.size(); i++) {
1450 if (facesPerEdge[i]>0) {
1451 m_faceOffset[i]=offset;
1452 offset+=facesPerEdge[i];
1453 m_faceTags.insert(m_faceTags.end(), facesPerEdge[i], faceTag[i]);
1454 }
1455 }
1456 setTagMap("left", LEFT);
1457 setTagMap("right", RIGHT);
1458 setTagMap("bottom", BOTTOM);
1459 setTagMap("top", TOP);
1460 setTagMap("front", FRONT);
1461 setTagMap("back", BACK);
1462 updateTagsInUse(FaceElements);
1463 }
1464
1465 //protected
1466 void Brick::interpolateNodesOnElements(escript::Data& out, escript::Data& in,
1467 bool reduced) const
1468 {
1469 const dim_t numComp = in.getDataPointSize();
1470 if (reduced) {
1471 /*** GENERATOR SNIP_INTERPOLATE_REDUCED_ELEMENTS TOP */
1472 const double c0 = .125;
1473 #pragma omp parallel for
1474 for (index_t k2=0; k2 < m_NE2; ++k2) {
1475 for (index_t k1=0; k1 < m_NE1; ++k1) {
1476 for (index_t k0=0; k0 < m_NE0; ++k0) {
1477 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,k2, m_N0,m_N1));
1478 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,k2+1, m_N0,m_N1));
1479 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,k2+1, m_N0,m_N1));
1480 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,k2+1, m_N0,m_N1));
1481 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,k2, m_N0,m_N1));
1482 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2, m_N0,m_N1));
1483 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,k2, m_N0,m_N1));
1484 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2+1, m_N0,m_N1));
1485 double* o = out.getSampleDataRW(INDEX3(k0,k1,k2,m_NE0,m_NE1));
1486 for (index_t i=0; i < numComp; ++i) {
1487 o[INDEX2(i,numComp,0)] = c0*(f_000[i] + f_001[i] + f_010[i] + f_011[i] + f_100[i] + f_101[i] + f_110[i] + f_111[i]);
1488 } /* end of component loop i */
1489 } /* end of k0 loop */
1490 } /* end of k1 loop */
1491 } /* end of k2 loop */
1492 /* GENERATOR SNIP_INTERPOLATE_REDUCED_ELEMENTS BOTTOM */
1493 } else {
1494 /*** GENERATOR SNIP_INTERPOLATE_ELEMENTS TOP */
1495 const double c0 = .0094373878376559314545;
1496 const double c1 = .035220810900864519624;
1497 const double c2 = .13144585576580214704;
1498 const double c3 = .49056261216234406855;
1499 #pragma omp parallel for
1500 for (index_t k2=0; k2 < m_NE2; ++k2) {
1501 for (index_t k1=0; k1 < m_NE1; ++k1) {
1502 for (index_t k0=0; k0 < m_NE0; ++k0) {
1503 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,k2, m_N0,m_N1));
1504 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,k2+1, m_N0,m_N1));
1505 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,k2+1, m_N0,m_N1));
1506 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,k2+1, m_N0,m_N1));
1507 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2, m_N0,m_N1));
1508 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,k2, m_N0,m_N1));
1509 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,k2, m_N0,m_N1));
1510 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2+1, m_N0,m_N1));
1511 double* o = out.getSampleDataRW(INDEX3(k0,k1,k2,m_NE0,m_NE1));
1512 for (index_t i=0; i < numComp; ++i) {
1513 o[INDEX2(i,numComp,0)] = f_000[i]*c3 + f_111[i]*c0 + c2*(f_001[i] + f_010[i] + f_100[i]) + c1*(f_011[i] + f_101[i] + f_110[i]);
1514 o[INDEX2(i,numComp,1)] = f_011[i]*c0 + f_100[i]*c3 + c2*(f_000[i] + f_101[i] + f_110[i]) + c1*(f_001[i] + f_010[i] + f_111[i]);
1515 o[INDEX2(i,numComp,2)] = f_010[i]*c3 + f_101[i]*c0 + c2*(f_000[i] + f_011[i] + f_110[i]) + c1*(f_001[i] + f_100[i] + f_111[i]);
1516 o[INDEX2(i,numComp,3)] = f_001[i]*c0 + f_110[i]*c3 + c2*(f_010[i] + f_100[i] + f_111[i]) + c1*(f_000[i] + f_011[i] + f_101[i]);
1517 o[INDEX2(i,numComp,4)] = f_001[i]*c3 + f_110[i]*c0 + c2*(f_000[i] + f_011[i] + f_101[i]) + c1*(f_010[i] + f_100[i] + f_111[i]);
1518 o[INDEX2(i,numComp,5)] = f_010[i]*c0 + f_101[i]*c3 + c2*(f_001[i] + f_100[i] + f_111[i]) + c1*(f_000[i] + f_011[i] + f_110[i]);
1519 o[INDEX2(i,numComp,6)] = f_011[i]*c3 + f_100[i]*c0 + c2*(f_001[i] + f_010[i] + f_111[i]) + c1*(f_000[i] + f_101[i] + f_110[i]);
1520 o[INDEX2(i,numComp,7)] = f_000[i]*c0 + f_111[i]*c3 + c2*(f_011[i] + f_101[i] + f_110[i]) + c1*(f_001[i] + f_010[i] + f_100[i]);
1521 } /* end of component loop i */
1522 } /* end of k0 loop */
1523 } /* end of k1 loop */
1524 } /* end of k2 loop */
1525 /* GENERATOR SNIP_INTERPOLATE_ELEMENTS BOTTOM */
1526 }
1527 }
1528
1529 //protected
1530 void Brick::interpolateNodesOnFaces(escript::Data& out, escript::Data& in,
1531 bool reduced) const
1532 {
1533 const dim_t numComp = in.getDataPointSize();
1534 if (reduced) {
1535 const double c0 = .25;
1536 #pragma omp parallel
1537 {
1538 /*** GENERATOR SNIP_INTERPOLATE_REDUCED_FACES TOP */
1539 if (m_faceOffset[0] > -1) {
1540 #pragma omp for nowait
1541 for (index_t k2=0; k2 < m_NE2; ++k2) {
1542 for (index_t k1=0; k1 < m_NE1; ++k1) {
1543 const register double* f_011 = in.getSampleDataRO(INDEX3(0,k1+1,k2+1, m_N0,m_N1));
1544 const register double* f_010 = in.getSampleDataRO(INDEX3(0,k1+1,k2, m_N0,m_N1));
1545 const register double* f_001 = in.getSampleDataRO(INDEX3(0,k1,k2+1, m_N0,m_N1));
1546 const register double* f_000 = in.getSampleDataRO(INDEX3(0,k1,k2, m_N0,m_N1));
1547 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE1));
1548 for (index_t i=0; i < numComp; ++i) {
1549 o[INDEX2(i,numComp,0)] = c0*(f_000[i] + f_001[i] + f_010[i] + f_011[i]);
1550 } /* end of component loop i */
1551 } /* end of k1 loop */
1552 } /* end of k2 loop */
1553 } /* end of face 0 */
1554 if (m_faceOffset[1] > -1) {
1555 #pragma omp for nowait
1556 for (index_t k2=0; k2 < m_NE2; ++k2) {
1557 for (index_t k1=0; k1 < m_NE1; ++k1) {
1558 const register double* f_110 = in.getSampleDataRO(INDEX3(m_N0-1,k1+1,k2, m_N0,m_N1));
1559 const register double* f_100 = in.getSampleDataRO(INDEX3(m_N0-1,k1,k2, m_N0,m_N1));
1560 const register double* f_101 = in.getSampleDataRO(INDEX3(m_N0-1,k1,k2+1, m_N0,m_N1));
1561 const register double* f_111 = in.getSampleDataRO(INDEX3(m_N0-1,k1+1,k2+1, m_N0,m_N1));
1562 double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE1));
1563 for (index_t i=0; i < numComp; ++i) {
1564 o[INDEX2(i,numComp,0)] = c0*(f_100[i] + f_101[i] + f_110[i] + f_111[i]);
1565 } /* end of component loop i */
1566 } /* end of k1 loop */
1567 } /* end of k2 loop */
1568 } /* end of face 1 */
1569 if (m_faceOffset[2] > -1) {
1570 #pragma omp for nowait
1571 for (index_t k2=0; k2 < m_NE2; ++k2) {
1572 for (index_t k0=0; k0 < m_NE0; ++k0) {
1573 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,0,k2, m_N0,m_N1));
1574 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,0,k2+1, m_N0,m_N1));
1575 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,0,k2+1, m_N0,m_N1));
1576 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,0,k2, m_N0,m_N1));
1577 double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE0));
1578 for (index_t i=0; i < numComp; ++i) {
1579 o[INDEX2(i,numComp,0)] = c0*(f_000[i] + f_001[i] + f_100[i] + f_101[i]);
1580 } /* end of component loop i */
1581 } /* end of k0 loop */
1582 } /* end of k2 loop */
1583 } /* end of face 2 */
1584 if (m_faceOffset[3] > -1) {
1585 #pragma omp for nowait
1586 for (index_t k2=0; k2 < m_NE2; ++k2) {
1587 for (index_t k0=0; k0 < m_NE0; ++k0) {
1588 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,m_N1-1,k2, m_N0,m_N1));
1589 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,m_N1-1,k2+1, m_N0,m_N1));
1590 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,m_N1-1,k2, m_N0,m_N1));
1591 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,m_N1-1,k2+1, m_N0,m_N1));
1592 double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE0));
1593 for (index_t i=0; i < numComp; ++i) {
1594 o[INDEX2(i,numComp,0)] = c0*(f_010[i] + f_011[i] + f_110[i] + f_111[i]);
1595 } /* end of component loop i */
1596 } /* end of k0 loop */
1597 } /* end of k2 loop */
1598 } /* end of face 3 */
1599 if (m_faceOffset[4] > -1) {
1600 #pragma omp for nowait
1601 for (index_t k1=0; k1 < m_NE1; ++k1) {
1602 for (index_t k0=0; k0 < m_NE0; ++k0) {
1603 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,0, m_N0,m_N1));
1604 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,0, m_N0,m_N1));
1605 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,0, m_N0,m_N1));
1606 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,0, m_N0,m_N1));
1607 double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE0));
1608 for (index_t i=0; i < numComp; ++i) {
1609 o[INDEX2(i,numComp,0)] = c0*(f_000[i] + f_010[i] + f_100[i] + f_110[i]);
1610 } /* end of component loop i */
1611 } /* end of k0 loop */
1612 } /* end of k1 loop */
1613 } /* end of face 4 */
1614 if (m_faceOffset[5] > -1) {
1615 #pragma omp for nowait
1616 for (index_t k1=0; k1 < m_NE1; ++k1) {
1617 for (index_t k0=0; k0 < m_NE0; ++k0) {
1618 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,m_N2-1, m_N0,m_N1));
1619 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,m_N2-1, m_N0,m_N1));
1620 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,m_N2-1, m_N0,m_N1));
1621 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,m_N2-1, m_N0,m_N1));
1622 double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE0));
1623 for (index_t i=0; i < numComp; ++i) {
1624 o[INDEX2(i,numComp,0)] = c0*(f_001[i] + f_011[i] + f_101[i] + f_111[i]);
1625 } /* end of component loop i */
1626 } /* end of k0 loop */
1627 } /* end of k1 loop */
1628 } /* end of face 5 */
1629 /* GENERATOR SNIP_INTERPOLATE_REDUCED_FACES BOTTOM */
1630 } // end of parallel section
1631 } else {
1632 const double c0 = 0.044658198738520451079;
1633 const double c1 = 0.16666666666666666667;
1634 const double c2 = 0.62200846792814621559;
1635 #pragma omp parallel
1636 {
1637 /*** GENERATOR SNIP_INTERPOLATE_FACES TOP */
1638 if (m_faceOffset[0] > -1) {
1639 #pragma omp for nowait
1640 for (index_t k2=0; k2 < m_NE2; ++k2) {
1641 for (index_t k1=0; k1 < m_NE1; ++k1) {
1642 const register double* f_000 = in.getSampleDataRO(INDEX3(0,k1,k2, m_N0,m_N1));
1643 const register double* f_001 = in.getSampleDataRO(INDEX3(0,k1,k2+1, m_N0,m_N1));
1644 const register double* f_011 = in.getSampleDataRO(INDEX3(0,k1+1,k2+1, m_N0,m_N1));
1645 const register double* f_010 = in.getSampleDataRO(INDEX3(0,k1+1,k2, m_N0,m_N1));
1646 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE1));
1647 for (index_t i=0; i < numComp; ++i) {
1648 o[INDEX2(i,numComp,0)] = f_000[i]*c2 + f_011[i]*c0 + c1*(f_001[i] + f_010[i]);
1649 o[INDEX2(i,numComp,1)] = f_001[i]*c0 + f_010[i]*c2 + c1*(f_000[i] + f_011[i]);
1650 o[INDEX2(i,numComp,2)] = f_001[i]*c2 + f_010[i]*c0 + c1*(f_000[i] + f_011[i]);
1651 o[INDEX2(i,numComp,3)] = f_000[i]*c0 + f_011[i]*c2 + c1*(f_001[i] + f_010[i]);
1652 } /* end of component loop i */
1653 } /* end of k1 loop */
1654 } /* end of k2 loop */
1655 } /* end of face 0 */
1656 if (m_faceOffset[1] > -1) {
1657 #pragma omp for nowait
1658 for (index_t k2=0; k2 < m_NE2; ++k2) {
1659 for (index_t k1=0; k1 < m_NE1; ++k1) {
1660 const register double* f_101 = in.getSampleDataRO(INDEX3(m_N0-1,k1,k2+1, m_N0,m_N1));
1661 const register double* f_100 = in.getSampleDataRO(INDEX3(m_N0-1,k1,k2, m_N0,m_N1));
1662 const register double* f_110 = in.getSampleDataRO(INDEX3(m_N0-1,k1+1,k2, m_N0,m_N1));
1663 const register double* f_111 = in.getSampleDataRO(INDEX3(m_N0-1,k1+1,k2+1, m_N0,m_N1));
1664 double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE1));
1665 for (index_t i=0; i < numComp; ++i) {
1666 o[INDEX2(i,numComp,0)] = f_100[i]*c2 + f_111[i]*c0 + c1*(f_101[i] + f_110[i]);
1667 o[INDEX2(i,numComp,1)] = f_101[i]*c0 + f_110[i]*c2 + c1*(f_100[i] + f_111[i]);
1668 o[INDEX2(i,numComp,2)] = f_101[i]*c2 + f_110[i]*c0 + c1*(f_100[i] + f_111[i]);
1669 o[INDEX2(i,numComp,3)] = f_100[i]*c0 + f_111[i]*c2 + c1*(f_101[i] + f_110[i]);
1670 } /* end of component loop i */
1671 } /* end of k1 loop */
1672 } /* end of k2 loop */
1673 } /* end of face 1 */
1674 if (m_faceOffset[2] > -1) {
1675 #pragma omp for nowait
1676 for (index_t k2=0; k2 < m_NE2; ++k2) {
1677 for (index_t k0=0; k0 < m_NE0; ++k0) {
1678 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,0,k2, m_N0,m_N1));
1679 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,0,k2+1, m_N0,m_N1));
1680 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,0,k2+1, m_N0,m_N1));
1681 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,0,k2, m_N0,m_N1));
1682 double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE0));
1683 for (index_t i=0; i < numComp; ++i) {
1684 o[INDEX2(i,numComp,0)] = f_000[i]*c2 + f_101[i]*c0 + c1*(f_001[i] + f_100[i]);
1685 o[INDEX2(i,numComp,1)] = f_001[i]*c0 + f_100[i]*c2 + c1*(f_000[i] + f_101[i]);
1686 o[INDEX2(i,numComp,2)] = f_001[i]*c2 + f_100[i]*c0 + c1*(f_000[i] + f_101[i]);
1687 o[INDEX2(i,numComp,3)] = f_000[i]*c0 + f_101[i]*c2 + c1*(f_001[i] + f_100[i]);
1688 } /* end of component loop i */
1689 } /* end of k0 loop */
1690 } /* end of k2 loop */
1691 } /* end of face 2 */
1692 if (m_faceOffset[3] > -1) {
1693 #pragma omp for nowait
1694 for (index_t k2=0; k2 < m_NE2; ++k2) {
1695 for (index_t k0=0; k0 < m_NE0; ++k0) {
1696 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,m_N1-1,k2, m_N0,m_N1));
1697 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,m_N1-1,k2+1, m_N0,m_N1));
1698 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,m_N1-1,k2, m_N0,m_N1));
1699 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,m_N1-1,k2+1, m_N0,m_N1));
1700 double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE0));
1701 for (index_t i=0; i < numComp; ++i) {
1702 o[INDEX2(i,numComp,0)] = f_010[i]*c2 + f_111[i]*c0 + c1*(f_011[i] + f_110[i]);
1703 o[INDEX2(i,numComp,1)] = f_011[i]*c0 + f_110[i]*c2 + c1*(f_010[i] + f_111[i]);
1704 o[INDEX2(i,numComp,2)] = f_011[i]*c2 + f_110[i]*c0 + c1*(f_010[i] + f_111[i]);
1705 o[INDEX2(i,numComp,3)] = f_010[i]*c0 + f_111[i]*c2 + c1*(f_011[i] + f_110[i]);
1706 } /* end of component loop i */
1707 } /* end of k0 loop */
1708 } /* end of k2 loop */
1709 } /* end of face 3 */
1710 if (m_faceOffset[4] > -1) {
1711 #pragma omp for nowait
1712 for (index_t k1=0; k1 < m_NE1; ++k1) {
1713 for (index_t k0=0; k0 < m_NE0; ++k0) {
1714 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,0, m_N0,m_N1));
1715 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,0, m_N0,m_N1));
1716 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,0, m_N0,m_N1));
1717 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,0, m_N0,m_N1));
1718 double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE0));
1719 for (index_t i=0; i < numComp; ++i) {
1720 o[INDEX2(i,numComp,0)] = f_000[i]*c2 + f_110[i]*c0 + c1*(f_010[i] + f_100[i]);
1721 o[INDEX2(i,numComp,1)] = f_010[i]*c0 + f_100[i]*c2 + c1*(f_000[i] + f_110[i]);
1722 o[INDEX2(i,numComp,2)] = f_010[i]*c2 + f_100[i]*c0 + c1*(f_000[i] + f_110[i]);
1723 o[INDEX2(i,numComp,3)] = f_000[i]*c0 + f_110[i]*c2 + c1*(f_010[i] + f_100[i]);
1724 } /* end of component loop i */
1725 } /* end of k0 loop */
1726 } /* end of k1 loop */
1727 } /* end of face 4 */
1728 if (m_faceOffset[5] > -1) {
1729 #pragma omp for nowait
1730 for (index_t k1=0; k1 < m_NE1; ++k1) {
1731 for (index_t k0=0; k0 < m_NE0; ++k0) {
1732 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,m_N2-1, m_N0,m_N1));
1733 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,m_N2-1, m_N0,m_N1));
1734 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,m_N2-1, m_N0,m_N1));
1735 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,m_N2-1, m_N0,m_N1));
1736 double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE0));
1737 for (index_t i=0; i < numComp; ++i) {
1738 o[INDEX2(i,numComp,0)] = f_001[i]*c2 + f_111[i]*c0 + c1*(f_011[i] + f_101[i]);
1739 o[INDEX2(i,numComp,1)] = f_011[i]*c0 + f_101[i]*c2 + c1*(f_001[i] + f_111[i]);
1740 o[INDEX2(i,numComp,2)] = f_011[i]*c2 + f_101[i]*c0 + c1*(f_001[i] + f_111[i]);
1741 o[INDEX2(i,numComp,3)] = f_001[i]*c0 + f_111[i]*c2 + c1*(f_011[i] + f_101[i]);
1742 } /* end of component loop i */
1743 } /* end of k0 loop */
1744 } /* end of k1 loop */
1745 } /* end of face 5 */
1746 /* GENERATOR SNIP_INTERPOLATE_FACES BOTTOM */
1747 } // end of parallel section
1748 }
1749 }
1750
1751 } // end of namespace ripley
1752

  ViewVC Help
Powered by ViewVC 1.1.26