/[escript]/branches/ripleygmg_from_3668/ripley/src/Brick.cpp
ViewVC logotype

Contents of /branches/ripleygmg_from_3668/ripley/src/Brick.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 3733 - (show annotations)
Fri Dec 9 04:02:56 2011 UTC (7 years, 4 months ago) by caltinay
File size: 93538 byte(s)
escript on ripley tests pass. However, returning 0 for number of point elements
and DOF id's are bogus.

1
2 /*******************************************************
3 *
4 * Copyright (c) 2003-2011 by University of Queensland
5 * Earth Systems Science Computational Center (ESSCC)
6 * http://www.uq.edu.au/esscc
7 *
8 * Primary Business: Queensland, Australia
9 * Licensed under the Open Software License version 3.0
10 * http://www.opensource.org/licenses/osl-3.0.php
11 *
12 *******************************************************/
13
14 #include <ripley/Brick.h>
15 extern "C" {
16 #include "paso/SystemMatrixPattern.h"
17 }
18
19 #if USE_SILO
20 #include <silo.h>
21 #ifdef ESYS_MPI
22 #include <pmpio.h>
23 #endif
24 #endif
25
26 #include <iomanip>
27
28 using namespace std;
29
30 namespace ripley {
31
32 Brick::Brick(int n0, int n1, int n2, double l0, double l1, double l2, int d0,
33 int d1, int d2) :
34 RipleyDomain(3),
35 m_gNE0(n0),
36 m_gNE1(n1),
37 m_gNE2(n2),
38 m_l0(l0),
39 m_l1(l1),
40 m_l2(l2),
41 m_NX(d0),
42 m_NY(d1),
43 m_NZ(d2)
44 {
45 // ensure number of subdivisions is valid and nodes can be distributed
46 // among number of ranks
47 if (m_NX*m_NY*m_NZ != m_mpiInfo->size)
48 throw RipleyException("Invalid number of spatial subdivisions");
49
50 if (n0%m_NX > 0 || n1%m_NY > 0 || n2%m_NZ > 0)
51 throw RipleyException("Number of elements must be separable into number of ranks in each dimension");
52
53 // local number of elements
54 m_NE0 = n0/m_NX;
55 m_NE1 = n1/m_NY;
56 m_NE2 = n2/m_NZ;
57 // local number of nodes (not necessarily owned)
58 m_N0 = m_NE0+1;
59 m_N1 = m_NE1+1;
60 m_N2 = m_NE2+1;
61 // bottom-left-front node is at (offset0,offset1,offset2) in global mesh
62 m_offset0 = m_NE0*(m_mpiInfo->rank%m_NX);
63 m_offset1 = m_NE1*(m_mpiInfo->rank%(m_NX*m_NY)/m_NX);
64 m_offset2 = m_NE2*(m_mpiInfo->rank/(m_NX*m_NY));
65 populateSampleIds();
66 }
67
68
69 Brick::~Brick()
70 {
71 }
72
73 string Brick::getDescription() const
74 {
75 return "ripley::Brick";
76 }
77
78 bool Brick::operator==(const AbstractDomain& other) const
79 {
80 if (dynamic_cast<const Brick*>(&other))
81 return this==&other;
82
83 return false;
84 }
85
86 void Brick::dump(const string& fileName) const
87 {
88 #if USE_SILO
89 string fn(fileName);
90 if (fileName.length() < 6 || fileName.compare(fileName.length()-5, 5, ".silo") != 0) {
91 fn+=".silo";
92 }
93
94 const int NUM_SILO_FILES = 1;
95 const char* blockDirFmt = "/block%04d";
96 int driver=DB_HDF5;
97 string siloPath;
98 DBfile* dbfile = NULL;
99
100 #ifdef ESYS_MPI
101 PMPIO_baton_t* baton = NULL;
102 #endif
103
104 if (m_mpiInfo->size > 1) {
105 #ifdef ESYS_MPI
106 baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
107 0x1337, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
108 PMPIO_DefaultClose, (void*)&driver);
109 // try the fallback driver in case of error
110 if (!baton && driver != DB_PDB) {
111 driver = DB_PDB;
112 baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
113 0x1338, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
114 PMPIO_DefaultClose, (void*)&driver);
115 }
116 if (baton) {
117 char str[64];
118 snprintf(str, 64, blockDirFmt, PMPIO_RankInGroup(baton, m_mpiInfo->rank));
119 siloPath = str;
120 dbfile = (DBfile*) PMPIO_WaitForBaton(baton, fn.c_str(), siloPath.c_str());
121 }
122 #endif
123 } else {
124 dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
125 getDescription().c_str(), driver);
126 // try the fallback driver in case of error
127 if (!dbfile && driver != DB_PDB) {
128 driver = DB_PDB;
129 dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
130 getDescription().c_str(), driver);
131 }
132 }
133
134 if (!dbfile)
135 throw RipleyException("dump: Could not create Silo file");
136
137 /*
138 if (driver==DB_HDF5) {
139 // gzip level 1 already provides good compression with minimal
140 // performance penalty. Some tests showed that gzip levels >3 performed
141 // rather badly on escript data both in terms of time and space
142 DBSetCompression("ERRMODE=FALLBACK METHOD=GZIP LEVEL=1");
143 }
144 */
145
146 boost::scoped_ptr<double> x(new double[m_N0]);
147 boost::scoped_ptr<double> y(new double[m_N1]);
148 boost::scoped_ptr<double> z(new double[m_N2]);
149 double* coords[3] = { x.get(), y.get(), z.get() };
150 pair<double,double> xdx = getFirstCoordAndSpacing(0);
151 pair<double,double> ydy = getFirstCoordAndSpacing(1);
152 pair<double,double> zdz = getFirstCoordAndSpacing(2);
153 #pragma omp parallel
154 {
155 #pragma omp for
156 for (dim_t i0 = 0; i0 < m_N0; i0++) {
157 coords[0][i0]=xdx.first+i0*xdx.second;
158 }
159 #pragma omp for
160 for (dim_t i1 = 0; i1 < m_N1; i1++) {
161 coords[1][i1]=ydy.first+i1*ydy.second;
162 }
163 #pragma omp for
164 for (dim_t i2 = 0; i2 < m_N2; i2++) {
165 coords[2][i2]=zdz.first+i2*zdz.second;
166 }
167 }
168 IndexVector dims = getNumNodesPerDim();
169 DBPutQuadmesh(dbfile, "mesh", NULL, coords, &dims[0], 3, DB_DOUBLE,
170 DB_COLLINEAR, NULL);
171
172 DBPutQuadvar1(dbfile, "nodeId", "mesh", (void*)&m_nodeId[0], &dims[0], 3,
173 NULL, 0, DB_INT, DB_NODECENT, NULL);
174
175 // write element ids
176 dims = getNumElementsPerDim();
177 DBPutQuadvar1(dbfile, "elementId", "mesh", (void*)&m_elementId[0],
178 &dims[0], 3, NULL, 0, DB_INT, DB_ZONECENT, NULL);
179
180 // rank 0 writes multimesh and multivar
181 if (m_mpiInfo->rank == 0) {
182 vector<string> tempstrings;
183 vector<char*> names;
184 for (dim_t i=0; i<m_mpiInfo->size; i++) {
185 stringstream path;
186 path << "/block" << setw(4) << setfill('0') << right << i << "/mesh";
187 tempstrings.push_back(path.str());
188 names.push_back((char*)tempstrings.back().c_str());
189 }
190 vector<int> types(m_mpiInfo->size, DB_QUAD_RECT);
191 DBSetDir(dbfile, "/");
192 DBPutMultimesh(dbfile, "multimesh", m_mpiInfo->size, &names[0],
193 &types[0], NULL);
194 tempstrings.clear();
195 names.clear();
196 for (dim_t i=0; i<m_mpiInfo->size; i++) {
197 stringstream path;
198 path << "/block" << setw(4) << setfill('0') << right << i << "/nodeId";
199 tempstrings.push_back(path.str());
200 names.push_back((char*)tempstrings.back().c_str());
201 }
202 types.assign(m_mpiInfo->size, DB_QUADVAR);
203 DBPutMultivar(dbfile, "nodeId", m_mpiInfo->size, &names[0],
204 &types[0], NULL);
205 tempstrings.clear();
206 names.clear();
207 for (dim_t i=0; i<m_mpiInfo->size; i++) {
208 stringstream path;
209 path << "/block" << setw(4) << setfill('0') << right << i << "/elementId";
210 tempstrings.push_back(path.str());
211 names.push_back((char*)tempstrings.back().c_str());
212 }
213 DBPutMultivar(dbfile, "elementId", m_mpiInfo->size, &names[0],
214 &types[0], NULL);
215 }
216
217 if (m_mpiInfo->size > 1) {
218 #ifdef ESYS_MPI
219 PMPIO_HandOffBaton(baton, dbfile);
220 PMPIO_Finish(baton);
221 #endif
222 } else {
223 DBClose(dbfile);
224 }
225
226 #else // USE_SILO
227 throw RipleyException("dump(): no Silo support");
228 #endif
229 }
230
231 const int* Brick::borrowSampleReferenceIDs(int fsType) const
232 {
233 switch (fsType) {
234 case Nodes:
235 case DegreesOfFreedom: //FIXME
236 case ReducedDegreesOfFreedom: //FIXME
237 return &m_nodeId[0];
238 case Elements:
239 case ReducedElements:
240 return &m_elementId[0];
241 case ReducedFaceElements:
242 return &m_faceId[0];
243 default:
244 break;
245 }
246
247 stringstream msg;
248 msg << "borrowSampleReferenceIDs() not implemented for function space type "
249 << fsType;
250 throw RipleyException(msg.str());
251 }
252
253 bool Brick::ownSample(int fsCode, index_t id) const
254 {
255 #ifdef ESYS_MPI
256 if (fsCode == Nodes) {
257 const index_t myFirst=m_nodeDistribution[m_mpiInfo->rank];
258 const index_t myLast=m_nodeDistribution[m_mpiInfo->rank+1]-1;
259 return (m_nodeId[id]>=myFirst && m_nodeId[id]<=myLast);
260 } else
261 throw RipleyException("ownSample() only implemented for Nodes");
262 #else
263 return true;
264 #endif
265 }
266
267 void Brick::setToGradient(escript::Data& out, const escript::Data& cIn) const
268 {
269 escript::Data& in = *const_cast<escript::Data*>(&cIn);
270 const dim_t numComp = in.getDataPointSize();
271 const double h0 = m_l0/m_gNE0;
272 const double h1 = m_l1/m_gNE1;
273 const double h2 = m_l1/m_gNE2;
274 const double C0 = .044658198738520451079;
275 const double C1 = .16666666666666666667;
276 const double C2 = .21132486540518711775;
277 const double C3 = .25;
278 const double C4 = .5;
279 const double C5 = .62200846792814621559;
280 const double C6 = .78867513459481288225;
281
282 if (out.getFunctionSpace().getTypeCode() == Elements) {
283 /*** GENERATOR SNIP_GRAD_ELEMENTS TOP */
284 #pragma omp parallel for
285 for (index_t k2=0; k2 < m_NE2; ++k2) {
286 for (index_t k1=0; k1 < m_NE1; ++k1) {
287 for (index_t k0=0; k0 < m_NE0; ++k0) {
288 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,k2, m_N0,m_N1));
289 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,k2+1, m_N0,m_N1));
290 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,k2+1, m_N0,m_N1));
291 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2+1, m_N0,m_N1));
292 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2, m_N0,m_N1));
293 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,k2+1, m_N0,m_N1));
294 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,k2, m_N0,m_N1));
295 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,k2, m_N0,m_N1));
296 double* o = out.getSampleDataRW(INDEX3(k0,k1,k2,m_NE0,m_NE1));
297 for (index_t i=0; i < numComp; ++i) {
298 const double V0=((f_100[i]-f_000[i])*C5 + (f_111[i]-f_011[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / h0;
299 const double V1=((f_110[i]-f_010[i])*C5 + (f_101[i]-f_001[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / h0;
300 const double V2=((f_101[i]-f_001[i])*C5 + (f_110[i]-f_010[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / h0;
301 const double V3=((f_111[i]-f_011[i])*C5 + (f_100[i]-f_000[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / h0;
302 const double V4=((f_010[i]-f_000[i])*C5 + (f_111[i]-f_101[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / h1;
303 const double V5=((f_110[i]-f_100[i])*C5 + (f_011[i]-f_001[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / h1;
304 const double V6=((f_011[i]-f_001[i])*C5 + (f_110[i]-f_100[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / h1;
305 const double V7=((f_111[i]-f_101[i])*C5 + (f_010[i]-f_000[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / h1;
306 const double V8=((f_001[i]-f_000[i])*C5 + (f_111[i]-f_110[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / h2;
307 const double V9=((f_101[i]-f_100[i])*C5 + (f_011[i]-f_010[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / h2;
308 const double V10=((f_011[i]-f_010[i])*C5 + (f_101[i]-f_100[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / h2;
309 const double V11=((f_111[i]-f_110[i])*C5 + (f_001[i]-f_000[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / h2;
310 o[INDEX3(i,0,0,numComp,3)] = V0;
311 o[INDEX3(i,1,0,numComp,3)] = V4;
312 o[INDEX3(i,2,0,numComp,3)] = V8;
313 o[INDEX3(i,0,1,numComp,3)] = V0;
314 o[INDEX3(i,1,1,numComp,3)] = V5;
315 o[INDEX3(i,2,1,numComp,3)] = V9;
316 o[INDEX3(i,0,2,numComp,3)] = V1;
317 o[INDEX3(i,1,2,numComp,3)] = V4;
318 o[INDEX3(i,2,2,numComp,3)] = V10;
319 o[INDEX3(i,0,3,numComp,3)] = V1;
320 o[INDEX3(i,1,3,numComp,3)] = V5;
321 o[INDEX3(i,2,3,numComp,3)] = V11;
322 o[INDEX3(i,0,4,numComp,3)] = V2;
323 o[INDEX3(i,1,4,numComp,3)] = V6;
324 o[INDEX3(i,2,4,numComp,3)] = V8;
325 o[INDEX3(i,0,5,numComp,3)] = V2;
326 o[INDEX3(i,1,5,numComp,3)] = V7;
327 o[INDEX3(i,2,5,numComp,3)] = V9;
328 o[INDEX3(i,0,6,numComp,3)] = V3;
329 o[INDEX3(i,1,6,numComp,3)] = V6;
330 o[INDEX3(i,2,6,numComp,3)] = V10;
331 o[INDEX3(i,0,7,numComp,3)] = V3;
332 o[INDEX3(i,1,7,numComp,3)] = V7;
333 o[INDEX3(i,2,7,numComp,3)] = V11;
334 } /* end of component loop i */
335 } /* end of k0 loop */
336 } /* end of k1 loop */
337 } /* end of k2 loop */
338 /* GENERATOR SNIP_GRAD_ELEMENTS BOTTOM */
339 } else if (out.getFunctionSpace().getTypeCode() == ReducedElements) {
340 /*** GENERATOR SNIP_GRAD_REDUCED_ELEMENTS TOP */
341 #pragma omp parallel for
342 for (index_t k2=0; k2 < m_NE2; ++k2) {
343 for (index_t k1=0; k1 < m_NE1; ++k1) {
344 for (index_t k0=0; k0 < m_NE0; ++k0) {
345 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,k2, m_N0,m_N1));
346 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,k2+1, m_N0,m_N1));
347 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,k2+1, m_N0,m_N1));
348 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,k2+1, m_N0,m_N1));
349 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,k2, m_N0,m_N1));
350 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2, m_N0,m_N1));
351 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,k2, m_N0,m_N1));
352 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2+1, m_N0,m_N1));
353 double* o = out.getSampleDataRW(INDEX3(k0,k1,k2,m_NE0,m_NE1));
354 for (index_t i=0; i < numComp; ++i) {
355 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_010[i]-f_011[i])*C3 / h0;
356 o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_100[i]-f_101[i])*C3 / h1;
357 o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_011[i]+f_101[i]+f_111[i]-f_000[i]-f_010[i]-f_100[i]-f_110[i])*C3 / h2;
358 } /* end of component loop i */
359 } /* end of k0 loop */
360 } /* end of k1 loop */
361 } /* end of k2 loop */
362 /* GENERATOR SNIP_GRAD_REDUCED_ELEMENTS BOTTOM */
363 } else if (out.getFunctionSpace().getTypeCode() == FaceElements) {
364 /*** GENERATOR SNIP_GRAD_FACES TOP */
365 #pragma omp parallel
366 {
367 if (m_faceOffset[0] > -1) {
368 #pragma omp for nowait
369 for (index_t k2=0; k2 < m_NE2; ++k2) {
370 for (index_t k1=0; k1 < m_NE1; ++k1) {
371 const register double* f_000 = in.getSampleDataRO(INDEX3(0,k1,k2, m_N0,m_N1));
372 const register double* f_001 = in.getSampleDataRO(INDEX3(0,k1,k2+1, m_N0,m_N1));
373 const register double* f_101 = in.getSampleDataRO(INDEX3(1,k1,k2+1, m_N0,m_N1));
374 const register double* f_111 = in.getSampleDataRO(INDEX3(1,k1+1,k2+1, m_N0,m_N1));
375 const register double* f_110 = in.getSampleDataRO(INDEX3(1,k1+1,k2, m_N0,m_N1));
376 const register double* f_011 = in.getSampleDataRO(INDEX3(0,k1+1,k2+1, m_N0,m_N1));
377 const register double* f_010 = in.getSampleDataRO(INDEX3(0,k1+1,k2, m_N0,m_N1));
378 const register double* f_100 = in.getSampleDataRO(INDEX3(1,k1,k2, m_N0,m_N1));
379 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE1));
380 for (index_t i=0; i < numComp; ++i) {
381 const double V0=((f_010[i]-f_000[i])*C6 + (f_011[i]-f_001[i])*C2) / h1;
382 const double V1=((f_010[i]-f_000[i])*C2 + (f_011[i]-f_001[i])*C6) / h1;
383 const double V2=((f_001[i]-f_000[i])*C6 + (f_010[i]-f_011[i])*C2) / h2;
384 const double V3=((f_001[i]-f_000[i])*C2 + (f_011[i]-f_010[i])*C6) / h2;
385 o[INDEX3(i,0,0,numComp,3)] = ((f_100[i]-f_000[i])*C5 + (f_111[i]-f_011[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / h0;
386 o[INDEX3(i,1,0,numComp,3)] = V0;
387 o[INDEX3(i,2,0,numComp,3)] = V2;
388 o[INDEX3(i,0,1,numComp,3)] = ((f_110[i]-f_010[i])*C5 + (f_101[i]-f_001[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / h0;
389 o[INDEX3(i,1,1,numComp,3)] = V0;
390 o[INDEX3(i,2,1,numComp,3)] = V3;
391 o[INDEX3(i,0,2,numComp,3)] = ((f_101[i]-f_001[i])*C5 + (f_110[i]-f_010[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / h0;
392 o[INDEX3(i,1,2,numComp,3)] = V1;
393 o[INDEX3(i,2,2,numComp,3)] = V2;
394 o[INDEX3(i,0,3,numComp,3)] = ((f_111[i]-f_011[i])*C5 + (f_100[i]-f_000[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / h0;
395 o[INDEX3(i,1,3,numComp,3)] = V1;
396 o[INDEX3(i,2,3,numComp,3)] = V3;
397 } /* end of component loop i */
398 } /* end of k1 loop */
399 } /* end of k2 loop */
400 } /* end of face 0 */
401 if (m_faceOffset[1] > -1) {
402 #pragma omp for nowait
403 for (index_t k2=0; k2 < m_NE2; ++k2) {
404 for (index_t k1=0; k1 < m_NE1; ++k1) {
405 const register double* f_000 = in.getSampleDataRO(INDEX3(m_N0-2,k1,k2, m_N0,m_N1));
406 const register double* f_001 = in.getSampleDataRO(INDEX3(m_N0-2,k1,k2+1, m_N0,m_N1));
407 const register double* f_101 = in.getSampleDataRO(INDEX3(m_N0-1,k1,k2+1, m_N0,m_N1));
408 const register double* f_111 = in.getSampleDataRO(INDEX3(m_N0-1,k1+1,k2+1, m_N0,m_N1));
409 const register double* f_110 = in.getSampleDataRO(INDEX3(m_N0-1,k1+1,k2, m_N0,m_N1));
410 const register double* f_011 = in.getSampleDataRO(INDEX3(m_N0-2,k1+1,k2+1, m_N0,m_N1));
411 const register double* f_010 = in.getSampleDataRO(INDEX3(m_N0-2,k1+1,k2, m_N0,m_N1));
412 const register double* f_100 = in.getSampleDataRO(INDEX3(m_N0-1,k1,k2, m_N0,m_N1));
413 double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE1));
414 for (index_t i=0; i < numComp; ++i) {
415 const double V0=((f_110[i]-f_100[i])*C6 + (f_111[i]-f_101[i])*C2) / h1;
416 const double V1=((f_110[i]-f_100[i])*C2 + (f_111[i]-f_101[i])*C6) / h1;
417 const double V2=((f_101[i]-f_100[i])*C6 + (f_111[i]-f_110[i])*C2) / h2;
418 const double V3=((f_101[i]-f_100[i])*C2 + (f_111[i]-f_110[i])*C6) / h2;
419 o[INDEX3(i,0,0,numComp,3)] = ((f_100[i]-f_000[i])*C5 + (f_111[i]-f_011[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / h0;
420 o[INDEX3(i,1,0,numComp,3)] = V0;
421 o[INDEX3(i,2,0,numComp,3)] = V2;
422 o[INDEX3(i,0,1,numComp,3)] = ((f_110[i]-f_010[i])*C5 + (f_101[i]-f_001[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / h0;
423 o[INDEX3(i,1,1,numComp,3)] = V0;
424 o[INDEX3(i,2,1,numComp,3)] = V3;
425 o[INDEX3(i,0,2,numComp,3)] = ((f_101[i]-f_001[i])*C5 + (f_110[i]-f_010[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / h0;
426 o[INDEX3(i,1,2,numComp,3)] = V1;
427 o[INDEX3(i,2,2,numComp,3)] = V2;
428 o[INDEX3(i,0,3,numComp,3)] = ((f_111[i]-f_011[i])*C5 + (f_100[i]-f_000[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / h0;
429 o[INDEX3(i,1,3,numComp,3)] = V1;
430 o[INDEX3(i,2,3,numComp,3)] = V3;
431 } /* end of component loop i */
432 } /* end of k1 loop */
433 } /* end of k2 loop */
434 } /* end of face 1 */
435 if (m_faceOffset[2] > -1) {
436 #pragma omp for nowait
437 for (index_t k2=0; k2 < m_NE2; ++k2) {
438 for (index_t k0=0; k0 < m_NE0; ++k0) {
439 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,0,k2, m_N0,m_N1));
440 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,0,k2+1, m_N0,m_N1));
441 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,0,k2+1, m_N0,m_N1));
442 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,0,k2, m_N0,m_N1));
443 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,1,k2+1, m_N0,m_N1));
444 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,1,k2, m_N0,m_N1));
445 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,1,k2+1, m_N0,m_N1));
446 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,1,k2, m_N0,m_N1));
447 double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE0));
448 for (index_t i=0; i < numComp; ++i) {
449 const double V0=((f_100[i]-f_000[i])*C6 + (f_101[i]-f_001[i])*C2) / h0;
450 const double V1=((f_001[i]-f_000[i])*C6 + (f_101[i]-f_100[i])*C2) / h2;
451 const double V2=((f_001[i]-f_000[i])*C2 + (f_101[i]-f_100[i])*C6) / h2;
452 o[INDEX3(i,0,0,numComp,3)] = V0;
453 o[INDEX3(i,1,0,numComp,3)] = ((f_010[i]-f_000[i])*C5 + (f_111[i]-f_101[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / h1;
454 o[INDEX3(i,2,0,numComp,3)] = V1;
455 o[INDEX3(i,0,1,numComp,3)] = V0;
456 o[INDEX3(i,1,1,numComp,3)] = ((f_110[i]-f_100[i])*C5 + (f_011[i]-f_001[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / h1;
457 o[INDEX3(i,2,1,numComp,3)] = V2;
458 o[INDEX3(i,0,2,numComp,3)] = V0;
459 o[INDEX3(i,1,2,numComp,3)] = ((f_011[i]-f_001[i])*C5 + (f_110[i]-f_100[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / h1;
460 o[INDEX3(i,2,2,numComp,3)] = V1;
461 o[INDEX3(i,0,3,numComp,3)] = V0;
462 o[INDEX3(i,1,3,numComp,3)] = ((f_111[i]-f_101[i])*C5 + (f_010[i]-f_000[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / h1;
463 o[INDEX3(i,2,3,numComp,3)] = V2;
464 } /* end of component loop i */
465 } /* end of k0 loop */
466 } /* end of k2 loop */
467 } /* end of face 2 */
468 if (m_faceOffset[3] > -1) {
469 #pragma omp for nowait
470 for (index_t k2=0; k2 < m_NE2; ++k2) {
471 for (index_t k0=0; k0 < m_NE0; ++k0) {
472 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,m_N1-1,k2+1, m_N0,m_N1));
473 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,m_N1-1,k2, m_N0,m_N1));
474 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,m_N1-1,k2, m_N0,m_N1));
475 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,m_N1-1,k2+1, m_N0,m_N1));
476 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,m_N1-2,k2, m_N0,m_N1));
477 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,m_N1-2,k2+1, m_N0,m_N1));
478 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,m_N1-2,k2+1, m_N0,m_N1));
479 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,m_N1-2,k2, m_N0,m_N1));
480 double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE0));
481 for (index_t i=0; i < numComp; ++i) {
482 const double V0=((f_110[i]-f_010[i])*C6 + (f_111[i]-f_011[i])*C2) / h0;
483 const double V1=((f_110[i]-f_010[i])*C2 + (f_111[i]-f_011[i])*C6) / h0;
484 const double V2=((f_011[i]-f_010[i])*C6 + (f_111[i]-f_110[i])*C2) / h2;
485 const double V3=((f_011[i]-f_010[i])*C2 + (f_111[i]-f_110[i])*C6) / h2;
486 o[INDEX3(i,0,0,numComp,3)] = V0;
487 o[INDEX3(i,1,0,numComp,3)] = ((f_010[i]-f_000[i])*C5 + (f_111[i]-f_101[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / h1;
488 o[INDEX3(i,2,0,numComp,3)] = V2;
489 o[INDEX3(i,0,1,numComp,3)] = V0;
490 o[INDEX3(i,1,1,numComp,3)] = ((f_110[i]-f_100[i])*C5 + (f_011[i]-f_001[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / h1;
491 o[INDEX3(i,2,1,numComp,3)] = V3;
492 o[INDEX3(i,0,2,numComp,3)] = V1;
493 o[INDEX3(i,1,2,numComp,3)] = ((f_011[i]-f_001[i])*C5 + (f_110[i]-f_100[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / h1;
494 o[INDEX3(i,2,2,numComp,3)] = V2;
495 o[INDEX3(i,0,3,numComp,3)] = V1;
496 o[INDEX3(i,1,3,numComp,3)] = ((f_111[i]-f_101[i])*C5 + (f_010[i]-f_000[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / h1;
497 o[INDEX3(i,2,3,numComp,3)] = V3;
498 } /* end of component loop i */
499 } /* end of k0 loop */
500 } /* end of k2 loop */
501 } /* end of face 3 */
502 if (m_faceOffset[4] > -1) {
503 #pragma omp for nowait
504 for (index_t k1=0; k1 < m_NE1; ++k1) {
505 for (index_t k0=0; k0 < m_NE0; ++k0) {
506 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,0, m_N0,m_N1));
507 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,0, m_N0,m_N1));
508 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,0, m_N0,m_N1));
509 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,0, m_N0,m_N1));
510 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,1, m_N0,m_N1));
511 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,1, m_N0,m_N1));
512 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,1, m_N0,m_N1));
513 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,1, m_N0,m_N1));
514 double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE0));
515 for (index_t i=0; i < numComp; ++i) {
516 const double V0=((f_100[i]-f_000[i])*C6 + (f_110[i]-f_010[i])*C2) / h0;
517 const double V1=((f_100[i]-f_000[i])*C2 + (f_110[i]-f_010[i])*C6) / h0;
518 const double V2=((f_010[i]-f_000[i])*C6 + (f_110[i]-f_100[i])*C2) / h1;
519 const double V3=((f_010[i]-f_000[i])*C2 + (f_110[i]-f_100[i])*C6) / h1;
520 o[INDEX3(i,0,0,numComp,3)] = V0;
521 o[INDEX3(i,1,0,numComp,3)] = V2;
522 o[INDEX3(i,2,0,numComp,3)] = ((f_001[i]-f_000[i])*C5 + (f_111[i]-f_110[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / h2;
523 o[INDEX3(i,0,1,numComp,3)] = V0;
524 o[INDEX3(i,1,1,numComp,3)] = V3;
525 o[INDEX3(i,2,1,numComp,3)] = ((f_101[i]-f_100[i])*C5 + (f_011[i]-f_010[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / h2;
526 o[INDEX3(i,0,2,numComp,3)] = V1;
527 o[INDEX3(i,1,2,numComp,3)] = V2;
528 o[INDEX3(i,2,2,numComp,3)] = ((f_011[i]-f_010[i])*C5 + (f_101[i]-f_100[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / h2;
529 o[INDEX3(i,0,3,numComp,3)] = V1;
530 o[INDEX3(i,1,3,numComp,3)] = V3;
531 o[INDEX3(i,2,3,numComp,3)] = ((f_111[i]-f_110[i])*C5 + (f_001[i]-f_000[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / h2;
532 } /* end of component loop i */
533 } /* end of k0 loop */
534 } /* end of k1 loop */
535 } /* end of face 4 */
536 if (m_faceOffset[5] > -1) {
537 #pragma omp for nowait
538 for (index_t k1=0; k1 < m_NE1; ++k1) {
539 for (index_t k0=0; k0 < m_NE0; ++k0) {
540 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,m_N2-1, m_N0,m_N1));
541 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,m_N2-1, m_N0,m_N1));
542 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,m_N2-1, m_N0,m_N1));
543 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,m_N2-1, m_N0,m_N1));
544 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,m_N2-2, m_N0,m_N1));
545 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,m_N2-2, m_N0,m_N1));
546 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,m_N2-2, m_N0,m_N1));
547 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,m_N2-2, m_N0,m_N1));
548 double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE0));
549 for (index_t i=0; i < numComp; ++i) {
550 const double V0=((f_101[i]-f_001[i])*C6 + (f_111[i]-f_011[i])*C2) / h0;
551 const double V1=((f_101[i]-f_001[i])*C2 + (f_111[i]-f_011[i])*C6) / h0;
552 const double V2=((f_011[i]-f_001[i])*C6 + (f_111[i]-f_101[i])*C2) / h1;
553 const double V3=((f_011[i]-f_001[i])*C2 + (f_111[i]-f_101[i])*C6) / h1;
554 o[INDEX3(i,0,0,numComp,3)] = V0;
555 o[INDEX3(i,1,0,numComp,3)] = V2;
556 o[INDEX3(i,2,0,numComp,3)] = ((f_001[i]-f_000[i])*C5 + (f_111[i]-f_110[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / h2;
557 o[INDEX3(i,0,1,numComp,3)] = V0;
558 o[INDEX3(i,1,1,numComp,3)] = V3;
559 o[INDEX3(i,2,1,numComp,3)] = ((f_011[i]-f_010[i])*C0 + (f_101[i]-f_100[i])*C5 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / h2;
560 o[INDEX3(i,0,2,numComp,3)] = V1;
561 o[INDEX3(i,1,2,numComp,3)] = V2;
562 o[INDEX3(i,2,2,numComp,3)] = ((f_011[i]-f_010[i])*C5 + (f_101[i]-f_100[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / h2;
563 o[INDEX3(i,0,3,numComp,3)] = V1;
564 o[INDEX3(i,1,3,numComp,3)] = V3;
565 o[INDEX3(i,2,3,numComp,3)] = ((f_001[i]-f_000[i])*C0 + (f_111[i]-f_110[i])*C5 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / h2;
566 } /* end of component loop i */
567 } /* end of k0 loop */
568 } /* end of k1 loop */
569 } /* end of face 5 */
570 } // end of parallel section
571 /* GENERATOR SNIP_GRAD_FACES BOTTOM */
572 } else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
573 /*** GENERATOR SNIP_GRAD_REDUCED_FACES TOP */
574 #pragma omp parallel
575 {
576 if (m_faceOffset[0] > -1) {
577 #pragma omp for nowait
578 for (index_t k2=0; k2 < m_NE2; ++k2) {
579 for (index_t k1=0; k1 < m_NE1; ++k1) {
580 const register double* f_000 = in.getSampleDataRO(INDEX3(0,k1,k2, m_N0,m_N1));
581 const register double* f_001 = in.getSampleDataRO(INDEX3(0,k1,k2+1, m_N0,m_N1));
582 const register double* f_101 = in.getSampleDataRO(INDEX3(1,k1,k2+1, m_N0,m_N1));
583 const register double* f_011 = in.getSampleDataRO(INDEX3(0,k1+1,k2+1, m_N0,m_N1));
584 const register double* f_100 = in.getSampleDataRO(INDEX3(1,k1,k2, m_N0,m_N1));
585 const register double* f_110 = in.getSampleDataRO(INDEX3(1,k1+1,k2, m_N0,m_N1));
586 const register double* f_010 = in.getSampleDataRO(INDEX3(0,k1+1,k2, m_N0,m_N1));
587 const register double* f_111 = in.getSampleDataRO(INDEX3(1,k1+1,k2+1, m_N0,m_N1));
588 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE1));
589 for (index_t i=0; i < numComp; ++i) {
590 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_010[i]-f_011[i])*C3 / h0;
591 o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]-f_000[i]-f_001[i])*C4 / h1;
592 o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_011[i]-f_000[i]-f_010[i])*C4 / h2;
593 } /* end of component loop i */
594 } /* end of k1 loop */
595 } /* end of k2 loop */
596 } /* end of face 0 */
597 if (m_faceOffset[1] > -1) {
598 #pragma omp for nowait
599 for (index_t k2=0; k2 < m_NE2; ++k2) {
600 for (index_t k1=0; k1 < m_NE1; ++k1) {
601 const register double* f_000 = in.getSampleDataRO(INDEX3(m_N0-2,k1,k2, m_N0,m_N1));
602 const register double* f_001 = in.getSampleDataRO(INDEX3(m_N0-2,k1,k2+1, m_N0,m_N1));
603 const register double* f_101 = in.getSampleDataRO(INDEX3(m_N0-1,k1,k2+1, m_N0,m_N1));
604 const register double* f_011 = in.getSampleDataRO(INDEX3(m_N0-2,k1+1,k2+1, m_N0,m_N1));
605 const register double* f_100 = in.getSampleDataRO(INDEX3(m_N0-1,k1,k2, m_N0,m_N1));
606 const register double* f_110 = in.getSampleDataRO(INDEX3(m_N0-1,k1+1,k2, m_N0,m_N1));
607 const register double* f_010 = in.getSampleDataRO(INDEX3(m_N0-2,k1+1,k2, m_N0,m_N1));
608 const register double* f_111 = in.getSampleDataRO(INDEX3(m_N0-1,k1+1,k2+1, m_N0,m_N1));
609 double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE1));
610 for (index_t i=0; i < numComp; ++i) {
611 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_010[i]-f_011[i])*C3 / h0;
612 o[INDEX3(i,1,0,numComp,3)] = (f_110[i]+f_111[i]-f_100[i]-f_101[i])*C4 / h1;
613 o[INDEX3(i,2,0,numComp,3)] = (f_101[i]+f_111[i]-f_100[i]-f_110[i])*C4 / h2;
614 } /* end of component loop i */
615 } /* end of k1 loop */
616 } /* end of k2 loop */
617 } /* end of face 1 */
618 if (m_faceOffset[2] > -1) {
619 #pragma omp for nowait
620 for (index_t k2=0; k2 < m_NE2; ++k2) {
621 for (index_t k0=0; k0 < m_NE0; ++k0) {
622 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,0,k2, m_N0,m_N1));
623 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,0,k2+1, m_N0,m_N1));
624 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,0,k2+1, m_N0,m_N1));
625 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,0,k2, m_N0,m_N1));
626 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,1,k2+1, m_N0,m_N1));
627 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,1,k2, m_N0,m_N1));
628 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,1,k2, m_N0,m_N1));
629 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,1,k2+1, m_N0,m_N1));
630 double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE0));
631 for (index_t i=0; i < numComp; ++i) {
632 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]-f_000[i]-f_001[i])*C4 / h0;
633 o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_100[i]-f_101[i])*C3 / h1;
634 o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_101[i]-f_000[i]-f_100[i])*C4 / h2;
635 } /* end of component loop i */
636 } /* end of k0 loop */
637 } /* end of k2 loop */
638 } /* end of face 2 */
639 if (m_faceOffset[3] > -1) {
640 #pragma omp for nowait
641 for (index_t k2=0; k2 < m_NE2; ++k2) {
642 for (index_t k0=0; k0 < m_NE0; ++k0) {
643 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,m_N1-1,k2+1, m_N0,m_N1));
644 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,m_N1-1,k2, m_N0,m_N1));
645 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,m_N1-1,k2, m_N0,m_N1));
646 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,m_N1-1,k2+1, m_N0,m_N1));
647 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,m_N1-2,k2, m_N0,m_N1));
648 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,m_N1-2,k2+1, m_N0,m_N1));
649 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,m_N1-2,k2+1, m_N0,m_N1));
650 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,m_N1-2,k2, m_N0,m_N1));
651 double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE0));
652 for (index_t i=0; i < numComp; ++i) {
653 o[INDEX3(i,0,0,numComp,3)] = (f_110[i]+f_111[i]-f_010[i]-f_011[i])*C4 / h0;
654 o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_100[i]-f_101[i])*C3 / h1;
655 o[INDEX3(i,2,0,numComp,3)] = (f_011[i]+f_111[i]-f_010[i]-f_110[i])*C4 / h2;
656 } /* end of component loop i */
657 } /* end of k0 loop */
658 } /* end of k2 loop */
659 } /* end of face 3 */
660 if (m_faceOffset[4] > -1) {
661 #pragma omp for nowait
662 for (index_t k1=0; k1 < m_NE1; ++k1) {
663 for (index_t k0=0; k0 < m_NE0; ++k0) {
664 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,0, m_N0,m_N1));
665 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,0, m_N0,m_N1));
666 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,0, m_N0,m_N1));
667 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,0, m_N0,m_N1));
668 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,1, m_N0,m_N1));
669 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,1, m_N0,m_N1));
670 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,1, m_N0,m_N1));
671 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,1, m_N0,m_N1));
672 double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE0));
673 for (index_t i=0; i < numComp; ++i) {
674 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_110[i]-f_000[i]-f_010[i])*C4 / h0;
675 o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_110[i]-f_000[i]-f_100[i])*C4 / h1;
676 o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_011[i]+f_101[i]+f_111[i]-f_000[i]-f_010[i]-f_100[i]-f_110[i])*C4 / h2;
677 } /* end of component loop i */
678 } /* end of k0 loop */
679 } /* end of k1 loop */
680 } /* end of face 4 */
681 if (m_faceOffset[5] > -1) {
682 #pragma omp for nowait
683 for (index_t k1=0; k1 < m_NE1; ++k1) {
684 for (index_t k0=0; k0 < m_NE0; ++k0) {
685 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,m_N2-1, m_N0,m_N1));
686 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,m_N2-1, m_N0,m_N1));
687 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,m_N2-1, m_N0,m_N1));
688 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,m_N2-1, m_N0,m_N1));
689 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,m_N2-2, m_N0,m_N1));
690 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,m_N2-2, m_N0,m_N1));
691 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,m_N2-2, m_N0,m_N1));
692 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,m_N2-2, m_N0,m_N1));
693 double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE0));
694 for (index_t i=0; i < numComp; ++i) {
695 o[INDEX3(i,0,0,numComp,3)] = (f_101[i]+f_111[i]-f_001[i]-f_011[i])*C4 / h0;
696 o[INDEX3(i,1,0,numComp,3)] = (f_011[i]+f_111[i]-f_001[i]-f_101[i])*C4 / h1;
697 o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_011[i]+f_101[i]+f_111[i]-f_000[i]-f_010[i]-f_100[i]-f_110[i])*C3 / h2;
698 } /* end of component loop i */
699 } /* end of k0 loop */
700 } /* end of k1 loop */
701 } /* end of face 5 */
702 } // end of parallel section
703 /* GENERATOR SNIP_GRAD_REDUCED_FACES BOTTOM */
704 } else {
705 stringstream msg;
706 msg << "setToGradient() not implemented for "
707 << functionSpaceTypeAsString(out.getFunctionSpace().getTypeCode());
708 throw RipleyException(msg.str());
709 }
710 }
711
712 void Brick::setToIntegrals(vector<double>& integrals, const escript::Data& arg) const
713 {
714 escript::Data& in = *const_cast<escript::Data*>(&arg);
715 const dim_t numComp = in.getDataPointSize();
716 const double h0 = m_l0/m_gNE0;
717 const double h1 = m_l1/m_gNE1;
718 const double h2 = m_l2/m_gNE2;
719 if (arg.getFunctionSpace().getTypeCode() == Elements) {
720 const double w_0 = h0*h1*h2/8.;
721 #pragma omp parallel
722 {
723 vector<double> int_local(numComp, 0);
724 #pragma omp for nowait
725 for (index_t k2 = 0; k2 < m_NE2; ++k2) {
726 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
727 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
728 const double* f = in.getSampleDataRO(INDEX3(k0, k1, k2, m_NE0, m_NE1));
729 for (index_t i=0; i < numComp; ++i) {
730 const register double f_0 = f[INDEX2(i,0,numComp)];
731 const register double f_1 = f[INDEX2(i,1,numComp)];
732 const register double f_2 = f[INDEX2(i,2,numComp)];
733 const register double f_3 = f[INDEX2(i,3,numComp)];
734 const register double f_4 = f[INDEX2(i,4,numComp)];
735 const register double f_5 = f[INDEX2(i,5,numComp)];
736 const register double f_6 = f[INDEX2(i,6,numComp)];
737 const register double f_7 = f[INDEX2(i,7,numComp)];
738 int_local[i]+=(f_0+f_1+f_2+f_3+f_4+f_5+f_6+f_7)*w_0;
739 } /* end of component loop i */
740 } /* end of k0 loop */
741 } /* end of k1 loop */
742 } /* end of k2 loop */
743
744 #pragma omp critical
745 for (index_t i=0; i<numComp; i++)
746 integrals[i]+=int_local[i];
747 } // end of parallel section
748 } else if (arg.getFunctionSpace().getTypeCode() == ReducedElements) {
749 const double w_0 = h0*h1*h2;
750 #pragma omp parallel
751 {
752 vector<double> int_local(numComp, 0);
753 #pragma omp for nowait
754 for (index_t k2 = 0; k2 < m_NE2; ++k2) {
755 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
756 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
757 const double* f = in.getSampleDataRO(INDEX3(k0, k1, k2, m_NE0, m_NE1));
758 for (index_t i=0; i < numComp; ++i) {
759 int_local[i]+=f[i]*w_0;
760 } /* end of component loop i */
761 } /* end of k0 loop */
762 } /* end of k1 loop */
763 } /* end of k2 loop */
764
765 #pragma omp critical
766 for (index_t i=0; i<numComp; i++)
767 integrals[i]+=int_local[i];
768 } // end of parallel section
769 } else if (arg.getFunctionSpace().getTypeCode() == FaceElements) {
770 const double w_0 = h1*h2/4.;
771 const double w_1 = h0*h2/4.;
772 const double w_2 = h0*h1/4.;
773 #pragma omp parallel
774 {
775 vector<double> int_local(numComp, 0);
776 if (m_faceOffset[0] > -1) {
777 #pragma omp for nowait
778 for (index_t k2=0; k2 < m_NE2; ++k2) {
779 for (index_t k1=0; k1 < m_NE1; ++k1) {
780 const double* f = in.getSampleDataRO(m_faceOffset[0]+INDEX2(k1,k2,m_NE1));
781 for (index_t i=0; i < numComp; ++i) {
782 const register double f_0 = f[INDEX2(i,0,numComp)];
783 const register double f_1 = f[INDEX2(i,1,numComp)];
784 const register double f_2 = f[INDEX2(i,2,numComp)];
785 const register double f_3 = f[INDEX2(i,3,numComp)];
786 int_local[i]+=(f_0+f_1+f_2+f_3)*w_0;
787 } /* end of component loop i */
788 } /* end of k1 loop */
789 } /* end of k2 loop */
790 }
791
792 if (m_faceOffset[1] > -1) {
793 #pragma omp for nowait
794 for (index_t k2=0; k2 < m_NE2; ++k2) {
795 for (index_t k1=0; k1 < m_NE1; ++k1) {
796 const double* f = in.getSampleDataRO(m_faceOffset[1]+INDEX2(k1,k2,m_NE1));
797 for (index_t i=0; i < numComp; ++i) {
798 const register double f_0 = f[INDEX2(i,0,numComp)];
799 const register double f_1 = f[INDEX2(i,1,numComp)];
800 const register double f_2 = f[INDEX2(i,2,numComp)];
801 const register double f_3 = f[INDEX2(i,3,numComp)];
802 int_local[i]+=(f_0+f_1+f_2+f_3)*w_0;
803 } /* end of component loop i */
804 } /* end of k1 loop */
805 } /* end of k2 loop */
806 }
807
808 if (m_faceOffset[2] > -1) {
809 #pragma omp for nowait
810 for (index_t k2=0; k2 < m_NE2; ++k2) {
811 for (index_t k0=0; k0 < m_NE0; ++k0) {
812 const double* f = in.getSampleDataRO(m_faceOffset[2]+INDEX2(k0,k2,m_NE0));
813 for (index_t i=0; i < numComp; ++i) {
814 const register double f_0 = f[INDEX2(i,0,numComp)];
815 const register double f_1 = f[INDEX2(i,1,numComp)];
816 const register double f_2 = f[INDEX2(i,2,numComp)];
817 const register double f_3 = f[INDEX2(i,3,numComp)];
818 int_local[i]+=(f_0+f_1+f_2+f_3)*w_1;
819 } /* end of component loop i */
820 } /* end of k1 loop */
821 } /* end of k2 loop */
822 }
823
824 if (m_faceOffset[3] > -1) {
825 #pragma omp for nowait
826 for (index_t k2=0; k2 < m_NE2; ++k2) {
827 for (index_t k0=0; k0 < m_NE0; ++k0) {
828 const double* f = in.getSampleDataRO(m_faceOffset[3]+INDEX2(k0,k2,m_NE0));
829 for (index_t i=0; i < numComp; ++i) {
830 const register double f_0 = f[INDEX2(i,0,numComp)];
831 const register double f_1 = f[INDEX2(i,1,numComp)];
832 const register double f_2 = f[INDEX2(i,2,numComp)];
833 const register double f_3 = f[INDEX2(i,3,numComp)];
834 int_local[i]+=(f_0+f_1+f_2+f_3)*w_1;
835 } /* end of component loop i */
836 } /* end of k1 loop */
837 } /* end of k2 loop */
838 }
839
840 if (m_faceOffset[4] > -1) {
841 #pragma omp for nowait
842 for (index_t k1=0; k1 < m_NE1; ++k1) {
843 for (index_t k0=0; k0 < m_NE0; ++k0) {
844 const double* f = in.getSampleDataRO(m_faceOffset[4]+INDEX2(k0,k1,m_NE0));
845 for (index_t i=0; i < numComp; ++i) {
846 const register double f_0 = f[INDEX2(i,0,numComp)];
847 const register double f_1 = f[INDEX2(i,1,numComp)];
848 const register double f_2 = f[INDEX2(i,2,numComp)];
849 const register double f_3 = f[INDEX2(i,3,numComp)];
850 int_local[i]+=(f_0+f_1+f_2+f_3)*w_2;
851 } /* end of component loop i */
852 } /* end of k1 loop */
853 } /* end of k2 loop */
854 }
855
856 if (m_faceOffset[5] > -1) {
857 #pragma omp for nowait
858 for (index_t k1=0; k1 < m_NE1; ++k1) {
859 for (index_t k0=0; k0 < m_NE0; ++k0) {
860 const double* f = in.getSampleDataRO(m_faceOffset[5]+INDEX2(k0,k1,m_NE0));
861 for (index_t i=0; i < numComp; ++i) {
862 const register double f_0 = f[INDEX2(i,0,numComp)];
863 const register double f_1 = f[INDEX2(i,1,numComp)];
864 const register double f_2 = f[INDEX2(i,2,numComp)];
865 const register double f_3 = f[INDEX2(i,3,numComp)];
866 int_local[i]+=(f_0+f_1+f_2+f_3)*w_2;
867 } /* end of component loop i */
868 } /* end of k1 loop */
869 } /* end of k2 loop */
870 }
871
872 #pragma omp critical
873 for (index_t i=0; i<numComp; i++)
874 integrals[i]+=int_local[i];
875 } // end of parallel section
876
877 } else if (arg.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
878 const double w_0 = h1*h2;
879 const double w_1 = h0*h2;
880 const double w_2 = h0*h1;
881 #pragma omp parallel
882 {
883 vector<double> int_local(numComp, 0);
884 if (m_faceOffset[0] > -1) {
885 #pragma omp for nowait
886 for (index_t k2=0; k2 < m_NE2; ++k2) {
887 for (index_t k1=0; k1 < m_NE1; ++k1) {
888 const double* f = in.getSampleDataRO(m_faceOffset[0]+INDEX2(k1,k2,m_NE1));
889 for (index_t i=0; i < numComp; ++i) {
890 int_local[i]+=f[i]*w_0;
891 } /* end of component loop i */
892 } /* end of k1 loop */
893 } /* end of k2 loop */
894 }
895
896 if (m_faceOffset[1] > -1) {
897 #pragma omp for nowait
898 for (index_t k2=0; k2 < m_NE2; ++k2) {
899 for (index_t k1=0; k1 < m_NE1; ++k1) {
900 const double* f = in.getSampleDataRO(m_faceOffset[1]+INDEX2(k1,k2,m_NE1));
901 for (index_t i=0; i < numComp; ++i) {
902 int_local[i]+=f[i]*w_0;
903 } /* end of component loop i */
904 } /* end of k1 loop */
905 } /* end of k2 loop */
906 }
907
908 if (m_faceOffset[2] > -1) {
909 #pragma omp for nowait
910 for (index_t k2=0; k2 < m_NE2; ++k2) {
911 for (index_t k0=0; k0 < m_NE0; ++k0) {
912 const double* f = in.getSampleDataRO(m_faceOffset[2]+INDEX2(k0,k2,m_NE0));
913 for (index_t i=0; i < numComp; ++i) {
914 int_local[i]+=f[i]*w_1;
915 } /* end of component loop i */
916 } /* end of k1 loop */
917 } /* end of k2 loop */
918 }
919
920 if (m_faceOffset[3] > -1) {
921 #pragma omp for nowait
922 for (index_t k2=0; k2 < m_NE2; ++k2) {
923 for (index_t k0=0; k0 < m_NE0; ++k0) {
924 const double* f = in.getSampleDataRO(m_faceOffset[3]+INDEX2(k0,k2,m_NE0));
925 for (index_t i=0; i < numComp; ++i) {
926 int_local[i]+=f[i]*w_1;
927 } /* end of component loop i */
928 } /* end of k1 loop */
929 } /* end of k2 loop */
930 }
931
932 if (m_faceOffset[4] > -1) {
933 #pragma omp for nowait
934 for (index_t k1=0; k1 < m_NE1; ++k1) {
935 for (index_t k0=0; k0 < m_NE0; ++k0) {
936 const double* f = in.getSampleDataRO(m_faceOffset[4]+INDEX2(k0,k1,m_NE0));
937 for (index_t i=0; i < numComp; ++i) {
938 int_local[i]+=f[i]*w_2;
939 } /* end of component loop i */
940 } /* end of k1 loop */
941 } /* end of k2 loop */
942 }
943
944 if (m_faceOffset[5] > -1) {
945 #pragma omp for nowait
946 for (index_t k1=0; k1 < m_NE1; ++k1) {
947 for (index_t k0=0; k0 < m_NE0; ++k0) {
948 const double* f = in.getSampleDataRO(m_faceOffset[5]+INDEX2(k0,k1,m_NE0));
949 for (index_t i=0; i < numComp; ++i) {
950 int_local[i]+=f[i]*w_2;
951 } /* end of component loop i */
952 } /* end of k1 loop */
953 } /* end of k2 loop */
954 }
955
956 #pragma omp critical
957 for (index_t i=0; i<numComp; i++)
958 integrals[i]+=int_local[i];
959 } // end of parallel section
960
961 } else {
962 stringstream msg;
963 msg << "setToIntegrals() not implemented for "
964 << functionSpaceTypeAsString(arg.getFunctionSpace().getTypeCode());
965 throw RipleyException(msg.str());
966 }
967 }
968
969 void Brick::setToNormal(escript::Data& out) const
970 {
971 if (out.getFunctionSpace().getTypeCode() == FaceElements) {
972 #pragma omp parallel
973 {
974 if (m_faceOffset[0] > -1) {
975 #pragma omp for nowait
976 for (index_t k2 = 0; k2 < m_NE2; ++k2) {
977 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
978 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE1));
979 // set vector at four quadrature points
980 *o++ = -1.; *o++ = 0.; *o++ = 0.;
981 *o++ = -1.; *o++ = 0.; *o++ = 0.;
982 *o++ = -1.; *o++ = 0.; *o++ = 0.;
983 *o++ = -1.; *o++ = 0.; *o = 0.;
984 }
985 }
986 }
987
988 if (m_faceOffset[1] > -1) {
989 #pragma omp for nowait
990 for (index_t k2 = 0; k2 < m_NE2; ++k2) {
991 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
992 double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE1));
993 // set vector at four quadrature points
994 *o++ = 1.; *o++ = 0.; *o++ = 0.;
995 *o++ = 1.; *o++ = 0.; *o++ = 0.;
996 *o++ = 1.; *o++ = 0.; *o++ = 0.;
997 *o++ = 1.; *o++ = 0.; *o = 0.;
998 }
999 }
1000 }
1001
1002 if (m_faceOffset[2] > -1) {
1003 #pragma omp for nowait
1004 for (index_t k2 = 0; k2 < m_NE2; ++k2) {
1005 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1006 double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE0));
1007 // set vector at four quadrature points
1008 *o++ = 0.; *o++ = -1.; *o++ = 0.;
1009 *o++ = 0.; *o++ = -1.; *o++ = 0.;
1010 *o++ = 0.; *o++ = -1.; *o++ = 0.;
1011 *o++ = 0.; *o++ = -1.; *o = 0.;
1012 }
1013 }
1014 }
1015
1016 if (m_faceOffset[3] > -1) {
1017 #pragma omp for nowait
1018 for (index_t k2 = 0; k2 < m_NE2; ++k2) {
1019 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1020 double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE0));
1021 // set vector at four quadrature points
1022 *o++ = 0.; *o++ = 1.; *o++ = 0.;
1023 *o++ = 0.; *o++ = 1.; *o++ = 0.;
1024 *o++ = 0.; *o++ = 1.; *o++ = 0.;
1025 *o++ = 0.; *o++ = 1.; *o = 0.;
1026 }
1027 }
1028 }
1029
1030 if (m_faceOffset[4] > -1) {
1031 #pragma omp for nowait
1032 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
1033 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1034 double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE0));
1035 // set vector at four quadrature points
1036 *o++ = 0.; *o++ = 0.; *o++ = -1.;
1037 *o++ = 0.; *o++ = 0.; *o++ = -1.;
1038 *o++ = 0.; *o++ = 0.; *o++ = -1.;
1039 *o++ = 0.; *o++ = 0.; *o = -1.;
1040 }
1041 }
1042 }
1043
1044 if (m_faceOffset[5] > -1) {
1045 #pragma omp for nowait
1046 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
1047 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1048 double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE0));
1049 // set vector at four quadrature points
1050 *o++ = 0.; *o++ = 0.; *o++ = 1.;
1051 *o++ = 0.; *o++ = 0.; *o++ = 1.;
1052 *o++ = 0.; *o++ = 0.; *o++ = 1.;
1053 *o++ = 0.; *o++ = 0.; *o = 1.;
1054 }
1055 }
1056 }
1057 } // end of parallel section
1058 } else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
1059 #pragma omp parallel
1060 {
1061 if (m_faceOffset[0] > -1) {
1062 #pragma omp for nowait
1063 for (index_t k2 = 0; k2 < m_NE2; ++k2) {
1064 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
1065 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE1));
1066 *o++ = -1.;
1067 *o++ = 0.;
1068 *o = 0.;
1069 }
1070 }
1071 }
1072
1073 if (m_faceOffset[1] > -1) {
1074 #pragma omp for nowait
1075 for (index_t k2 = 0; k2 < m_NE2; ++k2) {
1076 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
1077 double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE1));
1078 *o++ = 1.;
1079 *o++ = 0.;
1080 *o = 0.;
1081 }
1082 }
1083 }
1084
1085 if (m_faceOffset[2] > -1) {
1086 #pragma omp for nowait
1087 for (index_t k2 = 0; k2 < m_NE2; ++k2) {
1088 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1089 double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE0));
1090 *o++ = 0.;
1091 *o++ = -1.;
1092 *o = 0.;
1093 }
1094 }
1095 }
1096
1097 if (m_faceOffset[3] > -1) {
1098 #pragma omp for nowait
1099 for (index_t k2 = 0; k2 < m_NE2; ++k2) {
1100 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1101 double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE0));
1102 *o++ = 0.;
1103 *o++ = 1.;
1104 *o = 0.;
1105 }
1106 }
1107 }
1108
1109 if (m_faceOffset[4] > -1) {
1110 #pragma omp for nowait
1111 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
1112 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1113 double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE0));
1114 *o++ = 0.;
1115 *o++ = 0.;
1116 *o = -1.;
1117 }
1118 }
1119 }
1120
1121 if (m_faceOffset[5] > -1) {
1122 #pragma omp for nowait
1123 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
1124 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1125 double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE0));
1126 *o++ = 0.;
1127 *o++ = 0.;
1128 *o = 1.;
1129 }
1130 }
1131 }
1132 } // end of parallel section
1133
1134 } else {
1135 stringstream msg;
1136 msg << "setToNormal() not implemented for "
1137 << functionSpaceTypeAsString(out.getFunctionSpace().getTypeCode());
1138 throw RipleyException(msg.str());
1139 }
1140 }
1141
1142 Paso_SystemMatrixPattern* Brick::getPattern(bool reducedRowOrder,
1143 bool reducedColOrder) const
1144 {
1145 if (reducedRowOrder || reducedColOrder)
1146 throw RipleyException("getPattern() not implemented for reduced order");
1147
1148 throw RipleyException("getPattern() not implemented");
1149 }
1150
1151 void Brick::Print_Mesh_Info(const bool full) const
1152 {
1153 RipleyDomain::Print_Mesh_Info(full);
1154 if (full) {
1155 cout << " Id Coordinates" << endl;
1156 cout.precision(15);
1157 cout.setf(ios::scientific, ios::floatfield);
1158 pair<double,double> xdx = getFirstCoordAndSpacing(0);
1159 pair<double,double> ydy = getFirstCoordAndSpacing(1);
1160 pair<double,double> zdz = getFirstCoordAndSpacing(2);
1161 for (index_t i=0; i < getNumNodes(); i++) {
1162 cout << " " << setw(5) << m_nodeId[i]
1163 << " " << xdx.first+(i%m_N0)*xdx.second
1164 << " " << ydy.first+(i%(m_N0*m_N1)/m_N0)*ydy.second
1165 << " " << zdz.first+(i/(m_N0*m_N1))*zdz.second << endl;
1166 }
1167 }
1168 }
1169
1170 IndexVector Brick::getNumNodesPerDim() const
1171 {
1172 IndexVector ret;
1173 ret.push_back(m_N0);
1174 ret.push_back(m_N1);
1175 ret.push_back(m_N2);
1176 return ret;
1177 }
1178
1179 IndexVector Brick::getNumElementsPerDim() const
1180 {
1181 IndexVector ret;
1182 ret.push_back(m_NE0);
1183 ret.push_back(m_NE1);
1184 ret.push_back(m_NE2);
1185 return ret;
1186 }
1187
1188 IndexVector Brick::getNumFacesPerBoundary() const
1189 {
1190 IndexVector ret(6, 0);
1191 //left
1192 if (m_offset0==0)
1193 ret[0]=m_NE1*m_NE2;
1194 //right
1195 if (m_mpiInfo->rank%m_NX==m_NX-1)
1196 ret[1]=m_NE1*m_NE2;
1197 //bottom
1198 if (m_offset1==0)
1199 ret[2]=m_NE0*m_NE2;
1200 //top
1201 if (m_mpiInfo->rank%(m_NX*m_NY)/m_NX==m_NY-1)
1202 ret[3]=m_NE0*m_NE2;
1203 //front
1204 if (m_offset2==0)
1205 ret[4]=m_NE0*m_NE1;
1206 //back
1207 if (m_mpiInfo->rank/(m_NX*m_NY)==m_NZ-1)
1208 ret[5]=m_NE0*m_NE1;
1209 return ret;
1210 }
1211
1212 pair<double,double> Brick::getFirstCoordAndSpacing(dim_t dim) const
1213 {
1214 if (dim==0)
1215 return pair<double,double>((m_l0*m_offset0)/m_gNE0, m_l0/m_gNE0);
1216 else if (dim==1)
1217 return pair<double,double>((m_l1*m_offset1)/m_gNE1, m_l1/m_gNE1);
1218 else if (dim==2)
1219 return pair<double,double>((m_l2*m_offset2)/m_gNE2, m_l2/m_gNE2);
1220
1221 throw RipleyException("getFirstCoordAndSpacing(): invalid argument");
1222 }
1223
1224
1225 //protected
1226 dim_t Brick::getNumFaceElements() const
1227 {
1228 dim_t n=0;
1229 //left
1230 if (m_offset0==0)
1231 n+=m_NE1*m_NE2;
1232 //right
1233 if (m_mpiInfo->rank%m_NX==m_NX-1)
1234 n+=m_NE1*m_NE2;
1235 //bottom
1236 if (m_offset1==0)
1237 n+=m_NE0*m_NE2;
1238 //top
1239 if (m_mpiInfo->rank%(m_NX*m_NY)/m_NX==m_NY-1)
1240 n+=m_NE0*m_NE2;
1241 //front
1242 if (m_offset2==0)
1243 n+=m_NE0*m_NE1;
1244 //back
1245 if (m_mpiInfo->rank/(m_NX*m_NY)==m_NZ-1)
1246 n+=m_NE0*m_NE1;
1247
1248 return n;
1249 }
1250
1251 //protected
1252 void Brick::assembleCoordinates(escript::Data& arg) const
1253 {
1254 escriptDataC x = arg.getDataC();
1255 int numDim = m_numDim;
1256 if (!isDataPointShapeEqual(&x, 1, &numDim))
1257 throw RipleyException("setToX: Invalid Data object shape");
1258 if (!numSamplesEqual(&x, 1, getNumNodes()))
1259 throw RipleyException("setToX: Illegal number of samples in Data object");
1260
1261 pair<double,double> xdx = getFirstCoordAndSpacing(0);
1262 pair<double,double> ydy = getFirstCoordAndSpacing(1);
1263 pair<double,double> zdz = getFirstCoordAndSpacing(2);
1264 arg.requireWrite();
1265 #pragma omp parallel for
1266 for (dim_t i2 = 0; i2 < m_N2; i2++) {
1267 for (dim_t i1 = 0; i1 < m_N1; i1++) {
1268 for (dim_t i0 = 0; i0 < m_N0; i0++) {
1269 double* point = arg.getSampleDataRW(i0+m_N0*i1+m_N0*m_N1*i2);
1270 point[0] = xdx.first+i0*xdx.second;
1271 point[1] = ydy.first+i1*ydy.second;
1272 point[2] = zdz.first+i2*zdz.second;
1273 }
1274 }
1275 }
1276 }
1277
1278 //private
1279 void Brick::populateSampleIds()
1280 {
1281 // identifiers are ordered from left to right, bottom to top, front to back
1282 // on each rank, except for the shared nodes which are owned by the rank
1283 // below / to the left / to the front of the current rank
1284
1285 // build node distribution vector first.
1286 // m_nodeDistribution[i] is the first node id on rank i, that is
1287 // rank i owns m_nodeDistribution[i+1]-nodeDistribution[i] nodes
1288 m_nodeDistribution.assign(m_mpiInfo->size+1, 0);
1289 m_nodeDistribution[1]=getNumNodes();
1290 for (dim_t k=1; k<m_mpiInfo->size-1; k++) {
1291 const index_t x = k%m_NX;
1292 const index_t y = k%(m_NX*m_NY)/m_NX;
1293 const index_t z = k/(m_NX*m_NY);
1294 index_t numNodes=getNumNodes();
1295 if (x>0)
1296 numNodes-=m_N1*m_N2;
1297 if (y>0)
1298 numNodes-=m_N0*m_N2;
1299 if (z>0)
1300 numNodes-=m_N0*m_N1;
1301 // if an edge was subtracted twice add it back
1302 if (x>0 && y>0)
1303 numNodes+=m_N2;
1304 if (x>0 && z>0)
1305 numNodes+=m_N1;
1306 if (y>0 && z>0)
1307 numNodes+=m_N0;
1308 // the corner node was removed 3x and added back 3x, so subtract it
1309 if (x>0 && y>0 && z>0)
1310 numNodes--;
1311 m_nodeDistribution[k+1]=m_nodeDistribution[k]+numNodes;
1312 }
1313 m_nodeDistribution[m_mpiInfo->size]=getNumDataPointsGlobal();
1314
1315 m_nodeId.resize(getNumNodes());
1316
1317 // the bottom, left and front planes are not owned by this rank so the
1318 // identifiers need to be computed accordingly
1319 const index_t left = (m_offset0==0 ? 0 : 1);
1320 const index_t bottom = (m_offset1==0 ? 0 : 1);
1321 const index_t front = (m_offset2==0 ? 0 : 1);
1322
1323 // case 1: all nodes on left plane are owned by rank on the left
1324 if (left>0) {
1325 const int neighbour=m_mpiInfo->rank-1;
1326 const index_t leftN0=(neighbour%m_NX == 0 ? m_N0 : m_N0-1);
1327 const index_t leftN1=(neighbour%(m_NX*m_NY)/m_NX==0 ? m_N1 : m_N1-1);
1328 #pragma omp parallel for
1329 for (dim_t i2=front; i2<m_N2; i2++) {
1330 for (dim_t i1=bottom; i1<m_N1; i1++) {
1331 m_nodeId[i1*m_N0+i2*m_N0*m_N1]=m_nodeDistribution[neighbour]
1332 + (i1-bottom+1)*leftN0
1333 + (i2-front)*leftN0*leftN1 - 1;
1334 }
1335 }
1336 }
1337 // case 2: all nodes on bottom plane are owned by rank below
1338 if (bottom>0) {
1339 const int neighbour=m_mpiInfo->rank-m_NX;
1340 const index_t bottomN0=(neighbour%m_NX == 0 ? m_N0 : m_N0-1);
1341 const index_t bottomN1=(neighbour%(m_NX*m_NY)/m_NX==0 ? m_N1 : m_N1-1);
1342 #pragma omp parallel for
1343 for (dim_t i2=front; i2<m_N2; i2++) {
1344 for (dim_t i0=left; i0<m_N0; i0++) {
1345 m_nodeId[i0+i2*m_N0*m_N1]=m_nodeDistribution[neighbour]
1346 + bottomN0*(bottomN1-1)
1347 + (i2-front)*bottomN0*bottomN1 + i0-left;
1348 }
1349 }
1350 }
1351 // case 3: all nodes on front plane are owned by rank in front
1352 if (front>0) {
1353 const int neighbour=m_mpiInfo->rank-m_NX*m_NY;
1354 const index_t N0=(neighbour%m_NX == 0 ? m_N0 : m_N0-1);
1355 const index_t N1=(neighbour%(m_NX*m_NY)/m_NX==0 ? m_N1 : m_N1-1);
1356 const index_t N2=(neighbour/(m_NX*m_NY)==0 ? m_N2 : m_N2-1);
1357 #pragma omp parallel for
1358 for (dim_t i1=bottom; i1<m_N1; i1++) {
1359 for (dim_t i0=left; i0<m_N0; i0++) {
1360 m_nodeId[i0+i1*m_N0]=m_nodeDistribution[neighbour]
1361 + N0*N1*(N2-1)+(i1-bottom)*N0 + i0-left;
1362 }
1363 }
1364 }
1365 // case 4: nodes on front bottom edge are owned by the corresponding rank
1366 if (front>0 && bottom>0) {
1367 const int neighbour=m_mpiInfo->rank-m_NX*(m_NY+1);
1368 const index_t N0=(neighbour%m_NX == 0 ? m_N0 : m_N0-1);
1369 const index_t N1=(neighbour%(m_NX*m_NY)/m_NX==0 ? m_N1 : m_N1-1);
1370 const index_t N2=(neighbour/(m_NX*m_NY)==0 ? m_N2 : m_N2-1);
1371 #pragma omp parallel for
1372 for (dim_t i0=left; i0<m_N0; i0++) {
1373 m_nodeId[i0]=m_nodeDistribution[neighbour]
1374 + N0*N1*(N2-1)+(N1-1)*N0 + i0-left;
1375 }
1376 }
1377 // case 5: nodes on left bottom edge are owned by the corresponding rank
1378 if (left>0 && bottom>0) {
1379 const int neighbour=m_mpiInfo->rank-m_NX-1;
1380 const index_t N0=(neighbour%m_NX == 0 ? m_N0 : m_N0-1);
1381 const index_t N1=(neighbour%(m_NX*m_NY)/m_NX==0 ? m_N1 : m_N1-1);
1382 #pragma omp parallel for
1383 for (dim_t i2=front; i2<m_N2; i2++) {
1384 m_nodeId[i2*m_N0*m_N1]=m_nodeDistribution[neighbour]
1385 + (1+i2-front)*N0*N1-1;
1386 }
1387 }
1388 // case 6: nodes on left front edge are owned by the corresponding rank
1389 if (left>0 && front>0) {
1390 const int neighbour=m_mpiInfo->rank-m_NX*m_NY-1;
1391 const index_t N0=(neighbour%m_NX == 0 ? m_N0 : m_N0-1);
1392 const index_t N1=(neighbour%(m_NX*m_NY)/m_NX==0 ? m_N1 : m_N1-1);
1393 const index_t N2=(neighbour/(m_NX*m_NY)==0 ? m_N2 : m_N2-1);
1394 #pragma omp parallel for
1395 for (dim_t i1=bottom; i1<m_N1; i1++) {
1396 m_nodeId[i1*m_N0]=m_nodeDistribution[neighbour]
1397 + N0*N1*(N2-1)+N0-1+(i1-bottom)*N0;
1398 }
1399 }
1400 // case 7: bottom-left-front corner node owned by corresponding rank
1401 if (left>0 && bottom>0 && front>0) {
1402 const int neighbour=m_mpiInfo->rank-m_NX*(m_NY+1)-1;
1403 const index_t N0=(neighbour%m_NX == 0 ? m_N0 : m_N0-1);
1404 const index_t N1=(neighbour%(m_NX*m_NY)/m_NX==0 ? m_N1 : m_N1-1);
1405 const index_t N2=(neighbour/(m_NX*m_NY) == 0 ? m_N2 : m_N2-1);
1406 m_nodeId[0]=m_nodeDistribution[neighbour]+N0*N1*N2-1;
1407 }
1408
1409 // the rest of the id's are contiguous
1410 const index_t firstId=m_nodeDistribution[m_mpiInfo->rank];
1411 #pragma omp parallel for
1412 for (dim_t i2=front; i2<m_N2; i2++) {
1413 for (dim_t i1=bottom; i1<m_N1; i1++) {
1414 for (dim_t i0=left; i0<m_N0; i0++) {
1415 m_nodeId[i0+i1*m_N0+i2*m_N0*m_N1] = firstId+i0-left
1416 +(i1-bottom)*(m_N0-left)
1417 +(i2-front)*(m_N0-left)*(m_N1-bottom);
1418 }
1419 }
1420 }
1421
1422 // elements
1423 m_elementId.resize(getNumElements());
1424 #pragma omp parallel for
1425 for (dim_t k=0; k<getNumElements(); k++) {
1426 m_elementId[k]=k;
1427 }
1428
1429 // face elements
1430 m_faceId.resize(getNumFaceElements());
1431 #pragma omp parallel for
1432 for (dim_t k=0; k<getNumFaceElements(); k++) {
1433 m_faceId[k]=k;
1434 }
1435
1436 // generate face offset vector and set face tags
1437 const IndexVector facesPerEdge = getNumFacesPerBoundary();
1438 const index_t LEFT=1, RIGHT=2, BOTTOM=10, TOP=20, FRONT=100, BACK=200;
1439 const index_t faceTag[] = { LEFT, RIGHT, BOTTOM, TOP, FRONT, BACK };
1440 m_faceOffset.assign(facesPerEdge.size(), -1);
1441 m_faceTags.clear();
1442 index_t offset=0;
1443 for (size_t i=0; i<facesPerEdge.size(); i++) {
1444 if (facesPerEdge[i]>0) {
1445 m_faceOffset[i]=offset;
1446 offset+=facesPerEdge[i];
1447 m_faceTags.insert(m_faceTags.end(), facesPerEdge[i], faceTag[i]);
1448 }
1449 }
1450 setTagMap("left", LEFT);
1451 setTagMap("right", RIGHT);
1452 setTagMap("bottom", BOTTOM);
1453 setTagMap("top", TOP);
1454 setTagMap("front", FRONT);
1455 setTagMap("back", BACK);
1456 updateTagsInUse(FaceElements);
1457 }
1458
1459 //protected
1460 void Brick::interpolateNodesOnElements(escript::Data& out, escript::Data& in,
1461 bool reduced) const
1462 {
1463 const dim_t numComp = in.getDataPointSize();
1464 if (reduced) {
1465 /*** GENERATOR SNIP_INTERPOLATE_REDUCED_ELEMENTS TOP */
1466 const double c0 = .125;
1467 #pragma omp parallel for
1468 for (index_t k2=0; k2 < m_NE2; ++k2) {
1469 for (index_t k1=0; k1 < m_NE1; ++k1) {
1470 for (index_t k0=0; k0 < m_NE0; ++k0) {
1471 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,k2, m_N0,m_N1));
1472 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,k2+1, m_N0,m_N1));
1473 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,k2+1, m_N0,m_N1));
1474 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,k2+1, m_N0,m_N1));
1475 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,k2, m_N0,m_N1));
1476 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2, m_N0,m_N1));
1477 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,k2, m_N0,m_N1));
1478 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2+1, m_N0,m_N1));
1479 double* o = out.getSampleDataRW(INDEX3(k0,k1,k2,m_NE0,m_NE1));
1480 for (index_t i=0; i < numComp; ++i) {
1481 o[INDEX2(i,numComp,0)] = c0*(f_000[i] + f_001[i] + f_010[i] + f_011[i] + f_100[i] + f_101[i] + f_110[i] + f_111[i]);
1482 } /* end of component loop i */
1483 } /* end of k0 loop */
1484 } /* end of k1 loop */
1485 } /* end of k2 loop */
1486 /* GENERATOR SNIP_INTERPOLATE_REDUCED_ELEMENTS BOTTOM */
1487 } else {
1488 /*** GENERATOR SNIP_INTERPOLATE_ELEMENTS TOP */
1489 const double c0 = .0094373878376559314545;
1490 const double c1 = .035220810900864519624;
1491 const double c2 = .13144585576580214704;
1492 const double c3 = .49056261216234406855;
1493 #pragma omp parallel for
1494 for (index_t k2=0; k2 < m_NE2; ++k2) {
1495 for (index_t k1=0; k1 < m_NE1; ++k1) {
1496 for (index_t k0=0; k0 < m_NE0; ++k0) {
1497 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,k2, m_N0,m_N1));
1498 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,k2+1, m_N0,m_N1));
1499 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,k2+1, m_N0,m_N1));
1500 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,k2+1, m_N0,m_N1));
1501 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2, m_N0,m_N1));
1502 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,k2, m_N0,m_N1));
1503 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,k2, m_N0,m_N1));
1504 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2+1, m_N0,m_N1));
1505 double* o = out.getSampleDataRW(INDEX3(k0,k1,k2,m_NE0,m_NE1));
1506 for (index_t i=0; i < numComp; ++i) {
1507 o[INDEX2(i,numComp,0)] = f_000[i]*c3 + f_111[i]*c0 + c2*(f_001[i] + f_010[i] + f_100[i]) + c1*(f_011[i] + f_101[i] + f_110[i]);
1508 o[INDEX2(i,numComp,1)] = f_011[i]*c0 + f_100[i]*c3 + c2*(f_000[i] + f_101[i] + f_110[i]) + c1*(f_001[i] + f_010[i] + f_111[i]);
1509 o[INDEX2(i,numComp,2)] = f_010[i]*c3 + f_101[i]*c0 + c2*(f_000[i] + f_011[i] + f_110[i]) + c1*(f_001[i] + f_100[i] + f_111[i]);
1510 o[INDEX2(i,numComp,3)] = f_001[i]*c0 + f_110[i]*c3 + c2*(f_010[i] + f_100[i] + f_111[i]) + c1*(f_000[i] + f_011[i] + f_101[i]);
1511 o[INDEX2(i,numComp,4)] = f_001[i]*c3 + f_110[i]*c0 + c2*(f_000[i] + f_011[i] + f_101[i]) + c1*(f_010[i] + f_100[i] + f_111[i]);
1512 o[INDEX2(i,numComp,5)] = f_010[i]*c0 + f_101[i]*c3 + c2*(f_001[i] + f_100[i] + f_111[i]) + c1*(f_000[i] + f_011[i] + f_110[i]);
1513 o[INDEX2(i,numComp,6)] = f_011[i]*c3 + f_100[i]*c0 + c2*(f_001[i] + f_010[i] + f_111[i]) + c1*(f_000[i] + f_101[i] + f_110[i]);
1514 o[INDEX2(i,numComp,7)] = f_000[i]*c0 + f_111[i]*c3 + c2*(f_011[i] + f_101[i] + f_110[i]) + c1*(f_001[i] + f_010[i] + f_100[i]);
1515 } /* end of component loop i */
1516 } /* end of k0 loop */
1517 } /* end of k1 loop */
1518 } /* end of k2 loop */
1519 /* GENERATOR SNIP_INTERPOLATE_ELEMENTS BOTTOM */
1520 }
1521 }
1522
1523 //protected
1524 void Brick::interpolateNodesOnFaces(escript::Data& out, escript::Data& in,
1525 bool reduced) const
1526 {
1527 const dim_t numComp = in.getDataPointSize();
1528 if (reduced) {
1529 const double c0 = .25;
1530 #pragma omp parallel
1531 {
1532 /*** GENERATOR SNIP_INTERPOLATE_REDUCED_FACES TOP */
1533 if (m_faceOffset[0] > -1) {
1534 #pragma omp for nowait
1535 for (index_t k2=0; k2 < m_NE2; ++k2) {
1536 for (index_t k1=0; k1 < m_NE1; ++k1) {
1537 const register double* f_011 = in.getSampleDataRO(INDEX3(0,k1+1,k2+1, m_N0,m_N1));
1538 const register double* f_010 = in.getSampleDataRO(INDEX3(0,k1+1,k2, m_N0,m_N1));
1539 const register double* f_001 = in.getSampleDataRO(INDEX3(0,k1,k2+1, m_N0,m_N1));
1540 const register double* f_000 = in.getSampleDataRO(INDEX3(0,k1,k2, m_N0,m_N1));
1541 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE1));
1542 for (index_t i=0; i < numComp; ++i) {
1543 o[INDEX2(i,numComp,0)] = c0*(f_000[i] + f_001[i] + f_010[i] + f_011[i]);
1544 } /* end of component loop i */
1545 } /* end of k1 loop */
1546 } /* end of k2 loop */
1547 } /* end of face 0 */
1548 if (m_faceOffset[1] > -1) {
1549 #pragma omp for nowait
1550 for (index_t k2=0; k2 < m_NE2; ++k2) {
1551 for (index_t k1=0; k1 < m_NE1; ++k1) {
1552 const register double* f_110 = in.getSampleDataRO(INDEX3(m_N0-1,k1+1,k2, m_N0,m_N1));
1553 const register double* f_100 = in.getSampleDataRO(INDEX3(m_N0-1,k1,k2, m_N0,m_N1));
1554 const register double* f_101 = in.getSampleDataRO(INDEX3(m_N0-1,k1,k2+1, m_N0,m_N1));
1555 const register double* f_111 = in.getSampleDataRO(INDEX3(m_N0-1,k1+1,k2+1, m_N0,m_N1));
1556 double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE1));
1557 for (index_t i=0; i < numComp; ++i) {
1558 o[INDEX2(i,numComp,0)] = c0*(f_100[i] + f_101[i] + f_110[i] + f_111[i]);
1559 } /* end of component loop i */
1560 } /* end of k1 loop */
1561 } /* end of k2 loop */
1562 } /* end of face 1 */
1563 if (m_faceOffset[2] > -1) {
1564 #pragma omp for nowait
1565 for (index_t k2=0; k2 < m_NE2; ++k2) {
1566 for (index_t k0=0; k0 < m_NE0; ++k0) {
1567 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,0,k2, m_N0,m_N1));
1568 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,0,k2+1, m_N0,m_N1));
1569 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,0,k2+1, m_N0,m_N1));
1570 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,0,k2, m_N0,m_N1));
1571 double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE0));
1572 for (index_t i=0; i < numComp; ++i) {
1573 o[INDEX2(i,numComp,0)] = c0*(f_000[i] + f_001[i] + f_100[i] + f_101[i]);
1574 } /* end of component loop i */
1575 } /* end of k0 loop */
1576 } /* end of k2 loop */
1577 } /* end of face 2 */
1578 if (m_faceOffset[3] > -1) {
1579 #pragma omp for nowait
1580 for (index_t k2=0; k2 < m_NE2; ++k2) {
1581 for (index_t k0=0; k0 < m_NE0; ++k0) {
1582 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,m_N1-1,k2, m_N0,m_N1));
1583 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,m_N1-1,k2+1, m_N0,m_N1));
1584 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,m_N1-1,k2, m_N0,m_N1));
1585 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,m_N1-1,k2+1, m_N0,m_N1));
1586 double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE0));
1587 for (index_t i=0; i < numComp; ++i) {
1588 o[INDEX2(i,numComp,0)] = c0*(f_010[i] + f_011[i] + f_110[i] + f_111[i]);
1589 } /* end of component loop i */
1590 } /* end of k0 loop */
1591 } /* end of k2 loop */
1592 } /* end of face 3 */
1593 if (m_faceOffset[4] > -1) {
1594 #pragma omp for nowait
1595 for (index_t k1=0; k1 < m_NE1; ++k1) {
1596 for (index_t k0=0; k0 < m_NE0; ++k0) {
1597 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,0, m_N0,m_N1));
1598 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,0, m_N0,m_N1));
1599 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,0, m_N0,m_N1));
1600 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,0, m_N0,m_N1));
1601 double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE0));
1602 for (index_t i=0; i < numComp; ++i) {
1603 o[INDEX2(i,numComp,0)] = c0*(f_000[i] + f_010[i] + f_100[i] + f_110[i]);
1604 } /* end of component loop i */
1605 } /* end of k0 loop */
1606 } /* end of k1 loop */
1607 } /* end of face 4 */
1608 if (m_faceOffset[5] > -1) {
1609 #pragma omp for nowait
1610 for (index_t k1=0; k1 < m_NE1; ++k1) {
1611 for (index_t k0=0; k0 < m_NE0; ++k0) {
1612 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,m_N2-1, m_N0,m_N1));
1613 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,m_N2-1, m_N0,m_N1));
1614 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,m_N2-1, m_N0,m_N1));
1615 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,m_N2-1, m_N0,m_N1));
1616 double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE0));
1617 for (index_t i=0; i < numComp; ++i) {
1618 o[INDEX2(i,numComp,0)] = c0*(f_001[i] + f_011[i] + f_101[i] + f_111[i]);
1619 } /* end of component loop i */
1620 } /* end of k0 loop */
1621 } /* end of k1 loop */
1622 } /* end of face 5 */
1623 /* GENERATOR SNIP_INTERPOLATE_REDUCED_FACES BOTTOM */
1624 } // end of parallel section
1625 } else {
1626 const double c0 = 0.044658198738520451079;
1627 const double c1 = 0.16666666666666666667;
1628 const double c2 = 0.62200846792814621559;
1629 #pragma omp parallel
1630 {
1631 /*** GENERATOR SNIP_INTERPOLATE_FACES TOP */
1632 if (m_faceOffset[0] > -1) {
1633 #pragma omp for nowait
1634 for (index_t k2=0; k2 < m_NE2; ++k2) {
1635 for (index_t k1=0; k1 < m_NE1; ++k1) {
1636 const register double* f_000 = in.getSampleDataRO(INDEX3(0,k1,k2, m_N0,m_N1));
1637 const register double* f_001 = in.getSampleDataRO(INDEX3(0,k1,k2+1, m_N0,m_N1));
1638 const register double* f_011 = in.getSampleDataRO(INDEX3(0,k1+1,k2+1, m_N0,m_N1));
1639 const register double* f_010 = in.getSampleDataRO(INDEX3(0,k1+1,k2, m_N0,m_N1));
1640 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE1));
1641 for (index_t i=0; i < numComp; ++i) {
1642 o[INDEX2(i,numComp,0)] = f_000[i]*c2 + f_011[i]*c0 + c1*(f_001[i] + f_010[i]);
1643 o[INDEX2(i,numComp,1)] = f_001[i]*c0 + f_010[i]*c2 + c1*(f_000[i] + f_011[i]);
1644 o[INDEX2(i,numComp,2)] = f_001[i]*c2 + f_010[i]*c0 + c1*(f_000[i] + f_011[i]);
1645 o[INDEX2(i,numComp,3)] = f_000[i]*c0 + f_011[i]*c2 + c1*(f_001[i] + f_010[i]);
1646 } /* end of component loop i */
1647 } /* end of k1 loop */
1648 } /* end of k2 loop */
1649 } /* end of face 0 */
1650 if (m_faceOffset[1] > -1) {
1651 #pragma omp for nowait
1652 for (index_t k2=0; k2 < m_NE2; ++k2) {
1653 for (index_t k1=0; k1 < m_NE1; ++k1) {
1654 const register double* f_101 = in.getSampleDataRO(INDEX3(m_N0-1,k1,k2+1, m_N0,m_N1));
1655 const register double* f_100 = in.getSampleDataRO(INDEX3(m_N0-1,k1,k2, m_N0,m_N1));
1656 const register double* f_110 = in.getSampleDataRO(INDEX3(m_N0-1,k1+1,k2, m_N0,m_N1));
1657 const register double* f_111 = in.getSampleDataRO(INDEX3(m_N0-1,k1+1,k2+1, m_N0,m_N1));
1658 double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE1));
1659 for (index_t i=0; i < numComp; ++i) {
1660 o[INDEX2(i,numComp,0)] = f_100[i]*c2 + f_111[i]*c0 + c1*(f_101[i] + f_110[i]);
1661 o[INDEX2(i,numComp,1)] = f_101[i]*c0 + f_110[i]*c2 + c1*(f_100[i] + f_111[i]);
1662 o[INDEX2(i,numComp,2)] = f_101[i]*c2 + f_110[i]*c0 + c1*(f_100[i] + f_111[i]);
1663 o[INDEX2(i,numComp,3)] = f_100[i]*c0 + f_111[i]*c2 + c1*(f_101[i] + f_110[i]);
1664 } /* end of component loop i */
1665 } /* end of k1 loop */
1666 } /* end of k2 loop */
1667 } /* end of face 1 */
1668 if (m_faceOffset[2] > -1) {
1669 #pragma omp for nowait
1670 for (index_t k2=0; k2 < m_NE2; ++k2) {
1671 for (index_t k0=0; k0 < m_NE0; ++k0) {
1672 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,0,k2, m_N0,m_N1));
1673 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,0,k2+1, m_N0,m_N1));
1674 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,0,k2+1, m_N0,m_N1));
1675 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,0,k2, m_N0,m_N1));
1676 double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE0));
1677 for (index_t i=0; i < numComp; ++i) {
1678 o[INDEX2(i,numComp,0)] = f_000[i]*c2 + f_101[i]*c0 + c1*(f_001[i] + f_100[i]);
1679 o[INDEX2(i,numComp,1)] = f_001[i]*c0 + f_100[i]*c2 + c1*(f_000[i] + f_101[i]);
1680 o[INDEX2(i,numComp,2)] = f_001[i]*c2 + f_100[i]*c0 + c1*(f_000[i] + f_101[i]);
1681 o[INDEX2(i,numComp,3)] = f_000[i]*c0 + f_101[i]*c2 + c1*(f_001[i] + f_100[i]);
1682 } /* end of component loop i */
1683 } /* end of k0 loop */
1684 } /* end of k2 loop */
1685 } /* end of face 2 */
1686 if (m_faceOffset[3] > -1) {
1687 #pragma omp for nowait
1688 for (index_t k2=0; k2 < m_NE2; ++k2) {
1689 for (index_t k0=0; k0 < m_NE0; ++k0) {
1690 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,m_N1-1,k2, m_N0,m_N1));
1691 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,m_N1-1,k2+1, m_N0,m_N1));
1692 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,m_N1-1,k2, m_N0,m_N1));
1693 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,m_N1-1,k2+1, m_N0,m_N1));
1694 double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE0));
1695 for (index_t i=0; i < numComp; ++i) {
1696 o[INDEX2(i,numComp,0)] = f_010[i]*c2 + f_111[i]*c0 + c1*(f_011[i] + f_110[i]);
1697 o[INDEX2(i,numComp,1)] = f_011[i]*c0 + f_110[i]*c2 + c1*(f_010[i] + f_111[i]);
1698 o[INDEX2(i,numComp,2)] = f_011[i]*c2 + f_110[i]*c0 + c1*(f_010[i] + f_111[i]);
1699 o[INDEX2(i,numComp,3)] = f_010[i]*c0 + f_111[i]*c2 + c1*(f_011[i] + f_110[i]);
1700 } /* end of component loop i */
1701 } /* end of k0 loop */
1702 } /* end of k2 loop */
1703 } /* end of face 3 */
1704 if (m_faceOffset[4] > -1) {
1705 #pragma omp for nowait
1706 for (index_t k1=0; k1 < m_NE1; ++k1) {
1707 for (index_t k0=0; k0 < m_NE0; ++k0) {
1708 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,0, m_N0,m_N1));
1709 const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,0, m_N0,m_N1));
1710 const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,0, m_N0,m_N1));
1711 const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,0, m_N0,m_N1));
1712 double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE0));
1713 for (index_t i=0; i < numComp; ++i) {
1714 o[INDEX2(i,numComp,0)] = f_000[i]*c2 + f_110[i]*c0 + c1*(f_010[i] + f_100[i]);
1715 o[INDEX2(i,numComp,1)] = f_010[i]*c0 + f_100[i]*c2 + c1*(f_000[i] + f_110[i]);
1716 o[INDEX2(i,numComp,2)] = f_010[i]*c2 + f_100[i]*c0 + c1*(f_000[i] + f_110[i]);
1717 o[INDEX2(i,numComp,3)] = f_000[i]*c0 + f_110[i]*c2 + c1*(f_010[i] + f_100[i]);
1718 } /* end of component loop i */
1719 } /* end of k0 loop */
1720 } /* end of k1 loop */
1721 } /* end of face 4 */
1722 if (m_faceOffset[5] > -1) {
1723 #pragma omp for nowait
1724 for (index_t k1=0; k1 < m_NE1; ++k1) {
1725 for (index_t k0=0; k0 < m_NE0; ++k0) {
1726 const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,m_N2-1, m_N0,m_N1));
1727 const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,m_N2-1, m_N0,m_N1));
1728 const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,m_N2-1, m_N0,m_N1));
1729 const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,m_N2-1, m_N0,m_N1));
1730 double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE0));
1731 for (index_t i=0; i < numComp; ++i) {
1732 o[INDEX2(i,numComp,0)] = f_001[i]*c2 + f_111[i]*c0 + c1*(f_011[i] + f_101[i]);
1733 o[INDEX2(i,numComp,1)] = f_011[i]*c0 + f_101[i]*c2 + c1*(f_001[i] + f_111[i]);
1734 o[INDEX2(i,numComp,2)] = f_011[i]*c2 + f_101[i]*c0 + c1*(f_001[i] + f_111[i]);
1735 o[INDEX2(i,numComp,3)] = f_001[i]*c0 + f_111[i]*c2 + c1*(f_011[i] + f_101[i]);
1736 } /* end of component loop i */
1737 } /* end of k0 loop */
1738 } /* end of k1 loop */
1739 } /* end of face 5 */
1740 /* GENERATOR SNIP_INTERPOLATE_FACES BOTTOM */
1741 } // end of parallel section
1742 }
1743 }
1744
1745 } // end of namespace ripley
1746

  ViewVC Help
Powered by ViewVC 1.1.26