/[escript]/branches/diaplayground/ripley/src/Brick.cpp
ViewVC logotype

Annotation of /branches/diaplayground/ripley/src/Brick.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 3731 - (hide annotations)
Fri Dec 9 01:22:44 2011 UTC (7 years, 3 months ago) by caltinay
Original Path: branches/ripleygmg_from_3668/ripley/src/Brick.cpp
File size: 93416 byte(s)
More hand optimization with noticeable runtime reduction for tests.

1 caltinay 3691
2     /*******************************************************
3     *
4     * Copyright (c) 2003-2011 by University of Queensland
5     * Earth Systems Science Computational Center (ESSCC)
6     * http://www.uq.edu.au/esscc
7     *
8     * Primary Business: Queensland, Australia
9     * Licensed under the Open Software License version 3.0
10     * http://www.opensource.org/licenses/osl-3.0.php
11     *
12     *******************************************************/
13    
14     #include <ripley/Brick.h>
15     extern "C" {
16     #include "paso/SystemMatrixPattern.h"
17     }
18    
19     #if USE_SILO
20     #include <silo.h>
21     #ifdef ESYS_MPI
22     #include <pmpio.h>
23     #endif
24     #endif
25    
26     #include <iomanip>
27    
28     using namespace std;
29    
30     namespace ripley {
31    
32     Brick::Brick(int n0, int n1, int n2, double l0, double l1, double l2, int d0,
33     int d1, int d2) :
34     RipleyDomain(3),
35     m_gNE0(n0),
36     m_gNE1(n1),
37     m_gNE2(n2),
38     m_l0(l0),
39     m_l1(l1),
40     m_l2(l2),
41     m_NX(d0),
42     m_NY(d1),
43     m_NZ(d2)
44     {
45     // ensure number of subdivisions is valid and nodes can be distributed
46     // among number of ranks
47     if (m_NX*m_NY*m_NZ != m_mpiInfo->size)
48     throw RipleyException("Invalid number of spatial subdivisions");
49    
50     if (n0%m_NX > 0 || n1%m_NY > 0 || n2%m_NZ > 0)
51     throw RipleyException("Number of elements must be separable into number of ranks in each dimension");
52    
53     // local number of elements
54     m_NE0 = n0/m_NX;
55     m_NE1 = n1/m_NY;
56     m_NE2 = n2/m_NZ;
57     // local number of nodes (not necessarily owned)
58     m_N0 = m_NE0+1;
59     m_N1 = m_NE1+1;
60     m_N2 = m_NE2+1;
61     // bottom-left-front node is at (offset0,offset1,offset2) in global mesh
62     m_offset0 = m_NE0*(m_mpiInfo->rank%m_NX);
63     m_offset1 = m_NE1*(m_mpiInfo->rank%(m_NX*m_NY)/m_NX);
64     m_offset2 = m_NE2*(m_mpiInfo->rank/(m_NX*m_NY));
65     populateSampleIds();
66     }
67    
68    
69     Brick::~Brick()
70     {
71     }
72    
73     string Brick::getDescription() const
74     {
75     return "ripley::Brick";
76     }
77    
78     bool Brick::operator==(const AbstractDomain& other) const
79     {
80     if (dynamic_cast<const Brick*>(&other))
81     return this==&other;
82    
83     return false;
84     }
85    
86     void Brick::dump(const string& fileName) const
87     {
88     #if USE_SILO
89     string fn(fileName);
90     if (fileName.length() < 6 || fileName.compare(fileName.length()-5, 5, ".silo") != 0) {
91     fn+=".silo";
92     }
93    
94     const int NUM_SILO_FILES = 1;
95     const char* blockDirFmt = "/block%04d";
96     int driver=DB_HDF5;
97     string siloPath;
98     DBfile* dbfile = NULL;
99    
100     #ifdef ESYS_MPI
101     PMPIO_baton_t* baton = NULL;
102     #endif
103    
104     if (m_mpiInfo->size > 1) {
105     #ifdef ESYS_MPI
106     baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
107     0x1337, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
108     PMPIO_DefaultClose, (void*)&driver);
109     // try the fallback driver in case of error
110     if (!baton && driver != DB_PDB) {
111     driver = DB_PDB;
112     baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
113     0x1338, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
114     PMPIO_DefaultClose, (void*)&driver);
115     }
116     if (baton) {
117     char str[64];
118     snprintf(str, 64, blockDirFmt, PMPIO_RankInGroup(baton, m_mpiInfo->rank));
119     siloPath = str;
120     dbfile = (DBfile*) PMPIO_WaitForBaton(baton, fn.c_str(), siloPath.c_str());
121     }
122     #endif
123     } else {
124     dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
125     getDescription().c_str(), driver);
126     // try the fallback driver in case of error
127     if (!dbfile && driver != DB_PDB) {
128     driver = DB_PDB;
129     dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
130     getDescription().c_str(), driver);
131     }
132     }
133    
134     if (!dbfile)
135     throw RipleyException("dump: Could not create Silo file");
136    
137     /*
138     if (driver==DB_HDF5) {
139     // gzip level 1 already provides good compression with minimal
140     // performance penalty. Some tests showed that gzip levels >3 performed
141     // rather badly on escript data both in terms of time and space
142     DBSetCompression("ERRMODE=FALLBACK METHOD=GZIP LEVEL=1");
143     }
144     */
145    
146     boost::scoped_ptr<double> x(new double[m_N0]);
147     boost::scoped_ptr<double> y(new double[m_N1]);
148     boost::scoped_ptr<double> z(new double[m_N2]);
149     double* coords[3] = { x.get(), y.get(), z.get() };
150 caltinay 3698 pair<double,double> xdx = getFirstCoordAndSpacing(0);
151     pair<double,double> ydy = getFirstCoordAndSpacing(1);
152     pair<double,double> zdz = getFirstCoordAndSpacing(2);
153 caltinay 3691 #pragma omp parallel
154     {
155     #pragma omp for
156     for (dim_t i0 = 0; i0 < m_N0; i0++) {
157 caltinay 3698 coords[0][i0]=xdx.first+i0*xdx.second;
158 caltinay 3691 }
159     #pragma omp for
160     for (dim_t i1 = 0; i1 < m_N1; i1++) {
161 caltinay 3698 coords[1][i1]=ydy.first+i1*ydy.second;
162 caltinay 3691 }
163     #pragma omp for
164     for (dim_t i2 = 0; i2 < m_N2; i2++) {
165 caltinay 3698 coords[2][i2]=zdz.first+i2*zdz.second;
166 caltinay 3691 }
167     }
168 caltinay 3698 IndexVector dims = getNumNodesPerDim();
169     DBPutQuadmesh(dbfile, "mesh", NULL, coords, &dims[0], 3, DB_DOUBLE,
170 caltinay 3691 DB_COLLINEAR, NULL);
171    
172 caltinay 3698 DBPutQuadvar1(dbfile, "nodeId", "mesh", (void*)&m_nodeId[0], &dims[0], 3,
173     NULL, 0, DB_INT, DB_NODECENT, NULL);
174 caltinay 3691
175 caltinay 3698 // write element ids
176     dims = getNumElementsPerDim();
177     DBPutQuadvar1(dbfile, "elementId", "mesh", (void*)&m_elementId[0],
178     &dims[0], 3, NULL, 0, DB_INT, DB_ZONECENT, NULL);
179    
180     // rank 0 writes multimesh and multivar
181 caltinay 3691 if (m_mpiInfo->rank == 0) {
182     vector<string> tempstrings;
183     vector<char*> names;
184     for (dim_t i=0; i<m_mpiInfo->size; i++) {
185     stringstream path;
186     path << "/block" << setw(4) << setfill('0') << right << i << "/mesh";
187     tempstrings.push_back(path.str());
188     names.push_back((char*)tempstrings.back().c_str());
189     }
190     vector<int> types(m_mpiInfo->size, DB_QUAD_RECT);
191     DBSetDir(dbfile, "/");
192     DBPutMultimesh(dbfile, "multimesh", m_mpiInfo->size, &names[0],
193     &types[0], NULL);
194     tempstrings.clear();
195     names.clear();
196     for (dim_t i=0; i<m_mpiInfo->size; i++) {
197     stringstream path;
198     path << "/block" << setw(4) << setfill('0') << right << i << "/nodeId";
199     tempstrings.push_back(path.str());
200     names.push_back((char*)tempstrings.back().c_str());
201     }
202     types.assign(m_mpiInfo->size, DB_QUADVAR);
203     DBPutMultivar(dbfile, "nodeId", m_mpiInfo->size, &names[0],
204     &types[0], NULL);
205 caltinay 3698 tempstrings.clear();
206     names.clear();
207     for (dim_t i=0; i<m_mpiInfo->size; i++) {
208     stringstream path;
209     path << "/block" << setw(4) << setfill('0') << right << i << "/elementId";
210     tempstrings.push_back(path.str());
211     names.push_back((char*)tempstrings.back().c_str());
212     }
213     DBPutMultivar(dbfile, "elementId", m_mpiInfo->size, &names[0],
214     &types[0], NULL);
215 caltinay 3691 }
216    
217     if (m_mpiInfo->size > 1) {
218     #ifdef ESYS_MPI
219     PMPIO_HandOffBaton(baton, dbfile);
220     PMPIO_Finish(baton);
221     #endif
222     } else {
223     DBClose(dbfile);
224     }
225    
226     #else // USE_SILO
227     throw RipleyException("dump(): no Silo support");
228     #endif
229     }
230    
231     const int* Brick::borrowSampleReferenceIDs(int fsType) const
232     {
233 caltinay 3697 switch (fsType) {
234     case Nodes:
235     return &m_nodeId[0];
236     case Elements:
237     return &m_elementId[0];
238     case FaceElements:
239     return &m_faceId[0];
240     default:
241     break;
242     }
243 caltinay 3691
244 caltinay 3697 stringstream msg;
245     msg << "borrowSampleReferenceIDs() not implemented for function space type "
246     << fsType;
247     throw RipleyException(msg.str());
248 caltinay 3691 }
249    
250     bool Brick::ownSample(int fsCode, index_t id) const
251     {
252     #ifdef ESYS_MPI
253     if (fsCode == Nodes) {
254 caltinay 3698 const index_t myFirst=m_nodeDistribution[m_mpiInfo->rank];
255     const index_t myLast=m_nodeDistribution[m_mpiInfo->rank+1]-1;
256 caltinay 3691 return (m_nodeId[id]>=myFirst && m_nodeId[id]<=myLast);
257     } else
258     throw RipleyException("ownSample() only implemented for Nodes");
259     #else
260     return true;
261     #endif
262     }
263    
264 caltinay 3703 void Brick::setToGradient(escript::Data& out, const escript::Data& cIn) const
265     {
266     escript::Data& in = *const_cast<escript::Data*>(&cIn);
267     const dim_t numComp = in.getDataPointSize();
268     const double h0 = m_l0/m_gNE0;
269     const double h1 = m_l1/m_gNE1;
270     const double h2 = m_l1/m_gNE2;
271 caltinay 3731 const double C0 = .044658198738520451079;
272     const double C1 = .16666666666666666667;
273     const double C2 = .21132486540518711775;
274     const double C3 = .25;
275     const double C4 = .5;
276     const double C5 = .62200846792814621559;
277     const double C6 = .78867513459481288225;
278    
279 caltinay 3703 if (out.getFunctionSpace().getTypeCode() == Elements) {
280 caltinay 3731 /*** GENERATOR SNIP_GRAD_ELEMENTS TOP */
281 caltinay 3703 #pragma omp parallel for
282 caltinay 3707 for (index_t k2=0; k2 < m_NE2; ++k2) {
283     for (index_t k1=0; k1 < m_NE1; ++k1) {
284     for (index_t k0=0; k0 < m_NE0; ++k0) {
285 caltinay 3703 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,k2, m_N0,m_N1));
286     const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,k2+1, m_N0,m_N1));
287     const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,k2+1, m_N0,m_N1));
288     const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2+1, m_N0,m_N1));
289     const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2, m_N0,m_N1));
290     const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,k2+1, m_N0,m_N1));
291     const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,k2, m_N0,m_N1));
292     const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,k2, m_N0,m_N1));
293     double* o = out.getSampleDataRW(INDEX3(k0,k1,k2,m_NE0,m_NE1));
294     for (index_t i=0; i < numComp; ++i) {
295 caltinay 3731 const double V0=((f_100[i]-f_000[i])*C5 + (f_111[i]-f_011[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / h0;
296     const double V1=((f_110[i]-f_010[i])*C5 + (f_101[i]-f_001[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / h0;
297     const double V2=((f_101[i]-f_001[i])*C5 + (f_110[i]-f_010[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / h0;
298     const double V3=((f_111[i]-f_011[i])*C5 + (f_100[i]-f_000[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / h0;
299     const double V4=((f_010[i]-f_000[i])*C5 + (f_111[i]-f_101[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / h1;
300     const double V5=((f_110[i]-f_100[i])*C5 + (f_011[i]-f_001[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / h1;
301     const double V6=((f_011[i]-f_001[i])*C5 + (f_110[i]-f_100[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / h1;
302     const double V7=((f_111[i]-f_101[i])*C5 + (f_010[i]-f_000[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / h1;
303     const double V8=((f_001[i]-f_000[i])*C5 + (f_111[i]-f_110[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / h2;
304     const double V9=((f_101[i]-f_100[i])*C5 + (f_011[i]-f_010[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / h2;
305     const double V10=((f_011[i]-f_010[i])*C5 + (f_101[i]-f_100[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / h2;
306     const double V11=((f_111[i]-f_110[i])*C5 + (f_001[i]-f_000[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / h2;
307     o[INDEX3(i,0,0,numComp,3)] = V0;
308     o[INDEX3(i,1,0,numComp,3)] = V4;
309     o[INDEX3(i,2,0,numComp,3)] = V8;
310     o[INDEX3(i,0,1,numComp,3)] = V0;
311     o[INDEX3(i,1,1,numComp,3)] = V5;
312     o[INDEX3(i,2,1,numComp,3)] = V9;
313     o[INDEX3(i,0,2,numComp,3)] = V1;
314     o[INDEX3(i,1,2,numComp,3)] = V4;
315     o[INDEX3(i,2,2,numComp,3)] = V10;
316     o[INDEX3(i,0,3,numComp,3)] = V1;
317     o[INDEX3(i,1,3,numComp,3)] = V5;
318     o[INDEX3(i,2,3,numComp,3)] = V11;
319     o[INDEX3(i,0,4,numComp,3)] = V2;
320     o[INDEX3(i,1,4,numComp,3)] = V6;
321     o[INDEX3(i,2,4,numComp,3)] = V8;
322     o[INDEX3(i,0,5,numComp,3)] = V2;
323     o[INDEX3(i,1,5,numComp,3)] = V7;
324     o[INDEX3(i,2,5,numComp,3)] = V9;
325     o[INDEX3(i,0,6,numComp,3)] = V3;
326     o[INDEX3(i,1,6,numComp,3)] = V6;
327     o[INDEX3(i,2,6,numComp,3)] = V10;
328     o[INDEX3(i,0,7,numComp,3)] = V3;
329     o[INDEX3(i,1,7,numComp,3)] = V7;
330     o[INDEX3(i,2,7,numComp,3)] = V11;
331 caltinay 3703 } /* end of component loop i */
332     } /* end of k0 loop */
333     } /* end of k1 loop */
334     } /* end of k2 loop */
335     /* GENERATOR SNIP_GRAD_ELEMENTS BOTTOM */
336 caltinay 3711 } else if (out.getFunctionSpace().getTypeCode() == ReducedElements) {
337 caltinay 3731 /*** GENERATOR SNIP_GRAD_REDUCED_ELEMENTS TOP */
338 caltinay 3711 #pragma omp parallel for
339     for (index_t k2=0; k2 < m_NE2; ++k2) {
340     for (index_t k1=0; k1 < m_NE1; ++k1) {
341     for (index_t k0=0; k0 < m_NE0; ++k0) {
342     const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,k2, m_N0,m_N1));
343     const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,k2+1, m_N0,m_N1));
344     const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,k2+1, m_N0,m_N1));
345     const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,k2+1, m_N0,m_N1));
346     const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,k2, m_N0,m_N1));
347     const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2, m_N0,m_N1));
348     const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,k2, m_N0,m_N1));
349     const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2+1, m_N0,m_N1));
350     double* o = out.getSampleDataRW(INDEX3(k0,k1,k2,m_NE0,m_NE1));
351     for (index_t i=0; i < numComp; ++i) {
352 caltinay 3731 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_010[i]-f_011[i])*C3 / h0;
353     o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_100[i]-f_101[i])*C3 / h1;
354     o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_011[i]+f_101[i]+f_111[i]-f_000[i]-f_010[i]-f_100[i]-f_110[i])*C3 / h2;
355 caltinay 3711 } /* end of component loop i */
356     } /* end of k0 loop */
357     } /* end of k1 loop */
358     } /* end of k2 loop */
359     /* GENERATOR SNIP_GRAD_REDUCED_ELEMENTS BOTTOM */
360 caltinay 3707 } else if (out.getFunctionSpace().getTypeCode() == FaceElements) {
361 caltinay 3731 /*** GENERATOR SNIP_GRAD_FACES TOP */
362 caltinay 3722 #pragma omp parallel
363     {
364     if (m_faceOffset[0] > -1) {
365     #pragma omp for nowait
366     for (index_t k2=0; k2 < m_NE2; ++k2) {
367     for (index_t k1=0; k1 < m_NE1; ++k1) {
368     const register double* f_000 = in.getSampleDataRO(INDEX3(0,k1,k2, m_N0,m_N1));
369     const register double* f_001 = in.getSampleDataRO(INDEX3(0,k1,k2+1, m_N0,m_N1));
370     const register double* f_101 = in.getSampleDataRO(INDEX3(1,k1,k2+1, m_N0,m_N1));
371     const register double* f_111 = in.getSampleDataRO(INDEX3(1,k1+1,k2+1, m_N0,m_N1));
372     const register double* f_110 = in.getSampleDataRO(INDEX3(1,k1+1,k2, m_N0,m_N1));
373     const register double* f_011 = in.getSampleDataRO(INDEX3(0,k1+1,k2+1, m_N0,m_N1));
374     const register double* f_010 = in.getSampleDataRO(INDEX3(0,k1+1,k2, m_N0,m_N1));
375     const register double* f_100 = in.getSampleDataRO(INDEX3(1,k1,k2, m_N0,m_N1));
376     double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE1));
377     for (index_t i=0; i < numComp; ++i) {
378 caltinay 3731 const double V0=((f_010[i]-f_000[i])*C6 + (f_011[i]-f_001[i])*C2) / h1;
379     const double V1=((f_010[i]-f_000[i])*C2 + (f_011[i]-f_001[i])*C6) / h1;
380     const double V2=((f_001[i]-f_000[i])*C6 + (f_010[i]-f_011[i])*C2) / h2;
381     const double V3=((f_001[i]-f_000[i])*C2 + (f_011[i]-f_010[i])*C6) / h2;
382     o[INDEX3(i,0,0,numComp,3)] = ((f_100[i]-f_000[i])*C5 + (f_111[i]-f_011[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / h0;
383     o[INDEX3(i,1,0,numComp,3)] = V0;
384     o[INDEX3(i,2,0,numComp,3)] = V2;
385     o[INDEX3(i,0,1,numComp,3)] = ((f_110[i]-f_010[i])*C5 + (f_101[i]-f_001[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / h0;
386     o[INDEX3(i,1,1,numComp,3)] = V0;
387     o[INDEX3(i,2,1,numComp,3)] = V3;
388     o[INDEX3(i,0,2,numComp,3)] = ((f_101[i]-f_001[i])*C5 + (f_110[i]-f_010[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / h0;
389     o[INDEX3(i,1,2,numComp,3)] = V1;
390     o[INDEX3(i,2,2,numComp,3)] = V2;
391     o[INDEX3(i,0,3,numComp,3)] = ((f_111[i]-f_011[i])*C5 + (f_100[i]-f_000[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / h0;
392     o[INDEX3(i,1,3,numComp,3)] = V1;
393     o[INDEX3(i,2,3,numComp,3)] = V3;
394 caltinay 3722 } /* end of component loop i */
395     } /* end of k1 loop */
396     } /* end of k2 loop */
397     } /* end of face 0 */
398     if (m_faceOffset[1] > -1) {
399     #pragma omp for nowait
400     for (index_t k2=0; k2 < m_NE2; ++k2) {
401     for (index_t k1=0; k1 < m_NE1; ++k1) {
402     const register double* f_000 = in.getSampleDataRO(INDEX3(m_N0-2,k1,k2, m_N0,m_N1));
403     const register double* f_001 = in.getSampleDataRO(INDEX3(m_N0-2,k1,k2+1, m_N0,m_N1));
404     const register double* f_101 = in.getSampleDataRO(INDEX3(m_N0-1,k1,k2+1, m_N0,m_N1));
405     const register double* f_111 = in.getSampleDataRO(INDEX3(m_N0-1,k1+1,k2+1, m_N0,m_N1));
406     const register double* f_110 = in.getSampleDataRO(INDEX3(m_N0-1,k1+1,k2, m_N0,m_N1));
407     const register double* f_011 = in.getSampleDataRO(INDEX3(m_N0-2,k1+1,k2+1, m_N0,m_N1));
408     const register double* f_010 = in.getSampleDataRO(INDEX3(m_N0-2,k1+1,k2, m_N0,m_N1));
409     const register double* f_100 = in.getSampleDataRO(INDEX3(m_N0-1,k1,k2, m_N0,m_N1));
410     double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE1));
411     for (index_t i=0; i < numComp; ++i) {
412 caltinay 3731 const double V0=((f_110[i]-f_100[i])*C6 + (f_111[i]-f_101[i])*C2) / h1;
413     const double V1=((f_110[i]-f_100[i])*C2 + (f_111[i]-f_101[i])*C6) / h1;
414     const double V2=((f_101[i]-f_100[i])*C6 + (f_111[i]-f_110[i])*C2) / h2;
415     const double V3=((f_101[i]-f_100[i])*C2 + (f_111[i]-f_110[i])*C6) / h2;
416     o[INDEX3(i,0,0,numComp,3)] = ((f_100[i]-f_000[i])*C5 + (f_111[i]-f_011[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / h0;
417     o[INDEX3(i,1,0,numComp,3)] = V0;
418     o[INDEX3(i,2,0,numComp,3)] = V2;
419     o[INDEX3(i,0,1,numComp,3)] = ((f_110[i]-f_010[i])*C5 + (f_101[i]-f_001[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / h0;
420     o[INDEX3(i,1,1,numComp,3)] = V0;
421     o[INDEX3(i,2,1,numComp,3)] = V3;
422     o[INDEX3(i,0,2,numComp,3)] = ((f_101[i]-f_001[i])*C5 + (f_110[i]-f_010[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / h0;
423     o[INDEX3(i,1,2,numComp,3)] = V1;
424     o[INDEX3(i,2,2,numComp,3)] = V2;
425     o[INDEX3(i,0,3,numComp,3)] = ((f_111[i]-f_011[i])*C5 + (f_100[i]-f_000[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / h0;
426     o[INDEX3(i,1,3,numComp,3)] = V1;
427     o[INDEX3(i,2,3,numComp,3)] = V3;
428 caltinay 3722 } /* end of component loop i */
429     } /* end of k1 loop */
430     } /* end of k2 loop */
431     } /* end of face 1 */
432     if (m_faceOffset[2] > -1) {
433     #pragma omp for nowait
434     for (index_t k2=0; k2 < m_NE2; ++k2) {
435     for (index_t k0=0; k0 < m_NE0; ++k0) {
436     const register double* f_000 = in.getSampleDataRO(INDEX3(k0,0,k2, m_N0,m_N1));
437     const register double* f_001 = in.getSampleDataRO(INDEX3(k0,0,k2+1, m_N0,m_N1));
438     const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,0,k2+1, m_N0,m_N1));
439     const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,0,k2, m_N0,m_N1));
440     const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,1,k2+1, m_N0,m_N1));
441     const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,1,k2, m_N0,m_N1));
442     const register double* f_011 = in.getSampleDataRO(INDEX3(k0,1,k2+1, m_N0,m_N1));
443     const register double* f_010 = in.getSampleDataRO(INDEX3(k0,1,k2, m_N0,m_N1));
444     double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE0));
445     for (index_t i=0; i < numComp; ++i) {
446 caltinay 3731 const double V0=((f_100[i]-f_000[i])*C6 + (f_101[i]-f_001[i])*C2) / h0;
447     const double V1=((f_001[i]-f_000[i])*C6 + (f_101[i]-f_100[i])*C2) / h2;
448     const double V2=((f_001[i]-f_000[i])*C2 + (f_101[i]-f_100[i])*C6) / h2;
449     o[INDEX3(i,0,0,numComp,3)] = V0;
450     o[INDEX3(i,1,0,numComp,3)] = ((f_010[i]-f_000[i])*C5 + (f_111[i]-f_101[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / h1;
451     o[INDEX3(i,2,0,numComp,3)] = V1;
452     o[INDEX3(i,0,1,numComp,3)] = V0;
453     o[INDEX3(i,1,1,numComp,3)] = ((f_110[i]-f_100[i])*C5 + (f_011[i]-f_001[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / h1;
454     o[INDEX3(i,2,1,numComp,3)] = V2;
455     o[INDEX3(i,0,2,numComp,3)] = V0;
456     o[INDEX3(i,1,2,numComp,3)] = ((f_011[i]-f_001[i])*C5 + (f_110[i]-f_100[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / h1;
457     o[INDEX3(i,2,2,numComp,3)] = V1;
458     o[INDEX3(i,0,3,numComp,3)] = V0;
459     o[INDEX3(i,1,3,numComp,3)] = ((f_111[i]-f_101[i])*C5 + (f_010[i]-f_000[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / h1;
460     o[INDEX3(i,2,3,numComp,3)] = V2;
461 caltinay 3722 } /* end of component loop i */
462     } /* end of k0 loop */
463     } /* end of k2 loop */
464     } /* end of face 2 */
465     if (m_faceOffset[3] > -1) {
466     #pragma omp for nowait
467     for (index_t k2=0; k2 < m_NE2; ++k2) {
468     for (index_t k0=0; k0 < m_NE0; ++k0) {
469     const register double* f_011 = in.getSampleDataRO(INDEX3(k0,m_N1-1,k2+1, m_N0,m_N1));
470     const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,m_N1-1,k2, m_N0,m_N1));
471     const register double* f_010 = in.getSampleDataRO(INDEX3(k0,m_N1-1,k2, m_N0,m_N1));
472     const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,m_N1-1,k2+1, m_N0,m_N1));
473     const register double* f_000 = in.getSampleDataRO(INDEX3(k0,m_N1-2,k2, m_N0,m_N1));
474     const register double* f_001 = in.getSampleDataRO(INDEX3(k0,m_N1-2,k2+1, m_N0,m_N1));
475     const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,m_N1-2,k2+1, m_N0,m_N1));
476     const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,m_N1-2,k2, m_N0,m_N1));
477     double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE0));
478     for (index_t i=0; i < numComp; ++i) {
479 caltinay 3731 const double V0=((f_110[i]-f_010[i])*C6 + (f_111[i]-f_011[i])*C2) / h0;
480     const double V1=((f_110[i]-f_010[i])*C2 + (f_111[i]-f_011[i])*C6) / h0;
481     const double V2=((f_011[i]-f_010[i])*C6 + (f_111[i]-f_110[i])*C2) / h2;
482     const double V3=((f_011[i]-f_010[i])*C2 + (f_111[i]-f_110[i])*C6) / h2;
483     o[INDEX3(i,0,0,numComp,3)] = V0;
484     o[INDEX3(i,1,0,numComp,3)] = ((f_010[i]-f_000[i])*C5 + (f_111[i]-f_101[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / h1;
485     o[INDEX3(i,2,0,numComp,3)] = V2;
486     o[INDEX3(i,0,1,numComp,3)] = V0;
487     o[INDEX3(i,1,1,numComp,3)] = ((f_110[i]-f_100[i])*C5 + (f_011[i]-f_001[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / h1;
488     o[INDEX3(i,2,1,numComp,3)] = V3;
489     o[INDEX3(i,0,2,numComp,3)] = V1;
490     o[INDEX3(i,1,2,numComp,3)] = ((f_011[i]-f_001[i])*C5 + (f_110[i]-f_100[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / h1;
491     o[INDEX3(i,2,2,numComp,3)] = V2;
492     o[INDEX3(i,0,3,numComp,3)] = V1;
493     o[INDEX3(i,1,3,numComp,3)] = ((f_111[i]-f_101[i])*C5 + (f_010[i]-f_000[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / h1;
494     o[INDEX3(i,2,3,numComp,3)] = V3;
495 caltinay 3722 } /* end of component loop i */
496     } /* end of k0 loop */
497     } /* end of k2 loop */
498     } /* end of face 3 */
499     if (m_faceOffset[4] > -1) {
500     #pragma omp for nowait
501 caltinay 3707 for (index_t k1=0; k1 < m_NE1; ++k1) {
502 caltinay 3722 for (index_t k0=0; k0 < m_NE0; ++k0) {
503     const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,0, m_N0,m_N1));
504     const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,0, m_N0,m_N1));
505     const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,0, m_N0,m_N1));
506     const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,0, m_N0,m_N1));
507     const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,1, m_N0,m_N1));
508     const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,1, m_N0,m_N1));
509     const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,1, m_N0,m_N1));
510     const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,1, m_N0,m_N1));
511     double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE0));
512     for (index_t i=0; i < numComp; ++i) {
513 caltinay 3731 const double V0=((f_100[i]-f_000[i])*C6 + (f_110[i]-f_010[i])*C2) / h0;
514     const double V1=((f_100[i]-f_000[i])*C2 + (f_110[i]-f_010[i])*C6) / h0;
515     const double V2=((f_010[i]-f_000[i])*C6 + (f_110[i]-f_100[i])*C2) / h1;
516     const double V3=((f_010[i]-f_000[i])*C2 + (f_110[i]-f_100[i])*C6) / h1;
517     o[INDEX3(i,0,0,numComp,3)] = V0;
518     o[INDEX3(i,1,0,numComp,3)] = V2;
519     o[INDEX3(i,2,0,numComp,3)] = ((f_001[i]-f_000[i])*C5 + (f_111[i]-f_110[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / h2;
520     o[INDEX3(i,0,1,numComp,3)] = V0;
521     o[INDEX3(i,1,1,numComp,3)] = V3;
522     o[INDEX3(i,2,1,numComp,3)] = ((f_101[i]-f_100[i])*C5 + (f_011[i]-f_010[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / h2;
523     o[INDEX3(i,0,2,numComp,3)] = V1;
524     o[INDEX3(i,1,2,numComp,3)] = V2;
525     o[INDEX3(i,2,2,numComp,3)] = ((f_011[i]-f_010[i])*C5 + (f_101[i]-f_100[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / h2;
526     o[INDEX3(i,0,3,numComp,3)] = V1;
527     o[INDEX3(i,1,3,numComp,3)] = V3;
528     o[INDEX3(i,2,3,numComp,3)] = ((f_111[i]-f_110[i])*C5 + (f_001[i]-f_000[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / h2;
529 caltinay 3722 } /* end of component loop i */
530     } /* end of k0 loop */
531 caltinay 3707 } /* end of k1 loop */
532 caltinay 3722 } /* end of face 4 */
533     if (m_faceOffset[5] > -1) {
534     #pragma omp for nowait
535 caltinay 3707 for (index_t k1=0; k1 < m_NE1; ++k1) {
536 caltinay 3722 for (index_t k0=0; k0 < m_NE0; ++k0) {
537     const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,m_N2-1, m_N0,m_N1));
538     const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,m_N2-1, m_N0,m_N1));
539     const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,m_N2-1, m_N0,m_N1));
540     const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,m_N2-1, m_N0,m_N1));
541     const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,m_N2-2, m_N0,m_N1));
542     const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,m_N2-2, m_N0,m_N1));
543     const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,m_N2-2, m_N0,m_N1));
544     const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,m_N2-2, m_N0,m_N1));
545     double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE0));
546     for (index_t i=0; i < numComp; ++i) {
547 caltinay 3731 const double V0=((f_101[i]-f_001[i])*C6 + (f_111[i]-f_011[i])*C2) / h0;
548     const double V1=((f_101[i]-f_001[i])*C2 + (f_111[i]-f_011[i])*C6) / h0;
549     const double V2=((f_011[i]-f_001[i])*C6 + (f_111[i]-f_101[i])*C2) / h1;
550     const double V3=((f_011[i]-f_001[i])*C2 + (f_111[i]-f_101[i])*C6) / h1;
551     o[INDEX3(i,0,0,numComp,3)] = V0;
552     o[INDEX3(i,1,0,numComp,3)] = V2;
553     o[INDEX3(i,2,0,numComp,3)] = ((f_001[i]-f_000[i])*C5 + (f_111[i]-f_110[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / h2;
554     o[INDEX3(i,0,1,numComp,3)] = V0;
555     o[INDEX3(i,1,1,numComp,3)] = V3;
556     o[INDEX3(i,2,1,numComp,3)] = ((f_011[i]-f_010[i])*C0 + (f_101[i]-f_100[i])*C5 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / h2;
557     o[INDEX3(i,0,2,numComp,3)] = V1;
558     o[INDEX3(i,1,2,numComp,3)] = V2;
559     o[INDEX3(i,2,2,numComp,3)] = ((f_011[i]-f_010[i])*C5 + (f_101[i]-f_100[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / h2;
560     o[INDEX3(i,0,3,numComp,3)] = V1;
561     o[INDEX3(i,1,3,numComp,3)] = V3;
562     o[INDEX3(i,2,3,numComp,3)] = ((f_001[i]-f_000[i])*C0 + (f_111[i]-f_110[i])*C5 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / h2;
563 caltinay 3722 } /* end of component loop i */
564     } /* end of k0 loop */
565 caltinay 3707 } /* end of k1 loop */
566 caltinay 3722 } /* end of face 5 */
567     } // end of parallel section
568 caltinay 3707 /* GENERATOR SNIP_GRAD_FACES BOTTOM */
569 caltinay 3711 } else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
570 caltinay 3731 /*** GENERATOR SNIP_GRAD_REDUCED_FACES TOP */
571 caltinay 3722 #pragma omp parallel
572     {
573     if (m_faceOffset[0] > -1) {
574     #pragma omp for nowait
575     for (index_t k2=0; k2 < m_NE2; ++k2) {
576     for (index_t k1=0; k1 < m_NE1; ++k1) {
577     const register double* f_000 = in.getSampleDataRO(INDEX3(0,k1,k2, m_N0,m_N1));
578     const register double* f_001 = in.getSampleDataRO(INDEX3(0,k1,k2+1, m_N0,m_N1));
579     const register double* f_101 = in.getSampleDataRO(INDEX3(1,k1,k2+1, m_N0,m_N1));
580     const register double* f_011 = in.getSampleDataRO(INDEX3(0,k1+1,k2+1, m_N0,m_N1));
581     const register double* f_100 = in.getSampleDataRO(INDEX3(1,k1,k2, m_N0,m_N1));
582     const register double* f_110 = in.getSampleDataRO(INDEX3(1,k1+1,k2, m_N0,m_N1));
583     const register double* f_010 = in.getSampleDataRO(INDEX3(0,k1+1,k2, m_N0,m_N1));
584     const register double* f_111 = in.getSampleDataRO(INDEX3(1,k1+1,k2+1, m_N0,m_N1));
585     double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE1));
586     for (index_t i=0; i < numComp; ++i) {
587 caltinay 3731 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_010[i]-f_011[i])*C3 / h0;
588     o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]-f_000[i]-f_001[i])*C4 / h1;
589     o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_011[i]-f_000[i]-f_010[i])*C4 / h2;
590 caltinay 3722 } /* end of component loop i */
591     } /* end of k1 loop */
592     } /* end of k2 loop */
593     } /* end of face 0 */
594     if (m_faceOffset[1] > -1) {
595     #pragma omp for nowait
596     for (index_t k2=0; k2 < m_NE2; ++k2) {
597     for (index_t k1=0; k1 < m_NE1; ++k1) {
598     const register double* f_000 = in.getSampleDataRO(INDEX3(m_N0-2,k1,k2, m_N0,m_N1));
599     const register double* f_001 = in.getSampleDataRO(INDEX3(m_N0-2,k1,k2+1, m_N0,m_N1));
600     const register double* f_101 = in.getSampleDataRO(INDEX3(m_N0-1,k1,k2+1, m_N0,m_N1));
601     const register double* f_011 = in.getSampleDataRO(INDEX3(m_N0-2,k1+1,k2+1, m_N0,m_N1));
602     const register double* f_100 = in.getSampleDataRO(INDEX3(m_N0-1,k1,k2, m_N0,m_N1));
603     const register double* f_110 = in.getSampleDataRO(INDEX3(m_N0-1,k1+1,k2, m_N0,m_N1));
604     const register double* f_010 = in.getSampleDataRO(INDEX3(m_N0-2,k1+1,k2, m_N0,m_N1));
605     const register double* f_111 = in.getSampleDataRO(INDEX3(m_N0-1,k1+1,k2+1, m_N0,m_N1));
606     double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE1));
607     for (index_t i=0; i < numComp; ++i) {
608 caltinay 3731 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_010[i]-f_011[i])*C3 / h0;
609     o[INDEX3(i,1,0,numComp,3)] = (f_110[i]+f_111[i]-f_100[i]-f_101[i])*C4 / h1;
610     o[INDEX3(i,2,0,numComp,3)] = (f_101[i]+f_111[i]-f_100[i]-f_110[i])*C4 / h2;
611 caltinay 3722 } /* end of component loop i */
612     } /* end of k1 loop */
613     } /* end of k2 loop */
614     } /* end of face 1 */
615     if (m_faceOffset[2] > -1) {
616     #pragma omp for nowait
617     for (index_t k2=0; k2 < m_NE2; ++k2) {
618     for (index_t k0=0; k0 < m_NE0; ++k0) {
619     const register double* f_000 = in.getSampleDataRO(INDEX3(k0,0,k2, m_N0,m_N1));
620     const register double* f_001 = in.getSampleDataRO(INDEX3(k0,0,k2+1, m_N0,m_N1));
621     const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,0,k2+1, m_N0,m_N1));
622     const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,0,k2, m_N0,m_N1));
623     const register double* f_011 = in.getSampleDataRO(INDEX3(k0,1,k2+1, m_N0,m_N1));
624     const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,1,k2, m_N0,m_N1));
625     const register double* f_010 = in.getSampleDataRO(INDEX3(k0,1,k2, m_N0,m_N1));
626     const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,1,k2+1, m_N0,m_N1));
627     double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE0));
628     for (index_t i=0; i < numComp; ++i) {
629 caltinay 3731 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]-f_000[i]-f_001[i])*C4 / h0;
630     o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_100[i]-f_101[i])*C3 / h1;
631     o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_101[i]-f_000[i]-f_100[i])*C4 / h2;
632 caltinay 3722 } /* end of component loop i */
633     } /* end of k0 loop */
634     } /* end of k2 loop */
635     } /* end of face 2 */
636     if (m_faceOffset[3] > -1) {
637     #pragma omp for nowait
638     for (index_t k2=0; k2 < m_NE2; ++k2) {
639     for (index_t k0=0; k0 < m_NE0; ++k0) {
640     const register double* f_011 = in.getSampleDataRO(INDEX3(k0,m_N1-1,k2+1, m_N0,m_N1));
641     const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,m_N1-1,k2, m_N0,m_N1));
642     const register double* f_010 = in.getSampleDataRO(INDEX3(k0,m_N1-1,k2, m_N0,m_N1));
643     const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,m_N1-1,k2+1, m_N0,m_N1));
644     const register double* f_000 = in.getSampleDataRO(INDEX3(k0,m_N1-2,k2, m_N0,m_N1));
645     const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,m_N1-2,k2+1, m_N0,m_N1));
646     const register double* f_001 = in.getSampleDataRO(INDEX3(k0,m_N1-2,k2+1, m_N0,m_N1));
647     const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,m_N1-2,k2, m_N0,m_N1));
648     double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE0));
649     for (index_t i=0; i < numComp; ++i) {
650 caltinay 3731 o[INDEX3(i,0,0,numComp,3)] = (f_110[i]+f_111[i]-f_010[i]-f_011[i])*C4 / h0;
651     o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_100[i]-f_101[i])*C3 / h1;
652     o[INDEX3(i,2,0,numComp,3)] = (f_011[i]+f_111[i]-f_010[i]-f_110[i])*C4 / h2;
653 caltinay 3722 } /* end of component loop i */
654     } /* end of k0 loop */
655     } /* end of k2 loop */
656     } /* end of face 3 */
657     if (m_faceOffset[4] > -1) {
658     #pragma omp for nowait
659 caltinay 3711 for (index_t k1=0; k1 < m_NE1; ++k1) {
660 caltinay 3722 for (index_t k0=0; k0 < m_NE0; ++k0) {
661     const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,0, m_N0,m_N1));
662     const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,0, m_N0,m_N1));
663     const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,0, m_N0,m_N1));
664     const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,0, m_N0,m_N1));
665     const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,1, m_N0,m_N1));
666     const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,1, m_N0,m_N1));
667     const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,1, m_N0,m_N1));
668     const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,1, m_N0,m_N1));
669     double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE0));
670     for (index_t i=0; i < numComp; ++i) {
671 caltinay 3731 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_110[i]-f_000[i]-f_010[i])*C4 / h0;
672     o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_110[i]-f_000[i]-f_100[i])*C4 / h1;
673     o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_011[i]+f_101[i]+f_111[i]-f_000[i]-f_010[i]-f_100[i]-f_110[i])*C4 / h2;
674 caltinay 3722 } /* end of component loop i */
675     } /* end of k0 loop */
676 caltinay 3711 } /* end of k1 loop */
677 caltinay 3722 } /* end of face 4 */
678     if (m_faceOffset[5] > -1) {
679     #pragma omp for nowait
680 caltinay 3711 for (index_t k1=0; k1 < m_NE1; ++k1) {
681 caltinay 3722 for (index_t k0=0; k0 < m_NE0; ++k0) {
682     const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,m_N2-1, m_N0,m_N1));
683     const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,m_N2-1, m_N0,m_N1));
684     const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,m_N2-1, m_N0,m_N1));
685     const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,m_N2-1, m_N0,m_N1));
686     const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,m_N2-2, m_N0,m_N1));
687     const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,m_N2-2, m_N0,m_N1));
688     const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,m_N2-2, m_N0,m_N1));
689     const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,m_N2-2, m_N0,m_N1));
690     double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE0));
691     for (index_t i=0; i < numComp; ++i) {
692 caltinay 3731 o[INDEX3(i,0,0,numComp,3)] = (f_101[i]+f_111[i]-f_001[i]-f_011[i])*C4 / h0;
693     o[INDEX3(i,1,0,numComp,3)] = (f_011[i]+f_111[i]-f_001[i]-f_101[i])*C4 / h1;
694     o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_011[i]+f_101[i]+f_111[i]-f_000[i]-f_010[i]-f_100[i]-f_110[i])*C3 / h2;
695 caltinay 3722 } /* end of component loop i */
696     } /* end of k0 loop */
697 caltinay 3711 } /* end of k1 loop */
698 caltinay 3722 } /* end of face 5 */
699     } // end of parallel section
700 caltinay 3711 /* GENERATOR SNIP_GRAD_REDUCED_FACES BOTTOM */
701 caltinay 3703 } else {
702 caltinay 3707 stringstream msg;
703     msg << "setToGradient() not implemented for "
704     << functionSpaceTypeAsString(out.getFunctionSpace().getTypeCode());
705     throw RipleyException(msg.str());
706 caltinay 3703 }
707     }
708    
709 caltinay 3713 void Brick::setToIntegrals(vector<double>& integrals, const escript::Data& arg) const
710     {
711     escript::Data& in = *const_cast<escript::Data*>(&arg);
712     const dim_t numComp = in.getDataPointSize();
713     const double h0 = m_l0/m_gNE0;
714     const double h1 = m_l1/m_gNE1;
715     const double h2 = m_l2/m_gNE2;
716     if (arg.getFunctionSpace().getTypeCode() == Elements) {
717     const double w_0 = h0*h1*h2/8.;
718     #pragma omp parallel
719     {
720     vector<double> int_local(numComp, 0);
721 caltinay 3722 #pragma omp for nowait
722 caltinay 3713 for (index_t k2 = 0; k2 < m_NE2; ++k2) {
723     for (index_t k1 = 0; k1 < m_NE1; ++k1) {
724     for (index_t k0 = 0; k0 < m_NE0; ++k0) {
725     const double* f = in.getSampleDataRO(INDEX3(k0, k1, k2, m_NE0, m_NE1));
726     for (index_t i=0; i < numComp; ++i) {
727     const register double f_0 = f[INDEX2(i,0,numComp)];
728     const register double f_1 = f[INDEX2(i,1,numComp)];
729     const register double f_2 = f[INDEX2(i,2,numComp)];
730     const register double f_3 = f[INDEX2(i,3,numComp)];
731     const register double f_4 = f[INDEX2(i,4,numComp)];
732     const register double f_5 = f[INDEX2(i,5,numComp)];
733     const register double f_6 = f[INDEX2(i,6,numComp)];
734     const register double f_7 = f[INDEX2(i,7,numComp)];
735     int_local[i]+=(f_0+f_1+f_2+f_3+f_4+f_5+f_6+f_7)*w_0;
736     } /* end of component loop i */
737     } /* end of k0 loop */
738     } /* end of k1 loop */
739     } /* end of k2 loop */
740    
741     #pragma omp critical
742     for (index_t i=0; i<numComp; i++)
743     integrals[i]+=int_local[i];
744 caltinay 3722 } // end of parallel section
745 caltinay 3713 } else if (arg.getFunctionSpace().getTypeCode() == ReducedElements) {
746     const double w_0 = h0*h1*h2;
747     #pragma omp parallel
748     {
749     vector<double> int_local(numComp, 0);
750 caltinay 3722 #pragma omp for nowait
751 caltinay 3713 for (index_t k2 = 0; k2 < m_NE2; ++k2) {
752     for (index_t k1 = 0; k1 < m_NE1; ++k1) {
753     for (index_t k0 = 0; k0 < m_NE0; ++k0) {
754     const double* f = in.getSampleDataRO(INDEX3(k0, k1, k2, m_NE0, m_NE1));
755     for (index_t i=0; i < numComp; ++i) {
756     int_local[i]+=f[i]*w_0;
757     } /* end of component loop i */
758     } /* end of k0 loop */
759     } /* end of k1 loop */
760     } /* end of k2 loop */
761    
762     #pragma omp critical
763     for (index_t i=0; i<numComp; i++)
764     integrals[i]+=int_local[i];
765 caltinay 3722 } // end of parallel section
766 caltinay 3713 } else if (arg.getFunctionSpace().getTypeCode() == FaceElements) {
767     const double w_0 = h1*h2/4.;
768     const double w_1 = h0*h2/4.;
769     const double w_2 = h0*h1/4.;
770     #pragma omp parallel
771     {
772     vector<double> int_local(numComp, 0);
773     if (m_faceOffset[0] > -1) {
774 caltinay 3722 #pragma omp for nowait
775 caltinay 3713 for (index_t k2=0; k2 < m_NE2; ++k2) {
776     for (index_t k1=0; k1 < m_NE1; ++k1) {
777     const double* f = in.getSampleDataRO(m_faceOffset[0]+INDEX2(k1,k2,m_NE1));
778     for (index_t i=0; i < numComp; ++i) {
779     const register double f_0 = f[INDEX2(i,0,numComp)];
780     const register double f_1 = f[INDEX2(i,1,numComp)];
781     const register double f_2 = f[INDEX2(i,2,numComp)];
782     const register double f_3 = f[INDEX2(i,3,numComp)];
783     int_local[i]+=(f_0+f_1+f_2+f_3)*w_0;
784     } /* end of component loop i */
785     } /* end of k1 loop */
786     } /* end of k2 loop */
787     }
788    
789     if (m_faceOffset[1] > -1) {
790 caltinay 3722 #pragma omp for nowait
791 caltinay 3713 for (index_t k2=0; k2 < m_NE2; ++k2) {
792     for (index_t k1=0; k1 < m_NE1; ++k1) {
793     const double* f = in.getSampleDataRO(m_faceOffset[1]+INDEX2(k1,k2,m_NE1));
794     for (index_t i=0; i < numComp; ++i) {
795     const register double f_0 = f[INDEX2(i,0,numComp)];
796     const register double f_1 = f[INDEX2(i,1,numComp)];
797     const register double f_2 = f[INDEX2(i,2,numComp)];
798     const register double f_3 = f[INDEX2(i,3,numComp)];
799     int_local[i]+=(f_0+f_1+f_2+f_3)*w_0;
800     } /* end of component loop i */
801     } /* end of k1 loop */
802     } /* end of k2 loop */
803     }
804    
805     if (m_faceOffset[2] > -1) {
806 caltinay 3722 #pragma omp for nowait
807 caltinay 3713 for (index_t k2=0; k2 < m_NE2; ++k2) {
808     for (index_t k0=0; k0 < m_NE0; ++k0) {
809     const double* f = in.getSampleDataRO(m_faceOffset[2]+INDEX2(k0,k2,m_NE0));
810     for (index_t i=0; i < numComp; ++i) {
811     const register double f_0 = f[INDEX2(i,0,numComp)];
812     const register double f_1 = f[INDEX2(i,1,numComp)];
813     const register double f_2 = f[INDEX2(i,2,numComp)];
814     const register double f_3 = f[INDEX2(i,3,numComp)];
815     int_local[i]+=(f_0+f_1+f_2+f_3)*w_1;
816     } /* end of component loop i */
817     } /* end of k1 loop */
818     } /* end of k2 loop */
819     }
820    
821     if (m_faceOffset[3] > -1) {
822 caltinay 3722 #pragma omp for nowait
823 caltinay 3713 for (index_t k2=0; k2 < m_NE2; ++k2) {
824     for (index_t k0=0; k0 < m_NE0; ++k0) {
825     const double* f = in.getSampleDataRO(m_faceOffset[3]+INDEX2(k0,k2,m_NE0));
826     for (index_t i=0; i < numComp; ++i) {
827     const register double f_0 = f[INDEX2(i,0,numComp)];
828     const register double f_1 = f[INDEX2(i,1,numComp)];
829     const register double f_2 = f[INDEX2(i,2,numComp)];
830     const register double f_3 = f[INDEX2(i,3,numComp)];
831     int_local[i]+=(f_0+f_1+f_2+f_3)*w_1;
832     } /* end of component loop i */
833     } /* end of k1 loop */
834     } /* end of k2 loop */
835     }
836    
837     if (m_faceOffset[4] > -1) {
838 caltinay 3722 #pragma omp for nowait
839 caltinay 3713 for (index_t k1=0; k1 < m_NE1; ++k1) {
840     for (index_t k0=0; k0 < m_NE0; ++k0) {
841     const double* f = in.getSampleDataRO(m_faceOffset[4]+INDEX2(k0,k1,m_NE0));
842     for (index_t i=0; i < numComp; ++i) {
843     const register double f_0 = f[INDEX2(i,0,numComp)];
844     const register double f_1 = f[INDEX2(i,1,numComp)];
845     const register double f_2 = f[INDEX2(i,2,numComp)];
846     const register double f_3 = f[INDEX2(i,3,numComp)];
847     int_local[i]+=(f_0+f_1+f_2+f_3)*w_2;
848     } /* end of component loop i */
849     } /* end of k1 loop */
850     } /* end of k2 loop */
851     }
852    
853     if (m_faceOffset[5] > -1) {
854 caltinay 3722 #pragma omp for nowait
855 caltinay 3713 for (index_t k1=0; k1 < m_NE1; ++k1) {
856     for (index_t k0=0; k0 < m_NE0; ++k0) {
857     const double* f = in.getSampleDataRO(m_faceOffset[5]+INDEX2(k0,k1,m_NE0));
858     for (index_t i=0; i < numComp; ++i) {
859     const register double f_0 = f[INDEX2(i,0,numComp)];
860     const register double f_1 = f[INDEX2(i,1,numComp)];
861     const register double f_2 = f[INDEX2(i,2,numComp)];
862     const register double f_3 = f[INDEX2(i,3,numComp)];
863     int_local[i]+=(f_0+f_1+f_2+f_3)*w_2;
864     } /* end of component loop i */
865     } /* end of k1 loop */
866     } /* end of k2 loop */
867     }
868    
869     #pragma omp critical
870     for (index_t i=0; i<numComp; i++)
871     integrals[i]+=int_local[i];
872 caltinay 3722 } // end of parallel section
873 caltinay 3713
874     } else if (arg.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
875     const double w_0 = h1*h2;
876     const double w_1 = h0*h2;
877     const double w_2 = h0*h1;
878     #pragma omp parallel
879     {
880     vector<double> int_local(numComp, 0);
881     if (m_faceOffset[0] > -1) {
882 caltinay 3722 #pragma omp for nowait
883 caltinay 3713 for (index_t k2=0; k2 < m_NE2; ++k2) {
884     for (index_t k1=0; k1 < m_NE1; ++k1) {
885     const double* f = in.getSampleDataRO(m_faceOffset[0]+INDEX2(k1,k2,m_NE1));
886     for (index_t i=0; i < numComp; ++i) {
887     int_local[i]+=f[i]*w_0;
888     } /* end of component loop i */
889     } /* end of k1 loop */
890     } /* end of k2 loop */
891     }
892    
893     if (m_faceOffset[1] > -1) {
894 caltinay 3722 #pragma omp for nowait
895 caltinay 3713 for (index_t k2=0; k2 < m_NE2; ++k2) {
896     for (index_t k1=0; k1 < m_NE1; ++k1) {
897     const double* f = in.getSampleDataRO(m_faceOffset[1]+INDEX2(k1,k2,m_NE1));
898     for (index_t i=0; i < numComp; ++i) {
899     int_local[i]+=f[i]*w_0;
900     } /* end of component loop i */
901     } /* end of k1 loop */
902     } /* end of k2 loop */
903     }
904    
905     if (m_faceOffset[2] > -1) {
906 caltinay 3722 #pragma omp for nowait
907 caltinay 3713 for (index_t k2=0; k2 < m_NE2; ++k2) {
908     for (index_t k0=0; k0 < m_NE0; ++k0) {
909     const double* f = in.getSampleDataRO(m_faceOffset[2]+INDEX2(k0,k2,m_NE0));
910     for (index_t i=0; i < numComp; ++i) {
911     int_local[i]+=f[i]*w_1;
912     } /* end of component loop i */
913     } /* end of k1 loop */
914     } /* end of k2 loop */
915     }
916    
917     if (m_faceOffset[3] > -1) {
918 caltinay 3722 #pragma omp for nowait
919 caltinay 3713 for (index_t k2=0; k2 < m_NE2; ++k2) {
920     for (index_t k0=0; k0 < m_NE0; ++k0) {
921     const double* f = in.getSampleDataRO(m_faceOffset[3]+INDEX2(k0,k2,m_NE0));
922     for (index_t i=0; i < numComp; ++i) {
923     int_local[i]+=f[i]*w_1;
924     } /* end of component loop i */
925     } /* end of k1 loop */
926     } /* end of k2 loop */
927     }
928    
929     if (m_faceOffset[4] > -1) {
930 caltinay 3722 #pragma omp for nowait
931 caltinay 3713 for (index_t k1=0; k1 < m_NE1; ++k1) {
932     for (index_t k0=0; k0 < m_NE0; ++k0) {
933     const double* f = in.getSampleDataRO(m_faceOffset[4]+INDEX2(k0,k1,m_NE0));
934     for (index_t i=0; i < numComp; ++i) {
935     int_local[i]+=f[i]*w_2;
936     } /* end of component loop i */
937     } /* end of k1 loop */
938     } /* end of k2 loop */
939     }
940    
941     if (m_faceOffset[5] > -1) {
942 caltinay 3722 #pragma omp for nowait
943 caltinay 3713 for (index_t k1=0; k1 < m_NE1; ++k1) {
944     for (index_t k0=0; k0 < m_NE0; ++k0) {
945     const double* f = in.getSampleDataRO(m_faceOffset[5]+INDEX2(k0,k1,m_NE0));
946     for (index_t i=0; i < numComp; ++i) {
947     int_local[i]+=f[i]*w_2;
948     } /* end of component loop i */
949     } /* end of k1 loop */
950     } /* end of k2 loop */
951     }
952    
953     #pragma omp critical
954     for (index_t i=0; i<numComp; i++)
955     integrals[i]+=int_local[i];
956 caltinay 3722 } // end of parallel section
957 caltinay 3713
958     } else {
959     stringstream msg;
960     msg << "setToIntegrals() not implemented for "
961     << functionSpaceTypeAsString(arg.getFunctionSpace().getTypeCode());
962     throw RipleyException(msg.str());
963     }
964     }
965    
966 caltinay 3722 void Brick::setToNormal(escript::Data& out) const
967     {
968     if (out.getFunctionSpace().getTypeCode() == FaceElements) {
969     #pragma omp parallel
970     {
971     if (m_faceOffset[0] > -1) {
972     #pragma omp for nowait
973     for (index_t k2 = 0; k2 < m_NE2; ++k2) {
974     for (index_t k1 = 0; k1 < m_NE1; ++k1) {
975     double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE1));
976     // set vector at four quadrature points
977     *o++ = -1.; *o++ = 0.; *o++ = 0.;
978     *o++ = -1.; *o++ = 0.; *o++ = 0.;
979     *o++ = -1.; *o++ = 0.; *o++ = 0.;
980     *o++ = -1.; *o++ = 0.; *o = 0.;
981     }
982     }
983     }
984    
985     if (m_faceOffset[1] > -1) {
986     #pragma omp for nowait
987     for (index_t k2 = 0; k2 < m_NE2; ++k2) {
988     for (index_t k1 = 0; k1 < m_NE1; ++k1) {
989     double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE1));
990     // set vector at four quadrature points
991     *o++ = 1.; *o++ = 0.; *o++ = 0.;
992     *o++ = 1.; *o++ = 0.; *o++ = 0.;
993     *o++ = 1.; *o++ = 0.; *o++ = 0.;
994     *o++ = 1.; *o++ = 0.; *o = 0.;
995     }
996     }
997     }
998    
999     if (m_faceOffset[2] > -1) {
1000     #pragma omp for nowait
1001     for (index_t k2 = 0; k2 < m_NE2; ++k2) {
1002     for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1003     double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE0));
1004     // set vector at four quadrature points
1005     *o++ = 0.; *o++ = -1.; *o++ = 0.;
1006     *o++ = 0.; *o++ = -1.; *o++ = 0.;
1007     *o++ = 0.; *o++ = -1.; *o++ = 0.;
1008     *o++ = 0.; *o++ = -1.; *o = 0.;
1009     }
1010     }
1011     }
1012    
1013     if (m_faceOffset[3] > -1) {
1014     #pragma omp for nowait
1015     for (index_t k2 = 0; k2 < m_NE2; ++k2) {
1016     for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1017     double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE0));
1018     // set vector at four quadrature points
1019     *o++ = 0.; *o++ = 1.; *o++ = 0.;
1020     *o++ = 0.; *o++ = 1.; *o++ = 0.;
1021     *o++ = 0.; *o++ = 1.; *o++ = 0.;
1022     *o++ = 0.; *o++ = 1.; *o = 0.;
1023     }
1024     }
1025     }
1026    
1027     if (m_faceOffset[4] > -1) {
1028     #pragma omp for nowait
1029     for (index_t k1 = 0; k1 < m_NE1; ++k1) {
1030     for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1031     double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE0));
1032     // set vector at four quadrature points
1033     *o++ = 0.; *o++ = 0.; *o++ = -1.;
1034     *o++ = 0.; *o++ = 0.; *o++ = -1.;
1035     *o++ = 0.; *o++ = 0.; *o++ = -1.;
1036     *o++ = 0.; *o++ = 0.; *o = -1.;
1037     }
1038     }
1039     }
1040    
1041     if (m_faceOffset[5] > -1) {
1042     #pragma omp for nowait
1043     for (index_t k1 = 0; k1 < m_NE1; ++k1) {
1044     for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1045     double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE0));
1046     // set vector at four quadrature points
1047     *o++ = 0.; *o++ = 0.; *o++ = 1.;
1048     *o++ = 0.; *o++ = 0.; *o++ = 1.;
1049     *o++ = 0.; *o++ = 0.; *o++ = 1.;
1050     *o++ = 0.; *o++ = 0.; *o = 1.;
1051     }
1052     }
1053     }
1054     } // end of parallel section
1055     } else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
1056     #pragma omp parallel
1057     {
1058     if (m_faceOffset[0] > -1) {
1059     #pragma omp for nowait
1060     for (index_t k2 = 0; k2 < m_NE2; ++k2) {
1061     for (index_t k1 = 0; k1 < m_NE1; ++k1) {
1062     double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE1));
1063     *o++ = -1.;
1064     *o++ = 0.;
1065     *o = 0.;
1066     }
1067     }
1068     }
1069    
1070     if (m_faceOffset[1] > -1) {
1071     #pragma omp for nowait
1072     for (index_t k2 = 0; k2 < m_NE2; ++k2) {
1073     for (index_t k1 = 0; k1 < m_NE1; ++k1) {
1074     double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE1));
1075     *o++ = 1.;
1076     *o++ = 0.;
1077     *o = 0.;
1078     }
1079     }
1080     }
1081    
1082     if (m_faceOffset[2] > -1) {
1083     #pragma omp for nowait
1084     for (index_t k2 = 0; k2 < m_NE2; ++k2) {
1085     for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1086     double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE0));
1087     *o++ = 0.;
1088     *o++ = -1.;
1089     *o = 0.;
1090     }
1091     }
1092     }
1093    
1094     if (m_faceOffset[3] > -1) {
1095     #pragma omp for nowait
1096     for (index_t k2 = 0; k2 < m_NE2; ++k2) {
1097     for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1098     double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE0));
1099     *o++ = 0.;
1100     *o++ = 1.;
1101     *o = 0.;
1102     }
1103     }
1104     }
1105    
1106     if (m_faceOffset[4] > -1) {
1107     #pragma omp for nowait
1108     for (index_t k1 = 0; k1 < m_NE1; ++k1) {
1109     for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1110     double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE0));
1111     *o++ = 0.;
1112     *o++ = 0.;
1113     *o = -1.;
1114     }
1115     }
1116     }
1117    
1118     if (m_faceOffset[5] > -1) {
1119     #pragma omp for nowait
1120     for (index_t k1 = 0; k1 < m_NE1; ++k1) {
1121     for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1122     double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE0));
1123     *o++ = 0.;
1124     *o++ = 0.;
1125     *o = 1.;
1126     }
1127     }
1128     }
1129     } // end of parallel section
1130    
1131     } else {
1132     stringstream msg;
1133     msg << "setToNormal() not implemented for "
1134     << functionSpaceTypeAsString(out.getFunctionSpace().getTypeCode());
1135     throw RipleyException(msg.str());
1136     }
1137     }
1138    
1139 caltinay 3691 Paso_SystemMatrixPattern* Brick::getPattern(bool reducedRowOrder,
1140     bool reducedColOrder) const
1141     {
1142     if (reducedRowOrder || reducedColOrder)
1143     throw RipleyException("getPattern() not implemented for reduced order");
1144    
1145     throw RipleyException("getPattern() not implemented");
1146     }
1147    
1148     void Brick::Print_Mesh_Info(const bool full) const
1149     {
1150     RipleyDomain::Print_Mesh_Info(full);
1151     if (full) {
1152     cout << " Id Coordinates" << endl;
1153     cout.precision(15);
1154     cout.setf(ios::scientific, ios::floatfield);
1155 caltinay 3698 pair<double,double> xdx = getFirstCoordAndSpacing(0);
1156     pair<double,double> ydy = getFirstCoordAndSpacing(1);
1157     pair<double,double> zdz = getFirstCoordAndSpacing(2);
1158 caltinay 3691 for (index_t i=0; i < getNumNodes(); i++) {
1159     cout << " " << setw(5) << m_nodeId[i]
1160 caltinay 3698 << " " << xdx.first+(i%m_N0)*xdx.second
1161     << " " << ydy.first+(i%(m_N0*m_N1)/m_N0)*ydy.second
1162     << " " << zdz.first+(i/(m_N0*m_N1))*zdz.second << endl;
1163 caltinay 3691 }
1164     }
1165     }
1166    
1167 caltinay 3698 IndexVector Brick::getNumNodesPerDim() const
1168     {
1169     IndexVector ret;
1170     ret.push_back(m_N0);
1171     ret.push_back(m_N1);
1172     ret.push_back(m_N2);
1173     return ret;
1174     }
1175    
1176     IndexVector Brick::getNumElementsPerDim() const
1177     {
1178     IndexVector ret;
1179     ret.push_back(m_NE0);
1180     ret.push_back(m_NE1);
1181     ret.push_back(m_NE2);
1182     return ret;
1183     }
1184    
1185     IndexVector Brick::getNumFacesPerBoundary() const
1186     {
1187     IndexVector ret(6, 0);
1188     //left
1189     if (m_offset0==0)
1190     ret[0]=m_NE1*m_NE2;
1191     //right
1192     if (m_mpiInfo->rank%m_NX==m_NX-1)
1193     ret[1]=m_NE1*m_NE2;
1194     //bottom
1195     if (m_offset1==0)
1196     ret[2]=m_NE0*m_NE2;
1197     //top
1198     if (m_mpiInfo->rank%(m_NX*m_NY)/m_NX==m_NY-1)
1199     ret[3]=m_NE0*m_NE2;
1200     //front
1201     if (m_offset2==0)
1202     ret[4]=m_NE0*m_NE1;
1203     //back
1204     if (m_mpiInfo->rank/(m_NX*m_NY)==m_NZ-1)
1205     ret[5]=m_NE0*m_NE1;
1206     return ret;
1207     }
1208    
1209     pair<double,double> Brick::getFirstCoordAndSpacing(dim_t dim) const
1210     {
1211     if (dim==0)
1212     return pair<double,double>((m_l0*m_offset0)/m_gNE0, m_l0/m_gNE0);
1213     else if (dim==1)
1214     return pair<double,double>((m_l1*m_offset1)/m_gNE1, m_l1/m_gNE1);
1215     else if (dim==2)
1216     return pair<double,double>((m_l2*m_offset2)/m_gNE2, m_l2/m_gNE2);
1217    
1218     throw RipleyException("getFirstCoordAndSpacing(): invalid argument");
1219     }
1220    
1221    
1222 caltinay 3691 //protected
1223     dim_t Brick::getNumFaceElements() const
1224     {
1225     dim_t n=0;
1226     //left
1227     if (m_offset0==0)
1228     n+=m_NE1*m_NE2;
1229     //right
1230     if (m_mpiInfo->rank%m_NX==m_NX-1)
1231     n+=m_NE1*m_NE2;
1232     //bottom
1233     if (m_offset1==0)
1234     n+=m_NE0*m_NE2;
1235     //top
1236     if (m_mpiInfo->rank%(m_NX*m_NY)/m_NX==m_NY-1)
1237     n+=m_NE0*m_NE2;
1238     //front
1239     if (m_offset2==0)
1240     n+=m_NE0*m_NE1;
1241     //back
1242     if (m_mpiInfo->rank/(m_NX*m_NY)==m_NZ-1)
1243     n+=m_NE0*m_NE1;
1244    
1245     return n;
1246     }
1247    
1248     //protected
1249     void Brick::assembleCoordinates(escript::Data& arg) const
1250     {
1251     escriptDataC x = arg.getDataC();
1252     int numDim = m_numDim;
1253     if (!isDataPointShapeEqual(&x, 1, &numDim))
1254     throw RipleyException("setToX: Invalid Data object shape");
1255     if (!numSamplesEqual(&x, 1, getNumNodes()))
1256     throw RipleyException("setToX: Illegal number of samples in Data object");
1257    
1258 caltinay 3698 pair<double,double> xdx = getFirstCoordAndSpacing(0);
1259     pair<double,double> ydy = getFirstCoordAndSpacing(1);
1260     pair<double,double> zdz = getFirstCoordAndSpacing(2);
1261 caltinay 3691 arg.requireWrite();
1262     #pragma omp parallel for
1263     for (dim_t i2 = 0; i2 < m_N2; i2++) {
1264     for (dim_t i1 = 0; i1 < m_N1; i1++) {
1265     for (dim_t i0 = 0; i0 < m_N0; i0++) {
1266     double* point = arg.getSampleDataRW(i0+m_N0*i1+m_N0*m_N1*i2);
1267 caltinay 3698 point[0] = xdx.first+i0*xdx.second;
1268     point[1] = ydy.first+i1*ydy.second;
1269     point[2] = zdz.first+i2*zdz.second;
1270 caltinay 3691 }
1271     }
1272     }
1273     }
1274    
1275     //private
1276     void Brick::populateSampleIds()
1277     {
1278 caltinay 3697 // identifiers are ordered from left to right, bottom to top, front to back
1279     // on each rank, except for the shared nodes which are owned by the rank
1280     // below / to the left / to the front of the current rank
1281 caltinay 3698
1282     // build node distribution vector first.
1283     // m_nodeDistribution[i] is the first node id on rank i, that is
1284     // rank i owns m_nodeDistribution[i+1]-nodeDistribution[i] nodes
1285     m_nodeDistribution.assign(m_mpiInfo->size+1, 0);
1286     m_nodeDistribution[1]=getNumNodes();
1287     for (dim_t k=1; k<m_mpiInfo->size-1; k++) {
1288     const index_t x = k%m_NX;
1289     const index_t y = k%(m_NX*m_NY)/m_NX;
1290     const index_t z = k/(m_NX*m_NY);
1291     index_t numNodes=getNumNodes();
1292     if (x>0)
1293     numNodes-=m_N1*m_N2;
1294     if (y>0)
1295     numNodes-=m_N0*m_N2;
1296     if (z>0)
1297     numNodes-=m_N0*m_N1;
1298     // if an edge was subtracted twice add it back
1299     if (x>0 && y>0)
1300     numNodes+=m_N2;
1301     if (x>0 && z>0)
1302     numNodes+=m_N1;
1303     if (y>0 && z>0)
1304     numNodes+=m_N0;
1305     // the corner node was removed 3x and added back 3x, so subtract it
1306     if (x>0 && y>0 && z>0)
1307     numNodes--;
1308     m_nodeDistribution[k+1]=m_nodeDistribution[k]+numNodes;
1309     }
1310     m_nodeDistribution[m_mpiInfo->size]=getNumDataPointsGlobal();
1311    
1312 caltinay 3691 m_nodeId.resize(getNumNodes());
1313 caltinay 3698
1314     // the bottom, left and front planes are not owned by this rank so the
1315     // identifiers need to be computed accordingly
1316     const index_t left = (m_offset0==0 ? 0 : 1);
1317     const index_t bottom = (m_offset1==0 ? 0 : 1);
1318     const index_t front = (m_offset2==0 ? 0 : 1);
1319    
1320     // case 1: all nodes on left plane are owned by rank on the left
1321     if (left>0) {
1322     const int neighbour=m_mpiInfo->rank-1;
1323     const index_t leftN0=(neighbour%m_NX == 0 ? m_N0 : m_N0-1);
1324     const index_t leftN1=(neighbour%(m_NX*m_NY)/m_NX==0 ? m_N1 : m_N1-1);
1325 caltinay 3691 #pragma omp parallel for
1326 caltinay 3698 for (dim_t i2=front; i2<m_N2; i2++) {
1327     for (dim_t i1=bottom; i1<m_N1; i1++) {
1328     m_nodeId[i1*m_N0+i2*m_N0*m_N1]=m_nodeDistribution[neighbour]
1329     + (i1-bottom+1)*leftN0
1330     + (i2-front)*leftN0*leftN1 - 1;
1331     }
1332     }
1333 caltinay 3691 }
1334 caltinay 3698 // case 2: all nodes on bottom plane are owned by rank below
1335     if (bottom>0) {
1336     const int neighbour=m_mpiInfo->rank-m_NX;
1337     const index_t bottomN0=(neighbour%m_NX == 0 ? m_N0 : m_N0-1);
1338     const index_t bottomN1=(neighbour%(m_NX*m_NY)/m_NX==0 ? m_N1 : m_N1-1);
1339     #pragma omp parallel for
1340     for (dim_t i2=front; i2<m_N2; i2++) {
1341     for (dim_t i0=left; i0<m_N0; i0++) {
1342     m_nodeId[i0+i2*m_N0*m_N1]=m_nodeDistribution[neighbour]
1343     + bottomN0*(bottomN1-1)
1344     + (i2-front)*bottomN0*bottomN1 + i0-left;
1345     }
1346     }
1347     }
1348     // case 3: all nodes on front plane are owned by rank in front
1349     if (front>0) {
1350     const int neighbour=m_mpiInfo->rank-m_NX*m_NY;
1351     const index_t N0=(neighbour%m_NX == 0 ? m_N0 : m_N0-1);
1352     const index_t N1=(neighbour%(m_NX*m_NY)/m_NX==0 ? m_N1 : m_N1-1);
1353     const index_t N2=(neighbour/(m_NX*m_NY)==0 ? m_N2 : m_N2-1);
1354     #pragma omp parallel for
1355     for (dim_t i1=bottom; i1<m_N1; i1++) {
1356     for (dim_t i0=left; i0<m_N0; i0++) {
1357     m_nodeId[i0+i1*m_N0]=m_nodeDistribution[neighbour]
1358     + N0*N1*(N2-1)+(i1-bottom)*N0 + i0-left;
1359     }
1360     }
1361     }
1362     // case 4: nodes on front bottom edge are owned by the corresponding rank
1363     if (front>0 && bottom>0) {
1364     const int neighbour=m_mpiInfo->rank-m_NX*(m_NY+1);
1365     const index_t N0=(neighbour%m_NX == 0 ? m_N0 : m_N0-1);
1366     const index_t N1=(neighbour%(m_NX*m_NY)/m_NX==0 ? m_N1 : m_N1-1);
1367     const index_t N2=(neighbour/(m_NX*m_NY)==0 ? m_N2 : m_N2-1);
1368     #pragma omp parallel for
1369     for (dim_t i0=left; i0<m_N0; i0++) {
1370     m_nodeId[i0]=m_nodeDistribution[neighbour]
1371     + N0*N1*(N2-1)+(N1-1)*N0 + i0-left;
1372     }
1373     }
1374     // case 5: nodes on left bottom edge are owned by the corresponding rank
1375     if (left>0 && bottom>0) {
1376     const int neighbour=m_mpiInfo->rank-m_NX-1;
1377     const index_t N0=(neighbour%m_NX == 0 ? m_N0 : m_N0-1);
1378     const index_t N1=(neighbour%(m_NX*m_NY)/m_NX==0 ? m_N1 : m_N1-1);
1379     #pragma omp parallel for
1380     for (dim_t i2=front; i2<m_N2; i2++) {
1381     m_nodeId[i2*m_N0*m_N1]=m_nodeDistribution[neighbour]
1382     + (1+i2-front)*N0*N1-1;
1383     }
1384     }
1385     // case 6: nodes on left front edge are owned by the corresponding rank
1386     if (left>0 && front>0) {
1387     const int neighbour=m_mpiInfo->rank-m_NX*m_NY-1;
1388     const index_t N0=(neighbour%m_NX == 0 ? m_N0 : m_N0-1);
1389     const index_t N1=(neighbour%(m_NX*m_NY)/m_NX==0 ? m_N1 : m_N1-1);
1390     const index_t N2=(neighbour/(m_NX*m_NY)==0 ? m_N2 : m_N2-1);
1391     #pragma omp parallel for
1392     for (dim_t i1=bottom; i1<m_N1; i1++) {
1393     m_nodeId[i1*m_N0]=m_nodeDistribution[neighbour]
1394     + N0*N1*(N2-1)+N0-1+(i1-bottom)*N0;
1395     }
1396     }
1397     // case 7: bottom-left-front corner node owned by corresponding rank
1398     if (left>0 && bottom>0 && front>0) {
1399     const int neighbour=m_mpiInfo->rank-m_NX*(m_NY+1)-1;
1400     const index_t N0=(neighbour%m_NX == 0 ? m_N0 : m_N0-1);
1401     const index_t N1=(neighbour%(m_NX*m_NY)/m_NX==0 ? m_N1 : m_N1-1);
1402     const index_t N2=(neighbour/(m_NX*m_NY) == 0 ? m_N2 : m_N2-1);
1403     m_nodeId[0]=m_nodeDistribution[neighbour]+N0*N1*N2-1;
1404     }
1405 caltinay 3697
1406 caltinay 3698 // the rest of the id's are contiguous
1407     const index_t firstId=m_nodeDistribution[m_mpiInfo->rank];
1408     #pragma omp parallel for
1409     for (dim_t i2=front; i2<m_N2; i2++) {
1410     for (dim_t i1=bottom; i1<m_N1; i1++) {
1411     for (dim_t i0=left; i0<m_N0; i0++) {
1412     m_nodeId[i0+i1*m_N0+i2*m_N0*m_N1] = firstId+i0-left
1413     +(i1-bottom)*(m_N0-left)
1414     +(i2-front)*(m_N0-left)*(m_N1-bottom);
1415     }
1416     }
1417     }
1418    
1419 caltinay 3697 // elements
1420     m_elementId.resize(getNumElements());
1421     #pragma omp parallel for
1422     for (dim_t k=0; k<getNumElements(); k++) {
1423     m_elementId[k]=k;
1424     }
1425    
1426     // face elements
1427     m_faceId.resize(getNumFaceElements());
1428     #pragma omp parallel for
1429     for (dim_t k=0; k<getNumFaceElements(); k++) {
1430     m_faceId[k]=k;
1431     }
1432 caltinay 3704
1433 caltinay 3722 // generate face offset vector and set face tags
1434 caltinay 3704 const IndexVector facesPerEdge = getNumFacesPerBoundary();
1435 caltinay 3722 const index_t LEFT=1, RIGHT=2, BOTTOM=10, TOP=20, FRONT=100, BACK=200;
1436     const index_t faceTag[] = { LEFT, RIGHT, BOTTOM, TOP, FRONT, BACK };
1437 caltinay 3704 m_faceOffset.assign(facesPerEdge.size(), -1);
1438 caltinay 3722 m_faceTags.clear();
1439 caltinay 3704 index_t offset=0;
1440     for (size_t i=0; i<facesPerEdge.size(); i++) {
1441     if (facesPerEdge[i]>0) {
1442     m_faceOffset[i]=offset;
1443     offset+=facesPerEdge[i];
1444 caltinay 3722 m_faceTags.insert(m_faceTags.end(), facesPerEdge[i], faceTag[i]);
1445 caltinay 3704 }
1446     }
1447 caltinay 3722 setTagMap("left", LEFT);
1448     setTagMap("right", RIGHT);
1449     setTagMap("bottom", BOTTOM);
1450     setTagMap("top", TOP);
1451     setTagMap("front", FRONT);
1452     setTagMap("back", BACK);
1453     updateTagsInUse(FaceElements);
1454 caltinay 3691 }
1455    
1456 caltinay 3703 //protected
1457 caltinay 3711 void Brick::interpolateNodesOnElements(escript::Data& out, escript::Data& in,
1458     bool reduced) const
1459 caltinay 3703 {
1460     const dim_t numComp = in.getDataPointSize();
1461 caltinay 3711 if (reduced) {
1462 caltinay 3731 /*** GENERATOR SNIP_INTERPOLATE_REDUCED_ELEMENTS TOP */
1463     const double c0 = .125;
1464 caltinay 3703 #pragma omp parallel for
1465 caltinay 3711 for (index_t k2=0; k2 < m_NE2; ++k2) {
1466     for (index_t k1=0; k1 < m_NE1; ++k1) {
1467     for (index_t k0=0; k0 < m_NE0; ++k0) {
1468     const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,k2, m_N0,m_N1));
1469     const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,k2+1, m_N0,m_N1));
1470     const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,k2+1, m_N0,m_N1));
1471     const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,k2+1, m_N0,m_N1));
1472     const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,k2, m_N0,m_N1));
1473     const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2, m_N0,m_N1));
1474     const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,k2, m_N0,m_N1));
1475     const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2+1, m_N0,m_N1));
1476     double* o = out.getSampleDataRW(INDEX3(k0,k1,k2,m_NE0,m_NE1));
1477     for (index_t i=0; i < numComp; ++i) {
1478 caltinay 3731 o[INDEX2(i,numComp,0)] = c0*(f_000[i] + f_001[i] + f_010[i] + f_011[i] + f_100[i] + f_101[i] + f_110[i] + f_111[i]);
1479 caltinay 3711 } /* end of component loop i */
1480     } /* end of k0 loop */
1481     } /* end of k1 loop */
1482     } /* end of k2 loop */
1483     /* GENERATOR SNIP_INTERPOLATE_REDUCED_ELEMENTS BOTTOM */
1484     } else {
1485 caltinay 3731 /*** GENERATOR SNIP_INTERPOLATE_ELEMENTS TOP */
1486     const double c0 = .0094373878376559314545;
1487     const double c1 = .035220810900864519624;
1488     const double c2 = .13144585576580214704;
1489     const double c3 = .49056261216234406855;
1490 caltinay 3711 #pragma omp parallel for
1491     for (index_t k2=0; k2 < m_NE2; ++k2) {
1492     for (index_t k1=0; k1 < m_NE1; ++k1) {
1493     for (index_t k0=0; k0 < m_NE0; ++k0) {
1494     const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,k2, m_N0,m_N1));
1495     const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,k2+1, m_N0,m_N1));
1496     const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,k2+1, m_N0,m_N1));
1497     const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,k2+1, m_N0,m_N1));
1498     const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2, m_N0,m_N1));
1499     const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,k2, m_N0,m_N1));
1500     const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,k2, m_N0,m_N1));
1501     const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2+1, m_N0,m_N1));
1502     double* o = out.getSampleDataRW(INDEX3(k0,k1,k2,m_NE0,m_NE1));
1503     for (index_t i=0; i < numComp; ++i) {
1504 caltinay 3731 o[INDEX2(i,numComp,0)] = f_000[i]*c3 + f_111[i]*c0 + c2*(f_001[i] + f_010[i] + f_100[i]) + c1*(f_011[i] + f_101[i] + f_110[i]);
1505     o[INDEX2(i,numComp,1)] = f_011[i]*c0 + f_100[i]*c3 + c2*(f_000[i] + f_101[i] + f_110[i]) + c1*(f_001[i] + f_010[i] + f_111[i]);
1506     o[INDEX2(i,numComp,2)] = f_010[i]*c3 + f_101[i]*c0 + c2*(f_000[i] + f_011[i] + f_110[i]) + c1*(f_001[i] + f_100[i] + f_111[i]);
1507     o[INDEX2(i,numComp,3)] = f_001[i]*c0 + f_110[i]*c3 + c2*(f_010[i] + f_100[i] + f_111[i]) + c1*(f_000[i] + f_011[i] + f_101[i]);
1508     o[INDEX2(i,numComp,4)] = f_001[i]*c3 + f_110[i]*c0 + c2*(f_000[i] + f_011[i] + f_101[i]) + c1*(f_010[i] + f_100[i] + f_111[i]);
1509     o[INDEX2(i,numComp,5)] = f_010[i]*c0 + f_101[i]*c3 + c2*(f_001[i] + f_100[i] + f_111[i]) + c1*(f_000[i] + f_011[i] + f_110[i]);
1510     o[INDEX2(i,numComp,6)] = f_011[i]*c3 + f_100[i]*c0 + c2*(f_001[i] + f_010[i] + f_111[i]) + c1*(f_000[i] + f_101[i] + f_110[i]);
1511     o[INDEX2(i,numComp,7)] = f_000[i]*c0 + f_111[i]*c3 + c2*(f_011[i] + f_101[i] + f_110[i]) + c1*(f_001[i] + f_010[i] + f_100[i]);
1512 caltinay 3711 } /* end of component loop i */
1513     } /* end of k0 loop */
1514     } /* end of k1 loop */
1515     } /* end of k2 loop */
1516     /* GENERATOR SNIP_INTERPOLATE_ELEMENTS BOTTOM */
1517     }
1518 caltinay 3703 }
1519    
1520     //protected
1521 caltinay 3711 void Brick::interpolateNodesOnFaces(escript::Data& out, escript::Data& in,
1522     bool reduced) const
1523 caltinay 3703 {
1524 caltinay 3704 const dim_t numComp = in.getDataPointSize();
1525 caltinay 3711 if (reduced) {
1526 caltinay 3731 const double c0 = .25;
1527     #pragma omp parallel
1528     {
1529     /*** GENERATOR SNIP_INTERPOLATE_REDUCED_FACES TOP */
1530     if (m_faceOffset[0] > -1) {
1531     #pragma omp for nowait
1532     for (index_t k2=0; k2 < m_NE2; ++k2) {
1533     for (index_t k1=0; k1 < m_NE1; ++k1) {
1534     const register double* f_011 = in.getSampleDataRO(INDEX3(0,k1+1,k2+1, m_N0,m_N1));
1535     const register double* f_010 = in.getSampleDataRO(INDEX3(0,k1+1,k2, m_N0,m_N1));
1536     const register double* f_001 = in.getSampleDataRO(INDEX3(0,k1,k2+1, m_N0,m_N1));
1537     const register double* f_000 = in.getSampleDataRO(INDEX3(0,k1,k2, m_N0,m_N1));
1538     double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE1));
1539     for (index_t i=0; i < numComp; ++i) {
1540     o[INDEX2(i,numComp,0)] = c0*(f_000[i] + f_001[i] + f_010[i] + f_011[i]);
1541     } /* end of component loop i */
1542     } /* end of k1 loop */
1543     } /* end of k2 loop */
1544     } /* end of face 0 */
1545     if (m_faceOffset[1] > -1) {
1546     #pragma omp for nowait
1547     for (index_t k2=0; k2 < m_NE2; ++k2) {
1548     for (index_t k1=0; k1 < m_NE1; ++k1) {
1549     const re