/[escript]/branches/diaplayground/ripley/src/Brick.cpp
ViewVC logotype

Annotation of /branches/diaplayground/ripley/src/Brick.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 3711 - (hide annotations)
Tue Dec 6 00:24:43 2011 UTC (7 years, 3 months ago) by caltinay
Original Path: branches/ripleygmg_from_3668/ripley/src/Brick.cpp
File size: 95854 byte(s)
Interpolation & Gradient for reduced elements & faces.

1 caltinay 3691
2     /*******************************************************
3     *
4     * Copyright (c) 2003-2011 by University of Queensland
5     * Earth Systems Science Computational Center (ESSCC)
6     * http://www.uq.edu.au/esscc
7     *
8     * Primary Business: Queensland, Australia
9     * Licensed under the Open Software License version 3.0
10     * http://www.opensource.org/licenses/osl-3.0.php
11     *
12     *******************************************************/
13    
14     #include <ripley/Brick.h>
15     extern "C" {
16     #include "paso/SystemMatrixPattern.h"
17     }
18    
19     #if USE_SILO
20     #include <silo.h>
21     #ifdef ESYS_MPI
22     #include <pmpio.h>
23     #endif
24     #endif
25    
26     #include <iomanip>
27    
28     using namespace std;
29    
30     namespace ripley {
31    
32     Brick::Brick(int n0, int n1, int n2, double l0, double l1, double l2, int d0,
33     int d1, int d2) :
34     RipleyDomain(3),
35     m_gNE0(n0),
36     m_gNE1(n1),
37     m_gNE2(n2),
38     m_l0(l0),
39     m_l1(l1),
40     m_l2(l2),
41     m_NX(d0),
42     m_NY(d1),
43     m_NZ(d2)
44     {
45     // ensure number of subdivisions is valid and nodes can be distributed
46     // among number of ranks
47     if (m_NX*m_NY*m_NZ != m_mpiInfo->size)
48     throw RipleyException("Invalid number of spatial subdivisions");
49    
50     if (n0%m_NX > 0 || n1%m_NY > 0 || n2%m_NZ > 0)
51     throw RipleyException("Number of elements must be separable into number of ranks in each dimension");
52    
53     // local number of elements
54     m_NE0 = n0/m_NX;
55     m_NE1 = n1/m_NY;
56     m_NE2 = n2/m_NZ;
57     // local number of nodes (not necessarily owned)
58     m_N0 = m_NE0+1;
59     m_N1 = m_NE1+1;
60     m_N2 = m_NE2+1;
61     // bottom-left-front node is at (offset0,offset1,offset2) in global mesh
62     m_offset0 = m_NE0*(m_mpiInfo->rank%m_NX);
63     m_offset1 = m_NE1*(m_mpiInfo->rank%(m_NX*m_NY)/m_NX);
64     m_offset2 = m_NE2*(m_mpiInfo->rank/(m_NX*m_NY));
65     populateSampleIds();
66     }
67    
68    
69     Brick::~Brick()
70     {
71     }
72    
73     string Brick::getDescription() const
74     {
75     return "ripley::Brick";
76     }
77    
78     bool Brick::operator==(const AbstractDomain& other) const
79     {
80     if (dynamic_cast<const Brick*>(&other))
81     return this==&other;
82    
83     return false;
84     }
85    
86     void Brick::dump(const string& fileName) const
87     {
88     #if USE_SILO
89     string fn(fileName);
90     if (fileName.length() < 6 || fileName.compare(fileName.length()-5, 5, ".silo") != 0) {
91     fn+=".silo";
92     }
93    
94     const int NUM_SILO_FILES = 1;
95     const char* blockDirFmt = "/block%04d";
96     int driver=DB_HDF5;
97     string siloPath;
98     DBfile* dbfile = NULL;
99    
100     #ifdef ESYS_MPI
101     PMPIO_baton_t* baton = NULL;
102     #endif
103    
104     if (m_mpiInfo->size > 1) {
105     #ifdef ESYS_MPI
106     baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
107     0x1337, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
108     PMPIO_DefaultClose, (void*)&driver);
109     // try the fallback driver in case of error
110     if (!baton && driver != DB_PDB) {
111     driver = DB_PDB;
112     baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
113     0x1338, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
114     PMPIO_DefaultClose, (void*)&driver);
115     }
116     if (baton) {
117     char str[64];
118     snprintf(str, 64, blockDirFmt, PMPIO_RankInGroup(baton, m_mpiInfo->rank));
119     siloPath = str;
120     dbfile = (DBfile*) PMPIO_WaitForBaton(baton, fn.c_str(), siloPath.c_str());
121     }
122     #endif
123     } else {
124     dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
125     getDescription().c_str(), driver);
126     // try the fallback driver in case of error
127     if (!dbfile && driver != DB_PDB) {
128     driver = DB_PDB;
129     dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
130     getDescription().c_str(), driver);
131     }
132     }
133    
134     if (!dbfile)
135     throw RipleyException("dump: Could not create Silo file");
136    
137     /*
138     if (driver==DB_HDF5) {
139     // gzip level 1 already provides good compression with minimal
140     // performance penalty. Some tests showed that gzip levels >3 performed
141     // rather badly on escript data both in terms of time and space
142     DBSetCompression("ERRMODE=FALLBACK METHOD=GZIP LEVEL=1");
143     }
144     */
145    
146     boost::scoped_ptr<double> x(new double[m_N0]);
147     boost::scoped_ptr<double> y(new double[m_N1]);
148     boost::scoped_ptr<double> z(new double[m_N2]);
149     double* coords[3] = { x.get(), y.get(), z.get() };
150 caltinay 3698 pair<double,double> xdx = getFirstCoordAndSpacing(0);
151     pair<double,double> ydy = getFirstCoordAndSpacing(1);
152     pair<double,double> zdz = getFirstCoordAndSpacing(2);
153 caltinay 3691 #pragma omp parallel
154     {
155     #pragma omp for
156     for (dim_t i0 = 0; i0 < m_N0; i0++) {
157 caltinay 3698 coords[0][i0]=xdx.first+i0*xdx.second;
158 caltinay 3691 }
159     #pragma omp for
160     for (dim_t i1 = 0; i1 < m_N1; i1++) {
161 caltinay 3698 coords[1][i1]=ydy.first+i1*ydy.second;
162 caltinay 3691 }
163     #pragma omp for
164     for (dim_t i2 = 0; i2 < m_N2; i2++) {
165 caltinay 3698 coords[2][i2]=zdz.first+i2*zdz.second;
166 caltinay 3691 }
167     }
168 caltinay 3698 IndexVector dims = getNumNodesPerDim();
169     DBPutQuadmesh(dbfile, "mesh", NULL, coords, &dims[0], 3, DB_DOUBLE,
170 caltinay 3691 DB_COLLINEAR, NULL);
171    
172 caltinay 3698 DBPutQuadvar1(dbfile, "nodeId", "mesh", (void*)&m_nodeId[0], &dims[0], 3,
173     NULL, 0, DB_INT, DB_NODECENT, NULL);
174 caltinay 3691
175 caltinay 3698 // write element ids
176     dims = getNumElementsPerDim();
177     DBPutQuadvar1(dbfile, "elementId", "mesh", (void*)&m_elementId[0],
178     &dims[0], 3, NULL, 0, DB_INT, DB_ZONECENT, NULL);
179    
180     // rank 0 writes multimesh and multivar
181 caltinay 3691 if (m_mpiInfo->rank == 0) {
182     vector<string> tempstrings;
183     vector<char*> names;
184     for (dim_t i=0; i<m_mpiInfo->size; i++) {
185     stringstream path;
186     path << "/block" << setw(4) << setfill('0') << right << i << "/mesh";
187     tempstrings.push_back(path.str());
188     names.push_back((char*)tempstrings.back().c_str());
189     }
190     vector<int> types(m_mpiInfo->size, DB_QUAD_RECT);
191     DBSetDir(dbfile, "/");
192     DBPutMultimesh(dbfile, "multimesh", m_mpiInfo->size, &names[0],
193     &types[0], NULL);
194     tempstrings.clear();
195     names.clear();
196     for (dim_t i=0; i<m_mpiInfo->size; i++) {
197     stringstream path;
198     path << "/block" << setw(4) << setfill('0') << right << i << "/nodeId";
199     tempstrings.push_back(path.str());
200     names.push_back((char*)tempstrings.back().c_str());
201     }
202     types.assign(m_mpiInfo->size, DB_QUADVAR);
203     DBPutMultivar(dbfile, "nodeId", m_mpiInfo->size, &names[0],
204     &types[0], NULL);
205 caltinay 3698 tempstrings.clear();
206     names.clear();
207     for (dim_t i=0; i<m_mpiInfo->size; i++) {
208     stringstream path;
209     path << "/block" << setw(4) << setfill('0') << right << i << "/elementId";
210     tempstrings.push_back(path.str());
211     names.push_back((char*)tempstrings.back().c_str());
212     }
213     DBPutMultivar(dbfile, "elementId", m_mpiInfo->size, &names[0],
214     &types[0], NULL);
215 caltinay 3691 }
216    
217     if (m_mpiInfo->size > 1) {
218     #ifdef ESYS_MPI
219     PMPIO_HandOffBaton(baton, dbfile);
220     PMPIO_Finish(baton);
221     #endif
222     } else {
223     DBClose(dbfile);
224     }
225    
226     #else // USE_SILO
227     throw RipleyException("dump(): no Silo support");
228     #endif
229     }
230    
231     const int* Brick::borrowSampleReferenceIDs(int fsType) const
232     {
233 caltinay 3697 switch (fsType) {
234     case Nodes:
235     return &m_nodeId[0];
236     case Elements:
237     return &m_elementId[0];
238     case FaceElements:
239     return &m_faceId[0];
240     default:
241     break;
242     }
243 caltinay 3691
244 caltinay 3697 stringstream msg;
245     msg << "borrowSampleReferenceIDs() not implemented for function space type "
246     << fsType;
247     throw RipleyException(msg.str());
248 caltinay 3691 }
249    
250     bool Brick::ownSample(int fsCode, index_t id) const
251     {
252     #ifdef ESYS_MPI
253     if (fsCode == Nodes) {
254 caltinay 3698 const index_t myFirst=m_nodeDistribution[m_mpiInfo->rank];
255     const index_t myLast=m_nodeDistribution[m_mpiInfo->rank+1]-1;
256 caltinay 3691 return (m_nodeId[id]>=myFirst && m_nodeId[id]<=myLast);
257     } else
258     throw RipleyException("ownSample() only implemented for Nodes");
259     #else
260     return true;
261     #endif
262     }
263    
264 caltinay 3703 void Brick::setToGradient(escript::Data& out, const escript::Data& cIn) const
265     {
266     escript::Data& in = *const_cast<escript::Data*>(&cIn);
267     const dim_t numComp = in.getDataPointSize();
268     const double h0 = m_l0/m_gNE0;
269     const double h1 = m_l1/m_gNE1;
270     const double h2 = m_l1/m_gNE2;
271     if (out.getFunctionSpace().getTypeCode() == Elements) {
272     /* GENERATOR SNIP_GRAD_ELEMENTS TOP */
273     const double tmp0_22 = -0.044658198738520451079/h1;
274     const double tmp0_16 = 0.16666666666666666667/h0;
275     const double tmp0_33 = -0.62200846792814621559/h1;
276     const double tmp0_0 = -0.62200846792814621559/h0;
277     const double tmp0_21 = -0.16666666666666666667/h1;
278     const double tmp0_17 = 0.62200846792814621559/h0;
279     const double tmp0_52 = -0.044658198738520451079/h2;
280     const double tmp0_1 = -0.16666666666666666667/h0;
281     const double tmp0_20 = -0.62200846792814621559/h1;
282     const double tmp0_14 = -0.044658198738520451079/h0;
283     const double tmp0_53 = -0.62200846792814621559/h2;
284     const double tmp0_49 = 0.16666666666666666667/h2;
285     const double tmp0_2 = 0.16666666666666666667/h0;
286     const double tmp0_27 = -0.044658198738520451079/h1;
287     const double tmp0_15 = -0.16666666666666666667/h0;
288     const double tmp0_50 = -0.16666666666666666667/h2;
289     const double tmp0_48 = 0.62200846792814621559/h2;
290     const double tmp0_3 = 0.044658198738520451079/h0;
291     const double tmp0_26 = -0.16666666666666666667/h1;
292     const double tmp0_12 = -0.62200846792814621559/h0;
293     const double tmp0_51 = 0.044658198738520451079/h2;
294     const double tmp0_25 = 0.62200846792814621559/h1;
295     const double tmp0_13 = 0.16666666666666666667/h0;
296     const double tmp0_56 = 0.16666666666666666667/h2;
297     const double tmp0_24 = 0.16666666666666666667/h1;
298     const double tmp0_10 = 0.62200846792814621559/h0;
299     const double tmp0_57 = 0.62200846792814621559/h2;
300     const double tmp0_11 = -0.16666666666666666667/h0;
301     const double tmp0_54 = -0.044658198738520451079/h2;
302     const double tmp0_38 = 0.16666666666666666667/h1;
303     const double tmp0_34 = -0.044658198738520451079/h1;
304     const double tmp0_42 = 0.16666666666666666667/h2;
305     const double tmp0_35 = -0.16666666666666666667/h1;
306     const double tmp0_36 = -0.62200846792814621559/h1;
307     const double tmp0_41 = 0.62200846792814621559/h2;
308     const double tmp0_8 = 0.044658198738520451079/h0;
309     const double tmp0_37 = 0.62200846792814621559/h1;
310     const double tmp0_29 = 0.16666666666666666667/h1;
311     const double tmp0_40 = -0.62200846792814621559/h2;
312     const double tmp0_9 = 0.16666666666666666667/h0;
313     const double tmp0_30 = 0.62200846792814621559/h1;
314     const double tmp0_28 = -0.16666666666666666667/h1;
315     const double tmp0_43 = 0.044658198738520451079/h2;
316     const double tmp0_32 = 0.16666666666666666667/h1;
317     const double tmp0_31 = 0.044658198738520451079/h1;
318     const double tmp0_39 = 0.044658198738520451079/h1;
319     const double tmp0_58 = -0.62200846792814621559/h2;
320     const double tmp0_55 = 0.044658198738520451079/h2;
321     const double tmp0_18 = -0.62200846792814621559/h0;
322     const double tmp0_45 = -0.16666666666666666667/h2;
323     const double tmp0_59 = -0.16666666666666666667/h2;
324     const double tmp0_4 = -0.044658198738520451079/h0;
325     const double tmp0_19 = 0.044658198738520451079/h0;
326     const double tmp0_44 = -0.044658198738520451079/h2;
327     const double tmp0_5 = 0.62200846792814621559/h0;
328     const double tmp0_47 = 0.16666666666666666667/h2;
329     const double tmp0_6 = -0.16666666666666666667/h0;
330     const double tmp0_23 = 0.044658198738520451079/h1;
331     const double tmp0_46 = -0.16666666666666666667/h2;
332     const double tmp0_7 = -0.044658198738520451079/h0;
333     #pragma omp parallel for
334 caltinay 3707 for (index_t k2=0; k2 < m_NE2; ++k2) {
335     for (index_t k1=0; k1 < m_NE1; ++k1) {
336     for (index_t k0=0; k0 < m_NE0; ++k0) {
337 caltinay 3703 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,k2, m_N0,m_N1));
338     const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,k2+1, m_N0,m_N1));
339     const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,k2+1, m_N0,m_N1));
340     const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2+1, m_N0,m_N1));
341     const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2, m_N0,m_N1));
342     const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,k2+1, m_N0,m_N1));
343     const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,k2, m_N0,m_N1));
344     const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,k2, m_N0,m_N1));
345     double* o = out.getSampleDataRW(INDEX3(k0,k1,k2,m_NE0,m_NE1));
346     for (index_t i=0; i < numComp; ++i) {
347     o[INDEX3(i,0,0,numComp,3)] = f_000[i]*tmp0_0 + f_011[i]*tmp0_4 + f_100[i]*tmp0_5 + f_111[i]*tmp0_3 + tmp0_1*(f_001[i] + f_010[i]) + tmp0_2*(f_101[i] + f_110[i]);
348     o[INDEX3(i,1,0,numComp,3)] = f_000[i]*tmp0_20 + f_010[i]*tmp0_25 + f_101[i]*tmp0_22 + f_111[i]*tmp0_23 + tmp0_21*(f_001[i] + f_100[i]) + tmp0_24*(f_011[i] + f_110[i]);
349     o[INDEX3(i,2,0,numComp,3)] = f_000[i]*tmp0_40 + f_001[i]*tmp0_41 + f_110[i]*tmp0_44 + f_111[i]*tmp0_43 + tmp0_42*(f_011[i] + f_101[i]) + tmp0_45*(f_010[i] + f_100[i]);
350     o[INDEX3(i,0,1,numComp,3)] = f_000[i]*tmp0_0 + f_011[i]*tmp0_4 + f_100[i]*tmp0_5 + f_111[i]*tmp0_3 + tmp0_1*(f_001[i] + f_010[i]) + tmp0_2*(f_101[i] + f_110[i]);
351     o[INDEX3(i,1,1,numComp,3)] = f_000[i]*tmp0_26 + f_001[i]*tmp0_27 + f_010[i]*tmp0_32 + f_011[i]*tmp0_31 + f_100[i]*tmp0_33 + f_101[i]*tmp0_28 + f_110[i]*tmp0_30 + f_111[i]*tmp0_29;
352     o[INDEX3(i,2,1,numComp,3)] = f_000[i]*tmp0_46 + f_001[i]*tmp0_47 + f_010[i]*tmp0_52 + f_011[i]*tmp0_51 + f_100[i]*tmp0_53 + f_101[i]*tmp0_48 + f_110[i]*tmp0_50 + f_111[i]*tmp0_49;
353     o[INDEX3(i,0,2,numComp,3)] = f_000[i]*tmp0_6 + f_001[i]*tmp0_7 + f_010[i]*tmp0_12 + f_011[i]*tmp0_11 + f_100[i]*tmp0_13 + f_101[i]*tmp0_8 + f_110[i]*tmp0_10 + f_111[i]*tmp0_9;
354     o[INDEX3(i,1,2,numComp,3)] = f_000[i]*tmp0_20 + f_010[i]*tmp0_25 + f_101[i]*tmp0_22 + f_111[i]*tmp0_23 + tmp0_21*(f_001[i] + f_100[i]) + tmp0_24*(f_011[i] + f_110[i]);
355     o[INDEX3(i,2,2,numComp,3)] = f_000[i]*tmp0_46 + f_001[i]*tmp0_47 + f_010[i]*tmp0_53 + f_011[i]*tmp0_48 + f_100[i]*tmp0_52 + f_101[i]*tmp0_51 + f_110[i]*tmp0_50 + f_111[i]*tmp0_49;
356     o[INDEX3(i,0,3,numComp,3)] = f_000[i]*tmp0_6 + f_001[i]*tmp0_7 + f_010[i]*tmp0_12 + f_011[i]*tmp0_11 + f_100[i]*tmp0_13 + f_101[i]*tmp0_8 + f_110[i]*tmp0_10 + f_111[i]*tmp0_9;
357     o[INDEX3(i,1,3,numComp,3)] = f_000[i]*tmp0_26 + f_001[i]*tmp0_27 + f_010[i]*tmp0_32 + f_011[i]*tmp0_31 + f_100[i]*tmp0_33 + f_101[i]*tmp0_28 + f_110[i]*tmp0_30 + f_111[i]*tmp0_29;
358     o[INDEX3(i,2,3,numComp,3)] = f_000[i]*tmp0_54 + f_001[i]*tmp0_55 + f_110[i]*tmp0_58 + f_111[i]*tmp0_57 + tmp0_56*(f_011[i] + f_101[i]) + tmp0_59*(f_010[i] + f_100[i]);
359     o[INDEX3(i,0,4,numComp,3)] = f_000[i]*tmp0_6 + f_001[i]*tmp0_12 + f_010[i]*tmp0_7 + f_011[i]*tmp0_11 + f_100[i]*tmp0_13 + f_101[i]*tmp0_10 + f_110[i]*tmp0_8 + f_111[i]*tmp0_9;
360     o[INDEX3(i,1,4,numComp,3)] = f_000[i]*tmp0_26 + f_001[i]*tmp0_33 + f_010[i]*tmp0_32 + f_011[i]*tmp0_30 + f_100[i]*tmp0_27 + f_101[i]*tmp0_28 + f_110[i]*tmp0_31 + f_111[i]*tmp0_29;
361     o[INDEX3(i,2,4,numComp,3)] = f_000[i]*tmp0_40 + f_001[i]*tmp0_41 + f_110[i]*tmp0_44 + f_111[i]*tmp0_43 + tmp0_42*(f_011[i] + f_101[i]) + tmp0_45*(f_010[i] + f_100[i]);
362     o[INDEX3(i,0,5,numComp,3)] = f_000[i]*tmp0_6 + f_001[i]*tmp0_12 + f_010[i]*tmp0_7 + f_011[i]*tmp0_11 + f_100[i]*tmp0_13 + f_101[i]*tmp0_10 + f_110[i]*tmp0_8 + f_111[i]*tmp0_9;
363     o[INDEX3(i,1,5,numComp,3)] = f_000[i]*tmp0_34 + f_010[i]*tmp0_39 + f_101[i]*tmp0_36 + f_111[i]*tmp0_37 + tmp0_35*(f_001[i] + f_100[i]) + tmp0_38*(f_011[i] + f_110[i]);
364     o[INDEX3(i,2,5,numComp,3)] = f_000[i]*tmp0_46 + f_001[i]*tmp0_47 + f_010[i]*tmp0_52 + f_011[i]*tmp0_51 + f_100[i]*tmp0_53 + f_101[i]*tmp0_48 + f_110[i]*tmp0_50 + f_111[i]*tmp0_49;
365     o[INDEX3(i,0,6,numComp,3)] = f_000[i]*tmp0_14 + f_011[i]*tmp0_18 + f_100[i]*tmp0_19 + f_111[i]*tmp0_17 + tmp0_15*(f_001[i] + f_010[i]) + tmp0_16*(f_101[i] + f_110[i]);
366     o[INDEX3(i,1,6,numComp,3)] = f_000[i]*tmp0_26 + f_001[i]*tmp0_33 + f_010[i]*tmp0_32 + f_011[i]*tmp0_30 + f_100[i]*tmp0_27 + f_101[i]*tmp0_28 + f_110[i]*tmp0_31 + f_111[i]*tmp0_29;
367     o[INDEX3(i,2,6,numComp,3)] = f_000[i]*tmp0_46 + f_001[i]*tmp0_47 + f_010[i]*tmp0_53 + f_011[i]*tmp0_48 + f_100[i]*tmp0_52 + f_101[i]*tmp0_51 + f_110[i]*tmp0_50 + f_111[i]*tmp0_49;
368     o[INDEX3(i,0,7,numComp,3)] = f_000[i]*tmp0_14 + f_011[i]*tmp0_18 + f_100[i]*tmp0_19 + f_111[i]*tmp0_17 + tmp0_15*(f_001[i] + f_010[i]) + tmp0_16*(f_101[i] + f_110[i]);
369     o[INDEX3(i,1,7,numComp,3)] = f_000[i]*tmp0_34 + f_010[i]*tmp0_39 + f_101[i]*tmp0_36 + f_111[i]*tmp0_37 + tmp0_35*(f_001[i] + f_100[i]) + tmp0_38*(f_011[i] + f_110[i]);
370     o[INDEX3(i,2,7,numComp,3)] = f_000[i]*tmp0_54 + f_001[i]*tmp0_55 + f_110[i]*tmp0_58 + f_111[i]*tmp0_57 + tmp0_56*(f_011[i] + f_101[i]) + tmp0_59*(f_010[i] + f_100[i]);
371     } /* end of component loop i */
372     } /* end of k0 loop */
373     } /* end of k1 loop */
374     } /* end of k2 loop */
375     /* GENERATOR SNIP_GRAD_ELEMENTS BOTTOM */
376 caltinay 3711 } else if (out.getFunctionSpace().getTypeCode() == ReducedElements) {
377     /* GENERATOR SNIP_GRAD_REDUCED_ELEMENTS TOP */
378     const double tmp0_0 = -0.25/h0;
379     const double tmp0_4 = -0.25/h2;
380     const double tmp0_1 = 0.25/h0;
381     const double tmp0_5 = 0.25/h2;
382     const double tmp0_2 = -0.25/h1;
383     const double tmp0_3 = 0.25/h1;
384     #pragma omp parallel for
385     for (index_t k2=0; k2 < m_NE2; ++k2) {
386     for (index_t k1=0; k1 < m_NE1; ++k1) {
387     for (index_t k0=0; k0 < m_NE0; ++k0) {
388     const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,k2, m_N0,m_N1));
389     const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,k2+1, m_N0,m_N1));
390     const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,k2+1, m_N0,m_N1));
391     const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,k2+1, m_N0,m_N1));
392     const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,k2, m_N0,m_N1));
393     const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2, m_N0,m_N1));
394     const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,k2, m_N0,m_N1));
395     const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2+1, m_N0,m_N1));
396     double* o = out.getSampleDataRW(INDEX3(k0,k1,k2,m_NE0,m_NE1));
397     for (index_t i=0; i < numComp; ++i) {
398     o[INDEX3(i,0,0,numComp,3)] = tmp0_0*(f_000[i] + f_001[i] + f_010[i] + f_011[i]) + tmp0_1*(f_100[i] + f_101[i] + f_110[i] + f_111[i]);
399     o[INDEX3(i,1,0,numComp,3)] = tmp0_2*(f_000[i] + f_001[i] + f_100[i] + f_101[i]) + tmp0_3*(f_010[i] + f_011[i] + f_110[i] + f_111[i]);
400     o[INDEX3(i,2,0,numComp,3)] = tmp0_4*(f_000[i] + f_010[i] + f_100[i] + f_110[i]) + tmp0_5*(f_001[i] + f_011[i] + f_101[i] + f_111[i]);
401     } /* end of component loop i */
402     } /* end of k0 loop */
403     } /* end of k1 loop */
404     } /* end of k2 loop */
405     /* GENERATOR SNIP_GRAD_REDUCED_ELEMENTS BOTTOM */
406 caltinay 3707 } else if (out.getFunctionSpace().getTypeCode() == FaceElements) {
407     /* GENERATOR SNIP_GRAD_FACES TOP */
408     if (m_faceOffset[0] > -1) {
409     const double tmp0_22 = 0.21132486540518711775/h1;
410     const double tmp0_16 = 0.16666666666666666667/h0;
411     const double tmp0_33 = 0.21132486540518711775/h2;
412     const double tmp0_0 = -0.62200846792814621559/h0;
413     const double tmp0_21 = -0.21132486540518711775/h1;
414     const double tmp0_17 = 0.62200846792814621559/h0;
415     const double tmp0_1 = -0.16666666666666666667/h0;
416     const double tmp0_20 = -0.78867513459481288225/h1;
417     const double tmp0_14 = -0.044658198738520451079/h0;
418     const double tmp0_2 = 0.16666666666666666667/h0;
419     const double tmp0_27 = 0.21132486540518711775/h1;
420     const double tmp0_15 = -0.16666666666666666667/h0;
421     const double tmp0_3 = 0.044658198738520451079/h0;
422     const double tmp0_26 = 0.78867513459481288225/h1;
423     const double tmp0_12 = -0.62200846792814621559/h0;
424     const double tmp0_25 = -0.78867513459481288225/h1;
425     const double tmp0_13 = 0.16666666666666666667/h0;
426     const double tmp0_24 = -0.21132486540518711775/h1;
427     const double tmp0_10 = 0.62200846792814621559/h0;
428     const double tmp0_11 = -0.16666666666666666667/h0;
429     const double tmp0_34 = 0.78867513459481288225/h2;
430     const double tmp0_35 = -0.78867513459481288225/h2;
431     const double tmp0_8 = 0.044658198738520451079/h0;
432     const double tmp0_29 = 0.78867513459481288225/h2;
433     const double tmp0_9 = 0.16666666666666666667/h0;
434     const double tmp0_30 = 0.21132486540518711775/h2;
435     const double tmp0_28 = -0.78867513459481288225/h2;
436     const double tmp0_32 = -0.21132486540518711775/h2;
437     const double tmp0_31 = -0.21132486540518711775/h2;
438     const double tmp0_18 = -0.62200846792814621559/h0;
439     const double tmp0_4 = -0.044658198738520451079/h0;
440     const double tmp0_19 = 0.044658198738520451079/h0;
441     const double tmp0_5 = 0.62200846792814621559/h0;
442     const double tmp0_6 = -0.16666666666666666667/h0;
443     const double tmp0_23 = 0.78867513459481288225/h1;
444     const double tmp0_7 = -0.044658198738520451079/h0;
445     #pragma omp parallel for
446     for (index_t k2=0; k2 < m_NE2; ++k2) {
447     for (index_t k1=0; k1 < m_NE1; ++k1) {
448     const register double* f_000 = in.getSampleDataRO(INDEX3(0,k1,k2, m_N0,m_N1));
449     const register double* f_001 = in.getSampleDataRO(INDEX3(0,k1,k2+1, m_N0,m_N1));
450     const register double* f_101 = in.getSampleDataRO(INDEX3(1,k1,k2+1, m_N0,m_N1));
451     const register double* f_111 = in.getSampleDataRO(INDEX3(1,k1+1,k2+1, m_N0,m_N1));
452     const register double* f_110 = in.getSampleDataRO(INDEX3(1,k1+1,k2, m_N0,m_N1));
453     const register double* f_011 = in.getSampleDataRO(INDEX3(0,k1+1,k2+1, m_N0,m_N1));
454     const register double* f_010 = in.getSampleDataRO(INDEX3(0,k1+1,k2, m_N0,m_N1));
455     const register double* f_100 = in.getSampleDataRO(INDEX3(1,k1,k2, m_N0,m_N1));
456     double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE1));
457     for (index_t i=0; i < numComp; ++i) {
458     o[INDEX3(i,0,0,numComp,3)] = f_000[i]*tmp0_0 + f_011[i]*tmp0_4 + f_100[i]*tmp0_5 + f_111[i]*tmp0_3 + tmp0_1*(f_001[i] + f_010[i]) + tmp0_2*(f_101[i] + f_110[i]);
459     o[INDEX3(i,1,0,numComp,3)] = f_000[i]*tmp0_20 + f_001[i]*tmp0_21 + f_010[i]*tmp0_23 + f_011[i]*tmp0_22;
460     o[INDEX3(i,2,0,numComp,3)] = f_000[i]*tmp0_28 + f_001[i]*tmp0_29 + f_010[i]*tmp0_31 + f_011[i]*tmp0_30;
461     o[INDEX3(i,0,1,numComp,3)] = f_000[i]*tmp0_6 + f_001[i]*tmp0_7 + f_010[i]*tmp0_12 + f_011[i]*tmp0_11 + f_100[i]*tmp0_13 + f_101[i]*tmp0_8 + f_110[i]*tmp0_10 + f_111[i]*tmp0_9;
462     o[INDEX3(i,1,1,numComp,3)] = f_000[i]*tmp0_20 + f_001[i]*tmp0_21 + f_010[i]*tmp0_23 + f_011[i]*tmp0_22;
463     o[INDEX3(i,2,1,numComp,3)] = f_000[i]*tmp0_32 + f_001[i]*tmp0_33 + f_010[i]*tmp0_35 + f_011[i]*tmp0_34;
464     o[INDEX3(i,0,2,numComp,3)] = f_000[i]*tmp0_6 + f_001[i]*tmp0_12 + f_010[i]*tmp0_7 + f_011[i]*tmp0_11 + f_100[i]*tmp0_13 + f_101[i]*tmp0_10 + f_110[i]*tmp0_8 + f_111[i]*tmp0_9;
465     o[INDEX3(i,1,2,numComp,3)] = f_000[i]*tmp0_24 + f_001[i]*tmp0_25 + f_010[i]*tmp0_27 + f_011[i]*tmp0_26;
466     o[INDEX3(i,2,2,numComp,3)] = f_000[i]*tmp0_28 + f_001[i]*tmp0_29 + f_010[i]*tmp0_31 + f_011[i]*tmp0_30;
467     o[INDEX3(i,0,3,numComp,3)] = f_000[i]*tmp0_14 + f_011[i]*tmp0_18 + f_100[i]*tmp0_19 + f_111[i]*tmp0_17 + tmp0_15*(f_001[i] + f_010[i]) + tmp0_16*(f_101[i] + f_110[i]);
468     o[INDEX3(i,1,3,numComp,3)] = f_000[i]*tmp0_24 + f_001[i]*tmp0_25 + f_010[i]*tmp0_27 + f_011[i]*tmp0_26;
469     o[INDEX3(i,2,3,numComp,3)] = f_000[i]*tmp0_32 + f_001[i]*tmp0_33 + f_010[i]*tmp0_35 + f_011[i]*tmp0_34;
470     } /* end of component loop i */
471     } /* end of k1 loop */
472     } /* end of k2 loop */
473     } /* end of face 0 */
474     if (m_faceOffset[1] > -1) {
475     const double tmp0_22 = 0.78867513459481288225/h1;
476     const double tmp0_16 = 0.16666666666666666667/h0;
477     const double tmp0_33 = 0.78867513459481288225/h2;
478     const double tmp0_0 = -0.62200846792814621559/h0;
479     const double tmp0_21 = 0.21132486540518711775/h1;
480     const double tmp0_17 = 0.62200846792814621559/h0;
481     const double tmp0_1 = -0.16666666666666666667/h0;
482     const double tmp0_20 = -0.21132486540518711775/h1;
483     const double tmp0_14 = -0.044658198738520451079/h0;
484     const double tmp0_2 = 0.16666666666666666667/h0;
485     const double tmp0_27 = -0.21132486540518711775/h1;
486     const double tmp0_15 = -0.16666666666666666667/h0;
487     const double tmp0_3 = 0.044658198738520451079/h0;
488     const double tmp0_26 = 0.21132486540518711775/h1;
489     const double tmp0_12 = -0.62200846792814621559/h0;
490     const double tmp0_25 = 0.78867513459481288225/h1;
491     const double tmp0_13 = 0.16666666666666666667/h0;
492     const double tmp0_24 = -0.78867513459481288225/h1;
493     const double tmp0_10 = 0.62200846792814621559/h0;
494     const double tmp0_11 = -0.16666666666666666667/h0;
495     const double tmp0_34 = -0.78867513459481288225/h2;
496     const double tmp0_35 = -0.21132486540518711775/h2;
497     const double tmp0_8 = 0.044658198738520451079/h0;
498     const double tmp0_29 = 0.21132486540518711775/h2;
499     const double tmp0_9 = 0.16666666666666666667/h0;
500     const double tmp0_30 = -0.21132486540518711775/h2;
501     const double tmp0_28 = 0.78867513459481288225/h2;
502     const double tmp0_32 = 0.21132486540518711775/h2;
503     const double tmp0_31 = -0.78867513459481288225/h2;
504     const double tmp0_18 = -0.62200846792814621559/h0;
505     const double tmp0_4 = -0.044658198738520451079/h0;
506     const double tmp0_19 = 0.044658198738520451079/h0;
507     const double tmp0_5 = 0.62200846792814621559/h0;
508     const double tmp0_6 = -0.16666666666666666667/h0;
509     const double tmp0_23 = -0.78867513459481288225/h1;
510     const double tmp0_7 = -0.044658198738520451079/h0;
511     #pragma omp parallel for
512     for (index_t k2=0; k2 < m_NE2; ++k2) {
513     for (index_t k1=0; k1 < m_NE1; ++k1) {
514     const register double* f_000 = in.getSampleDataRO(INDEX3(m_N0-2,k1,k2, m_N0,m_N1));
515     const register double* f_001 = in.getSampleDataRO(INDEX3(m_N0-2,k1,k2+1, m_N0,m_N1));
516     const register double* f_101 = in.getSampleDataRO(INDEX3(m_N0-1,k1,k2+1, m_N0,m_N1));
517     const register double* f_111 = in.getSampleDataRO(INDEX3(m_N0-1,k1+1,k2+1, m_N0,m_N1));
518     const register double* f_110 = in.getSampleDataRO(INDEX3(m_N0-1,k1+1,k2, m_N0,m_N1));
519     const register double* f_011 = in.getSampleDataRO(INDEX3(m_N0-2,k1+1,k2+1, m_N0,m_N1));
520     const register double* f_010 = in.getSampleDataRO(INDEX3(m_N0-2,k1+1,k2, m_N0,m_N1));
521     const register double* f_100 = in.getSampleDataRO(INDEX3(m_N0-1,k1,k2, m_N0,m_N1));
522     double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE1));
523     for (index_t i=0; i < numComp; ++i) {
524     o[INDEX3(i,0,0,numComp,3)] = f_000[i]*tmp0_0 + f_011[i]*tmp0_4 + f_100[i]*tmp0_5 + f_111[i]*tmp0_3 + tmp0_1*(f_001[i] + f_010[i]) + tmp0_2*(f_101[i] + f_110[i]);
525     o[INDEX3(i,1,0,numComp,3)] = f_100[i]*tmp0_23 + f_101[i]*tmp0_20 + f_110[i]*tmp0_22 + f_111[i]*tmp0_21;
526     o[INDEX3(i,2,0,numComp,3)] = f_100[i]*tmp0_31 + f_101[i]*tmp0_28 + f_110[i]*tmp0_30 + f_111[i]*tmp0_29;
527     o[INDEX3(i,0,1,numComp,3)] = f_000[i]*tmp0_6 + f_001[i]*tmp0_7 + f_010[i]*tmp0_12 + f_011[i]*tmp0_11 + f_100[i]*tmp0_13 + f_101[i]*tmp0_8 + f_110[i]*tmp0_10 + f_111[i]*tmp0_9;
528     o[INDEX3(i,1,1,numComp,3)] = f_100[i]*tmp0_23 + f_101[i]*tmp0_20 + f_110[i]*tmp0_22 + f_111[i]*tmp0_21;
529     o[INDEX3(i,2,1,numComp,3)] = f_100[i]*tmp0_35 + f_101[i]*tmp0_32 + f_110[i]*tmp0_34 + f_111[i]*tmp0_33;
530     o[INDEX3(i,0,2,numComp,3)] = f_000[i]*tmp0_6 + f_001[i]*tmp0_12 + f_010[i]*tmp0_7 + f_011[i]*tmp0_11 + f_100[i]*tmp0_13 + f_101[i]*tmp0_10 + f_110[i]*tmp0_8 + f_111[i]*tmp0_9;
531     o[INDEX3(i,1,2,numComp,3)] = f_100[i]*tmp0_27 + f_101[i]*tmp0_24 + f_110[i]*tmp0_26 + f_111[i]*tmp0_25;
532     o[INDEX3(i,2,2,numComp,3)] = f_100[i]*tmp0_31 + f_101[i]*tmp0_28 + f_110[i]*tmp0_30 + f_111[i]*tmp0_29;
533     o[INDEX3(i,0,3,numComp,3)] = f_000[i]*tmp0_14 + f_011[i]*tmp0_18 + f_100[i]*tmp0_19 + f_111[i]*tmp0_17 + tmp0_15*(f_001[i] + f_010[i]) + tmp0_16*(f_101[i] + f_110[i]);
534     o[INDEX3(i,1,3,numComp,3)] = f_100[i]*tmp0_27 + f_101[i]*tmp0_24 + f_110[i]*tmp0_26 + f_111[i]*tmp0_25;
535     o[INDEX3(i,2,3,numComp,3)] = f_100[i]*tmp0_35 + f_101[i]*tmp0_32 + f_110[i]*tmp0_34 + f_111[i]*tmp0_33;
536     } /* end of component loop i */
537     } /* end of k1 loop */
538     } /* end of k2 loop */
539     } /* end of face 1 */
540     if (m_faceOffset[2] > -1) {
541     const double tmp0_22 = -0.044658198738520451079/h1;
542     const double tmp0_16 = -0.16666666666666666667/h1;
543     const double tmp0_33 = 0.21132486540518711775/h2;
544     const double tmp0_0 = -0.78867513459481288225/h0;
545     const double tmp0_21 = 0.16666666666666666667/h1;
546     const double tmp0_17 = -0.62200846792814621559/h1;
547     const double tmp0_1 = -0.21132486540518711775/h0;
548     const double tmp0_20 = 0.044658198738520451079/h1;
549     const double tmp0_14 = -0.16666666666666666667/h1;
550     const double tmp0_2 = 0.21132486540518711775/h0;
551     const double tmp0_27 = 0.044658198738520451079/h1;
552     const double tmp0_15 = -0.044658198738520451079/h1;
553     const double tmp0_3 = 0.78867513459481288225/h0;
554     const double tmp0_26 = 0.16666666666666666667/h1;
555     const double tmp0_12 = 0.16666666666666666667/h1;
556     const double tmp0_25 = 0.62200846792814621559/h1;
557     const double tmp0_13 = 0.62200846792814621559/h1;
558     const double tmp0_24 = -0.62200846792814621559/h1;
559     const double tmp0_10 = -0.044658198738520451079/h1;
560     const double tmp0_11 = 0.044658198738520451079/h1;
561     const double tmp0_34 = 0.78867513459481288225/h2;
562     const double tmp0_35 = -0.78867513459481288225/h2;
563     const double tmp0_8 = -0.62200846792814621559/h1;
564     const double tmp0_29 = 0.78867513459481288225/h2;
565     const double tmp0_9 = -0.16666666666666666667/h1;
566     const double tmp0_30 = 0.21132486540518711775/h2;
567     const double tmp0_28 = -0.78867513459481288225/h2;
568     const double tmp0_32 = -0.21132486540518711775/h2;
569     const double tmp0_31 = -0.21132486540518711775/h2;
570     const double tmp0_18 = 0.16666666666666666667/h1;
571     const double tmp0_4 = -0.21132486540518711775/h0;
572     const double tmp0_19 = 0.62200846792814621559/h1;
573     const double tmp0_5 = -0.78867513459481288225/h0;
574     const double tmp0_6 = 0.78867513459481288225/h0;
575     const double tmp0_23 = -0.16666666666666666667/h1;
576     const double tmp0_7 = 0.21132486540518711775/h0;
577     #pragma omp parallel for
578     for (index_t k2=0; k2 < m_NE2; ++k2) {
579     for (index_t k0=0; k0 < m_NE0; ++k0) {
580     const register double* f_000 = in.getSampleDataRO(INDEX3(k0,0,k2, m_N0,m_N1));
581     const register double* f_001 = in.getSampleDataRO(INDEX3(k0,0,k2+1, m_N0,m_N1));
582     const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,0,k2+1, m_N0,m_N1));
583     const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,0,k2, m_N0,m_N1));
584     const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,1,k2+1, m_N0,m_N1));
585     const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,1,k2, m_N0,m_N1));
586     const register double* f_011 = in.getSampleDataRO(INDEX3(k0,1,k2+1, m_N0,m_N1));
587     const register double* f_010 = in.getSampleDataRO(INDEX3(k0,1,k2, m_N0,m_N1));
588     double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE0));
589     for (index_t i=0; i < numComp; ++i) {
590     o[INDEX3(i,0,0,numComp,3)] = f_000[i]*tmp0_0 + f_001[i]*tmp0_1 + f_100[i]*tmp0_3 + f_101[i]*tmp0_2;
591     o[INDEX3(i,1,0,numComp,3)] = f_000[i]*tmp0_8 + f_010[i]*tmp0_13 + f_101[i]*tmp0_10 + f_111[i]*tmp0_11 + tmp0_12*(f_011[i] + f_110[i]) + tmp0_9*(f_001[i] + f_100[i]);
592     o[INDEX3(i,2,0,numComp,3)] = f_000[i]*tmp0_28 + f_001[i]*tmp0_29 + f_100[i]*tmp0_31 + f_101[i]*tmp0_30;
593     o[INDEX3(i,0,1,numComp,3)] = f_000[i]*tmp0_0 + f_001[i]*tmp0_1 + f_100[i]*tmp0_3 + f_101[i]*tmp0_2;
594     o[INDEX3(i,1,1,numComp,3)] = f_000[i]*tmp0_14 + f_001[i]*tmp0_15 + f_010[i]*tmp0_21 + f_011[i]*tmp0_20 + f_100[i]*tmp0_17 + f_101[i]*tmp0_16 + f_110[i]*tmp0_19 + f_111[i]*tmp0_18;
595     o[INDEX3(i,2,1,numComp,3)] = f_000[i]*tmp0_32 + f_001[i]*tmp0_33 + f_100[i]*tmp0_35 + f_101[i]*tmp0_34;
596     o[INDEX3(i,0,2,numComp,3)] = f_000[i]*tmp0_4 + f_001[i]*tmp0_5 + f_100[i]*tmp0_7 + f_101[i]*tmp0_6;
597     o[INDEX3(i,1,2,numComp,3)] = f_000[i]*tmp0_14 + f_001[i]*tmp0_17 + f_010[i]*tmp0_21 + f_011[i]*tmp0_19 + f_100[i]*tmp0_15 + f_101[i]*tmp0_16 + f_110[i]*tmp0_20 + f_111[i]*tmp0_18;
598     o[INDEX3(i,2,2,numComp,3)] = f_000[i]*tmp0_28 + f_001[i]*tmp0_29 + f_100[i]*tmp0_31 + f_101[i]*tmp0_30;
599     o[INDEX3(i,0,3,numComp,3)] = f_000[i]*tmp0_4 + f_001[i]*tmp0_5 + f_100[i]*tmp0_7 + f_101[i]*tmp0_6;
600     o[INDEX3(i,1,3,numComp,3)] = f_000[i]*tmp0_22 + f_010[i]*tmp0_27 + f_101[i]*tmp0_24 + f_111[i]*tmp0_25 + tmp0_23*(f_001[i] + f_100[i]) + tmp0_26*(f_011[i] + f_110[i]);
601     o[INDEX3(i,2,3,numComp,3)] = f_000[i]*tmp0_32 + f_001[i]*tmp0_33 + f_100[i]*tmp0_35 + f_101[i]*tmp0_34;
602     } /* end of component loop i */
603     } /* end of k0 loop */
604     } /* end of k2 loop */
605     } /* end of face 2 */
606     if (m_faceOffset[3] > -1) {
607     const double tmp0_22 = 0.16666666666666666667/h1;
608     const double tmp0_16 = 0.16666666666666666667/h1;
609     const double tmp0_33 = -0.78867513459481288225/h2;
610     const double tmp0_0 = -0.21132486540518711775/h0;
611     const double tmp0_21 = -0.62200846792814621559/h1;
612     const double tmp0_17 = 0.16666666666666666667/h1;
613     const double tmp0_1 = 0.78867513459481288225/h0;
614     const double tmp0_20 = -0.16666666666666666667/h1;
615     const double tmp0_14 = 0.044658198738520451079/h1;
616     const double tmp0_2 = -0.78867513459481288225/h0;
617     const double tmp0_27 = -0.62200846792814621559/h1;
618     const double tmp0_15 = 0.62200846792814621559/h1;
619     const double tmp0_3 = 0.21132486540518711775/h0;
620     const double tmp0_26 = -0.16666666666666666667/h1;
621     const double tmp0_12 = -0.16666666666666666667/h1;
622     const double tmp0_25 = -0.044658198738520451079/h1;
623     const double tmp0_13 = -0.044658198738520451079/h1;
624     const double tmp0_24 = 0.62200846792814621559/h1;
625     const double tmp0_10 = 0.044658198738520451079/h1;
626     const double tmp0_11 = -0.62200846792814621559/h1;
627     const double tmp0_34 = -0.21132486540518711775/h2;
628     const double tmp0_35 = 0.78867513459481288225/h2;
629     const double tmp0_8 = 0.16666666666666666667/h1;
630     const double tmp0_29 = -0.21132486540518711775/h2;
631     const double tmp0_9 = 0.62200846792814621559/h1;
632     const double tmp0_30 = -0.78867513459481288225/h2;
633     const double tmp0_28 = 0.78867513459481288225/h2;
634     const double tmp0_32 = 0.21132486540518711775/h2;
635     const double tmp0_31 = 0.21132486540518711775/h2;
636     const double tmp0_18 = -0.16666666666666666667/h1;
637     const double tmp0_4 = -0.78867513459481288225/h0;
638     const double tmp0_19 = -0.044658198738520451079/h1;
639     const double tmp0_5 = 0.21132486540518711775/h0;
640     const double tmp0_6 = -0.21132486540518711775/h0;
641     const double tmp0_23 = 0.044658198738520451079/h1;
642     const double tmp0_7 = 0.78867513459481288225/h0;
643     #pragma omp parallel for
644     for (index_t k2=0; k2 < m_NE2; ++k2) {
645     for (index_t k0=0; k0 < m_NE0; ++k0) {
646     const register double* f_011 = in.getSampleDataRO(INDEX3(k0,m_N1-1,k2+1, m_N0,m_N1));
647     const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,m_N1-1,k2, m_N0,m_N1));
648     const register double* f_010 = in.getSampleDataRO(INDEX3(k0,m_N1-1,k2, m_N0,m_N1));
649     const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,m_N1-1,k2+1, m_N0,m_N1));
650     const register double* f_000 = in.getSampleDataRO(INDEX3(k0,m_N1-2,k2, m_N0,m_N1));
651     const register double* f_001 = in.getSampleDataRO(INDEX3(k0,m_N1-2,k2+1, m_N0,m_N1));
652     const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,m_N1-2,k2+1, m_N0,m_N1));
653     const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,m_N1-2,k2, m_N0,m_N1));
654     double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE0));
655     for (index_t i=0; i < numComp; ++i) {
656     o[INDEX3(i,0,0,numComp,3)] = f_010[i]*tmp0_2 + f_011[i]*tmp0_0 + f_110[i]*tmp0_1 + f_111[i]*tmp0_3;
657     o[INDEX3(i,1,0,numComp,3)] = f_000[i]*tmp0_11 + f_010[i]*tmp0_9 + f_101[i]*tmp0_13 + f_111[i]*tmp0_10 + tmp0_12*(f_001[i] + f_100[i]) + tmp0_8*(f_011[i] + f_110[i]);
658     o[INDEX3(i,2,0,numComp,3)] = f_010[i]*tmp0_30 + f_011[i]*tmp0_28 + f_110[i]*tmp0_29 + f_111[i]*tmp0_31;
659     o[INDEX3(i,0,1,numComp,3)] = f_010[i]*tmp0_2 + f_011[i]*tmp0_0 + f_110[i]*tmp0_1 + f_111[i]*tmp0_3;
660     o[INDEX3(i,1,1,numComp,3)] = f_000[i]*tmp0_18 + f_001[i]*tmp0_19 + f_010[i]*tmp0_16 + f_011[i]*tmp0_14 + f_100[i]*tmp0_21 + f_101[i]*tmp0_20 + f_110[i]*tmp0_15 + f_111[i]*tmp0_17;
661     o[INDEX3(i,2,1,numComp,3)] = f_010[i]*tmp0_34 + f_011[i]*tmp0_32 + f_110[i]*tmp0_33 + f_111[i]*tmp0_35;
662     o[INDEX3(i,0,2,numComp,3)] = f_010[i]*tmp0_6 + f_011[i]*tmp0_4 + f_110[i]*tmp0_5 + f_111[i]*tmp0_7;
663     o[INDEX3(i,1,2,numComp,3)] = f_000[i]*tmp0_18 + f_001[i]*tmp0_21 + f_010[i]*tmp0_16 + f_011[i]*tmp0_15 + f_100[i]*tmp0_19 + f_101[i]*tmp0_20 + f_110[i]*tmp0_14 + f_111[i]*tmp0_17;
664     o[INDEX3(i,2,2,numComp,3)] = f_010[i]*tmp0_30 + f_011[i]*tmp0_28 + f_110[i]*tmp0_29 + f_111[i]*tmp0_31;
665     o[INDEX3(i,0,3,numComp,3)] = f_010[i]*tmp0_6 + f_011[i]*tmp0_4 + f_110[i]*tmp0_5 + f_111[i]*tmp0_7;
666     o[INDEX3(i,1,3,numComp,3)] = f_000[i]*tmp0_25 + f_010[i]*tmp0_23 + f_101[i]*tmp0_27 + f_111[i]*tmp0_24 + tmp0_22*(f_011[i] + f_110[i]) + tmp0_26*(f_001[i] + f_100[i]);
667     o[INDEX3(i,2,3,numComp,3)] = f_010[i]*tmp0_34 + f_011[i]*tmp0_32 + f_110[i]*tmp0_33 + f_111[i]*tmp0_35;
668     } /* end of component loop i */
669     } /* end of k0 loop */
670     } /* end of k2 loop */
671     } /* end of face 3 */
672     if (m_faceOffset[4] > -1) {
673     const double tmp0_22 = -0.16666666666666666667/h2;
674     const double tmp0_16 = -0.62200846792814621559/h2;
675     const double tmp0_33 = 0.044658198738520451079/h2;
676     const double tmp0_0 = -0.78867513459481288225/h0;
677     const double tmp0_21 = 0.044658198738520451079/h2;
678     const double tmp0_17 = -0.16666666666666666667/h2;
679     const double tmp0_1 = 0.78867513459481288225/h0;
680     const double tmp0_20 = 0.16666666666666666667/h2;
681     const double tmp0_14 = 0.78867513459481288225/h1;
682     const double tmp0_2 = 0.21132486540518711775/h0;
683     const double tmp0_27 = 0.62200846792814621559/h2;
684     const double tmp0_15 = 0.21132486540518711775/h1;
685     const double tmp0_3 = -0.21132486540518711775/h0;
686     const double tmp0_26 = 0.16666666666666666667/h2;
687     const double tmp0_12 = -0.21132486540518711775/h1;
688     const double tmp0_25 = -0.044658198738520451079/h2;
689     const double tmp0_13 = -0.78867513459481288225/h1;
690     const double tmp0_24 = -0.16666666666666666667/h2;
691     const double tmp0_10 = 0.21132486540518711775/h1;
692     const double tmp0_11 = 0.78867513459481288225/h1;
693     const double tmp0_34 = 0.16666666666666666667/h2;
694     const double tmp0_35 = 0.62200846792814621559/h2;
695     const double tmp0_8 = -0.78867513459481288225/h1;
696     const double tmp0_29 = 0.16666666666666666667/h2;
697     const double tmp0_9 = -0.21132486540518711775/h1;
698     const double tmp0_30 = -0.044658198738520451079/h2;
699     const double tmp0_28 = 0.044658198738520451079/h2;
700     const double tmp0_32 = -0.62200846792814621559/h2;
701     const double tmp0_31 = -0.16666666666666666667/h2;
702     const double tmp0_18 = -0.044658198738520451079/h2;
703     const double tmp0_4 = -0.21132486540518711775/h0;
704     const double tmp0_19 = 0.62200846792814621559/h2;
705     const double tmp0_5 = 0.21132486540518711775/h0;
706     const double tmp0_6 = 0.78867513459481288225/h0;
707     const double tmp0_23 = -0.62200846792814621559/h2;
708     const double tmp0_7 = -0.78867513459481288225/h0;
709     #pragma omp parallel for
710     for (index_t k1=0; k1 < m_NE1; ++k1) {
711     for (index_t k0=0; k0 < m_NE0; ++k0) {
712     const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,0, m_N0,m_N1));
713     const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,0, m_N0,m_N1));
714     const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,0, m_N0,m_N1));
715     const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,0, m_N0,m_N1));
716     const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,1, m_N0,m_N1));
717     const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,1, m_N0,m_N1));
718     const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,1, m_N0,m_N1));
719     const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,1, m_N0,m_N1));
720     double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE0));
721     for (index_t i=0; i < numComp; ++i) {
722     o[INDEX3(i,0,0,numComp,3)] = f_000[i]*tmp0_0 + f_010[i]*tmp0_3 + f_100[i]*tmp0_1 + f_110[i]*tmp0_2;
723     o[INDEX3(i,1,0,numComp,3)] = f_000[i]*tmp0_8 + f_010[i]*tmp0_11 + f_100[i]*tmp0_9 + f_110[i]*tmp0_10;
724     o[INDEX3(i,2,0,numComp,3)] = f_000[i]*tmp0_16 + f_001[i]*tmp0_19 + f_110[i]*tmp0_18 + f_111[i]*tmp0_21 + tmp0_17*(f_010[i] + f_100[i]) + tmp0_20*(f_011[i] + f_101[i]);
725     o[INDEX3(i,0,1,numComp,3)] = f_000[i]*tmp0_0 + f_010[i]*tmp0_3 + f_100[i]*tmp0_1 + f_110[i]*tmp0_2;
726     o[INDEX3(i,1,1,numComp,3)] = f_000[i]*tmp0_12 + f_010[i]*tmp0_15 + f_100[i]*tmp0_13 + f_110[i]*tmp0_14;
727     o[INDEX3(i,2,1,numComp,3)] = f_000[i]*tmp0_22 + f_001[i]*tmp0_26 + f_010[i]*tmp0_25 + f_011[i]*tmp0_28 + f_100[i]*tmp0_23 + f_101[i]*tmp0_27 + f_110[i]*tmp0_24 + f_111[i]*tmp0_29;
728     o[INDEX3(i,0,2,numComp,3)] = f_000[i]*tmp0_4 + f_010[i]*tmp0_7 + f_100[i]*tmp0_5 + f_110[i]*tmp0_6;
729     o[INDEX3(i,1,2,numComp,3)] = f_000[i]*tmp0_8 + f_010[i]*tmp0_11 + f_100[i]*tmp0_9 + f_110[i]*tmp0_10;
730     o[INDEX3(i,2,2,numComp,3)] = f_000[i]*tmp0_22 + f_001[i]*tmp0_26 + f_010[i]*tmp0_23 + f_011[i]*tmp0_27 + f_100[i]*tmp0_25 + f_101[i]*tmp0_28 + f_110[i]*tmp0_24 + f_111[i]*tmp0_29;
731     o[INDEX3(i,0,3,numComp,3)] = f_000[i]*tmp0_4 + f_010[i]*tmp0_7 + f_100[i]*tmp0_5 + f_110[i]*tmp0_6;
732     o[INDEX3(i,1,3,numComp,3)] = f_000[i]*tmp0_12 + f_010[i]*tmp0_15 + f_100[i]*tmp0_13 + f_110[i]*tmp0_14;
733     o[INDEX3(i,2,3,numComp,3)] = f_000[i]*tmp0_30 + f_001[i]*tmp0_33 + f_110[i]*tmp0_32 + f_111[i]*tmp0_35 + tmp0_31*(f_010[i] + f_100[i]) + tmp0_34*(f_011[i] + f_101[i]);
734     } /* end of component loop i */
735     } /* end of k0 loop */
736     } /* end of k1 loop */
737     } /* end of face 4 */
738     if (m_faceOffset[5] > -1) {
739     const double tmp0_22 = 0.16666666666666666667/h2;
740     const double tmp0_16 = 0.62200846792814621559/h2;
741     const double tmp0_33 = -0.044658198738520451079/h2;
742     const double tmp0_0 = -0.78867513459481288225/h0;
743     const double tmp0_21 = -0.16666666666666666667/h2;
744     const double tmp0_17 = 0.16666666666666666667/h2;
745     const double tmp0_1 = 0.78867513459481288225/h0;
746     const double tmp0_20 = -0.044658198738520451079/h2;
747     const double tmp0_14 = 0.21132486540518711775/h1;
748     const double tmp0_2 = -0.21132486540518711775/h0;
749     const double tmp0_27 = -0.16666666666666666667/h2;
750     const double tmp0_15 = 0.78867513459481288225/h1;
751     const double tmp0_3 = 0.21132486540518711775/h0;
752     const double tmp0_26 = -0.16666666666666666667/h2;
753     const double tmp0_12 = -0.21132486540518711775/h1;
754     const double tmp0_25 = 0.16666666666666666667/h2;
755     const double tmp0_13 = -0.78867513459481288225/h1;
756     const double tmp0_24 = 0.044658198738520451079/h2;
757     const double tmp0_10 = 0.78867513459481288225/h1;
758     const double tmp0_11 = 0.21132486540518711775/h1;
759     const double tmp0_34 = -0.62200846792814621559/h2;
760     const double tmp0_35 = -0.16666666666666666667/h2;
761     const double tmp0_8 = -0.78867513459481288225/h1;
762     const double tmp0_29 = -0.62200846792814621559/h2;
763     const double tmp0_9 = -0.21132486540518711775/h1;
764     const double tmp0_30 = 0.044658198738520451079/h2;
765     const double tmp0_28 = -0.044658198738520451079/h2;
766     const double tmp0_32 = 0.62200846792814621559/h2;
767     const double tmp0_31 = 0.16666666666666666667/h2;
768     const double tmp0_18 = 0.044658198738520451079/h2;
769     const double tmp0_4 = -0.21132486540518711775/h0;
770     const double tmp0_19 = -0.62200846792814621559/h2;
771     const double tmp0_5 = 0.21132486540518711775/h0;
772     const double tmp0_6 = -0.78867513459481288225/h0;
773     const double tmp0_23 = 0.62200846792814621559/h2;
774     const double tmp0_7 = 0.78867513459481288225/h0;
775     #pragma omp parallel for
776     for (index_t k1=0; k1 < m_NE1; ++k1) {
777     for (index_t k0=0; k0 < m_NE0; ++k0) {
778     const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,m_N2-1, m_N0,m_N1));
779     const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,m_N2-1, m_N0,m_N1));
780     const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,m_N2-1, m_N0,m_N1));
781     const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,m_N2-1, m_N0,m_N1));
782     const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,m_N2-2, m_N0,m_N1));
783     const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,m_N2-2, m_N0,m_N1));
784     const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,m_N2-2, m_N0,m_N1));
785     const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,m_N2-2, m_N0,m_N1));
786     double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE0));
787     for (index_t i=0; i < numComp; ++i) {
788     o[INDEX3(i,0,0,numComp,3)] = f_001[i]*tmp0_0 + f_011[i]*tmp0_2 + f_101[i]*tmp0_1 + f_111[i]*tmp0_3;
789     o[INDEX3(i,1,0,numComp,3)] = f_001[i]*tmp0_8 + f_011[i]*tmp0_10 + f_101[i]*tmp0_9 + f_111[i]*tmp0_11;
790     o[INDEX3(i,2,0,numComp,3)] = f_000[i]*tmp0_19 + f_001[i]*tmp0_16 + f_110[i]*tmp0_20 + f_111[i]*tmp0_18 + tmp0_17*(f_011[i] + f_101[i]) + tmp0_21*(f_010[i] + f_100[i]);
791     o[INDEX3(i,0,1,numComp,3)] = f_001[i]*tmp0_0 + f_011[i]*tmp0_2 + f_101[i]*tmp0_1 + f_111[i]*tmp0_3;
792     o[INDEX3(i,1,1,numComp,3)] = f_001[i]*tmp0_12 + f_011[i]*tmp0_14 + f_101[i]*tmp0_13 + f_111[i]*tmp0_15;
793     o[INDEX3(i,2,1,numComp,3)] = f_000[i]*tmp0_26 + f_001[i]*tmp0_22 + f_010[i]*tmp0_28 + f_011[i]*tmp0_24 + f_100[i]*tmp0_29 + f_101[i]*tmp0_23 + f_110[i]*tmp0_27 + f_111[i]*tmp0_25;
794     o[INDEX3(i,0,2,numComp,3)] = f_001[i]*tmp0_4 + f_011[i]*tmp0_6 + f_101[i]*tmp0_5 + f_111[i]*tmp0_7;
795     o[INDEX3(i,1,2,numComp,3)] = f_001[i]*tmp0_8 + f_011[i]*tmp0_10 + f_101[i]*tmp0_9 + f_111[i]*tmp0_11;
796     o[INDEX3(i,2,2,numComp,3)] = f_000[i]*tmp0_26 + f_001[i]*tmp0_22 + f_010[i]*tmp0_29 + f_011[i]*tmp0_23 + f_100[i]*tmp0_28 + f_101[i]*tmp0_24 + f_110[i]*tmp0_27 + f_111[i]*tmp0_25;
797     o[INDEX3(i,0,3,numComp,3)] = f_001[i]*tmp0_4 + f_011[i]*tmp0_6 + f_101[i]*tmp0_5 + f_111[i]*tmp0_7;
798     o[INDEX3(i,1,3,numComp,3)] = f_001[i]*tmp0_12 + f_011[i]*tmp0_14 + f_101[i]*tmp0_13 + f_111[i]*tmp0_15;
799     o[INDEX3(i,2,3,numComp,3)] = f_000[i]*tmp0_33 + f_001[i]*tmp0_30 + f_110[i]*tmp0_34 + f_111[i]*tmp0_32 + tmp0_31*(f_011[i] + f_101[i]) + tmp0_35*(f_010[i] + f_100[i]);
800     } /* end of component loop i */
801     } /* end of k0 loop */
802     } /* end of k1 loop */
803     } /* end of face 5 */
804     /* GENERATOR SNIP_GRAD_FACES BOTTOM */
805 caltinay 3711 } else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
806     /* GENERATOR SNIP_GRAD_REDUCED_FACES TOP */
807     if (m_faceOffset[0] > -1) {
808     const double tmp0_0 = -0.25/h0;
809     const double tmp0_4 = -0.5/h2;
810     const double tmp0_1 = 0.25/h0;
811     const double tmp0_5 = 0.5/h2;
812     const double tmp0_2 = -0.5/h1;
813     const double tmp0_3 = 0.5/h1;
814     #pragma omp parallel for
815     for (index_t k2=0; k2 < m_NE2; ++k2) {
816     for (index_t k1=0; k1 < m_NE1; ++k1) {
817     const register double* f_000 = in.getSampleDataRO(INDEX3(0,k1,k2, m_N0,m_N1));
818     const register double* f_001 = in.getSampleDataRO(INDEX3(0,k1,k2+1, m_N0,m_N1));
819     const register double* f_101 = in.getSampleDataRO(INDEX3(1,k1,k2+1, m_N0,m_N1));
820     const register double* f_011 = in.getSampleDataRO(INDEX3(0,k1+1,k2+1, m_N0,m_N1));
821     const register double* f_100 = in.getSampleDataRO(INDEX3(1,k1,k2, m_N0,m_N1));
822     const register double* f_110 = in.getSampleDataRO(INDEX3(1,k1+1,k2, m_N0,m_N1));
823     const register double* f_010 = in.getSampleDataRO(INDEX3(0,k1+1,k2, m_N0,m_N1));
824     const register double* f_111 = in.getSampleDataRO(INDEX3(1,k1+1,k2+1, m_N0,m_N1));
825     double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE1));
826     for (index_t i=0; i < numComp; ++i) {
827     o[INDEX3(i,0,0,numComp,3)] = tmp0_0*(f_000[i] + f_001[i] + f_010[i] + f_011[i]) + tmp0_1*(f_100[i] + f_101[i] + f_110[i] + f_111[i]);
828     o[INDEX3(i,1,0,numComp,3)] = tmp0_2*(f_000[i] + f_001[i]) + tmp0_3*(f_010[i] + f_011[i]);
829     o[INDEX3(i,2,0,numComp,3)] = tmp0_4*(f_000[i] + f_010[i]) + tmp0_5*(f_001[i] + f_011[i]);
830     } /* end of component loop i */
831     } /* end of k1 loop */
832     } /* end of k2 loop */
833     } /* end of face 0 */
834     if (m_faceOffset[1] > -1) {
835     const double tmp0_0 = -0.25/h0;
836     const double tmp0_4 = 0.5/h2;
837     const double tmp0_1 = 0.25/h0;
838     const double tmp0_5 = -0.5/h2;
839     const double tmp0_2 = -0.5/h1;
840     const double tmp0_3 = 0.5/h1;
841     #pragma omp parallel for
842     for (index_t k2=0; k2 < m_NE2; ++k2) {
843     for (index_t k1=0; k1 < m_NE1; ++k1) {
844     const register double* f_000 = in.getSampleDataRO(INDEX3(m_N0-2,k1,k2, m_N0,m_N1));
845     const register double* f_001 = in.getSampleDataRO(INDEX3(m_N0-2,k1,k2+1, m_N0,m_N1));
846     const register double* f_101 = in.getSampleDataRO(INDEX3(m_N0-1,k1,k2+1, m_N0,m_N1));
847     const register double* f_011 = in.getSampleDataRO(INDEX3(m_N0-2,k1+1,k2+1, m_N0,m_N1));
848     const register double* f_100 = in.getSampleDataRO(INDEX3(m_N0-1,k1,k2, m_N0,m_N1));
849     const register double* f_110 = in.getSampleDataRO(INDEX3(m_N0-1,k1+1,k2, m_N0,m_N1));
850     const register double* f_010 = in.getSampleDataRO(INDEX3(m_N0-2,k1+1,k2, m_N0,m_N1));
851     const register double* f_111 = in.getSampleDataRO(INDEX3(m_N0-1,k1+1,k2+1, m_N0,m_N1));
852     double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE1));
853     for (index_t i=0; i < numComp; ++i) {
854     o[INDEX3(i,0,0,numComp,3)] = tmp0_0*(f_000[i] + f_001[i] + f_010[i] + f_011[i]) + tmp0_1*(f_100[i] + f_101[i] + f_110[i] + f_111[i]);
855     o[INDEX3(i,1,0,numComp,3)] = tmp0_2*(f_100[i] + f_101[i]) + tmp0_3*(f_110[i] + f_111[i]);
856     o[INDEX3(i,2,0,numComp,3)] = tmp0_4*(f_101[i] + f_111[i]) + tmp0_5*(f_100[i] + f_110[i]);
857     } /* end of component loop i */
858     } /* end of k1 loop */
859     } /* end of k2 loop */
860     } /* end of face 1 */
861     if (m_faceOffset[2] > -1) {
862     const double tmp0_0 = -0.5/h0;
863     const double tmp0_4 = -0.5/h2;
864     const double tmp0_1 = 0.5/h0;
865     const double tmp0_5 = 0.5/h2;
866     const double tmp0_2 = -0.25/h1;
867     const double tmp0_3 = 0.25/h1;
868     #pragma omp parallel for
869     for (index_t k2=0; k2 < m_NE2; ++k2) {
870     for (index_t k0=0; k0 < m_NE0; ++k0) {
871     const register double* f_000 = in.getSampleDataRO(INDEX3(k0,0,k2, m_N0,m_N1));
872     const register double* f_001 = in.getSampleDataRO(INDEX3(k0,0,k2+1, m_N0,m_N1));
873     const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,0,k2+1, m_N0,m_N1));
874     const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,0,k2, m_N0,m_N1));
875     const register double* f_011 = in.getSampleDataRO(INDEX3(k0,1,k2+1, m_N0,m_N1));
876     const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,1,k2, m_N0,m_N1));
877     const register double* f_010 = in.getSampleDataRO(INDEX3(k0,1,k2, m_N0,m_N1));
878     const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,1,k2+1, m_N0,m_N1));
879     double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE0));
880     for (index_t i=0; i < numComp; ++i) {
881     o[INDEX3(i,0,0,numComp,3)] = tmp0_0*(f_000[i] + f_001[i]) + tmp0_1*(f_100[i] + f_101[i]);
882     o[INDEX3(i,1,0,numComp,3)] = tmp0_2*(f_000[i] + f_001[i] + f_100[i] + f_101[i]) + tmp0_3*(f_010[i] + f_011[i] + f_110[i] + f_111[i]);
883     o[INDEX3(i,2,0,numComp,3)] = tmp0_4*(f_000[i] + f_100[i]) + tmp0_5*(f_001[i] + f_101[i]);
884     } /* end of component loop i */
885     } /* end of k0 loop */
886     } /* end of k2 loop */
887     } /* end of face 2 */
888     if (m_faceOffset[3] > -1) {
889     const double tmp0_0 = -0.5/h0;
890     const double tmp0_4 = 0.5/h2;
891     const double tmp0_1 = 0.5/h0;
892     const double tmp0_5 = -0.5/h2;
893     const double tmp0_2 = 0.25/h1;
894     const double tmp0_3 = -0.25/h1;
895     #pragma omp parallel for
896     for (index_t k2=0; k2 < m_NE2; ++k2) {
897     for (index_t k0=0; k0 < m_NE0; ++k0) {
898     const register double* f_011 = in.getSampleDataRO(INDEX3(k0,m_N1-1,k2+1, m_N0,m_N1));
899     const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,m_N1-1,k2, m_N0,m_N1));
900     const register double* f_010 = in.getSampleDataRO(INDEX3(k0,m_N1-1,k2, m_N0,m_N1));
901     const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,m_N1-1,k2+1, m_N0,m_N1));
902     const register double* f_000 = in.getSampleDataRO(INDEX3(k0,m_N1-2,k2, m_N0,m_N1));
903     const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,m_N1-2,k2+1, m_N0,m_N1));
904     const register double* f_001 = in.getSampleDataRO(INDEX3(k0,m_N1-2,k2+1, m_N0,m_N1));
905     const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,m_N1-2,k2, m_N0,m_N1));
906     double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE0));
907     for (index_t i=0; i < numComp; ++i) {
908     o[INDEX3(i,0,0,numComp,3)] = tmp0_0*(f_010[i] + f_011[i]) + tmp0_1*(f_110[i] + f_111[i]);
909     o[INDEX3(i,1,0,numComp,3)] = tmp0_2*(f_010[i] + f_011[i] + f_110[i] + f_111[i]) + tmp0_3*(f_000[i] + f_001[i] + f_100[i] + f_101[i]);
910     o[INDEX3(i,2,0,numComp,3)] = tmp0_4*(f_011[i] + f_111[i]) + tmp0_5*(f_010[i] + f_110[i]);
911     } /* end of component loop i */
912     } /* end of k0 loop */
913     } /* end of k2 loop */
914     } /* end of face 3 */
915     if (m_faceOffset[4] > -1) {
916     const double tmp0_0 = -0.5/h0;
917     const double tmp0_4 = -0.25/h2;
918     const double tmp0_1 = 0.5/h0;
919     const double tmp0_5 = 0.25/h2;
920     const double tmp0_2 = -0.5/h1;
921     const double tmp0_3 = 0.5/h1;
922     #pragma omp parallel for
923     for (index_t k1=0; k1 < m_NE1; ++k1) {
924     for (index_t k0=0; k0 < m_NE0; ++k0) {
925     const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,0, m_N0,m_N1));
926     const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,0, m_N0,m_N1));
927     const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,0, m_N0,m_N1));
928     const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,0, m_N0,m_N1));
929     const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,1, m_N0,m_N1));
930     const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,1, m_N0,m_N1));
931     const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,1, m_N0,m_N1));
932     const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,1, m_N0,m_N1));
933     double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE0));
934     for (index_t i=0; i < numComp; ++i) {
935     o[INDEX3(i,0,0,numComp,3)] = tmp0_0*(f_000[i] + f_010[i]) + tmp0_1*(f_100[i] + f_110[i]);
936     o[INDEX3(i,1,0,numComp,3)] = tmp0_2*(f_000[i] + f_100[i]) + tmp0_3*(f_010[i] + f_110[i]);
937     o[INDEX3(i,2,0,numComp,3)] = tmp0_4*(f_000[i] + f_010[i] + f_100[i] + f_110[i]) + tmp0_5*(f_001[i] + f_011[i] + f_101[i] + f_111[i]);
938     } /* end of component loop i */
939     } /* end of k0 loop */
940     } /* end of k1 loop */
941     } /* end of face 4 */
942     if (m_faceOffset[5] > -1) {
943     const double tmp0_0 = -0.5/h0;
944     const double tmp0_4 = 0.25/h2;
945     const double tmp0_1 = 0.5/h0;
946     const double tmp0_5 = -0.25/h2;
947     const double tmp0_2 = -0.5/h1;
948     const double tmp0_3 = 0.5/h1;
949     #pragma omp parallel for
950     for (index_t k1=0; k1 < m_NE1; ++k1) {
951     for (index_t k0=0; k0 < m_NE0; ++k0) {
952     const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,m_N2-1, m_N0,m_N1));
953     const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,m_N2-1, m_N0,m_N1));
954     const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,m_N2-1, m_N0,m_N1));
955     const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,m_N2-1, m_N0,m_N1));
956     const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,m_N2-2, m_N0,m_N1));
957     const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,m_N2-2, m_N0,m_N1));
958     const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,m_N2-2, m_N0,m_N1));
959     const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,m_N2-2, m_N0,m_N1));
960     double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE0));
961     for (index_t i=0; i < numComp; ++i) {
962     o[INDEX3(i,0,0,numComp,3)] = tmp0_0*(f_001[i] + f_011[i]) + tmp0_1*(f_101[i] + f_111[i]);
963     o[INDEX3(i,1,0,numComp,3)] = tmp0_2*(f_001[i] + f_101[i]) + tmp0_3*(f_011[i] + f_111[i]);
964     o[INDEX3(i,2,0,numComp,3)] = tmp0_4*(f_001[i] + f_011[i] + f_101[i] + f_111[i]) + tmp0_5*(f_000[i] + f_010[i] + f_100[i] + f_110[i]);
965     } /* end of component loop i */
966     } /* end of k0 loop */
967     } /* end of k1 loop */
968     } /* end of face 5 */
969     /* GENERATOR SNIP_GRAD_REDUCED_FACES BOTTOM */
970 caltinay 3703 } else {
971 caltinay 3707 stringstream msg;
972     msg << "setToGradient() not implemented for "
973     << functionSpaceTypeAsString(out.getFunctionSpace().getTypeCode());
974     throw RipleyException(msg.str());
975 caltinay 3703 }
976     }
977    
978 caltinay 3691 Paso_SystemMatrixPattern* Brick::getPattern(bool reducedRowOrder,
979     bool reducedColOrder) const
980     {
981     if (reducedRowOrder || reducedColOrder)
982     throw RipleyException("getPattern() not implemented for reduced order");
983    
984     throw RipleyException("getPattern() not implemented");
985     }
986    
987     void Brick::Print_Mesh_Info(const bool full) const
988     {
989     RipleyDomain::Print_Mesh_Info(full);
990     if (full) {
991     cout << " Id Coordinates" << endl;
992     cout.precision(15);
993     cout.setf(ios::scientific, ios::floatfield);
994 caltinay 3698 pair<double,double> xdx = getFirstCoordAndSpacing(0);
995     pair<double,double> ydy = getFirstCoordAndSpacing(1);
996     pair<double,double> zdz = getFirstCoordAndSpacing(2);
997 caltinay 3691 for (index_t i=0; i < getNumNodes(); i++) {
998     cout << " " << setw(5) << m_nodeId[i]
999 caltinay 3698 << " " << xdx.first+(i%m_N0)*xdx.second
1000     << " " << ydy.first+(i%(m_N0*m_N1)/m_N0)*ydy.second
1001     << " " << zdz.first+(i/(m_N0*m_N1))*zdz.second << endl;
1002 caltinay 3691 }
1003     }
1004     }
1005    
1006 caltinay 3698 IndexVector Brick::getNumNodesPerDim() const
1007     {
1008     IndexVector ret;
1009     ret.push_back(m_N0);
1010     ret.push_back(m_N1);
1011     ret.push_back(m_N2);
1012     return ret;
1013     }
1014    
1015     IndexVector Brick::getNumElementsPerDim() const
1016     {
1017     IndexVector ret;
1018     ret.push_back(m_NE0);
1019     ret.push_back(m_NE1);
1020     ret.push_back(m_NE2);
1021     return ret;
1022     }
1023    
1024     IndexVector Brick::getNumFacesPerBoundary() const
1025     {
1026     IndexVector ret(6, 0);
1027     //left
1028     if (m_offset0==0)
1029     ret[0]=m_NE1*m_NE2;
1030     //right
1031     if (m_mpiInfo->rank%m_NX==m_NX-1)
1032     ret[1]=m_NE1*m_NE2;
1033     //bottom
1034     if (m_offset1==0)
1035     ret[2]=m_NE0*m_NE2;
1036     //top
1037     if (m_mpiInfo->rank%(m_NX*m_NY)/m_NX==m_NY-1)
1038     ret[3]=m_NE0*m_NE2;
1039     //front
1040     if (m_offset2==0)
1041     ret[4]=m_NE0*m_NE1;
1042     //back
1043     if (m_mpiInfo->rank/(m_NX*m_NY)==m_NZ-1)
1044     ret[5]=m_NE0*m_NE1;
1045     return ret;
1046     }
1047    
1048     pair<double,double> Brick::getFirstCoordAndSpacing(dim_t dim) const
1049     {
1050     if (dim==0)
1051     return pair<double,double>((m_l0*m_offset0)/m_gNE0, m_l0/m_gNE0);
1052     else if (dim==1)
1053     return pair<double,double>((m_l1*m_offset1)/m_gNE1, m_l1/m_gNE1);
1054     else if (dim==2)
1055     return pair<double,double>((m_l2*m_offset2)/m_gNE2, m_l2/m_gNE2);
1056    
1057     throw RipleyException("getFirstCoordAndSpacing(): invalid argument");
1058     }
1059    
1060    
1061 caltinay 3691 //protected
1062     dim_t Brick::getNumFaceElements() const
1063     {
1064     dim_t n=0;
1065     //left
1066     if (m_offset0==0)
1067     n+=m_NE1*m_NE2;
1068     //right
1069     if (m_mpiInfo->rank%m_NX==m_NX-1)
1070     n+=m_NE1*m_NE2;
1071     //bottom
1072     if (m_offset1==0)
1073     n+=m_NE0*m_NE2;
1074     //top
1075     if (m_mpiInfo->rank%(m_NX*m_NY)/m_NX==m_NY-1)
1076     n+=m_NE0*m_NE2;
1077     //front
1078     if (m_offset2==0)
1079     n+=m_NE0*m_NE1;
1080     //back
1081     if (m_mpiInfo->rank/(m_NX*m_NY)==m_NZ-1)
1082     n+=m_NE0*m_NE1;
1083    
1084     return n;
1085     }
1086    
1087     //protected
1088     void Brick::assembleCoordinates(escript::Data& arg) const
1089     {
1090     escriptDataC x = arg.getDataC();
1091     int numDim = m_numDim;
1092     if (!isDataPointShapeEqual(&x, 1, &numDim))
1093     throw RipleyException("setToX: Invalid Data object shape");
1094     if (!numSamplesEqual(&x, 1, getNumNodes()))
1095     throw RipleyException("setToX: Illegal number of samples in Data object");
1096    
1097 caltinay 3698 pair<double,double> xdx = getFirstCoordAndSpacing(0);
1098     pair<double,double> ydy = getFirstCoordAndSpacing(1);
1099     pair<double,double> zdz = getFirstCoordAndSpacing(2);
1100 caltinay 3691 arg.requireWrite();
1101     #pragma omp parallel for
1102     for (dim_t i2 = 0; i2 < m_N2; i2++) {
1103     for (dim_t i1 = 0; i1 < m_N1; i1++) {
1104     for (dim_t i0 = 0; i0 < m_N0; i0++) {
1105     double* point = arg.getSampleDataRW(i0+m_N0*i1+m_N0*m_N1*i2);
1106 caltinay 3698 point[0] = xdx.first+i0*xdx.second;
1107     point[1] = ydy.first+i1*ydy.second;
1108     point[2] = zdz.first+i2*zdz.second;
1109 caltinay 3691 }
1110     }
1111     }
1112     }
1113    
1114     //private
1115     void Brick::populateSampleIds()
1116     {
1117 caltinay 3697 // identifiers are ordered from left to right, bottom to top, front to back
1118     // on each rank, except for the shared nodes which are owned by the rank
1119     // below / to the left / to the front of the current rank
1120 caltinay 3698
1121     // build node distribution vector first.
1122     // m_nodeDistribution[i] is the first node id on rank i, that is
1123     // rank i owns m_nodeDistribution[i+1]-nodeDistribution[i] nodes
1124     m_nodeDistribution.assign(m_mpiInfo->size+1, 0);
1125     m_nodeDistribution[1]=getNumNodes();
1126     for (dim_t k=1; k<m_mpiInfo->size-1; k++) {
1127     const index_t x = k%m_NX;
1128     const index_t y = k%(m_NX*m_NY)/m_NX;
1129     const index_t z = k/(m_NX*m_NY);
1130     index_t numNodes=getNumNodes();
1131     if (x>0)
1132     numNodes-=m_N1*m_N2;
1133     if (y>0)
1134     numNodes-=m_N0*m_N2;
1135     if (z>0)
1136     numNodes-=m_N0*m_N1;
1137     // if an edge was subtracted twice add it back
1138     if (x>0 && y>0)
1139     numNodes+=m_N2;
1140     if (x>0 && z>0)
1141     numNodes+=m_N1;
1142     if (y>0 && z>0)
1143     numNodes+=m_N0;
1144     // the corner node was removed 3x and added back 3x, so subtract it
1145     if (x>0 && y>0 && z>0)
1146     numNodes--;
1147     m_nodeDistribution[k+1]=m_nodeDistribution[k]+numNodes;
1148     }
1149     m_nodeDistribution[m_mpiInfo->size]=getNumDataPointsGlobal();
1150    
1151 caltinay 3691 m_nodeId.resize(getNumNodes());
1152 caltinay 3698
1153     // the bottom, left and front planes are not owned by this rank so the
1154     // identifiers need to be computed accordingly
1155     const index_t left = (m_offset0==0 ? 0 : 1);
1156     const index_t bottom = (m_offset1==0 ? 0 : 1);
1157     const index_t front = (m_offset2==0 ? 0 : 1);
1158    
1159     // case 1: all nodes on left plane are owned by rank on the left
1160     if (left>0) {
1161     const int neighbour=m_mpiInfo->rank-1;
1162     const index_t leftN0=(neighbour%m_NX == 0 ? m_N0 : m_N0-1);
1163     const index_t leftN1=(neighbour%(m_NX*m_NY)/m_NX==0 ? m_N1 : m_N1-1);
1164 caltinay 3691 #pragma omp parallel for
1165 caltinay 3698 for (dim_t i2=front; i2<m_N2; i2++) {
1166     for (dim_t i1=bottom; i1<m_N1; i1++) {
1167     m_nodeId[i1*m_N0+i2*m_N0*m_N1]=m_nodeDistribution[neighbour]
1168     + (i1-bottom+1)*leftN0
1169     + (i2-front)*leftN0*leftN1 - 1;
1170     }
1171     }
1172 caltinay 3691 }
1173 caltinay 3698 // case 2: all nodes on bottom plane are owned by rank below
1174     if (bottom>0) {
1175     const int neighbour=m_mpiInfo->rank-m_NX;
1176     const index_t bottomN0=(neighbour%m_NX == 0 ? m_N0 : m_N0-1);
1177     const index_t bottomN1=(neighbour%(m_NX*m_NY)/m_NX==0 ? m_N1 : m_N1-1);
1178     #pragma omp parallel for
1179     for (dim_t i2=front; i2<m_N2; i2++) {
1180     for (dim_t i0=left; i0<m_N0; i0++) {
1181     m_nodeId[i0+i2*m_N0*m_N1]=m_nodeDistribution[neighbour]
1182     + bottomN0*(bottomN1-1)
1183     + (i2-front)*bottomN0*bottomN1 + i0-left;
1184     }
1185     }
1186     }
1187     // case 3: all nodes on front plane are owned by rank in front
1188     if (front>0) {
1189     const int neighbour=m_mpiInfo->rank-m_NX*m_NY;
1190     const index_t N0=(neighbour%m_NX == 0 ? m_N0 : m_N0-1);
1191     const index_t N1=(neighbour%(m_NX*m_NY)/m_NX==0 ? m_N1 : m_N1-1);
1192     const index_t N2=(neighbour/(m_NX*m_NY)==0 ? m_N2 : m_N2-1);
1193     #pragma omp parallel for
1194     for (dim_t i1=bottom; i1<m_N1; i1++) {
1195     for (dim_t i0=left; i0<m_N0; i0++) {
1196     m_nodeId[i0+i1*m_N0]=m_nodeDistribution[neighbour]
1197     + N0*N1*(N2-1)+(i1-bottom)*N0 + i0-left;
1198     }
1199     }
1200     }
1201     // case 4: nodes on front bottom edge are owned by the corresponding rank
1202     if (front>0 && bottom>0) {
1203     const int neighbour=m_mpiInfo->rank-m_NX*(m_NY+1);
1204     const index_t N0=(neighbour%m_NX == 0 ? m_N0 : m_N0-1);
1205     const index_t N1=(neighbour%(m_NX*m_NY)/m_NX==0 ? m_N1 : m_N1-1);
1206     const index_t N2=(neighbour/(m_NX*m_NY)==0 ? m_N2 : m_N2-1);
1207     #pragma omp parallel for
1208     for (dim_t i0=left; i0<m_N0; i0++) {
1209     m_nodeId[i0]=m_nodeDistribution[neighbour]
1210     + N0*N1*(N2-1)+(N1-1)*N0 + i0-left;
1211     }
1212     }
1213     // case 5: nodes on left bottom edge are owned by the corresponding rank
1214     if (left>0 && bottom>0) {
1215     const int neighbour=m_mpiInfo->rank-m_NX-1;
1216     const index_t N0=(neighbour%m_NX == 0 ? m_N0 : m_N0-1);
1217     const index_t N1=(neighbour%(m_NX*m_NY)/m_NX==0 ? m_N1 : m_N1-1);
1218     #pragma omp parallel for
1219     for (dim_t i2=front; i2<m_N2; i2++) {
1220     m_nodeId[i2*m_N0*m_N1]=m_nodeDistribution[neighbour]
1221     + (1+i2-front)*N0*N1-1;
1222     }
1223     }
1224     // case 6: nodes on left front edge are owned by the corresponding rank
1225     if (left>0 && front>0) {
1226     const int neighbour=m_mpiInfo->rank-m_NX*m_NY-1;
1227     const index_t N0=(neighbour%m_NX == 0 ? m_N0 : m_N0-1);
1228     const index_t N1=(neighbour%(m_NX*m_NY)/m_NX==0 ? m_N1 : m_N1-1);
1229     const index_t N2=(neighbour/(m_NX*m_NY)==0 ? m_N2 : m_N2-1);
1230     #pragma omp parallel for
1231     for (dim_t i1=bottom; i1<m_N1; i1++) {
1232     m_nodeId[i1*m_N0]=m_nodeDistribution[neighbour]
1233     + N0*N1*(N2-1)+N0-1+(i1-bottom)*N0;
1234     }
1235     }
1236     // case 7: bottom-left-front corner node owned by corresponding rank
1237     if (left>0 && bottom>0 && front>0) {
1238     const int neighbour=m_mpiInfo->rank-m_NX*(m_NY+1)-1;
1239     const index_t N0=(neighbour%m_NX == 0 ? m_N0 : m_N0-1);
1240     const index_t N1=(neighbour%(m_NX*m_NY)/m_NX==0 ? m_N1 : m_N1-1);
1241     const index_t N2=(neighbour/(m_NX*m_NY) == 0 ? m_N2 : m_N2-1);
1242     m_nodeId[0]=m_nodeDistribution[neighbour]+N0*N1*N2-1;
1243     }
1244 caltinay 3697
1245 caltinay 3698 // the rest of the id's are contiguous
1246     const index_t firstId=m_nodeDistribution[m_mpiInfo->rank];
1247     #pragma omp parallel for
1248     for (dim_t i2=front; i2<m_N2; i2++) {
1249     for (dim_t i1=bottom; i1<m_N1; i1++) {
1250     for (dim_t i0=left; i0<m_N0; i0++) {
1251     m_nodeId[i0+i1*m_N0+i2*m_N0*m_N1] = firstId+i0-left
1252     +(i1-bottom)*(m_N0-left)
1253     +(i2-front)*(m_N0-left)*(m_N1-bottom);
1254     }
1255     }
1256     }
1257    
1258 caltinay 3697 // elements
1259     m_elementId.resize(getNumElements());
1260     #pragma omp parallel for
1261     for (dim_t k=0; k<getNumElements(); k++) {
1262     m_elementId[k]=k;
1263     }
1264    
1265     // face elements
1266     m_faceId.resize(getNumFaceElements());
1267     #pragma omp parallel for
1268     for (dim_t k=0; k<getNumFaceElements(); k++) {
1269     m_faceId[k]=k;
1270     }
1271 caltinay 3704
1272     // generate face offset vector
1273     const IndexVector facesPerEdge = getNumFacesPerBoundary();
1274     m_faceOffset.assign(facesPerEdge.size(), -1);
1275     index_t offset=0;
1276     for (size_t i=0; i<facesPerEdge.size(); i++) {
1277     if (facesPerEdge[i]>0) {
1278     m_faceOffset[i]=offset;
1279     offset+=facesPerEdge[i];
1280     }
1281     }
1282 caltinay 3691 }
1283    
1284 caltinay 3703 //protected
1285 caltinay 3711 void Brick::interpolateNodesOnElements(escript::Data& out, escript::Data& in,
1286     bool reduced) const
1287 caltinay 3703 {
1288     const dim_t numComp = in.getDataPointSize();
1289 caltinay 3711 if (reduced) {
1290     /* GENERATOR SNIP_INTERPOLATE_REDUCED_ELEMENTS TOP */
1291     const double tmp0_0 = 0.12500000000000000000;
1292 caltinay 3703 #pragma omp parallel for
1293 caltinay 3711 for (index_t k2=0; k2 < m_NE2; ++k2) {
1294     for (index_t k1=0; k1 < m_NE1; ++k1) {
1295     for (index_t k0=0; k0 < m_NE0; ++k0) {
1296     const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,k2, m_N0,m_N1));
1297     const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,k2+1, m_N0,m_N1));
1298     const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,k2+1, m_N0,m_N1));
1299     const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,k2+1, m_N0,m_N1));
1300     const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,k2, m_N0,m_N1));
1301     const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2, m_N0,m_N1));
1302     const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,k2, m_N0,m_N1));
1303     const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2+1, m_N0,m_N1));
1304     double* o = out.getSampleDataRW(INDEX3(k0,k1,k2,m_NE0,m_NE1));
1305     for (index_t i=0; i < numComp; ++i) {
1306     o[INDEX2(i,numComp,0)] = tmp0_0*(f_000[i] + f_001[i] + f_010[i] + f_011[i] + f_100[i] + f_101[i] + f_110[i] + f_111[i]);
1307     } /* end of component loop i */
1308     } /* end of k0 loop */
1309     } /* end of k1 loop */
1310     } /* end of k2 loop */
1311     /* GENERATOR SNIP_INTERPOLATE_REDUCED_ELEMENTS BOTTOM */
1312     } else {
1313     /* GENERATOR SNIP_INTERPOLATE_ELEMENTS TOP */
1314     const double tmp0_3 = 0.0094373878376559314545;
1315     const double tmp0_2 = 0.035220810900864519624;
1316     const double tmp0_1 = 0.13144585576580214704;
1317     const double tmp0_0 = 0.49056261216234406855;
1318     #pragma omp parallel for
1319     for (index_t k2=0; k2 < m_NE2; ++k2) {
1320     for (index_t k1=0; k1 < m_NE1; ++k1) {
1321     for (index_t k0=0; k0 < m_NE0; ++k0) {
1322     const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,k2, m_N0,m_N1));
1323     const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,k2+1, m_N0,m_N1));
1324     const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,k2+1, m_N0,m_N1));
1325     const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,k2+1, m_N0,m_N1));
1326     const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2, m_N0,m_N1));
1327     const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,k2, m_N0,m_N1));
1328     const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,k2, m_N0,m_N1));
1329     const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2+1, m_N0,m_N1));
1330     double* o = out.getSampleDataRW(INDEX3(k0,k1,k2,m_NE0,m_NE1));
1331     for (index_t i=0; i < numComp; ++i) {
1332     o[INDEX2(i,numComp,0)] = f_000[i]*tmp0_0 + f_111[i]*tmp0_3 + tmp0_1*(f_001[i] + f_010[i] + f_100[i]) + tmp0_2*(f_011[i] + f_101[i] + f_110[i]);
1333     o[INDEX2(i,numComp,1)] = f_011[i]*tmp0_3 + f_100[i]*tmp0_0 + tmp0_1*(f_000[i] + f_101[i] + f_110[i]) + tmp0_2*(f_001[i] + f_010[i] + f_111[i]);
1334     o[INDEX2(i,numComp,2)] = f_010[i]*tmp0_0 + f_101[i]*tmp0_3 + tmp0_1*(f_000[i] + f_011[i] + f_110[i]) + tmp0_2*(f_001[i] + f_100[i] + f_111[i]);
1335     o[INDEX2(i,numComp,3)] = f_001[i]*tmp0_3 + f_110[i]*tmp0_0 + tmp0_1*(f_010[i] + f_100[i] + f_111[i]) + tmp0_2*(f_000[i] + f_011[i] + f_101[i]);
1336     o[INDEX2(i,numComp,4)] = f_001[i]*tmp0_0 + f_110[i]*tmp0_3 + tmp0_1*(f_000[i] + f_011[i] + f_101[i]) + tmp0_2*(f_010[i] + f_100[i] + f_111[i]);
1337     o[INDEX2(i,numComp,5)] = f_010[i]*tmp0_3 + f_101[i]*tmp0_0 + tmp0_1*(f_001[i] + f_100[i] + f_111[i]) + tmp0_2*(f_000[i] + f_011[i] + f_110[i]);
1338     o[INDEX2(i,numComp,6)] = f_011[i]*tmp0_0 + f_100[i]*tmp0_3 + tmp0_1*(f_001[i] + f_010[i] + f_111[i]) + tmp0_2*(f_000[i] + f_101[i] + f_110[i]);
1339     o[INDEX2(i,numComp,7)] = f_000[i]*tmp0_3 + f_111[i]*tmp0_0 + tmp0_1*(f_011[i] + f_101[i] + f_110[i]) + tmp0_2*(f_001[i] + f_010[i] + f_100[i]);
1340     } /* end of component loop i */
1341     } /* end of k0 loop */
1342     } /* end of k1 loop */
1343     } /* end of k2 loop */
1344     /* GENERATOR SNIP_INTERPOLATE_ELEMENTS BOTTOM */
1345     }
1346 caltinay 3703 }
1347    
1348     //protected
1349 caltinay 3711 void Brick::interpolateNodesOnFaces(escript::Data& out, escript::Data& in,
1350     bool reduced) const
1351 caltinay 3703 {
1352 caltinay 3704 const dim_t numComp = in.getDataPointSize();
1353 caltinay 3711 if (reduced) {
1354     /* GENERATOR SNIP_INTERPOLATE_REDUCED_FACES TOP */
1355     if (m_faceOffset[0] > -1) {
1356     const double tmp0_0 = 0.25000000000000000000;
1357 caltinay 3704 #pragma omp parallel for
1358 caltinay 3711 for (index_t k2=0; k2 < m_NE2; ++k2) {
1359     for (index_t k1=0; k1 < m_NE1; ++k1) {
1360     const register double* f_011 = in.getSampleDataRO(INDEX3(0,k1+1,k2+1, m_N0,m_N1));
1361     const register double* f_010 = in.getSampleDataRO(INDEX3(0,k1+1,k2, m_N0,m_N1));
1362     const register double* f_001 = in.getSampleDataRO(INDEX3(0,k1,k2+1, m_N0,m_N1));
1363     const register double* f_000 = in.getSampleDataRO(INDEX3(0,k1,k2, m_N0,m_N1));
1364     double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE1));
1365     for (index_t i=0; i < numComp; ++i) {
1366     o[INDEX2(i,numComp,0)] = tmp0_0*(f_000[i] + f_001[i] + f_010[i] + f_011[i]);
1367     } /* end of component loop i */
1368     } /* end of k1 loop */
1369     } /* end of k2 loop */
1370     } /* end of face 0 */
1371     if (m_faceOffset[1] > -1) {
1372     const double tmp0_0 = 0.25000000000000000000;
1373     #pragma omp parallel for
1374     for (index_t k2=0; k2 < m_NE2; ++k2) {
1375     for (index_t k1=0; k1 < m_NE1; ++k1) {
1376     const register double* f_110 = in.getSampleDataRO(INDEX3(m_N0-1,k1+1,k2, m_N0,m_N1));
1377     const register double* f_100 = in.getSampleDataRO(INDEX3(m_N0-1,k1,k2, m_N0,m_N1));
1378     const register double* f_101 = in.getSampleDataRO(INDEX3(m_N0-1,k1,k2+1, m_N0,m_N1));
1379     const register double* f_111 = in.getSampleDataRO(INDEX3(m_N0-1,k1+1,k2+1, m_N0,m_N1));
1380     double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE1));
1381     for (index_t i=0; i < numComp; ++i) {
1382     o[INDEX2(i,numComp,0)] = tmp0_0*(f_100[i] + f_101[i] + f_110[i] + f_111[i]);
1383     } /* end of component loop i */
1384     } /* end of k1 loop */
1385     } /* end of k2 loop */
1386     } /* end of face 1 */
1387     if (m_faceOffset[2] > -1) {
1388     const double tmp0_0 = 0.25000000000000000000;
1389     #pragma omp parallel for
1390     for (index_t k2=0; k2 < m_NE2; ++k2) {
1391     for (index_t k0=0; k0 < m_NE0; ++k0) {
1392     const register double* f_000 = in.getSampleDataRO(INDEX3(k0,0,k2, m_N0,m_N1));
1393     const register double* f_001 = in.getSampleDataRO(INDEX3(k0,0,k2+1, m_N0,m_N1));
1394     const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,0,k2+1, m_N0,m_N1));
1395     const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,0,k2, m_N0,m_N1));
1396     double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE0));
1397     for (index_t i=0; i < numComp; ++i) {
1398     o[INDEX2(i,numComp,0)] = tmp0_0*(f_000[i] + f_001[i] + f_100[i] + f_101[i]);
1399     } /* end of component loop i */
1400     } /* end of k0 loop */
1401     } /* end of k2 loop */
1402     } /* end of face 2 */
1403     if (m_faceOffset[3] > -1) {
1404     const double tmp0_0 = 0.25000000000000000000;
1405     #pragma omp parallel for
1406     for (index_t k2=0; k2 < m_NE2; ++k2) {
1407     for (index_t k0=0; k0 < m_NE0; ++k0) {
1408     const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,m_N1-1,k2, m_N0,m_N1));
1409     const register double* f_011 = in.getSampleDataRO(INDEX3(k0,m_N1-1,k2+1, m_N0,m_N1));
1410     const register double* f_010 = in.getSampleDataRO(INDEX3(k0,m_N1-1,k2, m_N0,m_N1));
1411     const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,m_N1-1,k2+1, m_N0,m_N1));
1412     double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE0));
1413     for (index_t i=0; i < numComp; ++i) {
1414     o[INDEX2(i,numComp,0)] = tmp0_0*(f_010[i] + f_011[i] + f_110[i] + f_111[i]);
1415     } /* end of component loop i */
1416     } /* end of k0 loop */
1417     } /* end of k2 loop */
1418     } /* end of face 3 */
1419     if (m_faceOffset[4] > -1) {
1420     const double tmp0_0 = 0.25000000000000000000;
1421     #pragma omp parallel for
1422 caltinay 3704 for (index_t k1=0; k1 < m_NE1; ++k1) {
1423 caltinay 3711 for (index_t k0=0; k0 < m_NE0; ++k0) {
1424     const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,0, m_N0,m_N1));
1425     const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,0, m_N0,m_N1));
1426     const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,0, m_N0,m_N1));
1427     const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,0, m_N0,m_N1));
1428     double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE0));
1429     for (index_t i=0; i < numComp; ++i) {
1430     o[INDEX2(i,numComp,0)] = tmp0_0*(f_000[i] + f_010[i] + f_100[i] + f_110[i]);
1431     } /* end of component loop i */
1432     } /* end of k0 loop */
1433 caltinay 3704 } /* end of k1 loop */
1434 caltinay 3711 } /* end of face 4 */
1435     if (m_faceOffset[5] > -1) {
1436     const double tmp0_0 = 0.25000000000000000000;
1437 caltinay 3704 #pragma omp parallel for
1438     for (index_t k1=0; k1 < m_NE1; ++k1) {
1439 caltinay 3711 for (index_t k0=0; k0 < m_NE0; ++k0) {
1440     const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,m_N2-1, m_N0,m_N1));
1441     const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,m_N2-1,