/[escript]/branches/diaplayground/ripley/src/Brick.cpp
ViewVC logotype

Annotation of /branches/diaplayground/ripley/src/Brick.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 3753 - (hide annotations)
Tue Jan 3 09:01:49 2012 UTC (7 years, 2 months ago) by caltinay
Original Path: branches/ripleygmg_from_3668/ripley/src/Brick.cpp
File size: 91022 byte(s)
Took over Node->DOF interpolation to Brick and parallelised some loops.

1 caltinay 3691
2     /*******************************************************
3     *
4     * Copyright (c) 2003-2011 by University of Queensland
5     * Earth Systems Science Computational Center (ESSCC)
6     * http://www.uq.edu.au/esscc
7     *
8     * Primary Business: Queensland, Australia
9     * Licensed under the Open Software License version 3.0
10     * http://www.opensource.org/licenses/osl-3.0.php
11     *
12     *******************************************************/
13    
14     #include <ripley/Brick.h>
15     extern "C" {
16     #include "paso/SystemMatrixPattern.h"
17     }
18    
19     #if USE_SILO
20     #include <silo.h>
21     #ifdef ESYS_MPI
22     #include <pmpio.h>
23     #endif
24     #endif
25    
26     #include <iomanip>
27    
28     using namespace std;
29    
30     namespace ripley {
31    
32     Brick::Brick(int n0, int n1, int n2, double l0, double l1, double l2, int d0,
33     int d1, int d2) :
34     RipleyDomain(3),
35     m_gNE0(n0),
36     m_gNE1(n1),
37     m_gNE2(n2),
38     m_l0(l0),
39     m_l1(l1),
40     m_l2(l2),
41     m_NX(d0),
42     m_NY(d1),
43     m_NZ(d2)
44     {
45     // ensure number of subdivisions is valid and nodes can be distributed
46     // among number of ranks
47     if (m_NX*m_NY*m_NZ != m_mpiInfo->size)
48     throw RipleyException("Invalid number of spatial subdivisions");
49    
50 caltinay 3753 if ((n0+1)%m_NX > 0 || (n1+1)%m_NY > 0 || (n2+1)%m_NZ > 0)
51     throw RipleyException("Number of elements+1 must be separable into number of ranks in each dimension");
52 caltinay 3691
53 caltinay 3753 if ((m_NX > 1 && (n0+1)/m_NX<2) || (m_NY > 1 && (n1+1)/m_NY<2) || (m_NZ > 1 && (n2+1)/m_NZ<2))
54     throw RipleyException("Too few elements for the number of ranks");
55    
56     // local number of elements (including overlap)
57     m_NE0 = (m_NX>1 ? (n0+1)/m_NX : n0);
58     if (m_mpiInfo->rank%m_NX>0 && m_mpiInfo->rank%m_NX<m_NX-1)
59     m_NE0++;
60     m_NE1 = (m_NY>1 ? (n1+1)/m_NY : n1);
61     if (m_mpiInfo->rank%(m_NX*m_NY)/m_NX>0 && m_mpiInfo->rank%(m_NX*m_NY)/m_NX<m_NY-1)
62     m_NE1++;
63     m_NE2 = (m_NZ>1 ? (n2+1)/m_NZ : n2);
64     if (m_mpiInfo->rank/(m_NX*m_NY)>0 && m_mpiInfo->rank/(m_NX*m_NY)<m_NZ-1)
65     m_NE2++;
66    
67     // local number of nodes
68 caltinay 3691 m_N0 = m_NE0+1;
69     m_N1 = m_NE1+1;
70     m_N2 = m_NE2+1;
71 caltinay 3753
72 caltinay 3691 // bottom-left-front node is at (offset0,offset1,offset2) in global mesh
73 caltinay 3753 m_offset0 = (n0+1)/m_NX*(m_mpiInfo->rank%m_NX);
74     if (m_offset0 > 0)
75     m_offset0--;
76     m_offset1 = (n1+1)/m_NY*(m_mpiInfo->rank%(m_NX*m_NY)/m_NX);
77     if (m_offset1 > 0)
78     m_offset1--;
79     m_offset2 = (n2+1)/m_NZ*(m_mpiInfo->rank/(m_NX*m_NY));
80     if (m_offset2 > 0)
81     m_offset2--;
82    
83 caltinay 3691 populateSampleIds();
84     }
85    
86    
87     Brick::~Brick()
88     {
89     }
90    
91     string Brick::getDescription() const
92     {
93     return "ripley::Brick";
94     }
95    
96     bool Brick::operator==(const AbstractDomain& other) const
97     {
98 caltinay 3744 const Brick* o=dynamic_cast<const Brick*>(&other);
99     if (o) {
100     return (RipleyDomain::operator==(other) &&
101     m_gNE0==o->m_gNE0 && m_gNE1==o->m_gNE1 && m_gNE2==o->m_gNE2
102     && m_l0==o->m_l0 && m_l1==o->m_l1 && m_l2==o->m_l2
103     && m_NX==o->m_NX && m_NY==o->m_NY && m_NZ==o->m_NZ);
104     }
105 caltinay 3691
106     return false;
107     }
108    
109     void Brick::dump(const string& fileName) const
110     {
111     #if USE_SILO
112     string fn(fileName);
113     if (fileName.length() < 6 || fileName.compare(fileName.length()-5, 5, ".silo") != 0) {
114     fn+=".silo";
115     }
116    
117     const int NUM_SILO_FILES = 1;
118     const char* blockDirFmt = "/block%04d";
119     int driver=DB_HDF5;
120     string siloPath;
121     DBfile* dbfile = NULL;
122    
123     #ifdef ESYS_MPI
124     PMPIO_baton_t* baton = NULL;
125     #endif
126    
127     if (m_mpiInfo->size > 1) {
128     #ifdef ESYS_MPI
129     baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
130     0x1337, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
131     PMPIO_DefaultClose, (void*)&driver);
132     // try the fallback driver in case of error
133     if (!baton && driver != DB_PDB) {
134     driver = DB_PDB;
135     baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
136     0x1338, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
137     PMPIO_DefaultClose, (void*)&driver);
138     }
139     if (baton) {
140     char str[64];
141     snprintf(str, 64, blockDirFmt, PMPIO_RankInGroup(baton, m_mpiInfo->rank));
142     siloPath = str;
143     dbfile = (DBfile*) PMPIO_WaitForBaton(baton, fn.c_str(), siloPath.c_str());
144     }
145     #endif
146     } else {
147     dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
148     getDescription().c_str(), driver);
149     // try the fallback driver in case of error
150     if (!dbfile && driver != DB_PDB) {
151     driver = DB_PDB;
152     dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
153     getDescription().c_str(), driver);
154     }
155     }
156    
157     if (!dbfile)
158     throw RipleyException("dump: Could not create Silo file");
159    
160     /*
161     if (driver==DB_HDF5) {
162     // gzip level 1 already provides good compression with minimal
163     // performance penalty. Some tests showed that gzip levels >3 performed
164     // rather badly on escript data both in terms of time and space
165     DBSetCompression("ERRMODE=FALLBACK METHOD=GZIP LEVEL=1");
166     }
167     */
168    
169     boost::scoped_ptr<double> x(new double[m_N0]);
170     boost::scoped_ptr<double> y(new double[m_N1]);
171     boost::scoped_ptr<double> z(new double[m_N2]);
172     double* coords[3] = { x.get(), y.get(), z.get() };
173 caltinay 3698 pair<double,double> xdx = getFirstCoordAndSpacing(0);
174     pair<double,double> ydy = getFirstCoordAndSpacing(1);
175     pair<double,double> zdz = getFirstCoordAndSpacing(2);
176 caltinay 3691 #pragma omp parallel
177     {
178     #pragma omp for
179     for (dim_t i0 = 0; i0 < m_N0; i0++) {
180 caltinay 3698 coords[0][i0]=xdx.first+i0*xdx.second;
181 caltinay 3691 }
182     #pragma omp for
183     for (dim_t i1 = 0; i1 < m_N1; i1++) {
184 caltinay 3698 coords[1][i1]=ydy.first+i1*ydy.second;
185 caltinay 3691 }
186     #pragma omp for
187     for (dim_t i2 = 0; i2 < m_N2; i2++) {
188 caltinay 3698 coords[2][i2]=zdz.first+i2*zdz.second;
189 caltinay 3691 }
190     }
191 caltinay 3698 IndexVector dims = getNumNodesPerDim();
192     DBPutQuadmesh(dbfile, "mesh", NULL, coords, &dims[0], 3, DB_DOUBLE,
193 caltinay 3691 DB_COLLINEAR, NULL);
194    
195 caltinay 3698 DBPutQuadvar1(dbfile, "nodeId", "mesh", (void*)&m_nodeId[0], &dims[0], 3,
196     NULL, 0, DB_INT, DB_NODECENT, NULL);
197 caltinay 3691
198 caltinay 3698 // write element ids
199     dims = getNumElementsPerDim();
200     DBPutQuadvar1(dbfile, "elementId", "mesh", (void*)&m_elementId[0],
201     &dims[0], 3, NULL, 0, DB_INT, DB_ZONECENT, NULL);
202    
203     // rank 0 writes multimesh and multivar
204 caltinay 3691 if (m_mpiInfo->rank == 0) {
205     vector<string> tempstrings;
206     vector<char*> names;
207     for (dim_t i=0; i<m_mpiInfo->size; i++) {
208     stringstream path;
209     path << "/block" << setw(4) << setfill('0') << right << i << "/mesh";
210     tempstrings.push_back(path.str());
211     names.push_back((char*)tempstrings.back().c_str());
212     }
213     vector<int> types(m_mpiInfo->size, DB_QUAD_RECT);
214     DBSetDir(dbfile, "/");
215     DBPutMultimesh(dbfile, "multimesh", m_mpiInfo->size, &names[0],
216     &types[0], NULL);
217     tempstrings.clear();
218     names.clear();
219     for (dim_t i=0; i<m_mpiInfo->size; i++) {
220     stringstream path;
221     path << "/block" << setw(4) << setfill('0') << right << i << "/nodeId";
222     tempstrings.push_back(path.str());
223     names.push_back((char*)tempstrings.back().c_str());
224     }
225     types.assign(m_mpiInfo->size, DB_QUADVAR);
226     DBPutMultivar(dbfile, "nodeId", m_mpiInfo->size, &names[0],
227     &types[0], NULL);
228 caltinay 3698 tempstrings.clear();
229     names.clear();
230     for (dim_t i=0; i<m_mpiInfo->size; i++) {
231     stringstream path;
232     path << "/block" << setw(4) << setfill('0') << right << i << "/elementId";
233     tempstrings.push_back(path.str());
234     names.push_back((char*)tempstrings.back().c_str());
235     }
236     DBPutMultivar(dbfile, "elementId", m_mpiInfo->size, &names[0],
237     &types[0], NULL);
238 caltinay 3691 }
239    
240     if (m_mpiInfo->size > 1) {
241     #ifdef ESYS_MPI
242     PMPIO_HandOffBaton(baton, dbfile);
243     PMPIO_Finish(baton);
244     #endif
245     } else {
246     DBClose(dbfile);
247     }
248    
249     #else // USE_SILO
250     throw RipleyException("dump(): no Silo support");
251     #endif
252     }
253    
254     const int* Brick::borrowSampleReferenceIDs(int fsType) const
255     {
256 caltinay 3697 switch (fsType) {
257     case Nodes:
258 caltinay 3748 case ReducedNodes: //FIXME: reduced
259 caltinay 3753 return &m_nodeId[0];
260 caltinay 3750 case DegreesOfFreedom: //FIXME
261     case ReducedDegreesOfFreedom: //FIXME
262 caltinay 3753 return &m_dofId[0];
263 caltinay 3697 case Elements:
264 caltinay 3733 case ReducedElements:
265 caltinay 3697 return &m_elementId[0];
266 caltinay 3733 case ReducedFaceElements:
267 caltinay 3697 return &m_faceId[0];
268     default:
269     break;
270     }
271 caltinay 3691
272 caltinay 3697 stringstream msg;
273     msg << "borrowSampleReferenceIDs() not implemented for function space type "
274     << fsType;
275     throw RipleyException(msg.str());
276 caltinay 3691 }
277    
278     bool Brick::ownSample(int fsCode, index_t id) const
279     {
280     #ifdef ESYS_MPI
281     if (fsCode == Nodes) {
282 caltinay 3753 const index_t left = (m_offset0==0 ? 0 : 1);
283     const index_t bottom = (m_offset1==0 ? 0 : 1);
284     const index_t front = (m_offset2==0 ? 0 : 1);
285     const index_t right = (m_mpiInfo->rank%m_NX==m_NX-1 ? m_N0 : m_N0-1);
286     const index_t top = (m_mpiInfo->rank%(m_NX*m_NY)/m_NX==m_NY-1 ? m_N1 : m_N1-1);
287     const index_t back = (m_mpiInfo->rank/(m_NX*m_NY)==m_NZ-1 ? m_N2 : m_N2-1);
288     const index_t x=id%m_N0;
289     const index_t y=id%(m_N0*m_N1)/m_N0;
290     const index_t z=id/(m_N0*m_N1);
291     return (x>=left && x<right && y>=bottom && y<top && z>=front && z<back);
292    
293     } else {
294     stringstream msg;
295     msg << "ownSample() not implemented for "
296     << functionSpaceTypeAsString(fsCode);
297     throw RipleyException(msg.str());
298     }
299 caltinay 3691 #else
300     return true;
301     #endif
302     }
303    
304 caltinay 3703 void Brick::setToGradient(escript::Data& out, const escript::Data& cIn) const
305     {
306     escript::Data& in = *const_cast<escript::Data*>(&cIn);
307     const dim_t numComp = in.getDataPointSize();
308     const double h0 = m_l0/m_gNE0;
309     const double h1 = m_l1/m_gNE1;
310     const double h2 = m_l1/m_gNE2;
311 caltinay 3731 const double C0 = .044658198738520451079;
312     const double C1 = .16666666666666666667;
313     const double C2 = .21132486540518711775;
314     const double C3 = .25;
315     const double C4 = .5;
316     const double C5 = .62200846792814621559;
317     const double C6 = .78867513459481288225;
318    
319 caltinay 3703 if (out.getFunctionSpace().getTypeCode() == Elements) {
320 caltinay 3731 /*** GENERATOR SNIP_GRAD_ELEMENTS TOP */
321 caltinay 3703 #pragma omp parallel for
322 caltinay 3707 for (index_t k2=0; k2 < m_NE2; ++k2) {
323     for (index_t k1=0; k1 < m_NE1; ++k1) {
324     for (index_t k0=0; k0 < m_NE0; ++k0) {
325 caltinay 3703 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,k2, m_N0,m_N1));
326     const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,k2+1, m_N0,m_N1));
327     const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,k2+1, m_N0,m_N1));
328     const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2+1, m_N0,m_N1));
329     const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2, m_N0,m_N1));
330     const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,k2+1, m_N0,m_N1));
331     const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,k2, m_N0,m_N1));
332     const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,k2, m_N0,m_N1));
333     double* o = out.getSampleDataRW(INDEX3(k0,k1,k2,m_NE0,m_NE1));
334     for (index_t i=0; i < numComp; ++i) {
335 caltinay 3731 const double V0=((f_100[i]-f_000[i])*C5 + (f_111[i]-f_011[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / h0;
336     const double V1=((f_110[i]-f_010[i])*C5 + (f_101[i]-f_001[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / h0;
337     const double V2=((f_101[i]-f_001[i])*C5 + (f_110[i]-f_010[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / h0;
338     const double V3=((f_111[i]-f_011[i])*C5 + (f_100[i]-f_000[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / h0;
339     const double V4=((f_010[i]-f_000[i])*C5 + (f_111[i]-f_101[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / h1;
340     const double V5=((f_110[i]-f_100[i])*C5 + (f_011[i]-f_001[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / h1;
341     const double V6=((f_011[i]-f_001[i])*C5 + (f_110[i]-f_100[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / h1;
342     const double V7=((f_111[i]-f_101[i])*C5 + (f_010[i]-f_000[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / h1;
343     const double V8=((f_001[i]-f_000[i])*C5 + (f_111[i]-f_110[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / h2;
344     const double V9=((f_101[i]-f_100[i])*C5 + (f_011[i]-f_010[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / h2;
345     const double V10=((f_011[i]-f_010[i])*C5 + (f_101[i]-f_100[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / h2;
346     const double V11=((f_111[i]-f_110[i])*C5 + (f_001[i]-f_000[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / h2;
347     o[INDEX3(i,0,0,numComp,3)] = V0;
348     o[INDEX3(i,1,0,numComp,3)] = V4;
349     o[INDEX3(i,2,0,numComp,3)] = V8;
350     o[INDEX3(i,0,1,numComp,3)] = V0;
351     o[INDEX3(i,1,1,numComp,3)] = V5;
352     o[INDEX3(i,2,1,numComp,3)] = V9;
353     o[INDEX3(i,0,2,numComp,3)] = V1;
354     o[INDEX3(i,1,2,numComp,3)] = V4;
355     o[INDEX3(i,2,2,numComp,3)] = V10;
356     o[INDEX3(i,0,3,numComp,3)] = V1;
357     o[INDEX3(i,1,3,numComp,3)] = V5;
358     o[INDEX3(i,2,3,numComp,3)] = V11;
359     o[INDEX3(i,0,4,numComp,3)] = V2;
360     o[INDEX3(i,1,4,numComp,3)] = V6;
361     o[INDEX3(i,2,4,numComp,3)] = V8;
362     o[INDEX3(i,0,5,numComp,3)] = V2;
363     o[INDEX3(i,1,5,numComp,3)] = V7;
364     o[INDEX3(i,2,5,numComp,3)] = V9;
365     o[INDEX3(i,0,6,numComp,3)] = V3;
366     o[INDEX3(i,1,6,numComp,3)] = V6;
367     o[INDEX3(i,2,6,numComp,3)] = V10;
368     o[INDEX3(i,0,7,numComp,3)] = V3;
369     o[INDEX3(i,1,7,numComp,3)] = V7;
370     o[INDEX3(i,2,7,numComp,3)] = V11;
371 caltinay 3703 } /* end of component loop i */
372     } /* end of k0 loop */
373     } /* end of k1 loop */
374     } /* end of k2 loop */
375     /* GENERATOR SNIP_GRAD_ELEMENTS BOTTOM */
376 caltinay 3711 } else if (out.getFunctionSpace().getTypeCode() == ReducedElements) {
377 caltinay 3731 /*** GENERATOR SNIP_GRAD_REDUCED_ELEMENTS TOP */
378 caltinay 3711 #pragma omp parallel for
379     for (index_t k2=0; k2 < m_NE2; ++k2) {
380     for (index_t k1=0; k1 < m_NE1; ++k1) {
381     for (index_t k0=0; k0 < m_NE0; ++k0) {
382     const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,k2, m_N0,m_N1));
383     const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,k2+1, m_N0,m_N1));
384     const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,k2+1, m_N0,m_N1));
385     const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,k2+1, m_N0,m_N1));
386     const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,k2, m_N0,m_N1));
387     const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2, m_N0,m_N1));
388     const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,k2, m_N0,m_N1));
389     const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2+1, m_N0,m_N1));
390     double* o = out.getSampleDataRW(INDEX3(k0,k1,k2,m_NE0,m_NE1));
391     for (index_t i=0; i < numComp; ++i) {
392 caltinay 3731 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_010[i]-f_011[i])*C3 / h0;
393     o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_100[i]-f_101[i])*C3 / h1;
394     o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_011[i]+f_101[i]+f_111[i]-f_000[i]-f_010[i]-f_100[i]-f_110[i])*C3 / h2;
395 caltinay 3711 } /* end of component loop i */
396     } /* end of k0 loop */
397     } /* end of k1 loop */
398     } /* end of k2 loop */
399     /* GENERATOR SNIP_GRAD_REDUCED_ELEMENTS BOTTOM */
400 caltinay 3707 } else if (out.getFunctionSpace().getTypeCode() == FaceElements) {
401 caltinay 3731 /*** GENERATOR SNIP_GRAD_FACES TOP */
402 caltinay 3722 #pragma omp parallel
403     {
404     if (m_faceOffset[0] > -1) {
405     #pragma omp for nowait
406     for (index_t k2=0; k2 < m_NE2; ++k2) {
407     for (index_t k1=0; k1 < m_NE1; ++k1) {
408     const register double* f_000 = in.getSampleDataRO(INDEX3(0,k1,k2, m_N0,m_N1));
409     const register double* f_001 = in.getSampleDataRO(INDEX3(0,k1,k2+1, m_N0,m_N1));
410     const register double* f_101 = in.getSampleDataRO(INDEX3(1,k1,k2+1, m_N0,m_N1));
411     const register double* f_111 = in.getSampleDataRO(INDEX3(1,k1+1,k2+1, m_N0,m_N1));
412     const register double* f_110 = in.getSampleDataRO(INDEX3(1,k1+1,k2, m_N0,m_N1));
413     const register double* f_011 = in.getSampleDataRO(INDEX3(0,k1+1,k2+1, m_N0,m_N1));
414     const register double* f_010 = in.getSampleDataRO(INDEX3(0,k1+1,k2, m_N0,m_N1));
415     const register double* f_100 = in.getSampleDataRO(INDEX3(1,k1,k2, m_N0,m_N1));
416     double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE1));
417     for (index_t i=0; i < numComp; ++i) {
418 caltinay 3731 const double V0=((f_010[i]-f_000[i])*C6 + (f_011[i]-f_001[i])*C2) / h1;
419     const double V1=((f_010[i]-f_000[i])*C2 + (f_011[i]-f_001[i])*C6) / h1;
420     const double V2=((f_001[i]-f_000[i])*C6 + (f_010[i]-f_011[i])*C2) / h2;
421     const double V3=((f_001[i]-f_000[i])*C2 + (f_011[i]-f_010[i])*C6) / h2;
422     o[INDEX3(i,0,0,numComp,3)] = ((f_100[i]-f_000[i])*C5 + (f_111[i]-f_011[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / h0;
423     o[INDEX3(i,1,0,numComp,3)] = V0;
424     o[INDEX3(i,2,0,numComp,3)] = V2;
425     o[INDEX3(i,0,1,numComp,3)] = ((f_110[i]-f_010[i])*C5 + (f_101[i]-f_001[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / h0;
426     o[INDEX3(i,1,1,numComp,3)] = V0;
427     o[INDEX3(i,2,1,numComp,3)] = V3;
428     o[INDEX3(i,0,2,numComp,3)] = ((f_101[i]-f_001[i])*C5 + (f_110[i]-f_010[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / h0;
429     o[INDEX3(i,1,2,numComp,3)] = V1;
430     o[INDEX3(i,2,2,numComp,3)] = V2;
431     o[INDEX3(i,0,3,numComp,3)] = ((f_111[i]-f_011[i])*C5 + (f_100[i]-f_000[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / h0;
432     o[INDEX3(i,1,3,numComp,3)] = V1;
433     o[INDEX3(i,2,3,numComp,3)] = V3;
434 caltinay 3722 } /* end of component loop i */
435     } /* end of k1 loop */
436     } /* end of k2 loop */
437     } /* end of face 0 */
438     if (m_faceOffset[1] > -1) {
439     #pragma omp for nowait
440     for (index_t k2=0; k2 < m_NE2; ++k2) {
441     for (index_t k1=0; k1 < m_NE1; ++k1) {
442     const register double* f_000 = in.getSampleDataRO(INDEX3(m_N0-2,k1,k2, m_N0,m_N1));
443     const register double* f_001 = in.getSampleDataRO(INDEX3(m_N0-2,k1,k2+1, m_N0,m_N1));
444     const register double* f_101 = in.getSampleDataRO(INDEX3(m_N0-1,k1,k2+1, m_N0,m_N1));
445     const register double* f_111 = in.getSampleDataRO(INDEX3(m_N0-1,k1+1,k2+1, m_N0,m_N1));
446     const register double* f_110 = in.getSampleDataRO(INDEX3(m_N0-1,k1+1,k2, m_N0,m_N1));
447     const register double* f_011 = in.getSampleDataRO(INDEX3(m_N0-2,k1+1,k2+1, m_N0,m_N1));
448     const register double* f_010 = in.getSampleDataRO(INDEX3(m_N0-2,k1+1,k2, m_N0,m_N1));
449     const register double* f_100 = in.getSampleDataRO(INDEX3(m_N0-1,k1,k2, m_N0,m_N1));
450     double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE1));
451     for (index_t i=0; i < numComp; ++i) {
452 caltinay 3731 const double V0=((f_110[i]-f_100[i])*C6 + (f_111[i]-f_101[i])*C2) / h1;
453     const double V1=((f_110[i]-f_100[i])*C2 + (f_111[i]-f_101[i])*C6) / h1;
454     const double V2=((f_101[i]-f_100[i])*C6 + (f_111[i]-f_110[i])*C2) / h2;
455     const double V3=((f_101[i]-f_100[i])*C2 + (f_111[i]-f_110[i])*C6) / h2;
456     o[INDEX3(i,0,0,numComp,3)] = ((f_100[i]-f_000[i])*C5 + (f_111[i]-f_011[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / h0;
457     o[INDEX3(i,1,0,numComp,3)] = V0;
458     o[INDEX3(i,2,0,numComp,3)] = V2;
459     o[INDEX3(i,0,1,numComp,3)] = ((f_110[i]-f_010[i])*C5 + (f_101[i]-f_001[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / h0;
460     o[INDEX3(i,1,1,numComp,3)] = V0;
461     o[INDEX3(i,2,1,numComp,3)] = V3;
462     o[INDEX3(i,0,2,numComp,3)] = ((f_101[i]-f_001[i])*C5 + (f_110[i]-f_010[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / h0;
463     o[INDEX3(i,1,2,numComp,3)] = V1;
464     o[INDEX3(i,2,2,numComp,3)] = V2;
465     o[INDEX3(i,0,3,numComp,3)] = ((f_111[i]-f_011[i])*C5 + (f_100[i]-f_000[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / h0;
466     o[INDEX3(i,1,3,numComp,3)] = V1;
467     o[INDEX3(i,2,3,numComp,3)] = V3;
468 caltinay 3722 } /* end of component loop i */
469     } /* end of k1 loop */
470     } /* end of k2 loop */
471     } /* end of face 1 */
472     if (m_faceOffset[2] > -1) {
473     #pragma omp for nowait
474     for (index_t k2=0; k2 < m_NE2; ++k2) {
475     for (index_t k0=0; k0 < m_NE0; ++k0) {
476     const register double* f_000 = in.getSampleDataRO(INDEX3(k0,0,k2, m_N0,m_N1));
477     const register double* f_001 = in.getSampleDataRO(INDEX3(k0,0,k2+1, m_N0,m_N1));
478     const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,0,k2+1, m_N0,m_N1));
479     const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,0,k2, m_N0,m_N1));
480     const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,1,k2+1, m_N0,m_N1));
481     const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,1,k2, m_N0,m_N1));
482     const register double* f_011 = in.getSampleDataRO(INDEX3(k0,1,k2+1, m_N0,m_N1));
483     const register double* f_010 = in.getSampleDataRO(INDEX3(k0,1,k2, m_N0,m_N1));
484     double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE0));
485     for (index_t i=0; i < numComp; ++i) {
486 caltinay 3731 const double V0=((f_100[i]-f_000[i])*C6 + (f_101[i]-f_001[i])*C2) / h0;
487     const double V1=((f_001[i]-f_000[i])*C6 + (f_101[i]-f_100[i])*C2) / h2;
488     const double V2=((f_001[i]-f_000[i])*C2 + (f_101[i]-f_100[i])*C6) / h2;
489     o[INDEX3(i,0,0,numComp,3)] = V0;
490     o[INDEX3(i,1,0,numComp,3)] = ((f_010[i]-f_000[i])*C5 + (f_111[i]-f_101[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / h1;
491     o[INDEX3(i,2,0,numComp,3)] = V1;
492     o[INDEX3(i,0,1,numComp,3)] = V0;
493     o[INDEX3(i,1,1,numComp,3)] = ((f_110[i]-f_100[i])*C5 + (f_011[i]-f_001[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / h1;
494     o[INDEX3(i,2,1,numComp,3)] = V2;
495     o[INDEX3(i,0,2,numComp,3)] = V0;
496     o[INDEX3(i,1,2,numComp,3)] = ((f_011[i]-f_001[i])*C5 + (f_110[i]-f_100[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / h1;
497     o[INDEX3(i,2,2,numComp,3)] = V1;
498     o[INDEX3(i,0,3,numComp,3)] = V0;
499     o[INDEX3(i,1,3,numComp,3)] = ((f_111[i]-f_101[i])*C5 + (f_010[i]-f_000[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / h1;
500     o[INDEX3(i,2,3,numComp,3)] = V2;
501 caltinay 3722 } /* end of component loop i */
502     } /* end of k0 loop */
503     } /* end of k2 loop */
504     } /* end of face 2 */
505     if (m_faceOffset[3] > -1) {
506     #pragma omp for nowait
507     for (index_t k2=0; k2 < m_NE2; ++k2) {
508     for (index_t k0=0; k0 < m_NE0; ++k0) {
509     const register double* f_011 = in.getSampleDataRO(INDEX3(k0,m_N1-1,k2+1, m_N0,m_N1));
510     const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,m_N1-1,k2, m_N0,m_N1));
511     const register double* f_010 = in.getSampleDataRO(INDEX3(k0,m_N1-1,k2, m_N0,m_N1));
512     const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,m_N1-1,k2+1, m_N0,m_N1));
513     const register double* f_000 = in.getSampleDataRO(INDEX3(k0,m_N1-2,k2, m_N0,m_N1));
514     const register double* f_001 = in.getSampleDataRO(INDEX3(k0,m_N1-2,k2+1, m_N0,m_N1));
515     const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,m_N1-2,k2+1, m_N0,m_N1));
516     const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,m_N1-2,k2, m_N0,m_N1));
517     double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE0));
518     for (index_t i=0; i < numComp; ++i) {
519 caltinay 3731 const double V0=((f_110[i]-f_010[i])*C6 + (f_111[i]-f_011[i])*C2) / h0;
520     const double V1=((f_110[i]-f_010[i])*C2 + (f_111[i]-f_011[i])*C6) / h0;
521     const double V2=((f_011[i]-f_010[i])*C6 + (f_111[i]-f_110[i])*C2) / h2;
522     const double V3=((f_011[i]-f_010[i])*C2 + (f_111[i]-f_110[i])*C6) / h2;
523     o[INDEX3(i,0,0,numComp,3)] = V0;
524     o[INDEX3(i,1,0,numComp,3)] = ((f_010[i]-f_000[i])*C5 + (f_111[i]-f_101[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / h1;
525     o[INDEX3(i,2,0,numComp,3)] = V2;
526     o[INDEX3(i,0,1,numComp,3)] = V0;
527     o[INDEX3(i,1,1,numComp,3)] = ((f_110[i]-f_100[i])*C5 + (f_011[i]-f_001[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / h1;
528     o[INDEX3(i,2,1,numComp,3)] = V3;
529     o[INDEX3(i,0,2,numComp,3)] = V1;
530     o[INDEX3(i,1,2,numComp,3)] = ((f_011[i]-f_001[i])*C5 + (f_110[i]-f_100[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / h1;
531     o[INDEX3(i,2,2,numComp,3)] = V2;
532     o[INDEX3(i,0,3,numComp,3)] = V1;
533     o[INDEX3(i,1,3,numComp,3)] = ((f_111[i]-f_101[i])*C5 + (f_010[i]-f_000[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / h1;
534     o[INDEX3(i,2,3,numComp,3)] = V3;
535 caltinay 3722 } /* end of component loop i */
536     } /* end of k0 loop */
537     } /* end of k2 loop */
538     } /* end of face 3 */
539     if (m_faceOffset[4] > -1) {
540     #pragma omp for nowait
541 caltinay 3707 for (index_t k1=0; k1 < m_NE1; ++k1) {
542 caltinay 3722 for (index_t k0=0; k0 < m_NE0; ++k0) {
543     const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,0, m_N0,m_N1));
544     const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,0, m_N0,m_N1));
545     const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,0, m_N0,m_N1));
546     const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,0, m_N0,m_N1));
547     const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,1, m_N0,m_N1));
548     const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,1, m_N0,m_N1));
549     const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,1, m_N0,m_N1));
550     const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,1, m_N0,m_N1));
551     double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE0));
552     for (index_t i=0; i < numComp; ++i) {
553 caltinay 3731 const double V0=((f_100[i]-f_000[i])*C6 + (f_110[i]-f_010[i])*C2) / h0;
554     const double V1=((f_100[i]-f_000[i])*C2 + (f_110[i]-f_010[i])*C6) / h0;
555     const double V2=((f_010[i]-f_000[i])*C6 + (f_110[i]-f_100[i])*C2) / h1;
556     const double V3=((f_010[i]-f_000[i])*C2 + (f_110[i]-f_100[i])*C6) / h1;
557     o[INDEX3(i,0,0,numComp,3)] = V0;
558     o[INDEX3(i,1,0,numComp,3)] = V2;
559     o[INDEX3(i,2,0,numComp,3)] = ((f_001[i]-f_000[i])*C5 + (f_111[i]-f_110[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / h2;
560     o[INDEX3(i,0,1,numComp,3)] = V0;
561     o[INDEX3(i,1,1,numComp,3)] = V3;
562     o[INDEX3(i,2,1,numComp,3)] = ((f_101[i]-f_100[i])*C5 + (f_011[i]-f_010[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / h2;
563     o[INDEX3(i,0,2,numComp,3)] = V1;
564     o[INDEX3(i,1,2,numComp,3)] = V2;
565     o[INDEX3(i,2,2,numComp,3)] = ((f_011[i]-f_010[i])*C5 + (f_101[i]-f_100[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / h2;
566     o[INDEX3(i,0,3,numComp,3)] = V1;
567     o[INDEX3(i,1,3,numComp,3)] = V3;
568     o[INDEX3(i,2,3,numComp,3)] = ((f_111[i]-f_110[i])*C5 + (f_001[i]-f_000[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / h2;
569 caltinay 3722 } /* end of component loop i */
570     } /* end of k0 loop */
571 caltinay 3707 } /* end of k1 loop */
572 caltinay 3722 } /* end of face 4 */
573     if (m_faceOffset[5] > -1) {
574     #pragma omp for nowait
575 caltinay 3707 for (index_t k1=0; k1 < m_NE1; ++k1) {
576 caltinay 3722 for (index_t k0=0; k0 < m_NE0; ++k0) {
577     const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,m_N2-1, m_N0,m_N1));
578     const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,m_N2-1, m_N0,m_N1));
579     const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,m_N2-1, m_N0,m_N1));
580     const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,m_N2-1, m_N0,m_N1));
581     const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,m_N2-2, m_N0,m_N1));
582     const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,m_N2-2, m_N0,m_N1));
583     const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,m_N2-2, m_N0,m_N1));
584     const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,m_N2-2, m_N0,m_N1));
585     double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE0));
586     for (index_t i=0; i < numComp; ++i) {
587 caltinay 3731 const double V0=((f_101[i]-f_001[i])*C6 + (f_111[i]-f_011[i])*C2) / h0;
588     const double V1=((f_101[i]-f_001[i])*C2 + (f_111[i]-f_011[i])*C6) / h0;
589     const double V2=((f_011[i]-f_001[i])*C6 + (f_111[i]-f_101[i])*C2) / h1;
590     const double V3=((f_011[i]-f_001[i])*C2 + (f_111[i]-f_101[i])*C6) / h1;
591     o[INDEX3(i,0,0,numComp,3)] = V0;
592     o[INDEX3(i,1,0,numComp,3)] = V2;
593     o[INDEX3(i,2,0,numComp,3)] = ((f_001[i]-f_000[i])*C5 + (f_111[i]-f_110[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / h2;
594     o[INDEX3(i,0,1,numComp,3)] = V0;
595     o[INDEX3(i,1,1,numComp,3)] = V3;
596     o[INDEX3(i,2,1,numComp,3)] = ((f_011[i]-f_010[i])*C0 + (f_101[i]-f_100[i])*C5 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / h2;
597     o[INDEX3(i,0,2,numComp,3)] = V1;
598     o[INDEX3(i,1,2,numComp,3)] = V2;
599     o[INDEX3(i,2,2,numComp,3)] = ((f_011[i]-f_010[i])*C5 + (f_101[i]-f_100[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / h2;
600     o[INDEX3(i,0,3,numComp,3)] = V1;
601     o[INDEX3(i,1,3,numComp,3)] = V3;
602     o[INDEX3(i,2,3,numComp,3)] = ((f_001[i]-f_000[i])*C0 + (f_111[i]-f_110[i])*C5 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / h2;
603 caltinay 3722 } /* end of component loop i */
604     } /* end of k0 loop */
605 caltinay 3707 } /* end of k1 loop */
606 caltinay 3722 } /* end of face 5 */
607     } // end of parallel section
608 caltinay 3707 /* GENERATOR SNIP_GRAD_FACES BOTTOM */
609 caltinay 3711 } else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
610 caltinay 3731 /*** GENERATOR SNIP_GRAD_REDUCED_FACES TOP */
611 caltinay 3722 #pragma omp parallel
612     {
613     if (m_faceOffset[0] > -1) {
614     #pragma omp for nowait
615     for (index_t k2=0; k2 < m_NE2; ++k2) {
616     for (index_t k1=0; k1 < m_NE1; ++k1) {
617     const register double* f_000 = in.getSampleDataRO(INDEX3(0,k1,k2, m_N0,m_N1));
618     const register double* f_001 = in.getSampleDataRO(INDEX3(0,k1,k2+1, m_N0,m_N1));
619     const register double* f_101 = in.getSampleDataRO(INDEX3(1,k1,k2+1, m_N0,m_N1));
620     const register double* f_011 = in.getSampleDataRO(INDEX3(0,k1+1,k2+1, m_N0,m_N1));
621     const register double* f_100 = in.getSampleDataRO(INDEX3(1,k1,k2, m_N0,m_N1));
622     const register double* f_110 = in.getSampleDataRO(INDEX3(1,k1+1,k2, m_N0,m_N1));
623     const register double* f_010 = in.getSampleDataRO(INDEX3(0,k1+1,k2, m_N0,m_N1));
624     const register double* f_111 = in.getSampleDataRO(INDEX3(1,k1+1,k2+1, m_N0,m_N1));
625     double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE1));
626     for (index_t i=0; i < numComp; ++i) {
627 caltinay 3731 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_010[i]-f_011[i])*C3 / h0;
628     o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]-f_000[i]-f_001[i])*C4 / h1;
629     o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_011[i]-f_000[i]-f_010[i])*C4 / h2;
630 caltinay 3722 } /* end of component loop i */
631     } /* end of k1 loop */
632     } /* end of k2 loop */
633     } /* end of face 0 */
634     if (m_faceOffset[1] > -1) {
635     #pragma omp for nowait
636     for (index_t k2=0; k2 < m_NE2; ++k2) {
637     for (index_t k1=0; k1 < m_NE1; ++k1) {
638     const register double* f_000 = in.getSampleDataRO(INDEX3(m_N0-2,k1,k2, m_N0,m_N1));
639     const register double* f_001 = in.getSampleDataRO(INDEX3(m_N0-2,k1,k2+1, m_N0,m_N1));
640     const register double* f_101 = in.getSampleDataRO(INDEX3(m_N0-1,k1,k2+1, m_N0,m_N1));
641     const register double* f_011 = in.getSampleDataRO(INDEX3(m_N0-2,k1+1,k2+1, m_N0,m_N1));
642     const register double* f_100 = in.getSampleDataRO(INDEX3(m_N0-1,k1,k2, m_N0,m_N1));
643     const register double* f_110 = in.getSampleDataRO(INDEX3(m_N0-1,k1+1,k2, m_N0,m_N1));
644     const register double* f_010 = in.getSampleDataRO(INDEX3(m_N0-2,k1+1,k2, m_N0,m_N1));
645     const register double* f_111 = in.getSampleDataRO(INDEX3(m_N0-1,k1+1,k2+1, m_N0,m_N1));
646     double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE1));
647     for (index_t i=0; i < numComp; ++i) {
648 caltinay 3731 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_010[i]-f_011[i])*C3 / h0;
649     o[INDEX3(i,1,0,numComp,3)] = (f_110[i]+f_111[i]-f_100[i]-f_101[i])*C4 / h1;
650     o[INDEX3(i,2,0,numComp,3)] = (f_101[i]+f_111[i]-f_100[i]-f_110[i])*C4 / h2;
651 caltinay 3722 } /* end of component loop i */
652     } /* end of k1 loop */
653     } /* end of k2 loop */
654     } /* end of face 1 */
655     if (m_faceOffset[2] > -1) {
656     #pragma omp for nowait
657     for (index_t k2=0; k2 < m_NE2; ++k2) {
658     for (index_t k0=0; k0 < m_NE0; ++k0) {
659     const register double* f_000 = in.getSampleDataRO(INDEX3(k0,0,k2, m_N0,m_N1));
660     const register double* f_001 = in.getSampleDataRO(INDEX3(k0,0,k2+1, m_N0,m_N1));
661     const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,0,k2+1, m_N0,m_N1));
662     const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,0,k2, m_N0,m_N1));
663     const register double* f_011 = in.getSampleDataRO(INDEX3(k0,1,k2+1, m_N0,m_N1));
664     const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,1,k2, m_N0,m_N1));
665     const register double* f_010 = in.getSampleDataRO(INDEX3(k0,1,k2, m_N0,m_N1));
666     const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,1,k2+1, m_N0,m_N1));
667     double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE0));
668     for (index_t i=0; i < numComp; ++i) {
669 caltinay 3731 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]-f_000[i]-f_001[i])*C4 / h0;
670     o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_100[i]-f_101[i])*C3 / h1;
671     o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_101[i]-f_000[i]-f_100[i])*C4 / h2;
672 caltinay 3722 } /* end of component loop i */
673     } /* end of k0 loop */
674     } /* end of k2 loop */
675     } /* end of face 2 */
676     if (m_faceOffset[3] > -1) {
677     #pragma omp for nowait
678     for (index_t k2=0; k2 < m_NE2; ++k2) {
679     for (index_t k0=0; k0 < m_NE0; ++k0) {
680     const register double* f_011 = in.getSampleDataRO(INDEX3(k0,m_N1-1,k2+1, m_N0,m_N1));
681     const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,m_N1-1,k2, m_N0,m_N1));
682     const register double* f_010 = in.getSampleDataRO(INDEX3(k0,m_N1-1,k2, m_N0,m_N1));
683     const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,m_N1-1,k2+1, m_N0,m_N1));
684     const register double* f_000 = in.getSampleDataRO(INDEX3(k0,m_N1-2,k2, m_N0,m_N1));
685     const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,m_N1-2,k2+1, m_N0,m_N1));
686     const register double* f_001 = in.getSampleDataRO(INDEX3(k0,m_N1-2,k2+1, m_N0,m_N1));
687     const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,m_N1-2,k2, m_N0,m_N1));
688     double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE0));
689     for (index_t i=0; i < numComp; ++i) {
690 caltinay 3731 o[INDEX3(i,0,0,numComp,3)] = (f_110[i]+f_111[i]-f_010[i]-f_011[i])*C4 / h0;
691     o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_100[i]-f_101[i])*C3 / h1;
692     o[INDEX3(i,2,0,numComp,3)] = (f_011[i]+f_111[i]-f_010[i]-f_110[i])*C4 / h2;
693 caltinay 3722 } /* end of component loop i */
694     } /* end of k0 loop */
695     } /* end of k2 loop */
696     } /* end of face 3 */
697     if (m_faceOffset[4] > -1) {
698     #pragma omp for nowait
699 caltinay 3711 for (index_t k1=0; k1 < m_NE1; ++k1) {
700 caltinay 3722 for (index_t k0=0; k0 < m_NE0; ++k0) {
701     const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,0, m_N0,m_N1));
702     const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,0, m_N0,m_N1));
703     const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,0, m_N0,m_N1));
704     const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,0, m_N0,m_N1));
705     const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,1, m_N0,m_N1));
706     const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,1, m_N0,m_N1));
707     const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,1, m_N0,m_N1));
708     const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,1, m_N0,m_N1));
709     double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE0));
710     for (index_t i=0; i < numComp; ++i) {
711 caltinay 3731 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_110[i]-f_000[i]-f_010[i])*C4 / h0;
712     o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_110[i]-f_000[i]-f_100[i])*C4 / h1;
713     o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_011[i]+f_101[i]+f_111[i]-f_000[i]-f_010[i]-f_100[i]-f_110[i])*C4 / h2;
714 caltinay 3722 } /* end of component loop i */
715     } /* end of k0 loop */
716 caltinay 3711 } /* end of k1 loop */
717 caltinay 3722 } /* end of face 4 */
718     if (m_faceOffset[5] > -1) {
719     #pragma omp for nowait
720 caltinay 3711 for (index_t k1=0; k1 < m_NE1; ++k1) {
721 caltinay 3722 for (index_t k0=0; k0 < m_NE0; ++k0) {
722     const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,m_N2-1, m_N0,m_N1));
723     const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,m_N2-1, m_N0,m_N1));
724     const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,m_N2-1, m_N0,m_N1));
725     const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,m_N2-1, m_N0,m_N1));
726     const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,m_N2-2, m_N0,m_N1));
727     const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,m_N2-2, m_N0,m_N1));
728     const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,m_N2-2, m_N0,m_N1));
729     const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,m_N2-2, m_N0,m_N1));
730     double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE0));
731     for (index_t i=0; i < numComp; ++i) {
732 caltinay 3731 o[INDEX3(i,0,0,numComp,3)] = (f_101[i]+f_111[i]-f_001[i]-f_011[i])*C4 / h0;
733     o[INDEX3(i,1,0,numComp,3)] = (f_011[i]+f_111[i]-f_001[i]-f_101[i])*C4 / h1;
734     o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_011[i]+f_101[i]+f_111[i]-f_000[i]-f_010[i]-f_100[i]-f_110[i])*C3 / h2;
735 caltinay 3722 } /* end of component loop i */
736     } /* end of k0 loop */
737 caltinay 3711 } /* end of k1 loop */
738 caltinay 3722 } /* end of face 5 */
739     } // end of parallel section
740 caltinay 3711 /* GENERATOR SNIP_GRAD_REDUCED_FACES BOTTOM */
741 caltinay 3703 } else {
742 caltinay 3707 stringstream msg;
743     msg << "setToGradient() not implemented for "
744     << functionSpaceTypeAsString(out.getFunctionSpace().getTypeCode());
745     throw RipleyException(msg.str());
746 caltinay 3703 }
747     }
748    
749 caltinay 3713 void Brick::setToIntegrals(vector<double>& integrals, const escript::Data& arg) const
750     {
751     escript::Data& in = *const_cast<escript::Data*>(&arg);
752     const dim_t numComp = in.getDataPointSize();
753     const double h0 = m_l0/m_gNE0;
754     const double h1 = m_l1/m_gNE1;
755     const double h2 = m_l2/m_gNE2;
756     if (arg.getFunctionSpace().getTypeCode() == Elements) {
757     const double w_0 = h0*h1*h2/8.;
758     #pragma omp parallel
759     {
760     vector<double> int_local(numComp, 0);
761 caltinay 3722 #pragma omp for nowait
762 caltinay 3713 for (index_t k2 = 0; k2 < m_NE2; ++k2) {
763     for (index_t k1 = 0; k1 < m_NE1; ++k1) {
764     for (index_t k0 = 0; k0 < m_NE0; ++k0) {
765     const double* f = in.getSampleDataRO(INDEX3(k0, k1, k2, m_NE0, m_NE1));
766     for (index_t i=0; i < numComp; ++i) {
767     const register double f_0 = f[INDEX2(i,0,numComp)];
768     const register double f_1 = f[INDEX2(i,1,numComp)];
769     const register double f_2 = f[INDEX2(i,2,numComp)];
770     const register double f_3 = f[INDEX2(i,3,numComp)];
771     const register double f_4 = f[INDEX2(i,4,numComp)];
772     const register double f_5 = f[INDEX2(i,5,numComp)];
773     const register double f_6 = f[INDEX2(i,6,numComp)];
774     const register double f_7 = f[INDEX2(i,7,numComp)];
775     int_local[i]+=(f_0+f_1+f_2+f_3+f_4+f_5+f_6+f_7)*w_0;
776     } /* end of component loop i */
777     } /* end of k0 loop */
778     } /* end of k1 loop */
779     } /* end of k2 loop */
780    
781     #pragma omp critical
782     for (index_t i=0; i<numComp; i++)
783     integrals[i]+=int_local[i];
784 caltinay 3722 } // end of parallel section
785 caltinay 3713 } else if (arg.getFunctionSpace().getTypeCode() == ReducedElements) {
786     const double w_0 = h0*h1*h2;
787     #pragma omp parallel
788     {
789     vector<double> int_local(numComp, 0);
790 caltinay 3722 #pragma omp for nowait
791 caltinay 3713 for (index_t k2 = 0; k2 < m_NE2; ++k2) {
792     for (index_t k1 = 0; k1 < m_NE1; ++k1) {
793     for (index_t k0 = 0; k0 < m_NE0; ++k0) {
794     const double* f = in.getSampleDataRO(INDEX3(k0, k1, k2, m_NE0, m_NE1));
795     for (index_t i=0; i < numComp; ++i) {
796     int_local[i]+=f[i]*w_0;
797     } /* end of component loop i */
798     } /* end of k0 loop */
799     } /* end of k1 loop */
800     } /* end of k2 loop */
801    
802     #pragma omp critical
803     for (index_t i=0; i<numComp; i++)
804     integrals[i]+=int_local[i];
805 caltinay 3722 } // end of parallel section
806 caltinay 3713 } else if (arg.getFunctionSpace().getTypeCode() == FaceElements) {
807     const double w_0 = h1*h2/4.;
808     const double w_1 = h0*h2/4.;
809     const double w_2 = h0*h1/4.;
810     #pragma omp parallel
811     {
812     vector<double> int_local(numComp, 0);
813     if (m_faceOffset[0] > -1) {
814 caltinay 3722 #pragma omp for nowait
815 caltinay 3713 for (index_t k2=0; k2 < m_NE2; ++k2) {
816     for (index_t k1=0; k1 < m_NE1; ++k1) {
817     const double* f = in.getSampleDataRO(m_faceOffset[0]+INDEX2(k1,k2,m_NE1));
818     for (index_t i=0; i < numComp; ++i) {
819     const register double f_0 = f[INDEX2(i,0,numComp)];
820     const register double f_1 = f[INDEX2(i,1,numComp)];
821     const register double f_2 = f[INDEX2(i,2,numComp)];
822     const register double f_3 = f[INDEX2(i,3,numComp)];
823     int_local[i]+=(f_0+f_1+f_2+f_3)*w_0;
824     } /* end of component loop i */
825     } /* end of k1 loop */
826     } /* end of k2 loop */
827     }
828    
829     if (m_faceOffset[1] > -1) {
830 caltinay 3722 #pragma omp for nowait
831 caltinay 3713 for (index_t k2=0; k2 < m_NE2; ++k2) {
832     for (index_t k1=0; k1 < m_NE1; ++k1) {
833     const double* f = in.getSampleDataRO(m_faceOffset[1]+INDEX2(k1,k2,m_NE1));
834     for (index_t i=0; i < numComp; ++i) {
835     const register double f_0 = f[INDEX2(i,0,numComp)];
836     const register double f_1 = f[INDEX2(i,1,numComp)];
837     const register double f_2 = f[INDEX2(i,2,numComp)];
838     const register double f_3 = f[INDEX2(i,3,numComp)];
839     int_local[i]+=(f_0+f_1+f_2+f_3)*w_0;
840     } /* end of component loop i */
841     } /* end of k1 loop */
842     } /* end of k2 loop */
843     }
844    
845     if (m_faceOffset[2] > -1) {
846 caltinay 3722 #pragma omp for nowait
847 caltinay 3713 for (index_t k2=0; k2 < m_NE2; ++k2) {
848     for (index_t k0=0; k0 < m_NE0; ++k0) {
849     const double* f = in.getSampleDataRO(m_faceOffset[2]+INDEX2(k0,k2,m_NE0));
850     for (index_t i=0; i < numComp; ++i) {
851     const register double f_0 = f[INDEX2(i,0,numComp)];
852     const register double f_1 = f[INDEX2(i,1,numComp)];
853     const register double f_2 = f[INDEX2(i,2,numComp)];
854     const register double f_3 = f[INDEX2(i,3,numComp)];
855     int_local[i]+=(f_0+f_1+f_2+f_3)*w_1;
856     } /* end of component loop i */
857     } /* end of k1 loop */
858     } /* end of k2 loop */
859     }
860    
861     if (m_faceOffset[3] > -1) {
862 caltinay 3722 #pragma omp for nowait
863 caltinay 3713 for (index_t k2=0; k2 < m_NE2; ++k2) {
864     for (index_t k0=0; k0 < m_NE0; ++k0) {
865     const double* f = in.getSampleDataRO(m_faceOffset[3]+INDEX2(k0,k2,m_NE0));
866     for (index_t i=0; i < numComp; ++i) {
867     const register double f_0 = f[INDEX2(i,0,numComp)];
868     const register double f_1 = f[INDEX2(i,1,numComp)];
869     const register double f_2 = f[INDEX2(i,2,numComp)];
870     const register double f_3 = f[INDEX2(i,3,numComp)];
871     int_local[i]+=(f_0+f_1+f_2+f_3)*w_1;
872     } /* end of component loop i */
873     } /* end of k1 loop */
874     } /* end of k2 loop */
875     }
876    
877     if (m_faceOffset[4] > -1) {
878 caltinay 3722 #pragma omp for nowait
879 caltinay 3713 for (index_t k1=0; k1 < m_NE1; ++k1) {
880     for (index_t k0=0; k0 < m_NE0; ++k0) {
881     const double* f = in.getSampleDataRO(m_faceOffset[4]+INDEX2(k0,k1,m_NE0));
882     for (index_t i=0; i < numComp; ++i) {
883     const register double f_0 = f[INDEX2(i,0,numComp)];
884     const register double f_1 = f[INDEX2(i,1,numComp)];
885     const register double f_2 = f[INDEX2(i,2,numComp)];
886     const register double f_3 = f[INDEX2(i,3,numComp)];
887     int_local[i]+=(f_0+f_1+f_2+f_3)*w_2;
888     } /* end of component loop i */
889     } /* end of k1 loop */
890     } /* end of k2 loop */
891     }
892    
893     if (m_faceOffset[5] > -1) {
894 caltinay 3722 #pragma omp for nowait
895 caltinay 3713 for (index_t k1=0; k1 < m_NE1; ++k1) {
896     for (index_t k0=0; k0 < m_NE0; ++k0) {
897     const double* f = in.getSampleDataRO(m_faceOffset[5]+INDEX2(k0,k1,m_NE0));
898     for (index_t i=0; i < numComp; ++i) {
899     const register double f_0 = f[INDEX2(i,0,numComp)];
900     const register double f_1 = f[INDEX2(i,1,numComp)];
901     const register double f_2 = f[INDEX2(i,2,numComp)];
902     const register double f_3 = f[INDEX2(i,3,numComp)];
903     int_local[i]+=(f_0+f_1+f_2+f_3)*w_2;
904     } /* end of component loop i */
905     } /* end of k1 loop */
906     } /* end of k2 loop */
907     }
908    
909     #pragma omp critical
910     for (index_t i=0; i<numComp; i++)
911     integrals[i]+=int_local[i];
912 caltinay 3722 } // end of parallel section
913 caltinay 3713
914     } else if (arg.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
915     const double w_0 = h1*h2;
916     const double w_1 = h0*h2;
917     const double w_2 = h0*h1;
918     #pragma omp parallel
919     {
920     vector<double> int_local(numComp, 0);
921     if (m_faceOffset[0] > -1) {
922 caltinay 3722 #pragma omp for nowait
923 caltinay 3713 for (index_t k2=0; k2 < m_NE2; ++k2) {
924     for (index_t k1=0; k1 < m_NE1; ++k1) {
925     const double* f = in.getSampleDataRO(m_faceOffset[0]+INDEX2(k1,k2,m_NE1));
926     for (index_t i=0; i < numComp; ++i) {
927     int_local[i]+=f[i]*w_0;
928     } /* end of component loop i */
929     } /* end of k1 loop */
930     } /* end of k2 loop */
931     }
932    
933     if (m_faceOffset[1] > -1) {
934 caltinay 3722 #pragma omp for nowait
935 caltinay 3713 for (index_t k2=0; k2 < m_NE2; ++k2) {
936     for (index_t k1=0; k1 < m_NE1; ++k1) {
937     const double* f = in.getSampleDataRO(m_faceOffset[1]+INDEX2(k1,k2,m_NE1));
938     for (index_t i=0; i < numComp; ++i) {
939     int_local[i]+=f[i]*w_0;
940     } /* end of component loop i */
941     } /* end of k1 loop */
942     } /* end of k2 loop */
943     }
944    
945     if (m_faceOffset[2] > -1) {
946 caltinay 3722 #pragma omp for nowait
947 caltinay 3713 for (index_t k2=0; k2 < m_NE2; ++k2) {
948     for (index_t k0=0; k0 < m_NE0; ++k0) {
949     const double* f = in.getSampleDataRO(m_faceOffset[2]+INDEX2(k0,k2,m_NE0));
950     for (index_t i=0; i < numComp; ++i) {
951     int_local[i]+=f[i]*w_1;
952     } /* end of component loop i */
953     } /* end of k1 loop */
954     } /* end of k2 loop */
955     }
956    
957     if (m_faceOffset[3] > -1) {
958 caltinay 3722 #pragma omp for nowait
959 caltinay 3713 for (index_t k2=0; k2 < m_NE2; ++k2) {
960     for (index_t k0=0; k0 < m_NE0; ++k0) {
961     const double* f = in.getSampleDataRO(m_faceOffset[3]+INDEX2(k0,k2,m_NE0));
962     for (index_t i=0; i < numComp; ++i) {
963     int_local[i]+=f[i]*w_1;
964     } /* end of component loop i */
965     } /* end of k1 loop */
966     } /* end of k2 loop */
967     }
968    
969     if (m_faceOffset[4] > -1) {
970 caltinay 3722 #pragma omp for nowait
971 caltinay 3713 for (index_t k1=0; k1 < m_NE1; ++k1) {
972     for (index_t k0=0; k0 < m_NE0; ++k0) {
973     const double* f = in.getSampleDataRO(m_faceOffset[4]+INDEX2(k0,k1,m_NE0));
974     for (index_t i=0; i < numComp; ++i) {
975     int_local[i]+=f[i]*w_2;
976     } /* end of component loop i */
977     } /* end of k1 loop */
978     } /* end of k2 loop */
979     }
980    
981     if (m_faceOffset[5] > -1) {
982 caltinay 3722 #pragma omp for nowait
983 caltinay 3713 for (index_t k1=0; k1 < m_NE1; ++k1) {
984     for (index_t k0=0; k0 < m_NE0; ++k0) {
985     const double* f = in.getSampleDataRO(m_faceOffset[5]+INDEX2(k0,k1,m_NE0));
986     for (index_t i=0; i < numComp; ++i) {
987     int_local[i]+=f[i]*w_2;
988     } /* end of component loop i */
989     } /* end of k1 loop */
990     } /* end of k2 loop */
991     }
992    
993     #pragma omp critical
994     for (index_t i=0; i<numComp; i++)
995     integrals[i]+=int_local[i];
996 caltinay 3722 } // end of parallel section
997 caltinay 3713
998     } else {
999     stringstream msg;
1000     msg << "setToIntegrals() not implemented for "
1001     << functionSpaceTypeAsString(arg.getFunctionSpace().getTypeCode());
1002     throw RipleyException(msg.str());
1003     }
1004     }
1005    
1006 caltinay 3722 void Brick::setToNormal(escript::Data& out) const
1007     {
1008     if (out.getFunctionSpace().getTypeCode() == FaceElements) {
1009     #pragma omp parallel
1010     {
1011     if (m_faceOffset[0] > -1) {
1012     #pragma omp for nowait
1013     for (index_t k2 = 0; k2 < m_NE2; ++k2) {
1014     for (index_t k1 = 0; k1 < m_NE1; ++k1) {
1015     double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE1));
1016     // set vector at four quadrature points
1017     *o++ = -1.; *o++ = 0.; *o++ = 0.;
1018     *o++ = -1.; *o++ = 0.; *o++ = 0.;
1019     *o++ = -1.; *o++ = 0.; *o++ = 0.;
1020     *o++ = -1.; *o++ = 0.; *o = 0.;
1021     }
1022     }
1023     }
1024    
1025     if (m_faceOffset[1] > -1) {
1026     #pragma omp for nowait
1027     for (index_t k2 = 0; k2 < m_NE2; ++k2) {
1028     for (index_t k1 = 0; k1 < m_NE1; ++k1) {
1029     double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE1));
1030     // set vector at four quadrature points
1031     *o++ = 1.; *o++ = 0.; *o++ = 0.;
1032     *o++ = 1.; *o++ = 0.; *o++ = 0.;
1033     *o++ = 1.; *o++ = 0.; *o++ = 0.;
1034     *o++ = 1.; *o++ = 0.; *o = 0.;
1035     }
1036     }
1037     }
1038    
1039     if (m_faceOffset[2] > -1) {
1040     #pragma omp for nowait
1041     for (index_t k2 = 0; k2 < m_NE2; ++k2) {
1042     for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1043     double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE0));
1044     // set vector at four quadrature points
1045     *o++ = 0.; *o++ = -1.; *o++ = 0.;
1046     *o++ = 0.; *o++ = -1.; *o++ = 0.;
1047     *o++ = 0.; *o++ = -1.; *o++ = 0.;
1048     *o++ = 0.; *o++ = -1.; *o = 0.;
1049     }
1050     }
1051     }
1052    
1053     if (m_faceOffset[3] > -1) {
1054     #pragma omp for nowait
1055     for (index_t k2 = 0; k2 < m_NE2; ++k2) {
1056     for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1057     double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE0));
1058     // set vector at four quadrature points
1059     *o++ = 0.; *o++ = 1.; *o++ = 0.;
1060     *o++ = 0.; *o++ = 1.; *o++ = 0.;
1061     *o++ = 0.; *o++ = 1.; *o++ = 0.;
1062     *o++ = 0.; *o++ = 1.; *o = 0.;
1063     }
1064     }
1065     }
1066    
1067     if (m_faceOffset[4] > -1) {
1068     #pragma omp for nowait
1069     for (index_t k1 = 0; k1 < m_NE1; ++k1) {
1070     for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1071     double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE0));
1072     // set vector at four quadrature points
1073     *o++ = 0.; *o++ = 0.; *o++ = -1.;
1074     *o++ = 0.; *o++ = 0.; *o++ = -1.;
1075     *o++ = 0.; *o++ = 0.; *o++ = -1.;
1076     *o++ = 0.; *o++ = 0.; *o = -1.;
1077     }
1078     }
1079     }
1080    
1081     if (m_faceOffset[5] > -1) {
1082     #pragma omp for nowait
1083     for (index_t k1 = 0; k1 < m_NE1; ++k1) {
1084     for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1085     double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE0));
1086     // set vector at four quadrature points
1087     *o++ = 0.; *o++ = 0.; *o++ = 1.;
1088     *o++ = 0.; *o++ = 0.; *o++ = 1.;
1089     *o++ = 0.; *o++ = 0.; *o++ = 1.;
1090     *o++ = 0.; *o++ = 0.; *o = 1.;
1091     }
1092     }
1093     }
1094     } // end of parallel section
1095     } else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
1096     #pragma omp parallel
1097     {
1098     if (m_faceOffset[0] > -1) {
1099     #pragma omp for nowait
1100     for (index_t k2 = 0; k2 < m_NE2; ++k2) {
1101     for (index_t k1 = 0; k1 < m_NE1; ++k1) {
1102     double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE1));
1103     *o++ = -1.;
1104     *o++ = 0.;
1105     *o = 0.;
1106     }
1107     }
1108     }
1109    
1110     if (m_faceOffset[1] > -1) {
1111     #pragma omp for nowait
1112     for (index_t k2 = 0; k2 < m_NE2; ++k2) {
1113     for (index_t k1 = 0; k1 < m_NE1; ++k1) {
1114     double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE1));
1115     *o++ = 1.;
1116     *o++ = 0.;
1117     *o = 0.;
1118     }
1119     }
1120     }
1121    
1122     if (m_faceOffset[2] > -1) {
1123     #pragma omp for nowait
1124     for (index_t k2 = 0; k2 < m_NE2; ++k2) {
1125     for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1126     double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE0));
1127     *o++ = 0.;
1128     *o++ = -1.;
1129     *o = 0.;
1130     }
1131     }
1132     }
1133    
1134     if (m_faceOffset[3] > -1) {
1135     #pragma omp for nowait
1136     for (index_t k2 = 0; k2 < m_NE2; ++k2) {
1137     for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1138     double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE0));
1139     *o++ = 0.;
1140     *o++ = 1.;
1141     *o = 0.;
1142     }
1143     }
1144     }
1145    
1146     if (m_faceOffset[4] > -1) {
1147     #pragma omp for nowait
1148     for (index_t k1 = 0; k1 < m_NE1; ++k1) {
1149     for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1150     double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE0));
1151     *o++ = 0.;
1152     *o++ = 0.;
1153     *o = -1.;
1154     }
1155     }
1156     }
1157    
1158     if (m_faceOffset[5] > -1) {
1159     #pragma omp for nowait
1160     for (index_t k1 = 0; k1 < m_NE1; ++k1) {
1161     for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1162     double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE0));
1163     *o++ = 0.;
1164     *o++ = 0.;
1165     *o = 1.;
1166     }
1167     }
1168     }
1169     } // end of parallel section
1170    
1171     } else {
1172     stringstream msg;
1173     msg << "setToNormal() not implemented for "
1174     << functionSpaceTypeAsString(out.getFunctionSpace().getTypeCode());
1175     throw RipleyException(msg.str());
1176     }
1177     }
1178    
1179 caltinay 3691 Paso_SystemMatrixPattern* Brick::getPattern(bool reducedRowOrder,
1180     bool reducedColOrder) const
1181     {
1182     if (reducedRowOrder || reducedColOrder)
1183     throw RipleyException("getPattern() not implemented for reduced order");
1184    
1185     throw RipleyException("getPattern() not implemented");
1186     }
1187    
1188     void Brick::Print_Mesh_Info(const bool full) const
1189     {
1190     RipleyDomain::Print_Mesh_Info(full);
1191     if (full) {
1192     cout << " Id Coordinates" << endl;
1193     cout.precision(15);
1194     cout.setf(ios::scientific, ios::floatfield);
1195 caltinay 3698 pair<double,double> xdx = getFirstCoordAndSpacing(0);
1196     pair<double,double> ydy = getFirstCoordAndSpacing(1);
1197     pair<double,double> zdz = getFirstCoordAndSpacing(2);
1198 caltinay 3691 for (index_t i=0; i < getNumNodes(); i++) {
1199     cout << " " << setw(5) << m_nodeId[i]
1200 caltinay 3698 << " " << xdx.first+(i%m_N0)*xdx.second
1201     << " " << ydy.first+(i%(m_N0*m_N1)/m_N0)*ydy.second
1202     << " " << zdz.first+(i/(m_N0*m_N1))*zdz.second << endl;
1203 caltinay 3691 }
1204     }
1205     }
1206    
1207 caltinay 3698 IndexVector Brick::getNumNodesPerDim() const
1208     {
1209     IndexVector ret;
1210     ret.push_back(m_N0);
1211     ret.push_back(m_N1);
1212     ret.push_back(m_N2);
1213     return ret;
1214     }
1215    
1216     IndexVector Brick::getNumElementsPerDim() const
1217     {
1218     IndexVector ret;
1219     ret.push_back(m_NE0);
1220     ret.push_back(m_NE1);
1221     ret.push_back(m_NE2);
1222     return ret;
1223     }
1224    
1225     IndexVector Brick::getNumFacesPerBoundary() const
1226     {
1227     IndexVector ret(6, 0);
1228     //left
1229     if (m_offset0==0)
1230     ret[0]=m_NE1*m_NE2;
1231     //right
1232     if (m_mpiInfo->rank%m_NX==m_NX-1)
1233     ret[1]=m_NE1*m_NE2;
1234     //bottom
1235     if (m_offset1==0)
1236     ret[2]=m_NE0*m_NE2;
1237     //top
1238     if (m_mpiInfo->rank%(m_NX*m_NY)/m_NX==m_NY-1)
1239     ret[3]=m_NE0*m_NE2;
1240     //front
1241     if (m_offset2==0)
1242     ret[4]=m_NE0*m_NE1;
1243     //back
1244     if (m_mpiInfo->rank/(m_NX*m_NY)==m_NZ-1)
1245     ret[5]=m_NE0*m_NE1;
1246     return ret;
1247     }
1248    
1249     pair<double,double> Brick::getFirstCoordAndSpacing(dim_t dim) const
1250     {
1251     if (dim==0)
1252     return pair<double,double>((m_l0*m_offset0)/m_gNE0, m_l0/m_gNE0);
1253     else if (dim==1)
1254     return pair<double,double>((m_l1*m_offset1)/m_gNE1, m_l1/m_gNE1);
1255     else if (dim==2)
1256     return pair<double,double>((m_l2*m_offset2)/m_gNE2, m_l2/m_gNE2);
1257    
1258     throw RipleyException("getFirstCoordAndSpacing(): invalid argument");
1259     }
1260    
1261 caltinay 3744 //protected
1262 caltinay 3750 dim_t Brick::getNumDOF() const
1263     {
1264 caltinay 3753 return (m_gNE0+1)/m_NX*(m_gNE1+1)/m_NY*(m_gNE2+1)/m_NZ;
1265 caltinay 3750 }
1266    
1267     //protected
1268 caltinay 3691 dim_t Brick::getNumFaceElements() const
1269     {
1270 caltinay 3753 const IndexVector faces = getNumFacesPerBoundary();
1271 caltinay 3691 dim_t n=0;
1272 caltinay 3753 for (size_t i=0; i<faces.size(); i++)
1273     n+=faces[i];
1274 caltinay 3691 return n;
1275     }
1276    
1277     //protected
1278     void Brick::assembleCoordinates(escript::Data& arg) const
1279     {
1280     escriptDataC x = arg.getDataC();
1281     int numDim = m_numDim;
1282     if (!isDataPointShapeEqual(&x, 1, &numDim))
1283     throw RipleyException("setToX: Invalid Data object shape");
1284     if (!numSamplesEqual(&x, 1, getNumNodes()))
1285     throw RipleyException("setToX: Illegal number of samples in Data object");
1286    
1287 caltinay 3698 pair<double,double> xdx = getFirstCoordAndSpacing(0);
1288     pair<double,double> ydy = getFirstCoordAndSpacing(1);
1289     pair<double,double> zdz = getFirstCoordAndSpacing(2);
1290 caltinay 3691 arg.requireWrite();
1291     #pragma omp parallel for
1292     for (dim_t i2 = 0; i2 < m_N2; i2++) {
1293     for (dim_t i1 = 0; i1 < m_N1; i1++) {
1294     for (dim_t i0 = 0; i0 < m_N0; i0++) {
1295     double* point = arg.getSampleDataRW(i0+m_N0*i1+m_N0*m_N1*i2);
1296 caltinay 3698 point[0] = xdx.first+i0*xdx.second;
1297     point[1] = ydy.first+i1*ydy.second;
1298     point[2] = zdz.first+i2*zdz.second;
1299 caltinay 3691 }
1300     }
1301     }
1302     }
1303    
1304     //private
1305     void Brick::populateSampleIds()
1306     {
1307 caltinay 3697 // identifiers are ordered from left to right, bottom to top, front to back
1308 caltinay 3753 // globally
1309 caltinay 3698
1310     // build node distribution vector first.
1311     // rank i owns m_nodeDistribution[i+1]-nodeDistribution[i] nodes
1312     m_nodeDistribution.assign(m_mpiInfo->size+1, 0);
1313 caltinay 3753 const dim_t numDOF=getNumDOF();
1314     for (dim_t k=1; k<m_mpiInfo->size; k++) {
1315     m_nodeDistribution[k]=k*numDOF;
1316 caltinay 3698 }
1317     m_nodeDistribution[m_mpiInfo->size]=getNumDataPointsGlobal();
1318 caltinay 3691 m_nodeId.resize(getNumNodes());
1319 caltinay 3753 m_dofId.resize(numDOF);
1320     m_elementId.resize(getNumElements());
1321     m_faceId.resize(getNumFaceElements());
1322 caltinay 3698
1323 caltinay 3753 #pragma omp parallel
1324     {
1325     #pragma omp for nowait
1326     // nodes
1327     for (dim_t i2=0; i2<m_N2; i2++) {
1328     for (dim_t i1=0; i1<m_N1; i1++) {
1329     for (dim_t i0=0; i0<m_N0; i0++) {
1330     m_nodeId[i0+i1*m_N0+i2*m_N0*m_N1] =
1331     (m_offset2+i2)*(m_gNE0+1)*(m_gNE1+1)
1332     +(m_offset1+i1)*(m_gNE0+1)
1333     +m_offset0+i0;
1334     }
1335 caltinay 3698 }
1336     }
1337 caltinay 3697
1338 caltinay 3753 // degrees of freedom
1339     #pragma omp for nowait
1340     for (dim_t k=0; k<numDOF; k++)
1341     m_dofId[k] = m_nodeDistribution[m_mpiInfo->rank]+k;
1342    
1343     // elements
1344     #pragma omp for nowait
1345     for (dim_t k=0; k<getNumElements(); k++)
1346     m_elementId[k]=k;
1347    
1348     // face elements
1349     #pragma omp for
1350     for (dim_t k=0; k<getNumFaceElements(); k++)
1351     m_faceId[k]=k;
1352     } // end parallel section
1353    
1354 caltinay 3735 m_nodeTags.assign(getNumNodes(), 0);
1355     updateTagsInUse(Nodes);
1356 caltinay 3698
1357 caltinay 3735 m_elementTags.assign(getNumElements(), 0);
1358     updateTagsInUse(Elements);
1359 caltinay 3697
1360 caltinay 3722 // generate face offset vector and set face tags
1361 caltinay 3704 const IndexVector facesPerEdge = getNumFacesPerBoundary();
1362 caltinay 3722 const index_t LEFT=1, RIGHT=2, BOTTOM=10, TOP=20, FRONT=100, BACK=200;
1363     const index_t faceTag[] = { LEFT, RIGHT, BOTTOM, TOP, FRONT, BACK };
1364 caltinay 3704 m_faceOffset.assign(facesPerEdge.size(), -1);
1365 caltinay 3722 m_faceTags.clear();
1366 caltinay 3704 index_t offset=0;
1367     for (size_t i=0; i<facesPerEdge.size(); i++) {
1368     if (facesPerEdge[i]>0) {
1369     m_faceOffset[i]=offset;
1370     offset+=facesPerEdge[i];
1371 caltinay 3722 m_faceTags.insert(m_faceTags.end(), facesPerEdge[i], faceTag[i]);
1372 caltinay 3704 }
1373     }
1374 caltinay 3722 setTagMap("left", LEFT);
1375     setTagMap("right", RIGHT);
1376     setTagMap("bottom", BOTTOM);
1377     setTagMap("top", TOP);
1378     setTagMap("front", FRONT);
1379     setTagMap("back", BACK);
1380     updateTagsInUse(FaceElements);
1381 caltinay 3691 }
1382    
1383 caltinay 3703 //protected
1384 caltinay 3711 void Brick::interpolateNodesOnElements(escript::Data& out, escript::Data& in,
1385     bool reduced) const
1386 caltinay 3703 {
1387     const dim_t numComp = in.getDataPointSize();
1388 caltinay 3711 if (reduced) {
1389 caltinay 3731 /*** GENERATOR SNIP_INTERPOLATE_REDUCED_ELEMENTS TOP */
1390     const double c0 = .125;
1391 caltinay 3703 #pragma omp parallel for
1392 caltinay 3711 for (index_t k2=0; k2 < m_NE2; ++k2) {
1393     for (index_t k1=0; k1 < m_NE1; ++k1) {
1394     for (index_t k0=0; k0 < m_NE0; ++k0) {
1395     const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,k2, m_N0,m_N1));
1396     const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,k2+1, m_N0,m_N1));
1397     const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,k2+1, m_N0,m_N1));
1398     const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,k2+1, m_N0,m_N1));
1399     const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,k2, m_N0,m_N1));
1400     const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2, m_N0,m_N1));
1401     const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,k2, m_N0,m_N1));
1402     const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2+1, m_N0,m_N1));
1403     double* o = out.getSampleDataRW(INDEX3(k0,k1,k2,m_NE0,m_NE1));
1404     for (index_t i=0; i < numComp; ++i) {
1405 caltinay 3731 o[INDEX2(i,numComp,0)] = c0*(f_000[i] + f_001[i] + f_010[i] + f_011[i] + f_100[i] + f_101[i] + f_110[i] + f_111[i]);
1406 caltinay 3711 } /* end of component loop i */
1407     } /* end of k0 loop */
1408     } /* end of k1 loop */
1409     } /* end of k2 loop */
1410     /* GENERATOR SNIP_INTERPOLATE_REDUCED_ELEMENTS BOTTOM */
1411     } else {
1412 caltinay 3731 /*** GENERATOR SNIP_INTERPOLATE_ELEMENTS TOP */
1413     const double c0 = .0094373878376559314545;
1414     const double c1 = .035220810900864519624;
1415     const double c2 = .13144585576580214704;
1416     const double c3 = .49056261216234406855;
1417 caltinay 3711 #pragma omp parallel for
1418     for (index_t k2=0; k2 < m_NE2; ++k2) {
1419     for (index_t k1=0; k1 < m_NE1; ++k1) {
1420     for (index_t k0=0; k0 < m_NE0; ++k0) {
1421     const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,k2, m_N0,m_N1));
1422     const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,k2+1, m_N0,m_N1));
1423     const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,k2+1, m_N0,m_N1));
1424     const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,k2+1, m_N0,m_N1));
1425     const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2, m_N0,m_N1));
1426     const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,k2, m_N0,m_N1));
1427     const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,k2, m_N0,m_N1));
1428     const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2+1, m_N0,m_N1));
1429     double* o = out.getSampleDataRW(INDEX3(k0,k1,k2,m_NE0,m_NE1));
1430     for (index_t i=0; i < numComp; ++i) {
1431 caltinay 3731 o[INDEX2(i,numComp,0)] = f_000[i]*c3 + f_111[i]*c0 + c2*(f_001[i] + f_010[i] + f_100[i]) + c1*(f_011[i] + f_101[i] + f_110[i]);
1432     o[INDEX2(i,numComp,1)] = f_011[i]*c0 + f_100[i]*c3 + c2*(f_000[i] + f_101[i] + f_110[i]) + c1*(f_001[i] + f_010[i] + f_111[i]);
1433     o[INDEX2(i,numComp,2)] = f_010[i]*c3 + f_101[i]*c0 + c2*(f_000[i] + f_011[i] + f_110[i]) + c1*(f_001[i] + f_100[i] + f_111[i]);
1434     o[INDEX2(i,numComp,3)] = f_001[i]*c0 + f_110[i]*c3 + c2*(f_010[i] + f_100[i] + f_111[i]) + c1*(f_000[i] + f_011[i] + f_101[i]);
1435     o[INDEX2(i,numComp,4)] = f_001[i]*c3 + f_110[i]*c0 + c2*(f_000[i] + f_011[i] + f_101[i]) + c1*(f_010[i] + f_100[i] + f_111[i]);
1436     o[INDEX2(i,numComp,5)] = f_010[i]*c0 + f_101[i]*c3 + c2*(f_001[i] + f_100[i] + f_111[i]) + c1*(f_000[i] + f_011[i] + f_110[i]);
1437     o[INDEX2(i,numComp,6)] = f_011[i]*c3 + f_100[i]*c0 + c2*(f_001[i] + f_010[i] + f_111[i]) + c1*(f_000[i] + f_101[i] + f_110[i]);
1438     o[INDEX2(i,numComp,7)] = f_000[i]*c0 + f_111[i]*c3 + c2*(f_011[i] + f_101[i] + f_110[i]) + c1*(f_001[i] + f_010[i] + f_100[i]);
1439 caltinay 3711 } /* end of component loop i */
1440     } /* end of k0 loop */
1441     } /* end of k1 loop */
1442     } /* end of k2 loop */
1443     /* GENERATOR SNIP_INTERPOLATE_ELEMENTS BOTTOM */
1444     }
1445 caltinay 3703 }
1446    
1447     //protected
1448 caltinay 3711 void Brick::interpolateNodesOnFaces(escript::Data& out, escript::Data& in,
1449     bool reduced) const
1450 caltinay 3703 {
1451 caltinay 3704 const dim_t numComp = in.getDataPointSize();
1452 caltinay 3711 if (reduced) {
1453 caltinay 3731 const double c0 = .25;
1454     #pragma omp parallel
1455     {
1456     /*** GENERATOR SNIP_INTERPOLATE_REDUCED_FACES TOP */
1457     if (m_faceOffset[0] > -1) {
1458     #pragma omp for nowait
1459     for (index_t k2=0; k2 < m_NE2; ++k2) {
1460     for (index_t k1=0; k1 < m_NE1; ++k1) {
1461     const register double* f_011 = in.getSampleDataRO(INDEX3(0,k1+1,k2+1, m_N0,m_N1));
1462     const register double* f_010 = in.getSampleDataRO(INDEX3(0,k1+1,k2, m_N0,m_N1));
1463     const register double* f_001 = in.getSampleDataRO(INDEX3(0,k1,k2+1, m_N0,m_N1));
1464     const register double* f_000 = in.getSampleDataRO(INDEX3(0,k1,k2, m_N0,m_N1));
1465     double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE1));
1466     for (index_t i=0; i < numComp; ++i) {
1467     o[INDEX2(i,numComp,0)] = c0*(f_000[i] + f_001[i] + f_010[i] + f_011[i]);
1468     } /* end of component loop i */
1469     } /* end of k1 loop */
1470     } /* end of k2 loop */
1471     } /* end of face 0 */
1472     if (m_faceOffset[1] > -1) {
1473     #pragma omp for nowait
1474     for (index_t k2=0; k2 < m_NE2; ++k2) {
1475     for (index_t k1=0; k1 < m_NE1; ++k1) {
1476     const register double* f_110 = in.getSampleDataRO(INDEX3(m_N0-1,k1+1,k2, m_N0,m_N1));
1477     const register double* f_100 = in.getSampleDataRO(INDEX3(m_N0-1,k1,k2, m_N0,m_N1));
1478     const register double* f_101 = in.getSampleDataRO(INDEX3(m_N0-1,k1,k2+1, m_N0,m_N1));
1479     const register double* f_111 = in.getSampleDataRO(INDEX3(m_N0-1,k1+1,k2+1, m_N0,m_N1));
1480     double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE1));
1481     for (index_t i=0; i < numComp; ++i) {
1482     o[INDEX2(i,numComp,0)] = c0*(f_100[i] + f_101[i] + f_110[i] + f_111[i]);
1483     } /* end of component loop i */
1484     } /* end of k1 loop */
1485     } /* end of k2 loop */
1486     } /* end of face 1 */
1487     if (m_faceOffset[2] > -1) {
1488     #pragma omp for nowait
1489     for (index_t k2=0; k2 < m_NE2; ++k2) {
1490     for (index_t k0=0; k0 < m_NE0; ++k0) {
1491     const register double* f_000 = in.getSampleDataRO(INDEX3(k0,0,k2, m_N0,m_N1));
1492     const register double* f_001 = in.getSampleDataRO(INDEX3(k0,0,k2+1, m_N0,m_N1));
1493     const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,0,k2+1, m_N0,m_N1));
1494     const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,0,k2, m_N0,m_N1));
1495     double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE0));
1496     for (index_t i=0; i < numComp; ++i) {
1497     o[INDEX2(i,numComp,0)] = c0*(f_000[i] + f_001[i] + f_100[i] + f_101[i]);
1498     } /* end of component loop i */
1499     } /* end of k0 loop */
1500     } /* end of k2 loop */
1501     } /* end of face 2 */
1502     if (m_faceOffset[3] > -1) {
1503     #pragma omp for nowait
1504     for (index_t k2=0; k2 < m_NE2; ++k2) {
1505     for (index_t k0=0; k0 < m_NE0; ++k0) {
1506     const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,m_N1-1,k2, m_N0,m_N1));
1507     const register double* f_011 = in.getSampleDataRO(INDEX3(k0,m_N1-1,k2+1, m_N0,m_N1));
1508     const register double* f_010 = in.getSampleDataRO(INDEX3(k0,m_N1-1,k2, m_N0,m_N1));
1509     const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,m_N1-1,k2+1, m_N0,m_N1));
1510     double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE0));
1511     for (index_t i=0; i < numComp; ++i) {
1512     o[INDEX2(i,numComp,0)] = c0*(f_010[i] + f_011[i] + f_110[i] + f_111[i]);
1513     } /* end of component loop i */
1514     } /* end of k0 loop */
1515     } /* end of k2 loop */
1516     } /* end of face 3 */
1517     if (m_faceOffset[4] > -1) {
1518     #pragma omp for nowait
1519 caltinay 3711 for (index_t k1=0; k1 < m_NE1; ++k1) {
1520 caltinay 3731 for (index_t k0=0; k0 < m_NE0; ++k0) {
1521     const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,0, m_N0,m_N1));
1522     const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,0, m_N0,m_N1));
1523     const register double* f_100 = in.getSampl