/[escript]/branches/diaplayground/ripley/src/Brick.cpp
ViewVC logotype

Annotation of /branches/diaplayground/ripley/src/Brick.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 3735 - (hide annotations)
Fri Dec 9 05:03:08 2011 UTC (7 years, 3 months ago) by caltinay
Original Path: branches/ripleygmg_from_3668/ripley/src/Brick.cpp
File size: 93685 byte(s)
Set initial node/element/face tags to 0 and updated tests.

1 caltinay 3691
2     /*******************************************************
3     *
4     * Copyright (c) 2003-2011 by University of Queensland
5     * Earth Systems Science Computational Center (ESSCC)
6     * http://www.uq.edu.au/esscc
7     *
8     * Primary Business: Queensland, Australia
9     * Licensed under the Open Software License version 3.0
10     * http://www.opensource.org/licenses/osl-3.0.php
11     *
12     *******************************************************/
13    
14     #include <ripley/Brick.h>
15     extern "C" {
16     #include "paso/SystemMatrixPattern.h"
17     }
18    
19     #if USE_SILO
20     #include <silo.h>
21     #ifdef ESYS_MPI
22     #include <pmpio.h>
23     #endif
24     #endif
25    
26     #include <iomanip>
27    
28     using namespace std;
29    
30     namespace ripley {
31    
32     Brick::Brick(int n0, int n1, int n2, double l0, double l1, double l2, int d0,
33     int d1, int d2) :
34     RipleyDomain(3),
35     m_gNE0(n0),
36     m_gNE1(n1),
37     m_gNE2(n2),
38     m_l0(l0),
39     m_l1(l1),
40     m_l2(l2),
41     m_NX(d0),
42     m_NY(d1),
43     m_NZ(d2)
44     {
45     // ensure number of subdivisions is valid and nodes can be distributed
46     // among number of ranks
47     if (m_NX*m_NY*m_NZ != m_mpiInfo->size)
48     throw RipleyException("Invalid number of spatial subdivisions");
49    
50     if (n0%m_NX > 0 || n1%m_NY > 0 || n2%m_NZ > 0)
51     throw RipleyException("Number of elements must be separable into number of ranks in each dimension");
52    
53     // local number of elements
54     m_NE0 = n0/m_NX;
55     m_NE1 = n1/m_NY;
56     m_NE2 = n2/m_NZ;
57     // local number of nodes (not necessarily owned)
58     m_N0 = m_NE0+1;
59     m_N1 = m_NE1+1;
60     m_N2 = m_NE2+1;
61     // bottom-left-front node is at (offset0,offset1,offset2) in global mesh
62     m_offset0 = m_NE0*(m_mpiInfo->rank%m_NX);
63     m_offset1 = m_NE1*(m_mpiInfo->rank%(m_NX*m_NY)/m_NX);
64     m_offset2 = m_NE2*(m_mpiInfo->rank/(m_NX*m_NY));
65     populateSampleIds();
66     }
67    
68    
69     Brick::~Brick()
70     {
71     }
72    
73     string Brick::getDescription() const
74     {
75     return "ripley::Brick";
76     }
77    
78     bool Brick::operator==(const AbstractDomain& other) const
79     {
80     if (dynamic_cast<const Brick*>(&other))
81     return this==&other;
82    
83     return false;
84     }
85    
86     void Brick::dump(const string& fileName) const
87     {
88     #if USE_SILO
89     string fn(fileName);
90     if (fileName.length() < 6 || fileName.compare(fileName.length()-5, 5, ".silo") != 0) {
91     fn+=".silo";
92     }
93    
94     const int NUM_SILO_FILES = 1;
95     const char* blockDirFmt = "/block%04d";
96     int driver=DB_HDF5;
97     string siloPath;
98     DBfile* dbfile = NULL;
99    
100     #ifdef ESYS_MPI
101     PMPIO_baton_t* baton = NULL;
102     #endif
103    
104     if (m_mpiInfo->size > 1) {
105     #ifdef ESYS_MPI
106     baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
107     0x1337, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
108     PMPIO_DefaultClose, (void*)&driver);
109     // try the fallback driver in case of error
110     if (!baton && driver != DB_PDB) {
111     driver = DB_PDB;
112     baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
113     0x1338, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
114     PMPIO_DefaultClose, (void*)&driver);
115     }
116     if (baton) {
117     char str[64];
118     snprintf(str, 64, blockDirFmt, PMPIO_RankInGroup(baton, m_mpiInfo->rank));
119     siloPath = str;
120     dbfile = (DBfile*) PMPIO_WaitForBaton(baton, fn.c_str(), siloPath.c_str());
121     }
122     #endif
123     } else {
124     dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
125     getDescription().c_str(), driver);
126     // try the fallback driver in case of error
127     if (!dbfile && driver != DB_PDB) {
128     driver = DB_PDB;
129     dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
130     getDescription().c_str(), driver);
131     }
132     }
133    
134     if (!dbfile)
135     throw RipleyException("dump: Could not create Silo file");
136    
137     /*
138     if (driver==DB_HDF5) {
139     // gzip level 1 already provides good compression with minimal
140     // performance penalty. Some tests showed that gzip levels >3 performed
141     // rather badly on escript data both in terms of time and space
142     DBSetCompression("ERRMODE=FALLBACK METHOD=GZIP LEVEL=1");
143     }
144     */
145    
146     boost::scoped_ptr<double> x(new double[m_N0]);
147     boost::scoped_ptr<double> y(new double[m_N1]);
148     boost::scoped_ptr<double> z(new double[m_N2]);
149     double* coords[3] = { x.get(), y.get(), z.get() };
150 caltinay 3698 pair<double,double> xdx = getFirstCoordAndSpacing(0);
151     pair<double,double> ydy = getFirstCoordAndSpacing(1);
152     pair<double,double> zdz = getFirstCoordAndSpacing(2);
153 caltinay 3691 #pragma omp parallel
154     {
155     #pragma omp for
156     for (dim_t i0 = 0; i0 < m_N0; i0++) {
157 caltinay 3698 coords[0][i0]=xdx.first+i0*xdx.second;
158 caltinay 3691 }
159     #pragma omp for
160     for (dim_t i1 = 0; i1 < m_N1; i1++) {
161 caltinay 3698 coords[1][i1]=ydy.first+i1*ydy.second;
162 caltinay 3691 }
163     #pragma omp for
164     for (dim_t i2 = 0; i2 < m_N2; i2++) {
165 caltinay 3698 coords[2][i2]=zdz.first+i2*zdz.second;
166 caltinay 3691 }
167     }
168 caltinay 3698 IndexVector dims = getNumNodesPerDim();
169     DBPutQuadmesh(dbfile, "mesh", NULL, coords, &dims[0], 3, DB_DOUBLE,
170 caltinay 3691 DB_COLLINEAR, NULL);
171    
172 caltinay 3698 DBPutQuadvar1(dbfile, "nodeId", "mesh", (void*)&m_nodeId[0], &dims[0], 3,
173     NULL, 0, DB_INT, DB_NODECENT, NULL);
174 caltinay 3691
175 caltinay 3698 // write element ids
176     dims = getNumElementsPerDim();
177     DBPutQuadvar1(dbfile, "elementId", "mesh", (void*)&m_elementId[0],
178     &dims[0], 3, NULL, 0, DB_INT, DB_ZONECENT, NULL);
179    
180     // rank 0 writes multimesh and multivar
181 caltinay 3691 if (m_mpiInfo->rank == 0) {
182     vector<string> tempstrings;
183     vector<char*> names;
184     for (dim_t i=0; i<m_mpiInfo->size; i++) {
185     stringstream path;
186     path << "/block" << setw(4) << setfill('0') << right << i << "/mesh";
187     tempstrings.push_back(path.str());
188     names.push_back((char*)tempstrings.back().c_str());
189     }
190     vector<int> types(m_mpiInfo->size, DB_QUAD_RECT);
191     DBSetDir(dbfile, "/");
192     DBPutMultimesh(dbfile, "multimesh", m_mpiInfo->size, &names[0],
193     &types[0], NULL);
194     tempstrings.clear();
195     names.clear();
196     for (dim_t i=0; i<m_mpiInfo->size; i++) {
197     stringstream path;
198     path << "/block" << setw(4) << setfill('0') << right << i << "/nodeId";
199     tempstrings.push_back(path.str());
200     names.push_back((char*)tempstrings.back().c_str());
201     }
202     types.assign(m_mpiInfo->size, DB_QUADVAR);
203     DBPutMultivar(dbfile, "nodeId", m_mpiInfo->size, &names[0],
204     &types[0], NULL);
205 caltinay 3698 tempstrings.clear();
206     names.clear();
207     for (dim_t i=0; i<m_mpiInfo->size; i++) {
208     stringstream path;
209     path << "/block" << setw(4) << setfill('0') << right << i << "/elementId";
210     tempstrings.push_back(path.str());
211     names.push_back((char*)tempstrings.back().c_str());
212     }
213     DBPutMultivar(dbfile, "elementId", m_mpiInfo->size, &names[0],
214     &types[0], NULL);
215 caltinay 3691 }
216    
217     if (m_mpiInfo->size > 1) {
218     #ifdef ESYS_MPI
219     PMPIO_HandOffBaton(baton, dbfile);
220     PMPIO_Finish(baton);
221     #endif
222     } else {
223     DBClose(dbfile);
224     }
225    
226     #else // USE_SILO
227     throw RipleyException("dump(): no Silo support");
228     #endif
229     }
230    
231     const int* Brick::borrowSampleReferenceIDs(int fsType) const
232     {
233 caltinay 3697 switch (fsType) {
234     case Nodes:
235 caltinay 3733 case DegreesOfFreedom: //FIXME
236     case ReducedDegreesOfFreedom: //FIXME
237 caltinay 3697 return &m_nodeId[0];
238     case Elements:
239 caltinay 3733 case ReducedElements:
240 caltinay 3697 return &m_elementId[0];
241 caltinay 3733 case ReducedFaceElements:
242 caltinay 3697 return &m_faceId[0];
243     default:
244     break;
245     }
246 caltinay 3691
247 caltinay 3697 stringstream msg;
248     msg << "borrowSampleReferenceIDs() not implemented for function space type "
249     << fsType;
250     throw RipleyException(msg.str());
251 caltinay 3691 }
252    
253     bool Brick::ownSample(int fsCode, index_t id) const
254     {
255     #ifdef ESYS_MPI
256     if (fsCode == Nodes) {
257 caltinay 3698 const index_t myFirst=m_nodeDistribution[m_mpiInfo->rank];
258     const index_t myLast=m_nodeDistribution[m_mpiInfo->rank+1]-1;
259 caltinay 3691 return (m_nodeId[id]>=myFirst && m_nodeId[id]<=myLast);
260     } else
261     throw RipleyException("ownSample() only implemented for Nodes");
262     #else
263     return true;
264     #endif
265     }
266    
267 caltinay 3703 void Brick::setToGradient(escript::Data& out, const escript::Data& cIn) const
268     {
269     escript::Data& in = *const_cast<escript::Data*>(&cIn);
270     const dim_t numComp = in.getDataPointSize();
271     const double h0 = m_l0/m_gNE0;
272     const double h1 = m_l1/m_gNE1;
273     const double h2 = m_l1/m_gNE2;
274 caltinay 3731 const double C0 = .044658198738520451079;
275     const double C1 = .16666666666666666667;
276     const double C2 = .21132486540518711775;
277     const double C3 = .25;
278     const double C4 = .5;
279     const double C5 = .62200846792814621559;
280     const double C6 = .78867513459481288225;
281    
282 caltinay 3703 if (out.getFunctionSpace().getTypeCode() == Elements) {
283 caltinay 3731 /*** GENERATOR SNIP_GRAD_ELEMENTS TOP */
284 caltinay 3703 #pragma omp parallel for
285 caltinay 3707 for (index_t k2=0; k2 < m_NE2; ++k2) {
286     for (index_t k1=0; k1 < m_NE1; ++k1) {
287     for (index_t k0=0; k0 < m_NE0; ++k0) {
288 caltinay 3703 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,k2, m_N0,m_N1));
289     const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,k2+1, m_N0,m_N1));
290     const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,k2+1, m_N0,m_N1));
291     const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2+1, m_N0,m_N1));
292     const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2, m_N0,m_N1));
293     const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,k2+1, m_N0,m_N1));
294     const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,k2, m_N0,m_N1));
295     const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,k2, m_N0,m_N1));
296     double* o = out.getSampleDataRW(INDEX3(k0,k1,k2,m_NE0,m_NE1));
297     for (index_t i=0; i < numComp; ++i) {
298 caltinay 3731 const double V0=((f_100[i]-f_000[i])*C5 + (f_111[i]-f_011[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / h0;
299     const double V1=((f_110[i]-f_010[i])*C5 + (f_101[i]-f_001[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / h0;
300     const double V2=((f_101[i]-f_001[i])*C5 + (f_110[i]-f_010[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / h0;
301     const double V3=((f_111[i]-f_011[i])*C5 + (f_100[i]-f_000[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / h0;
302     const double V4=((f_010[i]-f_000[i])*C5 + (f_111[i]-f_101[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / h1;
303     const double V5=((f_110[i]-f_100[i])*C5 + (f_011[i]-f_001[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / h1;
304     const double V6=((f_011[i]-f_001[i])*C5 + (f_110[i]-f_100[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / h1;
305     const double V7=((f_111[i]-f_101[i])*C5 + (f_010[i]-f_000[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / h1;
306     const double V8=((f_001[i]-f_000[i])*C5 + (f_111[i]-f_110[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / h2;
307     const double V9=((f_101[i]-f_100[i])*C5 + (f_011[i]-f_010[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / h2;
308     const double V10=((f_011[i]-f_010[i])*C5 + (f_101[i]-f_100[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / h2;
309     const double V11=((f_111[i]-f_110[i])*C5 + (f_001[i]-f_000[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / h2;
310     o[INDEX3(i,0,0,numComp,3)] = V0;
311     o[INDEX3(i,1,0,numComp,3)] = V4;
312     o[INDEX3(i,2,0,numComp,3)] = V8;
313     o[INDEX3(i,0,1,numComp,3)] = V0;
314     o[INDEX3(i,1,1,numComp,3)] = V5;
315     o[INDEX3(i,2,1,numComp,3)] = V9;
316     o[INDEX3(i,0,2,numComp,3)] = V1;
317     o[INDEX3(i,1,2,numComp,3)] = V4;
318     o[INDEX3(i,2,2,numComp,3)] = V10;
319     o[INDEX3(i,0,3,numComp,3)] = V1;
320     o[INDEX3(i,1,3,numComp,3)] = V5;
321     o[INDEX3(i,2,3,numComp,3)] = V11;
322     o[INDEX3(i,0,4,numComp,3)] = V2;
323     o[INDEX3(i,1,4,numComp,3)] = V6;
324     o[INDEX3(i,2,4,numComp,3)] = V8;
325     o[INDEX3(i,0,5,numComp,3)] = V2;
326     o[INDEX3(i,1,5,numComp,3)] = V7;
327     o[INDEX3(i,2,5,numComp,3)] = V9;
328     o[INDEX3(i,0,6,numComp,3)] = V3;
329     o[INDEX3(i,1,6,numComp,3)] = V6;
330     o[INDEX3(i,2,6,numComp,3)] = V10;
331     o[INDEX3(i,0,7,numComp,3)] = V3;
332     o[INDEX3(i,1,7,numComp,3)] = V7;
333     o[INDEX3(i,2,7,numComp,3)] = V11;
334 caltinay 3703 } /* end of component loop i */
335     } /* end of k0 loop */
336     } /* end of k1 loop */
337     } /* end of k2 loop */
338     /* GENERATOR SNIP_GRAD_ELEMENTS BOTTOM */
339 caltinay 3711 } else if (out.getFunctionSpace().getTypeCode() == ReducedElements) {
340 caltinay 3731 /*** GENERATOR SNIP_GRAD_REDUCED_ELEMENTS TOP */
341 caltinay 3711 #pragma omp parallel for
342     for (index_t k2=0; k2 < m_NE2; ++k2) {
343     for (index_t k1=0; k1 < m_NE1; ++k1) {
344     for (index_t k0=0; k0 < m_NE0; ++k0) {
345     const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,k2, m_N0,m_N1));
346     const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,k2+1, m_N0,m_N1));
347     const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,k2+1, m_N0,m_N1));
348     const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,k2+1, m_N0,m_N1));
349     const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,k2, m_N0,m_N1));
350     const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2, m_N0,m_N1));
351     const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,k2, m_N0,m_N1));
352     const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2+1, m_N0,m_N1));
353     double* o = out.getSampleDataRW(INDEX3(k0,k1,k2,m_NE0,m_NE1));
354     for (index_t i=0; i < numComp; ++i) {
355 caltinay 3731 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_010[i]-f_011[i])*C3 / h0;
356     o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_100[i]-f_101[i])*C3 / h1;
357     o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_011[i]+f_101[i]+f_111[i]-f_000[i]-f_010[i]-f_100[i]-f_110[i])*C3 / h2;
358 caltinay 3711 } /* end of component loop i */
359     } /* end of k0 loop */
360     } /* end of k1 loop */
361     } /* end of k2 loop */
362     /* GENERATOR SNIP_GRAD_REDUCED_ELEMENTS BOTTOM */
363 caltinay 3707 } else if (out.getFunctionSpace().getTypeCode() == FaceElements) {
364 caltinay 3731 /*** GENERATOR SNIP_GRAD_FACES TOP */
365 caltinay 3722 #pragma omp parallel
366     {
367     if (m_faceOffset[0] > -1) {
368     #pragma omp for nowait
369     for (index_t k2=0; k2 < m_NE2; ++k2) {
370     for (index_t k1=0; k1 < m_NE1; ++k1) {
371     const register double* f_000 = in.getSampleDataRO(INDEX3(0,k1,k2, m_N0,m_N1));
372     const register double* f_001 = in.getSampleDataRO(INDEX3(0,k1,k2+1, m_N0,m_N1));
373     const register double* f_101 = in.getSampleDataRO(INDEX3(1,k1,k2+1, m_N0,m_N1));
374     const register double* f_111 = in.getSampleDataRO(INDEX3(1,k1+1,k2+1, m_N0,m_N1));
375     const register double* f_110 = in.getSampleDataRO(INDEX3(1,k1+1,k2, m_N0,m_N1));
376     const register double* f_011 = in.getSampleDataRO(INDEX3(0,k1+1,k2+1, m_N0,m_N1));
377     const register double* f_010 = in.getSampleDataRO(INDEX3(0,k1+1,k2, m_N0,m_N1));
378     const register double* f_100 = in.getSampleDataRO(INDEX3(1,k1,k2, m_N0,m_N1));
379     double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE1));
380     for (index_t i=0; i < numComp; ++i) {
381 caltinay 3731 const double V0=((f_010[i]-f_000[i])*C6 + (f_011[i]-f_001[i])*C2) / h1;
382     const double V1=((f_010[i]-f_000[i])*C2 + (f_011[i]-f_001[i])*C6) / h1;
383     const double V2=((f_001[i]-f_000[i])*C6 + (f_010[i]-f_011[i])*C2) / h2;
384     const double V3=((f_001[i]-f_000[i])*C2 + (f_011[i]-f_010[i])*C6) / h2;
385     o[INDEX3(i,0,0,numComp,3)] = ((f_100[i]-f_000[i])*C5 + (f_111[i]-f_011[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / h0;
386     o[INDEX3(i,1,0,numComp,3)] = V0;
387     o[INDEX3(i,2,0,numComp,3)] = V2;
388     o[INDEX3(i,0,1,numComp,3)] = ((f_110[i]-f_010[i])*C5 + (f_101[i]-f_001[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / h0;
389     o[INDEX3(i,1,1,numComp,3)] = V0;
390     o[INDEX3(i,2,1,numComp,3)] = V3;
391     o[INDEX3(i,0,2,numComp,3)] = ((f_101[i]-f_001[i])*C5 + (f_110[i]-f_010[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / h0;
392     o[INDEX3(i,1,2,numComp,3)] = V1;
393     o[INDEX3(i,2,2,numComp,3)] = V2;
394     o[INDEX3(i,0,3,numComp,3)] = ((f_111[i]-f_011[i])*C5 + (f_100[i]-f_000[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / h0;
395     o[INDEX3(i,1,3,numComp,3)] = V1;
396     o[INDEX3(i,2,3,numComp,3)] = V3;
397 caltinay 3722 } /* end of component loop i */
398     } /* end of k1 loop */
399     } /* end of k2 loop */
400     } /* end of face 0 */
401     if (m_faceOffset[1] > -1) {
402     #pragma omp for nowait
403     for (index_t k2=0; k2 < m_NE2; ++k2) {
404     for (index_t k1=0; k1 < m_NE1; ++k1) {
405     const register double* f_000 = in.getSampleDataRO(INDEX3(m_N0-2,k1,k2, m_N0,m_N1));
406     const register double* f_001 = in.getSampleDataRO(INDEX3(m_N0-2,k1,k2+1, m_N0,m_N1));
407     const register double* f_101 = in.getSampleDataRO(INDEX3(m_N0-1,k1,k2+1, m_N0,m_N1));
408     const register double* f_111 = in.getSampleDataRO(INDEX3(m_N0-1,k1+1,k2+1, m_N0,m_N1));
409     const register double* f_110 = in.getSampleDataRO(INDEX3(m_N0-1,k1+1,k2, m_N0,m_N1));
410     const register double* f_011 = in.getSampleDataRO(INDEX3(m_N0-2,k1+1,k2+1, m_N0,m_N1));
411     const register double* f_010 = in.getSampleDataRO(INDEX3(m_N0-2,k1+1,k2, m_N0,m_N1));
412     const register double* f_100 = in.getSampleDataRO(INDEX3(m_N0-1,k1,k2, m_N0,m_N1));
413     double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE1));
414     for (index_t i=0; i < numComp; ++i) {
415 caltinay 3731 const double V0=((f_110[i]-f_100[i])*C6 + (f_111[i]-f_101[i])*C2) / h1;
416     const double V1=((f_110[i]-f_100[i])*C2 + (f_111[i]-f_101[i])*C6) / h1;
417     const double V2=((f_101[i]-f_100[i])*C6 + (f_111[i]-f_110[i])*C2) / h2;
418     const double V3=((f_101[i]-f_100[i])*C2 + (f_111[i]-f_110[i])*C6) / h2;
419     o[INDEX3(i,0,0,numComp,3)] = ((f_100[i]-f_000[i])*C5 + (f_111[i]-f_011[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / h0;
420     o[INDEX3(i,1,0,numComp,3)] = V0;
421     o[INDEX3(i,2,0,numComp,3)] = V2;
422     o[INDEX3(i,0,1,numComp,3)] = ((f_110[i]-f_010[i])*C5 + (f_101[i]-f_001[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / h0;
423     o[INDEX3(i,1,1,numComp,3)] = V0;
424     o[INDEX3(i,2,1,numComp,3)] = V3;
425     o[INDEX3(i,0,2,numComp,3)] = ((f_101[i]-f_001[i])*C5 + (f_110[i]-f_010[i])*C0 + (f_100[i]+f_111[i]-f_000[i]-f_011[i])*C1) / h0;
426     o[INDEX3(i,1,2,numComp,3)] = V1;
427     o[INDEX3(i,2,2,numComp,3)] = V2;
428     o[INDEX3(i,0,3,numComp,3)] = ((f_111[i]-f_011[i])*C5 + (f_100[i]-f_000[i])*C0 + (f_101[i]+f_110[i]-f_001[i]-f_010[i])*C1) / h0;
429     o[INDEX3(i,1,3,numComp,3)] = V1;
430     o[INDEX3(i,2,3,numComp,3)] = V3;
431 caltinay 3722 } /* end of component loop i */
432     } /* end of k1 loop */
433     } /* end of k2 loop */
434     } /* end of face 1 */
435     if (m_faceOffset[2] > -1) {
436     #pragma omp for nowait
437     for (index_t k2=0; k2 < m_NE2; ++k2) {
438     for (index_t k0=0; k0 < m_NE0; ++k0) {
439     const register double* f_000 = in.getSampleDataRO(INDEX3(k0,0,k2, m_N0,m_N1));
440     const register double* f_001 = in.getSampleDataRO(INDEX3(k0,0,k2+1, m_N0,m_N1));
441     const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,0,k2+1, m_N0,m_N1));
442     const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,0,k2, m_N0,m_N1));
443     const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,1,k2+1, m_N0,m_N1));
444     const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,1,k2, m_N0,m_N1));
445     const register double* f_011 = in.getSampleDataRO(INDEX3(k0,1,k2+1, m_N0,m_N1));
446     const register double* f_010 = in.getSampleDataRO(INDEX3(k0,1,k2, m_N0,m_N1));
447     double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE0));
448     for (index_t i=0; i < numComp; ++i) {
449 caltinay 3731 const double V0=((f_100[i]-f_000[i])*C6 + (f_101[i]-f_001[i])*C2) / h0;
450     const double V1=((f_001[i]-f_000[i])*C6 + (f_101[i]-f_100[i])*C2) / h2;
451     const double V2=((f_001[i]-f_000[i])*C2 + (f_101[i]-f_100[i])*C6) / h2;
452     o[INDEX3(i,0,0,numComp,3)] = V0;
453     o[INDEX3(i,1,0,numComp,3)] = ((f_010[i]-f_000[i])*C5 + (f_111[i]-f_101[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / h1;
454     o[INDEX3(i,2,0,numComp,3)] = V1;
455     o[INDEX3(i,0,1,numComp,3)] = V0;
456     o[INDEX3(i,1,1,numComp,3)] = ((f_110[i]-f_100[i])*C5 + (f_011[i]-f_001[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / h1;
457     o[INDEX3(i,2,1,numComp,3)] = V2;
458     o[INDEX3(i,0,2,numComp,3)] = V0;
459     o[INDEX3(i,1,2,numComp,3)] = ((f_011[i]-f_001[i])*C5 + (f_110[i]-f_100[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / h1;
460     o[INDEX3(i,2,2,numComp,3)] = V1;
461     o[INDEX3(i,0,3,numComp,3)] = V0;
462     o[INDEX3(i,1,3,numComp,3)] = ((f_111[i]-f_101[i])*C5 + (f_010[i]-f_000[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / h1;
463     o[INDEX3(i,2,3,numComp,3)] = V2;
464 caltinay 3722 } /* end of component loop i */
465     } /* end of k0 loop */
466     } /* end of k2 loop */
467     } /* end of face 2 */
468     if (m_faceOffset[3] > -1) {
469     #pragma omp for nowait
470     for (index_t k2=0; k2 < m_NE2; ++k2) {
471     for (index_t k0=0; k0 < m_NE0; ++k0) {
472     const register double* f_011 = in.getSampleDataRO(INDEX3(k0,m_N1-1,k2+1, m_N0,m_N1));
473     const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,m_N1-1,k2, m_N0,m_N1));
474     const register double* f_010 = in.getSampleDataRO(INDEX3(k0,m_N1-1,k2, m_N0,m_N1));
475     const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,m_N1-1,k2+1, m_N0,m_N1));
476     const register double* f_000 = in.getSampleDataRO(INDEX3(k0,m_N1-2,k2, m_N0,m_N1));
477     const register double* f_001 = in.getSampleDataRO(INDEX3(k0,m_N1-2,k2+1, m_N0,m_N1));
478     const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,m_N1-2,k2+1, m_N0,m_N1));
479     const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,m_N1-2,k2, m_N0,m_N1));
480     double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE0));
481     for (index_t i=0; i < numComp; ++i) {
482 caltinay 3731 const double V0=((f_110[i]-f_010[i])*C6 + (f_111[i]-f_011[i])*C2) / h0;
483     const double V1=((f_110[i]-f_010[i])*C2 + (f_111[i]-f_011[i])*C6) / h0;
484     const double V2=((f_011[i]-f_010[i])*C6 + (f_111[i]-f_110[i])*C2) / h2;
485     const double V3=((f_011[i]-f_010[i])*C2 + (f_111[i]-f_110[i])*C6) / h2;
486     o[INDEX3(i,0,0,numComp,3)] = V0;
487     o[INDEX3(i,1,0,numComp,3)] = ((f_010[i]-f_000[i])*C5 + (f_111[i]-f_101[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / h1;
488     o[INDEX3(i,2,0,numComp,3)] = V2;
489     o[INDEX3(i,0,1,numComp,3)] = V0;
490     o[INDEX3(i,1,1,numComp,3)] = ((f_110[i]-f_100[i])*C5 + (f_011[i]-f_001[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / h1;
491     o[INDEX3(i,2,1,numComp,3)] = V3;
492     o[INDEX3(i,0,2,numComp,3)] = V1;
493     o[INDEX3(i,1,2,numComp,3)] = ((f_011[i]-f_001[i])*C5 + (f_110[i]-f_100[i])*C0 + (f_010[i]+f_111[i]-f_000[i]-f_101[i])*C1) / h1;
494     o[INDEX3(i,2,2,numComp,3)] = V2;
495     o[INDEX3(i,0,3,numComp,3)] = V1;
496     o[INDEX3(i,1,3,numComp,3)] = ((f_111[i]-f_101[i])*C5 + (f_010[i]-f_000[i])*C0 + (f_011[i]+f_110[i]-f_001[i]-f_100[i])*C1) / h1;
497     o[INDEX3(i,2,3,numComp,3)] = V3;
498 caltinay 3722 } /* end of component loop i */
499     } /* end of k0 loop */
500     } /* end of k2 loop */
501     } /* end of face 3 */
502     if (m_faceOffset[4] > -1) {
503     #pragma omp for nowait
504 caltinay 3707 for (index_t k1=0; k1 < m_NE1; ++k1) {
505 caltinay 3722 for (index_t k0=0; k0 < m_NE0; ++k0) {
506     const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,0, m_N0,m_N1));
507     const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,0, m_N0,m_N1));
508     const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,0, m_N0,m_N1));
509     const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,0, m_N0,m_N1));
510     const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,1, m_N0,m_N1));
511     const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,1, m_N0,m_N1));
512     const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,1, m_N0,m_N1));
513     const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,1, m_N0,m_N1));
514     double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE0));
515     for (index_t i=0; i < numComp; ++i) {
516 caltinay 3731 const double V0=((f_100[i]-f_000[i])*C6 + (f_110[i]-f_010[i])*C2) / h0;
517     const double V1=((f_100[i]-f_000[i])*C2 + (f_110[i]-f_010[i])*C6) / h0;
518     const double V2=((f_010[i]-f_000[i])*C6 + (f_110[i]-f_100[i])*C2) / h1;
519     const double V3=((f_010[i]-f_000[i])*C2 + (f_110[i]-f_100[i])*C6) / h1;
520     o[INDEX3(i,0,0,numComp,3)] = V0;
521     o[INDEX3(i,1,0,numComp,3)] = V2;
522     o[INDEX3(i,2,0,numComp,3)] = ((f_001[i]-f_000[i])*C5 + (f_111[i]-f_110[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / h2;
523     o[INDEX3(i,0,1,numComp,3)] = V0;
524     o[INDEX3(i,1,1,numComp,3)] = V3;
525     o[INDEX3(i,2,1,numComp,3)] = ((f_101[i]-f_100[i])*C5 + (f_011[i]-f_010[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / h2;
526     o[INDEX3(i,0,2,numComp,3)] = V1;
527     o[INDEX3(i,1,2,numComp,3)] = V2;
528     o[INDEX3(i,2,2,numComp,3)] = ((f_011[i]-f_010[i])*C5 + (f_101[i]-f_100[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / h2;
529     o[INDEX3(i,0,3,numComp,3)] = V1;
530     o[INDEX3(i,1,3,numComp,3)] = V3;
531     o[INDEX3(i,2,3,numComp,3)] = ((f_111[i]-f_110[i])*C5 + (f_001[i]-f_000[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / h2;
532 caltinay 3722 } /* end of component loop i */
533     } /* end of k0 loop */
534 caltinay 3707 } /* end of k1 loop */
535 caltinay 3722 } /* end of face 4 */
536     if (m_faceOffset[5] > -1) {
537     #pragma omp for nowait
538 caltinay 3707 for (index_t k1=0; k1 < m_NE1; ++k1) {
539 caltinay 3722 for (index_t k0=0; k0 < m_NE0; ++k0) {
540     const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,m_N2-1, m_N0,m_N1));
541     const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,m_N2-1, m_N0,m_N1));
542     const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,m_N2-1, m_N0,m_N1));
543     const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,m_N2-1, m_N0,m_N1));
544     const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,m_N2-2, m_N0,m_N1));
545     const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,m_N2-2, m_N0,m_N1));
546     const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,m_N2-2, m_N0,m_N1));
547     const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,m_N2-2, m_N0,m_N1));
548     double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE0));
549     for (index_t i=0; i < numComp; ++i) {
550 caltinay 3731 const double V0=((f_101[i]-f_001[i])*C6 + (f_111[i]-f_011[i])*C2) / h0;
551     const double V1=((f_101[i]-f_001[i])*C2 + (f_111[i]-f_011[i])*C6) / h0;
552     const double V2=((f_011[i]-f_001[i])*C6 + (f_111[i]-f_101[i])*C2) / h1;
553     const double V3=((f_011[i]-f_001[i])*C2 + (f_111[i]-f_101[i])*C6) / h1;
554     o[INDEX3(i,0,0,numComp,3)] = V0;
555     o[INDEX3(i,1,0,numComp,3)] = V2;
556     o[INDEX3(i,2,0,numComp,3)] = ((f_001[i]-f_000[i])*C5 + (f_111[i]-f_110[i])*C0 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / h2;
557     o[INDEX3(i,0,1,numComp,3)] = V0;
558     o[INDEX3(i,1,1,numComp,3)] = V3;
559     o[INDEX3(i,2,1,numComp,3)] = ((f_011[i]-f_010[i])*C0 + (f_101[i]-f_100[i])*C5 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / h2;
560     o[INDEX3(i,0,2,numComp,3)] = V1;
561     o[INDEX3(i,1,2,numComp,3)] = V2;
562     o[INDEX3(i,2,2,numComp,3)] = ((f_011[i]-f_010[i])*C5 + (f_101[i]-f_100[i])*C0 + (f_001[i]+f_111[i]-f_000[i]-f_110[i])*C1) / h2;
563     o[INDEX3(i,0,3,numComp,3)] = V1;
564     o[INDEX3(i,1,3,numComp,3)] = V3;
565     o[INDEX3(i,2,3,numComp,3)] = ((f_001[i]-f_000[i])*C0 + (f_111[i]-f_110[i])*C5 + (f_011[i]+f_101[i]-f_010[i]-f_100[i])*C1) / h2;
566 caltinay 3722 } /* end of component loop i */
567     } /* end of k0 loop */
568 caltinay 3707 } /* end of k1 loop */
569 caltinay 3722 } /* end of face 5 */
570     } // end of parallel section
571 caltinay 3707 /* GENERATOR SNIP_GRAD_FACES BOTTOM */
572 caltinay 3711 } else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
573 caltinay 3731 /*** GENERATOR SNIP_GRAD_REDUCED_FACES TOP */
574 caltinay 3722 #pragma omp parallel
575     {
576     if (m_faceOffset[0] > -1) {
577     #pragma omp for nowait
578     for (index_t k2=0; k2 < m_NE2; ++k2) {
579     for (index_t k1=0; k1 < m_NE1; ++k1) {
580     const register double* f_000 = in.getSampleDataRO(INDEX3(0,k1,k2, m_N0,m_N1));
581     const register double* f_001 = in.getSampleDataRO(INDEX3(0,k1,k2+1, m_N0,m_N1));
582     const register double* f_101 = in.getSampleDataRO(INDEX3(1,k1,k2+1, m_N0,m_N1));
583     const register double* f_011 = in.getSampleDataRO(INDEX3(0,k1+1,k2+1, m_N0,m_N1));
584     const register double* f_100 = in.getSampleDataRO(INDEX3(1,k1,k2, m_N0,m_N1));
585     const register double* f_110 = in.getSampleDataRO(INDEX3(1,k1+1,k2, m_N0,m_N1));
586     const register double* f_010 = in.getSampleDataRO(INDEX3(0,k1+1,k2, m_N0,m_N1));
587     const register double* f_111 = in.getSampleDataRO(INDEX3(1,k1+1,k2+1, m_N0,m_N1));
588     double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE1));
589     for (index_t i=0; i < numComp; ++i) {
590 caltinay 3731 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_010[i]-f_011[i])*C3 / h0;
591     o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]-f_000[i]-f_001[i])*C4 / h1;
592     o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_011[i]-f_000[i]-f_010[i])*C4 / h2;
593 caltinay 3722 } /* end of component loop i */
594     } /* end of k1 loop */
595     } /* end of k2 loop */
596     } /* end of face 0 */
597     if (m_faceOffset[1] > -1) {
598     #pragma omp for nowait
599     for (index_t k2=0; k2 < m_NE2; ++k2) {
600     for (index_t k1=0; k1 < m_NE1; ++k1) {
601     const register double* f_000 = in.getSampleDataRO(INDEX3(m_N0-2,k1,k2, m_N0,m_N1));
602     const register double* f_001 = in.getSampleDataRO(INDEX3(m_N0-2,k1,k2+1, m_N0,m_N1));
603     const register double* f_101 = in.getSampleDataRO(INDEX3(m_N0-1,k1,k2+1, m_N0,m_N1));
604     const register double* f_011 = in.getSampleDataRO(INDEX3(m_N0-2,k1+1,k2+1, m_N0,m_N1));
605     const register double* f_100 = in.getSampleDataRO(INDEX3(m_N0-1,k1,k2, m_N0,m_N1));
606     const register double* f_110 = in.getSampleDataRO(INDEX3(m_N0-1,k1+1,k2, m_N0,m_N1));
607     const register double* f_010 = in.getSampleDataRO(INDEX3(m_N0-2,k1+1,k2, m_N0,m_N1));
608     const register double* f_111 = in.getSampleDataRO(INDEX3(m_N0-1,k1+1,k2+1, m_N0,m_N1));
609     double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE1));
610     for (index_t i=0; i < numComp; ++i) {
611 caltinay 3731 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_010[i]-f_011[i])*C3 / h0;
612     o[INDEX3(i,1,0,numComp,3)] = (f_110[i]+f_111[i]-f_100[i]-f_101[i])*C4 / h1;
613     o[INDEX3(i,2,0,numComp,3)] = (f_101[i]+f_111[i]-f_100[i]-f_110[i])*C4 / h2;
614 caltinay 3722 } /* end of component loop i */
615     } /* end of k1 loop */
616     } /* end of k2 loop */
617     } /* end of face 1 */
618     if (m_faceOffset[2] > -1) {
619     #pragma omp for nowait
620     for (index_t k2=0; k2 < m_NE2; ++k2) {
621     for (index_t k0=0; k0 < m_NE0; ++k0) {
622     const register double* f_000 = in.getSampleDataRO(INDEX3(k0,0,k2, m_N0,m_N1));
623     const register double* f_001 = in.getSampleDataRO(INDEX3(k0,0,k2+1, m_N0,m_N1));
624     const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,0,k2+1, m_N0,m_N1));
625     const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,0,k2, m_N0,m_N1));
626     const register double* f_011 = in.getSampleDataRO(INDEX3(k0,1,k2+1, m_N0,m_N1));
627     const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,1,k2, m_N0,m_N1));
628     const register double* f_010 = in.getSampleDataRO(INDEX3(k0,1,k2, m_N0,m_N1));
629     const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,1,k2+1, m_N0,m_N1));
630     double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE0));
631     for (index_t i=0; i < numComp; ++i) {
632 caltinay 3731 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_101[i]-f_000[i]-f_001[i])*C4 / h0;
633     o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_100[i]-f_101[i])*C3 / h1;
634     o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_101[i]-f_000[i]-f_100[i])*C4 / h2;
635 caltinay 3722 } /* end of component loop i */
636     } /* end of k0 loop */
637     } /* end of k2 loop */
638     } /* end of face 2 */
639     if (m_faceOffset[3] > -1) {
640     #pragma omp for nowait
641     for (index_t k2=0; k2 < m_NE2; ++k2) {
642     for (index_t k0=0; k0 < m_NE0; ++k0) {
643     const register double* f_011 = in.getSampleDataRO(INDEX3(k0,m_N1-1,k2+1, m_N0,m_N1));
644     const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,m_N1-1,k2, m_N0,m_N1));
645     const register double* f_010 = in.getSampleDataRO(INDEX3(k0,m_N1-1,k2, m_N0,m_N1));
646     const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,m_N1-1,k2+1, m_N0,m_N1));
647     const register double* f_000 = in.getSampleDataRO(INDEX3(k0,m_N1-2,k2, m_N0,m_N1));
648     const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,m_N1-2,k2+1, m_N0,m_N1));
649     const register double* f_001 = in.getSampleDataRO(INDEX3(k0,m_N1-2,k2+1, m_N0,m_N1));
650     const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,m_N1-2,k2, m_N0,m_N1));
651     double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE0));
652     for (index_t i=0; i < numComp; ++i) {
653 caltinay 3731 o[INDEX3(i,0,0,numComp,3)] = (f_110[i]+f_111[i]-f_010[i]-f_011[i])*C4 / h0;
654     o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_011[i]+f_110[i]+f_111[i]-f_000[i]-f_001[i]-f_100[i]-f_101[i])*C3 / h1;
655     o[INDEX3(i,2,0,numComp,3)] = (f_011[i]+f_111[i]-f_010[i]-f_110[i])*C4 / h2;
656 caltinay 3722 } /* end of component loop i */
657     } /* end of k0 loop */
658     } /* end of k2 loop */
659     } /* end of face 3 */
660     if (m_faceOffset[4] > -1) {
661     #pragma omp for nowait
662 caltinay 3711 for (index_t k1=0; k1 < m_NE1; ++k1) {
663 caltinay 3722 for (index_t k0=0; k0 < m_NE0; ++k0) {
664     const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,0, m_N0,m_N1));
665     const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,0, m_N0,m_N1));
666     const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,0, m_N0,m_N1));
667     const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,0, m_N0,m_N1));
668     const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,1, m_N0,m_N1));
669     const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,1, m_N0,m_N1));
670     const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,1, m_N0,m_N1));
671     const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,1, m_N0,m_N1));
672     double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE0));
673     for (index_t i=0; i < numComp; ++i) {
674 caltinay 3731 o[INDEX3(i,0,0,numComp,3)] = (f_100[i]+f_110[i]-f_000[i]-f_010[i])*C4 / h0;
675     o[INDEX3(i,1,0,numComp,3)] = (f_010[i]+f_110[i]-f_000[i]-f_100[i])*C4 / h1;
676     o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_011[i]+f_101[i]+f_111[i]-f_000[i]-f_010[i]-f_100[i]-f_110[i])*C4 / h2;
677 caltinay 3722 } /* end of component loop i */
678     } /* end of k0 loop */
679 caltinay 3711 } /* end of k1 loop */
680 caltinay 3722 } /* end of face 4 */
681     if (m_faceOffset[5] > -1) {
682     #pragma omp for nowait
683 caltinay 3711 for (index_t k1=0; k1 < m_NE1; ++k1) {
684 caltinay 3722 for (index_t k0=0; k0 < m_NE0; ++k0) {
685     const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,m_N2-1, m_N0,m_N1));
686     const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,m_N2-1, m_N0,m_N1));
687     const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,m_N2-1, m_N0,m_N1));
688     const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,m_N2-1, m_N0,m_N1));
689     const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,m_N2-2, m_N0,m_N1));
690     const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,m_N2-2, m_N0,m_N1));
691     const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,m_N2-2, m_N0,m_N1));
692     const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,m_N2-2, m_N0,m_N1));
693     double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE0));
694     for (index_t i=0; i < numComp; ++i) {
695 caltinay 3731 o[INDEX3(i,0,0,numComp,3)] = (f_101[i]+f_111[i]-f_001[i]-f_011[i])*C4 / h0;
696     o[INDEX3(i,1,0,numComp,3)] = (f_011[i]+f_111[i]-f_001[i]-f_101[i])*C4 / h1;
697     o[INDEX3(i,2,0,numComp,3)] = (f_001[i]+f_011[i]+f_101[i]+f_111[i]-f_000[i]-f_010[i]-f_100[i]-f_110[i])*C3 / h2;
698 caltinay 3722 } /* end of component loop i */
699     } /* end of k0 loop */
700 caltinay 3711 } /* end of k1 loop */
701 caltinay 3722 } /* end of face 5 */
702     } // end of parallel section
703 caltinay 3711 /* GENERATOR SNIP_GRAD_REDUCED_FACES BOTTOM */
704 caltinay 3703 } else {
705 caltinay 3707 stringstream msg;
706     msg << "setToGradient() not implemented for "
707     << functionSpaceTypeAsString(out.getFunctionSpace().getTypeCode());
708     throw RipleyException(msg.str());
709 caltinay 3703 }
710     }
711    
712 caltinay 3713 void Brick::setToIntegrals(vector<double>& integrals, const escript::Data& arg) const
713     {
714     escript::Data& in = *const_cast<escript::Data*>(&arg);
715     const dim_t numComp = in.getDataPointSize();
716     const double h0 = m_l0/m_gNE0;
717     const double h1 = m_l1/m_gNE1;
718     const double h2 = m_l2/m_gNE2;
719     if (arg.getFunctionSpace().getTypeCode() == Elements) {
720     const double w_0 = h0*h1*h2/8.;
721     #pragma omp parallel
722     {
723     vector<double> int_local(numComp, 0);
724 caltinay 3722 #pragma omp for nowait
725 caltinay 3713 for (index_t k2 = 0; k2 < m_NE2; ++k2) {
726     for (index_t k1 = 0; k1 < m_NE1; ++k1) {
727     for (index_t k0 = 0; k0 < m_NE0; ++k0) {
728     const double* f = in.getSampleDataRO(INDEX3(k0, k1, k2, m_NE0, m_NE1));
729     for (index_t i=0; i < numComp; ++i) {
730     const register double f_0 = f[INDEX2(i,0,numComp)];
731     const register double f_1 = f[INDEX2(i,1,numComp)];
732     const register double f_2 = f[INDEX2(i,2,numComp)];
733     const register double f_3 = f[INDEX2(i,3,numComp)];
734     const register double f_4 = f[INDEX2(i,4,numComp)];
735     const register double f_5 = f[INDEX2(i,5,numComp)];
736     const register double f_6 = f[INDEX2(i,6,numComp)];
737     const register double f_7 = f[INDEX2(i,7,numComp)];
738     int_local[i]+=(f_0+f_1+f_2+f_3+f_4+f_5+f_6+f_7)*w_0;
739     } /* end of component loop i */
740     } /* end of k0 loop */
741     } /* end of k1 loop */
742     } /* end of k2 loop */
743    
744     #pragma omp critical
745     for (index_t i=0; i<numComp; i++)
746     integrals[i]+=int_local[i];
747 caltinay 3722 } // end of parallel section
748 caltinay 3713 } else if (arg.getFunctionSpace().getTypeCode() == ReducedElements) {
749     const double w_0 = h0*h1*h2;
750     #pragma omp parallel
751     {
752     vector<double> int_local(numComp, 0);
753 caltinay 3722 #pragma omp for nowait
754 caltinay 3713 for (index_t k2 = 0; k2 < m_NE2; ++k2) {
755     for (index_t k1 = 0; k1 < m_NE1; ++k1) {
756     for (index_t k0 = 0; k0 < m_NE0; ++k0) {
757     const double* f = in.getSampleDataRO(INDEX3(k0, k1, k2, m_NE0, m_NE1));
758     for (index_t i=0; i < numComp; ++i) {
759     int_local[i]+=f[i]*w_0;
760     } /* end of component loop i */
761     } /* end of k0 loop */
762     } /* end of k1 loop */
763     } /* end of k2 loop */
764    
765     #pragma omp critical
766     for (index_t i=0; i<numComp; i++)
767     integrals[i]+=int_local[i];
768 caltinay 3722 } // end of parallel section
769 caltinay 3713 } else if (arg.getFunctionSpace().getTypeCode() == FaceElements) {
770     const double w_0 = h1*h2/4.;
771     const double w_1 = h0*h2/4.;
772     const double w_2 = h0*h1/4.;
773     #pragma omp parallel
774     {
775     vector<double> int_local(numComp, 0);
776     if (m_faceOffset[0] > -1) {
777 caltinay 3722 #pragma omp for nowait
778 caltinay 3713 for (index_t k2=0; k2 < m_NE2; ++k2) {
779     for (index_t k1=0; k1 < m_NE1; ++k1) {
780     const double* f = in.getSampleDataRO(m_faceOffset[0]+INDEX2(k1,k2,m_NE1));
781     for (index_t i=0; i < numComp; ++i) {
782     const register double f_0 = f[INDEX2(i,0,numComp)];
783     const register double f_1 = f[INDEX2(i,1,numComp)];
784     const register double f_2 = f[INDEX2(i,2,numComp)];
785     const register double f_3 = f[INDEX2(i,3,numComp)];
786     int_local[i]+=(f_0+f_1+f_2+f_3)*w_0;
787     } /* end of component loop i */
788     } /* end of k1 loop */
789     } /* end of k2 loop */
790     }
791    
792     if (m_faceOffset[1] > -1) {
793 caltinay 3722 #pragma omp for nowait
794 caltinay 3713 for (index_t k2=0; k2 < m_NE2; ++k2) {
795     for (index_t k1=0; k1 < m_NE1; ++k1) {
796     const double* f = in.getSampleDataRO(m_faceOffset[1]+INDEX2(k1,k2,m_NE1));
797     for (index_t i=0; i < numComp; ++i) {
798     const register double f_0 = f[INDEX2(i,0,numComp)];
799     const register double f_1 = f[INDEX2(i,1,numComp)];
800     const register double f_2 = f[INDEX2(i,2,numComp)];
801     const register double f_3 = f[INDEX2(i,3,numComp)];
802     int_local[i]+=(f_0+f_1+f_2+f_3)*w_0;
803     } /* end of component loop i */
804     } /* end of k1 loop */
805     } /* end of k2 loop */
806     }
807    
808     if (m_faceOffset[2] > -1) {
809 caltinay 3722 #pragma omp for nowait
810 caltinay 3713 for (index_t k2=0; k2 < m_NE2; ++k2) {
811     for (index_t k0=0; k0 < m_NE0; ++k0) {
812     const double* f = in.getSampleDataRO(m_faceOffset[2]+INDEX2(k0,k2,m_NE0));
813     for (index_t i=0; i < numComp; ++i) {
814     const register double f_0 = f[INDEX2(i,0,numComp)];
815     const register double f_1 = f[INDEX2(i,1,numComp)];
816     const register double f_2 = f[INDEX2(i,2,numComp)];
817     const register double f_3 = f[INDEX2(i,3,numComp)];
818     int_local[i]+=(f_0+f_1+f_2+f_3)*w_1;
819     } /* end of component loop i */
820     } /* end of k1 loop */
821     } /* end of k2 loop */
822     }
823    
824     if (m_faceOffset[3] > -1) {
825 caltinay 3722 #pragma omp for nowait
826 caltinay 3713 for (index_t k2=0; k2 < m_NE2; ++k2) {
827     for (index_t k0=0; k0 < m_NE0; ++k0) {
828     const double* f = in.getSampleDataRO(m_faceOffset[3]+INDEX2(k0,k2,m_NE0));
829     for (index_t i=0; i < numComp; ++i) {
830     const register double f_0 = f[INDEX2(i,0,numComp)];
831     const register double f_1 = f[INDEX2(i,1,numComp)];
832     const register double f_2 = f[INDEX2(i,2,numComp)];
833     const register double f_3 = f[INDEX2(i,3,numComp)];
834     int_local[i]+=(f_0+f_1+f_2+f_3)*w_1;
835     } /* end of component loop i */
836     } /* end of k1 loop */
837     } /* end of k2 loop */
838     }
839    
840     if (m_faceOffset[4] > -1) {
841 caltinay 3722 #pragma omp for nowait
842 caltinay 3713 for (index_t k1=0; k1 < m_NE1; ++k1) {
843     for (index_t k0=0; k0 < m_NE0; ++k0) {
844     const double* f = in.getSampleDataRO(m_faceOffset[4]+INDEX2(k0,k1,m_NE0));
845     for (index_t i=0; i < numComp; ++i) {
846     const register double f_0 = f[INDEX2(i,0,numComp)];
847     const register double f_1 = f[INDEX2(i,1,numComp)];
848     const register double f_2 = f[INDEX2(i,2,numComp)];
849     const register double f_3 = f[INDEX2(i,3,numComp)];
850     int_local[i]+=(f_0+f_1+f_2+f_3)*w_2;
851     } /* end of component loop i */
852     } /* end of k1 loop */
853     } /* end of k2 loop */
854     }
855    
856     if (m_faceOffset[5] > -1) {
857 caltinay 3722 #pragma omp for nowait
858 caltinay 3713 for (index_t k1=0; k1 < m_NE1; ++k1) {
859     for (index_t k0=0; k0 < m_NE0; ++k0) {
860     const double* f = in.getSampleDataRO(m_faceOffset[5]+INDEX2(k0,k1,m_NE0));
861     for (index_t i=0; i < numComp; ++i) {
862     const register double f_0 = f[INDEX2(i,0,numComp)];
863     const register double f_1 = f[INDEX2(i,1,numComp)];
864     const register double f_2 = f[INDEX2(i,2,numComp)];
865     const register double f_3 = f[INDEX2(i,3,numComp)];
866     int_local[i]+=(f_0+f_1+f_2+f_3)*w_2;
867     } /* end of component loop i */
868     } /* end of k1 loop */
869     } /* end of k2 loop */
870     }
871    
872     #pragma omp critical
873     for (index_t i=0; i<numComp; i++)
874     integrals[i]+=int_local[i];
875 caltinay 3722 } // end of parallel section
876 caltinay 3713
877     } else if (arg.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
878     const double w_0 = h1*h2;
879     const double w_1 = h0*h2;
880     const double w_2 = h0*h1;
881     #pragma omp parallel
882     {
883     vector<double> int_local(numComp, 0);
884     if (m_faceOffset[0] > -1) {
885 caltinay 3722 #pragma omp for nowait
886 caltinay 3713 for (index_t k2=0; k2 < m_NE2; ++k2) {
887     for (index_t k1=0; k1 < m_NE1; ++k1) {
888     const double* f = in.getSampleDataRO(m_faceOffset[0]+INDEX2(k1,k2,m_NE1));
889     for (index_t i=0; i < numComp; ++i) {
890     int_local[i]+=f[i]*w_0;
891     } /* end of component loop i */
892     } /* end of k1 loop */
893     } /* end of k2 loop */
894     }
895    
896     if (m_faceOffset[1] > -1) {
897 caltinay 3722 #pragma omp for nowait
898 caltinay 3713 for (index_t k2=0; k2 < m_NE2; ++k2) {
899     for (index_t k1=0; k1 < m_NE1; ++k1) {
900     const double* f = in.getSampleDataRO(m_faceOffset[1]+INDEX2(k1,k2,m_NE1));
901     for (index_t i=0; i < numComp; ++i) {
902     int_local[i]+=f[i]*w_0;
903     } /* end of component loop i */
904     } /* end of k1 loop */
905     } /* end of k2 loop */
906     }
907    
908     if (m_faceOffset[2] > -1) {
909 caltinay 3722 #pragma omp for nowait
910 caltinay 3713 for (index_t k2=0; k2 < m_NE2; ++k2) {
911     for (index_t k0=0; k0 < m_NE0; ++k0) {
912     const double* f = in.getSampleDataRO(m_faceOffset[2]+INDEX2(k0,k2,m_NE0));
913     for (index_t i=0; i < numComp; ++i) {
914     int_local[i]+=f[i]*w_1;
915     } /* end of component loop i */
916     } /* end of k1 loop */
917     } /* end of k2 loop */
918     }
919    
920     if (m_faceOffset[3] > -1) {
921 caltinay 3722 #pragma omp for nowait
922 caltinay 3713 for (index_t k2=0; k2 < m_NE2; ++k2) {
923     for (index_t k0=0; k0 < m_NE0; ++k0) {
924     const double* f = in.getSampleDataRO(m_faceOffset[3]+INDEX2(k0,k2,m_NE0));
925     for (index_t i=0; i < numComp; ++i) {
926     int_local[i]+=f[i]*w_1;
927     } /* end of component loop i */
928     } /* end of k1 loop */
929     } /* end of k2 loop */
930     }
931    
932     if (m_faceOffset[4] > -1) {
933 caltinay 3722 #pragma omp for nowait
934 caltinay 3713 for (index_t k1=0; k1 < m_NE1; ++k1) {
935     for (index_t k0=0; k0 < m_NE0; ++k0) {
936     const double* f = in.getSampleDataRO(m_faceOffset[4]+INDEX2(k0,k1,m_NE0));
937     for (index_t i=0; i < numComp; ++i) {
938     int_local[i]+=f[i]*w_2;
939     } /* end of component loop i */
940     } /* end of k1 loop */
941     } /* end of k2 loop */
942     }
943    
944     if (m_faceOffset[5] > -1) {
945 caltinay 3722 #pragma omp for nowait
946 caltinay 3713 for (index_t k1=0; k1 < m_NE1; ++k1) {
947     for (index_t k0=0; k0 < m_NE0; ++k0) {
948     const double* f = in.getSampleDataRO(m_faceOffset[5]+INDEX2(k0,k1,m_NE0));
949     for (index_t i=0; i < numComp; ++i) {
950     int_local[i]+=f[i]*w_2;
951     } /* end of component loop i */
952     } /* end of k1 loop */
953     } /* end of k2 loop */
954     }
955    
956     #pragma omp critical
957     for (index_t i=0; i<numComp; i++)
958     integrals[i]+=int_local[i];
959 caltinay 3722 } // end of parallel section
960 caltinay 3713
961     } else {
962     stringstream msg;
963     msg << "setToIntegrals() not implemented for "
964     << functionSpaceTypeAsString(arg.getFunctionSpace().getTypeCode());
965     throw RipleyException(msg.str());
966     }
967     }
968    
969 caltinay 3722 void Brick::setToNormal(escript::Data& out) const
970     {
971     if (out.getFunctionSpace().getTypeCode() == FaceElements) {
972     #pragma omp parallel
973     {
974     if (m_faceOffset[0] > -1) {
975     #pragma omp for nowait
976     for (index_t k2 = 0; k2 < m_NE2; ++k2) {
977     for (index_t k1 = 0; k1 < m_NE1; ++k1) {
978     double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE1));
979     // set vector at four quadrature points
980     *o++ = -1.; *o++ = 0.; *o++ = 0.;
981     *o++ = -1.; *o++ = 0.; *o++ = 0.;
982     *o++ = -1.; *o++ = 0.; *o++ = 0.;
983     *o++ = -1.; *o++ = 0.; *o = 0.;
984     }
985     }
986     }
987    
988     if (m_faceOffset[1] > -1) {
989     #pragma omp for nowait
990     for (index_t k2 = 0; k2 < m_NE2; ++k2) {
991     for (index_t k1 = 0; k1 < m_NE1; ++k1) {
992     double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE1));
993     // set vector at four quadrature points
994     *o++ = 1.; *o++ = 0.; *o++ = 0.;
995     *o++ = 1.; *o++ = 0.; *o++ = 0.;
996     *o++ = 1.; *o++ = 0.; *o++ = 0.;
997     *o++ = 1.; *o++ = 0.; *o = 0.;
998     }
999     }
1000     }
1001    
1002     if (m_faceOffset[2] > -1) {
1003     #pragma omp for nowait
1004     for (index_t k2 = 0; k2 < m_NE2; ++k2) {
1005     for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1006     double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE0));
1007     // set vector at four quadrature points
1008     *o++ = 0.; *o++ = -1.; *o++ = 0.;
1009     *o++ = 0.; *o++ = -1.; *o++ = 0.;
1010     *o++ = 0.; *o++ = -1.; *o++ = 0.;
1011     *o++ = 0.; *o++ = -1.; *o = 0.;
1012     }
1013     }
1014     }
1015    
1016     if (m_faceOffset[3] > -1) {
1017     #pragma omp for nowait
1018     for (index_t k2 = 0; k2 < m_NE2; ++k2) {
1019     for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1020     double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE0));
1021     // set vector at four quadrature points
1022     *o++ = 0.; *o++ = 1.; *o++ = 0.;
1023     *o++ = 0.; *o++ = 1.; *o++ = 0.;
1024     *o++ = 0.; *o++ = 1.; *o++ = 0.;
1025     *o++ = 0.; *o++ = 1.; *o = 0.;
1026     }
1027     }
1028     }
1029    
1030     if (m_faceOffset[4] > -1) {
1031     #pragma omp for nowait
1032     for (index_t k1 = 0; k1 < m_NE1; ++k1) {
1033     for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1034     double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE0));
1035     // set vector at four quadrature points
1036     *o++ = 0.; *o++ = 0.; *o++ = -1.;
1037     *o++ = 0.; *o++ = 0.; *o++ = -1.;
1038     *o++ = 0.; *o++ = 0.; *o++ = -1.;
1039     *o++ = 0.; *o++ = 0.; *o = -1.;
1040     }
1041     }
1042     }
1043    
1044     if (m_faceOffset[5] > -1) {
1045     #pragma omp for nowait
1046     for (index_t k1 = 0; k1 < m_NE1; ++k1) {
1047     for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1048     double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE0));
1049     // set vector at four quadrature points
1050     *o++ = 0.; *o++ = 0.; *o++ = 1.;
1051     *o++ = 0.; *o++ = 0.; *o++ = 1.;
1052     *o++ = 0.; *o++ = 0.; *o++ = 1.;
1053     *o++ = 0.; *o++ = 0.; *o = 1.;
1054     }
1055     }
1056     }
1057     } // end of parallel section
1058     } else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
1059     #pragma omp parallel
1060     {
1061     if (m_faceOffset[0] > -1) {
1062     #pragma omp for nowait
1063     for (index_t k2 = 0; k2 < m_NE2; ++k2) {
1064     for (index_t k1 = 0; k1 < m_NE1; ++k1) {
1065     double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE1));
1066     *o++ = -1.;
1067     *o++ = 0.;
1068     *o = 0.;
1069     }
1070     }
1071     }
1072    
1073     if (m_faceOffset[1] > -1) {
1074     #pragma omp for nowait
1075     for (index_t k2 = 0; k2 < m_NE2; ++k2) {
1076     for (index_t k1 = 0; k1 < m_NE1; ++k1) {
1077     double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE1));
1078     *o++ = 1.;
1079     *o++ = 0.;
1080     *o = 0.;
1081     }
1082     }
1083     }
1084    
1085     if (m_faceOffset[2] > -1) {
1086     #pragma omp for nowait
1087     for (index_t k2 = 0; k2 < m_NE2; ++k2) {
1088     for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1089     double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE0));
1090     *o++ = 0.;
1091     *o++ = -1.;
1092     *o = 0.;
1093     }
1094     }
1095     }
1096    
1097     if (m_faceOffset[3] > -1) {
1098     #pragma omp for nowait
1099     for (index_t k2 = 0; k2 < m_NE2; ++k2) {
1100     for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1101     double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE0));
1102     *o++ = 0.;
1103     *o++ = 1.;
1104     *o = 0.;
1105     }
1106     }
1107     }
1108    
1109     if (m_faceOffset[4] > -1) {
1110     #pragma omp for nowait
1111     for (index_t k1 = 0; k1 < m_NE1; ++k1) {
1112     for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1113     double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE0));
1114     *o++ = 0.;
1115     *o++ = 0.;
1116     *o = -1.;
1117     }
1118     }
1119     }
1120    
1121     if (m_faceOffset[5] > -1) {
1122     #pragma omp for nowait
1123     for (index_t k1 = 0; k1 < m_NE1; ++k1) {
1124     for (index_t k0 = 0; k0 < m_NE0; ++k0) {
1125     double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE0));
1126     *o++ = 0.;
1127     *o++ = 0.;
1128     *o = 1.;
1129     }
1130     }
1131     }
1132     } // end of parallel section
1133    
1134     } else {
1135     stringstream msg;
1136     msg << "setToNormal() not implemented for "
1137     << functionSpaceTypeAsString(out.getFunctionSpace().getTypeCode());
1138     throw RipleyException(msg.str());
1139     }
1140     }
1141    
1142 caltinay 3691 Paso_SystemMatrixPattern* Brick::getPattern(bool reducedRowOrder,
1143     bool reducedColOrder) const
1144     {
1145     if (reducedRowOrder || reducedColOrder)
1146     throw RipleyException("getPattern() not implemented for reduced order");
1147    
1148     throw RipleyException("getPattern() not implemented");
1149     }
1150    
1151     void Brick::Print_Mesh_Info(const bool full) const
1152     {
1153     RipleyDomain::Print_Mesh_Info(full);
1154     if (full) {
1155     cout << " Id Coordinates" << endl;
1156     cout.precision(15);
1157     cout.setf(ios::scientific, ios::floatfield);
1158 caltinay 3698 pair<double,double> xdx = getFirstCoordAndSpacing(0);
1159     pair<double,double> ydy = getFirstCoordAndSpacing(1);
1160     pair<double,double> zdz = getFirstCoordAndSpacing(2);
1161 caltinay 3691 for (index_t i=0; i < getNumNodes(); i++) {
1162     cout << " " << setw(5) << m_nodeId[i]
1163 caltinay 3698 << " " << xdx.first+(i%m_N0)*xdx.second
1164     << " " << ydy.first+(i%(m_N0*m_N1)/m_N0)*ydy.second
1165     << " " << zdz.first+(i/(m_N0*m_N1))*zdz.second << endl;
1166 caltinay 3691 }
1167     }
1168     }
1169    
1170 caltinay 3698 IndexVector Brick::getNumNodesPerDim() const
1171     {
1172     IndexVector ret;
1173     ret.push_back(m_N0);
1174     ret.push_back(m_N1);
1175     ret.push_back(m_N2);
1176     return ret;
1177     }
1178    
1179     IndexVector Brick::getNumElementsPerDim() const
1180     {
1181     IndexVector ret;
1182     ret.push_back(m_NE0);
1183     ret.push_back(m_NE1);
1184     ret.push_back(m_NE2);
1185     return ret;
1186     }
1187    
1188     IndexVector Brick::getNumFacesPerBoundary() const
1189     {
1190     IndexVector ret(6, 0);
1191     //left
1192     if (m_offset0==0)
1193     ret[0]=m_NE1*m_NE2;
1194     //right
1195     if (m_mpiInfo->rank%m_NX==m_NX-1)
1196     ret[1]=m_NE1*m_NE2;
1197     //bottom
1198     if (m_offset1==0)
1199     ret[2]=m_NE0*m_NE2;
1200     //top
1201     if (m_mpiInfo->rank%(m_NX*m_NY)/m_NX==m_NY-1)
1202     ret[3]=m_NE0*m_NE2;
1203     //front
1204     if (m_offset2==0)
1205     ret[4]=m_NE0*m_NE1;
1206     //back
1207     if (m_mpiInfo->rank/(m_NX*m_NY)==m_NZ-1)
1208     ret[5]=m_NE0*m_NE1;
1209     return ret;
1210     }
1211    
1212     pair<double,double> Brick::getFirstCoordAndSpacing(dim_t dim) const
1213     {
1214     if (dim==0)
1215     return pair<double,double>((m_l0*m_offset0)/m_gNE0, m_l0/m_gNE0);
1216     else if (dim==1)
1217     return pair<double,double>((m_l1*m_offset1)/m_gNE1, m_l1/m_gNE1);
1218     else if (dim==2)
1219     return pair<double,double>((m_l2*m_offset2)/m_gNE2, m_l2/m_gNE2);
1220    
1221     throw RipleyException("getFirstCoordAndSpacing(): invalid argument");
1222     }
1223    
1224    
1225 caltinay 3691 //protected
1226     dim_t Brick::getNumFaceElements() const
1227     {
1228     dim_t n=0;
1229     //left
1230     if (m_offset0==0)
1231     n+=m_NE1*m_NE2;
1232     //right
1233     if (m_mpiInfo->rank%m_NX==m_NX-1)
1234     n+=m_NE1*m_NE2;
1235     //bottom
1236     if (m_offset1==0)
1237     n+=m_NE0*m_NE2;
1238     //top
1239     if (m_mpiInfo->rank%(m_NX*m_NY)/m_NX==m_NY-1)
1240     n+=m_NE0*m_NE2;
1241     //front
1242     if (m_offset2==0)
1243     n+=m_NE0*m_NE1;
1244     //back
1245     if (m_mpiInfo->rank/(m_NX*m_NY)==m_NZ-1)
1246     n+=m_NE0*m_NE1;
1247    
1248     return n;
1249     }
1250    
1251     //protected
1252     void Brick::assembleCoordinates(escript::Data& arg) const
1253     {
1254     escriptDataC x = arg.getDataC();
1255     int numDim = m_numDim;
1256     if (!isDataPointShapeEqual(&x, 1, &numDim))
1257     throw RipleyException("setToX: Invalid Data object shape");
1258     if (!numSamplesEqual(&x, 1, getNumNodes()))
1259     throw RipleyException("setToX: Illegal number of samples in Data object");
1260    
1261 caltinay 3698 pair<double,double> xdx = getFirstCoordAndSpacing(0);
1262     pair<double,double> ydy = getFirstCoordAndSpacing(1);
1263     pair<double,double> zdz = getFirstCoordAndSpacing(2);
1264 caltinay 3691 arg.requireWrite();
1265     #pragma omp parallel for
1266     for (dim_t i2 = 0; i2 < m_N2; i2++) {
1267     for (dim_t i1 = 0; i1 < m_N1; i1++) {
1268     for (dim_t i0 = 0; i0 < m_N0; i0++) {
1269     double* point = arg.getSampleDataRW(i0+m_N0*i1+m_N0*m_N1*i2);
1270 caltinay 3698 point[0] = xdx.first+i0*xdx.second;
1271     point[1] = ydy.first+i1*ydy.second;
1272     point[2] = zdz.first+i2*zdz.second;
1273 caltinay 3691 }
1274     }
1275     }
1276     }
1277    
1278     //private
1279     void Brick::populateSampleIds()
1280     {
1281 caltinay 3697 // identifiers are ordered from left to right, bottom to top, front to back
1282     // on each rank, except for the shared nodes which are owned by the rank
1283     // below / to the left / to the front of the current rank
1284 caltinay 3698
1285     // build node distribution vector first.
1286     // m_nodeDistribution[i] is the first node id on rank i, that is
1287     // rank i owns m_nodeDistribution[i+1]-nodeDistribution[i] nodes
1288     m_nodeDistribution.assign(m_mpiInfo->size+1, 0);
1289     m_nodeDistribution[1]=getNumNodes();
1290     for (dim_t k=1; k<m_mpiInfo->size-1; k++) {
1291     const index_t x = k%m_NX;
1292     const index_t y = k%(m_NX*m_NY)/m_NX;
1293     const index_t z = k/(m_NX*m_NY);
1294     index_t numNodes=getNumNodes();
1295     if (x>0)
1296     numNodes-=m_N1*m_N2;
1297     if (y>0)
1298     numNodes-=m_N0*m_N2;
1299     if (z>0)
1300     numNodes-=m_N0*m_N1;
1301     // if an edge was subtracted twice add it back
1302     if (x>0 && y>0)
1303     numNodes+=m_N2;
1304     if (x>0 && z>0)
1305     numNodes+=m_N1;
1306     if (y>0 && z>0)
1307     numNodes+=m_N0;
1308     // the corner node was removed 3x and added back 3x, so subtract it
1309     if (x>0 && y>0 && z>0)
1310     numNodes--;
1311     m_nodeDistribution[k+1]=m_nodeDistribution[k]+numNodes;
1312     }
1313     m_nodeDistribution[m_mpiInfo->size]=getNumDataPointsGlobal();
1314    
1315 caltinay 3691 m_nodeId.resize(getNumNodes());
1316 caltinay 3698
1317     // the bottom, left and front planes are not owned by this rank so the
1318     // identifiers need to be computed accordingly
1319     const index_t left = (m_offset0==0 ? 0 : 1);
1320     const index_t bottom = (m_offset1==0 ? 0 : 1);
1321     const index_t front = (m_offset2==0 ? 0 : 1);
1322    
1323     // case 1: all nodes on left plane are owned by rank on the left
1324     if (left>0) {
1325     const int neighbour=m_mpiInfo->rank-1;
1326     const index_t leftN0=(neighbour%m_NX == 0 ? m_N0 : m_N0-1);
1327     const index_t leftN1=(neighbour%(m_NX*m_NY)/m_NX==0 ? m_N1 : m_N1-1);
1328 caltinay 3691 #pragma omp parallel for
1329 caltinay 3698 for (dim_t i2=front; i2<m_N2; i2++) {
1330     for (dim_t i1=bottom; i1<m_N1; i1++) {
1331     m_nodeId[i1*m_N0+i2*m_N0*m_N1]=m_nodeDistribution[neighbour]
1332     + (i1-bottom+1)*leftN0
1333     + (i2-front)*leftN0*leftN1 - 1;
1334     }
1335     }
1336 caltinay 3691 }
1337 caltinay 3698 // case 2: all nodes on bottom plane are owned by rank below
1338     if (bottom>0) {
1339     const int neighbour=m_mpiInfo->rank-m_NX;
1340     const index_t bottomN0=(neighbour%m_NX == 0 ? m_N0 : m_N0-1);
1341     const index_t bottomN1=(neighbour%(m_NX*m_NY)/m_NX==0 ? m_N1 : m_N1-1);
1342     #pragma omp parallel for
1343     for (dim_t i2=front; i2<m_N2; i2++) {
1344     for (dim_t i0=left; i0<m_N0; i0++) {
1345     m_nodeId[i0+i2*m_N0*m_N1]=m_nodeDistribution[neighbour]
1346     + bottomN0*(bottomN1-1)
1347     + (i2-front)*bottomN0*bottomN1 + i0-left;
1348     }
1349     }
1350     }
1351     // case 3: all nodes on front plane are owned by rank in front
1352     if (front>0) {
1353     const int neighbour=m_mpiInfo->rank-m_NX*m_NY;
1354     const index_t N0=(neighbour%m_NX == 0 ? m_N0 : m_N0-1);
1355     const index_t N1=(neighbour%(m_NX*m_NY)/m_NX==0 ? m_N1 : m_N1-1);
1356     const index_t N2=(neighbour/(m_NX*m_NY)==0 ? m_N2 : m_N2-1);
1357     #pragma omp parallel for
1358     for (dim_t i1=bottom; i1<m_N1; i1++) {
1359     for (dim_t i0=left; i0<m_N0; i0++) {
1360     m_nodeId[i0+i1*m_N0]=m_nodeDistribution[neighbour]
1361     + N0*N1*(N2-1)+(i1-bottom)*N0 + i0-left;
1362     }
1363     }
1364     }
1365     // case 4: nodes on front bottom edge are owned by the corresponding rank
1366     if (front>0 && bottom>0) {
1367     const int neighbour=m_mpiInfo->rank-m_NX*(m_NY+1);
1368     const index_t N0=(neighbour%m_NX == 0 ? m_N0 : m_N0-1);
1369     const index_t N1=(neighbour%(m_NX*m_NY)/m_NX==0 ? m_N1 : m_N1-1);
1370     const index_t N2=(neighbour/(m_NX*m_NY)==0 ? m_N2 : m_N2-1);
1371     #pragma omp parallel for
1372     for (dim_t i0=left; i0<m_N0; i0++) {
1373     m_nodeId[i0]=m_nodeDistribution[neighbour]
1374     + N0*N1*(N2-1)+(N1-1)*N0 + i0-left;
1375     }
1376     }
1377     // case 5: nodes on left bottom edge are owned by the corresponding rank
1378     if (left>0 && bottom>0) {
1379     const int neighbour=m_mpiInfo->rank-m_NX-1;
1380     const index_t N0=(neighbour%m_NX == 0 ? m_N0 : m_N0-1);
1381     const index_t N1=(neighbour%(m_NX*m_NY)/m_NX==0 ? m_N1 : m_N1-1);
1382     #pragma omp parallel for
1383     for (dim_t i2=front; i2<m_N2; i2++) {
1384     m_nodeId[i2*m_N0*m_N1]=m_nodeDistribution[neighbour]
1385     + (1+i2-front)*N0*N1-1;
1386     }
1387     }
1388     // case 6: nodes on left front edge are owned by the corresponding rank
1389     if (left>0 && front>0) {
1390     const int neighbour=m_mpiInfo->rank-m_NX*m_NY-1;
1391     const index_t N0=(neighbour%m_NX == 0 ? m_N0 : m_N0-1);
1392     const index_t N1=(neighbour%(m_NX*m_NY)/m_NX==0 ? m_N1 : m_N1-1);
1393     const index_t N2=(neighbour/(m_NX*m_NY)==0 ? m_N2 : m_N2-1);
1394     #pragma omp parallel for
1395     for (dim_t i1=bottom; i1<m_N1; i1++) {
1396     m_nodeId[i1*m_N0]=m_nodeDistribution[neighbour]
1397     + N0*N1*(N2-1)+N0-1+(i1-bottom)*N0;
1398     }
1399     }
1400     // case 7: bottom-left-front corner node owned by corresponding rank
1401     if (left>0 && bottom>0 && front>0) {
1402     const int neighbour=m_mpiInfo->rank-m_NX*(m_NY+1)-1;
1403     const index_t N0=(neighbour%m_NX == 0 ? m_N0 : m_N0-1);
1404     const index_t N1=(neighbour%(m_NX*m_NY)/m_NX==0 ? m_N1 : m_N1-1);
1405     const index_t N2=(neighbour/(m_NX*m_NY) == 0 ? m_N2 : m_N2-1);
1406     m_nodeId[0]=m_nodeDistribution[neighbour]+N0*N1*N2-1;
1407     }
1408 caltinay 3697
1409 caltinay 3698 // the rest of the id's are contiguous
1410     const index_t firstId=m_nodeDistribution[m_mpiInfo->rank];
1411     #pragma omp parallel for
1412     for (dim_t i2=front; i2<m_N2; i2++) {
1413     for (dim_t i1=bottom; i1<m_N1; i1++) {
1414     for (dim_t i0=left; i0<m_N0; i0++) {
1415     m_nodeId[i0+i1*m_N0+i2*m_N0*m_N1] = firstId+i0-left
1416     +(i1-bottom)*(m_N0-left)
1417     +(i2-front)*(m_N0-left)*(m_N1-bottom);
1418     }
1419     }
1420     }
1421 caltinay 3735 m_nodeTags.assign(getNumNodes(), 0);
1422     updateTagsInUse(Nodes);
1423 caltinay 3698
1424 caltinay 3697 // elements
1425     m_elementId.resize(getNumElements());
1426     #pragma omp parallel for
1427     for (dim_t k=0; k<getNumElements(); k++) {
1428     m_elementId[k]=k;
1429     }
1430 caltinay 3735 m_elementTags.assign(getNumElements(), 0);
1431     updateTagsInUse(Elements);
1432 caltinay 3697
1433     // face elements
1434     m_faceId.resize(getNumFaceElements());
1435     #pragma omp parallel for
1436     for (dim_t k=0; k<getNumFaceElements(); k++) {
1437     m_faceId[k]=k;
1438     }
1439 caltinay 3704
1440 caltinay 3722 // generate face offset vector and set face tags
1441 caltinay 3704 const IndexVector facesPerEdge = getNumFacesPerBoundary();
1442 caltinay 3722 const index_t LEFT=1, RIGHT=2, BOTTOM=10, TOP=20, FRONT=100, BACK=200;
1443     const index_t faceTag[] = { LEFT, RIGHT, BOTTOM, TOP, FRONT, BACK };
1444 caltinay 3704 m_faceOffset.assign(facesPerEdge.size(), -1);
1445 caltinay 3722 m_faceTags.clear();
1446 caltinay 3704 index_t offset=0;
1447     for (size_t i=0; i<facesPerEdge.size(); i++) {
1448     if (facesPerEdge[i]>0) {
1449     m_faceOffset[i]=offset;
1450     offset+=facesPerEdge[i];
1451 caltinay 3722 m_faceTags.insert(m_faceTags.end(), facesPerEdge[i], faceTag[i]);
1452 caltinay 3704 }
1453     }
1454 caltinay 3722 setTagMap("left", LEFT);
1455     setTagMap("right", RIGHT);
1456     setTagMap("bottom", BOTTOM);
1457     setTagMap("top", TOP);
1458     setTagMap("front", FRONT);
1459     setTagMap("back", BACK);
1460     updateTagsInUse(FaceElements);
1461 caltinay 3691 }
1462    
1463 caltinay 3703 //protected
1464 caltinay 3711 void Brick::interpolateNodesOnElements(escript::Data& out, escript::Data& in,
1465     bool reduced) const
1466 caltinay 3703 {
1467     const dim_t numComp = in.getDataPointSize();
1468 caltinay 3711 if (reduced) {
1469 caltinay 3731 /*** GENERATOR SNIP_INTERPOLATE_REDUCED_ELEMENTS TOP */
1470     const double c0 = .125;
1471 caltinay 3703 #pragma omp parallel for
1472 caltinay 3711 for (index_t k2=0; k2 < m_NE2; ++k2) {
1473     for (index_t k1=0; k1 < m_NE1; ++k1) {
1474     for (index_t k0=0; k0 < m_NE0; ++k0) {
1475     const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,k2, m_N0,m_N1));
1476     const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,k2+1, m_N0,m_N1));
1477     const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,k2+1, m_N0,m_N1));
1478     const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,k2+1, m_N0,m_N1));
1479     const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,k2, m_N0,m_N1));
1480     const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2, m_N0,m_N1));
1481     const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,k2, m_N0,m_N1));
1482     const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2+1, m_N0,m_N1));
1483     double* o = out.getSampleDataRW(INDEX3(k0,k1,k2,m_NE0,m_NE1));
1484     for (index_t i=0; i < numComp; ++i) {
1485 caltinay 3731 o[INDEX2(i,numComp,0)] = c0*(f_000[i] + f_001[i] + f_010[i] + f_011[i] + f_100[i] + f_101[i] + f_110[i] + f_111[i]);
1486 caltinay 3711 } /* end of component loop i */
1487     } /* end of k0 loop */
1488     } /* end of k1 loop */
1489     } /* end of k2 loop */
1490     /* GENERATOR SNIP_INTERPOLATE_REDUCED_ELEMENTS BOTTOM */
1491     } else {
1492 caltinay 3731 /*** GENERATOR SNIP_INTERPOLATE_ELEMENTS TOP */
1493     const double c0 = .0094373878376559314545;
1494     const double c1 = .035220810900864519624;
1495     const double c2 = .13144585576580214704;
1496     const double c3 = .49056261216234406855;
1497 caltinay 3711 #pragma omp parallel for
1498     for (index_t k2=0; k2 < m_NE2; ++k2) {
1499     for (index_t k1=0; k1 < m_NE1; ++k1) {
1500     for (index_t k0=0; k0 < m_NE0; ++k0) {
1501     const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,k2, m_N0,m_N1));
1502     const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,k2+1, m_N0,m_N1));
1503     const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,k2+1, m_N0,m_N1));
1504     const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,k2+1, m_N0,m_N1));
1505     const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2, m_N0,m_N1));
1506     const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,k2, m_N0,m_N1));
1507     const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,k2, m_N0,m_N1));
1508     const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2+1, m_N0,m_N1));
1509     double* o = out.getSampleDataRW(INDEX3(k0,k1,k2,m_NE0,m_NE1));
1510     for (index_t i=0; i < numComp; ++i) {
1511 caltinay 3731 o[INDEX2(i,numComp,0)] = f_000[i]*c3 + f_111[i]*c0 + c2*(f_001[i] + f_010[i] + f_100[i]) + c1*(f_011[i] + f_101[i] + f_110[i]);
1512     o[INDEX2(i,numComp,1)] = f_011[i]*c0 + f_100[i]*c3 + c2*(f_000[i] + f_101[i] + f_110[i]) + c1*(f_001[i] + f_010[i] + f_111[i]);
1513     o[INDEX2(i,numComp,2)] = f_010[i]*c3 + f_101[i]*c0 + c2*(f_000[i] + f_011[i] + f_110[i]) + c1*(f_001[i] + f_100[i] + f_111[i]);
1514     o[INDEX2(i,numComp,3)] = f_001[i]*c0 + f_110[i]*c3 + c2*(f_010[i] + f_100[i] + f_111[i]) + c1*(f_000[i] + f_011[i] + f_101[i]);
1515     o[INDEX2(i,numComp,4)] = f_001[i]*c3 + f_110[i]*c0 + c2*(f_000[i] + f_011[i] + f_101[i]) + c1*(f_010[i] + f_100[i] + f_111[i]);
1516     o[INDEX2(i,numComp,5)] = f_010[i]*c0 + f_101[i]*c3 + c2*(f_001[i] + f_100[i] + f_111[i]) + c1*(f_000[i] + f_011[i] + f_110[i]);
1517     o[INDEX2(i,numComp,6)] = f_011[i]*c3 + f_100[i]*c0 + c2*(f_001[i] + f_010[i] + f_111[i]) + c1*(f_000[i] + f_101[i] + f_110[i]);
1518     o[INDEX2(i,numComp,7)] = f_000[i]*c0 + f_111[i]*c3 + c2*(f_011[i] + f_101[i] + f_110[i]) + c1*(f_001[i] + f_010[i] + f_100[i]);
1519 caltinay 3711 } /* end of component loop i */
1520     } /* end of k0 loop */
1521     } /* end of k1 loop */
1522     } /* end of k2 loop */
1523     /* GENERATOR SNIP_INTERPOLATE_ELEMENTS BOTTOM */
1524     }
1525 caltinay 3703 }
1526    
1527     //protected
1528 caltinay 3711 void Brick::interpolateNodesOnFaces(escript::Data& out, escript::Data& in,
1529     bool reduced) const
1530 caltinay 3703 {
1531 caltinay 3704 const dim_t numComp = in.getDataPointSize();
1532 caltinay 3711 if (reduced) {
1533 caltinay 3731 const double c0 = .25;
1534     #pragma omp parallel
1535     {
1536     /*** GENERATOR SNIP_INTERPOLATE_REDUCED_FACES TOP */
1537     if (m_faceOffset[0] > -1) {
1538     #pragma omp for nowait
1539     for (index_t k2=0; k2 < m_NE2; ++k2) {
1540     for (index_t k1=0; k1 < m_NE1; ++k1) {
1541     const register double* f_011 = in.getSampleDataRO(INDEX3(0,k1+1,k2+1, m_N0,m_N1));
1542     const register double* f_010 = in.getSampleDataRO(INDEX3(0,k1+1,k2, m_N0,m_N1));
1543     const register double* f_001 = in.getSampleDataRO(INDEX3(0,k1,k2+1, m_N0,m_N1));
1544     const register double* f_000 = in.getSampleDataRO(INDEX3(0,k1,k2, m_N0,m_N1));
1545     double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE1));
1546     for (index_t i=0; i < numComp; ++i) {
1547     o[INDEX2(i,numComp,0)] = c0*(f_000[i] + f_001[i] + f_010[i] + f_011[i]);
1548     } /* end of component loop i */
1549     } /* end of k1 loop */