/[escript]/trunk/ripley/src/Brick.cpp
ViewVC logotype

Annotation of /trunk/ripley/src/Brick.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 3707 - (hide annotations)
Mon Dec 5 05:48:25 2011 UTC (7 years, 4 months ago) by caltinay
Original Path: branches/ripleygmg_from_3668/ripley/src/Brick.cpp
File size: 73017 byte(s)
Fixes to the face element code.

1 caltinay 3691
2     /*******************************************************
3     *
4     * Copyright (c) 2003-2011 by University of Queensland
5     * Earth Systems Science Computational Center (ESSCC)
6     * http://www.uq.edu.au/esscc
7     *
8     * Primary Business: Queensland, Australia
9     * Licensed under the Open Software License version 3.0
10     * http://www.opensource.org/licenses/osl-3.0.php
11     *
12     *******************************************************/
13    
14     #include <ripley/Brick.h>
15     extern "C" {
16     #include "paso/SystemMatrixPattern.h"
17     }
18    
19     #if USE_SILO
20     #include <silo.h>
21     #ifdef ESYS_MPI
22     #include <pmpio.h>
23     #endif
24     #endif
25    
26     #include <iomanip>
27    
28     using namespace std;
29    
30     namespace ripley {
31    
32     Brick::Brick(int n0, int n1, int n2, double l0, double l1, double l2, int d0,
33     int d1, int d2) :
34     RipleyDomain(3),
35     m_gNE0(n0),
36     m_gNE1(n1),
37     m_gNE2(n2),
38     m_l0(l0),
39     m_l1(l1),
40     m_l2(l2),
41     m_NX(d0),
42     m_NY(d1),
43     m_NZ(d2)
44     {
45     // ensure number of subdivisions is valid and nodes can be distributed
46     // among number of ranks
47     if (m_NX*m_NY*m_NZ != m_mpiInfo->size)
48     throw RipleyException("Invalid number of spatial subdivisions");
49    
50     if (n0%m_NX > 0 || n1%m_NY > 0 || n2%m_NZ > 0)
51     throw RipleyException("Number of elements must be separable into number of ranks in each dimension");
52    
53     // local number of elements
54     m_NE0 = n0/m_NX;
55     m_NE1 = n1/m_NY;
56     m_NE2 = n2/m_NZ;
57     // local number of nodes (not necessarily owned)
58     m_N0 = m_NE0+1;
59     m_N1 = m_NE1+1;
60     m_N2 = m_NE2+1;
61     // bottom-left-front node is at (offset0,offset1,offset2) in global mesh
62     m_offset0 = m_NE0*(m_mpiInfo->rank%m_NX);
63     m_offset1 = m_NE1*(m_mpiInfo->rank%(m_NX*m_NY)/m_NX);
64     m_offset2 = m_NE2*(m_mpiInfo->rank/(m_NX*m_NY));
65     populateSampleIds();
66     }
67    
68    
69     Brick::~Brick()
70     {
71     }
72    
73     string Brick::getDescription() const
74     {
75     return "ripley::Brick";
76     }
77    
78     bool Brick::operator==(const AbstractDomain& other) const
79     {
80     if (dynamic_cast<const Brick*>(&other))
81     return this==&other;
82    
83     return false;
84     }
85    
86     void Brick::dump(const string& fileName) const
87     {
88     #if USE_SILO
89     string fn(fileName);
90     if (fileName.length() < 6 || fileName.compare(fileName.length()-5, 5, ".silo") != 0) {
91     fn+=".silo";
92     }
93    
94     const int NUM_SILO_FILES = 1;
95     const char* blockDirFmt = "/block%04d";
96     int driver=DB_HDF5;
97     string siloPath;
98     DBfile* dbfile = NULL;
99    
100     #ifdef ESYS_MPI
101     PMPIO_baton_t* baton = NULL;
102     #endif
103    
104     if (m_mpiInfo->size > 1) {
105     #ifdef ESYS_MPI
106     baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
107     0x1337, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
108     PMPIO_DefaultClose, (void*)&driver);
109     // try the fallback driver in case of error
110     if (!baton && driver != DB_PDB) {
111     driver = DB_PDB;
112     baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
113     0x1338, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
114     PMPIO_DefaultClose, (void*)&driver);
115     }
116     if (baton) {
117     char str[64];
118     snprintf(str, 64, blockDirFmt, PMPIO_RankInGroup(baton, m_mpiInfo->rank));
119     siloPath = str;
120     dbfile = (DBfile*) PMPIO_WaitForBaton(baton, fn.c_str(), siloPath.c_str());
121     }
122     #endif
123     } else {
124     dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
125     getDescription().c_str(), driver);
126     // try the fallback driver in case of error
127     if (!dbfile && driver != DB_PDB) {
128     driver = DB_PDB;
129     dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
130     getDescription().c_str(), driver);
131     }
132     }
133    
134     if (!dbfile)
135     throw RipleyException("dump: Could not create Silo file");
136    
137     /*
138     if (driver==DB_HDF5) {
139     // gzip level 1 already provides good compression with minimal
140     // performance penalty. Some tests showed that gzip levels >3 performed
141     // rather badly on escript data both in terms of time and space
142     DBSetCompression("ERRMODE=FALLBACK METHOD=GZIP LEVEL=1");
143     }
144     */
145    
146     boost::scoped_ptr<double> x(new double[m_N0]);
147     boost::scoped_ptr<double> y(new double[m_N1]);
148     boost::scoped_ptr<double> z(new double[m_N2]);
149     double* coords[3] = { x.get(), y.get(), z.get() };
150 caltinay 3698 pair<double,double> xdx = getFirstCoordAndSpacing(0);
151     pair<double,double> ydy = getFirstCoordAndSpacing(1);
152     pair<double,double> zdz = getFirstCoordAndSpacing(2);
153 caltinay 3691 #pragma omp parallel
154     {
155     #pragma omp for
156     for (dim_t i0 = 0; i0 < m_N0; i0++) {
157 caltinay 3698 coords[0][i0]=xdx.first+i0*xdx.second;
158 caltinay 3691 }
159     #pragma omp for
160     for (dim_t i1 = 0; i1 < m_N1; i1++) {
161 caltinay 3698 coords[1][i1]=ydy.first+i1*ydy.second;
162 caltinay 3691 }
163     #pragma omp for
164     for (dim_t i2 = 0; i2 < m_N2; i2++) {
165 caltinay 3698 coords[2][i2]=zdz.first+i2*zdz.second;
166 caltinay 3691 }
167     }
168 caltinay 3698 IndexVector dims = getNumNodesPerDim();
169     DBPutQuadmesh(dbfile, "mesh", NULL, coords, &dims[0], 3, DB_DOUBLE,
170 caltinay 3691 DB_COLLINEAR, NULL);
171    
172 caltinay 3698 DBPutQuadvar1(dbfile, "nodeId", "mesh", (void*)&m_nodeId[0], &dims[0], 3,
173     NULL, 0, DB_INT, DB_NODECENT, NULL);
174 caltinay 3691
175 caltinay 3698 // write element ids
176     dims = getNumElementsPerDim();
177     DBPutQuadvar1(dbfile, "elementId", "mesh", (void*)&m_elementId[0],
178     &dims[0], 3, NULL, 0, DB_INT, DB_ZONECENT, NULL);
179    
180     // rank 0 writes multimesh and multivar
181 caltinay 3691 if (m_mpiInfo->rank == 0) {
182     vector<string> tempstrings;
183     vector<char*> names;
184     for (dim_t i=0; i<m_mpiInfo->size; i++) {
185     stringstream path;
186     path << "/block" << setw(4) << setfill('0') << right << i << "/mesh";
187     tempstrings.push_back(path.str());
188     names.push_back((char*)tempstrings.back().c_str());
189     }
190     vector<int> types(m_mpiInfo->size, DB_QUAD_RECT);
191     DBSetDir(dbfile, "/");
192     DBPutMultimesh(dbfile, "multimesh", m_mpiInfo->size, &names[0],
193     &types[0], NULL);
194     tempstrings.clear();
195     names.clear();
196     for (dim_t i=0; i<m_mpiInfo->size; i++) {
197     stringstream path;
198     path << "/block" << setw(4) << setfill('0') << right << i << "/nodeId";
199     tempstrings.push_back(path.str());
200     names.push_back((char*)tempstrings.back().c_str());
201     }
202     types.assign(m_mpiInfo->size, DB_QUADVAR);
203     DBPutMultivar(dbfile, "nodeId", m_mpiInfo->size, &names[0],
204     &types[0], NULL);
205 caltinay 3698 tempstrings.clear();
206     names.clear();
207     for (dim_t i=0; i<m_mpiInfo->size; i++) {
208     stringstream path;
209     path << "/block" << setw(4) << setfill('0') << right << i << "/elementId";
210     tempstrings.push_back(path.str());
211     names.push_back((char*)tempstrings.back().c_str());
212     }
213     DBPutMultivar(dbfile, "elementId", m_mpiInfo->size, &names[0],
214     &types[0], NULL);
215 caltinay 3691 }
216    
217     if (m_mpiInfo->size > 1) {
218     #ifdef ESYS_MPI
219     PMPIO_HandOffBaton(baton, dbfile);
220     PMPIO_Finish(baton);
221     #endif
222     } else {
223     DBClose(dbfile);
224     }
225    
226     #else // USE_SILO
227     throw RipleyException("dump(): no Silo support");
228     #endif
229     }
230    
231     const int* Brick::borrowSampleReferenceIDs(int fsType) const
232     {
233 caltinay 3697 switch (fsType) {
234     case Nodes:
235     return &m_nodeId[0];
236     case Elements:
237     return &m_elementId[0];
238     case FaceElements:
239     return &m_faceId[0];
240     default:
241     break;
242     }
243 caltinay 3691
244 caltinay 3697 stringstream msg;
245     msg << "borrowSampleReferenceIDs() not implemented for function space type "
246     << fsType;
247     throw RipleyException(msg.str());
248 caltinay 3691 }
249    
250     bool Brick::ownSample(int fsCode, index_t id) const
251     {
252     #ifdef ESYS_MPI
253     if (fsCode == Nodes) {
254 caltinay 3698 const index_t myFirst=m_nodeDistribution[m_mpiInfo->rank];
255     const index_t myLast=m_nodeDistribution[m_mpiInfo->rank+1]-1;
256 caltinay 3691 return (m_nodeId[id]>=myFirst && m_nodeId[id]<=myLast);
257     } else
258     throw RipleyException("ownSample() only implemented for Nodes");
259     #else
260     return true;
261     #endif
262     }
263    
264 caltinay 3703 void Brick::setToGradient(escript::Data& out, const escript::Data& cIn) const
265     {
266     escript::Data& in = *const_cast<escript::Data*>(&cIn);
267     const dim_t numComp = in.getDataPointSize();
268     const double h0 = m_l0/m_gNE0;
269     const double h1 = m_l1/m_gNE1;
270     const double h2 = m_l1/m_gNE2;
271     if (out.getFunctionSpace().getTypeCode() == Elements) {
272     /* GENERATOR SNIP_GRAD_ELEMENTS TOP */
273     const double tmp0_22 = -0.044658198738520451079/h1;
274     const double tmp0_16 = 0.16666666666666666667/h0;
275     const double tmp0_33 = -0.62200846792814621559/h1;
276     const double tmp0_0 = -0.62200846792814621559/h0;
277     const double tmp0_21 = -0.16666666666666666667/h1;
278     const double tmp0_17 = 0.62200846792814621559/h0;
279     const double tmp0_52 = -0.044658198738520451079/h2;
280     const double tmp0_1 = -0.16666666666666666667/h0;
281     const double tmp0_20 = -0.62200846792814621559/h1;
282     const double tmp0_14 = -0.044658198738520451079/h0;
283     const double tmp0_53 = -0.62200846792814621559/h2;
284     const double tmp0_49 = 0.16666666666666666667/h2;
285     const double tmp0_2 = 0.16666666666666666667/h0;
286     const double tmp0_27 = -0.044658198738520451079/h1;
287     const double tmp0_15 = -0.16666666666666666667/h0;
288     const double tmp0_50 = -0.16666666666666666667/h2;
289     const double tmp0_48 = 0.62200846792814621559/h2;
290     const double tmp0_3 = 0.044658198738520451079/h0;
291     const double tmp0_26 = -0.16666666666666666667/h1;
292     const double tmp0_12 = -0.62200846792814621559/h0;
293     const double tmp0_51 = 0.044658198738520451079/h2;
294     const double tmp0_25 = 0.62200846792814621559/h1;
295     const double tmp0_13 = 0.16666666666666666667/h0;
296     const double tmp0_56 = 0.16666666666666666667/h2;
297     const double tmp0_24 = 0.16666666666666666667/h1;
298     const double tmp0_10 = 0.62200846792814621559/h0;
299     const double tmp0_57 = 0.62200846792814621559/h2;
300     const double tmp0_11 = -0.16666666666666666667/h0;
301     const double tmp0_54 = -0.044658198738520451079/h2;
302     const double tmp0_38 = 0.16666666666666666667/h1;
303     const double tmp0_34 = -0.044658198738520451079/h1;
304     const double tmp0_42 = 0.16666666666666666667/h2;
305     const double tmp0_35 = -0.16666666666666666667/h1;
306     const double tmp0_36 = -0.62200846792814621559/h1;
307     const double tmp0_41 = 0.62200846792814621559/h2;
308     const double tmp0_8 = 0.044658198738520451079/h0;
309     const double tmp0_37 = 0.62200846792814621559/h1;
310     const double tmp0_29 = 0.16666666666666666667/h1;
311     const double tmp0_40 = -0.62200846792814621559/h2;
312     const double tmp0_9 = 0.16666666666666666667/h0;
313     const double tmp0_30 = 0.62200846792814621559/h1;
314     const double tmp0_28 = -0.16666666666666666667/h1;
315     const double tmp0_43 = 0.044658198738520451079/h2;
316     const double tmp0_32 = 0.16666666666666666667/h1;
317     const double tmp0_31 = 0.044658198738520451079/h1;
318     const double tmp0_39 = 0.044658198738520451079/h1;
319     const double tmp0_58 = -0.62200846792814621559/h2;
320     const double tmp0_55 = 0.044658198738520451079/h2;
321     const double tmp0_18 = -0.62200846792814621559/h0;
322     const double tmp0_45 = -0.16666666666666666667/h2;
323     const double tmp0_59 = -0.16666666666666666667/h2;
324     const double tmp0_4 = -0.044658198738520451079/h0;
325     const double tmp0_19 = 0.044658198738520451079/h0;
326     const double tmp0_44 = -0.044658198738520451079/h2;
327     const double tmp0_5 = 0.62200846792814621559/h0;
328     const double tmp0_47 = 0.16666666666666666667/h2;
329     const double tmp0_6 = -0.16666666666666666667/h0;
330     const double tmp0_23 = 0.044658198738520451079/h1;
331     const double tmp0_46 = -0.16666666666666666667/h2;
332     const double tmp0_7 = -0.044658198738520451079/h0;
333     #pragma omp parallel for
334 caltinay 3707 for (index_t k2=0; k2 < m_NE2; ++k2) {
335     for (index_t k1=0; k1 < m_NE1; ++k1) {
336     for (index_t k0=0; k0 < m_NE0; ++k0) {
337 caltinay 3703 const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,k2, m_N0,m_N1));
338     const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,k2+1, m_N0,m_N1));
339     const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,k2+1, m_N0,m_N1));
340     const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2+1, m_N0,m_N1));
341     const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2, m_N0,m_N1));
342     const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,k2+1, m_N0,m_N1));
343     const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,k2, m_N0,m_N1));
344     const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,k2, m_N0,m_N1));
345     double* o = out.getSampleDataRW(INDEX3(k0,k1,k2,m_NE0,m_NE1));
346     for (index_t i=0; i < numComp; ++i) {
347     o[INDEX3(i,0,0,numComp,3)] = f_000[i]*tmp0_0 + f_011[i]*tmp0_4 + f_100[i]*tmp0_5 + f_111[i]*tmp0_3 + tmp0_1*(f_001[i] + f_010[i]) + tmp0_2*(f_101[i] + f_110[i]);
348     o[INDEX3(i,1,0,numComp,3)] = f_000[i]*tmp0_20 + f_010[i]*tmp0_25 + f_101[i]*tmp0_22 + f_111[i]*tmp0_23 + tmp0_21*(f_001[i] + f_100[i]) + tmp0_24*(f_011[i] + f_110[i]);
349     o[INDEX3(i,2,0,numComp,3)] = f_000[i]*tmp0_40 + f_001[i]*tmp0_41 + f_110[i]*tmp0_44 + f_111[i]*tmp0_43 + tmp0_42*(f_011[i] + f_101[i]) + tmp0_45*(f_010[i] + f_100[i]);
350     o[INDEX3(i,0,1,numComp,3)] = f_000[i]*tmp0_0 + f_011[i]*tmp0_4 + f_100[i]*tmp0_5 + f_111[i]*tmp0_3 + tmp0_1*(f_001[i] + f_010[i]) + tmp0_2*(f_101[i] + f_110[i]);
351     o[INDEX3(i,1,1,numComp,3)] = f_000[i]*tmp0_26 + f_001[i]*tmp0_27 + f_010[i]*tmp0_32 + f_011[i]*tmp0_31 + f_100[i]*tmp0_33 + f_101[i]*tmp0_28 + f_110[i]*tmp0_30 + f_111[i]*tmp0_29;
352     o[INDEX3(i,2,1,numComp,3)] = f_000[i]*tmp0_46 + f_001[i]*tmp0_47 + f_010[i]*tmp0_52 + f_011[i]*tmp0_51 + f_100[i]*tmp0_53 + f_101[i]*tmp0_48 + f_110[i]*tmp0_50 + f_111[i]*tmp0_49;
353     o[INDEX3(i,0,2,numComp,3)] = f_000[i]*tmp0_6 + f_001[i]*tmp0_7 + f_010[i]*tmp0_12 + f_011[i]*tmp0_11 + f_100[i]*tmp0_13 + f_101[i]*tmp0_8 + f_110[i]*tmp0_10 + f_111[i]*tmp0_9;
354     o[INDEX3(i,1,2,numComp,3)] = f_000[i]*tmp0_20 + f_010[i]*tmp0_25 + f_101[i]*tmp0_22 + f_111[i]*tmp0_23 + tmp0_21*(f_001[i] + f_100[i]) + tmp0_24*(f_011[i] + f_110[i]);
355     o[INDEX3(i,2,2,numComp,3)] = f_000[i]*tmp0_46 + f_001[i]*tmp0_47 + f_010[i]*tmp0_53 + f_011[i]*tmp0_48 + f_100[i]*tmp0_52 + f_101[i]*tmp0_51 + f_110[i]*tmp0_50 + f_111[i]*tmp0_49;
356     o[INDEX3(i,0,3,numComp,3)] = f_000[i]*tmp0_6 + f_001[i]*tmp0_7 + f_010[i]*tmp0_12 + f_011[i]*tmp0_11 + f_100[i]*tmp0_13 + f_101[i]*tmp0_8 + f_110[i]*tmp0_10 + f_111[i]*tmp0_9;
357     o[INDEX3(i,1,3,numComp,3)] = f_000[i]*tmp0_26 + f_001[i]*tmp0_27 + f_010[i]*tmp0_32 + f_011[i]*tmp0_31 + f_100[i]*tmp0_33 + f_101[i]*tmp0_28 + f_110[i]*tmp0_30 + f_111[i]*tmp0_29;
358     o[INDEX3(i,2,3,numComp,3)] = f_000[i]*tmp0_54 + f_001[i]*tmp0_55 + f_110[i]*tmp0_58 + f_111[i]*tmp0_57 + tmp0_56*(f_011[i] + f_101[i]) + tmp0_59*(f_010[i] + f_100[i]);
359     o[INDEX3(i,0,4,numComp,3)] = f_000[i]*tmp0_6 + f_001[i]*tmp0_12 + f_010[i]*tmp0_7 + f_011[i]*tmp0_11 + f_100[i]*tmp0_13 + f_101[i]*tmp0_10 + f_110[i]*tmp0_8 + f_111[i]*tmp0_9;
360     o[INDEX3(i,1,4,numComp,3)] = f_000[i]*tmp0_26 + f_001[i]*tmp0_33 + f_010[i]*tmp0_32 + f_011[i]*tmp0_30 + f_100[i]*tmp0_27 + f_101[i]*tmp0_28 + f_110[i]*tmp0_31 + f_111[i]*tmp0_29;
361     o[INDEX3(i,2,4,numComp,3)] = f_000[i]*tmp0_40 + f_001[i]*tmp0_41 + f_110[i]*tmp0_44 + f_111[i]*tmp0_43 + tmp0_42*(f_011[i] + f_101[i]) + tmp0_45*(f_010[i] + f_100[i]);
362     o[INDEX3(i,0,5,numComp,3)] = f_000[i]*tmp0_6 + f_001[i]*tmp0_12 + f_010[i]*tmp0_7 + f_011[i]*tmp0_11 + f_100[i]*tmp0_13 + f_101[i]*tmp0_10 + f_110[i]*tmp0_8 + f_111[i]*tmp0_9;
363     o[INDEX3(i,1,5,numComp,3)] = f_000[i]*tmp0_34 + f_010[i]*tmp0_39 + f_101[i]*tmp0_36 + f_111[i]*tmp0_37 + tmp0_35*(f_001[i] + f_100[i]) + tmp0_38*(f_011[i] + f_110[i]);
364     o[INDEX3(i,2,5,numComp,3)] = f_000[i]*tmp0_46 + f_001[i]*tmp0_47 + f_010[i]*tmp0_52 + f_011[i]*tmp0_51 + f_100[i]*tmp0_53 + f_101[i]*tmp0_48 + f_110[i]*tmp0_50 + f_111[i]*tmp0_49;
365     o[INDEX3(i,0,6,numComp,3)] = f_000[i]*tmp0_14 + f_011[i]*tmp0_18 + f_100[i]*tmp0_19 + f_111[i]*tmp0_17 + tmp0_15*(f_001[i] + f_010[i]) + tmp0_16*(f_101[i] + f_110[i]);
366     o[INDEX3(i,1,6,numComp,3)] = f_000[i]*tmp0_26 + f_001[i]*tmp0_33 + f_010[i]*tmp0_32 + f_011[i]*tmp0_30 + f_100[i]*tmp0_27 + f_101[i]*tmp0_28 + f_110[i]*tmp0_31 + f_111[i]*tmp0_29;
367     o[INDEX3(i,2,6,numComp,3)] = f_000[i]*tmp0_46 + f_001[i]*tmp0_47 + f_010[i]*tmp0_53 + f_011[i]*tmp0_48 + f_100[i]*tmp0_52 + f_101[i]*tmp0_51 + f_110[i]*tmp0_50 + f_111[i]*tmp0_49;
368     o[INDEX3(i,0,7,numComp,3)] = f_000[i]*tmp0_14 + f_011[i]*tmp0_18 + f_100[i]*tmp0_19 + f_111[i]*tmp0_17 + tmp0_15*(f_001[i] + f_010[i]) + tmp0_16*(f_101[i] + f_110[i]);
369     o[INDEX3(i,1,7,numComp,3)] = f_000[i]*tmp0_34 + f_010[i]*tmp0_39 + f_101[i]*tmp0_36 + f_111[i]*tmp0_37 + tmp0_35*(f_001[i] + f_100[i]) + tmp0_38*(f_011[i] + f_110[i]);
370     o[INDEX3(i,2,7,numComp,3)] = f_000[i]*tmp0_54 + f_001[i]*tmp0_55 + f_110[i]*tmp0_58 + f_111[i]*tmp0_57 + tmp0_56*(f_011[i] + f_101[i]) + tmp0_59*(f_010[i] + f_100[i]);
371     } /* end of component loop i */
372     } /* end of k0 loop */
373     } /* end of k1 loop */
374     } /* end of k2 loop */
375     /* GENERATOR SNIP_GRAD_ELEMENTS BOTTOM */
376 caltinay 3707 } else if (out.getFunctionSpace().getTypeCode() == FaceElements) {
377     /* GENERATOR SNIP_GRAD_FACES TOP */
378     if (m_faceOffset[0] > -1) {
379     const double tmp0_22 = 0.21132486540518711775/h1;
380     const double tmp0_16 = 0.16666666666666666667/h0;
381     const double tmp0_33 = 0.21132486540518711775/h2;
382     const double tmp0_0 = -0.62200846792814621559/h0;
383     const double tmp0_21 = -0.21132486540518711775/h1;
384     const double tmp0_17 = 0.62200846792814621559/h0;
385     const double tmp0_1 = -0.16666666666666666667/h0;
386     const double tmp0_20 = -0.78867513459481288225/h1;
387     const double tmp0_14 = -0.044658198738520451079/h0;
388     const double tmp0_2 = 0.16666666666666666667/h0;
389     const double tmp0_27 = 0.21132486540518711775/h1;
390     const double tmp0_15 = -0.16666666666666666667/h0;
391     const double tmp0_3 = 0.044658198738520451079/h0;
392     const double tmp0_26 = 0.78867513459481288225/h1;
393     const double tmp0_12 = -0.62200846792814621559/h0;
394     const double tmp0_25 = -0.78867513459481288225/h1;
395     const double tmp0_13 = 0.16666666666666666667/h0;
396     const double tmp0_24 = -0.21132486540518711775/h1;
397     const double tmp0_10 = 0.62200846792814621559/h0;
398     const double tmp0_11 = -0.16666666666666666667/h0;
399     const double tmp0_34 = 0.78867513459481288225/h2;
400     const double tmp0_35 = -0.78867513459481288225/h2;
401     const double tmp0_8 = 0.044658198738520451079/h0;
402     const double tmp0_29 = 0.78867513459481288225/h2;
403     const double tmp0_9 = 0.16666666666666666667/h0;
404     const double tmp0_30 = 0.21132486540518711775/h2;
405     const double tmp0_28 = -0.78867513459481288225/h2;
406     const double tmp0_32 = -0.21132486540518711775/h2;
407     const double tmp0_31 = -0.21132486540518711775/h2;
408     const double tmp0_18 = -0.62200846792814621559/h0;
409     const double tmp0_4 = -0.044658198738520451079/h0;
410     const double tmp0_19 = 0.044658198738520451079/h0;
411     const double tmp0_5 = 0.62200846792814621559/h0;
412     const double tmp0_6 = -0.16666666666666666667/h0;
413     const double tmp0_23 = 0.78867513459481288225/h1;
414     const double tmp0_7 = -0.044658198738520451079/h0;
415     #pragma omp parallel for
416     for (index_t k2=0; k2 < m_NE2; ++k2) {
417     for (index_t k1=0; k1 < m_NE1; ++k1) {
418     const register double* f_000 = in.getSampleDataRO(INDEX3(0,k1,k2, m_N0,m_N1));
419     const register double* f_001 = in.getSampleDataRO(INDEX3(0,k1,k2+1, m_N0,m_N1));
420     const register double* f_101 = in.getSampleDataRO(INDEX3(1,k1,k2+1, m_N0,m_N1));
421     const register double* f_111 = in.getSampleDataRO(INDEX3(1,k1+1,k2+1, m_N0,m_N1));
422     const register double* f_110 = in.getSampleDataRO(INDEX3(1,k1+1,k2, m_N0,m_N1));
423     const register double* f_011 = in.getSampleDataRO(INDEX3(0,k1+1,k2+1, m_N0,m_N1));
424     const register double* f_010 = in.getSampleDataRO(INDEX3(0,k1+1,k2, m_N0,m_N1));
425     const register double* f_100 = in.getSampleDataRO(INDEX3(1,k1,k2, m_N0,m_N1));
426     double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE1));
427     for (index_t i=0; i < numComp; ++i) {
428     o[INDEX3(i,0,0,numComp,3)] = f_000[i]*tmp0_0 + f_011[i]*tmp0_4 + f_100[i]*tmp0_5 + f_111[i]*tmp0_3 + tmp0_1*(f_001[i] + f_010[i]) + tmp0_2*(f_101[i] + f_110[i]);
429     o[INDEX3(i,1,0,numComp,3)] = f_000[i]*tmp0_20 + f_001[i]*tmp0_21 + f_010[i]*tmp0_23 + f_011[i]*tmp0_22;
430     o[INDEX3(i,2,0,numComp,3)] = f_000[i]*tmp0_28 + f_001[i]*tmp0_29 + f_010[i]*tmp0_31 + f_011[i]*tmp0_30;
431     o[INDEX3(i,0,1,numComp,3)] = f_000[i]*tmp0_6 + f_001[i]*tmp0_7 + f_010[i]*tmp0_12 + f_011[i]*tmp0_11 + f_100[i]*tmp0_13 + f_101[i]*tmp0_8 + f_110[i]*tmp0_10 + f_111[i]*tmp0_9;
432     o[INDEX3(i,1,1,numComp,3)] = f_000[i]*tmp0_20 + f_001[i]*tmp0_21 + f_010[i]*tmp0_23 + f_011[i]*tmp0_22;
433     o[INDEX3(i,2,1,numComp,3)] = f_000[i]*tmp0_32 + f_001[i]*tmp0_33 + f_010[i]*tmp0_35 + f_011[i]*tmp0_34;
434     o[INDEX3(i,0,2,numComp,3)] = f_000[i]*tmp0_6 + f_001[i]*tmp0_12 + f_010[i]*tmp0_7 + f_011[i]*tmp0_11 + f_100[i]*tmp0_13 + f_101[i]*tmp0_10 + f_110[i]*tmp0_8 + f_111[i]*tmp0_9;
435     o[INDEX3(i,1,2,numComp,3)] = f_000[i]*tmp0_24 + f_001[i]*tmp0_25 + f_010[i]*tmp0_27 + f_011[i]*tmp0_26;
436     o[INDEX3(i,2,2,numComp,3)] = f_000[i]*tmp0_28 + f_001[i]*tmp0_29 + f_010[i]*tmp0_31 + f_011[i]*tmp0_30;
437     o[INDEX3(i,0,3,numComp,3)] = f_000[i]*tmp0_14 + f_011[i]*tmp0_18 + f_100[i]*tmp0_19 + f_111[i]*tmp0_17 + tmp0_15*(f_001[i] + f_010[i]) + tmp0_16*(f_101[i] + f_110[i]);
438     o[INDEX3(i,1,3,numComp,3)] = f_000[i]*tmp0_24 + f_001[i]*tmp0_25 + f_010[i]*tmp0_27 + f_011[i]*tmp0_26;
439     o[INDEX3(i,2,3,numComp,3)] = f_000[i]*tmp0_32 + f_001[i]*tmp0_33 + f_010[i]*tmp0_35 + f_011[i]*tmp0_34;
440     } /* end of component loop i */
441     } /* end of k1 loop */
442     } /* end of k2 loop */
443     } /* end of face 0 */
444     if (m_faceOffset[1] > -1) {
445     const double tmp0_22 = 0.78867513459481288225/h1;
446     const double tmp0_16 = 0.16666666666666666667/h0;
447     const double tmp0_33 = 0.78867513459481288225/h2;
448     const double tmp0_0 = -0.62200846792814621559/h0;
449     const double tmp0_21 = 0.21132486540518711775/h1;
450     const double tmp0_17 = 0.62200846792814621559/h0;
451     const double tmp0_1 = -0.16666666666666666667/h0;
452     const double tmp0_20 = -0.21132486540518711775/h1;
453     const double tmp0_14 = -0.044658198738520451079/h0;
454     const double tmp0_2 = 0.16666666666666666667/h0;
455     const double tmp0_27 = -0.21132486540518711775/h1;
456     const double tmp0_15 = -0.16666666666666666667/h0;
457     const double tmp0_3 = 0.044658198738520451079/h0;
458     const double tmp0_26 = 0.21132486540518711775/h1;
459     const double tmp0_12 = -0.62200846792814621559/h0;
460     const double tmp0_25 = 0.78867513459481288225/h1;
461     const double tmp0_13 = 0.16666666666666666667/h0;
462     const double tmp0_24 = -0.78867513459481288225/h1;
463     const double tmp0_10 = 0.62200846792814621559/h0;
464     const double tmp0_11 = -0.16666666666666666667/h0;
465     const double tmp0_34 = -0.78867513459481288225/h2;
466     const double tmp0_35 = -0.21132486540518711775/h2;
467     const double tmp0_8 = 0.044658198738520451079/h0;
468     const double tmp0_29 = 0.21132486540518711775/h2;
469     const double tmp0_9 = 0.16666666666666666667/h0;
470     const double tmp0_30 = -0.21132486540518711775/h2;
471     const double tmp0_28 = 0.78867513459481288225/h2;
472     const double tmp0_32 = 0.21132486540518711775/h2;
473     const double tmp0_31 = -0.78867513459481288225/h2;
474     const double tmp0_18 = -0.62200846792814621559/h0;
475     const double tmp0_4 = -0.044658198738520451079/h0;
476     const double tmp0_19 = 0.044658198738520451079/h0;
477     const double tmp0_5 = 0.62200846792814621559/h0;
478     const double tmp0_6 = -0.16666666666666666667/h0;
479     const double tmp0_23 = -0.78867513459481288225/h1;
480     const double tmp0_7 = -0.044658198738520451079/h0;
481     #pragma omp parallel for
482     for (index_t k2=0; k2 < m_NE2; ++k2) {
483     for (index_t k1=0; k1 < m_NE1; ++k1) {
484     const register double* f_000 = in.getSampleDataRO(INDEX3(m_N0-2,k1,k2, m_N0,m_N1));
485     const register double* f_001 = in.getSampleDataRO(INDEX3(m_N0-2,k1,k2+1, m_N0,m_N1));
486     const register double* f_101 = in.getSampleDataRO(INDEX3(m_N0-1,k1,k2+1, m_N0,m_N1));
487     const register double* f_111 = in.getSampleDataRO(INDEX3(m_N0-1,k1+1,k2+1, m_N0,m_N1));
488     const register double* f_110 = in.getSampleDataRO(INDEX3(m_N0-1,k1+1,k2, m_N0,m_N1));
489     const register double* f_011 = in.getSampleDataRO(INDEX3(m_N0-2,k1+1,k2+1, m_N0,m_N1));
490     const register double* f_010 = in.getSampleDataRO(INDEX3(m_N0-2,k1+1,k2, m_N0,m_N1));
491     const register double* f_100 = in.getSampleDataRO(INDEX3(m_N0-1,k1,k2, m_N0,m_N1));
492     double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE1));
493     for (index_t i=0; i < numComp; ++i) {
494     o[INDEX3(i,0,0,numComp,3)] = f_000[i]*tmp0_0 + f_011[i]*tmp0_4 + f_100[i]*tmp0_5 + f_111[i]*tmp0_3 + tmp0_1*(f_001[i] + f_010[i]) + tmp0_2*(f_101[i] + f_110[i]);
495     o[INDEX3(i,1,0,numComp,3)] = f_100[i]*tmp0_23 + f_101[i]*tmp0_20 + f_110[i]*tmp0_22 + f_111[i]*tmp0_21;
496     o[INDEX3(i,2,0,numComp,3)] = f_100[i]*tmp0_31 + f_101[i]*tmp0_28 + f_110[i]*tmp0_30 + f_111[i]*tmp0_29;
497     o[INDEX3(i,0,1,numComp,3)] = f_000[i]*tmp0_6 + f_001[i]*tmp0_7 + f_010[i]*tmp0_12 + f_011[i]*tmp0_11 + f_100[i]*tmp0_13 + f_101[i]*tmp0_8 + f_110[i]*tmp0_10 + f_111[i]*tmp0_9;
498     o[INDEX3(i,1,1,numComp,3)] = f_100[i]*tmp0_23 + f_101[i]*tmp0_20 + f_110[i]*tmp0_22 + f_111[i]*tmp0_21;
499     o[INDEX3(i,2,1,numComp,3)] = f_100[i]*tmp0_35 + f_101[i]*tmp0_32 + f_110[i]*tmp0_34 + f_111[i]*tmp0_33;
500     o[INDEX3(i,0,2,numComp,3)] = f_000[i]*tmp0_6 + f_001[i]*tmp0_12 + f_010[i]*tmp0_7 + f_011[i]*tmp0_11 + f_100[i]*tmp0_13 + f_101[i]*tmp0_10 + f_110[i]*tmp0_8 + f_111[i]*tmp0_9;
501     o[INDEX3(i,1,2,numComp,3)] = f_100[i]*tmp0_27 + f_101[i]*tmp0_24 + f_110[i]*tmp0_26 + f_111[i]*tmp0_25;
502     o[INDEX3(i,2,2,numComp,3)] = f_100[i]*tmp0_31 + f_101[i]*tmp0_28 + f_110[i]*tmp0_30 + f_111[i]*tmp0_29;
503     o[INDEX3(i,0,3,numComp,3)] = f_000[i]*tmp0_14 + f_011[i]*tmp0_18 + f_100[i]*tmp0_19 + f_111[i]*tmp0_17 + tmp0_15*(f_001[i] + f_010[i]) + tmp0_16*(f_101[i] + f_110[i]);
504     o[INDEX3(i,1,3,numComp,3)] = f_100[i]*tmp0_27 + f_101[i]*tmp0_24 + f_110[i]*tmp0_26 + f_111[i]*tmp0_25;
505     o[INDEX3(i,2,3,numComp,3)] = f_100[i]*tmp0_35 + f_101[i]*tmp0_32 + f_110[i]*tmp0_34 + f_111[i]*tmp0_33;
506     } /* end of component loop i */
507     } /* end of k1 loop */
508     } /* end of k2 loop */
509     } /* end of face 1 */
510     if (m_faceOffset[2] > -1) {
511     const double tmp0_22 = -0.044658198738520451079/h1;
512     const double tmp0_16 = -0.16666666666666666667/h1;
513     const double tmp0_33 = 0.21132486540518711775/h2;
514     const double tmp0_0 = -0.78867513459481288225/h0;
515     const double tmp0_21 = 0.16666666666666666667/h1;
516     const double tmp0_17 = -0.62200846792814621559/h1;
517     const double tmp0_1 = -0.21132486540518711775/h0;
518     const double tmp0_20 = 0.044658198738520451079/h1;
519     const double tmp0_14 = -0.16666666666666666667/h1;
520     const double tmp0_2 = 0.21132486540518711775/h0;
521     const double tmp0_27 = 0.044658198738520451079/h1;
522     const double tmp0_15 = -0.044658198738520451079/h1;
523     const double tmp0_3 = 0.78867513459481288225/h0;
524     const double tmp0_26 = 0.16666666666666666667/h1;
525     const double tmp0_12 = 0.16666666666666666667/h1;
526     const double tmp0_25 = 0.62200846792814621559/h1;
527     const double tmp0_13 = 0.62200846792814621559/h1;
528     const double tmp0_24 = -0.62200846792814621559/h1;
529     const double tmp0_10 = -0.044658198738520451079/h1;
530     const double tmp0_11 = 0.044658198738520451079/h1;
531     const double tmp0_34 = 0.78867513459481288225/h2;
532     const double tmp0_35 = -0.78867513459481288225/h2;
533     const double tmp0_8 = -0.62200846792814621559/h1;
534     const double tmp0_29 = 0.78867513459481288225/h2;
535     const double tmp0_9 = -0.16666666666666666667/h1;
536     const double tmp0_30 = 0.21132486540518711775/h2;
537     const double tmp0_28 = -0.78867513459481288225/h2;
538     const double tmp0_32 = -0.21132486540518711775/h2;
539     const double tmp0_31 = -0.21132486540518711775/h2;
540     const double tmp0_18 = 0.16666666666666666667/h1;
541     const double tmp0_4 = -0.21132486540518711775/h0;
542     const double tmp0_19 = 0.62200846792814621559/h1;
543     const double tmp0_5 = -0.78867513459481288225/h0;
544     const double tmp0_6 = 0.78867513459481288225/h0;
545     const double tmp0_23 = -0.16666666666666666667/h1;
546     const double tmp0_7 = 0.21132486540518711775/h0;
547     #pragma omp parallel for
548     for (index_t k2=0; k2 < m_NE2; ++k2) {
549     for (index_t k0=0; k0 < m_NE0; ++k0) {
550     const register double* f_000 = in.getSampleDataRO(INDEX3(k0,0,k2, m_N0,m_N1));
551     const register double* f_001 = in.getSampleDataRO(INDEX3(k0,0,k2+1, m_N0,m_N1));
552     const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,0,k2+1, m_N0,m_N1));
553     const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,0,k2, m_N0,m_N1));
554     const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,1,k2+1, m_N0,m_N1));
555     const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,1,k2, m_N0,m_N1));
556     const register double* f_011 = in.getSampleDataRO(INDEX3(k0,1,k2+1, m_N0,m_N1));
557     const register double* f_010 = in.getSampleDataRO(INDEX3(k0,1,k2, m_N0,m_N1));
558     double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE0));
559     for (index_t i=0; i < numComp; ++i) {
560     o[INDEX3(i,0,0,numComp,3)] = f_000[i]*tmp0_0 + f_001[i]*tmp0_1 + f_100[i]*tmp0_3 + f_101[i]*tmp0_2;
561     o[INDEX3(i,1,0,numComp,3)] = f_000[i]*tmp0_8 + f_010[i]*tmp0_13 + f_101[i]*tmp0_10 + f_111[i]*tmp0_11 + tmp0_12*(f_011[i] + f_110[i]) + tmp0_9*(f_001[i] + f_100[i]);
562     o[INDEX3(i,2,0,numComp,3)] = f_000[i]*tmp0_28 + f_001[i]*tmp0_29 + f_100[i]*tmp0_31 + f_101[i]*tmp0_30;
563     o[INDEX3(i,0,1,numComp,3)] = f_000[i]*tmp0_0 + f_001[i]*tmp0_1 + f_100[i]*tmp0_3 + f_101[i]*tmp0_2;
564     o[INDEX3(i,1,1,numComp,3)] = f_000[i]*tmp0_14 + f_001[i]*tmp0_15 + f_010[i]*tmp0_21 + f_011[i]*tmp0_20 + f_100[i]*tmp0_17 + f_101[i]*tmp0_16 + f_110[i]*tmp0_19 + f_111[i]*tmp0_18;
565     o[INDEX3(i,2,1,numComp,3)] = f_000[i]*tmp0_32 + f_001[i]*tmp0_33 + f_100[i]*tmp0_35 + f_101[i]*tmp0_34;
566     o[INDEX3(i,0,2,numComp,3)] = f_000[i]*tmp0_4 + f_001[i]*tmp0_5 + f_100[i]*tmp0_7 + f_101[i]*tmp0_6;
567     o[INDEX3(i,1,2,numComp,3)] = f_000[i]*tmp0_14 + f_001[i]*tmp0_17 + f_010[i]*tmp0_21 + f_011[i]*tmp0_19 + f_100[i]*tmp0_15 + f_101[i]*tmp0_16 + f_110[i]*tmp0_20 + f_111[i]*tmp0_18;
568     o[INDEX3(i,2,2,numComp,3)] = f_000[i]*tmp0_28 + f_001[i]*tmp0_29 + f_100[i]*tmp0_31 + f_101[i]*tmp0_30;
569     o[INDEX3(i,0,3,numComp,3)] = f_000[i]*tmp0_4 + f_001[i]*tmp0_5 + f_100[i]*tmp0_7 + f_101[i]*tmp0_6;
570     o[INDEX3(i,1,3,numComp,3)] = f_000[i]*tmp0_22 + f_010[i]*tmp0_27 + f_101[i]*tmp0_24 + f_111[i]*tmp0_25 + tmp0_23*(f_001[i] + f_100[i]) + tmp0_26*(f_011[i] + f_110[i]);
571     o[INDEX3(i,2,3,numComp,3)] = f_000[i]*tmp0_32 + f_001[i]*tmp0_33 + f_100[i]*tmp0_35 + f_101[i]*tmp0_34;
572     } /* end of component loop i */
573     } /* end of k0 loop */
574     } /* end of k2 loop */
575     } /* end of face 2 */
576     if (m_faceOffset[3] > -1) {
577     const double tmp0_22 = 0.16666666666666666667/h1;
578     const double tmp0_16 = 0.16666666666666666667/h1;
579     const double tmp0_33 = -0.78867513459481288225/h2;
580     const double tmp0_0 = -0.21132486540518711775/h0;
581     const double tmp0_21 = -0.62200846792814621559/h1;
582     const double tmp0_17 = 0.16666666666666666667/h1;
583     const double tmp0_1 = 0.78867513459481288225/h0;
584     const double tmp0_20 = -0.16666666666666666667/h1;
585     const double tmp0_14 = 0.044658198738520451079/h1;
586     const double tmp0_2 = -0.78867513459481288225/h0;
587     const double tmp0_27 = -0.62200846792814621559/h1;
588     const double tmp0_15 = 0.62200846792814621559/h1;
589     const double tmp0_3 = 0.21132486540518711775/h0;
590     const double tmp0_26 = -0.16666666666666666667/h1;
591     const double tmp0_12 = -0.16666666666666666667/h1;
592     const double tmp0_25 = -0.044658198738520451079/h1;
593     const double tmp0_13 = -0.044658198738520451079/h1;
594     const double tmp0_24 = 0.62200846792814621559/h1;
595     const double tmp0_10 = 0.044658198738520451079/h1;
596     const double tmp0_11 = -0.62200846792814621559/h1;
597     const double tmp0_34 = -0.21132486540518711775/h2;
598     const double tmp0_35 = 0.78867513459481288225/h2;
599     const double tmp0_8 = 0.16666666666666666667/h1;
600     const double tmp0_29 = -0.21132486540518711775/h2;
601     const double tmp0_9 = 0.62200846792814621559/h1;
602     const double tmp0_30 = -0.78867513459481288225/h2;
603     const double tmp0_28 = 0.78867513459481288225/h2;
604     const double tmp0_32 = 0.21132486540518711775/h2;
605     const double tmp0_31 = 0.21132486540518711775/h2;
606     const double tmp0_18 = -0.16666666666666666667/h1;
607     const double tmp0_4 = -0.78867513459481288225/h0;
608     const double tmp0_19 = -0.044658198738520451079/h1;
609     const double tmp0_5 = 0.21132486540518711775/h0;
610     const double tmp0_6 = -0.21132486540518711775/h0;
611     const double tmp0_23 = 0.044658198738520451079/h1;
612     const double tmp0_7 = 0.78867513459481288225/h0;
613     #pragma omp parallel for
614     for (index_t k2=0; k2 < m_NE2; ++k2) {
615     for (index_t k0=0; k0 < m_NE0; ++k0) {
616     const register double* f_011 = in.getSampleDataRO(INDEX3(k0,m_N1-1,k2+1, m_N0,m_N1));
617     const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,m_N1-1,k2, m_N0,m_N1));
618     const register double* f_010 = in.getSampleDataRO(INDEX3(k0,m_N1-1,k2, m_N0,m_N1));
619     const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,m_N1-1,k2+1, m_N0,m_N1));
620     const register double* f_000 = in.getSampleDataRO(INDEX3(k0,m_N1-2,k2, m_N0,m_N1));
621     const register double* f_001 = in.getSampleDataRO(INDEX3(k0,m_N1-2,k2+1, m_N0,m_N1));
622     const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,m_N1-2,k2+1, m_N0,m_N1));
623     const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,m_N1-2,k2, m_N0,m_N1));
624     double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE0));
625     for (index_t i=0; i < numComp; ++i) {
626     o[INDEX3(i,0,0,numComp,3)] = f_010[i]*tmp0_2 + f_011[i]*tmp0_0 + f_110[i]*tmp0_1 + f_111[i]*tmp0_3;
627     o[INDEX3(i,1,0,numComp,3)] = f_000[i]*tmp0_11 + f_010[i]*tmp0_9 + f_101[i]*tmp0_13 + f_111[i]*tmp0_10 + tmp0_12*(f_001[i] + f_100[i]) + tmp0_8*(f_011[i] + f_110[i]);
628     o[INDEX3(i,2,0,numComp,3)] = f_010[i]*tmp0_30 + f_011[i]*tmp0_28 + f_110[i]*tmp0_29 + f_111[i]*tmp0_31;
629     o[INDEX3(i,0,1,numComp,3)] = f_010[i]*tmp0_2 + f_011[i]*tmp0_0 + f_110[i]*tmp0_1 + f_111[i]*tmp0_3;
630     o[INDEX3(i,1,1,numComp,3)] = f_000[i]*tmp0_18 + f_001[i]*tmp0_19 + f_010[i]*tmp0_16 + f_011[i]*tmp0_14 + f_100[i]*tmp0_21 + f_101[i]*tmp0_20 + f_110[i]*tmp0_15 + f_111[i]*tmp0_17;
631     o[INDEX3(i,2,1,numComp,3)] = f_010[i]*tmp0_34 + f_011[i]*tmp0_32 + f_110[i]*tmp0_33 + f_111[i]*tmp0_35;
632     o[INDEX3(i,0,2,numComp,3)] = f_010[i]*tmp0_6 + f_011[i]*tmp0_4 + f_110[i]*tmp0_5 + f_111[i]*tmp0_7;
633     o[INDEX3(i,1,2,numComp,3)] = f_000[i]*tmp0_18 + f_001[i]*tmp0_21 + f_010[i]*tmp0_16 + f_011[i]*tmp0_15 + f_100[i]*tmp0_19 + f_101[i]*tmp0_20 + f_110[i]*tmp0_14 + f_111[i]*tmp0_17;
634     o[INDEX3(i,2,2,numComp,3)] = f_010[i]*tmp0_30 + f_011[i]*tmp0_28 + f_110[i]*tmp0_29 + f_111[i]*tmp0_31;
635     o[INDEX3(i,0,3,numComp,3)] = f_010[i]*tmp0_6 + f_011[i]*tmp0_4 + f_110[i]*tmp0_5 + f_111[i]*tmp0_7;
636     o[INDEX3(i,1,3,numComp,3)] = f_000[i]*tmp0_25 + f_010[i]*tmp0_23 + f_101[i]*tmp0_27 + f_111[i]*tmp0_24 + tmp0_22*(f_011[i] + f_110[i]) + tmp0_26*(f_001[i] + f_100[i]);
637     o[INDEX3(i,2,3,numComp,3)] = f_010[i]*tmp0_34 + f_011[i]*tmp0_32 + f_110[i]*tmp0_33 + f_111[i]*tmp0_35;
638     } /* end of component loop i */
639     } /* end of k0 loop */
640     } /* end of k2 loop */
641     } /* end of face 3 */
642     if (m_faceOffset[4] > -1) {
643     const double tmp0_22 = -0.16666666666666666667/h2;
644     const double tmp0_16 = -0.62200846792814621559/h2;
645     const double tmp0_33 = 0.044658198738520451079/h2;
646     const double tmp0_0 = -0.78867513459481288225/h0;
647     const double tmp0_21 = 0.044658198738520451079/h2;
648     const double tmp0_17 = -0.16666666666666666667/h2;
649     const double tmp0_1 = 0.78867513459481288225/h0;
650     const double tmp0_20 = 0.16666666666666666667/h2;
651     const double tmp0_14 = 0.78867513459481288225/h1;
652     const double tmp0_2 = 0.21132486540518711775/h0;
653     const double tmp0_27 = 0.62200846792814621559/h2;
654     const double tmp0_15 = 0.21132486540518711775/h1;
655     const double tmp0_3 = -0.21132486540518711775/h0;
656     const double tmp0_26 = 0.16666666666666666667/h2;
657     const double tmp0_12 = -0.21132486540518711775/h1;
658     const double tmp0_25 = -0.044658198738520451079/h2;
659     const double tmp0_13 = -0.78867513459481288225/h1;
660     const double tmp0_24 = -0.16666666666666666667/h2;
661     const double tmp0_10 = 0.21132486540518711775/h1;
662     const double tmp0_11 = 0.78867513459481288225/h1;
663     const double tmp0_34 = 0.16666666666666666667/h2;
664     const double tmp0_35 = 0.62200846792814621559/h2;
665     const double tmp0_8 = -0.78867513459481288225/h1;
666     const double tmp0_29 = 0.16666666666666666667/h2;
667     const double tmp0_9 = -0.21132486540518711775/h1;
668     const double tmp0_30 = -0.044658198738520451079/h2;
669     const double tmp0_28 = 0.044658198738520451079/h2;
670     const double tmp0_32 = -0.62200846792814621559/h2;
671     const double tmp0_31 = -0.16666666666666666667/h2;
672     const double tmp0_18 = -0.044658198738520451079/h2;
673     const double tmp0_4 = -0.21132486540518711775/h0;
674     const double tmp0_19 = 0.62200846792814621559/h2;
675     const double tmp0_5 = 0.21132486540518711775/h0;
676     const double tmp0_6 = 0.78867513459481288225/h0;
677     const double tmp0_23 = -0.62200846792814621559/h2;
678     const double tmp0_7 = -0.78867513459481288225/h0;
679     #pragma omp parallel for
680     for (index_t k1=0; k1 < m_NE1; ++k1) {
681     for (index_t k0=0; k0 < m_NE0; ++k0) {
682     const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,0, m_N0,m_N1));
683     const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,0, m_N0,m_N1));
684     const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,0, m_N0,m_N1));
685     const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,0, m_N0,m_N1));
686     const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,1, m_N0,m_N1));
687     const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,1, m_N0,m_N1));
688     const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,1, m_N0,m_N1));
689     const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,1, m_N0,m_N1));
690     double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE0));
691     for (index_t i=0; i < numComp; ++i) {
692     o[INDEX3(i,0,0,numComp,3)] = f_000[i]*tmp0_0 + f_010[i]*tmp0_3 + f_100[i]*tmp0_1 + f_110[i]*tmp0_2;
693     o[INDEX3(i,1,0,numComp,3)] = f_000[i]*tmp0_8 + f_010[i]*tmp0_11 + f_100[i]*tmp0_9 + f_110[i]*tmp0_10;
694     o[INDEX3(i,2,0,numComp,3)] = f_000[i]*tmp0_16 + f_001[i]*tmp0_19 + f_110[i]*tmp0_18 + f_111[i]*tmp0_21 + tmp0_17*(f_010[i] + f_100[i]) + tmp0_20*(f_011[i] + f_101[i]);
695     o[INDEX3(i,0,1,numComp,3)] = f_000[i]*tmp0_0 + f_010[i]*tmp0_3 + f_100[i]*tmp0_1 + f_110[i]*tmp0_2;
696     o[INDEX3(i,1,1,numComp,3)] = f_000[i]*tmp0_12 + f_010[i]*tmp0_15 + f_100[i]*tmp0_13 + f_110[i]*tmp0_14;
697     o[INDEX3(i,2,1,numComp,3)] = f_000[i]*tmp0_22 + f_001[i]*tmp0_26 + f_010[i]*tmp0_25 + f_011[i]*tmp0_28 + f_100[i]*tmp0_23 + f_101[i]*tmp0_27 + f_110[i]*tmp0_24 + f_111[i]*tmp0_29;
698     o[INDEX3(i,0,2,numComp,3)] = f_000[i]*tmp0_4 + f_010[i]*tmp0_7 + f_100[i]*tmp0_5 + f_110[i]*tmp0_6;
699     o[INDEX3(i,1,2,numComp,3)] = f_000[i]*tmp0_8 + f_010[i]*tmp0_11 + f_100[i]*tmp0_9 + f_110[i]*tmp0_10;
700     o[INDEX3(i,2,2,numComp,3)] = f_000[i]*tmp0_22 + f_001[i]*tmp0_26 + f_010[i]*tmp0_23 + f_011[i]*tmp0_27 + f_100[i]*tmp0_25 + f_101[i]*tmp0_28 + f_110[i]*tmp0_24 + f_111[i]*tmp0_29;
701     o[INDEX3(i,0,3,numComp,3)] = f_000[i]*tmp0_4 + f_010[i]*tmp0_7 + f_100[i]*tmp0_5 + f_110[i]*tmp0_6;
702     o[INDEX3(i,1,3,numComp,3)] = f_000[i]*tmp0_12 + f_010[i]*tmp0_15 + f_100[i]*tmp0_13 + f_110[i]*tmp0_14;
703     o[INDEX3(i,2,3,numComp,3)] = f_000[i]*tmp0_30 + f_001[i]*tmp0_33 + f_110[i]*tmp0_32 + f_111[i]*tmp0_35 + tmp0_31*(f_010[i] + f_100[i]) + tmp0_34*(f_011[i] + f_101[i]);
704     } /* end of component loop i */
705     } /* end of k0 loop */
706     } /* end of k1 loop */
707     } /* end of face 4 */
708     if (m_faceOffset[5] > -1) {
709     const double tmp0_22 = 0.16666666666666666667/h2;
710     const double tmp0_16 = 0.62200846792814621559/h2;
711     const double tmp0_33 = -0.044658198738520451079/h2;
712     const double tmp0_0 = -0.78867513459481288225/h0;
713     const double tmp0_21 = -0.16666666666666666667/h2;
714     const double tmp0_17 = 0.16666666666666666667/h2;
715     const double tmp0_1 = 0.78867513459481288225/h0;
716     const double tmp0_20 = -0.044658198738520451079/h2;
717     const double tmp0_14 = 0.21132486540518711775/h1;
718     const double tmp0_2 = -0.21132486540518711775/h0;
719     const double tmp0_27 = -0.16666666666666666667/h2;
720     const double tmp0_15 = 0.78867513459481288225/h1;
721     const double tmp0_3 = 0.21132486540518711775/h0;
722     const double tmp0_26 = -0.16666666666666666667/h2;
723     const double tmp0_12 = -0.21132486540518711775/h1;
724     const double tmp0_25 = 0.16666666666666666667/h2;
725     const double tmp0_13 = -0.78867513459481288225/h1;
726     const double tmp0_24 = 0.044658198738520451079/h2;
727     const double tmp0_10 = 0.78867513459481288225/h1;
728     const double tmp0_11 = 0.21132486540518711775/h1;
729     const double tmp0_34 = -0.62200846792814621559/h2;
730     const double tmp0_35 = -0.16666666666666666667/h2;
731     const double tmp0_8 = -0.78867513459481288225/h1;
732     const double tmp0_29 = -0.62200846792814621559/h2;
733     const double tmp0_9 = -0.21132486540518711775/h1;
734     const double tmp0_30 = 0.044658198738520451079/h2;
735     const double tmp0_28 = -0.044658198738520451079/h2;
736     const double tmp0_32 = 0.62200846792814621559/h2;
737     const double tmp0_31 = 0.16666666666666666667/h2;
738     const double tmp0_18 = 0.044658198738520451079/h2;
739     const double tmp0_4 = -0.21132486540518711775/h0;
740     const double tmp0_19 = -0.62200846792814621559/h2;
741     const double tmp0_5 = 0.21132486540518711775/h0;
742     const double tmp0_6 = -0.78867513459481288225/h0;
743     const double tmp0_23 = 0.62200846792814621559/h2;
744     const double tmp0_7 = 0.78867513459481288225/h0;
745     #pragma omp parallel for
746     for (index_t k1=0; k1 < m_NE1; ++k1) {
747     for (index_t k0=0; k0 < m_NE0; ++k0) {
748     const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,m_N2-1, m_N0,m_N1));
749     const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,m_N2-1, m_N0,m_N1));
750     const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,m_N2-1, m_N0,m_N1));
751     const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,m_N2-1, m_N0,m_N1));
752     const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,m_N2-2, m_N0,m_N1));
753     const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,m_N2-2, m_N0,m_N1));
754     const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,m_N2-2, m_N0,m_N1));
755     const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,m_N2-2, m_N0,m_N1));
756     double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE0));
757     for (index_t i=0; i < numComp; ++i) {
758     o[INDEX3(i,0,0,numComp,3)] = f_001[i]*tmp0_0 + f_011[i]*tmp0_2 + f_101[i]*tmp0_1 + f_111[i]*tmp0_3;
759     o[INDEX3(i,1,0,numComp,3)] = f_001[i]*tmp0_8 + f_011[i]*tmp0_10 + f_101[i]*tmp0_9 + f_111[i]*tmp0_11;
760     o[INDEX3(i,2,0,numComp,3)] = f_000[i]*tmp0_19 + f_001[i]*tmp0_16 + f_110[i]*tmp0_20 + f_111[i]*tmp0_18 + tmp0_17*(f_011[i] + f_101[i]) + tmp0_21*(f_010[i] + f_100[i]);
761     o[INDEX3(i,0,1,numComp,3)] = f_001[i]*tmp0_0 + f_011[i]*tmp0_2 + f_101[i]*tmp0_1 + f_111[i]*tmp0_3;
762     o[INDEX3(i,1,1,numComp,3)] = f_001[i]*tmp0_12 + f_011[i]*tmp0_14 + f_101[i]*tmp0_13 + f_111[i]*tmp0_15;
763     o[INDEX3(i,2,1,numComp,3)] = f_000[i]*tmp0_26 + f_001[i]*tmp0_22 + f_010[i]*tmp0_28 + f_011[i]*tmp0_24 + f_100[i]*tmp0_29 + f_101[i]*tmp0_23 + f_110[i]*tmp0_27 + f_111[i]*tmp0_25;
764     o[INDEX3(i,0,2,numComp,3)] = f_001[i]*tmp0_4 + f_011[i]*tmp0_6 + f_101[i]*tmp0_5 + f_111[i]*tmp0_7;
765     o[INDEX3(i,1,2,numComp,3)] = f_001[i]*tmp0_8 + f_011[i]*tmp0_10 + f_101[i]*tmp0_9 + f_111[i]*tmp0_11;
766     o[INDEX3(i,2,2,numComp,3)] = f_000[i]*tmp0_26 + f_001[i]*tmp0_22 + f_010[i]*tmp0_29 + f_011[i]*tmp0_23 + f_100[i]*tmp0_28 + f_101[i]*tmp0_24 + f_110[i]*tmp0_27 + f_111[i]*tmp0_25;
767     o[INDEX3(i,0,3,numComp,3)] = f_001[i]*tmp0_4 + f_011[i]*tmp0_6 + f_101[i]*tmp0_5 + f_111[i]*tmp0_7;
768     o[INDEX3(i,1,3,numComp,3)] = f_001[i]*tmp0_12 + f_011[i]*tmp0_14 + f_101[i]*tmp0_13 + f_111[i]*tmp0_15;
769     o[INDEX3(i,2,3,numComp,3)] = f_000[i]*tmp0_33 + f_001[i]*tmp0_30 + f_110[i]*tmp0_34 + f_111[i]*tmp0_32 + tmp0_31*(f_011[i] + f_101[i]) + tmp0_35*(f_010[i] + f_100[i]);
770     } /* end of component loop i */
771     } /* end of k0 loop */
772     } /* end of k1 loop */
773     } /* end of face 5 */
774     /* GENERATOR SNIP_GRAD_FACES BOTTOM */
775 caltinay 3703 } else {
776 caltinay 3707 stringstream msg;
777     msg << "setToGradient() not implemented for "
778     << functionSpaceTypeAsString(out.getFunctionSpace().getTypeCode());
779     throw RipleyException(msg.str());
780 caltinay 3703 }
781     }
782    
783 caltinay 3691 Paso_SystemMatrixPattern* Brick::getPattern(bool reducedRowOrder,
784     bool reducedColOrder) const
785     {
786     if (reducedRowOrder || reducedColOrder)
787     throw RipleyException("getPattern() not implemented for reduced order");
788    
789     throw RipleyException("getPattern() not implemented");
790     }
791    
792     void Brick::Print_Mesh_Info(const bool full) const
793     {
794     RipleyDomain::Print_Mesh_Info(full);
795     if (full) {
796     cout << " Id Coordinates" << endl;
797     cout.precision(15);
798     cout.setf(ios::scientific, ios::floatfield);
799 caltinay 3698 pair<double,double> xdx = getFirstCoordAndSpacing(0);
800     pair<double,double> ydy = getFirstCoordAndSpacing(1);
801     pair<double,double> zdz = getFirstCoordAndSpacing(2);
802 caltinay 3691 for (index_t i=0; i < getNumNodes(); i++) {
803     cout << " " << setw(5) << m_nodeId[i]
804 caltinay 3698 << " " << xdx.first+(i%m_N0)*xdx.second
805     << " " << ydy.first+(i%(m_N0*m_N1)/m_N0)*ydy.second
806     << " " << zdz.first+(i/(m_N0*m_N1))*zdz.second << endl;
807 caltinay 3691 }
808     }
809     }
810    
811 caltinay 3698 IndexVector Brick::getNumNodesPerDim() const
812     {
813     IndexVector ret;
814     ret.push_back(m_N0);
815     ret.push_back(m_N1);
816     ret.push_back(m_N2);
817     return ret;
818     }
819    
820     IndexVector Brick::getNumElementsPerDim() const
821     {
822     IndexVector ret;
823     ret.push_back(m_NE0);
824     ret.push_back(m_NE1);
825     ret.push_back(m_NE2);
826     return ret;
827     }
828    
829     IndexVector Brick::getNumFacesPerBoundary() const
830     {
831     IndexVector ret(6, 0);
832     //left
833     if (m_offset0==0)
834     ret[0]=m_NE1*m_NE2;
835     //right
836     if (m_mpiInfo->rank%m_NX==m_NX-1)
837     ret[1]=m_NE1*m_NE2;
838     //bottom
839     if (m_offset1==0)
840     ret[2]=m_NE0*m_NE2;
841     //top
842     if (m_mpiInfo->rank%(m_NX*m_NY)/m_NX==m_NY-1)
843     ret[3]=m_NE0*m_NE2;
844     //front
845     if (m_offset2==0)
846     ret[4]=m_NE0*m_NE1;
847     //back
848     if (m_mpiInfo->rank/(m_NX*m_NY)==m_NZ-1)
849     ret[5]=m_NE0*m_NE1;
850     return ret;
851     }
852    
853     pair<double,double> Brick::getFirstCoordAndSpacing(dim_t dim) const
854     {
855     if (dim==0)
856     return pair<double,double>((m_l0*m_offset0)/m_gNE0, m_l0/m_gNE0);
857     else if (dim==1)
858     return pair<double,double>((m_l1*m_offset1)/m_gNE1, m_l1/m_gNE1);
859     else if (dim==2)
860     return pair<double,double>((m_l2*m_offset2)/m_gNE2, m_l2/m_gNE2);
861    
862     throw RipleyException("getFirstCoordAndSpacing(): invalid argument");
863     }
864    
865    
866 caltinay 3691 //protected
867     dim_t Brick::getNumFaceElements() const
868     {
869     dim_t n=0;
870     //left
871     if (m_offset0==0)
872     n+=m_NE1*m_NE2;
873     //right
874     if (m_mpiInfo->rank%m_NX==m_NX-1)
875     n+=m_NE1*m_NE2;
876     //bottom
877     if (m_offset1==0)
878     n+=m_NE0*m_NE2;
879     //top
880     if (m_mpiInfo->rank%(m_NX*m_NY)/m_NX==m_NY-1)
881     n+=m_NE0*m_NE2;
882     //front
883     if (m_offset2==0)
884     n+=m_NE0*m_NE1;
885     //back
886     if (m_mpiInfo->rank/(m_NX*m_NY)==m_NZ-1)
887     n+=m_NE0*m_NE1;
888    
889     return n;
890     }
891    
892     //protected
893     void Brick::assembleCoordinates(escript::Data& arg) const
894     {
895     escriptDataC x = arg.getDataC();
896     int numDim = m_numDim;
897     if (!isDataPointShapeEqual(&x, 1, &numDim))
898     throw RipleyException("setToX: Invalid Data object shape");
899     if (!numSamplesEqual(&x, 1, getNumNodes()))
900     throw RipleyException("setToX: Illegal number of samples in Data object");
901    
902 caltinay 3698 pair<double,double> xdx = getFirstCoordAndSpacing(0);
903     pair<double,double> ydy = getFirstCoordAndSpacing(1);
904     pair<double,double> zdz = getFirstCoordAndSpacing(2);
905 caltinay 3691 arg.requireWrite();
906     #pragma omp parallel for
907     for (dim_t i2 = 0; i2 < m_N2; i2++) {
908     for (dim_t i1 = 0; i1 < m_N1; i1++) {
909     for (dim_t i0 = 0; i0 < m_N0; i0++) {
910     double* point = arg.getSampleDataRW(i0+m_N0*i1+m_N0*m_N1*i2);
911 caltinay 3698 point[0] = xdx.first+i0*xdx.second;
912     point[1] = ydy.first+i1*ydy.second;
913     point[2] = zdz.first+i2*zdz.second;
914 caltinay 3691 }
915     }
916     }
917     }
918    
919     //private
920     void Brick::populateSampleIds()
921     {
922 caltinay 3697 // identifiers are ordered from left to right, bottom to top, front to back
923     // on each rank, except for the shared nodes which are owned by the rank
924     // below / to the left / to the front of the current rank
925 caltinay 3698
926     // build node distribution vector first.
927     // m_nodeDistribution[i] is the first node id on rank i, that is
928     // rank i owns m_nodeDistribution[i+1]-nodeDistribution[i] nodes
929     m_nodeDistribution.assign(m_mpiInfo->size+1, 0);
930     m_nodeDistribution[1]=getNumNodes();
931     for (dim_t k=1; k<m_mpiInfo->size-1; k++) {
932     const index_t x = k%m_NX;
933     const index_t y = k%(m_NX*m_NY)/m_NX;
934     const index_t z = k/(m_NX*m_NY);
935     index_t numNodes=getNumNodes();
936     if (x>0)
937     numNodes-=m_N1*m_N2;
938     if (y>0)
939     numNodes-=m_N0*m_N2;
940     if (z>0)
941     numNodes-=m_N0*m_N1;
942     // if an edge was subtracted twice add it back
943     if (x>0 && y>0)
944     numNodes+=m_N2;
945     if (x>0 && z>0)
946     numNodes+=m_N1;
947     if (y>0 && z>0)
948     numNodes+=m_N0;
949     // the corner node was removed 3x and added back 3x, so subtract it
950     if (x>0 && y>0 && z>0)
951     numNodes--;
952     m_nodeDistribution[k+1]=m_nodeDistribution[k]+numNodes;
953     }
954     m_nodeDistribution[m_mpiInfo->size]=getNumDataPointsGlobal();
955    
956 caltinay 3691 m_nodeId.resize(getNumNodes());
957 caltinay 3698
958     // the bottom, left and front planes are not owned by this rank so the
959     // identifiers need to be computed accordingly
960     const index_t left = (m_offset0==0 ? 0 : 1);
961     const index_t bottom = (m_offset1==0 ? 0 : 1);
962     const index_t front = (m_offset2==0 ? 0 : 1);
963    
964     // case 1: all nodes on left plane are owned by rank on the left
965     if (left>0) {
966     const int neighbour=m_mpiInfo->rank-1;
967     const index_t leftN0=(neighbour%m_NX == 0 ? m_N0 : m_N0-1);
968     const index_t leftN1=(neighbour%(m_NX*m_NY)/m_NX==0 ? m_N1 : m_N1-1);
969 caltinay 3691 #pragma omp parallel for
970 caltinay 3698 for (dim_t i2=front; i2<m_N2; i2++) {
971     for (dim_t i1=bottom; i1<m_N1; i1++) {
972     m_nodeId[i1*m_N0+i2*m_N0*m_N1]=m_nodeDistribution[neighbour]
973     + (i1-bottom+1)*leftN0
974     + (i2-front)*leftN0*leftN1 - 1;
975     }
976     }
977 caltinay 3691 }
978 caltinay 3698 // case 2: all nodes on bottom plane are owned by rank below
979     if (bottom>0) {
980     const int neighbour=m_mpiInfo->rank-m_NX;
981     const index_t bottomN0=(neighbour%m_NX == 0 ? m_N0 : m_N0-1);
982     const index_t bottomN1=(neighbour%(m_NX*m_NY)/m_NX==0 ? m_N1 : m_N1-1);
983     #pragma omp parallel for
984     for (dim_t i2=front; i2<m_N2; i2++) {
985     for (dim_t i0=left; i0<m_N0; i0++) {
986     m_nodeId[i0+i2*m_N0*m_N1]=m_nodeDistribution[neighbour]
987     + bottomN0*(bottomN1-1)
988     + (i2-front)*bottomN0*bottomN1 + i0-left;
989     }
990     }
991     }
992     // case 3: all nodes on front plane are owned by rank in front
993     if (front>0) {
994     const int neighbour=m_mpiInfo->rank-m_NX*m_NY;
995     const index_t N0=(neighbour%m_NX == 0 ? m_N0 : m_N0-1);
996     const index_t N1=(neighbour%(m_NX*m_NY)/m_NX==0 ? m_N1 : m_N1-1);
997     const index_t N2=(neighbour/(m_NX*m_NY)==0 ? m_N2 : m_N2-1);
998     #pragma omp parallel for
999     for (dim_t i1=bottom; i1<m_N1; i1++) {
1000     for (dim_t i0=left; i0<m_N0; i0++) {
1001     m_nodeId[i0+i1*m_N0]=m_nodeDistribution[neighbour]
1002     + N0*N1*(N2-1)+(i1-bottom)*N0 + i0-left;
1003     }
1004     }
1005     }
1006     // case 4: nodes on front bottom edge are owned by the corresponding rank
1007     if (front>0 && bottom>0) {
1008     const int neighbour=m_mpiInfo->rank-m_NX*(m_NY+1);
1009     const index_t N0=(neighbour%m_NX == 0 ? m_N0 : m_N0-1);
1010     const index_t N1=(neighbour%(m_NX*m_NY)/m_NX==0 ? m_N1 : m_N1-1);
1011     const index_t N2=(neighbour/(m_NX*m_NY)==0 ? m_N2 : m_N2-1);
1012     #pragma omp parallel for
1013     for (dim_t i0=left; i0<m_N0; i0++) {
1014     m_nodeId[i0]=m_nodeDistribution[neighbour]
1015     + N0*N1*(N2-1)+(N1-1)*N0 + i0-left;
1016     }
1017     }
1018     // case 5: nodes on left bottom edge are owned by the corresponding rank
1019     if (left>0 && bottom>0) {
1020     const int neighbour=m_mpiInfo->rank-m_NX-1;
1021     const index_t N0=(neighbour%m_NX == 0 ? m_N0 : m_N0-1);
1022     const index_t N1=(neighbour%(m_NX*m_NY)/m_NX==0 ? m_N1 : m_N1-1);
1023     #pragma omp parallel for
1024     for (dim_t i2=front; i2<m_N2; i2++) {
1025     m_nodeId[i2*m_N0*m_N1]=m_nodeDistribution[neighbour]
1026     + (1+i2-front)*N0*N1-1;
1027     }
1028     }
1029     // case 6: nodes on left front edge are owned by the corresponding rank
1030     if (left>0 && front>0) {
1031     const int neighbour=m_mpiInfo->rank-m_NX*m_NY-1;
1032     const index_t N0=(neighbour%m_NX == 0 ? m_N0 : m_N0-1);
1033     const index_t N1=(neighbour%(m_NX*m_NY)/m_NX==0 ? m_N1 : m_N1-1);
1034     const index_t N2=(neighbour/(m_NX*m_NY)==0 ? m_N2 : m_N2-1);
1035     #pragma omp parallel for
1036     for (dim_t i1=bottom; i1<m_N1; i1++) {
1037     m_nodeId[i1*m_N0]=m_nodeDistribution[neighbour]
1038     + N0*N1*(N2-1)+N0-1+(i1-bottom)*N0;
1039     }
1040     }
1041     // case 7: bottom-left-front corner node owned by corresponding rank
1042     if (left>0 && bottom>0 && front>0) {
1043     const int neighbour=m_mpiInfo->rank-m_NX*(m_NY+1)-1;
1044     const index_t N0=(neighbour%m_NX == 0 ? m_N0 : m_N0-1);
1045     const index_t N1=(neighbour%(m_NX*m_NY)/m_NX==0 ? m_N1 : m_N1-1);
1046     const index_t N2=(neighbour/(m_NX*m_NY) == 0 ? m_N2 : m_N2-1);
1047     m_nodeId[0]=m_nodeDistribution[neighbour]+N0*N1*N2-1;
1048     }
1049 caltinay 3697
1050 caltinay 3698 // the rest of the id's are contiguous
1051     const index_t firstId=m_nodeDistribution[m_mpiInfo->rank];
1052     #pragma omp parallel for
1053     for (dim_t i2=front; i2<m_N2; i2++) {
1054     for (dim_t i1=bottom; i1<m_N1; i1++) {
1055     for (dim_t i0=left; i0<m_N0; i0++) {
1056     m_nodeId[i0+i1*m_N0+i2*m_N0*m_N1] = firstId+i0-left
1057     +(i1-bottom)*(m_N0-left)
1058     +(i2-front)*(m_N0-left)*(m_N1-bottom);
1059     }
1060     }
1061     }
1062    
1063 caltinay 3697 // elements
1064     m_elementId.resize(getNumElements());
1065     #pragma omp parallel for
1066     for (dim_t k=0; k<getNumElements(); k++) {
1067     m_elementId[k]=k;
1068     }
1069    
1070     // face elements
1071     m_faceId.resize(getNumFaceElements());
1072     #pragma omp parallel for
1073     for (dim_t k=0; k<getNumFaceElements(); k++) {
1074     m_faceId[k]=k;
1075     }
1076 caltinay 3704
1077     // generate face offset vector
1078     const IndexVector facesPerEdge = getNumFacesPerBoundary();
1079     m_faceOffset.assign(facesPerEdge.size(), -1);
1080     index_t offset=0;
1081     for (size_t i=0; i<facesPerEdge.size(); i++) {
1082     if (facesPerEdge[i]>0) {
1083     m_faceOffset[i]=offset;
1084     offset+=facesPerEdge[i];
1085     }
1086     }
1087 caltinay 3691 }
1088    
1089 caltinay 3703 //protected
1090     void Brick::interpolateNodesOnElements(escript::Data& out, escript::Data& in) const
1091     {
1092     const dim_t numComp = in.getDataPointSize();
1093     /* GENERATOR SNIP_INTERPOLATE_ELEMENTS TOP */
1094     const double tmp0_3 = 0.0094373878376559314545;
1095     const double tmp0_2 = 0.035220810900864519624;
1096     const double tmp0_1 = 0.13144585576580214704;
1097     const double tmp0_0 = 0.49056261216234406855;
1098     #pragma omp parallel for
1099     for (index_t k2=0; k2 < m_NE2; ++k2) {
1100     for (index_t k1=0; k1 < m_NE1; ++k1) {
1101     for (index_t k0=0; k0 < m_NE0; ++k0) {
1102     const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,k2, m_N0,m_N1));
1103     const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,k2+1, m_N0,m_N1));
1104     const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,k2+1, m_N0,m_N1));
1105     const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,k2+1, m_N0,m_N1));
1106     const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2, m_N0,m_N1));
1107     const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,k2, m_N0,m_N1));
1108     const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,k2, m_N0,m_N1));
1109     const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2+1, m_N0,m_N1));
1110     double* o = out.getSampleDataRW(INDEX3(k0,k1,k2,m_NE0,m_NE1));
1111     for (index_t i=0; i < numComp; ++i) {
1112     o[INDEX2(i,numComp,0)] = f_000[i]*tmp0_0 + f_111[i]*tmp0_3 + tmp0_1*(f_001[i] + f_010[i] + f_100[i]) + tmp0_2*(f_011[i] + f_101[i] + f_110[i]);
1113     o[INDEX2(i,numComp,1)] = f_011[i]*tmp0_3 + f_100[i]*tmp0_0 + tmp0_1*(f_000[i] + f_101[i] + f_110[i]) + tmp0_2*(f_001[i] + f_010[i] + f_111[i]);
1114     o[INDEX2(i,numComp,2)] = f_010[i]*tmp0_0 + f_101[i]*tmp0_3 + tmp0_1*(f_000[i] + f_011[i] + f_110[i]) + tmp0_2*(f_001[i] + f_100[i] + f_111[i]);
1115     o[INDEX2(i,numComp,3)] = f_001[i]*tmp0_3 + f_110[i]*tmp0_0 + tmp0_1*(f_010[i] + f_100[i] + f_111[i]) + tmp0_2*(f_000[i] + f_011[i] + f_101[i]);
1116     o[INDEX2(i,numComp,4)] = f_001[i]*tmp0_0 + f_110[i]*tmp0_3 + tmp0_1*(f_000[i] + f_011[i] + f_101[i]) + tmp0_2*(f_010[i] + f_100[i] + f_111[i]);
1117     o[INDEX2(i,numComp,5)] = f_010[i]*tmp0_3 + f_101[i]*tmp0_0 + tmp0_1*(f_001[i] + f_100[i] + f_111[i]) + tmp0_2*(f_000[i] + f_011[i] + f_110[i]);
1118     o[INDEX2(i,numComp,6)] = f_011[i]*tmp0_0 + f_100[i]*tmp0_3 + tmp0_1*(f_001[i] + f_010[i] + f_111[i]) + tmp0_2*(f_000[i] + f_101[i] + f_110[i]);
1119     o[INDEX2(i,numComp,7)] = f_000[i]*tmp0_3 + f_111[i]*tmp0_0 + tmp0_1*(f_011[i] + f_101[i] + f_110[i]) + tmp0_2*(f_001[i] + f_010[i] + f_100[i]);
1120     } /* end of component loop i */
1121     } /* end of k0 loop */
1122     } /* end of k1 loop */
1123     } /* end of k2 loop */
1124     /* GENERATOR SNIP_INTERPOLATE_ELEMENTS BOTTOM */
1125     }
1126    
1127     //protected
1128     void Brick::interpolateNodesOnFaces(escript::Data& out, escript::Data& in) const
1129     {
1130 caltinay 3704 const dim_t numComp = in.getDataPointSize();
1131     /* GENERATOR SNIP_INTERPOLATE_FACES TOP */
1132     if (m_faceOffset[0] > -1) {
1133     const double tmp0_2 = 0.044658198738520451079;
1134     const double tmp0_1 = 0.16666666666666666667;
1135     const double tmp0_0 = 0.62200846792814621559;
1136     #pragma omp parallel for
1137     for (index_t k2=0; k2 < m_NE2; ++k2) {
1138     for (index_t k1=0; k1 < m_NE1; ++k1) {
1139     const register double* f_000 = in.getSampleDataRO(INDEX3(0,k1,k2, m_N0,m_N1));
1140     const register double* f_001 = in.getSampleDataRO(INDEX3(0,k1,k2+1, m_N0,m_N1));
1141     const register double* f_011 = in.getSampleDataRO(INDEX3(0,k1+1,k2+1, m_N0,m_N1));
1142     const register double* f_010 = in.getSampleDataRO(INDEX3(0,k1+1,k2, m_N0,m_N1));
1143 caltinay 3707 double* o = out.getSampleDataRW(m_faceOffset[0]+INDEX2(k1,k2,m_NE1));
1144 caltinay 3704 for (index_t i=0; i < numComp; ++i) {
1145     o[INDEX2(i,numComp,0)] = f_000[i]*tmp0_0 + f_011[i]*tmp0_2 + tmp0_1*(f_001[i] + f_010[i]);
1146     o[INDEX2(i,numComp,1)] = f_001[i]*tmp0_2 + f_010[i]*tmp0_0 + tmp0_1*(f_000[i] + f_011[i]);
1147     o[INDEX2(i,numComp,2)] = f_001[i]*tmp0_0 + f_010[i]*tmp0_2 + tmp0_1*(f_000[i] + f_011[i]);
1148     o[INDEX2(i,numComp,3)] = f_000[i]*tmp0_2 + f_011[i]*tmp0_0 + tmp0_1*(f_001[i] + f_010[i]);
1149     } /* end of component loop i */
1150     } /* end of k1 loop */
1151     } /* end of k2 loop */
1152     } /* end of face 0 */
1153     if (m_faceOffset[1] > -1) {
1154     const double tmp0_2 = 0.044658198738520451079;
1155     const double tmp0_1 = 0.62200846792814621559;
1156     const double tmp0_0 = 0.16666666666666666667;
1157     #pragma omp parallel for
1158     for (index_t k2=0; k2 < m_NE2; ++k2) {
1159     for (index_t k1=0; k1 < m_NE1; ++k1) {
1160     const register double* f_101 = in.getSampleDataRO(INDEX3(m_N0-1,k1,k2+1, m_N0,m_N1));
1161     const register double* f_100 = in.getSampleDataRO(INDEX3(m_N0-1,k1,k2, m_N0,m_N1));
1162     const register double* f_110 = in.getSampleDataRO(INDEX3(m_N0-1,k1+1,k2, m_N0,m_N1));
1163     const register double* f_111 = in.getSampleDataRO(INDEX3(m_N0-1,k1+1,k2+1, m_N0,m_N1));
1164 caltinay 3707 double* o = out.getSampleDataRW(m_faceOffset[1]+INDEX2(k1,k2,m_NE1));
1165 caltinay 3704 for (index_t i=0; i < numComp; ++i) {
1166     o[INDEX2(i,numComp,0)] = f_100[i]*tmp0_1 + f_111[i]*tmp0_2 + tmp0_0*(f_101[i] + f_110[i]);
1167     o[INDEX2(i,numComp,1)] = f_101[i]*tmp0_2 + f_110[i]*tmp0_1 + tmp0_0*(f_100[i] + f_111[i]);
1168     o[INDEX2(i,numComp,2)] = f_101[i]*tmp0_1 + f_110[i]*tmp0_2 + tmp0_0*(f_100[i] + f_111[i]);
1169     o[INDEX2(i,numComp,3)] = f_100[i]*tmp0_2 + f_111[i]*tmp0_1 + tmp0_0*(f_101[i] + f_110[i]);
1170     } /* end of component loop i */
1171     } /* end of k1 loop */
1172     } /* end of k2 loop */
1173     } /* end of face 1 */
1174     if (m_faceOffset[2] > -1) {
1175     const double tmp0_2 = 0.044658198738520451079;
1176     const double tmp0_1 = 0.16666666666666666667;
1177     const double tmp0_0 = 0.62200846792814621559;
1178     #pragma omp parallel for
1179     for (index_t k2=0; k2 < m_NE2; ++k2) {
1180     for (index_t k0=0; k0 < m_NE0; ++k0) {
1181     const register double* f_000 = in.getSampleDataRO(INDEX3(k0,0,k2, m_N0,m_N1));
1182     const register double* f_001 = in.getSampleDataRO(INDEX3(k0,0,k2+1, m_N0,m_N1));
1183     const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,0,k2+1, m_N0,m_N1));
1184     const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,0,k2, m_N0,m_N1));
1185 caltinay 3707 double* o = out.getSampleDataRW(m_faceOffset[2]+INDEX2(k0,k2,m_NE0));
1186 caltinay 3704 for (index_t i=0; i < numComp; ++i) {
1187     o[INDEX2(i,numComp,0)] = f_000[i]*tmp0_0 + f_101[i]*tmp0_2 + tmp0_1*(f_001[i] + f_100[i]);
1188     o[INDEX2(i,numComp,1)] = f_001[i]*tmp0_2 + f_100[i]*tmp0_0 + tmp0_1*(f_000[i] + f_101[i]);
1189     o[INDEX2(i,numComp,2)] = f_001[i]*tmp0_0 + f_100[i]*tmp0_2 + tmp0_1*(f_000[i] + f_101[i]);
1190     o[INDEX2(i,numComp,3)] = f_000[i]*tmp0_2 + f_101[i]*tmp0_0 + tmp0_1*(f_001[i] + f_100[i]);
1191     } /* end of component loop i */
1192     } /* end of k0 loop */
1193     } /* end of k2 loop */
1194     } /* end of face 2 */
1195     if (m_faceOffset[3] > -1) {
1196     const double tmp0_2 = 0.044658198738520451079;
1197     const double tmp0_1 = 0.62200846792814621559;
1198     const double tmp0_0 = 0.16666666666666666667;
1199     #pragma omp parallel for
1200     for (index_t k2=0; k2 < m_NE2; ++k2) {
1201     for (index_t k0=0; k0 < m_NE0; ++k0) {
1202     const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,m_N1-1,k2, m_N0,m_N1));
1203     const register double* f_011 = in.getSampleDataRO(INDEX3(k0,m_N1-1,k2+1, m_N0,m_N1));
1204     const register double* f_010 = in.getSampleDataRO(INDEX3(k0,m_N1-1,k2, m_N0,m_N1));
1205     const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,m_N1-1,k2+1, m_N0,m_N1));
1206 caltinay 3707 double* o = out.getSampleDataRW(m_faceOffset[3]+INDEX2(k0,k2,m_NE0));
1207 caltinay 3704 for (index_t i=0; i < numComp; ++i) {
1208     o[INDEX2(i,numComp,0)] = f_010[i]*tmp0_1 + f_111[i]*tmp0_2 + tmp0_0*(f_011[i] + f_110[i]);
1209     o[INDEX2(i,numComp,1)] = f_011[i]*tmp0_2 + f_110[i]*tmp0_1 + tmp0_0*(f_010[i] + f_111[i]);
1210     o[INDEX2(i,numComp,2)] = f_011[i]*tmp0_1 + f_110[i]*tmp0_2 + tmp0_0*(f_010[i] + f_111[i]);
1211     o[INDEX2(i,numComp,3)] = f_010[i]*tmp0_2 + f_111[i]*tmp0_1 + tmp0_0*(f_011[i] + f_110[i]);
1212     } /* end of component loop i */
1213     } /* end of k0 loop */
1214     } /* end of k2 loop */
1215     } /* end of face 3 */
1216     if (m_faceOffset[4] > -1) {
1217     const double tmp0_2 = 0.044658198738520451079;
1218     const double tmp0_1 = 0.16666666666666666667;
1219     const double tmp0_0 = 0.62200846792814621559;
1220     #pragma omp parallel for
1221     for (index_t k1=0; k1 < m_NE1; ++k1) {
1222     for (index_t k0=0; k0 < m_NE0; ++k0) {
1223     const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,0, m_N0,m_N1));
1224     const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,0, m_N0,m_N1));
1225     const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,0, m_N0,m_N1));
1226     const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,0, m_N0,m_N1));
1227 caltinay 3707 double* o = out.getSampleDataRW(m_faceOffset[4]+INDEX2(k0,k1,m_NE0));
1228 caltinay 3704 for (index_t i=0; i < numComp; ++i) {
1229     o[INDEX2(i,numComp,0)] = f_000[i]*tmp0_0 + f_110[i]*tmp0_2 + tmp0_1*(f_010[i] + f_100[i]);
1230     o[INDEX2(i,numComp,1)] = f_010[i]*tmp0_2 + f_100[i]*tmp0_0 + tmp0_1*(f_000[i] + f_110[i]);
1231     o[INDEX2(i,numComp,2)] = f_010[i]*tmp0_0 + f_100[i]*tmp0_2 + tmp0_1*(f_000[i] + f_110[i]);
1232     o[INDEX2(i,numComp,3)] = f_000[i]*tmp0_2 + f_110[i]*tmp0_0 + tmp0_1*(f_010[i] + f_100[i]);
1233     } /* end of component loop i */
1234     } /* end of k0 loop */
1235     } /* end of k1 loop */
1236     } /* end of face 4 */
1237     if (m_faceOffset[5] > -1) {
1238     const double tmp0_2 = 0.044658198738520451079;
1239     const double tmp0_1 = 0.16666666666666666667;
1240     const double tmp0_0 = 0.62200846792814621559;
1241     #pragma omp parallel for
1242     for (index_t k1=0; k1 < m_NE1; ++k1) {
1243     for (index_t k0=0; k0 < m_NE0; ++k0) {
1244     const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,m_N2-1, m_N0,m_N1));
1245     const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,m_N2-1, m_N0,m_N1));
1246     const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,m_N2-1, m_N0,m_N1));
1247     const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,m_N2-1, m_N0,m_N1));
1248 caltinay 3707 double* o = out.getSampleDataRW(m_faceOffset[5]+INDEX2(k0,k1,m_NE0));
1249 caltinay 3704 for (index_t i=0; i < numComp; ++i) {
1250     o[INDEX2(i,numComp,0)] = f_001[i]*tmp0_0 + f_111[i]*tmp0_2 + tmp0_1*(f_011[i] + f_101[i]);
1251     o[INDEX2(i,numComp,1)] = f_011[i]*tmp0_2 + f_101[i]*tmp0_0 + tmp0_1*(f_001[i] + f_111[i]);
1252     o[INDEX2(i,numComp,2)] = f_011[i]*tmp0_0 + f_101[i]*tmp0_2 + tmp0_1*(f_001[i] + f_111[i]);
1253     o[INDEX2(i,numComp,3)] = f_001[i]*tmp0_2 + f_111[i]*tmp0_0 + tmp0_1*(f_011[i] + f_101[i]);
1254     } /* end of component loop i */
1255     } /* end of k0 loop */
1256     } /* end of k1 loop */
1257     } /* end of face 5 */
1258     /* GENERATOR SNIP_INTERPOLATE_FACES BOTTOM */
1259 caltinay 3703 }
1260    
1261 caltinay 3691 } // end of namespace ripley
1262    

  ViewVC Help
Powered by ViewVC 1.1.26